diff --git a/TMessagesProj/build.gradle b/TMessagesProj/build.gradle index 75c73cd3b..aee5fc27c 100644 --- a/TMessagesProj/build.gradle +++ b/TMessagesProj/build.gradle @@ -3,15 +3,15 @@ import cn.hutool.core.util.RuntimeUtil apply plugin: "com.android.application" apply plugin: "kotlin-android" -def verName = "7.8.0-1" -def verCode = 340 +def verName = "7.8.1-preview01" +def verCode = 345 if (System.getenv("DEBUG_BUILD") == "true") { verName += "-" + RuntimeUtil.execForStr("git log --pretty=format:'%h' -n 1") } -def officialVer = "7.8.0" -def officialCode = 2360 +def officialVer = "7.8.1" +def officialCode = 2372 def serviceAccountCredentialsFile = rootProject.file("service_account_credentials.json") @@ -390,7 +390,7 @@ def playCoreVersion = "1.10.0" dependencies { implementation "androidx.browser:browser:1.3.0" - implementation "androidx.core:core-ktx:1.6.0-beta01" + implementation "androidx.core:core-ktx:1.6.0" implementation "androidx.palette:palette-ktx:1.0.0" implementation "androidx.viewpager:viewpager:1.0.0" implementation "androidx.exifinterface:exifinterface:1.3.2" diff --git a/TMessagesProj/jni/CMakeLists.txt b/TMessagesProj/jni/CMakeLists.txt index cdee8f550..88e818c90 100644 --- a/TMessagesProj/jni/CMakeLists.txt +++ b/TMessagesProj/jni/CMakeLists.txt @@ -718,7 +718,7 @@ target_include_directories(${NATIVE_LIB} PUBLIC lz4) target_link_libraries(${NATIVE_LIB} - -Wl,--whole-archive rnnoise voipandroid -Wl,--no-whole-archive + -Wl,--whole-archive rnnoise openh264 voipandroid -Wl,--no-whole-archive tgvoip tgcalls tgcalls_tp diff --git a/TMessagesProj/jni/image.cpp b/TMessagesProj/jni/image.cpp index 771e5b65e..f38b62c65 100644 --- a/TMessagesProj/jni/image.cpp +++ b/TMessagesProj/jni/image.cpp @@ -1182,7 +1182,7 @@ std::vector> gatherPositions(std::vector> current = gatherPositions(positions, phase); auto colorsArray = (uint8_t *) env->GetIntArrayElements(colors, nullptr); - /*float *newPixelCache = nullptr; + float *newPixelCache = nullptr; if (pixelCache 
== nullptr) { newPixelCache = new float[width * height * 2]; - }*/ + } float directPixelY; float centerDistanceY; float centerDistanceY2; int32_t colorsCount = colorsArray[12] == 0 ? 3 : 4; for (int y = 0; y < height; y++) { - //if (pixelCache == nullptr) { + if (pixelCache == nullptr) { directPixelY = (float) y / (float) height; centerDistanceY = directPixelY - 0.5f; centerDistanceY2 = centerDistanceY * centerDistanceY; - //} + } uint32_t offset = y * stride; for (int x = 0; x < width; x++) { float pixelX; float pixelY; - /*if (pixelCache != nullptr) { + if (pixelCache != nullptr) { pixelX = pixelCache[(y * width + x) * 2]; - pixelX = pixelCache[(y * width + x) * 2 + 1]; - } else {*/ + pixelY = pixelCache[(y * width + x) * 2 + 1]; + } else { float directPixelX = (float) x / (float) width; float centerDistanceX = directPixelX - 0.5f; @@ -1250,9 +1250,9 @@ JNIEXPORT void Java_org_telegram_messenger_Utilities_generateGradient(JNIEnv *en float sinTheta = sinf(theta); float cosTheta = cosf(theta); - pixelX = /*newPixelCache[(y * width + x) * 2] =*/ std::max(0.0f, std::min(1.0f, 0.5f + centerDistanceX * cosTheta - centerDistanceY * sinTheta)); - pixelY = /*newPixelCache[(y * width + x) * 2 + 1] =*/ std::max(0.0f, std::min(1.0f, 0.5f + centerDistanceX * sinTheta + centerDistanceY * cosTheta)); - //} + pixelX = newPixelCache[(y * width + x) * 2] = std::max(0.0f, std::min(1.0f, 0.5f + centerDistanceX * cosTheta - centerDistanceY * sinTheta)); + pixelY = newPixelCache[(y * width + x) * 2 + 1] = std::max(0.0f, std::min(1.0f, 0.5f + centerDistanceX * sinTheta + centerDistanceY * cosTheta)); + } float distanceSum = 0.0f; @@ -1282,10 +1282,10 @@ JNIEXPORT void Java_org_telegram_messenger_Utilities_generateGradient(JNIEnv *en pixels[offset + x * 4 + 3] = 0xff; } } - /*if (newPixelCache != nullptr) { + if (newPixelCache != nullptr) { delete [] pixelCache; pixelCache = newPixelCache; - }*/ + } env->ReleaseIntArrayElements(colors, (jint *) colorsArray, JNI_ABORT); diff --git 
a/TMessagesProj/jni/tgnet/ConnectionsManager.cpp b/TMessagesProj/jni/tgnet/ConnectionsManager.cpp index 8eec80a63..be4d9c191 100644 --- a/TMessagesProj/jni/tgnet/ConnectionsManager.cpp +++ b/TMessagesProj/jni/tgnet/ConnectionsManager.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include "ConnectionsManager.h" @@ -38,7 +39,7 @@ JavaVM *javaVm = nullptr; //JNIEnv *jniEnv[MAX_ACCOUNT_COUNT]; std::vector jniEnv(10); jclass jclass_ByteBuffer = nullptr; -jmethodID jclass_ByteBuffer_allocateDirect = 0; +jmethodID jclass_ByteBuffer_allocateDirect = nullptr; #endif static bool done = false; @@ -101,7 +102,7 @@ ConnectionsManager::ConnectionsManager(int32_t instance) { exit(1); } - EventObject *eventObject = new EventObject(pipeFd, EventObjectTypePipe); + auto eventObject = new EventObject(pipeFd, EventObjectTypePipe); epoll_event eventMask = {}; eventMask.events = EPOLLIN; @@ -119,7 +120,7 @@ ConnectionsManager::ConnectionsManager(int32_t instance) { exit(1); } - pthread_mutex_init(&mutex, NULL); + pthread_mutex_init(&mutex, nullptr); } ConnectionsManager::~ConnectionsManager() { @@ -154,7 +155,7 @@ ConnectionsManager& ConnectionsManager::getInstance(int32_t instanceNum) { int ConnectionsManager::callEvents(int64_t now) { if (!events.empty()) { - for (std::list::iterator iter = events.begin(); iter != events.end();) { + for (auto iter = events.begin(); iter != events.end();) { EventObject *eventObject = (*iter); if (eventObject->time <= now) { iter = events.erase(iter); @@ -168,7 +169,7 @@ int ConnectionsManager::callEvents(int64_t now) { if (!networkPaused) { return 1000; } - int32_t timeToPushPing = (int32_t) ((sendingPushPing ? 30000 : nextPingTimeOffset) - llabs(now - lastPushPingTime)); + auto timeToPushPing = (int32_t) ((sendingPushPing ? 
30000 : nextPingTimeOffset) - llabs(now - lastPushPingTime)); if (timeToPushPing <= 0) { return 1000; } @@ -203,7 +204,7 @@ void ConnectionsManager::select() { int64_t now = getCurrentTimeMonotonicMillis(); callEvents(now); for (int32_t a = 0; a < eventsCount; a++) { - EventObject *eventObject = (EventObject *) epollEvents[a].data.ptr; + auto eventObject = (EventObject *) epollEvents[a].data.ptr; eventObject->onEvent(epollEvents[a].events); } activeConnectionsCopy.resize(activeConnections.size()); @@ -240,8 +241,8 @@ void ConnectionsManager::select() { if (lastPauseTime != 0 && llabs(now - lastPauseTime) >= nextSleepTimeout) { bool dontSleep = !requestingSaltsForDc.empty(); if (!dontSleep) { - for (requestsIter iter = runningRequests.begin(); iter != runningRequests.end(); iter++) { - Request *request = iter->get(); + for (auto & runningRequest : runningRequests) { + Request *request = runningRequest.get(); if (request->connectionType & ConnectionTypeDownload || request->connectionType & ConnectionTypeUpload) { dontSleep = true; break; @@ -249,8 +250,8 @@ void ConnectionsManager::select() { } } if (!dontSleep) { - for (requestsIter iter = requestsQueue.begin(); iter != requestsQueue.end(); iter++) { - Request *request = iter->get(); + for (auto & iter : requestsQueue) { + Request *request = iter.get(); if (request->connectionType & ConnectionTypeDownload || request->connectionType & ConnectionTypeUpload) { dontSleep = true; break; @@ -260,8 +261,8 @@ void ConnectionsManager::select() { if (!dontSleep) { if (!networkPaused) { if (LOGS_ENABLED) DEBUG_D("pausing network and timers by sleep time = %d", nextSleepTimeout); - for (std::map::iterator iter = datacenters.begin(); iter != datacenters.end(); iter++) { - iter->second->suspendConnections(false); + for (auto & dc : datacenters) { + dc.second->suspendConnections(false); } } networkPaused = true; @@ -273,11 +274,11 @@ void ConnectionsManager::select() { } if (networkPaused) { networkPaused = false; - for 
(std::map::iterator iter = datacenters.begin(); iter != datacenters.end(); iter++) { - if (iter->second->isHandshaking(false)) { - iter->second->createGenericConnection()->connect(); - } else if (iter->second->isHandshaking(true)) { - iter->second->createGenericMediaConnection()->connect(); + for (auto & dc : datacenters) { + if (dc.second->isHandshaking(false)) { + dc.second->createGenericConnection()->connect(); + } else if (dc.second->isHandshaking(true)) { + dc.second->createGenericMediaConnection()->connect(); } } if (LOGS_ENABLED) DEBUG_D("resume network and timers"); @@ -321,7 +322,7 @@ void ConnectionsManager::scheduleEvent(EventObject *eventObject, uint32_t time) } void ConnectionsManager::removeEvent(EventObject *eventObject) { - for (std::list::iterator iter = events.begin(); iter != events.end(); iter++) { + for (auto iter = events.begin(); iter != events.end(); iter++) { if (*iter == eventObject) { events.erase(iter); break; @@ -340,9 +341,9 @@ void ConnectionsManager::wakeup() { void *ConnectionsManager::ThreadProc(void *data) { if (LOGS_ENABLED) DEBUG_D("network thread started"); - ConnectionsManager *networkManager = (ConnectionsManager *) (data); + auto networkManager = (ConnectionsManager *) (data); #ifdef ANDROID - javaVm->AttachCurrentThread(&jniEnv[networkManager->instanceNum], NULL); + javaVm->AttachCurrentThread(&jniEnv[networkManager->instanceNum], nullptr); #endif if (networkManager->currentUserId != 0 && networkManager->pushConnectionEnabled) { Datacenter *datacenter = networkManager->getDatacenterWithId(networkManager->currentDatacenterId); @@ -398,7 +399,7 @@ void ConnectionsManager::loadConfig() { count = buffer->readUint32(nullptr); for (uint32_t a = 0; a < count; a++) { - Datacenter *datacenter = new Datacenter(instanceNum, buffer); + auto datacenter = new Datacenter(instanceNum, buffer); datacenters[datacenter->getDatacenterId()] = datacenter; if (LOGS_ENABLED) DEBUG_D("datacenter(%p) %u loaded (hasAuthKey = %d, 0x%" PRIx64 ")", 
datacenter, datacenter->getDatacenterId(), (int) datacenter->hasPermanentAuthKey(), datacenter->getPermanentAuthKeyId()); } @@ -425,7 +426,7 @@ void ConnectionsManager::loadConfig() { initDatacenters(); - if ((datacenters.size() != 0 && currentDatacenterId == 0) || pushSessionId == 0) { + if ((!datacenters.empty() && currentDatacenterId == 0) || pushSessionId == 0) { if (pushSessionId == 0) { RAND_bytes((uint8_t *) &pushSessionId, 8); } @@ -455,15 +456,15 @@ void ConnectionsManager::saveConfigInternal(NativeByteBuffer *buffer) { std::vector sessions; currentDatacenter->getSessions(sessions); - uint32_t count = (uint32_t) sessions.size(); + auto count = (uint32_t) sessions.size(); buffer->writeInt32(count); for (uint32_t a = 0; a < count; a++) { buffer->writeInt64(sessions[a]); } count = (uint32_t) datacenters.size(); buffer->writeInt32(count); - for (std::map::iterator iter = datacenters.begin(); iter != datacenters.end(); iter++) { - iter->second->serializeToStream(buffer); + for (auto & datacenter : datacenters) { + datacenter.second->serializeToStream(buffer); } } } @@ -583,7 +584,7 @@ int32_t ConnectionsManager::getTimeDifference() { } int64_t ConnectionsManager::generateMessageId() { - int64_t messageId = (int64_t) ((((double) getCurrentTimeMillis() + ((double) timeDifference) * 1000) * 4294967296.0) / 1000.0); + auto messageId = (int64_t) ((((double) getCurrentTimeMillis() + ((double) timeDifference) * 1000) * 4294967296.0) / 1000.0); if (messageId <= lastOutgoingMessageId) { messageId = lastOutgoingMessageId + 1; } @@ -600,7 +601,7 @@ bool ConnectionsManager::isNetworkAvailable() { void ConnectionsManager::cleanUp(bool resetKeys, int32_t datacenterId) { scheduleTask([&, resetKeys, datacenterId] { - for (requestsIter iter = requestsQueue.begin(); iter != requestsQueue.end();) { + for (auto iter = requestsQueue.begin(); iter != requestsQueue.end();) { Request *request = iter->get(); if (datacenterId != -1) { Datacenter *requestDatacenter = 
getDatacenterWithId(request->datacenterId); @@ -614,7 +615,7 @@ void ConnectionsManager::cleanUp(bool resetKeys, int32_t datacenterId) { continue; } if (request->onCompleteRequestCallback != nullptr) { - TL_error *error = new TL_error(); + auto error = new TL_error(); error->code = -1000; error->text = ""; request->onComplete(nullptr, error, 0, 0); @@ -622,7 +623,7 @@ void ConnectionsManager::cleanUp(bool resetKeys, int32_t datacenterId) { } iter = requestsQueue.erase(iter); } - for (requestsIter iter = runningRequests.begin(); iter != runningRequests.end();) { + for (auto iter = runningRequests.begin(); iter != runningRequests.end();) { Request *request = iter->get(); if (datacenterId != -1) { Datacenter *requestDatacenter = getDatacenterWithId(request->datacenterId); @@ -636,7 +637,7 @@ void ConnectionsManager::cleanUp(bool resetKeys, int32_t datacenterId) { continue; } if (request->onCompleteRequestCallback != nullptr) { - TL_error *error = new TL_error(); + auto error = new TL_error(); error->code = -1000; error->text = ""; request->onComplete(nullptr, error, 0, 0); @@ -646,15 +647,15 @@ void ConnectionsManager::cleanUp(bool resetKeys, int32_t datacenterId) { } quickAckIdToRequestIds.clear(); - for (std::map::iterator iter = datacenters.begin(); iter != datacenters.end(); iter++) { - if (datacenterId != -1 && iter->second->getDatacenterId() != datacenterId) { + for (auto & datacenter : datacenters) { + if (datacenterId != -1 && datacenter.second->getDatacenterId() != datacenterId) { continue; } if (resetKeys) { - iter->second->clearAuthKey(HandshakeTypeAll); + datacenter.second->clearAuthKey(HandshakeTypeAll); } - iter->second->recreateSessions(HandshakeTypeAll); - iter->second->authorized = false; + datacenter.second->recreateSessions(HandshakeTypeAll); + datacenter.second->authorized = false; } if (datacenterId == -1) { sessionsToDestroy.clear(); @@ -734,11 +735,11 @@ void ConnectionsManager::onConnectionClosed(Connection *connection, int reason) 
lastPushPingTime = getCurrentTimeMonotonicMillis() - nextPingTimeOffset + 4000; } else if (connection->getConnectionType() == ConnectionTypeProxy) { scheduleTask([&, connection] { - for (std::vector>::iterator iter = proxyActiveChecks.begin(); iter != proxyActiveChecks.end(); iter++) { + for (auto iter = proxyActiveChecks.begin(); iter != proxyActiveChecks.end(); iter++) { ProxyCheckInfo *proxyCheckInfo = iter->get(); if (proxyCheckInfo->connectionNum == connection->getConnectionNum()) { bool found = false; - for (requestsIter iter2 = runningRequests.begin(); iter2 != runningRequests.end(); iter2++) { + for (auto iter2 = runningRequests.begin(); iter2 != runningRequests.end(); iter2++) { Request *request = iter2->get(); if (connection->getConnectionToken() == request->connectionToken && request->requestToken == proxyCheckInfo->requestToken && (request->connectionType & 0x0000ffff) == ConnectionTypeProxy) { request->completed = true; @@ -789,12 +790,12 @@ void ConnectionsManager::onConnectionConnected(Connection *connection) { } void ConnectionsManager::onConnectionQuickAckReceived(Connection *connection, int32_t ack) { - std::map>::iterator iter = quickAckIdToRequestIds.find(ack); + auto iter = quickAckIdToRequestIds.find(ack); if (iter == quickAckIdToRequestIds.end()) { return; } - for (requestsIter iter2 = runningRequests.begin(); iter2 != runningRequests.end(); iter2++) { - Request *request = iter2->get(); + for (auto & runningRequest : runningRequests) { + Request *request = runningRequest.get(); if (std::find(iter->second.begin(), iter->second.end(), request->requestToken) != iter->second.end()) { request->onQuickAck(); } @@ -903,7 +904,7 @@ void ConnectionsManager::onConnectionDataReceived(Connection *connection, Native length -= padding; } } - if (length < 24 + 32 || !connection->allowsCustomPadding() && (length - 24) % 16 != 0 || !datacenter->decryptServerResponse(keyId, data->bytes() + mark + 8, data->bytes() + mark + 24, length - 24, connection)) { + if 
(length < 24 + 32 || (!connection->allowsCustomPadding() && (length - 24) % 16 != 0) || !datacenter->decryptServerResponse(keyId, data->bytes() + mark + 8, data->bytes() + mark + 24, length - 24, connection)) { if (LOGS_ENABLED) DEBUG_E("connection(%p) unable to decrypt server response", connection); connection->reconnect(); return; @@ -985,11 +986,11 @@ bool ConnectionsManager::hasPendingRequestsForConnection(Connection *connection) return true; } } - for (requestsIter iter2 = runningRequests.begin(); iter2 != runningRequests.end(); iter2++) { - Request *request = iter2->get(); - uint8_t connectionNum = (uint8_t) (request->connectionType >> 16); - ConnectionType connectionType = (ConnectionType) (request->connectionType & 0x0000ffff); - if (connectionType == type && connectionNum == num || request->connectionToken == token) { + for (auto & runningRequest : runningRequests) { + Request *request = runningRequest.get(); + auto connectionNum = (uint8_t) (request->connectionType >> 16); + auto connectionType = (ConnectionType) (request->connectionType & 0x0000ffff); + if ((connectionType == type && connectionNum == num) || request->connectionToken == token) { return true; } } @@ -999,8 +1000,8 @@ bool ConnectionsManager::hasPendingRequestsForConnection(Connection *connection) } TLObject *ConnectionsManager::getRequestWithMessageId(int64_t messageId) { - for (requestsIter iter = runningRequests.begin(); iter != runningRequests.end(); iter++) { - Request *request = iter->get(); + for (auto & runningRequest : runningRequests) { + Request *request = runningRequest.get(); if (request->messageId == messageId) { return request->rawRequest; } @@ -1020,16 +1021,14 @@ TLObject *ConnectionsManager::TLdeserialize(TLObject *request, uint32_t bytes, N TLObject *object = TLClassStore::TLdeserialize(data, bytes, constructor, instanceNum, error); if (error) { - if (object != nullptr) { - delete object; - } + delete object; data->position(position); return nullptr; } if (object == 
nullptr) { if (request != nullptr) { - TL_api_request *apiRequest = dynamic_cast(request); + auto apiRequest = dynamic_cast(request); if (apiRequest != nullptr) { object = apiRequest->deserializeResponse(data, bytes, instanceNum, error); if (LOGS_ENABLED) DEBUG_D("api request constructor 0x%x, don't parse", constructor); @@ -1054,24 +1053,24 @@ void ConnectionsManager::processServerResponse(TLObject *message, int64_t messag const std::type_info &typeInfo = typeid(*message); if (LOGS_ENABLED) DEBUG_D("process server response %p - %s", message, typeInfo.name()); - int64_t timeMessage = (int64_t) ((messageId != 0 ? messageId : innerMsgId) / 4294967296.0 * 1000); + auto timeMessage = (int64_t) ((messageId != 0 ? messageId : innerMsgId) / 4294967296.0 * 1000); Datacenter *datacenter = connection->getDatacenter(); if (typeInfo == typeid(TL_new_session_created)) { - TL_new_session_created *response = (TL_new_session_created *) message; + auto response = (TL_new_session_created *) message; if (!connection->isSessionProcessed(response->unique_id)) { if (LOGS_ENABLED) DEBUG_D("connection(%p, account%u, dc%u, type %d) new session created (first message id: 0x%" PRIx64 ", server salt: 0x%" PRIx64 ", unique id: 0x%" PRIx64 ")", connection, instanceNum, datacenter->getDatacenterId(), connection->getConnectionType(), (uint64_t) response->first_msg_id, (uint64_t) response->server_salt, (uint64_t) response->unique_id); - std::unique_ptr salt = std::unique_ptr(new TL_future_salt()); + std::unique_ptr salt = std::make_unique(); salt->valid_until = salt->valid_since = getCurrentTime(); salt->valid_until += 30 * 60; salt->salt = response->server_salt; datacenter->addServerSalt(salt, Connection::isMediaConnectionType(connection->getConnectionType())); - for (requestsIter iter = runningRequests.begin(); iter != runningRequests.end(); iter++) { - Request *request = iter->get(); + for (auto & runningRequest : runningRequests) { + Request *request = runningRequest.get(); Datacenter 
*requestDatacenter = getDatacenterWithId(request->datacenterId); if (request->messageId < response->first_msg_id && request->connectionType & connection->getConnectionType() && requestDatacenter != nullptr && requestDatacenter->getDatacenterId() == datacenter->getDatacenterId()) { if (LOGS_ENABLED) DEBUG_D("clear request %p - %s", request->rawRequest, typeid(*request->rawRequest).name()); @@ -1093,7 +1092,7 @@ void ConnectionsManager::processServerResponse(TLObject *message, int64_t messag connection->addProcessedSession(response->unique_id); } } else if (typeInfo == typeid(TL_msg_container)) { - TL_msg_container *response = (TL_msg_container *) message; + auto response = (TL_msg_container *) message; size_t count = response->messages.size(); if (LOGS_ENABLED) DEBUG_D("received container with %d items", (int32_t) count); for (uint32_t a = 0; a < count; a++) { @@ -1134,12 +1133,12 @@ void ConnectionsManager::processServerResponse(TLObject *message, int64_t messag if (LOGS_ENABLED) DEBUG_D("connection(%p, account%u, dc%u, type %d) received push ping", connection, instanceNum, datacenter->getDatacenterId(), connection->getConnectionType()); sendingPushPing = false; } else { - TL_pong *response = (TL_pong *) message; + auto response = (TL_pong *) message; if (response->ping_id >= 2000000) { - for (std::vector>::iterator iter = proxyActiveChecks.begin(); iter != proxyActiveChecks.end(); iter++) { + for (auto iter = proxyActiveChecks.begin(); iter != proxyActiveChecks.end(); iter++) { ProxyCheckInfo *proxyCheckInfo = iter->get(); if (proxyCheckInfo->pingId == response->ping_id) { - for (requestsIter iter2 = runningRequests.begin(); iter2 != runningRequests.end(); iter2++) { + for (auto iter2 = runningRequests.begin(); iter2 != runningRequests.end(); iter2++) { Request *request = iter2->get(); if (request->requestToken == proxyCheckInfo->requestToken) { int64_t ping = llabs(getCurrentTimeMonotonicMillis() - request->startTimeMillis); @@ -1173,9 +1172,9 @@ void 
ConnectionsManager::processServerResponse(TLObject *message, int64_t messag } } } else if (typeInfo == typeid(TL_future_salts)) { - TL_future_salts *response = (TL_future_salts *) message; + auto response = (TL_future_salts *) message; int64_t requestMid = response->req_msg_id; - for (requestsIter iter = runningRequests.begin(); iter != runningRequests.end(); iter++) { + for (auto iter = runningRequests.begin(); iter != runningRequests.end(); iter++) { Request *request = iter->get(); if (request->respondsToMessageId(requestMid)) { request->onComplete(response, nullptr, connection->currentNetworkType, timeMessage); @@ -1185,16 +1184,16 @@ void ConnectionsManager::processServerResponse(TLObject *message, int64_t messag } } } else if (dynamic_cast(message)) { - DestroySessionRes *response = (DestroySessionRes *) message; + auto response = (DestroySessionRes *) message; if (LOGS_ENABLED) DEBUG_D("destroyed session 0x%" PRIx64 " (%s)", (uint64_t) response->session_id, typeInfo == typeid(TL_destroy_session_ok) ? 
"ok" : "not found"); } else if (typeInfo == typeid(TL_rpc_result)) { - TL_rpc_result *response = (TL_rpc_result *) message; + auto response = (TL_rpc_result *) message; int64_t resultMid = response->req_msg_id; if (resultMid == lastInvokeAfterMessageId) { lastInvokeAfterMessageId = 0; } - bool hasResult = response->result.get() != nullptr; + bool hasResult = response->result != nullptr; bool ignoreResult = false; if (hasResult) { TLObject *object = response->result.get(); @@ -1213,7 +1212,7 @@ void ConnectionsManager::processServerResponse(TLObject *message, int64_t messag std::string &possibleError = migrateErrors[a]; if (error->error_message.find(possibleError) != std::string::npos) { std::string num = error->error_message.substr(possibleError.size(), error->error_message.size() - possibleError.size()); - uint32_t val = (uint32_t) atoi(num.c_str()); + auto val = (uint32_t) atoi(num.c_str()); migrateToDatacenterId = val; } } @@ -1229,7 +1228,7 @@ void ConnectionsManager::processServerResponse(TLObject *message, int64_t messag uint32_t retryRequestsConnections = 0; if (!ignoreResult) { - for (requestsIter iter = runningRequests.begin(); iter != runningRequests.end(); iter++) { + for (auto iter = runningRequests.begin(); iter != runningRequests.end(); iter++) { Request *request = iter->get(); if (!request->respondsToMessageId(resultMid)) { continue; @@ -1244,7 +1243,7 @@ void ConnectionsManager::processServerResponse(TLObject *message, int64_t messag NativeByteBuffer *unpacked_data = nullptr; TLObject *result = response->result.get(); if (typeid(*result) == typeid(TL_gzip_packed)) { - TL_gzip_packed *innerResponse = (TL_gzip_packed *) result; + auto innerResponse = (TL_gzip_packed *) result; unpacked_data = decompressGZip(innerResponse->packed_data.get()); TLObject *object = TLdeserialize(request->rawRequest, unpacked_data->limit(), unpacked_data); if (object != nullptr) { @@ -1254,7 +1253,7 @@ void ConnectionsManager::processServerResponse(TLObject *message, 
int64_t messag } } - hasResult = response->result.get() != nullptr; + hasResult = response->result != nullptr; error = hasResult ? dynamic_cast(response->result.get()) : nullptr; TL_error *error2 = hasResult ? dynamic_cast(response->result.get()) : nullptr; if (error != nullptr) { @@ -1279,11 +1278,11 @@ void ConnectionsManager::processServerResponse(TLObject *message, int64_t messag } else if ((request->requestFlags & RequestFlagFailOnServerErrors) == 0 || processEvenFailed) { if (error->error_code == 500 || error->error_code < 0) { static std::string waitFailed = "MSG_WAIT_FAILED"; + static std::string waitTimeout = "MSG_WAIT_TIMEOUT"; if (error->error_message.find(waitFailed) != std::string::npos) { - request->minStartTime = (int32_t) (getCurrentTimeMonotonicMillis() / 1000 + 1); request->startTime = 0; request->startTimeMillis = 0; - request->requestFlags &=~ RequestFlagInvokeAfter; + request->requestFlags |= RequestFlagResendAfter; } else { if (isWorkerBusy) { request->minStartTime = 0; @@ -1319,11 +1318,12 @@ void ConnectionsManager::processServerResponse(TLObject *message, int64_t messag } else if (error->error_code == 400) { static std::string waitFailed = "MSG_WAIT_FAILED"; static std::string bindFailed = "ENCRYPTED_MESSAGE_INVALID"; - if (error->error_message.find(waitFailed) != std::string::npos) { + static std::string waitTimeout = "MSG_WAIT_TIMEOUT"; + if (error->error_message.find(waitTimeout) != std::string::npos || error->error_message.find(waitFailed) != std::string::npos) { discardResponse = true; - request->minStartTime = (int32_t) (getCurrentTimeMonotonicMillis() / 1000 + 1); request->startTime = 0; request->startTimeMillis = 0; + request->requestFlags |= RequestFlagResendAfter; } else if (error->error_message.find(bindFailed) != std::string::npos && typeid(*request->rawRequest) == typeid(TL_auth_bindTempAuthKey)) { int datacenterId; if (delegate != nullptr && getDatacenterWithId(DEFAULT_DATACENTER_ID) == datacenter) { @@ -1355,9 +1355,7 @@ void 
ConnectionsManager::processServerResponse(TLObject *message, int64_t messag if (implicitError != nullptr || error2 != nullptr) { isError = true; request->onComplete(nullptr, implicitError != nullptr ? implicitError : error2, connection->currentNetworkType, timeMessage); - if (error2 != nullptr) { - delete error2; - } + delete error2; } else { request->onComplete(response->result.get(), nullptr, connection->currentNetworkType, timeMessage); } @@ -1399,9 +1397,7 @@ void ConnectionsManager::processServerResponse(TLObject *message, int64_t messag if (unpacked_data != nullptr) { unpacked_data->reuse(); } - if (implicitError != nullptr) { - delete implicitError; - } + delete implicitError; } if (!discardResponse) { @@ -1439,7 +1435,7 @@ void ConnectionsManager::processServerResponse(TLObject *message, int64_t messag } else if (typeInfo == typeid(TL_msgs_ack)) { } else if (typeInfo == typeid(TL_bad_msg_notification)) { - TL_bad_msg_notification *result = (TL_bad_msg_notification *) message; + auto result = (TL_bad_msg_notification *) message; if (LOGS_ENABLED) DEBUG_E("bad message notification %d for messageId 0x%" PRIx64 ", seqno %d", result->error_code, result->bad_msg_id, result->bad_msg_seqno); switch (result->error_code) { case 16: @@ -1454,7 +1450,7 @@ void ConnectionsManager::processServerResponse(TLObject *message, int64_t messag } if (realId != 0) { - int64_t time = (int64_t) (messageId / 4294967296.0 * 1000); + auto time = (int64_t) (messageId / 4294967296.0 * 1000); int64_t currentTime = getCurrentTimeMillis(); timeDifference = (int32_t) ((time - currentTime) / 1000 - currentPingTime / 2); } @@ -1467,8 +1463,8 @@ void ConnectionsManager::processServerResponse(TLObject *message, int64_t messag break; } case 20: { - for (requestsIter iter = runningRequests.begin(); iter != runningRequests.end(); iter++) { - Request *request = iter->get(); + for (auto & runningRequest : runningRequests) { + Request *request = runningRequest.get(); if 
(request->respondsToMessageId(result->bad_msg_id)) { if (request->completed) { break; @@ -1486,18 +1482,18 @@ void ConnectionsManager::processServerResponse(TLObject *message, int64_t messag bool media = Connection::isMediaConnectionType(connection->getConnectionType()); requestSaltsForDatacenter(datacenter, media, connection->getConnectionType() == ConnectionTypeTemp); if (messageId != 0) { - int64_t time = (int64_t) (messageId / 4294967296.0 * 1000); + auto time = (int64_t) (messageId / 4294967296.0 * 1000); int64_t currentTime = getCurrentTimeMillis(); timeDifference = (int32_t) ((time - currentTime) / 1000 - currentPingTime / 2); lastOutgoingMessageId = (messageId > lastOutgoingMessageId ? messageId : lastOutgoingMessageId); } if ((connection->getConnectionType() & ConnectionTypeDownload) == 0 || !datacenter->containsServerSalt(messageSalt, media)) { - TL_bad_server_salt *response = (TL_bad_server_salt *) message; + auto response = (TL_bad_server_salt *) message; int64_t resultMid = response->bad_msg_id; if (resultMid != 0) { bool beginHandshake = false; - for (requestsIter iter = runningRequests.begin(); iter != runningRequests.end(); iter++) { - Request *request = iter->get(); + for (auto & runningRequest : runningRequests) { + Request *request = runningRequest.get(); if (!beginHandshake && request->datacenterId == datacenter->getDatacenterId() && typeid(*request->rawRequest) == typeid(TL_auth_bindTempAuthKey) && request->respondsToMessageId(response->bad_msg_id)) { beginHandshake = true; } @@ -1517,7 +1513,7 @@ void ConnectionsManager::processServerResponse(TLObject *message, int64_t messag datacenter->clearServerSalts(media); - std::unique_ptr salt = std::unique_ptr(new TL_future_salt()); + std::unique_ptr salt = std::make_unique(); salt->valid_until = salt->valid_since = getCurrentTime(); salt->valid_until += 30 * 60; salt->salt = messageSalt; @@ -1529,15 +1525,15 @@ void ConnectionsManager::processServerResponse(TLObject *message, int64_t messag } } } 
else if (typeInfo == typeid(MsgsStateInfo)) { - MsgsStateInfo *response = (MsgsStateInfo *) message; + auto response = (MsgsStateInfo *) message; if (LOGS_ENABLED) DEBUG_D("connection(%p, account%u, dc%u, type %d) got %s for messageId 0x%" PRIx64, connection, instanceNum, datacenter->getDatacenterId(), connection->getConnectionType(), typeInfo.name(), response->req_msg_id); - std::map::iterator mIter = resendRequests.find(response->req_msg_id); + auto mIter = resendRequests.find(response->req_msg_id); if (mIter != resendRequests.end()) { if (LOGS_ENABLED) DEBUG_D("found resend for messageId 0x%" PRIx64, mIter->second); connection->addMessageToConfirm(mIter->second); - for (requestsIter iter = runningRequests.begin(); iter != runningRequests.end(); iter++) { - Request *request = iter->get(); + for (auto & runningRequest : runningRequests) { + Request *request = runningRequest.get(); if (request->respondsToMessageId(mIter->second)) { if (request->completed) { break; @@ -1549,21 +1545,21 @@ void ConnectionsManager::processServerResponse(TLObject *message, int64_t messag resendRequests.erase(mIter); } } else if (dynamic_cast(message)) { - MsgDetailedInfo *response = (MsgDetailedInfo *) message; + auto response = (MsgDetailedInfo *) message; bool requestResend = false; bool confirm = true; if (LOGS_ENABLED) DEBUG_D("connection(%p, account%u, dc%u, type %d) got %s for messageId 0x%" PRIx64, connection, instanceNum, datacenter->getDatacenterId(), connection->getConnectionType(), typeInfo.name(), response->msg_id); if (typeInfo == typeid(TL_msg_detailed_info)) { - for (requestsIter iter = runningRequests.begin(); iter != runningRequests.end(); iter++) { - Request *request = iter->get(); + for (auto & runningRequest : runningRequests) { + Request *request = runningRequest.get(); if (request->respondsToMessageId(response->msg_id)) { if (request->completed) { break; } if (LOGS_ENABLED) DEBUG_D("got TL_msg_detailed_info for rpc request %p - %s", request->rawRequest, 
typeid(*request->rawRequest).name()); - int32_t currentTime = (int32_t) (getCurrentTimeMonotonicMillis() / 1000); + auto currentTime = (int32_t) (getCurrentTimeMonotonicMillis() / 1000); if (request->lastResendTime == 0 || abs(currentTime - request->lastResendTime) >= 60) { request->lastResendTime = currentTime; requestResend = true; @@ -1580,10 +1576,10 @@ void ConnectionsManager::processServerResponse(TLObject *message, int64_t messag } if (requestResend) { - TL_msg_resend_req *request = new TL_msg_resend_req(); + auto request = new TL_msg_resend_req(); request->msg_ids.push_back(response->answer_msg_id); - NetworkMessage *networkMessage = new NetworkMessage(); - networkMessage->message = std::unique_ptr(new TL_message()); + auto networkMessage = new NetworkMessage(); + networkMessage->message = std::make_unique(); networkMessage->message->msg_id = generateMessageId(); networkMessage->message->bytes = request->getObjectSize(); networkMessage->message->body = std::unique_ptr(request); @@ -1598,7 +1594,7 @@ void ConnectionsManager::processServerResponse(TLObject *message, int64_t messag connection->addMessageToConfirm(response->answer_msg_id); } } else if (typeInfo == typeid(TL_gzip_packed)) { - TL_gzip_packed *response = (TL_gzip_packed *) message; + auto response = (TL_gzip_packed *) message; NativeByteBuffer *data = decompressGZip(response->packed_data.get()); TLObject *object = TLdeserialize(getRequestWithMessageId(messageId), data->limit(), data); if (object != nullptr) { @@ -1650,7 +1646,7 @@ void ConnectionsManager::sendPing(Datacenter *datacenter, bool usePushConnection if (connection == nullptr || (!usePushConnection && connection->getConnectionToken() == 0)) { return; } - TL_ping_delay_disconnect *request = new TL_ping_delay_disconnect(); + auto request = new TL_ping_delay_disconnect(); request->ping_id = ++lastPingId; if (usePushConnection) { request->disconnect_delay = 60 * 7; @@ -1659,8 +1655,8 @@ void ConnectionsManager::sendPing(Datacenter 
*datacenter, bool usePushConnection pingTime = (int32_t) (getCurrentTimeMonotonicMillis() / 1000); } - NetworkMessage *networkMessage = new NetworkMessage(); - networkMessage->message = std::unique_ptr(new TL_message()); + auto networkMessage = new NetworkMessage(); + networkMessage->message = std::make_unique(); networkMessage->message->msg_id = generateMessageId(); networkMessage->message->bytes = request->getObjectSize(); networkMessage->message->body = std::unique_ptr(request); @@ -1752,7 +1748,7 @@ void ConnectionsManager::attachConnection(ConnectionSocket *connection) { } void ConnectionsManager::detachConnection(ConnectionSocket *connection) { - std::vector::iterator iter = std::find(activeConnections.begin(), activeConnections.end(), connection); + auto iter = std::find(activeConnections.begin(), activeConnections.end(), connection); if (iter != activeConnections.end()) { activeConnections.erase(iter); } @@ -1764,7 +1760,7 @@ int32_t ConnectionsManager::sendRequestInternal(TLObject *object, onCompleteFunc delete object; return 0; } - Request *request = new Request(instanceNum, lastRequestToken++, connetionType, flags, datacenterId, onComplete, onQuickAck, nullptr); + auto request = new Request(instanceNum, lastRequestToken++, connetionType, flags, datacenterId, onComplete, onQuickAck, nullptr); request->rawRequest = object; request->rpcRequest = wrapInLayer(object, getDatacenterWithId(datacenterId), request); requestsQueue.push_back(std::unique_ptr(request)); @@ -1789,7 +1785,7 @@ int32_t ConnectionsManager::sendRequest(TLObject *object, onCompleteFunc onCompl requestToken = lastRequestToken++; } scheduleTask([&, requestToken, object, onComplete, onQuickAck, flags, datacenterId, connetionType, immediate] { - Request *request = new Request(instanceNum, requestToken, connetionType, flags, datacenterId, onComplete, onQuickAck, nullptr); + auto request = new Request(instanceNum, requestToken, connetionType, flags, datacenterId, onComplete, onQuickAck, nullptr); 
request->rawRequest = object; request->rpcRequest = wrapInLayer(object, getDatacenterWithId(datacenterId), request); requestsQueue.push_back(std::unique_ptr(request)); @@ -1826,7 +1822,7 @@ void ConnectionsManager::sendRequest(TLObject *object, onCompleteFunc onComplete } scheduleTask([&, requestToken, object, onComplete, onQuickAck, onWriteToSocket, flags, datacenterId, connetionType, immediate, ptr1, ptr2, ptr3] { if (LOGS_ENABLED) DEBUG_D("send request %p - %s", object, typeid(*object).name()); - Request *request = new Request(instanceNum, requestToken, connetionType, flags, datacenterId, onComplete, onQuickAck, onWriteToSocket); + auto request = new Request(instanceNum, requestToken, connetionType, flags, datacenterId, onComplete, onQuickAck, onWriteToSocket); request->rawRequest = object; request->ptr1 = ptr1; request->ptr2 = ptr2; @@ -1843,13 +1839,13 @@ void ConnectionsManager::sendRequest(TLObject *object, onCompleteFunc onComplete void ConnectionsManager::cancelRequestsForGuid(int32_t guid) { scheduleTask([&, guid] { - std::map>::iterator iter = requestsByGuids.find(guid); + auto iter = requestsByGuids.find(guid); if (iter != requestsByGuids.end()) { std::vector &requests = iter->second; size_t count = requests.size(); for (uint32_t a = 0; a < count; a++) { cancelRequestInternal(requests[a], 0, true, false); - std::map::iterator iter2 = guidsByRequests.find(requests[a]); + auto iter2 = guidsByRequests.find(requests[a]); if (iter2 != guidsByRequests.end()) { guidsByRequests.erase(iter2); } @@ -1861,7 +1857,7 @@ void ConnectionsManager::cancelRequestsForGuid(int32_t guid) { void ConnectionsManager::bindRequestToGuid(int32_t requestToken, int32_t guid) { scheduleTask([&, requestToken, guid] { - std::map>::iterator iter = requestsByGuids.find(guid); + auto iter = requestsByGuids.find(guid); if (iter != requestsByGuids.end()) { iter->second.push_back(requestToken); } else { @@ -1905,11 +1901,11 @@ void ConnectionsManager::switchBackend() { } void 
ConnectionsManager::removeRequestFromGuid(int32_t requestToken) { - std::map::iterator iter2 = guidsByRequests.find(requestToken); + auto iter2 = guidsByRequests.find(requestToken); if (iter2 != guidsByRequests.end()) { - std::map>::iterator iter = requestsByGuids.find(iter2->first); + auto iter = requestsByGuids.find(iter2->first); if (iter != requestsByGuids.end()) { - std::vector::iterator iter3 = std::find(iter->second.begin(), iter->second.end(), iter->first); + auto iter3 = std::find(iter->second.begin(), iter->second.end(), iter->first); if (iter3 != iter->second.end()) { iter->second.erase(iter3); if (iter->second.empty()) { @@ -1922,9 +1918,9 @@ void ConnectionsManager::removeRequestFromGuid(int32_t requestToken) { } bool ConnectionsManager::cancelRequestInternal(int32_t token, int64_t messageId, bool notifyServer, bool removeFromClass) { - for (requestsIter iter = requestsQueue.begin(); iter != requestsQueue.end(); iter++) { + for (auto iter = requestsQueue.begin(); iter != requestsQueue.end(); iter++) { Request *request = iter->get(); - if (token != 0 && request->requestToken == token || messageId != 0 && request->respondsToMessageId(messageId)) { + if ((token != 0 && request->requestToken == token) || (messageId != 0 && request->respondsToMessageId(messageId))) { request->cancelled = true; if (LOGS_ENABLED) DEBUG_D("cancelled queued rpc request %p - %s", request->rawRequest, typeid(*request->rawRequest).name()); requestsQueue.erase(iter); @@ -1935,11 +1931,11 @@ bool ConnectionsManager::cancelRequestInternal(int32_t token, int64_t messageId, } } - for (requestsIter iter = runningRequests.begin(); iter != runningRequests.end(); iter++) { + for (auto iter = runningRequests.begin(); iter != runningRequests.end(); iter++) { Request *request = iter->get(); - if (token != 0 && request->requestToken == token || messageId != 0 && request->respondsToMessageId(messageId)) { + if ((token != 0 && request->requestToken == token) || (messageId != 0 && 
request->respondsToMessageId(messageId))) { if (notifyServer) { - TL_rpc_drop_answer *dropAnswer = new TL_rpc_drop_answer(); + auto dropAnswer = new TL_rpc_drop_answer(); dropAnswer->req_msg_id = request->messageId; sendRequest(dropAnswer, nullptr, nullptr, RequestFlagEnableUnauthorized | RequestFlagWithoutLogin | RequestFlagFailOnServerErrors, request->datacenterId, request->connectionType, true); } @@ -2019,7 +2015,7 @@ void ConnectionsManager::sendMessagesToConnection(std::vector>::iterator iter = quickAckIdToRequestIds.find(quickAckId); + auto iter = quickAckIdToRequestIds.find(quickAckId); if (iter == quickAckIdToRequestIds.end()) { quickAckIdToRequestIds[quickAckId] = requestIds; } else { @@ -2067,10 +2063,10 @@ void ConnectionsManager::requestSaltsForDatacenter(Datacenter *datacenter, bool connectionType = ConnectionTypeGeneric; } requestingSaltsForDc.push_back(id); - TL_get_future_salts *request = new TL_get_future_salts(); + auto request = new TL_get_future_salts(); request->num = 32; sendRequest(request, [&, datacenter, id, media](TLObject *response, TL_error *error, int32_t networkType, int64_t responseTime) { - std::vector::iterator iter = std::find(requestingSaltsForDc.begin(), requestingSaltsForDc.end(), id); + auto iter = std::find(requestingSaltsForDc.begin(), requestingSaltsForDc.end(), id); if (iter != requestingSaltsForDc.end()) { requestingSaltsForDc.erase(iter); } @@ -2082,13 +2078,13 @@ void ConnectionsManager::requestSaltsForDatacenter(Datacenter *datacenter, bool } void ConnectionsManager::clearRequestsForDatacenter(Datacenter *datacenter, HandshakeType type) { - for (requestsIter iter = runningRequests.begin(); iter != runningRequests.end(); iter++) { - Request *request = iter->get(); + for (auto & runningRequest : runningRequests) { + Request *request = runningRequest.get(); Datacenter *requestDatacenter = getDatacenterWithId(request->datacenterId); if (requestDatacenter->getDatacenterId() != datacenter->getDatacenterId()) { continue; } - 
if (type == HandshakeTypePerm || type == HandshakeTypeAll || type == HandshakeTypeMediaTemp && request->isMediaRequest() || type == HandshakeTypeTemp && !request->isMediaRequest()) { + if (type == HandshakeTypePerm || type == HandshakeTypeAll || (type == HandshakeTypeMediaTemp && request->isMediaRequest()) || (type == HandshakeTypeTemp && !request->isMediaRequest())) { request->clear(true); } } @@ -2100,7 +2096,7 @@ void ConnectionsManager::registerForInternalPushUpdates() { } registeredForInternalPush = false; registeringForPush = true; - TL_account_registerDevice *request = new TL_account_registerDevice(); + auto request = new TL_account_registerDevice(); request->token_type = 7; request->token = to_string_uint64((uint64_t) pushSessionId); @@ -2119,7 +2115,7 @@ void ConnectionsManager::registerForInternalPushUpdates() { inline void addMessageToDatacenter(uint32_t datacenterId, NetworkMessage *networkMessage, std::map>> &messagesToDatacenters) { - std::map>>::iterator iter = messagesToDatacenters.find(datacenterId); + auto iter = messagesToDatacenters.find(datacenterId); if (iter == messagesToDatacenters.end()) { std::vector> &array = messagesToDatacenters[datacenterId] = std::vector>(); array.push_back(std::unique_ptr(networkMessage)); @@ -2138,11 +2134,13 @@ void ConnectionsManager::processRequestQueue(uint32_t connectionTypes, uint32_t downloadRunningRequestCount.clear(); int64_t currentTimeMillis = getCurrentTimeMonotonicMillis(); - int32_t currentTime = (int32_t) (currentTimeMillis / 1000); + auto currentTime = (int32_t) (currentTimeMillis / 1000); uint32_t genericRunningRequestCount = 0; uint32_t uploadRunningRequestCount = 0; + bool hasInvokeAfterMessage = false; + bool hasInvokeWaitMessage = false; - for (requestsIter iter = runningRequests.begin(); iter != runningRequests.end();) { + for (auto iter = runningRequests.begin(); iter != runningRequests.end();) { Request *request = iter->get(); const std::type_info &typeInfo = typeid(*request->rawRequest); @@ 
-2154,6 +2152,16 @@ void ConnectionsManager::processRequestQueue(uint32_t connectionTypes, uint32_t } datacenterId = currentDatacenterId; } + if ((request->requestFlags & RequestFlagResendAfter) != 0) { + hasInvokeWaitMessage = true; + if (hasInvokeAfterMessage) { + iter++; + continue; + } + } + if (!hasInvokeAfterMessage && (request->requestFlags & RequestFlagInvokeAfter) != 0) { + hasInvokeAfterMessage = true; + } switch (request->connectionType & 0x0000ffff) { case ConnectionTypeGeneric: @@ -2161,7 +2169,7 @@ void ConnectionsManager::processRequestQueue(uint32_t connectionTypes, uint32_t break; case ConnectionTypeDownload: { uint32_t currentCount; - std::map::iterator dcIter = downloadRunningRequestCount.find(datacenterId); + auto dcIter = downloadRunningRequestCount.find(datacenterId); if (dcIter != downloadRunningRequestCount.end()) { currentCount = dcIter->second; } else { @@ -2272,6 +2280,7 @@ void ConnectionsManager::processRequestQueue(uint32_t connectionTypes, uint32_t if (request->connectionToken != 0 && request->connectionToken != connection->getConnectionToken()) { request->lastResendTime = 0; + request->isResending = true; } request->retryCount++; @@ -2288,7 +2297,7 @@ void ConnectionsManager::processRequestQueue(uint32_t connectionTypes, uint32_t } if (request->retryCount >= retryMax) { if (LOGS_ENABLED) DEBUG_E("timed out %s", typeInfo.name()); - TL_error *error = new TL_error(); + auto error = new TL_error(); error->code = -123; error->text = "RETRY_LIMIT"; request->onComplete(nullptr, error, connection->currentNetworkType, 0); @@ -2311,14 +2320,15 @@ void ConnectionsManager::processRequestQueue(uint32_t connectionTypes, uint32_t request->startTime = currentTime; request->startTimeMillis = currentTimeMillis; - NetworkMessage *networkMessage = new NetworkMessage(); - networkMessage->message = std::unique_ptr(new TL_message()); + auto networkMessage = new NetworkMessage(); + networkMessage->forceContainer = request->isResending; + 
networkMessage->message = std::make_unique(); networkMessage->message->msg_id = request->messageId; networkMessage->message->bytes = request->serializedLength; networkMessage->message->outgoingBody = request->getRpcRequest(); networkMessage->message->seqno = request->messageSeqNo; networkMessage->requestId = request->requestToken; - networkMessage->invokeAfter = (request->requestFlags & RequestFlagInvokeAfter) != 0; + networkMessage->invokeAfter = (request->requestFlags & RequestFlagInvokeAfter) != 0 && (request->requestFlags & RequestFlagResendAfter) == 0; networkMessage->needQuickAck = (request->requestFlags & RequestFlagNeedQuickAck) != 0; request->connectionToken = connection->getConnectionToken(); @@ -2358,15 +2368,15 @@ void ConnectionsManager::processRequestQueue(uint32_t connectionTypes, uint32_t if (defaultDatacenter != nullptr) { genericConnection = defaultDatacenter->getGenericConnection(true, 0); if (genericConnection != nullptr && !sessionsToDestroy.empty() && genericConnection->getConnectionToken() != 0) { - std::vector::iterator iter = sessionsToDestroy.begin(); + auto iter = sessionsToDestroy.begin(); if (abs(currentTime - lastDestroySessionRequestTime) > 2) { lastDestroySessionRequestTime = currentTime; - TL_destroy_session *request = new TL_destroy_session(); + auto request = new TL_destroy_session(); request->session_id = *iter; - NetworkMessage *networkMessage = new NetworkMessage(); - networkMessage->message = std::unique_ptr(new TL_message()); + auto networkMessage = new NetworkMessage(); + networkMessage->message = std::make_unique(); networkMessage->message->msg_id = generateMessageId(); networkMessage->message->bytes = request->getObjectSize(); networkMessage->message->body = std::unique_ptr(request); @@ -2377,12 +2387,22 @@ void ConnectionsManager::processRequestQueue(uint32_t connectionTypes, uint32_t } } - for (requestsIter iter = requestsQueue.begin(); iter != requestsQueue.end();) { + for (auto iter = requestsQueue.begin(); iter != 
requestsQueue.end();) { Request *request = iter->get(); if (request->cancelled) { iter = requestsQueue.erase(iter); continue; } + if (hasInvokeWaitMessage && (request->requestFlags & RequestFlagInvokeAfter) != 0 && (request->requestFlags & RequestFlagResendAfter) == 0) { + request->requestFlags |= RequestFlagResendAfter; + } + if (hasInvokeAfterMessage && (request->requestFlags & RequestFlagResendAfter) != 0) { + iter++; + continue; + } + if (!hasInvokeAfterMessage && (request->requestFlags & RequestFlagInvokeAfter) != 0) { + hasInvokeAfterMessage = true; + } uint32_t datacenterId = request->datacenterId; if (datacenterId == DEFAULT_DATACENTER_ID) { @@ -2410,11 +2430,11 @@ void ConnectionsManager::processRequestQueue(uint32_t connectionTypes, uint32_t } if (requestStartTime != 0 && abs(currentTime - requestStartTime) >= timeout) { std::vector allDc; - for (std::map::iterator iter2 = datacenters.begin(); iter2 != datacenters.end(); iter2++) { - if (iter2->first == datacenterId || iter2->second->isCdnDatacenter) { + for (auto & datacenter : datacenters) { + if (datacenter.first == datacenterId || datacenter.second->isCdnDatacenter) { continue; } - allDc.push_back(iter2->first); + allDc.push_back(datacenter.first); } uint8_t index; RAND_bytes(&index, 1); @@ -2475,7 +2495,7 @@ void ConnectionsManager::processRequestQueue(uint32_t connectionTypes, uint32_t break; case ConnectionTypeDownload: { uint32_t currentCount; - std::map::iterator dcIter = downloadRunningRequestCount.find(datacenterId); + auto dcIter = downloadRunningRequestCount.find(datacenterId); if (dcIter != downloadRunningRequestCount.end()) { currentCount = dcIter->second; } else { @@ -2519,7 +2539,7 @@ void ConnectionsManager::processRequestQueue(uint32_t connectionTypes, uint32_t request->rpcRequest->serializeToStream(original); NativeByteBuffer *buffer = compressGZip(original); if (buffer != nullptr) { - TL_gzip_packed *packed = new TL_gzip_packed(); + auto packed = new TL_gzip_packed(); 
packed->originalRequest = std::move(request->rpcRequest); packed->packed_data_to_send = buffer; request->rpcRequest = std::unique_ptr(packed); @@ -2534,14 +2554,15 @@ void ConnectionsManager::processRequestQueue(uint32_t connectionTypes, uint32_t request->startTimeMillis = currentTimeMillis; request->connectionToken = connection->getConnectionToken(); - NetworkMessage *networkMessage = new NetworkMessage(); - networkMessage->message = std::unique_ptr(new TL_message()); + auto networkMessage = new NetworkMessage(); + networkMessage->message = std::make_unique(); + networkMessage->forceContainer = request->isResending; networkMessage->message->msg_id = request->messageId; networkMessage->message->bytes = request->serializedLength; networkMessage->message->outgoingBody = request->getRpcRequest(); networkMessage->message->seqno = request->messageSeqNo; networkMessage->requestId = request->requestToken; - networkMessage->invokeAfter = (request->requestFlags & RequestFlagInvokeAfter) != 0; + networkMessage->invokeAfter = (request->requestFlags & RequestFlagInvokeAfter) != 0 && (request->requestFlags & RequestFlagResendAfter) == 0; networkMessage->needQuickAck = (request->requestFlags & RequestFlagNeedQuickAck) != 0; if (!hasPendingRequestsForConnection(connection)) { @@ -2579,9 +2600,9 @@ void ConnectionsManager::processRequestQueue(uint32_t connectionTypes, uint32_t iter = requestsQueue.erase(iter); } - for (std::map::iterator iter = datacenters.begin(); iter != datacenters.end(); iter++) { - Datacenter *datacenter = iter->second; - std::map>>::iterator iter2 = genericMessagesToDatacenters.find(datacenter->getDatacenterId()); + for (auto & iter : datacenters) { + Datacenter *datacenter = iter.second; + auto iter2 = genericMessagesToDatacenters.find(datacenter->getDatacenterId()); if (iter2 == genericMessagesToDatacenters.end()) { Connection *connection = datacenter->getGenericConnection(false, 1); if (connection != nullptr && connection->getConnectionToken() != 0 && 
connection->hasMessagesToConfirm()) { @@ -2606,13 +2627,13 @@ void ConnectionsManager::processRequestQueue(uint32_t connectionTypes, uint32_t } } - for (std::map>>::iterator iter = genericMessagesToDatacenters.begin(); iter != genericMessagesToDatacenters.end(); iter++) { - Datacenter *datacenter = getDatacenterWithId(iter->first); + for (auto & genericMessagesToDatacenter : genericMessagesToDatacenters) { + Datacenter *datacenter = getDatacenterWithId(genericMessagesToDatacenter.first); if (datacenter != nullptr) { bool scannedPreviousRequests = false; bool needQuickAck = false; int64_t lastSentMessageRpcId = 0; - std::vector> &array = iter->second; + std::vector> &array = genericMessagesToDatacenter.second; size_t count = array.size(); for (uint32_t b = 0; b < count; b++) { NetworkMessage *networkMessage = array[b].get(); @@ -2633,13 +2654,13 @@ void ConnectionsManager::processRequestQueue(uint32_t connectionTypes, uint32_t int64_t maxRequestId = 0; if (lastInvokeAfterMessageId != 0) { - int64_t timeMessage = (int64_t) (lastInvokeAfterMessageId / 4294967296.0); + auto timeMessage = (int64_t) (lastInvokeAfterMessageId / 4294967296.0); if (getCurrentTime() - timeMessage <= 5) { maxRequestId = lastInvokeAfterMessageId; } } - for (requestsIter iter2 = runningRequests.begin(); iter2 != runningRequests.end(); iter2++) { - Request *request = iter2->get(); + for (auto & runningRequest : runningRequests) { + Request *request = runningRequest.get(); if (request->requestFlags & RequestFlagInvokeAfter) { if (request->messageId > maxRequestId && std::find(currentRequests.begin(), currentRequests.end(), request->messageId) == currentRequests.end()) { maxRequestId = request->messageId; @@ -2653,7 +2674,7 @@ void ConnectionsManager::processRequestQueue(uint32_t connectionTypes, uint32_t TL_message *message = networkMessage->message.get(); if (lastSentMessageRpcId != 0 && lastSentMessageRpcId != message->msg_id) { - TL_invokeAfterMsg *request = new TL_invokeAfterMsg(); + auto 
request = new TL_invokeAfterMsg(); request->msg_id = lastSentMessageRpcId; if (message->outgoingBody != nullptr) { if (LOGS_ENABLED) DEBUG_D("wrap outgoingBody(%p, %s) to TL_invokeAfterMsg, token = %d, after 0x%" PRIx64, message->outgoingBody, typeid(*message->outgoingBody).name(), networkMessage->requestId, request->msg_id); @@ -2676,24 +2697,24 @@ void ConnectionsManager::processRequestQueue(uint32_t connectionTypes, uint32_t } } - for (std::map>>::iterator iter = tempMessagesToDatacenters.begin(); iter != tempMessagesToDatacenters.end(); iter++) { - Datacenter *datacenter = getDatacenterWithId(iter->first); + for (auto & tempMessagesToDatacenter : tempMessagesToDatacenters) { + Datacenter *datacenter = getDatacenterWithId(tempMessagesToDatacenter.first); if (datacenter != nullptr) { - std::vector> &array = iter->second; + std::vector> &array = tempMessagesToDatacenter.second; sendMessagesToConnectionWithConfirmation(array, datacenter->getTempConnection(true), false); } } - for (std::map>>::iterator iter = genericMediaMessagesToDatacenters.begin(); iter != genericMediaMessagesToDatacenters.end(); iter++) { - Datacenter *datacenter = getDatacenterWithId(iter->first); + for (auto & genericMediaMessagesToDatacenter : genericMediaMessagesToDatacenters) { + Datacenter *datacenter = getDatacenterWithId(genericMediaMessagesToDatacenter.first); if (datacenter != nullptr) { - std::vector> &array = iter->second; + std::vector> &array = genericMediaMessagesToDatacenter.second; sendMessagesToConnectionWithConfirmation(array, datacenter->getGenericMediaConnection(true, 1), false); } } if (connectionTypes == ConnectionTypeGeneric && dc == currentDatacenterId) { - std::map>>::iterator iter2 = genericMessagesToDatacenters.find(currentDatacenterId); + auto iter2 = genericMessagesToDatacenters.find(currentDatacenterId); if (iter2 == genericMessagesToDatacenters.end()) { sendPing(getDatacenterWithId(currentDatacenterId), false); } @@ -2728,7 +2749,7 @@ Datacenter 
*ConnectionsManager::getDatacenterWithId(uint32_t datacenterId) { if (datacenterId == DEFAULT_DATACENTER_ID) { return datacenters[currentDatacenterId]; } - std::map::iterator iter = datacenters.find(datacenterId); + auto iter = datacenters.find(datacenterId); return iter != datacenters.end() ? iter->second : nullptr; } @@ -2744,7 +2765,7 @@ std::unique_ptr ConnectionsManager::wrapInLayer(TLObject *object, Data } else { baseRequest->isInitRequest = true; } - initConnection *request = new initConnection(); + auto request = new initConnection(); if (delegate != nullptr) { request->flags = delegate->getInitFlags(instanceNum); } else { @@ -2758,31 +2779,31 @@ std::unique_ptr ConnectionsManager::wrapInLayer(TLObject *object, Data request->system_lang_code = currentSystemLangCode; - TL_jsonObject *jsonObject = new TL_jsonObject(); + auto jsonObject = new TL_jsonObject(); request->params = std::unique_ptr(jsonObject); if (!currentRegId.empty()) { - TL_jsonObjectValue *objectValue = new TL_jsonObjectValue(); + auto objectValue = new TL_jsonObjectValue(); jsonObject->value.push_back(std::unique_ptr(objectValue)); - TL_jsonString *jsonString = new TL_jsonString(); + auto jsonString = new TL_jsonString(); jsonString->value = currentRegId; objectValue->key = "device_token"; objectValue->value = std::unique_ptr(jsonString); } if (!certFingerprint.empty()) { - TL_jsonObjectValue *objectValue = new TL_jsonObjectValue(); + auto objectValue = new TL_jsonObjectValue(); jsonObject->value.push_back(std::unique_ptr(objectValue)); - TL_jsonString *jsonString = new TL_jsonString(); + auto jsonString = new TL_jsonString(); jsonString->value = certFingerprint; objectValue->key = "data"; objectValue->value = std::unique_ptr(jsonString); } - TL_jsonObjectValue *objectValue = new TL_jsonObjectValue(); + auto objectValue = new TL_jsonObjectValue(); jsonObject->value.push_back(std::unique_ptr(objectValue)); - TL_jsonString *jsonString = new TL_jsonString(); + auto jsonString = new 
TL_jsonString(); jsonString->value = installer; objectValue->key = "installer"; objectValue->value = std::unique_ptr(jsonString); @@ -2797,7 +2818,7 @@ std::unique_ptr ConnectionsManager::wrapInLayer(TLObject *object, Data objectValue = new TL_jsonObjectValue(); jsonObject->value.push_back(std::unique_ptr(objectValue)); - TL_jsonNumber *jsonNumber = new TL_jsonNumber(); + auto jsonNumber = new TL_jsonNumber(); jsonNumber->value = currentDeviceTimezone; objectValue->key = "tz_offset"; objectValue->value = std::unique_ptr(jsonNumber); @@ -2806,7 +2827,7 @@ std::unique_ptr ConnectionsManager::wrapInLayer(TLObject *object, Data if (!proxyAddress.empty() && !proxySecret.empty()) { request->flags |= 1; - request->proxy = std::unique_ptr(new TL_inputClientProxy()); + request->proxy = std::make_unique(); if (proxyAddress == "127.0.0.1") { request->proxy->address = "neko.services"; } else { @@ -2834,7 +2855,7 @@ std::unique_ptr ConnectionsManager::wrapInLayer(TLObject *object, Data if (request->system_version.empty()) { request->system_version = "n/a"; } - invokeWithLayer *request2 = new invokeWithLayer(); + auto request2 = new invokeWithLayer(); request2->layer = currentLayer; request2->query = std::unique_ptr(request); if (LOGS_ENABLED) DEBUG_D("wrap in layer %s, flags = %d", typeid(*object).name(), request->flags); @@ -2906,8 +2927,8 @@ std::string base64UrlDecode(std::string base64) { inline std::string decodeSecret(std::string secret) { bool allHex = true; - for (size_t i = 0; i < secret.size(); i++) { - if (!(secret[i] >= '0' && secret[i] <= '9' || secret[i] >= 'a' && secret[i] <= 'f' || secret[i] >= 'A' && secret[i] <= 'F')) { + for (char i : secret) { + if (!((i >= '0' && i <= '9') || (i >= 'a' && i <= 'f') || (i >= 'A' && i <= 'F'))) { allHex = false; break; } @@ -2939,14 +2960,14 @@ void ConnectionsManager::updateDcSettings(uint32_t dcNum, bool workaround) { updatingDcStartTime = (int32_t) (getCurrentTimeMonotonicMillis() / 1000); } - TL_help_getConfig *request = 
new TL_help_getConfig(); + auto request = new TL_help_getConfig(); sendRequest(request, [&, workaround](TLObject *response, TL_error *error, int32_t networkType, int64_t responseTime) { - if (!workaround && !updatingDcSettings || workaround && !updatingDcSettingsWorkaround) { + if ((!workaround && !updatingDcSettings) || (workaround && !updatingDcSettingsWorkaround)) { return; } if (response != nullptr) { - TL_config *config = (TL_config *) response; + auto config = (TL_config *) response; clientBlocked = (config->flags & 256) != 0; if (!workaround) { int32_t updateIn = config->expires - getCurrentTime(); @@ -2981,8 +3002,8 @@ void ConnectionsManager::updateDcSettings(uint32_t dcNum, bool workaround) { addresses = &addressesIpv4; } } - for (std::vector::iterator iter = addresses->begin(); iter != addresses->end(); iter++) { - if (iter->address == dcOption->ip_address && iter->port == dcOption->port) { + for (auto & addresse : *addresses) { + if (addresse.address == dcOption->ip_address && addresse.port == dcOption->port) { return; } } @@ -2999,7 +3020,7 @@ void ConnectionsManager::updateDcSettings(uint32_t dcNum, bool workaround) { size_t count = config->dc_options.size(); for (uint32_t a = 0; a < count; a++) { TL_dcOption *dcOption = config->dc_options[a].get(); - std::map>::iterator iter = map.find((uint32_t) dcOption->id); + auto iter = map.find((uint32_t) dcOption->id); DatacenterInfo *info; if (iter == map.end()) { map[dcOption->id] = std::unique_ptr(info = new DatacenterInfo); @@ -3010,20 +3031,20 @@ void ConnectionsManager::updateDcSettings(uint32_t dcNum, bool workaround) { } if (!map.empty()) { - for (std::map>::iterator iter = map.begin(); iter != map.end(); iter++) { - Datacenter *datacenter = getDatacenterWithId(iter->first); - DatacenterInfo *info = iter->second.get(); + for (auto & iter : map) { + Datacenter *datacenter = getDatacenterWithId(iter.first); + DatacenterInfo *info = iter.second.get(); if (datacenter == nullptr) { - datacenter = new 
Datacenter(instanceNum, iter->first); - datacenters[iter->first] = datacenter; + datacenter = new Datacenter(instanceNum, iter.first); + datacenters[iter.first] = datacenter; } datacenter->replaceAddresses(info->addressesIpv4, info->isCdn ? 8 : 0); datacenter->replaceAddresses(info->addressesIpv6, info->isCdn ? 9 : 1); datacenter->replaceAddresses(info->addressesIpv4Download, info->isCdn ? 10 : 2); datacenter->replaceAddresses(info->addressesIpv6Download, info->isCdn ? 11 : 3); - if (iter->first == movingToDatacenterId) { + if (iter.first == movingToDatacenterId) { movingToDatacenterId = DEFAULT_DATACENTER_ID; - moveToDatacenter(iter->first); + moveToDatacenter(iter.first); } } saveConfig(); @@ -3053,7 +3074,7 @@ void ConnectionsManager::moveToDatacenter(uint32_t datacenterId) { clearRequestsForDatacenter(currentDatacenter, HandshakeTypeAll); if (currentUserId) { - TL_auth_exportAuthorization *request = new TL_auth_exportAuthorization(); + auto request = new TL_auth_exportAuthorization(); request->dc_id = datacenterId; sendRequest(request, [&, datacenterId](TLObject *response, TL_error *error, int32_t networkType, int64_t responseTime) { if (error == nullptr) { @@ -3084,7 +3105,7 @@ void ConnectionsManager::authorizeOnMovingDatacenter() { } if (movingAuthorization != nullptr) { - TL_auth_importAuthorization *request = new TL_auth_importAuthorization(); + auto request = new TL_auth_importAuthorization(); request->id = currentUserId; request->bytes = std::move(movingAuthorization); sendRequest(request, [&](TLObject *response, TL_error *error, int32_t networkType, int64_t responseTime) { @@ -3114,7 +3135,7 @@ void ConnectionsManager::applyDatacenterAddress(uint32_t datacenterId, std::stri Datacenter *datacenter = getDatacenterWithId(datacenterId); if (datacenter != nullptr) { std::vector addresses; - addresses.push_back(TcpAddress(ipAddress, port, 0, "")); + addresses.emplace_back(ipAddress, port, 0, ""); datacenter->suspendConnections(true); 
datacenter->replaceAddresses(addresses, 0); datacenter->resetAddressAndPortNum(); @@ -3160,7 +3181,7 @@ inline bool checkPhoneByPrefixesRules(std::string phone, std::string rules) { std::stringstream ss(rules); std::string prefix; while (std::getline(ss, prefix, ',')) { - if (prefix == "") { + if (prefix.empty()) { found = true; } else if (prefix[0] == '+' && phone.find(prefix.substr(1)) == 0) { found = true; @@ -3185,24 +3206,24 @@ void ConnectionsManager::applyDnsConfig(NativeByteBuffer *buffer, std::string ph timeDifference += (realDate - currentDate); requestingSecondAddressByTlsHashMismatch = false; } - for (std::vector>::iterator iter = config->rules.begin(); iter != config->rules.end(); iter++) { - TL_accessPointRule *rule = iter->get(); + for (auto & iter : config->rules) { + TL_accessPointRule *rule = iter.get(); if (!checkPhoneByPrefixesRules(phone, rule->phone_prefix_rules)) { continue; } Datacenter *datacenter = getDatacenterWithId(rule->dc_id); if (datacenter != nullptr) { std::vector addresses; - for (std::vector>::iterator iter2 = rule->ips.begin(); iter2 != rule->ips.end(); iter2++) { + for (auto iter2 = rule->ips.begin(); iter2 != rule->ips.end(); iter2++) { IpPort *port = iter2->get(); const std::type_info &typeInfo = typeid(*port); if (typeInfo == typeid(TL_ipPort)) { - TL_ipPort *ipPort = (TL_ipPort *) port; - addresses.push_back(TcpAddress(ipPort->ipv4, ipPort->port, 0, "")); + auto ipPort = (TL_ipPort *) port; + addresses.emplace_back(ipPort->ipv4, ipPort->port, 0, ""); if (LOGS_ENABLED) DEBUG_D("got address %s and port %d for dc%d", ipPort->ipv4.c_str(), ipPort->port, rule->dc_id); } else if (typeInfo == typeid(TL_ipPortSecret)) { - TL_ipPortSecret *ipPort = (TL_ipPortSecret *) port; - addresses.push_back(TcpAddress(ipPort->ipv4, ipPort->port, 0, std::string((const char *) ipPort->secret->bytes, ipPort->secret->length))); + auto ipPort = (TL_ipPortSecret *) port; + addresses.emplace_back(ipPort->ipv4, ipPort->port, 0, std::string((const char 
*) ipPort->secret->bytes, ipPort->secret->length)); if (LOGS_ENABLED) DEBUG_D("got address %s and port %d for dc%d with secret", ipPort->ipv4.c_str(), ipPort->port, rule->dc_id); } } @@ -3284,8 +3305,8 @@ void ConnectionsManager::init(uint32_t version, int32_t layer, int32_t apiId, st bool needLoadConfig = false; if (systemLangCode.compare(lastInitSystemLangcode) != 0) { lastInitSystemLangcode = systemLangCode; - for (std::map::iterator iter = datacenters.begin(); iter != datacenters.end(); iter++) { - iter->second->resetInitVersion(); + for (auto & datacenter : datacenters) { + datacenter.second->resetInitVersion(); } needLoadConfig = true; saveConfig(); @@ -3297,7 +3318,7 @@ void ConnectionsManager::init(uint32_t version, int32_t layer, int32_t apiId, st } } - pthread_create(&networkThread, NULL, (ConnectionsManager::ThreadProc), this); + pthread_create(&networkThread, nullptr, (ConnectionsManager::ThreadProc), this); if (needLoadConfig) { updateDcSettings(0, false); @@ -3332,8 +3353,8 @@ void ConnectionsManager::setProxySettings(std::string address, uint16_t port, st } } if (reconnect) { - for (std::map::iterator iter = datacenters.begin(); iter != datacenters.end(); iter++) { - iter->second->suspendConnections(true); + for (auto & datacenter : datacenters) { + datacenter.second->suspendConnections(true); } Datacenter *datacenter = getDatacenterWithId(DEFAULT_DATACENTER_ID); if (datacenter != nullptr && datacenter->isHandshakingAny()) { @@ -3346,12 +3367,12 @@ void ConnectionsManager::setProxySettings(std::string address, uint16_t port, st void ConnectionsManager::setLangCode(std::string langCode) { scheduleTask([&, langCode] { - if (currentLangCode.compare(langCode) == 0) { + if (currentLangCode == langCode) { return; } currentLangCode = langCode; - for (std::map::iterator iter = datacenters.begin(); iter != datacenters.end(); iter++) { - iter->second->resetInitVersion(); + for (auto & datacenter : datacenters) { + datacenter.second->resetInitVersion(); } 
saveConfig(); }); @@ -3359,12 +3380,12 @@ void ConnectionsManager::setLangCode(std::string langCode) { void ConnectionsManager::setRegId(std::string regId) { scheduleTask([&, regId] { - if (currentRegId.compare(regId) == 0) { + if (currentRegId == regId) { return; } currentRegId = regId; - for (std::map::iterator iter = datacenters.begin(); iter != datacenters.end(); iter++) { - iter->second->resetInitVersion(); + for (auto & datacenter : datacenters) { + datacenter.second->resetInitVersion(); } updateDcSettings(0, false); saveConfig(); @@ -3373,12 +3394,12 @@ void ConnectionsManager::setRegId(std::string regId) { void ConnectionsManager::setSystemLangCode(std::string langCode) { scheduleTask([&, langCode] { - if (currentSystemLangCode.compare(langCode) == 0) { + if (currentSystemLangCode == langCode) { return; } lastInitSystemLangcode = currentSystemLangCode = langCode; - for (std::map::iterator iter = datacenters.begin(); iter != datacenters.end(); iter++) { - iter->second->resetInitVersion(); + for (auto & datacenter : datacenters) { + datacenter.second->resetInitVersion(); } saveConfig(); updateDcSettings(0, false); @@ -3414,11 +3435,11 @@ void ConnectionsManager::resumeNetwork(bool partial) { if (LOGS_ENABLED) DEBUG_D("wakeup network account%u", instanceNum); } if (!networkPaused) { - for (std::map::iterator iter = datacenters.begin(); iter != datacenters.end(); iter++) { - if (iter->second->isHandshaking(false)) { - iter->second->createGenericConnection()->connect(); - } else if (iter->second->isHandshaking(true)) { - iter->second->createGenericMediaConnection()->connect(); + for (auto & datacenter : datacenters) { + if (datacenter.second->isHandshaking(false)) { + datacenter.second->createGenericConnection()->connect(); + } else if (datacenter.second->isHandshaking(true)) { + datacenter.second->createGenericMediaConnection()->connect(); } } } @@ -3442,11 +3463,11 @@ void ConnectionsManager::setNetworkAvailable(bool value, int32_t type, bool slow if 
(!networkAvailable) { connectionState = ConnectionStateWaitingForNetwork; } else { - for (std::map::iterator iter = datacenters.begin(); iter != datacenters.end(); iter++) { - if (iter->second->isHandshaking(false)) { - iter->second->createGenericConnection()->connect(); - } else if (iter->second->isHandshaking(true)) { - iter->second->createGenericMediaConnection()->connect(); + for (auto & datacenter : datacenters) { + if (datacenter.second->isHandshaking(false)) { + datacenter.second->createGenericConnection()->connect(); + } else if (datacenter.second->isHandshaking(true)) { + datacenter.second->createGenericMediaConnection()->connect(); } } } @@ -3463,7 +3484,7 @@ void ConnectionsManager::setIpStrategy(uint8_t value) { } int64_t ConnectionsManager::checkProxy(std::string address, uint16_t port, std::string username, std::string password, std::string secret, onRequestTimeFunc requestTimeFunc, jobject ptr1) { - ProxyCheckInfo *proxyCheckInfo = new ProxyCheckInfo(); + auto proxyCheckInfo = new ProxyCheckInfo(); proxyCheckInfo->address = address; proxyCheckInfo->port = port; proxyCheckInfo->username = username; @@ -3490,8 +3511,8 @@ void ConnectionsManager::checkProxyInternal(ProxyCheckInfo *proxyCheckInfo) { if (proxyActiveChecks.size() != PROXY_CONNECTIONS_COUNT) { for (int32_t a = 0; a < PROXY_CONNECTIONS_COUNT; a++) { bool found = false; - for (std::vector>::iterator iter = proxyActiveChecks.begin(); iter != proxyActiveChecks.end(); iter++) { - if (iter->get()->connectionNum == a) { + for (auto & proxyActiveCheck : proxyActiveChecks) { + if (proxyActiveCheck.get()->connectionNum == a) { found = true; break; } @@ -3505,14 +3526,14 @@ void ConnectionsManager::checkProxyInternal(ProxyCheckInfo *proxyCheckInfo) { if (freeConnectionNum == -1) { proxyCheckQueue.push_back(std::unique_ptr(proxyCheckInfo)); } else { - ConnectionType connectionType = (ConnectionType) (ConnectionTypeProxy | (freeConnectionNum << 16)); + auto connectionType = (ConnectionType) 
(ConnectionTypeProxy | (freeConnectionNum << 16)); Datacenter *datacenter = getDatacenterWithId(DEFAULT_DATACENTER_ID); Connection *connection = datacenter->getProxyConnection((uint8_t) freeConnectionNum, true, false); if (connection != nullptr) { connection->setOverrideProxy(proxyCheckInfo->address, proxyCheckInfo->port, proxyCheckInfo->username, proxyCheckInfo->password, proxyCheckInfo->secret); connection->suspendConnection(); proxyCheckInfo->connectionNum = freeConnectionNum; - TL_ping *request = new TL_ping(); + auto request = new TL_ping(); request->ping_id = proxyCheckInfo->pingId; proxyCheckInfo->requestToken = sendRequest(request, nullptr, nullptr, RequestFlagEnableUnauthorized | RequestFlagWithoutLogin, DEFAULT_DATACENTER_ID, connectionType, true, 0); proxyActiveChecks.push_back(std::unique_ptr(proxyCheckInfo)); @@ -3529,18 +3550,18 @@ void ConnectionsManager::checkProxyInternal(ProxyCheckInfo *proxyCheckInfo) { void ConnectionsManager::useJavaVM(JavaVM *vm, bool useJavaByteBuffers) { javaVm = vm; if (useJavaByteBuffers) { - JNIEnv *env = 0; + JNIEnv *env = nullptr; if (javaVm->GetEnv((void **) &env, JNI_VERSION_1_6) != JNI_OK) { if (LOGS_ENABLED) DEBUG_E("can't get jnienv"); exit(1); } jclass_ByteBuffer = (jclass) env->NewGlobalRef(env->FindClass("java/nio/ByteBuffer")); - if (jclass_ByteBuffer == 0) { + if (jclass_ByteBuffer == nullptr) { if (LOGS_ENABLED) DEBUG_E("can't find java ByteBuffer class"); exit(1); } jclass_ByteBuffer_allocateDirect = env->GetStaticMethodID(jclass_ByteBuffer, "allocateDirect", "(I)Ljava/nio/ByteBuffer;"); - if (jclass_ByteBuffer_allocateDirect == 0) { + if (jclass_ByteBuffer_allocateDirect == nullptr) { if (LOGS_ENABLED) DEBUG_E("can't find java ByteBuffer allocateDirect"); exit(1); } diff --git a/TMessagesProj/jni/tgnet/Datacenter.cpp b/TMessagesProj/jni/tgnet/Datacenter.cpp index 80790b48d..37f84d3c6 100644 --- a/TMessagesProj/jni/tgnet/Datacenter.cpp +++ b/TMessagesProj/jni/tgnet/Datacenter.cpp @@ -32,27 +32,27 @@ 
thread_local static SHA256_CTX sha256Ctx; Datacenter::Datacenter(int32_t instance, uint32_t id) { instanceNum = instance; datacenterId = id; - for (uint32_t a = 0; a < UPLOAD_CONNECTIONS_COUNT; a++) { - uploadConnection[a] = nullptr; + for (auto & a : uploadConnection) { + a = nullptr; } - for (uint32_t a = 0; a < DOWNLOAD_CONNECTIONS_COUNT; a++) { - downloadConnection[a] = nullptr; + for (auto & a : downloadConnection) { + a = nullptr; } - for (uint32_t a = 0; a < PROXY_CONNECTIONS_COUNT; a++) { - proxyConnection[a] = nullptr; + for (auto & a : proxyConnection) { + a = nullptr; } } Datacenter::Datacenter(int32_t instance, NativeByteBuffer *data) { instanceNum = instance; - for (uint32_t a = 0; a < UPLOAD_CONNECTIONS_COUNT; a++) { - uploadConnection[a] = nullptr; + for (auto & a : uploadConnection) { + a = nullptr; } - for (uint32_t a = 0; a < DOWNLOAD_CONNECTIONS_COUNT; a++) { - downloadConnection[a] = nullptr; + for (auto & a : downloadConnection) { + a = nullptr; } - for (uint32_t a = 0; a < PROXY_CONNECTIONS_COUNT; a++) { - proxyConnection[a] = nullptr; + for (auto & a : proxyConnection) { + a = nullptr; } uint32_t currentVersion = data->readUint32(nullptr); if (currentVersion >= 2 && currentVersion <= configVersion) { @@ -146,7 +146,7 @@ Datacenter::Datacenter(int32_t instance, NativeByteBuffer *data) { authorized = data->readInt32(nullptr) != 0; len = data->readUint32(nullptr); for (uint32_t a = 0; a < len; a++) { - TL_future_salt *salt = new TL_future_salt(); + auto salt = new TL_future_salt(); salt->valid_since = data->readInt32(nullptr); salt->valid_until = data->readInt32(nullptr); salt->salt = data->readInt64(nullptr); @@ -155,7 +155,7 @@ Datacenter::Datacenter(int32_t instance, NativeByteBuffer *data) { if (currentVersion >= 13) { len = data->readUint32(nullptr); for (uint32_t a = 0; a < len; a++) { - TL_future_salt *salt = new TL_future_salt(); + auto salt = new TL_future_salt(); salt->valid_since = data->readInt32(nullptr); salt->valid_until = 
data->readInt32(nullptr); salt->salt = data->readInt64(nullptr); @@ -223,9 +223,9 @@ TcpAddress *Datacenter::getCurrentAddress(uint32_t flags) { return nullptr; } if ((flags & TcpAddressFlagStatic) != 0) { - for (std::vector::iterator iter = addresses->begin(); iter != addresses->end(); iter++) { - if ((iter->flags & TcpAddressFlagStatic) != 0) { - return &(*iter); + for (auto & addresse : *addresses) { + if ((addresse.flags & TcpAddressFlagStatic) != 0) { + return &addresse; } } } @@ -288,8 +288,8 @@ int32_t Datacenter::getCurrentPort(uint32_t flags) { if ((flags & TcpAddressFlagStatic) != 0) { uint32_t num = 0; - for (std::vector::iterator iter = addresses->begin(); iter != addresses->end(); iter++) { - if ((iter->flags & TcpAddressFlagStatic) != 0) { + for (auto & addresse : *addresses) { + if ((addresse.flags & TcpAddressFlagStatic) != 0) { currentAddressNum = num; break; } @@ -362,8 +362,8 @@ void Datacenter::addAddressAndPort(std::string address, uint32_t port, uint32_t addresses = &addressesIpv4; } } - for (std::vector::iterator iter = addresses->begin(); iter != addresses->end(); iter++) { - if (iter->address == address && iter->port == port) { + for (auto & addresse : *addresses) { + if (addresse.address == address && addresse.port == port) { return; } } @@ -749,14 +749,14 @@ void Datacenter::suspendConnections(bool suspendPush) { if (tempConnection != nullptr) { tempConnection->suspendConnection(); } - for (uint32_t a = 0; a < UPLOAD_CONNECTIONS_COUNT; a++) { - if (uploadConnection[a] != nullptr) { - uploadConnection[a]->suspendConnection(); + for (auto & a : uploadConnection) { + if (a != nullptr) { + a->suspendConnection(); } } - for (uint32_t a = 0; a < DOWNLOAD_CONNECTIONS_COUNT; a++) { - if (downloadConnection[a] != nullptr) { - downloadConnection[a]->suspendConnection(); + for (auto & a : downloadConnection) { + if (a != nullptr) { + a->suspendConnection(); } } } @@ -771,19 +771,19 @@ void Datacenter::getSessions(std::vector &sessions) { if 
(tempConnection != nullptr) { sessions.push_back(tempConnection->getSessionId()); } - for (uint32_t a = 0; a < UPLOAD_CONNECTIONS_COUNT; a++) { - if (uploadConnection[a] != nullptr) { - sessions.push_back(uploadConnection[a]->getSessionId()); + for (auto & a : uploadConnection) { + if (a != nullptr) { + sessions.push_back(a->getSessionId()); } } - for (uint32_t a = 0; a < DOWNLOAD_CONNECTIONS_COUNT; a++) { - if (downloadConnection[a] != nullptr) { - sessions.push_back(downloadConnection[a]->getSessionId()); + for (auto & a : downloadConnection) { + if (a != nullptr) { + sessions.push_back(a->getSessionId()); } } - for (uint32_t a = 0; a < PROXY_CONNECTIONS_COUNT; a++) { - if (proxyConnection[a] != nullptr) { - sessions.push_back(proxyConnection[a]->getSessionId()); + for (auto & a : proxyConnection) { + if (a != nullptr) { + sessions.push_back(a->getSessionId()); } } } @@ -796,21 +796,21 @@ void Datacenter::recreateSessions(HandshakeType type) { if (tempConnection != nullptr) { tempConnection->recreateSession(); } - for (uint32_t a = 0; a < UPLOAD_CONNECTIONS_COUNT; a++) { - if (uploadConnection[a] != nullptr) { - uploadConnection[a]->recreateSession(); + for (auto & a : uploadConnection) { + if (a != nullptr) { + a->recreateSession(); } } - for (uint32_t a = 0; a < PROXY_CONNECTIONS_COUNT; a++) { - if (proxyConnection[a] != nullptr) { - proxyConnection[a]->recreateSession(); + for (auto & a : proxyConnection) { + if (a != nullptr) { + a->recreateSession(); } } } if (type == HandshakeTypeAll || type == HandshakeTypeMediaTemp || type == HandshakeTypePerm) { - for (uint32_t a = 0; a < DOWNLOAD_CONNECTIONS_COUNT; a++) { - if (downloadConnection[a] != nullptr) { - downloadConnection[a]->recreateSession(); + for (auto & a : downloadConnection) { + if (a != nullptr) { + a->recreateSession(); } } if (genericMediaConnection != nullptr) { @@ -883,8 +883,8 @@ bool Datacenter::isHandshaking(bool media) { if (media && (isCdnDatacenter || !PFS_ENABLED)) { media = false; } - for 
(std::vector>::iterator iter = handshakes.begin(); iter != handshakes.end(); iter++) { - Handshake *handshake = iter->get(); + for (auto & iter : handshakes) { + Handshake *handshake = iter.get(); if (handshake->getType() == HandshakeTypePerm || (media && handshake->getType() == HandshakeTypeMediaTemp) || (!media && handshake->getType() != HandshakeTypeMediaTemp)) { return true; } @@ -896,8 +896,8 @@ bool Datacenter::isHandshaking(HandshakeType type) { if (handshakes.empty()) { return false; } - for (std::vector>::iterator iter = handshakes.begin(); iter != handshakes.end(); iter++) { - Handshake *handshake = iter->get(); + for (auto & iter : handshakes) { + Handshake *handshake = iter.get(); if (handshake->getType() == type) { return true; } @@ -907,28 +907,28 @@ bool Datacenter::isHandshaking(HandshakeType type) { void Datacenter::beginHandshake(HandshakeType handshakeType, bool reconnect) { if (handshakeType == HandshakeTypeCurrent) { - for (std::vector>::iterator iter = handshakes.begin(); iter != handshakes.end(); iter++) { - Handshake *handshake = iter->get(); + for (auto & iter : handshakes) { + Handshake *handshake = iter.get(); handshake->beginHandshake(reconnect); } } else { if (authKeyPerm == nullptr) { if (!isHandshaking(HandshakeTypePerm)) { - Handshake *handshake = new Handshake(this, HandshakeTypePerm, this); + auto handshake = new Handshake(this, HandshakeTypePerm, this); handshakes.push_back(std::unique_ptr(handshake)); handshake->beginHandshake(reconnect); } } else if (PFS_ENABLED) { if (handshakeType == HandshakeTypeAll || handshakeType == HandshakeTypeTemp) { if (!isHandshaking(HandshakeTypeTemp)) { - Handshake *handshake = new Handshake(this, HandshakeTypeTemp, this); + auto handshake = new Handshake(this, HandshakeTypeTemp, this); handshakes.push_back(std::unique_ptr(handshake)); handshake->beginHandshake(reconnect); } } if ((handshakeType == HandshakeTypeAll || handshakeType == HandshakeTypeMediaTemp) && hasMediaAddress()) { if 
(!isHandshaking(HandshakeTypeMediaTemp)) { - Handshake *handshake = new Handshake(this, HandshakeTypeMediaTemp, this); + auto handshake = new Handshake(this, HandshakeTypeMediaTemp, this); handshakes.push_back(std::unique_ptr(handshake)); handshake->beginHandshake(reconnect); } @@ -942,9 +942,9 @@ void Datacenter::onHandshakeConnectionClosed(Connection *connection) { return; } bool media = connection->getConnectionType() == ConnectionTypeGenericMedia; - for (std::vector>::iterator iter = handshakes.begin(); iter != handshakes.end(); iter++) { - Handshake *handshake = iter->get(); - if (media && handshake->getType() == HandshakeTypeMediaTemp || !media && handshake->getType() != HandshakeTypeMediaTemp) { + for (auto & iter : handshakes) { + Handshake *handshake = iter.get(); + if ((media && handshake->getType() == HandshakeTypeMediaTemp) || (!media && handshake->getType() != HandshakeTypeMediaTemp)) { handshake->onHandshakeConnectionClosed(); } } @@ -955,9 +955,9 @@ void Datacenter::onHandshakeConnectionConnected(Connection *connection) { return; } bool media = connection->getConnectionType() == ConnectionTypeGenericMedia; - for (std::vector>::iterator iter = handshakes.begin(); iter != handshakes.end(); iter++) { - Handshake *handshake = iter->get(); - if (media && handshake->getType() == HandshakeTypeMediaTemp || !media && handshake->getType() != HandshakeTypeMediaTemp) { + for (auto & iter : handshakes) { + Handshake *handshake = iter.get(); + if ((media && handshake->getType() == HandshakeTypeMediaTemp) || (!media && handshake->getType() != HandshakeTypeMediaTemp)) { handshake->onHandshakeConnectionConnected(); } } @@ -986,9 +986,9 @@ void Datacenter::processHandshakeResponse(bool media, TLObject *message, int64_t if (handshakes.empty()) { return; } - for (std::vector>::iterator iter = handshakes.begin(); iter != handshakes.end(); iter++) { - Handshake *handshake = iter->get(); - if (media && handshake->getType() == HandshakeTypeMediaTemp || !media && 
handshake->getType() != HandshakeTypeMediaTemp) { + for (auto & iter : handshakes) { + Handshake *handshake = iter.get(); + if ((media && handshake->getType() == HandshakeTypeMediaTemp) || (!media && handshake->getType() != HandshakeTypeMediaTemp)) { handshake->processHandshakeResponse(message, messageId); } } @@ -998,9 +998,9 @@ TLObject *Datacenter::getCurrentHandshakeRequest(bool media) { if (handshakes.empty()) { return nullptr; } - for (std::vector>::iterator iter = handshakes.begin(); iter != handshakes.end(); iter++) { - Handshake *handshake = iter->get(); - if (media && handshake->getType() == HandshakeTypeMediaTemp || !media && handshake->getType() != HandshakeTypeMediaTemp) { + for (auto & iter : handshakes) { + Handshake *handshake = iter.get(); + if ((media && handshake->getType() == HandshakeTypeMediaTemp) || (!media && handshake->getType() != HandshakeTypeMediaTemp)) { return handshake->getCurrentHandshakeRequest(); } } @@ -1069,9 +1069,9 @@ ByteArray *Datacenter::getAuthKey(ConnectionType connectionType, bool perm, int6 bool media = Connection::isMediaConnectionType(connectionType) && hasMediaAddress(); ByteArray *authKeyPending = nullptr; int64_t authKeyPendingId = 0; - for (std::vector>::iterator iter = handshakes.begin(); iter != handshakes.end(); iter++) { - Handshake *handshake = iter->get(); - if (media && handshake->getType() == HandshakeTypeMediaTemp || !media && handshake->getType() == HandshakeTypeTemp) { + for (auto & iter : handshakes) { + Handshake *handshake = iter.get(); + if ((media && handshake->getType() == HandshakeTypeMediaTemp) || (!media && handshake->getType() == HandshakeTypeTemp)) { authKeyPending = handshake->getPendingAuthKey(); authKeyPendingId = handshake->getPendingAuthKeyId(); break; @@ -1118,12 +1118,12 @@ NativeByteBuffer *Datacenter::createRequestsData(std::vectorgetConnectionType(), (uint64_t) connection->getSessionId(), networkMessage->message->seqno, (uint64_t) networkMessage->message->msg_id, 
typeid(*messageBody).name(), messageBody); - int64_t messageTime = (int64_t) (networkMessage->message->msg_id / 4294967296.0 * 1000); + auto messageTime = (int64_t) (networkMessage->message->msg_id / 4294967296.0 * 1000); int64_t currentTime = ConnectionsManager::getInstance(instanceNum).getCurrentTimeMillis() + (int64_t) ConnectionsManager::getInstance(instanceNum).getTimeDifference() * 1000; - if (!pfsInit && (messageTime < currentTime - 30000 || messageTime > currentTime + 25000)) { + if (!pfsInit && (networkMessage->forceContainer || messageTime < currentTime - 30000 || messageTime > currentTime + 25000)) { if (LOGS_ENABLED) DEBUG_D("wrap message in container"); - TL_msg_container *messageContainer = new TL_msg_container(); + auto messageContainer = new TL_msg_container(); messageContainer->messages.push_back(std::move(networkMessage->message)); messageId = ConnectionsManager::getInstance(instanceNum).generateMessageId(); @@ -1136,7 +1136,7 @@ NativeByteBuffer *Datacenter::createRequestsData(std::vectorgetType(); - for (std::vector>::iterator iter = handshakes.begin(); iter != handshakes.end(); iter++) { + for (auto iter = handshakes.begin(); iter != handshakes.end(); iter++) { if (iter->get() == handshake) { handshakes.erase(iter); if (type == HandshakeTypePerm) { @@ -1431,13 +1431,13 @@ void Datacenter::exportAuthorization() { return; } exportingAuthorization = true; - TL_auth_exportAuthorization *request = new TL_auth_exportAuthorization(); + auto request = new TL_auth_exportAuthorization(); request->dc_id = datacenterId; if (LOGS_ENABLED) DEBUG_D("dc%u begin export authorization", datacenterId); ConnectionsManager::getInstance(instanceNum).sendRequest(request, [&](TLObject *response, TL_error *error, int32_t networkType, int64_t responseTime) { if (error == nullptr) { - TL_auth_exportedAuthorization *res = (TL_auth_exportedAuthorization *) response; - TL_auth_importAuthorization *request2 = new TL_auth_importAuthorization(); + auto res = 
(TL_auth_exportedAuthorization *) response; + auto request2 = new TL_auth_importAuthorization(); request2->bytes = std::move(res->bytes); request2->id = res->id; if (LOGS_ENABLED) DEBUG_D("dc%u begin import authorization", datacenterId); @@ -1497,7 +1497,7 @@ TL_help_configSimple *Datacenter::decodeSimpleConfig(NativeByteBuffer *buffer) { BIO *keyBio = BIO_new(BIO_s_mem()); BIO_write(keyBio, public_key.c_str(), (int) public_key.length()); - RSA *rsaKey = PEM_read_bio_RSAPublicKey(keyBio, NULL, NULL, NULL); + RSA *rsaKey = PEM_read_bio_RSAPublicKey(keyBio, nullptr, nullptr, nullptr); if (rsaKey == nullptr) { if (rsaKey == nullptr) { if (LOGS_ENABLED) DEBUG_E("Invalid rsa public key"); diff --git a/TMessagesProj/jni/tgnet/Defines.h b/TMessagesProj/jni/tgnet/Defines.h index bfd2e4fcc..9331db469 100644 --- a/TMessagesProj/jni/tgnet/Defines.h +++ b/TMessagesProj/jni/tgnet/Defines.h @@ -19,7 +19,6 @@ #define USE_DEBUG_SESSION false #define READ_BUFFER_SIZE 1024 * 128 //#define DEBUG_VERSION -#define USE_OLD_KEYS #define PFS_ENABLED 1 #define DEFAULT_DATACENTER_ID INT_MAX #define DC_UPDATE_TIME 60 * 60 @@ -64,6 +63,7 @@ typedef struct NetworkMessage { std::unique_ptr message; bool invokeAfter = false; bool needQuickAck = false; + bool forceContainer = false; int32_t requestId; } NetworkMessage; @@ -172,7 +172,8 @@ enum RequestFlag { RequestFlagForceDownload = 32, RequestFlagInvokeAfter = 64, RequestFlagNeedQuickAck = 128, - RequestFlagUseUnboundKey = 256 + RequestFlagUseUnboundKey = 256, + RequestFlagResendAfter = 512 }; inline std::string to_string_int32(int32_t value) { diff --git a/TMessagesProj/jni/tgnet/Handshake.cpp b/TMessagesProj/jni/tgnet/Handshake.cpp index ad0fa7365..e0bf38e9d 100644 --- a/TMessagesProj/jni/tgnet/Handshake.cpp +++ b/TMessagesProj/jni/tgnet/Handshake.cpp @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -55,19 +56,11 @@ void Handshake::beginHandshake(bool reconnect) { connection->connect(); } -#ifdef USE_OLD_KEYS - 
TL_req_pq *request = new TL_req_pq(); - request->nonce = std::unique_ptr(new ByteArray(16)); + auto request = new TL_req_pq_multi(); + request->nonce = std::make_unique(16); RAND_bytes(request->nonce->bytes, 16); authNonce = new ByteArray(request->nonce.get()); sendRequestData(request, true); -#else - TL_req_pq_multi *request = new TL_req_pq_multi(); - request->nonce = std::unique_ptr(new ByteArray(16)); - RAND_bytes(request->nonce->bytes, 16); - authNonce = new ByteArray(request->nonce.get()); - sendRequestData(request, true); -#endif } void Handshake::cleanupHandshake() { @@ -335,14 +328,14 @@ void Handshake::processHandshakeResponse(TLObject *message, int64_t messageId) { } handshakeState = 2; - TL_resPQ *result = (TL_resPQ *) message; + auto result = (TL_resPQ *) message; if (authNonce->isEqualTo(result->nonce.get())) { - std::string key; + std::string key = ""; int64_t keyFingerprint = 0; size_t count1 = result->server_public_key_fingerprints.size(); if (currentDatacenter->isCdnDatacenter) { - std::map::iterator iter = cdnPublicKeysFingerprints.find(currentDatacenter->datacenterId); + auto iter = cdnPublicKeysFingerprints.find(currentDatacenter->datacenterId); if (iter != cdnPublicKeysFingerprints.end()) { for (uint32_t a = 0; a < count1; a++) { if ((uint64_t) result->server_public_key_fingerprints[a] == iter->second) { @@ -353,95 +346,35 @@ void Handshake::processHandshakeResponse(TLObject *message, int64_t messageId) { } } else { if (serverPublicKeys.empty()) { -#ifdef USE_OLD_KEYS - serverPublicKeys.push_back("-----BEGIN RSA PUBLIC KEY-----\n" - "MIIBCgKCAQEAwVACPi9w23mF3tBkdZz+zwrzKOaaQdr01vAbU4E1pvkfj4sqDsm6\n" - "lyDONS789sVoD/xCS9Y0hkkC3gtL1tSfTlgCMOOul9lcixlEKzwKENj1Yz/s7daS\n" - "an9tqw3bfUV/nqgbhGX81v/+7RFAEd+RwFnK7a+XYl9sluzHRyVVaTTveB2GazTw\n" - "Efzk2DWgkBluml8OREmvfraX3bkHZJTKX4EQSjBbbdJ2ZXIsRrYOXfaA+xayEGB+\n" - "8hdlLmAjbCVfaigxX0CDqWeR1yFL9kwd9P0NsZRPsmoqVwMbMu7mStFai6aIhc3n\n" - "Slv8kg9qv1m6XHVQY3PnEw+QQtqSIXklHwIDAQAB\n" - "-----END RSA 
PUBLIC KEY-----"); - serverPublicKeysFingerprints.push_back(0xc3b42b026ce86b21LL); - - serverPublicKeys.push_back("-----BEGIN RSA PUBLIC KEY-----\n" - "MIIBCgKCAQEAxq7aeLAqJR20tkQQMfRn+ocfrtMlJsQ2Uksfs7Xcoo77jAid0bRt\n" - "ksiVmT2HEIJUlRxfABoPBV8wY9zRTUMaMA654pUX41mhyVN+XoerGxFvrs9dF1Ru\n" - "vCHbI02dM2ppPvyytvvMoefRoL5BTcpAihFgm5xCaakgsJ/tH5oVl74CdhQw8J5L\n" - "xI/K++KJBUyZ26Uba1632cOiq05JBUW0Z2vWIOk4BLysk7+U9z+SxynKiZR3/xdi\n" - "XvFKk01R3BHV+GUKM2RYazpS/P8v7eyKhAbKxOdRcFpHLlVwfjyM1VlDQrEZxsMp\n" - "NTLYXb6Sce1Uov0YtNx5wEowlREH1WOTlwIDAQAB\n" - "-----END RSA PUBLIC KEY-----"); - serverPublicKeysFingerprints.push_back(0x9a996a1db11c729bLL); - - serverPublicKeys.push_back("-----BEGIN RSA PUBLIC KEY-----\n" - "MIIBCgKCAQEAsQZnSWVZNfClk29RcDTJQ76n8zZaiTGuUsi8sUhW8AS4PSbPKDm+\n" - "DyJgdHDWdIF3HBzl7DHeFrILuqTs0vfS7Pa2NW8nUBwiaYQmPtwEa4n7bTmBVGsB\n" - "1700/tz8wQWOLUlL2nMv+BPlDhxq4kmJCyJfgrIrHlX8sGPcPA4Y6Rwo0MSqYn3s\n" - "g1Pu5gOKlaT9HKmE6wn5Sut6IiBjWozrRQ6n5h2RXNtO7O2qCDqjgB2vBxhV7B+z\n" - "hRbLbCmW0tYMDsvPpX5M8fsO05svN+lKtCAuz1leFns8piZpptpSCFn7bWxiA9/f\n" - "x5x17D7pfah3Sy2pA+NDXyzSlGcKdaUmwQIDAQAB\n" - "-----END RSA PUBLIC KEY-----"); - serverPublicKeysFingerprints.push_back(0xb05b2a6f70cdea78LL); - - serverPublicKeys.push_back("-----BEGIN RSA PUBLIC KEY-----\n" - "MIIBCgKCAQEAwqjFW0pi4reKGbkc9pK83Eunwj/k0G8ZTioMMPbZmW99GivMibwa\n" - "xDM9RDWabEMyUtGoQC2ZcDeLWRK3W8jMP6dnEKAlvLkDLfC4fXYHzFO5KHEqF06i\n" - "qAqBdmI1iBGdQv/OQCBcbXIWCGDY2AsiqLhlGQfPOI7/vvKc188rTriocgUtoTUc\n" - "/n/sIUzkgwTqRyvWYynWARWzQg0I9olLBBC2q5RQJJlnYXZwyTL3y9tdb7zOHkks\n" - "WV9IMQmZmyZh/N7sMbGWQpt4NMchGpPGeJ2e5gHBjDnlIf2p1yZOYeUYrdbwcS0t\n" - "UiggS4UeE8TzIuXFQxw7fzEIlmhIaq3FnwIDAQAB\n" - "-----END RSA PUBLIC KEY-----"); - serverPublicKeysFingerprints.push_back(0x71e025b6c76033e3LL); -#endif - - serverPublicKeys.push_back("-----BEGIN RSA PUBLIC KEY-----\n" - "MIIBCgKCAQEAruw2yP/BCcsJliRoW5eBVBVle9dtjJw+OYED160Wybum9SXtBBLX\n" - "riwt4rROd9csv0t0OHCaTmRqBcQ0J8fxhN6/cpR1GWgOZRUAiQxoMnlt0R93LCX/\n" 
- "j1dnVa/gVbCjdSxpbrfY2g2L4frzjJvdl84Kd9ORYjDEAyFnEA7dD556OptgLQQ2\n" - "e2iVNq8NZLYTzLp5YpOdO1doK+ttrltggTCy5SrKeLoCPPbOgGsdxJxyz5KKcZnS\n" - "Lj16yE5HvJQn0CNpRdENvRUXe6tBP78O39oJ8BTHp9oIjd6XWXAsp2CvK45Ol8wF\n" - "XGF710w9lwCGNbmNxNYhtIkdqfsEcwR5JwIDAQAB\n" - "-----END RSA PUBLIC KEY-----"); - serverPublicKeysFingerprints.push_back(0xbc35f3509f7b7a5LL); - - serverPublicKeys.push_back("-----BEGIN RSA PUBLIC KEY-----\n" - "MIIBCgKCAQEAvfLHfYH2r9R70w8prHblWt/nDkh+XkgpflqQVcnAfSuTtO05lNPs\n" - "pQmL8Y2XjVT4t8cT6xAkdgfmmvnvRPOOKPi0OfJXoRVylFzAQG/j83u5K3kRLbae\n" - "7fLccVhKZhY46lvsueI1hQdLgNV9n1cQ3TDS2pQOCtovG4eDl9wacrXOJTG2990V\n" - "jgnIKNA0UMoP+KF03qzryqIt3oTvZq03DyWdGK+AZjgBLaDKSnC6qD2cFY81UryR\n" - "WOab8zKkWAnhw2kFpcqhI0jdV5QaSCExvnsjVaX0Y1N0870931/5Jb9ICe4nweZ9\n" - "kSDF/gip3kWLG0o8XQpChDfyvsqB9OLV/wIDAQAB\n" - "-----END RSA PUBLIC KEY-----"); - serverPublicKeysFingerprints.push_back(0x15ae5fa8b5529542LL); - - serverPublicKeys.push_back("-----BEGIN RSA PUBLIC KEY-----\n" - "MIIBCgKCAQEAs/ditzm+mPND6xkhzwFIz6J/968CtkcSE/7Z2qAJiXbmZ3UDJPGr\n" - "zqTDHkO30R8VeRM/Kz2f4nR05GIFiITl4bEjvpy7xqRDspJcCFIOcyXm8abVDhF+\n" - "th6knSU0yLtNKuQVP6voMrnt9MV1X92LGZQLgdHZbPQz0Z5qIpaKhdyA8DEvWWvS\n" - "Uwwc+yi1/gGaybwlzZwqXYoPOhwMebzKUk0xW14htcJrRrq+PXXQbRzTMynseCoP\n" - "Ioke0dtCodbA3qQxQovE16q9zz4Otv2k4j63cz53J+mhkVWAeWxVGI0lltJmWtEY\n" - "K6er8VqqWot3nqmWMXogrgRLggv/NbbooQIDAQAB\n" - "-----END RSA PUBLIC KEY-----"); - serverPublicKeysFingerprints.push_back(0xaeae98e13cd7f94fLL); - - serverPublicKeys.push_back("-----BEGIN RSA PUBLIC KEY-----\n" - "MIIBCgKCAQEAvmpxVY7ld/8DAjz6F6q05shjg8/4p6047bn6/m8yPy1RBsvIyvuD\n" - "uGnP/RzPEhzXQ9UJ5Ynmh2XJZgHoE9xbnfxL5BXHplJhMtADXKM9bWB11PU1Eioc\n" - "3+AXBB8QiNFBn2XI5UkO5hPhbb9mJpjA9Uhw8EdfqJP8QetVsI/xrCEbwEXe0xvi\n" - "fRLJbY08/Gp66KpQvy7g8w7VB8wlgePexW3pT13Ap6vuC+mQuJPyiHvSxjEKHgqe\n" - "Pji9NP3tJUFQjcECqcm0yV7/2d0t/pbCm+ZH1sadZspQCEPPrtbkQBlvHb4OLiIW\n" - "PGHKSMeRFvp3IWcmdJqXahxLCUS1Eh6MAQIDAQAB\n" - "-----END RSA PUBLIC KEY-----"); - 
serverPublicKeysFingerprints.push_back(0x5a181b2235057d98LL); + if (ConnectionsManager::getInstance(currentDatacenter->instanceNum).testBackend) { + serverPublicKeys.emplace_back("-----BEGIN RSA PUBLIC KEY-----\n" + "MIIBCgKCAQEAyMEdY1aR+sCR3ZSJrtztKTKqigvO/vBfqACJLZtS7QMgCGXJ6XIR\n" + "yy7mx66W0/sOFa7/1mAZtEoIokDP3ShoqF4fVNb6XeqgQfaUHd8wJpDWHcR2OFwv\n" + "plUUI1PLTktZ9uW2WE23b+ixNwJjJGwBDJPQEQFBE+vfmH0JP503wr5INS1poWg/\n" + "j25sIWeYPHYeOrFp/eXaqhISP6G+q2IeTaWTXpwZj4LzXq5YOpk4bYEQ6mvRq7D1\n" + "aHWfYmlEGepfaYR8Q0YqvvhYtMte3ITnuSJs171+GDqpdKcSwHnd6FudwGO4pcCO\n" + "j4WcDuXc2CTHgH8gFTNhp/Y8/SpDOhvn9QIDAQAB\n" + "-----END RSA PUBLIC KEY-----"); + serverPublicKeysFingerprints.push_back(0xb25898df208d2603); + } else { + serverPublicKeys.emplace_back("-----BEGIN RSA PUBLIC KEY-----\n" + "MIIBCgKCAQEA6LszBcC1LGzyr992NzE0ieY+BSaOW622Aa9Bd4ZHLl+TuFQ4lo4g\n" + "5nKaMBwK/BIb9xUfg0Q29/2mgIR6Zr9krM7HjuIcCzFvDtr+L0GQjae9H0pRB2OO\n" + "62cECs5HKhT5DZ98K33vmWiLowc621dQuwKWSQKjWf50XYFw42h21P2KXUGyp2y/\n" + "+aEyZ+uVgLLQbRA1dEjSDZ2iGRy12Mk5gpYc397aYp438fsJoHIgJ2lgMv5h7WY9\n" + "t6N/byY9Nw9p21Og3AoXSL2q/2IJ1WRUhebgAdGVMlV1fkuOQoEzR7EdpqtQD9Cs\n" + "5+bfo3Nhmcyvk5ftB0WkJ9z6bNZ7yxrP8wIDAQAB\n" + "-----END RSA PUBLIC KEY-----"); + serverPublicKeysFingerprints.push_back(0xd09d1d85de64fd85); + } } size_t count2 = serverPublicKeysFingerprints.size(); - for (uint32_t a = 0; a < count2; a++) { - for (uint32_t b = 0; b < count1; b++) { - if ((uint64_t) result->server_public_key_fingerprints[b] == serverPublicKeysFingerprints[a]) { - keyFingerprint = result->server_public_key_fingerprints[b]; - key = serverPublicKeys[a]; + for (uint32_t a = 0; a < count1; a++) { + for (uint32_t b = 0; b < count2; b++) { + if ((uint64_t) result->server_public_key_fingerprints[a] == serverPublicKeysFingerprints[b]) { + keyFingerprint = result->server_public_key_fingerprints[a]; + key = serverPublicKeys[b]; break; } } @@ -478,15 +411,15 @@ void Handshake::processHandshakeResponse(TLObject *message, int64_t 
messageId) { return; } - TL_req_DH_params *request = new TL_req_DH_params(); - request->nonce = std::unique_ptr(new ByteArray(authNonce)); - request->server_nonce = std::unique_ptr(new ByteArray(authServerNonce)); - request->p = std::unique_ptr(new ByteArray(4)); + auto request = new TL_req_DH_params(); + request->nonce = std::make_unique(new ByteArray(authNonce)); + request->server_nonce = std::make_unique(new ByteArray(authServerNonce)); + request->p = std::make_unique(new ByteArray(4)); request->p->bytes[3] = (uint8_t) p; request->p->bytes[2] = (uint8_t) (p >> 8); request->p->bytes[1] = (uint8_t) (p >> 16); request->p->bytes[0] = (uint8_t) (p >> 24); - request->q = std::unique_ptr(new ByteArray(4)); + request->q = std::make_unique(new ByteArray(4)); request->q->bytes[3] = (uint8_t) q; request->q->bytes[2] = (uint8_t) (q >> 8); request->q->bytes[1] = (uint8_t) (q >> 16); @@ -495,13 +428,13 @@ void Handshake::processHandshakeResponse(TLObject *message, int64_t messageId) { TLObject *innerData; if (handshakeType == HandshakeTypePerm) { - TL_p_q_inner_data_dc *tl_p_q_inner_data = new TL_p_q_inner_data_dc(); - tl_p_q_inner_data->nonce = std::unique_ptr(new ByteArray(authNonce)); - tl_p_q_inner_data->server_nonce = std::unique_ptr(new ByteArray(authServerNonce)); - tl_p_q_inner_data->pq = std::unique_ptr(new ByteArray(result->pq.get())); - tl_p_q_inner_data->p = std::unique_ptr(new ByteArray(request->p.get())); - tl_p_q_inner_data->q = std::unique_ptr(new ByteArray(request->q.get())); - tl_p_q_inner_data->new_nonce = std::unique_ptr(new ByteArray(32)); + auto tl_p_q_inner_data = new TL_p_q_inner_data_dc(); + tl_p_q_inner_data->nonce = std::make_unique(authNonce); + tl_p_q_inner_data->server_nonce = std::make_unique(authServerNonce); + tl_p_q_inner_data->pq = std::make_unique(new ByteArray(result->pq.get())); + tl_p_q_inner_data->p = std::make_unique(new ByteArray(request->p.get())); + tl_p_q_inner_data->q = std::make_unique(new ByteArray(request->q.get())); + 
tl_p_q_inner_data->new_nonce = std::make_unique(new ByteArray(32)); if (ConnectionsManager::getInstance(currentDatacenter->instanceNum).testBackend) { tl_p_q_inner_data->dc = 10000 + currentDatacenter->datacenterId; } else { @@ -511,13 +444,13 @@ void Handshake::processHandshakeResponse(TLObject *message, int64_t messageId) { authNewNonce = new ByteArray(tl_p_q_inner_data->new_nonce.get()); innerData = tl_p_q_inner_data; } else { - TL_p_q_inner_data_temp_dc *tl_p_q_inner_data_temp = new TL_p_q_inner_data_temp_dc(); - tl_p_q_inner_data_temp->nonce = std::unique_ptr(new ByteArray(authNonce)); - tl_p_q_inner_data_temp->server_nonce = std::unique_ptr(new ByteArray(authServerNonce)); - tl_p_q_inner_data_temp->pq = std::unique_ptr(new ByteArray(result->pq.get())); - tl_p_q_inner_data_temp->p = std::unique_ptr(new ByteArray(request->p.get())); - tl_p_q_inner_data_temp->q = std::unique_ptr(new ByteArray(request->q.get())); - tl_p_q_inner_data_temp->new_nonce = std::unique_ptr(new ByteArray(32)); + auto tl_p_q_inner_data_temp = new TL_p_q_inner_data_temp_dc(); + tl_p_q_inner_data_temp->nonce = std::make_unique(new ByteArray(authNonce)); + tl_p_q_inner_data_temp->server_nonce = std::make_unique(new ByteArray(authServerNonce)); + tl_p_q_inner_data_temp->pq = std::make_unique(new ByteArray(result->pq.get())); + tl_p_q_inner_data_temp->p = std::make_unique(new ByteArray(request->p.get())); + tl_p_q_inner_data_temp->q = std::make_unique(new ByteArray(request->q.get())); + tl_p_q_inner_data_temp->new_nonce = std::make_unique(new ByteArray(32)); if (handshakeType == HandshakeTypeMediaTemp) { if (ConnectionsManager::getInstance(currentDatacenter->instanceNum).testBackend) { tl_p_q_inner_data_temp->dc = -(10000 + currentDatacenter->datacenterId); @@ -538,29 +471,79 @@ void Handshake::processHandshakeResponse(TLObject *message, int64_t messageId) { } uint32_t innerDataSize = innerData->getObjectSize(); - uint32_t additionalSize = innerDataSize + SHA_DIGEST_LENGTH < 255 ? 
255 - (innerDataSize + SHA_DIGEST_LENGTH) : 0; - NativeByteBuffer *innerDataBuffer = BuffersStorage::getInstance().getFreeBuffer(innerDataSize + additionalSize + SHA_DIGEST_LENGTH); - innerDataBuffer->position(SHA_DIGEST_LENGTH); + if (innerDataSize > 144) { + if (LOGS_ENABLED) DEBUG_E("account%u dc%u handshake: inner data too large %d, type = %d", currentDatacenter->instanceNum, currentDatacenter->datacenterId, innerDataSize, handshakeType); + delete innerData; + beginHandshake(false); + return; + } + uint32_t keySize = 32; + uint32_t ivSize = 32; + uint32_t paddedDataSize = 192; + uint32_t encryptedDataSize = keySize + paddedDataSize + SHA256_DIGEST_LENGTH; + uint32_t additionalSize = innerDataSize < paddedDataSize ? paddedDataSize - innerDataSize : 0; + NativeByteBuffer *innerDataBuffer = BuffersStorage::getInstance().getFreeBuffer(encryptedDataSize + paddedDataSize + ivSize + SHA256_DIGEST_LENGTH); + + innerDataBuffer->position(encryptedDataSize); innerData->serializeToStream(innerDataBuffer); delete innerData; - SHA1(innerDataBuffer->bytes() + SHA_DIGEST_LENGTH, innerDataSize, innerDataBuffer->bytes()); - if (additionalSize != 0) { - RAND_bytes(innerDataBuffer->bytes() + SHA_DIGEST_LENGTH + innerDataSize, additionalSize); - } - BIO *keyBio = BIO_new(BIO_s_mem()); BIO_write(keyBio, key.c_str(), (int) key.length()); - RSA *rsaKey = PEM_read_bio_RSAPublicKey(keyBio, NULL, NULL, NULL); + RSA *rsaKey = PEM_read_bio_RSAPublicKey(keyBio, nullptr, nullptr, nullptr); BIO_free(keyBio); + + while (true) { + RAND_bytes(innerDataBuffer->bytes() + encryptedDataSize + innerDataSize, additionalSize); + for (uint32_t i = 0; i < paddedDataSize; i++) { + innerDataBuffer->bytes()[keySize + i] = innerDataBuffer->bytes()[encryptedDataSize + paddedDataSize - i - 1]; + } + + RAND_bytes(innerDataBuffer->bytes(), keySize); + SHA256_CTX sha256Ctx; + SHA256_Init(&sha256Ctx); + SHA256_Update(&sha256Ctx, innerDataBuffer->bytes(), keySize); + SHA256_Update(&sha256Ctx, 
innerDataBuffer->bytes() + encryptedDataSize, paddedDataSize); + SHA256_Final(innerDataBuffer->bytes() + keySize + paddedDataSize, &sha256Ctx); + + memset(innerDataBuffer->bytes() + encryptedDataSize + paddedDataSize, 0, ivSize); + Datacenter::aesIgeEncryption(innerDataBuffer->bytes() + keySize, innerDataBuffer->bytes(), innerDataBuffer->bytes() + encryptedDataSize + paddedDataSize, true, true, paddedDataSize + SHA256_DIGEST_LENGTH); + + SHA256_Init(&sha256Ctx); + SHA256_Update(&sha256Ctx, innerDataBuffer->bytes() + keySize, paddedDataSize + SHA256_DIGEST_LENGTH); + SHA256_Final(innerDataBuffer->bytes() + encryptedDataSize + paddedDataSize + ivSize, &sha256Ctx); + + for (uint32_t i = 0; i < keySize; i++) { + innerDataBuffer->bytes()[i] ^= innerDataBuffer->bytes()[encryptedDataSize + paddedDataSize + ivSize + i]; + } + + bool ok = false; + size_t resLen = BN_bn2bin(rsaKey->n, innerDataBuffer->bytes() + encryptedDataSize); + const auto shift = (256 - resLen); + + for (auto i = 0; i != 256; ++i) { + const auto a = innerDataBuffer->bytes()[i]; + const auto b = (i < shift) ? 0 : innerDataBuffer->bytes()[encryptedDataSize + i - shift]; + if (a > b) { + break; + } else if (a < b) { + ok = true; + break; + } + } + if (ok) { + break; + } + } + if (bnContext == nullptr) { bnContext = BN_CTX_new(); } - BIGNUM *a = BN_bin2bn(innerDataBuffer->bytes(), innerDataBuffer->limit(), NULL); + BIGNUM *a = BN_bin2bn(innerDataBuffer->bytes(), encryptedDataSize, nullptr); BIGNUM *r = BN_new(); BN_mod_exp(r, a, rsaKey->e, rsaKey->n, bnContext); uint32_t size = BN_num_bytes(r); - ByteArray *rsaEncryptedData = new ByteArray(size >= 256 ? size : 256); + auto rsaEncryptedData = new ByteArray(size >= 256 ? 
size : 256); size_t resLen = BN_bn2bin(r, rsaEncryptedData->bytes); if (256 - resLen > 0) { memset(rsaEncryptedData->bytes + resLen, 0, 256 - resLen); @@ -911,7 +894,7 @@ void Handshake::processHandshakeResponse(TLObject *message, int64_t messageId) { } void Handshake::sendAckRequest(int64_t messageId) { - /*TL_msgs_ack *msgsAck = new TL_msgs_ack(); + /*auto msgsAck = new TL_msgs_ack(); msgsAck->msg_ids.push_back(messageId); sendRequestData(msgsAck, false);*/ } @@ -923,10 +906,10 @@ TLObject *Handshake::getCurrentHandshakeRequest() { void Handshake::saveCdnConfigInternal(NativeByteBuffer *buffer) { buffer->writeInt32(1); buffer->writeInt32((int32_t) cdnPublicKeys.size()); - for (std::map::iterator iter = cdnPublicKeys.begin(); iter != cdnPublicKeys.end(); iter++) { - buffer->writeInt32(iter->first); - buffer->writeString(iter->second); - buffer->writeInt64(cdnPublicKeysFingerprints[iter->first]); + for (auto & cdnPublicKey : cdnPublicKeys) { + buffer->writeInt32(cdnPublicKey.first); + buffer->writeString(cdnPublicKey.second); + buffer->writeInt64(cdnPublicKeysFingerprints[cdnPublicKey.first]); } } @@ -934,7 +917,7 @@ void Handshake::saveCdnConfig(Datacenter *datacenter) { if (cdnConfig == nullptr) { cdnConfig = new Config(datacenter->instanceNum, "cdnkeys.dat"); } - thread_local static NativeByteBuffer *sizeCalculator = new NativeByteBuffer(true); + thread_local static auto sizeCalculator = new NativeByteBuffer(true); sizeCalculator->clearCapacity(); saveCdnConfigInternal(sizeCalculator); NativeByteBuffer *buffer = BuffersStorage::getInstance().getFreeBuffer(sizeCalculator->capacity()); @@ -978,11 +961,11 @@ void Handshake::loadCdnConfig(Datacenter *datacenter) { } } loadingCdnKeys = true; - TL_help_getCdnConfig *request = new TL_help_getCdnConfig(); + auto request = new TL_help_getCdnConfig(); ConnectionsManager::getInstance(datacenter->instanceNum).sendRequest(request, [&, datacenter](TLObject *response, TL_error *error, int32_t networkType, int64_t responseTime) 
{ if (response != nullptr) { - TL_cdnConfig *config = (TL_cdnConfig *) response; + auto config = (TL_cdnConfig *) response; size_t count = config->public_keys.size(); BIO *keyBio = BIO_new(BIO_s_mem()); NativeByteBuffer *buffer = BuffersStorage::getInstance().getFreeBuffer(1024); @@ -992,7 +975,7 @@ void Handshake::loadCdnConfig(Datacenter *datacenter) { cdnPublicKeys[publicKey->dc_id] = publicKey->public_key; BIO_write(keyBio, publicKey->public_key.c_str(), (int) publicKey->public_key.length()); - RSA *rsaKey = PEM_read_bio_RSAPublicKey(keyBio, NULL, NULL, NULL); + RSA *rsaKey = PEM_read_bio_RSAPublicKey(keyBio, nullptr, nullptr, nullptr); int nBytes = BN_num_bytes(rsaKey->n); int eBytes = BN_num_bytes(rsaKey->e); diff --git a/TMessagesProj/jni/tgnet/Request.h b/TMessagesProj/jni/tgnet/Request.h index b15e22127..c9c6808ac 100644 --- a/TMessagesProj/jni/tgnet/Request.h +++ b/TMessagesProj/jni/tgnet/Request.h @@ -47,6 +47,7 @@ public: int64_t startTimeMillis = 0; int32_t minStartTime = 0; int32_t lastResendTime = 0; + bool isResending = false; int32_t instanceNum = 0; uint32_t serverFailureCount = 0; TLObject *rawRequest; diff --git a/TMessagesProj/jni/third_party/openh264/CONTRIBUTORS b/TMessagesProj/jni/third_party/openh264/CONTRIBUTORS new file mode 100644 index 000000000..24da4913a --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/CONTRIBUTORS @@ -0,0 +1,55 @@ +# Contributors to the OpenH264 project + +Patrick Ai +Sijia Chen +ZhaoZheng Chu +Paley Du +Martin Ettl +Andreas Gal +Xu Guang +Licai Guo +Yi Guo +Horace Huang +Steven Huang +Ethan Hugg +Cullen Jennings +Zhaofeng Jia +Derrick Jin +Jesse Li +Jifei Li +Kai Li +Karina Li +Matt Li +Xiang Li +Bourne Ling +Alex Liu +Wayne Liu +Varun Patil +Eric Rescorla +Adam Roach +Sawyer Shan +Siping Tao +Martin Storsjö +Brion Vibber +James Wang +Juanny Wang +Zhiliang Wang +Hervé Willems +Gregory J Wolfe +Katherine Wu +Guang Xu +Jeffery Xu +Gang Yang +Li Yao +Jiessie Zhang +Rory Zhang +Volvet Zhang +Ling Zhu +James 
Zhu +Dong Zhang +Haibo Zhu +Huade Shi + + + + diff --git a/TMessagesProj/jni/third_party/openh264/LICENSE b/TMessagesProj/jni/third_party/openh264/LICENSE new file mode 100644 index 000000000..8e730c4e2 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/LICENSE @@ -0,0 +1,23 @@ +Copyright (c) 2013, Cisco Systems +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/TMessagesProj/jni/third_party/openh264/README.md b/TMessagesProj/jni/third_party/openh264/README.md new file mode 100644 index 000000000..e173d1c0b --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/README.md @@ -0,0 +1,211 @@ +OpenH264 +======== +OpenH264 is a codec library which supports H.264 encoding and decoding. 
It is suitable for use in real time applications such as WebRTC. See http://www.openh264.org/ for more details. + +Encoder Features +---------------- +- Constrained Baseline Profile up to Level 5.2 (Max frame size is 36864 macro-blocks) +- Arbitrary resolution, not constrained to multiples of 16x16 +- Rate control with adaptive quantization, or constant quantization +- Slice options: 1 slice per frame, N slices per frame, N macroblocks per slice, or N bytes per slice +- Multiple threads automatically used for multiple slices +- Temporal scalability up to 4 layers in a dyadic hierarchy +- Simulcast AVC up to 4 resolutions from a single input +- Spatial simulcast up to 4 resolutions from a single input +- Long Term Reference (LTR) frames +- Memory Management Control Operation (MMCO) +- Reference picture list modification +- Single reference frame for inter prediction +- Multiple reference frames when using LTR and/or 3-4 temporal layers +- Periodic and on-demand Instantaneous Decoder Refresh (IDR) frame insertion +- Dynamic changes to bit rate, frame rate, and resolution +- Annex B byte stream output +- YUV 4:2:0 planar input + +Decoder Features +---------------- +- Constrained Baseline Profile up to Level 5.2 (Max frame size is 36864 macro-blocks) +- Arbitrary resolution, not constrained to multiples of 16x16 +- Single thread for all slices +- Long Term Reference (LTR) frames +- Memory Management Control Operation (MMCO) +- Reference picture list modification +- Multiple reference frames when specified in Sequence Parameter Set (SPS) +- Annex B byte stream input +- YUV 4:2:0 planar output + +OS Support +---------- +- Windows 64-bit and 32-bit +- Mac OS X 64-bit and 32-bit +- Linux 64-bit and 32-bit +- Android 64-bit and 32-bit +- iOS 64-bit and 32-bit +- Windows Phone 32-bit + +Processor Support +----------------- +- Intel x86 optionally with MMX/SSE (no AVX yet, help is welcome) +- ARMv7 optionally with NEON, AArch64 optionally with NEON +- Any architecture using 
C/C++ fallback functions + +Building the Library +-------------------- +NASM needs to be installed for the assembly code (version 2.10.06 or above); NASM can be downloaded from http://www.nasm.us/. +For Mac OSX 64-bit, NASM needs to be below version 2.11.08, as NASM 2.11.08 introduces an error when using RIP-relative addresses in Mac OSX 64-bit + +To build the arm assembly for Windows Phone, gas-preprocessor is required. It can be downloaded from git://git.libav.org/gas-preprocessor.git + +For Android Builds +------------------ +To build for the android platform, you need to install android sdk and ndk. You also need to export `**ANDROID_SDK**/tools` to PATH. On Linux, this can be done by + + export PATH=**ANDROID_SDK**/tools:$PATH + +The codec and demo can be built by + + make OS=android NDKROOT=**ANDROID_NDK** TARGET=**ANDROID_TARGET** + +Valid `**ANDROID_TARGET**` can be found in `**ANDROID_SDK**/platforms`, such as `android-12`. +You can also set `ARCH`, `NDKLEVEL` according to your device and NDK version. +`ARCH` specifies the architecture of android device. Currently `arm`, `arm64`, `x86` and `x86_64` are supported, the default is `arm`. (`mips` and `mips64` can also be used, but there's no specific optimization for those architectures.) +`NDKLEVEL` specifies android api level, the default is 12. Available possibilities can be found in `**ANDROID_NDK**/platforms`, such as `android-21` (strip away the `android-` prefix). + +By default these commands build for the `armeabi-v7a` ABI. To build for the other android +ABIs, add `ARCH=arm64`, `ARCH=x86`, `ARCH=x86_64`, `ARCH=mips` or `ARCH=mips64`. +To build for the older `armeabi` ABI (which has armv5te as baseline), add `APP_ABI=armeabi` (`ARCH=arm` is implicit). +To build for 64-bit ABI, such as `arm64`, explicitly set `NDKLEVEL` to 21 or higher. + +For iOS Builds +-------------- +You can build the libraries and demo applications using xcode project files +located in `codec/build/iOS/dec` and `codec/build/iOS/enc`.
+ +You can also build the libraries (but not the demo applications) using the +make based build system from the command line. Build with + + make OS=ios ARCH=**ARCH** + +Valid values for `**ARCH**` are the normal iOS architecture names such as +`armv7`, `armv7s`, `arm64`, and `i386` and `x86_64` for the simulator. +Another settable iOS specific parameter +is `SDK_MIN`, specifying the minimum deployment target for the built library. +For other details on building using make on the command line, see +'For All Platforms' below. + +For Linux Builds +-------------- + +You can build the libraries (but not the demo applications) using the +make based build system from the command line. Build with + + make OS=linux ARCH=**ARCH** + + You can set `ARCH` according to your linux device. +`ARCH` specifies the architecture of the device. Currently `arm`, `arm64`, `x86` and `x86_64` are supported + + NOTICE: + If your computer has an x86 architecture, to build the library for use on an arm/aarch64 machine you may need to use a cross-compiler, for example: + make OS=linux CC=aarch64-linux-gnu-gcc CXX=aarch64-linux-gnu-g++ ARCH=arm64 + or + make OS=linux CC=arm-linux-gnueabi-gcc CXX=arm-linux-gnueabi-g++ ARCH=arm + + +For Windows Builds +------------------ + +Our Windows builds use MinGW which can be downloaded from http://www.mingw.org/ + +To build with gcc, add the MinGW bin directory (e.g. `/c/MinGW/bin`) to your path and follow the 'For All Platforms' instructions below. + +To build with Visual Studio you will need to set up your path to run cl.exe. The easiest way is to start MSYS from a developer command line session. Instructions can be found at http://msdn.microsoft.com/en-us/library/ms229859(v=vs.110).aspx.
If you need to do it by hand here is an example from a Windows 64bit install of VS2012: + + export PATH="$PATH:/c/Program Files (x86)/Microsoft Visual Studio 11.0/VC/bin:/c/Program Files (x86)/Microsoft Visual Studio 11.0/Common7/IDE" + +You will also need to set your INCLUDE and LIB paths to point to your VS and SDK installs. Something like this, again from Win64 with VS2012 (note the use of Windows-style paths here). + + export INCLUDE="C:\Program Files (x86)\Microsoft Visual Studio 11.0\VC\include;C:\Program Files (x86)\Windows Kits\8.0\Include\um;C:\Program Files (x86)\Windows Kits\8.0\Include\shared" + export LIB="C:\Program Files (x86)\Windows Kits\8.0\Lib\Win8\um\x86;C:\Program Files (x86)\Microsoft Visual Studio 11.0\VC\lib" + +Then add `OS=msvc` to the make line of the 'For All Platforms' instructions. + +For Windows Phone Builds +------------------------ + +Follow the instructions above for normal Windows builds, but use `OS=msvc-wp` +instead of `OS=msvc`. You will also need gas-preprocessor (as mentioned below +"Building the Library"). + +If building for Windows Phone with MSVC 2013, there's no included bat file that sets the lib paths to the Windows Phone kit, but that can be done with a command like this: + + export LIB="c:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\lib\store\arm;c:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\lib\arm;c:\Program Files (x86)\Windows Phone Kits\8.1\lib\arm" + +This is only necessary for building the DLL; the static library can be built without setting this. + +Note, only Windows Phone 8.1 or newer is supported, 8.0 is no longer supported. 
+ +For All Platforms +------------------- + +Using make +---------- + +From the main project directory: +- `make` for automatically detecting architecture and building accordingly +- `make ARCH=i386` for x86 32-bit builds +- `make ARCH=x86_64` for x86 64-bit builds +- `make V=No` for a silent build (not showing the actual compiler commands) +- `make DEBUGSYMBOLS=True` for two sets of libraries: the normal libraries, and copies with the debugging symbol table entries (those created by the -g option) removed + +The command line programs `h264enc` and `h264dec` will appear in the main project directory. + +A shell script to run the command-line apps is in `testbin/CmdLineExample.sh` + +Usage information can be found in `testbin/CmdLineReadMe` + +Using meson +----------- + +Meson build definitions have been added, and are known to work on Linux +and Windows, for x86 and x86 64-bit. + +See http://mesonbuild.com/Getting-meson.html for instructions on how to +install meson, then: + +``` shell +meson builddir +ninja -C builddir +``` + +Run the tests with: + +``` shell +meson test -C builddir -v +``` + +Install with: + +``` shell +ninja -C builddir install +``` + +Using the Source +---------------- +- `codec` - encoder, decoder, console (test app), build (makefile, vcproj) +- `build` - scripts for Makefile build system +- `test` - GTest unittest files +- `testbin` - autobuild scripts, test app config files +- `res` - yuv and bitstream test files + +Known Issues +------------ +See the issue tracker on https://github.com/cisco/openh264/issues +- Encoder errors when resolution exceeds 3840x2160 +- Encoder errors when compressed frame size exceeds half uncompressed size +- Decoder errors when compressed frame size exceeds 1MB +- Encoder RC requires frame skipping to be enabled to hit the target bitrate, + if frame skipping is disabled the target bitrate may be exceeded + +License +------- +BSD, see `LICENSE` file for details.
diff --git a/TMessagesProj/jni/third_party/openh264/RELEASES b/TMessagesProj/jni/third_party/openh264/RELEASES new file mode 100644 index 000000000..349549db7 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/RELEASES @@ -0,0 +1,346 @@ + +Releases +----------- +v2.1.0 +------ +- Experimentally support for multi-thread decoding(default disabled,and may result in random problems if enabled) +- Assembly optimization for loongson platform +- Update meson version to 5 +- Some minor bug fixes + +v2.0.0 +------ +- B-frame decoding support for Main and High Profile with two test cases +- Add support for loongson(https://en.wikipedia.org/wiki/Loongson) platform +- Add clang support for arm/arm64/x86 for NDK version over 17 +- Enable stack protector +- Add some test cases +- Avoid using C++/CX code for threads for Windows Phone/Windows Store/UWP +- Remove extra visual studio projects for the decoder +- Remove check for working compiler in NDK +- Bug fixes + +v1.8.0 +------ +- Add meson build for Linux/Windows platform +- Disable background detection for screen route +- Add a workaround for Visual Studio 2013 C++ x64 compiler bug on AVX2. That bug will cause crash and has been fixed in Visual Studio 2014 +- Change the default profile from baseline to high if user does not set it and CABAC is specified +- Skip frames that are marked as IDR due to scene change and simultaneously marked as skip frame to reduce bit rate +- Refine threshold calculation algorithms for rate control in lower frame rate to get better effect +- Encoder return with a specific return value instead of uninitialize encoder when input resolution is invalid +- Refine strategy on level change to avoid frequent IDR. 
Encoder will not be reset if level is changed to a smaller one +- Support to set the min and max QP values on screen content mode +- Fix a memory issue that may cause encoder crash when temporal layer change +- Corrected some statistics information +- Refine error concealment algorithms to improve user experience +- Support to get information about current output picture is reference picture or not on decoder side +- Bug fix for decoder when 8x8 prediction mode is enabled on the input bitstream +- Enable NEON for ChromeOS devices +- Support for Fuchsia operating system +- Support for building arm64 with MSVC +- Remove some warnings when building with MSVC +- Fix clang compiler error when building arm assembly functions +- Bug fixes for unit test + +v1.7.0 +------ +- Changed SPS/PPS strategy option name,See enum ENCODER_OPTION +- Changed NAL size length parameter from static array to pointer to support more NALs.See struct SParserBsInfo +- Changed semaphores to condition variables on apple platform +- Changed version update mechanism as Major.Minor.patch,like 1.7.0 +- Supported to force IDR independently for each layer in simulcast AVC case.See API ForceIntraFrame() +- Supported LTR request independently for each layer in simulcast AVC case.See struct SLTRRecoverRequest and SLTRMarkingFeedback +- Supported to set sample aspect ratio in VUI on encoder side. See struct SSpatialLayerConfig +- Supported to set profile and level, changed the default level as 4.1 if the user doesn’t set it. See enum ELevelIdc +- Supported to get profile and level info on decoder side.See enum DECODER_OPTION +- Supported for enable/disable AVX2 build option. Build option: HAVE_AVX2 +- Supported to set decoder statistics log interval, Add DECODER_OPTION_STATISTICS_LOG_INTERVAL.See DECODER_OPTION. +- Supported for AU delimiter NAL on decoder side. AU delimiter refers to section 7.3.2.4 +- Supported for x86 PIC assembly and build option. Build option: ENABLEPIC.
git issues:#2263 #2534 +- Supported for Cygwin x86_64 build +- Supported to get sample aspect ratio by GetOption on decoder. Add option: DECODER_OPTION_GET_SAR_INFO +- Set constraint_set4_flag constraint_set5_flag to align to CHP definition in latest H264 standard +- Improved VUI support on decoder side +- Improved decoder statistics info output +- Refined the return value when failed in memory allocation +- Added SSSE3 motion compensation routines +- Added AVX2 motion compensation routines +- Optimization on some of SSE2/MMX functions +- Refactor rate control for RC_BUFFERBASED_MODE and RC_QUALITY_MODE mode +- Added more unit tests for random resolution input,slice mode switch,profile/level setting +- Refined logs +- Bug fixes for 4:0:0 format support on decoder +- Bug fixes for complexity calculation for screen content mode +- Bug fixes for loadbalancing turn on, git issue:#2618 +- Bug fixes for parser subsps, scalling list, parser longer bitstream + +v1.6.0 +------ +- Adjusted the encoder API structures +- Removed the unused data format in decoder API +- Encoder support of simulcast AVC +- Added support of video signal type present information +- Added support of encoder load-balancing +- Improved encoder multi-threads, rate control and down-sampling +- Fixed the frame size constraint in encoder +- Bug fixes for rate control, multi-threading, simulcasting in encoder +- Bug fixes for interface call, return value check, memory leak in decoder +- Bug fixes for UT and statistic information +- Bug fixes for assembly code +- Remove the unused and redundant code +- Improvements on UT, memory allocation failed protection, error-protection in decoder, input parameters checking in encoder, assembly for AVX2 support, assembly code performance, logging and documentation +- Correct some typos in source code and documents + +v1.5.3 +------ +- Bug fixes for GMP Plugin + +v1.5.2 +------ +- Fix GMP Plugin causing the Browser crash on Android + +v1.5.1 +------ +- Bug fixes for 
GMP Plugin + +v1.5.0 +------ +- Correct a typo in codec return value (github issue#2046, cmUnkonwReason -> cmUnknownReason) +- Added Codec demo and auto build script for WP8 +- Decoder support of 'Constrained High Profile' of H.264 +- Encoder support of CABAC of H.264 +- Encoder support of input frame rate 60 +- Improved syntax of gaps_in_frame_num_value_allowed_flag in encoder +- Improved memory usage for multi-threading in encoder +- Added VUI info for base layer in encoder +- Added encoder interface to get external setting of iMaxQp and iMinQp for rate control +- Bug fixes for Rate Control, multi-threading and simulcasting in encoder +- Bug fixes for NoDelay API, ParseOnly functions, error-concealment off functiond and error-detection in decoder +- Bug fixes for UT +- Fixes to avoid valgrind warnings, potential crash and calculation overflow +- Merged files for decoder/encoder and remove unused files +- Improvements on build scripts, UT, error-protection in decoder, input param checking in encoder, assembly for 64bit support, downsampling, logging and documentation + +Note: +'Constrained High Profile' = 'Constrained Baseline Profile' plus: +- CABAC +- Intra 8x8 mode support +- 8x8 transform +- QP scaling matrices +- QP per chroma component +- Mono 4:0:0 (experimental) +- Weighted prediction + +v1.4.0 +------ +- Decoder new interface of DecodeFrameNoDelay +- Added new encoder and decoder statistics +- Added option for generating pdb in windows builds +- Added new rate control mode (RC_TIMESTAMP_MODE) for inconstant frame rate input +- Added new Sps/Pps strategies for real-time video +— Added support for simulcast avc +- Improvements in code structure, assembly, input parameter checking, logging, UT and comments +- In gmp-openh264, return decoder error correctly and other fixes +- Decoder bug fixes when for Error Concealment disabled +- Bug fixes for ParseOnly functions +- Bug fixes for encoding large frame size (>32767MBs) +- Fixes to avoid valgrind warnings, 
potential crash and calculation overflow + +----------- +v1.3.1 +------ +- Fixed and enhanced protection to avoid crash when reading lossy bitstreams +- Adjust the default mode of Error Concealment used by gmp-openh264 + +----------- +v1.3.0 +------ +- Removed manual API document, now using wiki: https://github.com/cisco/openh264/wiki (0af48e5 for v1.3.0) +- Added API version in API header files +- Added pkg-config file +- Added decoder support of parsing only (bParseOnly) for only parsing bit stream but not decoding +- Added timestamp and max nal size in gmp-openh264.cpp when calling encoding +- Added timestamp info in decoder input and return structure +- Added support of level 9 in decoder +- Added total length of the encoded frame in encoder return structure +- Added SetOption(ENCODER_OPTION_SVC_ENCODE_PARAM_BASE,&base) for encoder +- Set constraint set 0 and 1 flags for non-scalable +- Improved error concealment algorithms and provide more modes of error-concealment +- Improved rate control algorithms and reference selection algorithms for screen content encoding +- Added encoder and decoder statistics interface +- Improved input parameter checking and logging +- Bug fixes, warning reductions, and test improvements + +----------- +v1.2.0 +------ +- Add and modify encoder APIs related to rate control and screen content encoding +- Remove PauseFrame in encoder APIs +- Improve rate control and compression ratio for screen content encoding +- Improve error concealment algorithm +- Improve validation of input parameters +- Add ARM64 assembly +- bug fixes + +----------- +v1.1.0 +------ +- Modify some APIs (see API doc for detail) +- Improve the compression ratio of screen content encoding +- ARM64 assembly support for most of core functions in encoder & decoder +- Modify error concealment logic to always return decoding error info until IDR picture comes +- fix some bugs + + +Binaries +----------- +These binary releases are distributed under this license: 
+http://www.openh264.org/BINARY_LICENSE.txt + +v2.1.0 +http://ciscobinary.openh264.org/libopenh264-2.1.0-android-arm.so.bz2 +http://ciscobinary.openh264.org/libopenh264-2.1.0-android-arm.so.sig.bz2 +http://ciscobinary.openh264.org/libopenh264-2.1.0-android-arm64.so.bz2 +http://ciscobinary.openh264.org/libopenh264-2.1.0-android-arm64.so.sig.bz2 +http://ciscobinary.openh264.org/libopenh264-2.1.0-ios.a.bz2 +http://ciscobinary.openh264.org/libopenh264-2.1.0-ios.a.sig.bz2 +http://ciscobinary.openh264.org/libopenh264-2.1.0-linux32.5.so.bz2 +http://ciscobinary.openh264.org/libopenh264-2.1.0-linux32.5.so.sig.bz2 +http://ciscobinary.openh264.org/libopenh264-2.1.0-linux64.5.so.bz2 +http://ciscobinary.openh264.org/libopenh264-2.1.0-linux64.5.so.sig.bz2 +http://ciscobinary.openh264.org/libopenh264-2.1.0-osx32.5.dylib.bz2 +http://ciscobinary.openh264.org/libopenh264-2.1.0-osx64.5.dylib.bz2 +http://ciscobinary.openh264.org/openh264-2.1.0-win32.dll.bz2 +http://ciscobinary.openh264.org/openh264-2.1.0-win64.dll.bz2 + +v2.0.0 +------ +http://ciscobinary.openh264.org/libopenh264-2.0.0-android.so.bz2 +http://ciscobinary.openh264.org/libopenh264-2.0.0-android.so.sig.bz2 +http://ciscobinary.openh264.org/libopenh264-2.0.0-ios.a.bz2 +http://ciscobinary.openh264.org/libopenh264-2.0.0-ios.a.sig.bz2 +http://ciscobinary.openh264.org/libopenh264-2.0.0-linux32.5.so.bz2 +http://ciscobinary.openh264.org/libopenh264-2.0.0-linux32.5.so.sig.bz2 +http://ciscobinary.openh264.org/libopenh264-2.0.0-linux64.5.so.bz2 +http://ciscobinary.openh264.org/libopenh264-2.0.0-linux64.5.so.sig.bz2 +http://ciscobinary.openh264.org/libopenh264-2.0.0-osx32.5.dylib.bz2 +http://ciscobinary.openh264.org/libopenh264-2.0.0-osx64.5.dylib.bz2 +http://ciscobinary.openh264.org/openh264-2.0.0-win32.dll.bz2 +http://ciscobinary.openh264.org/openh264-2.0.0-win64.dll.bz2 + +v1.8.0 +------ +http://ciscobinary.openh264.org/libopenh264-1.8.0-android19.so.bz2 +http://ciscobinary.openh264.org/libopenh264-1.8.0-android19.so.sig.bz2 
+http://ciscobinary.openh264.org/libopenh264-1.8.0-ios.a.bz2 +http://ciscobinary.openh264.org/libopenh264-1.8.0-ios.a.sig.bz2 +http://ciscobinary.openh264.org/libopenh264-1.8.0-linux32.4.so.bz2 +http://ciscobinary.openh264.org/libopenh264-1.8.0-linux32.4.so.sig.bz2 +http://ciscobinary.openh264.org/libopenh264-1.8.0-linux64.4.so.bz2 +http://ciscobinary.openh264.org/libopenh264-1.8.0-linux64.4.so.sig.bz2 +http://ciscobinary.openh264.org/libopenh264-1.8.0-osx32.4.dylib.bz2 +http://ciscobinary.openh264.org/libopenh264-1.8.0-osx64.4.dylib.bz2 +http://ciscobinary.openh264.org/openh264-1.8.0-win32.dll.bz2 +http://ciscobinary.openh264.org/openh264-1.8.0-win64.dll.bz2 + +v1.7.0 +------ +http://ciscobinary.openh264.org/libopenh264-1.7.0-android19.so.bz2 +http://ciscobinary.openh264.org/libopenh264-1.7.0-android19.so.sig.bz2 +http://ciscobinary.openh264.org/libopenh264-1.7.0-ios.a.bz2 +http://ciscobinary.openh264.org/libopenh264-1.7.0-ios.a.sig.bz2 +http://ciscobinary.openh264.org/libopenh264-1.7.0-linux32.4.so.bz2 +http://ciscobinary.openh264.org/libopenh264-1.7.0-linux32.4.so.sig.bz2 +http://ciscobinary.openh264.org/libopenh264-1.7.0-linux64.4.so.bz2 +http://ciscobinary.openh264.org/libopenh264-1.7.0-linux64.4.so.sig.bz2 +http://ciscobinary.openh264.org/libopenh264-1.7.0-osx32.4.dylib.bz2 +http://ciscobinary.openh264.org/libopenh264-1.7.0-osx64.4.dylib.bz2 +http://ciscobinary.openh264.org/openh264-1.7.0-win32.dll.bz2 +http://ciscobinary.openh264.org/openh264-1.7.0-win64.dll.bz2 + +v1.6.0 +------ +http://ciscobinary.openh264.org/libopenh264-1.6.0-android19.so.bz2 +http://ciscobinary.openh264.org/libopenh264-1.6.0-ios.a.bz2 +http://ciscobinary.openh264.org/libopenh264-1.6.0-linux32.3.so.bz2 +http://ciscobinary.openh264.org/libopenh264-1.6.0-linux64.3.so.bz2 +http://ciscobinary.openh264.org/libopenh264-1.6.0-osx32.3.dylib.bz2 +http://ciscobinary.openh264.org/libopenh264-1.6.0-osx64.3.dylib.bz2 +http://ciscobinary.openh264.org/openh264-1.6.0-win32msvc.dll.bz2 
+http://ciscobinary.openh264.org/openh264-1.6.0-win64msvc.dll.bz2 + +v1.5.0 +------ +http://ciscobinary.openh264.org/libopenh264-1.5.0-android19.so.bz2 +http://ciscobinary.openh264.org/libopenh264-1.5.0-ios.a.bz2 +http://ciscobinary.openh264.org/libopenh264-1.5.0-linux32.so.bz2 +http://ciscobinary.openh264.org/libopenh264-1.5.0-linux64.so.bz2 +http://ciscobinary.openh264.org/libopenh264-1.5.0-osx32.dylib.bz2 +http://ciscobinary.openh264.org/libopenh264-1.5.0-osx64.dylib.bz2 +http://ciscobinary.openh264.org/openh264-1.5.0-win32msvc.dll.bz2 +http://ciscobinary.openh264.org/openh264-1.5.0-win64msvc.dll.bz2 + +v1.4.0 +------ +http://ciscobinary.openh264.org/libopenh264-1.4.0-android19.so.bz2 +http://ciscobinary.openh264.org/libopenh264-1.4.0-linux32.so.bz2 +http://ciscobinary.openh264.org/libopenh264-1.4.0-linux64.so.bz2 +http://ciscobinary.openh264.org/libopenh264-1.4.0-osx32.dylib.bz2 +http://ciscobinary.openh264.org/libopenh264-1.4.0-osx64.dylib.bz2 +http://ciscobinary.openh264.org/openh264-1.4.0-win32msvc.dll.bz2 +http://ciscobinary.openh264.org/openh264-1.4.0-win64msvc.dll.bz2 + +v1.3.1 +------ +http://ciscobinary.openh264.org/libopenh264-1.3.1-android19.so.bz2 +http://ciscobinary.openh264.org/libopenh264-1.3.1-linux32.so.bz2 +http://ciscobinary.openh264.org/libopenh264-1.3.1-linux64.so.bz2 +http://ciscobinary.openh264.org/libopenh264-1.3.1-osx32.dylib.bz2 +http://ciscobinary.openh264.org/libopenh264-1.3.1-osx64.dylib.bz2 +http://ciscobinary.openh264.org/openh264-1.3.1-win32msvc.dll.bz2 +http://ciscobinary.openh264.org/openh264-1.3.1-win64msvc.dll.bz2 + +v1.3.0 +------ +http://ciscobinary.openh264.org/libopenh264-1.3.0-android19.so.bz2 +http://ciscobinary.openh264.org/libopenh264-1.3.0-linux32.so.bz2 +http://ciscobinary.openh264.org/libopenh264-1.3.0-linux64.so.bz2 +http://ciscobinary.openh264.org/libopenh264-1.3.0-osx32.dylib.bz2 +http://ciscobinary.openh264.org/libopenh264-1.3.0-osx64.dylib.bz2 +http://ciscobinary.openh264.org/openh264-1.3.0-win32msvc.dll.bz2 
+http://ciscobinary.openh264.org/openh264-1.3.0-win64msvc.dll.bz2 + +v1.2.0 +------ +http://ciscobinary.openh264.org/libopenh264-1.2.0-android19.so.bz2 +http://ciscobinary.openh264.org/libopenh264-1.2.0-linux32.so.bz2 +http://ciscobinary.openh264.org/libopenh264-1.2.0-linux64.so.bz2 +http://ciscobinary.openh264.org/libopenh264-1.2.0-osx32.dylib.bz2 +http://ciscobinary.openh264.org/libopenh264-1.2.0-osx64.dylib.bz2 +http://ciscobinary.openh264.org/openh264-1.2.0-win32msvc.dll.bz2 +http://ciscobinary.openh264.org/openh264-1.2.0-win64msvc.dll.bz2 + +v1.1.0 +------ +http://ciscobinary.openh264.org/libopenh264-1.1.0-android19.so.bz2 +http://ciscobinary.openh264.org/libopenh264-1.1.0-linux32.so.bz2 +http://ciscobinary.openh264.org/libopenh264-1.1.0-linux64.so.bz2 +http://ciscobinary.openh264.org/libopenh264-1.1.0-osx32.dylib.bz2 +http://ciscobinary.openh264.org/libopenh264-1.1.0-osx64.dylib.bz2 +http://ciscobinary.openh264.org/openh264-1.1.0-win32msvc.dll.bz2 +http://ciscobinary.openh264.org/openh264-1.1.0-win64msvc.dll.bz2 + +v1.0.0 +------ + +http://ciscobinary.openh264.org/libopenh264-1.0.0-android19.so.bz2 +http://ciscobinary.openh264.org/libopenh264-1.0.0-linux32.so.bz2 +http://ciscobinary.openh264.org/libopenh264-1.0.0-linux64.so.bz2 +http://ciscobinary.openh264.org/libopenh264-1.0.0-osx64.dylib.bz2 +http://ciscobinary.openh264.org/openh264-1.0.0-win32msvc.dll.bz2 +http://ciscobinary.openh264.org/openh264-1.0.0-win64msvc.dll.bz2 + + + + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/api/svc/codec_api.h b/TMessagesProj/jni/third_party/openh264/src/codec/api/svc/codec_api.h new file mode 100644 index 000000000..a1326c8f0 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/api/svc/codec_api.h @@ -0,0 +1,592 @@ +/*! + *@page License + * + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef WELS_VIDEO_CODEC_SVC_API_H__ +#define WELS_VIDEO_CODEC_SVC_API_H__ + +#ifndef __cplusplus +#if defined(_MSC_VER) && (_MSC_VER < 1800) +typedef unsigned char bool; +#else +#include <stdbool.h> +#endif +#endif + +#include "codec_app_def.h" +#include "codec_def.h" + +#if defined(_WIN32) || defined(__cdecl) +#define EXTAPI __cdecl +#else +#define EXTAPI +#endif + +/** + * @file codec_api.h +*/ + +/** + * @page Overview + * * This page is for openh264 codec API usage.
+ * * For how to use the encoder, please refer to page UsageExampleForEncoder + * * For how to use the decoder, please refer to page UsageExampleForDecoder + * * For more detail about ISVCEncoder, please refer to page ISVCEncoder + * * For more detail about ISVCDecoder, please refer to page ISVCDecoder +*/ + +/** + * @page DecoderUsageExample + * + * @brief + * * An example for using the decoder for Decoding only or Parsing only + * + * Step 1:decoder declaration + * @code + * + * //decoder declaration + * ISVCDecoder *pSvcDecoder; + * //input: encoded bitstream start position; should include start code prefix + * unsigned char *pBuf =...; + * //input: encoded bit stream length; should include the size of start code prefix + * int iSize =...; + * //output: [0~2] for Y,U,V buffer for Decoding only + * unsigned char *pData[3] =...; + * //in-out: for Decoding only: declare and initialize the output buffer info, this should never co-exist with Parsing only + * SBufferInfo sDstBufInfo; + * memset(&sDstBufInfo, 0, sizeof(SBufferInfo)); + * //in-out: for Parsing only: declare and initialize the output bitstream buffer info for parse only, this should never co-exist with Decoding only + * SParserBsInfo sDstParseInfo; + * memset(&sDstParseInfo, 0, sizeof(SParserBsInfo)); + * sDstParseInfo.pDstBuff = new unsigned char[PARSE_SIZE]; //In Parsing only, allocate enough buffer to save transcoded bitstream for a frame + * + * @endcode + * + * Step 2:decoder creation + * @code + * WelsCreateDecoder(&pSvcDecoder); + * @endcode + * + * Step 3:declare required parameter, used to differentiate Decoding only and Parsing only + * @code + * SDecodingParam sDecParam = {0}; + * sDecParam.sVideoProperty.eVideoBsType = VIDEO_BITSTREAM_AVC; + * //for Parsing only, the assignment is mandatory + * sDecParam.bParseOnly = true; + * @endcode + * + * Step 4:initialize the parameter and decoder context, allocate memory + * @code + * pSvcDecoder->Initialize(&sDecParam); + * @endcode + * + * Step 5:do actual
decoding process in slice level; + * this can be done in a loop until data ends + * @code + * //for Decoding only + * iRet = pSvcDecoder->DecodeFrameNoDelay(pBuf, iSize, pData, &sDstBufInfo); + * //or + * iRet = pSvcDecoder->DecodeFrame2(pBuf, iSize, pData, &sDstBufInfo); + * //for Parsing only + * iRet = pSvcDecoder->DecodeParser(pBuf, iSize, &sDstParseInfo); + * //decode failed + * if (iRet != 0){ + * //error handling (RequestIDR or something like that) + * } + * //for Decoding only, pData can be used for render. + * if (sDstBufInfo.iBufferStatus==1){ + * //output handling (pData[0], pData[1], pData[2]) + * } + * //for Parsing only, sDstParseInfo can be used for, e.g., HW decoding + * if (sDstParseInfo.iNalNum > 0){ + * //Hardware decoding sDstParseInfo; + * } + * //no-delay decoding can be realized by directly calling DecodeFrameNoDelay(), which is the recommended usage. + * //no-delay decoding can also be realized by directly calling DecodeFrame2() again with NULL input, as in the following. In this case, decoder would immediately reconstruct the input data. This can also be used similarly for Parsing only. Consequent decoding error and output indication should also be considered as above. + * iRet = pSvcDecoder->DecodeFrame2(NULL, 0, pData, &sDstBufInfo); + * //judge iRet, sDstBufInfo.iBufferStatus ...
+ * @endcode + * + * Step 6:uninitialize the decoder and free memory + * @code + * pSvcDecoder->Uninitialize(); + * @endcode + * + * Step 7:destroy the decoder + * @code + * WelsDestroyDecoder(pSvcDecoder); + * @endcode + * +*/ + +/** + * @page EncoderUsageExample1 + * + * @brief + * * An example for using encoder with basic parameter + * + * Step1:setup encoder + * @code + * ISVCEncoder* encoder_; + * int rv = WelsCreateSVCEncoder (&encoder_); + * assert (rv == 0); + * assert (encoder_ != NULL); + * @endcode + * + * Step2:initialize with basic parameter + * @code + * SEncParamBase param; + * memset (&param, 0, sizeof (SEncParamBase)); + * param.iUsageType = usageType; //from EUsageType enum + * param.fMaxFrameRate = frameRate; + * param.iPicWidth = width; + * param.iPicHeight = height; + * param.iTargetBitrate = 5000000; + * encoder_->Initialize (&param); + * @endcode + * + * Step3:set option, set option during encoding process + * @code + * encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &g_LevelSetting); + * int videoFormat = videoFormatI420; + * encoder_->SetOption (ENCODER_OPTION_DATAFORMAT, &videoFormat); + * @endcode + * + * Step4: encode and store output bitstream + * @code + * int frameSize = width * height * 3 / 2; + * BufferedData buf; + * buf.SetLength (frameSize); + * assert (buf.Length() == (size_t)frameSize); + * SFrameBSInfo info; + * memset (&info, 0, sizeof (SFrameBSInfo)); + * SSourcePicture pic; + * memset (&pic, 0, sizeof (SSourcePicture)); + * pic.iPicWidth = width; + * pic.iPicHeight = height; + * pic.iColorFormat = videoFormatI420; + * pic.iStride[0] = pic.iPicWidth; + * pic.iStride[1] = pic.iStride[2] = pic.iPicWidth >> 1; + * pic.pData[0] = buf.data(); + * pic.pData[1] = pic.pData[0] + width * height; + * pic.pData[2] = pic.pData[1] + (width * height >> 2); + * for (int num = 0; num < total_num; num++) { + * //prepare input data + * rv = encoder_->EncodeFrame (&pic, &info); + * assert (rv == cmResultSuccess); + * if (info.eFrameType != videoFrameTypeSkip) { + * //output bitstream handling + * } + * } + * @endcode + * + *
Step5:teardown encoder + * @code + * if (encoder_) { + * encoder_->Uninitialize(); + * WelsDestroySVCEncoder (encoder_); + * } + * @endcode + * + */ + +/** + * @page EncoderUsageExample2 + * + * @brief + * * An example for using the encoder with extension parameter. + * * The same operation on Step 1,3,4,5 with Example-1 + * + * Step 2:initialize with extension parameter + * @code + * SEncParamExt param; + * encoder_->GetDefaultParams (&param); + * param.iUsageType = usageType; + * param.fMaxFrameRate = frameRate; + * param.iPicWidth = width; + * param.iPicHeight = height; + * param.iTargetBitrate = 5000000; + * param.bEnableDenoise = denoise; + * param.iSpatialLayerNum = layers; + * //SM_DYN_SLICE doesn't support multi-thread now + * if (sliceMode != SM_SINGLE_SLICE && sliceMode != SM_DYN_SLICE) + * param.iMultipleThreadIdc = 2; + * + * for (int i = 0; i < param.iSpatialLayerNum; i++) { + * param.sSpatialLayers[i].iVideoWidth = width >> (param.iSpatialLayerNum - 1 - i); + * param.sSpatialLayers[i].iVideoHeight = height >> (param.iSpatialLayerNum - 1 - i); + * param.sSpatialLayers[i].fFrameRate = frameRate; + * param.sSpatialLayers[i].iSpatialBitrate = param.iTargetBitrate; + * + * param.sSpatialLayers[i].sSliceCfg.uiSliceMode = sliceMode; + * if (sliceMode == SM_DYN_SLICE) { + * param.sSpatialLayers[i].sSliceCfg.sSliceArgument.uiSliceSizeConstraint = 600; + * param.uiMaxNalSize = 1500; + * } + * } + * param.iTargetBitrate *= param.iSpatialLayerNum; + * encoder_->InitializeExt (&param); + * int videoFormat = videoFormatI420; + * encoder_->SetOption (ENCODER_OPTION_DATAFORMAT, &videoFormat); + * + * @endcode + */ + + + + +#ifdef __cplusplus +/** +* @brief Encoder definition +*/ +class ISVCEncoder { + public: + /** + * @brief Initialize the encoder + * @param pParam basic encoder parameter + * @return CM_RETURN: 0 - success; otherwise - failed; + */ + virtual int EXTAPI Initialize (const SEncParamBase* pParam) = 0; + + /** + * @brief Initialize encoder by using extension
parameter. + * @param pParam extension parameter for encoder + * @return CM_RETURN: 0 - success; otherwise - failed; + */ + virtual int EXTAPI InitializeExt (const SEncParamExt* pParam) = 0; + + /** + * @brief Get the default extension parameters. + * If you want to change some parameters of encoder, firstly you need to get the default encoding parameters, + * after that you can change part of parameters you want to. + * @param pParam extension parameter for encoder + * @return CM_RETURN: 0 - success; otherwise - failed; + * */ + virtual int EXTAPI GetDefaultParams (SEncParamExt* pParam) = 0; + /// uninitialize the encoder + virtual int EXTAPI Uninitialize() = 0; + + /** + * @brief Encode one frame + * @param kpSrcPic the pointer to the source luminance plane + * chrominance data: + * CbData = kpSrc + m_iMaxPicWidth * m_iMaxPicHeight; + * CrData = CbData + (m_iMaxPicWidth * m_iMaxPicHeight)/4; + * the application calling this interface needs to ensure the data validation between the location + * @param pBsInfo output bit stream + * @return 0 - success; otherwise -failed; + */ + virtual int EXTAPI EncodeFrame (const SSourcePicture* kpSrcPic, SFrameBSInfo* pBsInfo) = 0; + + /** + * @brief Encode the parameters from output bit stream + * @param pBsInfo output bit stream + * @return 0 - success; otherwise - failed; + */ + virtual int EXTAPI EncodeParameterSets (SFrameBSInfo* pBsInfo) = 0; + + /** + * @brief Force encoder to encode frame as IDR if bIDR set as true + * @param bIDR true: force encoder to encode frame as IDR frame;false, return 1 and nothing to do + * @return 0 - success; otherwise - failed; + */ + virtual int EXTAPI ForceIntraFrame (bool bIDR, int iLayerId = -1) = 0; + + /** + * @brief Set option for encoder, detail option type, please refer to the ENCODER_OPTION enumeration. + * @param pOption option for encoder such as InDataFormat, IDRInterval, SVC Encode Param, Frame Rate, Bitrate,...
+ * @return CM_RETURN: 0 - success; otherwise - failed; + */ + virtual int EXTAPI SetOption (ENCODER_OPTION eOptionId, void* pOption) = 0; + + /** + * @brief Get option for encoder, detail option type, please refer to the ENCODER_OPTION enumeration. + * @param pOption option for encoder such as InDataFormat, IDRInterval, SVC Encode Param, Frame Rate, Bitrate,... + * @return CM_RETURN: 0 - success; otherwise - failed; + */ + virtual int EXTAPI GetOption (ENCODER_OPTION eOptionId, void* pOption) = 0; + virtual ~ISVCEncoder() {} +}; + + + +/** +* @brief Decoder definition +*/ +class ISVCDecoder { + public: + + /** + * @brief Initialize decoder + * @param pParam parameter for decoder + * @return 0 - success; otherwise - failed; + */ + virtual long EXTAPI Initialize (const SDecodingParam* pParam) = 0; + + /// Uninitialize the decoder + virtual long EXTAPI Uninitialize() = 0; + + /** + * @brief Decode one frame + * @param pSrc the h264 stream to be decoded + * @param iSrcLen the length of h264 stream + * @param ppDst buffer pointer of decoded data (YUV) + * @param pStride output stride + * @param iWidth output width + * @param iHeight output height + * @return 0 - success; otherwise -failed; + */ + virtual DECODING_STATE EXTAPI DecodeFrame (const unsigned char* pSrc, + const int iSrcLen, + unsigned char** ppDst, + int* pStride, + int& iWidth, + int& iHeight) = 0; + + /** + * @brief For slice level DecodeFrameNoDelay() (4 parameters input), + * whatever the function return value is, the output data + * of I420 format will only be available when pDstInfo->iBufferStatus == 1. + * This function will parse and reconstruct the input frame immediately if it is complete + * It is recommended as the main decoding function for H.264/AVC format input + * @param pSrc the h264 stream to be decoded + * @param iSrcLen the length of h264 stream + * @param ppDst buffer pointer of decoded data (YUV) + * @param pDstInfo information provided to API(width, height, etc.)
+ * @return 0 - success; otherwise -failed; + */ + virtual DECODING_STATE EXTAPI DecodeFrameNoDelay (const unsigned char* pSrc, + const int iSrcLen, + unsigned char** ppDst, + SBufferInfo* pDstInfo) = 0; + + /** + * @brief For slice level DecodeFrame2() (4 parameters input), + * whatever the function return value is, the output data + * of I420 format will only be available when pDstInfo->iBufferStatus == 1. + * (e.g., in multi-slice cases, only when the whole picture + * is completely reconstructed, this variable would be set equal to 1.) + * @param pSrc the h264 stream to be decoded + * @param iSrcLen the length of h264 stream + * @param ppDst buffer pointer of decoded data (YUV) + * @param pDstInfo information provided to API(width, height, etc.) + * @return 0 - success; otherwise -failed; + */ + virtual DECODING_STATE EXTAPI DecodeFrame2 (const unsigned char* pSrc, + const int iSrcLen, + unsigned char** ppDst, + SBufferInfo* pDstInfo) = 0; + + + /** + * @brief This function gets a decoded ready frame remaining in buffers after the last frame has been decoded. + * Use GetOption with option DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER to get the number of frames remaining in buffers. + * Note that it is only applicable for profile_idc != 66 + * @param ppDst buffer pointer of decoded data (YUV) + * @param pDstInfo information provided to API(width, height, etc.) + * @return 0 - success; otherwise -failed; + */ + virtual DECODING_STATE EXTAPI FlushFrame (unsigned char** ppDst, + SBufferInfo* pDstInfo) = 0; + + /** + * @brief This function parses the input bitstream only, and rewrites possible SVC syntax to AVC syntax + * @param pSrc the h264 stream to be decoded + * @param iSrcLen the length of h264 stream + * @param pDstInfo bit stream info + * @return 0 - success; otherwise -failed; + */ + virtual DECODING_STATE EXTAPI DecodeParser (const unsigned char* pSrc, + const int iSrcLen, + SParserBsInfo* pDstInfo) = 0; + + /** + * @brief This API does not work for now!!
This is for future use to support non-I420 color format output. + * @param pSrc the h264 stream to be decoded + * @param iSrcLen the length of h264 stream + * @param pDst buffer pointer of decoded data (YUV) + * @param iDstStride output stride + * @param iDstLen bit stream info + * @param iWidth output width + * @param iHeight output height + * @param iColorFormat output color format + * @return to do ... + */ + virtual DECODING_STATE EXTAPI DecodeFrameEx (const unsigned char* pSrc, + const int iSrcLen, + unsigned char* pDst, + int iDstStride, + int& iDstLen, + int& iWidth, + int& iHeight, + int& iColorFormat) = 0; + + /** + * @brief Set option for decoder, detail option type, please refer to the DECODER_OPTION enumeration. + * @param pOption option for decoder such as OutDataFormat, Eos Flag, EC method, ... + * @return CM_RETURN: 0 - success; otherwise - failed; + */ + virtual long EXTAPI SetOption (DECODER_OPTION eOptionId, void* pOption) = 0; + + /** + * @brief Get option for decoder, detail option type, please refer to the DECODER_OPTION enumeration. + * @param pOption option for decoder such as OutDataFormat, Eos Flag, EC method, ...
+ * @return CM_RETURN: 0 - success; otherwise - failed; + */ + virtual long EXTAPI GetOption (DECODER_OPTION eOptionId, void* pOption) = 0; + virtual ~ISVCDecoder() {} +}; + + +extern "C" +{ +#else + +typedef struct ISVCEncoderVtbl ISVCEncoderVtbl; +typedef const ISVCEncoderVtbl* ISVCEncoder; +struct ISVCEncoderVtbl { + +int (*Initialize) (ISVCEncoder*, const SEncParamBase* pParam); +int (*InitializeExt) (ISVCEncoder*, const SEncParamExt* pParam); + +int (*GetDefaultParams) (ISVCEncoder*, SEncParamExt* pParam); + +int (*Uninitialize) (ISVCEncoder*); + +int (*EncodeFrame) (ISVCEncoder*, const SSourcePicture* kpSrcPic, SFrameBSInfo* pBsInfo); +int (*EncodeParameterSets) (ISVCEncoder*, SFrameBSInfo* pBsInfo); + +int (*ForceIntraFrame) (ISVCEncoder*, bool bIDR); + +int (*SetOption) (ISVCEncoder*, ENCODER_OPTION eOptionId, void* pOption); +int (*GetOption) (ISVCEncoder*, ENCODER_OPTION eOptionId, void* pOption); +}; + +typedef struct ISVCDecoderVtbl ISVCDecoderVtbl; +typedef const ISVCDecoderVtbl* ISVCDecoder; +struct ISVCDecoderVtbl { +long (*Initialize) (ISVCDecoder*, const SDecodingParam* pParam); +long (*Uninitialize) (ISVCDecoder*); + +DECODING_STATE (*DecodeFrame) (ISVCDecoder*, const unsigned char* pSrc, + const int iSrcLen, + unsigned char** ppDst, + int* pStride, + int* iWidth, + int* iHeight); + +DECODING_STATE (*DecodeFrameNoDelay) (ISVCDecoder*, const unsigned char* pSrc, + const int iSrcLen, + unsigned char** ppDst, + SBufferInfo* pDstInfo); + +DECODING_STATE (*DecodeFrame2) (ISVCDecoder*, const unsigned char* pSrc, + const int iSrcLen, + unsigned char** ppDst, + SBufferInfo* pDstInfo); + +DECODING_STATE (*FlushFrame) (ISVCDecoder*, unsigned char** ppDst, + SBufferInfo* pDstInfo); + +DECODING_STATE (*DecodeParser) (ISVCDecoder*, const unsigned char* pSrc, + const int iSrcLen, + SParserBsInfo* pDstInfo); + +DECODING_STATE (*DecodeFrameEx) (ISVCDecoder*, const unsigned char* pSrc, + const int iSrcLen, + unsigned char* pDst, + int iDstStride, + int* iDstLen, +
int* iWidth, + int* iHeight, + int* iColorFormat); + +long (*SetOption) (ISVCDecoder*, DECODER_OPTION eOptionId, void* pOption); +long (*GetOption) (ISVCDecoder*, DECODER_OPTION eOptionId, void* pOption); +}; +#endif + +typedef void (*WelsTraceCallback) (void* ctx, int level, const char* string); + +/** @brief Create encoder + * @param ppEncoder address of the pointer that receives the created encoder + * @return 0 - success; otherwise - failed; +*/ +int WelsCreateSVCEncoder (ISVCEncoder** ppEncoder); + + +/** @brief Destroy encoder +* @param pEncoder encoder to destroy + * @return void +*/ +void WelsDestroySVCEncoder (ISVCEncoder* pEncoder); + + +/** @brief Get the capability of decoder + * @param pDecCapability decoder capability + * @return 0 - success; otherwise - failed; +*/ +int WelsGetDecoderCapability (SDecoderCapability* pDecCapability); + + +/** @brief Create decoder + * @param ppDecoder address of the pointer that receives the created decoder + * @return 0 - success; otherwise - failed; +*/ +long WelsCreateDecoder (ISVCDecoder** ppDecoder); + + +/** @brief Destroy decoder + * @param pDecoder decoder to destroy + * @return void +*/ +void WelsDestroyDecoder (ISVCDecoder* pDecoder); + +/** @brief Get codec version + * Note, old versions of Mingw (GCC < 4.7) are buggy and use an + * incorrect/different ABI for calling this function, making it + * incompatible with MSVC builds. + * @return The linked codec version +*/ +OpenH264Version WelsGetCodecVersion (void); + +/** @brief Get codec version + * @param pVersion struct to fill in with the version +*/ +void WelsGetCodecVersionEx (OpenH264Version* pVersion); + +#ifdef __cplusplus +} +#endif + +#endif//WELS_VIDEO_CODEC_SVC_API_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/api/svc/codec_app_def.h b/TMessagesProj/jni/third_party/openh264/src/codec/api/svc/codec_app_def.h new file mode 100644 index 000000000..bb3c3d67b --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/api/svc/codec_app_def.h @@ -0,0 +1,810 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + + + +#ifndef WELS_VIDEO_CODEC_APPLICATION_DEFINITION_H__ +#define WELS_VIDEO_CODEC_APPLICATION_DEFINITION_H__ +/** + * @file codec_app_def.h + * @brief Data and/or structures introduced in Cisco OpenH264 application +*/ + +#include "codec_def.h" +/* Constants */ +#define MAX_TEMPORAL_LAYER_NUM 4 +#define MAX_SPATIAL_LAYER_NUM 4 +#define MAX_QUALITY_LAYER_NUM 4 + +#define MAX_LAYER_NUM_OF_FRAME 128 +#define MAX_NAL_UNITS_IN_LAYER 128 ///< predetermined here, adjust it later if needed + +#define MAX_RTP_PAYLOAD_LEN 1000 +#define AVERAGE_RTP_PAYLOAD_LEN 800 + + +#define SAVED_NALUNIT_NUM_TMP ( (MAX_SPATIAL_LAYER_NUM*MAX_QUALITY_LAYER_NUM) + 1 + MAX_SPATIAL_LAYER_NUM ) ///< SPS/PPS + SEI/SSEI + PADDING_NAL +#define MAX_SLICES_NUM_TMP ( ( MAX_NAL_UNITS_IN_LAYER - SAVED_NALUNIT_NUM_TMP ) / 3 ) + + +#define AUTO_REF_PIC_COUNT -1 ///< encoder selects the number of reference frame automatically +#define UNSPECIFIED_BIT_RATE 0 ///< "not specified" bitrate value, e.g. iMaxBitrate is set to it when no maximum is needed + +/** + * @brief Struct of OpenH264 version + */ +/// +/// E.g. SDK version is 1.2.0.0, major version number is 1, minor version number is 2, and revision number is 0. +typedef struct _tagVersion { + unsigned int uMajor; ///< The major version number + unsigned int uMinor; ///< The minor version number + unsigned int uRevision; ///< The revision number + unsigned int uReserved; ///< The reserved number, it should be 0.
+} OpenH264Version; + +/** +* @brief Decoding status +*/ +typedef enum { + /** + * Errors derived from bitstream parsing + */ + dsErrorFree = 0x00, ///< bit stream error-free + dsFramePending = 0x01, ///< need more throughput to generate a frame output, + dsRefLost = 0x02, ///< layer lost at reference frame with temporal id 0 + dsBitstreamError = 0x04, ///< error bitstreams(maybe broken internal frame) the decoder cared + dsDepLayerLost = 0x08, ///< dependent layer is ever lost + dsNoParamSets = 0x10, ///< no parameter set NALs involved + dsDataErrorConcealed = 0x20, ///< current data error concealed specified + dsRefListNullPtrs = 0x40, /// do not write any of the following information to the header + unsigned char + uiVideoFormat; // EVideoFormatSPS; 3 bits in header; 0-5 => component, kpal, ntsc, secam, mac, undef + bool bFullRange; // false => analog video data range [16, 235]; true => full data range [0,255] + bool bColorDescriptionPresent; // false => do not write any of the following three items to the header + unsigned char + uiColorPrimaries; // EColorPrimaries; 8 bits in header; 0 - 9 => ???, bt709, undef, ???, bt470m, bt470bg, + // smpte170m, smpte240m, film, bt2020 + unsigned char + uiTransferCharacteristics; // ETransferCharacteristics; 8 bits in header; 0 - 15 => ???, bt709, undef, ???, bt470m, bt470bg, smpte170m, + // smpte240m, linear, log100, log316, iec61966-2-4, bt1361e, iec61966-2-1, bt2020-10, bt2020-12 + unsigned char + uiColorMatrix; // EColorMatrix; 8 bits in header (corresponds to FFmpeg "colorspace"); 0 - 10 => GBR, bt709, + // undef, ???, fcc, bt470bg, smpte170m, smpte240m, YCgCo, bt2020nc, bt2020c + + bool bAspectRatioPresent; ///< aspect ratio present in VUI + ESampleAspectRatio eAspectRatio; ///< aspect ratio idc + unsigned short sAspectRatioExtWidth; ///< use if aspect ratio idc == 255 + unsigned short sAspectRatioExtHeight; ///< use if aspect ratio idc == 255 + +} SSpatialLayerConfig; + +/** +* @brief Encoder usage type +*/
+typedef enum {
+  CAMERA_VIDEO_REAL_TIME, ///< camera video for real-time communication
+  SCREEN_CONTENT_REAL_TIME, ///< screen content signal
+  CAMERA_VIDEO_NON_REAL_TIME,
+  SCREEN_CONTENT_NON_REAL_TIME,
+  INPUT_CONTENT_TYPE_ALL,
+} EUsageType;
+
+/**
+* @brief Enumerate the complexity mode
+*/
+typedef enum {
+  LOW_COMPLEXITY = 0, ///< the lowest complexity, the fastest speed
+  MEDIUM_COMPLEXITY, ///< medium complexity, medium speed, medium quality
+  HIGH_COMPLEXITY ///< high complexity, lowest speed, high quality
+} ECOMPLEXITY_MODE;
+
+/**
+ * @brief Enumerate the strategies for SPS/PPS id assignment
+ */
+typedef enum {
+  CONSTANT_ID = 0, ///< constant id in SPS/PPS
+  INCREASING_ID = 0x01, ///< SPS/PPS id increases at each IDR
+  SPS_LISTING = 0x02, ///< using SPS in the existing list if possible
+  SPS_LISTING_AND_PPS_INCREASING = 0x03,
+  SPS_PPS_LISTING = 0x06,
+} EParameterSetStrategy;
+
+// TODO: Refine the parameters definition.
+/**
+* @brief SVC Encoding Parameters
+*/
+typedef struct TagEncParamBase {
+  EUsageType
+  iUsageType; ///< application type; please refer to the definition of EUsageType
+
+  int iPicWidth; ///< width of picture in luminance samples (the maximum of all layers if multiple spatial layers are present)
+  int iPicHeight; ///< height of picture in luminance samples (the maximum of all layers if multiple spatial layers are present)
+  int iTargetBitrate; ///< target bitrate desired, in unit of bps
+  RC_MODES iRCMode; ///< rate control mode
+  float fMaxFrameRate; ///< maximal input frame rate
+
+} SEncParamBase, *PEncParamBase;
+
+/**
+* @brief SVC Encoding Parameters extension
+*/
+typedef struct TagEncParamExt {
+  EUsageType
+  iUsageType; ///< same as in TagEncParamBase
+
+  int iPicWidth; ///< same as in TagEncParamBase
+  int iPicHeight; ///< same as in TagEncParamBase
+  int iTargetBitrate; ///< same as in TagEncParamBase
+  RC_MODES iRCMode; ///< same as in TagEncParamBase
+  float fMaxFrameRate; ///< same as in TagEncParamBase
+
+  int iTemporalLayerNum; ///< temporal layer number, max temporal layer = 4
+  int iSpatialLayerNum; ///< spatial layer number, 1 <= iSpatialLayerNum <= MAX_SPATIAL_LAYER_NUM, MAX_SPATIAL_LAYER_NUM = 4
+  SSpatialLayerConfig sSpatialLayers[MAX_SPATIAL_LAYER_NUM];
+
+  ECOMPLEXITY_MODE iComplexityMode;
+  unsigned int uiIntraPeriod; ///< period of Intra frame
+  int iNumRefFrame; ///< number of reference frame used
+  EParameterSetStrategy
+  eSpsPpsIdStrategy; ///< strategy for adjusting the ID in SPS/PPS: 0- constant ID, 1-additional ID, 6-mapping and additional
+  bool bPrefixNalAddingCtrl; ///< false:not use Prefix NAL; true: use Prefix NAL
+  bool bEnableSSEI; ///< false:not use SSEI; true: use SSEI -- TODO: planning to remove the interface of SSEI
+  bool bSimulcastAVC; ///< (when encoding more than 1 spatial layer) false: use SVC syntax for higher layers; true: use Simulcast AVC
+  int iPaddingFlag; ///< 0:disable padding;1:padding
+  int iEntropyCodingModeFlag; ///< 0:CAVLC 1:CABAC.
+
+  /* rc control */
+  bool bEnableFrameSkip; ///< False: don't skip frame even if VBV buffer overflow. True: allow skipping frames to keep the bitrate within limits
+  int iMaxBitrate; ///< the maximum bitrate, in unit of bps, set it to UNSPECIFIED_BIT_RATE if not needed
+  int iMaxQp; ///< the maximum QP encoder supports
+  int iMinQp; ///< the minimum QP encoder supports
+  unsigned int uiMaxNalSize; ///< the maximum NAL size. This value should be not 0 for dynamic slice mode
+
+  /*LTR settings*/
+  bool bEnableLongTermReference; ///< 1: on, 0: off
+  int iLTRRefNum; ///< the number of LTR(long term reference),TODO: not supported to set it arbitrary yet
+  unsigned int iLtrMarkPeriod; ///< the LTR marked period that is used in feedback.
+  /* multi-thread settings*/
+  unsigned short
+  iMultipleThreadIdc; ///< 1 # 0: auto(dynamic imp. internal encoder); 1: multiple threads imp. disabled; larger than 1: count number of threads;
+  bool bUseLoadBalancing; ///< only used when uiSliceMode=1 or 3, will change slicing of a picture during the run-time of multi-thread encoding, so the result of each run may be different
+
+  /* Deblocking loop filter */
+  int iLoopFilterDisableIdc; ///< 0: on, 1: off, 2: on except for slice boundaries
+  int iLoopFilterAlphaC0Offset; ///< AlphaOffset: valid range [-6, 6], default 0
+  int iLoopFilterBetaOffset; ///< BetaOffset: valid range [-6, 6], default 0
+  /*pre-processing feature*/
+  bool bEnableDenoise; ///< denoise control
+  bool bEnableBackgroundDetection; ///< background detection control //VAA_BACKGROUND_DETECTION //BGD cmd
+  bool bEnableAdaptiveQuant; ///< adaptive quantization control
+  bool bEnableFrameCroppingFlag; ///< enable frame cropping flag: TRUE always in application
+  bool bEnableSceneChangeDetect;
+
+  bool bIsLosslessLink; ///< LTR advanced setting
+} SEncParamExt;
+
+/**
+* @brief Define a new struct to show the property of video bitstream.
+*/
+typedef struct {
+  unsigned int size; ///< size of the struct
+  VIDEO_BITSTREAM_TYPE eVideoBsType; ///< video stream type (AVC/SVC)
+} SVideoProperty;
+
+/**
+* @brief SVC Decoding Parameters, reserved here and potentially applicable in the future
+*/
+typedef struct TagSVCDecodingParam {
+  char* pFileNameRestructed; ///< file name of reconstructed frame used for PSNR calculation based debug
+
+  unsigned int uiCpuLoad; ///< CPU load
+  unsigned char uiTargetDqLayer; ///< setting target dq layer id
+
+  ERROR_CON_IDC eEcActiveIdc; ///< whether active error concealment feature in decoder
+  bool bParseOnly; ///< decoder for parse only, no reconstruction. When it is true, SPS/PPS size should not exceed SPS_PPS_BS_SIZE (128). Otherwise, it will return error info
+
+  SVideoProperty sVideoProperty; ///< video stream property
+} SDecodingParam, *PDecodingParam;
+
+/**
+* @brief Bitstream information of a layer being encoded
+*/
+typedef struct {
+  unsigned char uiTemporalId;
+  unsigned char uiSpatialId;
+  unsigned char uiQualityId;
+  EVideoFrameType eFrameType;
+  unsigned char uiLayerType;
+
+  /**
+   * The sub sequence layers are ordered hierarchically based on their dependency on each other so that any picture in a layer shall not be
+   * predicted from any picture on any higher layer.
+  */
+  int iSubSeqId; ///< refer to D.2.11 Sub-sequence information SEI message semantics
+  int iNalCount; ///< count number of NAL coded already
+  int* pNalLengthInByte; ///< length of NAL size in byte from 0 to iNalCount-1
+  unsigned char* pBsBuf; ///< buffer of bitstream contained
+} SLayerBSInfo, *PLayerBSInfo;
+
+/**
+* @brief Frame bit stream info
+*/
+typedef struct {
+  int iLayerNum;
+  SLayerBSInfo sLayerInfo[MAX_LAYER_NUM_OF_FRAME];
+
+  EVideoFrameType eFrameType;
+  int iFrameSizeInBytes;
+  long long uiTimeStamp;
+} SFrameBSInfo, *PFrameBSInfo;
+
+/**
+* @brief Structure for source picture
+*/
+typedef struct Source_Picture_s {
+  int iColorFormat; ///< color space type
+  int iStride[4]; ///< stride for each plane pData
+  unsigned char* pData[4]; ///< plane pData
+  int iPicWidth; ///< luma picture width in x coordinate
+  int iPicHeight; ///< luma picture height in y coordinate
+  long long uiTimeStamp; ///< timestamp of the source picture, unit: millisecond
+} SSourcePicture;
+/**
+* @brief Structure for bit rate info
+*/
+typedef struct TagBitrateInfo {
+  LAYER_NUM iLayer;
+  int iBitrate; ///< the maximum bitrate
+} SBitrateInfo;
+
+/**
+* @brief Structure for dump layer info
+*/
+typedef struct TagDumpLayer {
+  int iLayer;
+  char* pFileName;
+} SDumpLayer;
+
+/**
+* @brief Structure for profile info in layer
+*
+*/
+typedef struct TagProfileInfo {
+  int iLayer;
+  EProfileIdc uiProfileIdc; ///< the profile info
+} SProfileInfo;
+
+/**
+* @brief Structure for level info in layer
+*
+*/
+typedef struct TagLevelInfo {
+  int iLayer;
+  ELevelIdc uiLevelIdc; ///< the level info
+} SLevelInfo;
+/**
+* @brief Structure for delivery status
+*
+*/
+typedef struct TagDeliveryStatus {
+  bool bDeliveryFlag; ///< 0: the previous frame isn't delivered, 1: the previous frame is delivered
+  int iDropFrameType; ///< the frame type that is dropped; reserved
+  int iDropFrameSize; ///< the frame size that is dropped; reserved
+} SDeliveryStatus;
+
+/**
+* @brief The capability of decoder, for SDP negotiation
+*/
+typedef struct TagDecoderCapability {
+  int iProfileIdc; ///< profile_idc
+  int iProfileIop; ///< profile-iop
+  int iLevelIdc; ///< level_idc
+  int iMaxMbps; ///< max-mbps
+  int iMaxFs; ///< max-fs
+  int iMaxCpb; ///< max-cpb
+  int iMaxDpb; ///< max-dpb
+  int iMaxBr; ///< max-br
+  bool bRedPicCap; ///< redundant-pic-cap
+} SDecoderCapability;
+
+/**
+* @brief Structure for parse only output
+*/
+typedef struct TagParserBsInfo {
+  int iNalNum; ///< total NAL number in current AU
+  int* pNalLenInByte; ///< each nal length
+  unsigned char* pDstBuff; ///< outputted dst buffer for parsed bitstream
+  int iSpsWidthInPixel; ///< required SPS width info
+  int iSpsHeightInPixel; ///< required SPS height info
+  unsigned long long uiInBsTimeStamp; ///< input BS timestamp
+  unsigned long long uiOutBsTimeStamp; ///< output BS timestamp
+} SParserBsInfo, *PParserBsInfo;
+
+/**
+* @brief Structure for encoder statistics
+*/
+typedef struct TagVideoEncoderStatistics {
+  unsigned int uiWidth; ///< the width of encoded frame
+  unsigned int uiHeight; ///< the height of encoded frame
+  //following standard, will be 16x aligned, if there are multiple spatial, this is of the highest
+  float fAverageFrameSpeedInMs; ///< average_Encoding_Time
+
+  // rate control related
+  float fAverageFrameRate; ///< the average input frame rate, calculated since encoding starts, supposing that the input timestamp is in unit of ms
+  float fLatestFrameRate; ///< the input frame rate in the last second, supposing that the input timestamp is in unit of ms (? useful for checking BR, but is it easy to calculate?
+  unsigned int uiBitRate; ///< sendrate in Bits per second, calculated within the set time-window
+  unsigned int uiAverageFrameQP; ///< the average QP of last encoded frame
+
+  unsigned int uiInputFrameCount; ///< number of frames
+  unsigned int uiSkippedFrameCount; ///< number of frames
+
+  unsigned int uiResolutionChangeTimes; ///< uiResolutionChangeTimes
+  unsigned int uiIDRReqNum; ///< number of IDR requests
+  unsigned int uiIDRSentNum; ///< number of actual IDRs sent
+  unsigned int uiLTRSentNum; ///< number of LTR sent/marked
+
+  long long iStatisticsTs; ///< Timestamp of updating the statistics
+
+  unsigned long iTotalEncodedBytes;
+  unsigned long iLastStatisticsBytes;
+  unsigned long iLastStatisticsFrameCount;
+} SEncoderStatistics;
+
+/**
+* @brief Structure for decoder statistics
+*/
+typedef struct TagVideoDecoderStatistics {
+  unsigned int uiWidth; ///< the width of encode/decode frame
+  unsigned int uiHeight; ///< the height of encode/decode frame
+  float fAverageFrameSpeedInMs; ///< average_Decoding_Time
+  float fActualAverageFrameSpeedInMs; ///< actual average_Decoding_Time, including freezing pictures
+  unsigned int uiDecodedFrameCount; ///< number of frames
+  unsigned int uiResolutionChangeTimes; ///< uiResolutionChangeTimes
+  unsigned int uiIDRCorrectNum; ///< number of correct IDR received
+  //EC on related
+  unsigned int
+  uiAvgEcRatio; ///< when EC is on, the average ratio of total EC areas, can be an indicator of reconstruction quality
+  unsigned int
+  uiAvgEcPropRatio; ///< when EC is on, the rough average ratio of propagated EC areas, can be an indicator of reconstruction quality
+  unsigned int uiEcIDRNum; ///< number of actual unintegrity IDR or not received but eced
+  unsigned int uiEcFrameNum; ///< presumably the number of error-concealed frames -- TODO confirm
+  unsigned int uiIDRLostNum; ///< number of whole lost IDR
+  unsigned int
+  uiFreezingIDRNum; ///< number of freezing IDR with error (partly received), under resolution change
+  unsigned int uiFreezingNonIDRNum; ///< number of freezing non-IDR with error
+  int iAvgLumaQp; ///< average luma QP. default: -1, no correct frame outputted
+  int iSpsReportErrorNum; ///< number of Sps Invalid report
+  int iSubSpsReportErrorNum; ///< number of SubSps Invalid report
+  int iPpsReportErrorNum; ///< number of Pps Invalid report
+  int iSpsNoExistNalNum; ///< number of Sps NoExist Nal
+  int iSubSpsNoExistNalNum; ///< number of SubSps NoExist Nal
+  int iPpsNoExistNalNum; ///< number of Pps NoExist Nal
+
+  unsigned int uiProfile; ///< Profile idc in syntax
+  unsigned int uiLevel; ///< level idc according to Annex A-1
+
+  int iCurrentActiveSpsId; ///< current active SPS id
+  int iCurrentActivePpsId; ///< current active PPS id
+
+  unsigned int iStatisticsLogInterval; ///< frame interval of statistics log
+} SDecoderStatistics; // in building, coming soon
+
+/**
+* @brief Structure for sample aspect ratio (SAR) info in VUI
+*/
+typedef struct TagVuiSarInfo {
+  unsigned int uiSarWidth; ///< SAR width
+  unsigned int uiSarHeight; ///< SAR height
+  bool bOverscanAppropriateFlag; ///< SAR overscan flag
+} SVuiSarInfo, *PVuiSarInfo;
+
+#endif//WELS_VIDEO_CODEC_APPLICATION_DEFINITION_H__
diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/api/svc/codec_def.h b/TMessagesProj/jni/third_party/openh264/src/codec/api/svc/codec_def.h
new file mode 100644
index 000000000..edde5f4a2
--- /dev/null
+++ b/TMessagesProj/jni/third_party/openh264/src/codec/api/svc/codec_def.h
@@ -0,0 +1,216 @@
+/*!
+ * \copy
+ * Copyright (c) 2013, Cisco Systems
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ *
+ */
+
+#ifndef WELS_VIDEO_CODEC_DEFINITION_H__
+#define WELS_VIDEO_CODEC_DEFINITION_H__
+
+/**
+  * @file codec_def.h
+*/
+
+/**
+* @brief Enumerate the type of video format
+*/
+typedef enum {
+  videoFormatRGB = 1, ///< rgb color formats
+  videoFormatRGBA = 2,
+  videoFormatRGB555 = 3,
+  videoFormatRGB565 = 4,
+  videoFormatBGR = 5,
+  videoFormatBGRA = 6,
+  videoFormatABGR = 7,
+  videoFormatARGB = 8,
+
+  videoFormatYUY2 = 20, ///< yuv color formats
+  videoFormatYVYU = 21,
+  videoFormatUYVY = 22,
+  videoFormatI420 = 23, ///< the same as IYUV
+  videoFormatYV12 = 24,
+  videoFormatInternal = 25, ///< only used in SVC decoder testbed
+
+  videoFormatNV12 = 26, ///< new format for output by DXVA decoding
+
+  videoFormatVFlip = 0x80000000
+} EVideoFormatType;
+
+/**
+* @brief Enumerate video frame type
+*/
+typedef enum {
+  videoFrameTypeInvalid, ///< encoder not ready or parameters are invalid
+  videoFrameTypeIDR, ///< IDR frame in H.264
+  videoFrameTypeI, ///< I frame type
+  videoFrameTypeP, ///< P frame type
+  videoFrameTypeSkip, ///< skip the frame based encoder kernel
+  videoFrameTypeIPMixed ///< a frame where I and P slices are mixing, not supported yet
+} EVideoFrameType;
+
+/**
+* @brief Enumerate return type
+*/
+typedef enum {
+  cmResultSuccess, ///< successful
+  cmInitParaError, ///< parameters are invalid
+  cmUnknownReason,
+  cmMallocMemeError, ///< malloc a memory error
+  cmInitExpected, ///< initial action is expected
+  cmUnsupportedData
+} CM_RETURN;
+
+/**
+* @brief Enumerate the nal unit type
+*/
+enum ENalUnitType {
+  NAL_UNKNOWN = 0,
+  NAL_SLICE = 1,
+  NAL_SLICE_DPA = 2,
+  NAL_SLICE_DPB = 3,
+  NAL_SLICE_DPC = 4,
+  NAL_SLICE_IDR = 5, ///< ref_idc != 0
+  NAL_SEI = 6, ///< ref_idc == 0
+  NAL_SPS = 7,
+  NAL_PPS = 8
+  ///< ref_idc == 0 for 6,9,10,11,12
+};
+
+/**
+* @brief NRI: eNalRefIdc
+*/
+enum ENalPriority {
+  NAL_PRIORITY_DISPOSABLE = 0,
+  NAL_PRIORITY_LOW = 1,
+  NAL_PRIORITY_HIGH = 2,
+  NAL_PRIORITY_HIGHEST = 3
+};
+/** @brief Non-zero when eNalRefIdc is NAL_PRIORITY_HIGHEST and eNalType is a parameter set (NAL_SPS or NAL_PPS); the types are compared individually because NAL_SPS|NAL_PPS is 15, which is neither of them. */
+#define IS_PARAMETER_SET_NAL(eNalRefIdc, eNalType) \
+( ((eNalRefIdc) == NAL_PRIORITY_HIGHEST) && ((eNalType) == NAL_SPS || (eNalType) == NAL_PPS) )
+/** @brief Non-zero when eNalRefIdc is NAL_PRIORITY_HIGHEST and eNalType is NAL_SLICE_IDR. */
+#define IS_IDR_NAL(eNalRefIdc, eNalType) \
+( ((eNalRefIdc) == NAL_PRIORITY_HIGHEST) && ((eNalType) == NAL_SLICE_IDR) )
+
+#define FRAME_NUM_PARAM_SET (-1)
+#define FRAME_NUM_IDR 0
+
+/**
+ * @brief eDeblockingIdc
+ */
+enum {
+  DEBLOCKING_IDC_0 = 0,
+  DEBLOCKING_IDC_1 = 1,
+  DEBLOCKING_IDC_2 = 2
+};
+#define DEBLOCKING_OFFSET (6)
+#define DEBLOCKING_OFFSET_MINUS (-6)
+
+/* Error Tools definition */
+typedef unsigned short ERR_TOOL;
+
+/**
+ @brief Error tool identifiers; each enumerator is a distinct single-bit value
+*/
+enum {
+  ET_NONE = 0x00, ///< NONE Error Tools
+  ET_IP_SCALE = 0x01, ///< IP Scalable
+  ET_FMO = 0x02, ///< Flexible Macroblock Ordering
+  ET_IR_R1 = 0x04, ///< Intra Refresh in predefined 2% MB
+  ET_IR_R2 = 0x08, ///< Intra Refresh in predefined 5% MB
+  ET_IR_R3 = 0x10, ///< Intra Refresh in predefined 10% MB
+  ET_FEC_HALF = 0x20, ///< Forward Error Correction in 50% redundancy mode
+  ET_FEC_FULL = 0x40, ///< Forward Error Correction in 100% redundancy mode
+  ET_RFS = 0x80 ///< Reference Frame Selection
+};
+
+/**
+* @brief Information of coded Slice(=NAL)(s)
+*/
+typedef struct SliceInformation {
+  unsigned char* pBufferOfSlices; ///< base buffer of coded slice(s)
+  int iCodedSliceCount; ///< number of coded slices
+  unsigned int* pLengthOfSlices; ///< array of slices length accordingly by number of slice
+  int iFecType; ///< FEC type[0, 50%FEC, 100%FEC]
+  unsigned char uiSliceIdx; ///< index of slice in frame [FMO: 0,..,uiSliceCount-1; No FMO: 0]
+  unsigned char uiSliceCount; ///< count number of slice in frame [FMO: 2-8; No FMO: 1]
+  char iFrameIndex; ///< index of frame[-1, .., idr_interval-1]
+  unsigned char uiNalRefIdc; ///< NRI, priority level of slice(NAL)
+  unsigned char uiNalType; ///< NAL type
+  unsigned char
+  uiContainingFinalNal; ///< whether final NAL is involved in buffer of coded slices, flag used in Pause feature in T27
+} SliceInfo, *PSliceInfo;
+
+/**
+* @brief thresholds of the initial, maximal and minimal rate
+*/
+typedef struct {
+  int iWidth; ///< frame width
+  int iHeight; ///< frame height
+  int iThresholdOfInitRate; ///< threshold of initial rate
+  int iThresholdOfMaxRate; ///< threshold of maximal rate
+  int iThresholdOfMinRate; ///< threshold of minimal rate
+  int iMinThresholdFrameRate; ///< min frame rate min
+  int iSkipFrameRate; ///< skip to frame rate min
+  int iSkipFrameStep; ///< how many frames to skip
+} SRateThresholds, *PRateThresholds;
+
+/**
+* @brief Structure for decoder memory
+*/
+typedef struct TagSysMemBuffer {
+  int iWidth; ///< width of decoded pic for display
+  int iHeight; ///< height of decoded pic for display
+  int iFormat; ///< type is "EVideoFormatType"
+  int iStride[2]; ///< stride of 2 component
+} SSysMEMBuffer;
+
+/**
+* @brief Buffer info
+*/
+typedef struct TagBufferInfo {
+  int iBufferStatus; ///< 0: one frame data is not ready; 1: one frame data is ready
+  unsigned long long uiInBsTimeStamp; ///< input BS timestamp
+  unsigned long long uiOutYuvTimeStamp; ///< output YUV timestamp, when iBufferStatus is 1
+  union {
+    SSysMEMBuffer sSystemBuffer; ///< memory info for one picture
+  } UsrData; ///< output buffer info
+  unsigned char* pDst[3]; ///< pointers to the picture YUV data
+} SBufferInfo;
+
+
+/**
+* @brief In a GOP, multiple of the key frame number, derived from
+* the number of layers (index of the array below)
+*/
+static const char kiKeyNumMultiple[] = {
+  1, 1, 2, 4, 8, 16,
+};
+
+#endif//WELS_VIDEO_CODEC_DEFINITION_H__
diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/api/svc/codec_ver.h b/TMessagesProj/jni/third_party/openh264/src/codec/api/svc/codec_ver.h
new file mode 100644
index 000000000..a4e494f6b
--- /dev/null
+++ b/TMessagesProj/jni/third_party/openh264/src/codec/api/svc/codec_ver.h
@@ -0,0 +1,15 @@
+//The current file is auto-generated by script: generate_codec_ver.sh
+#ifndef CODEC_VER_H
+#define CODEC_VER_H
+
+#include "codec_app_def.h"
+
+static const OpenH264Version g_stCodecVersion = {2, 1, 0, 2002};
+static const char* const g_strCodecVer = "OpenH264 version:2.1.0.2002";
+
+#define OPENH264_MAJOR (2)
+#define OPENH264_MINOR (1)
+#define OPENH264_REVISION (0)
+#define OPENH264_RESERVED (2002)
+
+#endif // CODEC_VER_H
diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/arm/arm_arch_common_macro.S b/TMessagesProj/jni/third_party/openh264/src/codec/common/arm/arm_arch_common_macro.S
new file mode 100644
index 000000000..361d2eb26
--- /dev/null
+++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/arm/arm_arch_common_macro.S
@@ -0,0 +1,83 @@
+/*!
+ * \copy
+ * Copyright (c) 2013, Cisco Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifdef HAVE_NEON
+
+.syntax unified
+
+#ifdef __APPLE__
+
+.text
+// Apple variant of the function prologue: declares the underscore-prefixed name as a global ARM-mode label.
+.macro WELS_ASM_FUNC_BEGIN
+.align 2
+.arm
+.globl _$0
+_$0:
+.endm
+// Function epilogue: returns to the caller by moving lr into pc.
+.macro WELS_ASM_FUNC_END
+mov pc, lr
+.endm
+#else
+
+#ifdef __ELF__
+.section .note.GNU-stack,"",%progbits // Mark stack as non-executable
+#endif
+.text
+#ifdef __ELF__
+.arch armv7-a
+.fpu neon
+#endif
+// Non-Apple variant of the function prologue: declares funcName as a global ARM-mode label (typed as a function on ELF, with .func when not built by clang).
+.macro WELS_ASM_FUNC_BEGIN funcName
+.align 2
+.arm
+.global \funcName
+#ifdef __ELF__
+.type \funcName, %function
+#endif
+#ifndef __clang__
+.func \funcName
+#endif
+\funcName:
+.endm
+// Function epilogue: returns by moving lr into pc, then closes the .func region when not built by clang.
+.macro WELS_ASM_FUNC_END
+mov pc, lr
+#ifndef __clang__
+.endfunc
+#endif
+.endm
+#endif
+
+#endif
diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/arm/copy_mb_neon.S b/TMessagesProj/jni/third_party/openh264/src/codec/common/arm/copy_mb_neon.S
new file mode 100644
index 000000000..8b9877374
--- /dev/null
+++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/arm/copy_mb_neon.S
@@ -0,0 +1,161 @@
+/*!
+ * \copy
+ * Copyright (c) 2013, Cisco Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ *
+ */
+
+#ifdef HAVE_NEON
+#include "arm_arch_common_macro.S"
+
+.macro LOAD_ALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
+// { // input: \arg0~\arg3 (registers to fill), \arg4 = src* (used with the :128 alignment qualifier), \arg5 = src_stride
+    vld1.64 {\arg0}, [\arg4,:128], \arg5
+    vld1.64 {\arg1}, [\arg4,:128], \arg5
+    vld1.64 {\arg2}, [\arg4,:128], \arg5
+    vld1.64 {\arg3}, [\arg4,:128], \arg5
+// }
+.endm
+
+.macro STORE_ALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
+// { // input: \arg0~\arg3 (registers to store), \arg4 = dst* (used with the :128 alignment qualifier), \arg5 = dst_stride
+    vst1.64 {\arg0}, [\arg4,:128], \arg5
+    vst1.64 {\arg1}, [\arg4,:128], \arg5
+    vst1.64 {\arg2}, [\arg4,:128], \arg5
+    vst1.64 {\arg3}, [\arg4,:128], \arg5
+// }
+.endm
+
+.macro LOAD_UNALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
+// { // input: \arg0~\arg3 (registers to fill), \arg4 = src* (no alignment qualifier), \arg5 = src_stride
+    vld1.64 {\arg0}, [\arg4], \arg5
+    vld1.64 {\arg1}, [\arg4], \arg5
+    vld1.64 {\arg2}, [\arg4], \arg5
+    vld1.64 {\arg3}, [\arg4], \arg5
+// }
+.endm
+
+.macro STORE_UNALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
+// { // input: \arg0~\arg3 (registers to store), \arg4 = dst* (no alignment qualifier), \arg5 = dst_stride
+    vst1.64 {\arg0}, [\arg4], \arg5
+    vst1.64 {\arg1}, [\arg4], \arg5
+    vst1.64 {\arg2}, [\arg4], \arg5
+    vst1.64 {\arg3}, [\arg4], \arg5
+// }
+.endm
+
+// WelsCopy8x8_neon: copies 8 rows of one d register (8 bytes) each from r2 (stride r3) to r0 (stride r1), without alignment qualifiers.
+WELS_ASM_FUNC_BEGIN WelsCopy8x8_neon
+
+    LOAD_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r2, r3
+
+    STORE_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r0, r1
+
+    LOAD_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r2, r3
+
+    STORE_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r0, r1
+
+WELS_ASM_FUNC_END
+
+// WelsCopy16x16_neon: copies 16 rows of one q register (16 bytes) each from r2 (stride r3) to r0 (stride r1), using the :128 alignment qualifier on both addresses.
+WELS_ASM_FUNC_BEGIN WelsCopy16x16_neon
+
+    LOAD_ALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r2, r3
+
+    STORE_ALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r0, r1
+
+    LOAD_ALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r2, r3
+
+    STORE_ALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r0, r1
+
+    LOAD_ALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r2, r3
+
+    STORE_ALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r0, r1
+
+    LOAD_ALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r2, r3
+
+    STORE_ALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r0, r1
+
+WELS_ASM_FUNC_END
+
+// WelsCopy16x16NotAligned_neon: same 16 rows of 16 bytes as WelsCopy16x16_neon, but without alignment qualifiers.
+WELS_ASM_FUNC_BEGIN WelsCopy16x16NotAligned_neon
+
+    LOAD_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r2, r3
+
+    STORE_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r0, r1
+
+    LOAD_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r2, r3
+
+    STORE_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r0, r1
+
+    LOAD_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r2, r3
+
+    STORE_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r0, r1
+
+    LOAD_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r2, r3
+
+    STORE_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r0, r1
+
+WELS_ASM_FUNC_END
+
+// WelsCopy16x8NotAligned_neon: copies 8 rows of one q register (16 bytes) each from r2 (stride r3) to r0 (stride r1), without alignment qualifiers.
+WELS_ASM_FUNC_BEGIN WelsCopy16x8NotAligned_neon
+
+    LOAD_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r2, r3
+
+    STORE_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r0, r1
+
+    LOAD_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r2, r3
+
+    STORE_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r0, r1
+
+WELS_ASM_FUNC_END
+
+// WelsCopy8x16_neon: copies 16 rows of one d register (8 bytes) each from r2 (stride r3) to r0 (stride r1), without alignment qualifiers.
+WELS_ASM_FUNC_BEGIN WelsCopy8x16_neon
+
+    LOAD_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r2, r3
+
+    STORE_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r0, r1
+
+    LOAD_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r2, r3
+
+    STORE_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r0, r1
+
+    LOAD_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r2, r3
+
+    STORE_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r0, r1
+
+    LOAD_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r2, r3
+
+    STORE_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r0, r1
+
+WELS_ASM_FUNC_END
+
+#endif
diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/arm/deblocking_neon.S b/TMessagesProj/jni/third_party/openh264/src/codec/common/arm/deblocking_neon.S
new file mode 100644
index 000000000..293c91765
--- /dev/null
+++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/arm/deblocking_neon.S
@@ -0,0 +1,857 @@
+/*!
+ * \copy
+ * Copyright (c) 2013, Cisco Systems
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifdef HAVE_NEON + +#include "arm_arch_common_macro.S" + +.macro JMP_IF_128BITS_IS_ZERO arg0, arg1, arg2 + vorr.s16 \arg2, \arg0, \arg1 + vmov r3, r2, \arg2 + orr r3, r3, r2 + cmp r3, #0 +.endm + +.macro MASK_MATRIX arg0, arg1, arg2, arg3, arg4, arg5, arg6 + vabd.u8 \arg6, \arg1, \arg2 + vcgt.u8 \arg6, \arg4, \arg6 + + vabd.u8 \arg4, \arg0, \arg1 + vclt.u8 \arg4, \arg4, \arg5 + vand.u8 \arg6, \arg6, \arg4 + + vabd.u8 \arg4, \arg3, \arg2 + vclt.u8 \arg4, \arg4, \arg5 + vand.u8 \arg6, \arg6, \arg4 +.endm + +.macro DIFF_LUMA_LT4_P1_Q1 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9 + vmov.i8 \arg9, #128 + vrhadd.u8 \arg8, \arg2, \arg3 + vhadd.u8 \arg8, \arg0, \arg8 + vsub.s8 \arg8, \arg8, \arg9 + vsub.s8 \arg9, \arg1, \arg9 + vqsub.s8 \arg8, \arg8, \arg9 + vmax.s8 \arg8, \arg8, \arg5 + vmin.s8 \arg8, \arg8, \arg6 + vabd.u8 \arg9, \arg0, \arg2 + vclt.u8 \arg9, \arg9, \arg4 + vand.s8 \arg8, \arg8, \arg9 + vand.s8 \arg8, \arg8, \arg7 + vadd.u8 \arg8, \arg1, \arg8 + vabs.s8 \arg9, \arg9 +.endm + +.macro DIFF_LUMA_LT4_P0_Q0 arg0, arg1, arg2, arg3, arg4, arg5, arg6 + vsubl.u8 \arg5, \arg0, \arg3 + vsubl.u8 \arg6, \arg2, \arg1 + vshl.s16 \arg6, \arg6, #2 + vadd.s16 \arg5, \arg5, \arg6 + vqrshrn.s16 \arg4, \arg5, #3 +.endm + + +.macro DIFF_LUMA_EQ4_P2P1P0 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 + vaddl.u8 q4, \arg1, \arg2 + vaddl.u8 q5, \arg3, \arg4 + vadd.u16 q5, q4, q5 + + vaddl.u8 q4, \arg0, \arg1 + vshl.u16 q4, q4, #1 + vadd.u16 q4, q5, q4 + + vrshrn.u16 \arg0, q5, #2 + vrshrn.u16 \arg7, q4, #3 + + vshl.u16 q5, q5, #1 + vsubl.u8 q4, \arg5, \arg1 + vadd.u16 q5, q4,q5 + + vaddl.u8 q4, \arg2, \arg5 + vaddw.u8 q4, q4, \arg2 + vaddw.u8 q4, q4, \arg3 + + vrshrn.u16 d10,q5, #3 + vrshrn.u16 d8, q4, #2 + vbsl.u8 \arg6, d10, d8 +.endm + +.macro DIFF_LUMA_EQ4_MASK arg0, arg1, arg2, arg3 + vmov \arg3, \arg2 + vbsl.u8 \arg3, \arg0, \arg1 +.endm + +.macro DIFF_CHROMA_EQ4_P0Q0 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8 + vaddl.u8 \arg4, \arg0, 
\arg3 + vaddw.u8 \arg5, \arg4, \arg1 + vaddw.u8 \arg6, \arg4, \arg2 + vaddw.u8 \arg5, \arg5, \arg0 + vaddw.u8 \arg6, \arg6, \arg3 + vrshrn.u16 \arg7, \arg5, #2 + vrshrn.u16 \arg8, \arg6, #2 +.endm + +.macro LOAD_CHROMA_DATA_4 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8 + vld4.u8 {\arg0[\arg8],\arg1[\arg8],\arg2[\arg8],\arg3[\arg8]}, [r0], r2 + vld4.u8 {\arg4[\arg8],\arg5[\arg8],\arg6[\arg8],\arg7[\arg8]}, [r1], r2 +.endm + +.macro STORE_CHROMA_DATA_4 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8 + vst4.u8 {\arg0[\arg8],\arg1[\arg8],\arg2[\arg8],\arg3[\arg8]}, [r0], r2 + vst4.u8 {\arg4[\arg8],\arg5[\arg8],\arg6[\arg8],\arg7[\arg8]}, [r1], r2 +.endm + +.macro LOAD_LUMA_DATA_3 arg0, arg1, arg2, arg3, arg4, arg5, arg6 + vld3.u8 {\arg0[\arg6],\arg1[\arg6],\arg2[\arg6]}, [r2], r1 + vld3.u8 {\arg3[\arg6],\arg4[\arg6],\arg5[\arg6]}, [r0], r1 +.endm + +.macro STORE_LUMA_DATA_4 arg0, arg1, arg2, arg3, arg4, arg5 + vst4.u8 {\arg0[\arg4],\arg1[\arg4],\arg2[\arg4],\arg3[\arg4]}, [r0], r1 + vst4.u8 {\arg0[\arg5],\arg1[\arg5],\arg2[\arg5],\arg3[\arg5]}, [r2], r1 +.endm + +.macro STORE_LUMA_DATA_3 arg0, arg1, arg2, arg3, arg4, arg5, arg6 + vst3.u8 {\arg0[\arg6],\arg1[\arg6],\arg2[\arg6]}, [r3], r1 + vst3.u8 {\arg3[\arg6],\arg4[\arg6],\arg5[\arg6]}, [r0], r1 +.endm + +.macro EXTRACT_DELTA_INTO_TWO_PART arg0, arg1 + vcge.s8 \arg1, \arg0, #0 + vand \arg1, \arg0, \arg1 + vsub.s8 \arg0, \arg1, \arg0 +.endm + +WELS_ASM_FUNC_BEGIN DeblockLumaLt4V_neon + vpush {q4-q7} + vdup.u8 q11, r2 + vdup.u8 q9, r3 + + add r2, r1, r1, lsl #1 + sub r2, r0, r2 + vld1.u8 {q0}, [r2], r1 + vld1.u8 {q3}, [r0], r1 + vld1.u8 {q1}, [r2], r1 + vld1.u8 {q4}, [r0], r1 + vld1.u8 {q2}, [r2] + vld1.u8 {q5}, [r0] + sub r2, r2, r1 + + ldr r3, [sp, #64] + vld1.s8 {d31}, [r3] + vdup.s8 d28, d31[0] + vdup.s8 d30, d31[1] + vdup.s8 d29, d31[2] + vdup.s8 d31, d31[3] + vtrn.32 d28, d30 + vtrn.32 d29, d31 + vcge.s8 q10, q14, #0 + + MASK_MATRIX q1, q2, q3, q4, q11, q9, q15 + vand.u8 q10, q10, q15 + + veor q15, 
q15 + vsub.i8 q15,q15,q14 + + DIFF_LUMA_LT4_P1_Q1 q0, q1, q2, q3, q9, q15, q14, q10, q6, q12 + vst1.u8 {q6}, [r2], r1 + + DIFF_LUMA_LT4_P1_Q1 q5, q4, q3, q2, q9, q15, q14, q10, q7, q13 + + vabs.s8 q12, q12 + vabs.s8 q13, q13 + vadd.u8 q14,q14,q12 + vadd.u8 q14,q14,q13 + veor q15, q15 + vsub.i8 q15,q15,q14 + + DIFF_LUMA_LT4_P0_Q0 d2, d4, d6, d8, d16, q12, q13 + DIFF_LUMA_LT4_P0_Q0 d3, d5, d7, d9, d17, q12, q13 + vmax.s8 q8, q8, q15 + vmin.s8 q8, q8, q14 + vand.s8 q8, q8, q10 + EXTRACT_DELTA_INTO_TWO_PART q8, q9 + vqadd.u8 q2, q2, q9 + vqsub.u8 q2, q2, q8 + vst1.u8 {q2}, [r2], r1 + vqsub.u8 q3, q3, q9 + vqadd.u8 q3, q3, q8 + vst1.u8 {q3}, [r2] , r1 + vst1.u8 {q7}, [r2] + + vpop {q4-q7} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN DeblockLumaEq4V_neon + vpush {q4-q7} + + vdup.u8 q5, r2 + vdup.u8 q4, r3 + + sub r3, r0, r1, lsl #2 + vld1.u8 {q8}, [r3], r1 + vld1.u8 {q12}, [r0], r1 + vld1.u8 {q9}, [r3], r1 + vld1.u8 {q13}, [r0], r1 + vld1.u8 {q10}, [r3], r1 + vld1.u8 {q14}, [r0], r1 + vld1.u8 {q11}, [r3] + vld1.u8 {q15}, [r0] + sub r3, r3, r1 , lsl #1 + + MASK_MATRIX q10, q11, q12, q13, q5, q4, q6 + + mov r2, r2, lsr #2 + add r2, r2, #2 + vdup.u8 q5, r2 + vabd.u8 q0, q11, q12 + vclt.u8 q7, q0, q5 + + vabd.u8 q1, q9, q11 + vclt.u8 q1, q1, q4 + vand.s8 q1, q1, q7 + + vabd.u8 q2, q14,q12 + vclt.u8 q2, q2, q4 + vand.s8 q2, q2, q7 + vand.u8 q7, q7, q6 + + vmov q3, q1 + + DIFF_LUMA_EQ4_P2P1P0 d16, d18, d20, d22, d24, d26, d2, d0 + DIFF_LUMA_EQ4_P2P1P0 d17, d19, d21, d23, d25, d27, d3, d1 + + vand.u8 q3, q7, q3 + DIFF_LUMA_EQ4_MASK q0, q9, q3, q4 + vst1.u8 {q4}, [r3], r1 + DIFF_LUMA_EQ4_MASK q8,q10, q3, q4 + vst1.u8 {q4}, [r3], r1 + DIFF_LUMA_EQ4_MASK q1,q11, q6, q4 + vst1.u8 {q4}, [r3], r1 + + vmov q0, q2 + DIFF_LUMA_EQ4_P2P1P0 d30, d28, d26, d24, d22, d20, d4, d6 + DIFF_LUMA_EQ4_P2P1P0 d31, d29, d27, d25, d23, d21, d5, d7 + + vand.u8 q0, q7, q0 + DIFF_LUMA_EQ4_MASK q2, q12, q6, q4 + vst1.u8 {q4}, [r3], r1 + DIFF_LUMA_EQ4_MASK q15, q13, q0, q4 + vst1.u8 {q4}, [r3], r1 + 
DIFF_LUMA_EQ4_MASK q3, q14, q0, q4 + vst1.u8 {q4}, [r3], r1 + + vpop {q4-q7} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN DeblockLumaLt4H_neon + vpush {q4-q7} + + vdup.u8 q11, r2 + vdup.u8 q9, r3 + + sub r2, r0, #3 + LOAD_LUMA_DATA_3 d0, d1, d2, d6, d7, d8, 0 + LOAD_LUMA_DATA_3 d0, d1, d2, d6, d7, d8, 1 + LOAD_LUMA_DATA_3 d0, d1, d2, d6, d7, d8, 2 + LOAD_LUMA_DATA_3 d0, d1, d2, d6, d7, d8, 3 + LOAD_LUMA_DATA_3 d0, d1, d2, d6, d7, d8, 4 + LOAD_LUMA_DATA_3 d0, d1, d2, d6, d7, d8, 5 + LOAD_LUMA_DATA_3 d0, d1, d2, d6, d7, d8, 6 + LOAD_LUMA_DATA_3 d0, d1, d2, d6, d7, d8, 7 + + LOAD_LUMA_DATA_3 d3, d4, d5, d9, d10, d11, 0 + LOAD_LUMA_DATA_3 d3, d4, d5, d9, d10, d11, 1 + LOAD_LUMA_DATA_3 d3, d4, d5, d9, d10, d11, 2 + LOAD_LUMA_DATA_3 d3, d4, d5, d9, d10, d11, 3 + LOAD_LUMA_DATA_3 d3, d4, d5, d9, d10, d11, 4 + LOAD_LUMA_DATA_3 d3, d4, d5, d9, d10, d11, 5 + LOAD_LUMA_DATA_3 d3, d4, d5, d9, d10, d11, 6 + LOAD_LUMA_DATA_3 d3, d4, d5, d9, d10, d11, 7 + + vswp d1, d2 + vswp d3, d4 + vswp d1, d4 + vswp d7, d8 + vswp d9, d10 + vswp d7, d10 + + sub r0, r0, r1, lsl #4 + + ldr r3, [sp, #64] + vld1.s8 {d31}, [r3] + vdup.s8 d28, d31[0] + vdup.s8 d30, d31[1] + vdup.s8 d29, d31[2] + vdup.s8 d31, d31[3] + vtrn.32 d28, d30 + vtrn.32 d29, d31 + vcge.s8 q10, q14, #0 + + MASK_MATRIX q1, q2, q3, q4, q11, q9, q15 + vand.u8 q10, q10, q15 + + veor q15, q15 + vsub.i8 q15,q15,q14 + + DIFF_LUMA_LT4_P1_Q1 q0, q1, q2, q3, q9, q15, q14, q10, q6, q12 + DIFF_LUMA_LT4_P1_Q1 q5, q4, q3, q2, q9, q15, q14, q10, q7, q13 + + vabs.s8 q12, q12 + vabs.s8 q13, q13 + vadd.u8 q14,q14,q12 + vadd.u8 q14,q14,q13 + veor q15, q15 + vsub.i8 q15,q15,q14 + + DIFF_LUMA_LT4_P0_Q0 d2, d4, d6, d8, d16, q12, q13 + DIFF_LUMA_LT4_P0_Q0 d3, d5, d7, d9, d17, q12, q13 + vmax.s8 q8, q8, q15 + vmin.s8 q8, q8, q14 + vand.s8 q8, q8, q10 + EXTRACT_DELTA_INTO_TWO_PART q8, q9 + vqadd.u8 q2, q2, q9 + vqsub.u8 q2, q2, q8 + + vqsub.u8 q3, q3, q9 + vqadd.u8 q3, q3, q8 + + sub r0, #2 + add r2, r0, r1 + lsl r1, #1 + + vmov q1, q6 + vmov q4, q7 + 
+ vswp q2, q3 + vswp d3, d6 + vswp d5, d8 + + STORE_LUMA_DATA_4 d2, d3, d4, d5, 0, 1 + STORE_LUMA_DATA_4 d2, d3, d4, d5, 2, 3 + STORE_LUMA_DATA_4 d2, d3, d4, d5, 4, 5 + STORE_LUMA_DATA_4 d2, d3, d4, d5, 6, 7 + + STORE_LUMA_DATA_4 d6, d7, d8, d9, 0, 1 + STORE_LUMA_DATA_4 d6, d7, d8, d9, 2, 3 + STORE_LUMA_DATA_4 d6, d7, d8, d9, 4, 5 + STORE_LUMA_DATA_4 d6, d7, d8, d9, 6, 7 + + vpop {q4-q7} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN DeblockLumaEq4H_neon + vpush {q4-q7} + vdup.u8 q5, r2 + vdup.u8 q4, r3 + + sub r3, r0, #4 // pix -= 4 + + vld1.u8 {d16}, [r3], r1 + vld1.u8 {d17}, [r3], r1 + vld1.u8 {d18}, [r3], r1 + vld1.u8 {d19}, [r3], r1 + vld1.u8 {d20}, [r3], r1 + vld1.u8 {d21}, [r3], r1 + vld1.u8 {d22}, [r3], r1 + vld1.u8 {d23}, [r3], r1 + vld1.u8 {d24}, [r3], r1 + vld1.u8 {d25}, [r3], r1 + vld1.u8 {d26}, [r3], r1 + vld1.u8 {d27}, [r3], r1 + vld1.u8 {d28}, [r3], r1 + vld1.u8 {d29}, [r3], r1 + vld1.u8 {d30}, [r3], r1 + vld1.u8 {d31}, [r3], r1 + + vtrn.u32 d16, d20 + vtrn.u32 d17, d21 + vtrn.u32 d18, d22 + vtrn.u32 d19, d23 + vtrn.u32 d24, d28 + vtrn.u32 d25, d29 + vtrn.u32 d26, d30 + vtrn.u32 d27, d31 + + vtrn.u16 d16, d18 + vtrn.u16 d17, d19 + vtrn.u16 d20, d22 + vtrn.u16 d21, d23 + vtrn.u16 d24, d26 + vtrn.u16 d25, d27 + vtrn.u16 d28, d30 + vtrn.u16 d29, d31 + + vtrn.u8 d16, d17 + vtrn.u8 d18, d19 + vtrn.u8 d20, d21 + vtrn.u8 d22, d23 + vtrn.u8 d24, d25 + vtrn.u8 d26, d27 + vtrn.u8 d28, d29 + vtrn.u8 d30, d31 + + vswp d17, d24 + vswp d19, d26 + vswp d21, d28 + vswp d23, d30 + + vswp q12, q9 + vswp q14, q11 + + vswp q12, q10 + vswp q13, q11 + + MASK_MATRIX q10, q11, q12, q13, q5, q4, q6 + + mov r2, r2, lsr #2 + add r2, r2, #2 + vdup.u8 q5, r2 + vabd.u8 q0, q11, q12 + vclt.u8 q7, q0, q5 + + vabd.u8 q1, q9, q11 + vclt.u8 q1, q1, q4 + vand.s8 q1, q1, q7 + + vabd.u8 q2, q14,q12 + vclt.u8 q2, q2, q4 + vand.s8 q2, q2, q7 + vand.u8 q7, q7, q6 + + vmov q3, q1 + + DIFF_LUMA_EQ4_P2P1P0 d16, d18, d20, d22, d24, d26, d2, d0 + DIFF_LUMA_EQ4_P2P1P0 d17, d19, d21, d23, d25, d27, 
d3, d1 + + vand.u8 q3, q7, q3 + DIFF_LUMA_EQ4_MASK q0, q9, q3, q4 + vmov q9, q4 + vbsl.u8 q3, q8, q10 + DIFF_LUMA_EQ4_MASK q1,q11, q6, q8 + + vand.u8 q7, q7, q2 + + DIFF_LUMA_EQ4_P2P1P0 d30, d28, d26, d24, d22, d20, d4, d0 + DIFF_LUMA_EQ4_P2P1P0 d31, d29, d27, d25, d23, d21, d5, d1 + + vbsl.u8 q6, q2, q12 + DIFF_LUMA_EQ4_MASK q15, q13, q7, q4 + + vbsl.u8 q7, q0, q14 + + vmov q5, q6 + vmov q2, q9 + vmov q6, q4 + vmov q4, q8 + + vswp d8, d6 + vswp d5, d7 + vswp d5, d8 + vswp d14, d12 + vswp d11, d13 + vswp d11, d14 + + sub r3, r0, #3 + STORE_LUMA_DATA_3 d4,d5,d6,d10,d11,d12,0 + STORE_LUMA_DATA_3 d4,d5,d6,d10,d11,d12,1 + STORE_LUMA_DATA_3 d4,d5,d6,d10,d11,d12,2 + STORE_LUMA_DATA_3 d4,d5,d6,d10,d11,d12,3 + STORE_LUMA_DATA_3 d4,d5,d6,d10,d11,d12,4 + STORE_LUMA_DATA_3 d4,d5,d6,d10,d11,d12,5 + STORE_LUMA_DATA_3 d4,d5,d6,d10,d11,d12,6 + STORE_LUMA_DATA_3 d4,d5,d6,d10,d11,d12,7 + + STORE_LUMA_DATA_3 d7,d8,d9,d13,d14,d15,0 + STORE_LUMA_DATA_3 d7,d8,d9,d13,d14,d15,1 + STORE_LUMA_DATA_3 d7,d8,d9,d13,d14,d15,2 + STORE_LUMA_DATA_3 d7,d8,d9,d13,d14,d15,3 + STORE_LUMA_DATA_3 d7,d8,d9,d13,d14,d15,4 + STORE_LUMA_DATA_3 d7,d8,d9,d13,d14,d15,5 + STORE_LUMA_DATA_3 d7,d8,d9,d13,d14,d15,6 + STORE_LUMA_DATA_3 d7,d8,d9,d13,d14,d15,7 + + vpop {q4-q7} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN DeblockChromaLt4V_neon + vdup.u8 q11, r3 + ldr r3, [sp, #0] + + sub r0, r0, r2 , lsl #1 + sub r1, r1, r2, lsl #1 + vdup.u8 q9, r3 + ldr r3, [sp, #4] + + vld1.u8 {d0}, [r0], r2 + vld1.u8 {d1}, [r1], r2 + vld1.u8 {d2}, [r0], r2 + vld1.u8 {d3}, [r1], r2 + vld1.u8 {d4}, [r0], r2 + vld1.u8 {d5}, [r1], r2 + vld1.u8 {d6}, [r0] + vld1.u8 {d7}, [r1] + + sub r0, r0, r2, lsl #1 + sub r1, r1, r2, lsl #1 + + vld1.s8 {d31}, [r3] + vmovl.u8 q14,d31 + vshl.u64 d29,d28,#8 + vorr d28,d29 + vmov d29, d28 + veor q15, q15 + vsub.i8 q15,q15,q14 + + MASK_MATRIX q0, q1, q2, q3, q11, q9, q10 + + DIFF_LUMA_LT4_P0_Q0 d0, d2, d4, d6, d16, q12, q13 + DIFF_LUMA_LT4_P0_Q0 d1, d3, d5, d7, d17, q12, q13 + vmax.s8 q8, q8, q15 + 
vmin.s8 q8, q8, q14 + + vand.s8 q8, q8, q10 + vcge.s8 q14, q14, #0 + vand.s8 q8, q8, q14 + EXTRACT_DELTA_INTO_TWO_PART q8, q10 + vqadd.u8 q1, q1, q10 + vqsub.u8 q1, q1, q8 + vst1.u8 {d2}, [r0], r2 + vst1.u8 {d3}, [r1], r2 + vqsub.u8 q2, q2, q10 + vqadd.u8 q2, q2, q8 + vst1.u8 {d4}, [r0] + vst1.u8 {d5}, [r1] + +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN DeblockChromaEq4V_neon + vpush {q4-q5} + + vdup.u8 q11, r3 + ldr r3, [sp, #32] + + sub r0, r0, r2 , lsl #1 + sub r1, r1, r2, lsl #1 + vdup.u8 q9, r3 + vld1.u8 {d0}, [r0], r2 // q0::p1 + vld1.u8 {d1}, [r1], r2 + vld1.u8 {d2}, [r0], r2 // q1::p0 + vld1.u8 {d3}, [r1], r2 + vld1.u8 {d4}, [r0], r2 // q2::q0 + vld1.u8 {d5}, [r1], r2 + vld1.u8 {d6}, [r0] // q3::q1 + vld1.u8 {d7}, [r1] + + sub r0, r0, r2, lsl #1 // pix = [-1*src_stride] + sub r1, r1, r2, lsl #1 + + MASK_MATRIX q0, q1, q2, q3, q11, q9, q10 + + vmov q11, q10 + + DIFF_CHROMA_EQ4_P0Q0 d0, d2, d4, d6, q4, q5, q8, d30, d0 // Cb::p0' q0' + DIFF_CHROMA_EQ4_P0Q0 d1, d3, d5, d7, q12, q13, q14, d31, d1 // Cr::p0' q0' + + vbsl.u8 q10, q15, q1 + vst1.u8 {d20}, [r0], r2 + vst1.u8 {d21}, [r1], r2 + + vbsl.u8 q11, q0, q2 + vst1.u8 {d22}, [r0] + vst1.u8 {d23}, [r1] + + vpop {q4-q5} +WELS_ASM_FUNC_END + +WELS_ASM_FUNC_BEGIN DeblockChromaLt4H_neon + + vdup.u8 q11, r3 + ldr r3, [sp, #0] + + sub r0, r0, #2 + vdup.u8 q9, r3 + ldr r3, [sp, #4] + sub r1, r1, #2 + vld1.s8 {d31}, [r3] + + LOAD_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 0 + LOAD_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 1 + LOAD_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 2 + LOAD_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 3 + LOAD_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 4 + LOAD_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 5 + LOAD_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 6 + LOAD_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 7 + vswp q1, q2 + vswp d1, d2 + vswp d6, d5 + + vmovl.u8 q14, d31 + vshl.u64 d29,d28,#8 + vorr d28,d29 + vmov d29, d28 + veor q15, q15 + vsub.i8 q15,q15,q14 + + 
MASK_MATRIX q0, q1, q2, q3, q11, q9, q10 + + DIFF_LUMA_LT4_P0_Q0 d0, d2, d4, d6, d16, q12, q13 + DIFF_LUMA_LT4_P0_Q0 d1, d3, d5, d7, d17, q12, q13 + vmax.s8 q8, q8, q15 + vmin.s8 q8, q8, q14 + + vand.s8 q8, q8, q10 + vcge.s8 q14, q14, #0 + vand.s8 q8, q8, q14 + EXTRACT_DELTA_INTO_TWO_PART q8, q10 + vqadd.u8 q1, q1, q10 + vqsub.u8 q1, q1, q8 + vqsub.u8 q2, q2, q10 + vqadd.u8 q2, q2, q8 + + sub r0, r0, r2, lsl #3 + sub r1, r1, r2, lsl #3 + vswp d1, d2 + vswp d6, d5 + vswp q1, q2 + + STORE_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 0 + STORE_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 1 + STORE_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 2 + STORE_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 3 + STORE_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 4 + STORE_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 5 + STORE_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 6 + STORE_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 7 + +WELS_ASM_FUNC_END + +WELS_ASM_FUNC_BEGIN DeblockChromaEq4H_neon + vpush {q4-q5} + vdup.u8 q11, r3 + ldr r3, [sp, #32] + + sub r0, r0, #2 + sub r1, r1, #2 + + LOAD_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 0 + LOAD_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 1 + LOAD_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 2 + LOAD_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 3 + LOAD_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 4 + LOAD_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 5 + LOAD_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 6 + LOAD_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 7 + vswp q1, q2 + vswp d1, d2 + vswp d6, d5 + + vdup.u8 q9, r3 + MASK_MATRIX q0, q1, q2, q3, q11, q9, q10 + vmov q11, q10 + + DIFF_CHROMA_EQ4_P0Q0 d0, d2, d4, d6, q8, q9, q12, d8, d10 + DIFF_CHROMA_EQ4_P0Q0 d1, d3, d5, d7, q13, q14, q15, d9, d11 + + vbsl.u8 q10, q4, q1 + vbsl.u8 q11, q5, q2 + sub r0, r0, r2, lsl #3 // pix: 0th row [-2] + sub r1, r1, r2, lsl #3 + + vmov q1, q10 + vmov q2, q11 + vswp d1, d2 + vswp d6, d5 + vswp q1, q2 + // Cb:d0d1d2d3, Cr:d4d5d6d7 + 
STORE_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 0 + STORE_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 1 + STORE_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 2 + STORE_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 3 + STORE_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 4 + STORE_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 5 + STORE_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 6 + STORE_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 7 + + vpop {q4-q5} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsNonZeroCount_neon + mov r1, #1 + vdup.8 q2, r1 + vld1.64 {d0,d1,d2}, [r0] + vmin.s8 q0, q0, q2 + vmin.s8 d2, d2, d4 + vst1.64 {d0,d1,d2}, [r0] +WELS_ASM_FUNC_END + +.macro BS_NZC_CHECK arg0, arg1, arg2, arg3, arg4 + vld1.8 {d0,d1}, [\arg0] + /* Arrenge the input data --- TOP */ + ands r6, \arg1, #2 + beq bs_nzc_check_jump0 + + sub r6, \arg0, \arg2, lsl #4 + sub r6, r6, \arg2, lsl #3 + add r6, #12 + vld1.32 d3[1], [r6] + +bs_nzc_check_jump0: + vext.8 q1, q1, q0, #12 + vadd.u8 \arg3, q0, q1 + + + /* Arrenge the input data --- LEFT */ + ands r6, \arg1, #1 + beq bs_nzc_check_jump1 + + sub r6, \arg0, #21 + add r7, r6, #4 + vld1.8 d3[4], [r6] + add r6, r7, #4 + vld1.8 d3[5], [r7] + add r7, r6, #4 + vld1.8 d3[6], [r6] + vld1.8 d3[7], [r7] + +bs_nzc_check_jump1: + vzip.8 d0, d1 + vzip.8 d0, d1 + vext.8 q1, q1, q0, #12 + vadd.u8 \arg4, q0, q1 +.endm + +.macro BS_COMPARE_MV arg0, arg1, arg2, arg3, arg4, arg5, arg6 //in: $0,$1(const),$2(const),$3(const),$4(const); out:$5, $6 + mov r6, #4 + vabd.s16 q8, \arg0, \arg1 + vabd.s16 q9, \arg1, \arg2 + vdup.s16 \arg0, r6 + vabd.s16 q10, \arg2, \arg3 + vabd.s16 q11, \arg3, \arg4 + + vcge.s16 q8, \arg0 + vcge.s16 q9, \arg0 + vcge.s16 q10, \arg0 + vcge.s16 q11, \arg0 + + vpadd.i16 d16, d16, d17 + vpadd.i16 d17, d18, d19 + vpadd.i16 d18, d20, d21 + vpadd.i16 d19, d22, d23 + + vaddhn.i16 \arg5, q8, q8 + vaddhn.i16 \arg6, q9, q9 +.endm + +.macro BS_MV_CHECK arg0, arg1, arg2, arg3, arg4, arg5, arg6 + vldm \arg0, {q0,q1,q2,q3} + + /* 
Arrenge the input data --- TOP */ + ands r6, \arg1, #2 + beq bs_mv_check_jump0 + + sub r6, \arg0, \arg2, lsl #6 + add r6, #48 + vld1.8 {d8, d9}, [r6] + +bs_mv_check_jump0: + BS_COMPARE_MV q4, q0, q1, q2, q3, \arg3, \arg4 + + /* Arrenge the input data --- LEFT */ + ands r6, \arg1, #1 + beq bs_mv_check_jump1 + + sub r6, \arg0, #52 + add r7, r6, #16 + vld1.32 d8[0], [r6] + add r6, r7, #16 + vld1.32 d8[1], [r7] + add r7, r6, #16 + vld1.32 d9[0], [r6] + vld1.32 d9[1], [r7] + +bs_mv_check_jump1: + vzip.32 q0, q2 + vzip.32 q1, q3 + vzip.32 q0, q1 + vzip.32 q2, q3 + BS_COMPARE_MV q4, q0, q1, q2, q3, \arg5, \arg6 +.endm + + +WELS_ASM_FUNC_BEGIN DeblockingBSCalcEnc_neon + + stmdb sp!, {r5-r7} + vpush {q4} + + ldr r5, [sp, #28] //Save BS to r5 + + /* Checking the nzc status */ + BS_NZC_CHECK r0, r2, r3, q14, q15 //q14,q15 save the nzc status + + /* For checking bS[I] = 2 */ + mov r6, #2 + vcgt.s8 q14, q14, #0 + vdup.u8 q0, r6 + vcgt.s8 q15, q15, #0 + + vand.u8 q14, q14, q0 //q14 save the nzc check result all the time --- for dir is top + vand.u8 q15, q15, q0 //q15 save the nzc check result all the time --- for dir is left + + /* Checking the mv status*/ + BS_MV_CHECK r1, r2, r3, d24, d25, d26, d27//q12, q13 save the mv status + + /* For checking bS[I] = 1 */ + mov r6, #1 + vdup.u8 q0, r6 + + vand.u8 q12, q12, q0 //q12 save the nzc check result all the time --- for dir is top + vand.u8 q13, q13, q0 //q13 save the nzc check result all the time --- for dir is left + + + /* Check bS[I] is '1' or '2' */ + vmax.u8 q1, q12, q14 + vmax.u8 q0, q13, q15 + + //vstm r5, {q0, q1} + vst1.32 {q0, q1}, [r5] + vpop {q4} + ldmia sp!, {r5-r7} +WELS_ASM_FUNC_END +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/arm/expand_picture_neon.S b/TMessagesProj/jni/third_party/openh264/src/codec/common/arm/expand_picture_neon.S new file mode 100644 index 000000000..5dc403dac --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/arm/expand_picture_neon.S @@ 
-0,0 +1,154 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifdef HAVE_NEON +#include "arm_arch_common_macro.S" + + +WELS_ASM_FUNC_BEGIN ExpandPictureLuma_neon + stmdb sp!, {r4-r8} + //Save the dst + mov r7, r0 + mov r8, r3 + + add r4, r7, r2 + sub r4, #1 + //For the left and right expand +_expand_picture_luma_loop2: + sub r5, r7, #32 + add r6, r4, #1 + + vld1.8 {d0[], d1[]}, [r7], r1 + vld1.8 {d2[], d3[]}, [r4], r1 + + vst1.8 {q0}, [r5]! + vst1.8 {q0}, [r5] + vst1.8 {q1}, [r6]! 
+ vst1.8 {q1}, [r6] + subs r8, #1 + bne _expand_picture_luma_loop2 + + //for the top and bottom expand + add r2, #64 + sub r0, #32 + mla r4, r1, r3, r0 + sub r4, r1 +_expand_picture_luma_loop0: + mov r5, #32 + mls r5, r5, r1, r0 + add r6, r4, r1 + vld1.8 {q0}, [r0]! + vld1.8 {q1}, [r4]! + + mov r8, #32 +_expand_picture_luma_loop1: + vst1.8 {q0}, [r5], r1 + vst1.8 {q1}, [r6], r1 + subs r8, #1 + bne _expand_picture_luma_loop1 + + subs r2, #16 + bne _expand_picture_luma_loop0 + + //vldreq.32 d0, [r0] + + ldmia sp!, {r4-r8} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN ExpandPictureChroma_neon + stmdb sp!, {r4-r9} + //Save the dst + mov r7, r0 + mov r8, r3 + + add r4, r7, r2 + sub r4, #1 + //For the left and right expand +_expand_picture_chroma_loop2: + sub r5, r7, #16 + add r6, r4, #1 + + vld1.8 {d0[], d1[]}, [r7], r1 + vld1.8 {d2[], d3[]}, [r4], r1 + + vst1.8 {q0}, [r5] + vst1.8 {q1}, [r6] + subs r8, #1 + bne _expand_picture_chroma_loop2 + + //for the top and bottom expand + add r2, #32 + mov r9, r2 + bic r2, #15 + sub r0, #16 + mla r4, r1, r3, r0 + sub r4, r1 +_expand_picture_chroma_loop0: + mov r5, #16 + mls r5, r5, r1, r0 + add r6, r4, r1 + vld1.8 {q0}, [r0]! + vld1.8 {q1}, [r4]! + + mov r8, #16 +_expand_picture_chroma_loop1: + vst1.8 {q0}, [r5], r1 + vst1.8 {q1}, [r6], r1 + subs r8, #1 + bne _expand_picture_chroma_loop1 + + subs r2, #16 + bne _expand_picture_chroma_loop0 + + //vldreq.32 d0, [r0] + + and r9, #15 + cmp r9, #8 + bne _expand_picture_chroma_end + mov r5, #16 + mls r5, r5, r1, r0 + add r6, r4, r1 + vld1.8 {d0}, [r0]! + vld1.8 {d2}, [r4]! 
+ mov r8, #16 +_expand_picture_chroma_loop3: + vst1.8 {d0}, [r5], r1 + vst1.8 {d2}, [r6], r1 + subs r8, #1 + bne _expand_picture_chroma_loop3 +_expand_picture_chroma_end: + + ldmia sp!, {r4-r9} +WELS_ASM_FUNC_END + +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/arm/intra_pred_common_neon.S b/TMessagesProj/jni/third_party/openh264/src/codec/common/arm/intra_pred_common_neon.S new file mode 100644 index 000000000..66617a6a6 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/arm/intra_pred_common_neon.S @@ -0,0 +1,82 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + + +#ifdef HAVE_NEON +#include "arm_arch_common_macro.S" + +WELS_ASM_FUNC_BEGIN WelsI16x16LumaPredV_neon + //Get the top line data to 'q0' + sub r3, r1, r2 + vldm r3, {d0, d1} + + //mov r2, #16 + mov r3, #4 + //Set the top line to the each line of MB(16*16) +loop_0_get_i16x16_luma_pred_v: + vst1.8 {d0,d1}, [r0]! + vst1.8 {d0,d1}, [r0]! + vst1.8 {d0,d1}, [r0]! + vst1.8 {d0,d1}, [r0]! + subs r3, #1 + bne loop_0_get_i16x16_luma_pred_v +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsI16x16LumaPredH_neon + //stmdb sp!, {r4, lr} + sub r1, r1, #1 + mov r3, #4 +loop_0_get_i16x16_luma_pred_h: + //Get one byte data from left side + vld1.8 {d0[],d1[]}, [r1], r2 + vld1.8 {d2[],d3[]}, [r1], r2 + vld1.8 {d4[],d5[]}, [r1], r2 + vld1.8 {d6[],d7[]}, [r1], r2 + + //Set the line of MB using the left side byte data + vst1.8 {d0,d1}, [r0]! + //add r0, #16 + vst1.8 {d2,d3}, [r0]! + //add r0, #16 + vst1.8 {d4,d5}, [r0]! + //add r0, #16 + vst1.8 {d6,d7}, [r0]! + //add r0, #16 + + subs r3, #1 + bne loop_0_get_i16x16_luma_pred_h + +WELS_ASM_FUNC_END + + +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/arm/mc_neon.S b/TMessagesProj/jni/third_party/openh264/src/codec/common/arm/mc_neon.S new file mode 100644 index 000000000..8e1d2ba10 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/arm/mc_neon.S @@ -0,0 +1,2283 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifdef HAVE_NEON +#include "arm_arch_common_macro.S" + +.macro AVERAGE_TWO_8BITS arg0, arg1, arg2 +// { // input:dst_d, src_d A and B; working: q13 + vaddl.u8 q13, \arg2, \arg1 + vrshrn.u16 \arg0, q13, #1 +// } +.endm + +.macro FILTER_6TAG_8BITS arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8 +// { // input:src[-2], src[-1], src[0], src[1], src[2], src[3], dst_d, multiplier a/b; working: q12, q13 + vaddl.u8 q12, \arg0, \arg5 //q12=src[-2]+src[3] + vaddl.u8 q13, \arg2, \arg3 //src[0]+src[1] + vmla.u16 q12, q13, \arg7 //q12 += 20*(src[0]+src[1]), 2 cycles + vaddl.u8 q13, \arg1, \arg4 //src[-1]+src[2] + vmls.s16 q12, q13, \arg8 //q12 -= 5*(src[-1]+src[2]), 2 cycles + vqrshrun.s16 \arg6, q12, #5 +// } +.endm + +.macro FILTER_SINGLE_TAG_8BITS arg0, arg1,arg2, arg3, arg4 // when width=17/9, used +// { // input: src_d{Y[0][1][2][3][4][5]X, the even of working_q2} + vrev64.8 \arg2, \arg0 // X[5][4][3][2][1][0]O + vaddl.u8 \arg3, \arg0, \arg2 // each 16bits, *[50][41][32][23][14][05]* + vmul.s16 \arg0, \arg2, \arg1 // 0+1*[50]-5*[41]+20[32] + vpadd.s16 \arg0, \arg0, \arg0 + vpadd.s16 \arg0, \arg0, \arg0 + vqrshrun.s16 \arg0, \arg4, #5 +// } +.endm + +.macro 
FILTER_6TAG_8BITS_AVERAGE_WITH_0 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8 +// { // input:src[-2], src[-1], src[0], src[1], src[2], src[3], dst_d, multiplier a/b; working: q12, q13 + vaddl.u8 q12, \arg0, \arg5 //q12=src[-2]+src[3] + vaddl.u8 q13, \arg2, \arg3 //src[0]+src[1] + vmla.u16 q12, q13, \arg7 //q12 += 20*(src[0]+src[1]), 2 cycles + vaddl.u8 q13, \arg1, \arg4 //src[-1]+src[2] + vmls.s16 q12, q13, \arg8 //q12 -= 5*(src[-1]+src[2]), 2 cycles + vqrshrun.s16 \arg6, q12, #5 + vaddl.u8 q13, \arg2, \arg6 + vrshrn.u16 \arg6, q13, #1 +// } +.endm + +.macro FILTER_6TAG_8BITS_AVERAGE_WITH_1 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8 +// { // input:src[-2], src[-1], src[0], src[1], src[2], src[3], dst_d, multiplier a/b; working: q12, q13 + vaddl.u8 q12, \arg0, \arg5 //q12=src[-2]+src[3] + vaddl.u8 q13, \arg2, \arg3 //src[0]+src[1] + vmla.u16 q12, q13, \arg7 //q12 += 20*(src[0]+src[1]), 2 cycles + vaddl.u8 q13, \arg1, \arg4 //src[-1]+src[2] + vmls.s16 q12, q13, \arg8 //q12 -= 5*(src[-1]+src[2]), 2 cycles + vqrshrun.s16 \arg6, q12, #5 + vaddl.u8 q13, \arg3, \arg6 + vrshrn.u16 \arg6, q13, #1 +// } +.endm + +.macro FILTER_6TAG_8BITS_TO_16BITS arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8 +// { // input:d_src[-2], d_src[-1], d_src[0], d_src[1], d_src[2], d_src[3], dst_q, multiplier a/b; working:q13 + vaddl.u8 \arg6, \arg0, \arg5 //dst_q=src[-2]+src[3] + vaddl.u8 q13, \arg2, \arg3 //src[0]+src[1] + vmla.u16 \arg6, q13, \arg7 //dst_q += 20*(src[0]+src[1]), 2 cycles + vaddl.u8 q13, \arg1, \arg4 //src[-1]+src[2] + vmls.s16 \arg6, q13, \arg8 //dst_q -= 5*(src[-1]+src[2]), 2 cycles +// } +.endm + +.macro FILTER_3_IN_16BITS_TO_8BITS arg0, arg1, arg2, arg3 +// { // input:a, b, c, dst_d; + vsub.s16 \arg0, \arg0, \arg1 //a-b + vshr.s16 \arg0, \arg0, #2 //(a-b)/4 + vsub.s16 \arg0, \arg0, \arg1 //(a-b)/4-b + vadd.s16 \arg0, \arg0, \arg2 //(a-b)/4-b+c + vshr.s16 \arg0, \arg0, #2 //((a-b)/4-b+c)/4 + vadd.s16 \arg0, \arg0, \arg2 //((a-b)/4-b+c)/4+c = 
(a-5*b+20*c)/16 + vqrshrun.s16 \arg3, \arg0, #6 //(+32)>>6 +// } +.endm + +.macro UNPACK_2_16BITS_TO_ABC arg0, arg1, arg2, arg3, arg4 +// { // input:q_src[-2:5], q_src[6:13](avail 8+5)/q_src[6:**](avail 4+5), dst_a, dst_b, dst_c; + vext.16 \arg4, \arg0, \arg1, #2 //src[0] + vext.16 \arg3, \arg0, \arg1, #3 //src[1] + vadd.s16 \arg4, \arg3 //c=src[0]+src[1] + + vext.16 \arg3, \arg0, \arg1, #1 //src[-1] + vext.16 \arg2, \arg0, \arg1, #4 //src[2] + vadd.s16 \arg3,\arg2 //b=src[-1]+src[2] + + vext.16 \arg2, \arg0, \arg1, #5 //src[3] + vadd.s16 \arg2, \arg0 //a=src[-2]+src[3] +// } +.endm + +.macro UNPACK_1_IN_8x16BITS_TO_8BITS arg0, arg1,arg2, arg3 +// { // each 16bits; input: d_dst, d_src[0:3] (even), d_src[4:5]+%% (odd) + vext.16 \arg3, \arg3, \arg3, #7 // 0x????, [0][1][2][3][4][5] + vrev64.16 \arg1, \arg1 + vadd.u16 \arg2, \arg1 // C[2+3],B[1+4],A[0+5] + vshr.s64 \arg1, \arg2, #16 + vshr.s64 \arg0, \arg2, #32 // Output: C \arg2, B \arg1, A \arg0 + + vsub.s16 \arg0, \arg0, \arg1 //a-b + vshr.s16 \arg0, \arg0, #2 //(a-b)/4 + vsub.s16 \arg0, \arg0, \arg1 //(a-b)/4-b + vadd.s16 \arg0, \arg0, \arg2 //(a-b)/4-b+c + vshr.s16 \arg0, \arg0, #2 //((a-b)/4-b+c)/4 + vadd.s16 \arg1, \arg0, \arg2 //((a-b)/4-b+c)/4+c = (a-5*b+20*c)/16 + vqrshrun.s16 \arg0, \arg3, #6 //(+32)>>6 +// } +.endm + +WELS_ASM_FUNC_BEGIN McHorVer20WidthEq16_neon + push {r4} + ldr r4, [sp, #4] + + sub r0, #2 + vmov.u16 q14, #0x0014 // 20 + vshr.u16 q15, q14, #2 // 5 + +w16_h_mc_luma_loop: + vld1.u8 {d0,d1,d2}, [r0], r1 //only use 21(16+5); q0=src[-2] + pld [r0] + pld [r0, #16] + + vext.8 q2, q0, q1, #1 //q2=src[-1] + vext.8 q3, q0, q1, #2 //q3=src[0] + vext.8 q8, q0, q1, #3 //q8=src[1] + vext.8 q9, q0, q1, #4 //q9=src[2] + vext.8 q10, q0, q1, #5 //q10=src[3] + + FILTER_6TAG_8BITS d0, d4, d6, d16, d18, d20, d2, q14, q15 + + FILTER_6TAG_8BITS d1, d5, d7, d17, d19, d21, d3, q14, q15 + + sub r4, #1 + vst1.u8 {d2, d3}, [r2], r3 //write 16Byte + + cmp r4, #0 + bne w16_h_mc_luma_loop + pop {r4} +WELS_ASM_FUNC_END 
+ + +WELS_ASM_FUNC_BEGIN McHorVer20WidthEq8_neon + push {r4} + ldr r4, [sp, #4] + + sub r0, #2 + vmov.u16 q14, #0x0014 // 20 + vshr.u16 q15, q14, #2 // 5 + +w8_h_mc_luma_loop: + vld1.u8 {d0,d1}, [r0], r1 //only use 13(8+5); q0=src[-2] + pld [r0] + + vext.8 d2, d0, d1, #1 //d2=src[-1] + vext.8 d3, d0, d1, #2 //d3=src[0] + vext.8 d4, d0, d1, #3 //d4=src[1] + vext.8 d5, d0, d1, #4 //d5=src[2] + vext.8 d6, d0, d1, #5 //d6=src[3] + + FILTER_6TAG_8BITS d0, d2, d3, d4, d5, d6, d1, q14, q15 + + sub r4, #1 + vst1.u8 {d1}, [r2], r3 + + cmp r4, #0 + bne w8_h_mc_luma_loop + pop {r4} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN McHorVer20WidthEq4_neon + push {r4, r5, r6} + ldr r6, [sp, #12] + + sub r0, #2 + vmov.u16 q14, #0x0014 // 20 + vshr.u16 q15, q14, #2 // 5 + +w4_h_mc_luma_loop: + vld1.u8 {d0, d1}, [r0], r1 //only use 9(4+5);d0: 1st row src[-2:5] + pld [r0] + vld1.u8 {d2, d3}, [r0], r1 //d2: 2nd row src[-2:5] + pld [r0] + + vext.8 d4, d0, d1, #1 //d4: 1st row src[-1:6] + vext.8 d5, d2, d3, #1 //d5: 2nd row src[-1:6] + vext.8 q3, q2, q2, #1 //src[0:6 *] + vext.8 q8, q2, q2, #2 //src[1:6 * *] + + vtrn.32 q3, q8 //q3::d6:1st row [0:3]+[1:4]; d7:2nd row [0:3]+[1:4] + vtrn.32 d6, d7 //d6:[0:3]; d7[1:4] + vtrn.32 d0, d2 //d0:[-2:1]; d2[2:5] + vtrn.32 d4, d5 //d4:[-1:2]; d5[3:6] + + FILTER_6TAG_8BITS d0, d4, d6, d7, d2, d5, d1, q14, q15 + + vmov r4, r5, d1 + str r4, [r2], r3 + str r5, [r2], r3 + + sub r6, #2 + cmp r6, #0 + bne w4_h_mc_luma_loop + + pop {r4, r5, r6} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN McHorVer10WidthEq16_neon + push {r4} + ldr r4, [sp, #4] + + sub r0, #2 + vmov.u16 q14, #0x0014 // 20 + vshr.u16 q15, q14, #2 // 5 + +w16_xy_10_mc_luma_loop: + vld1.u8 {d0,d1,d2}, [r0], r1 //only use 21(16+5); q0=src[-2] + pld [r0] + pld [r0, #16] + + vext.8 q2, q0, q1, #1 //q2=src[-1] + vext.8 q3, q0, q1, #2 //q3=src[0] + vext.8 q8, q0, q1, #3 //q8=src[1] + vext.8 q9, q0, q1, #4 //q9=src[2] + vext.8 q10, q0, q1, #5 //q10=src[3] + + FILTER_6TAG_8BITS_AVERAGE_WITH_0 d0, d4, d6, 
d16, d18, d20, d2, q14, q15 + + FILTER_6TAG_8BITS_AVERAGE_WITH_0 d1, d5, d7, d17, d19, d21, d3, q14, q15 + + sub r4, #1 + vst1.u8 {d2, d3}, [r2], r3 //write 16Byte + + cmp r4, #0 + bne w16_xy_10_mc_luma_loop + pop {r4} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN McHorVer10WidthEq8_neon + push {r4} + ldr r4, [sp, #4] + + sub r0, #2 + vmov.u16 q14, #0x0014 // 20 + vshr.u16 q15, q14, #2 // 5 + +w8_xy_10_mc_luma_loop: + vld1.u8 {d0,d1}, [r0], r1 //only use 13(8+5); q0=src[-2] + pld [r0] + + vext.8 d2, d0, d1, #1 //d2=src[-1] + vext.8 d3, d0, d1, #2 //d3=src[0] + vext.8 d4, d0, d1, #3 //d4=src[1] + vext.8 d5, d0, d1, #4 //d5=src[2] + vext.8 d6, d0, d1, #5 //d6=src[3] + + FILTER_6TAG_8BITS_AVERAGE_WITH_0 d0, d2, d3, d4, d5, d6, d1, q14, q15 + + sub r4, #1 + vst1.u8 {d1}, [r2], r3 + + cmp r4, #0 + bne w8_xy_10_mc_luma_loop + pop {r4} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN McHorVer10WidthEq4_neon + push {r4, r5, r6} + ldr r6, [sp, #12] + + sub r0, #2 + vmov.u16 q14, #0x0014 // 20 + vshr.u16 q15, q14, #2 // 5 + +w4_xy_10_mc_luma_loop: + vld1.u8 {d0, d1}, [r0], r1 //only use 9(4+5);d0: 1st row src[-2:5] + pld [r0] + vld1.u8 {d2, d3}, [r0], r1 //d2: 2nd row src[-2:5] + pld [r0] + + vext.8 d4, d0, d1, #1 //d4: 1st row src[-1:6] + vext.8 d5, d2, d3, #1 //d5: 2nd row src[-1:6] + vext.8 q3, q2, q2, #1 //src[0:6 *] + vext.8 q8, q2, q2, #2 //src[1:6 * *] + + vtrn.32 q3, q8 //q3::d6:1st row [0:3]+[1:4]; d7:2nd row [0:3]+[1:4] + vtrn.32 d6, d7 //d6:[0:3]; d7[1:4] + vtrn.32 d0, d2 //d0:[-2:1]; d2[2:5] + vtrn.32 d4, d5 //d4:[-1:2]; d5[3:6] + + FILTER_6TAG_8BITS_AVERAGE_WITH_0 d0, d4, d6, d7, d2, d5, d1, q14, q15 + + vmov r4, r5, d1 + str r4, [r2], r3 + str r5, [r2], r3 + + sub r6, #2 + cmp r6, #0 + bne w4_xy_10_mc_luma_loop + + pop {r4, r5, r6} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN McHorVer30WidthEq16_neon + push {r4} + ldr r4, [sp, #4] + + sub r0, #2 + vmov.u16 q14, #0x0014 // 20 + vshr.u16 q15, q14, #2 // 5 + +w16_xy_30_mc_luma_loop: + vld1.u8 {d0,d1,d2}, [r0], r1 //only 
use 21(16+5); q0=src[-2] + pld [r0] + pld [r0, #16] + + vext.8 q2, q0, q1, #1 //q2=src[-1] + vext.8 q3, q0, q1, #2 //q3=src[0] + vext.8 q8, q0, q1, #3 //q8=src[1] + vext.8 q9, q0, q1, #4 //q9=src[2] + vext.8 q10, q0, q1, #5 //q10=src[3] + + FILTER_6TAG_8BITS_AVERAGE_WITH_1 d0, d4, d6, d16, d18, d20, d2, q14, q15 + + FILTER_6TAG_8BITS_AVERAGE_WITH_1 d1, d5, d7, d17, d19, d21, d3, q14, q15 + + sub r4, #1 + vst1.u8 {d2, d3}, [r2], r3 //write 16Byte + + cmp r4, #0 + bne w16_xy_30_mc_luma_loop + pop {r4} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN McHorVer30WidthEq8_neon + push {r4} + ldr r4, [sp, #4] + + sub r0, #2 + vmov.u16 q14, #0x0014 // 20 + vshr.u16 q15, q14, #2 // 5 + +w8_xy_30_mc_luma_loop: + vld1.u8 {d0,d1}, [r0], r1 //only use 13(8+5); q0=src[-2] + pld [r0] + + vext.8 d2, d0, d1, #1 //d2=src[-1] + vext.8 d3, d0, d1, #2 //d3=src[0] + vext.8 d4, d0, d1, #3 //d4=src[1] + vext.8 d5, d0, d1, #4 //d5=src[2] + vext.8 d6, d0, d1, #5 //d6=src[3] + + FILTER_6TAG_8BITS_AVERAGE_WITH_1 d0, d2, d3, d4, d5, d6, d1, q14, q15 + + sub r4, #1 + vst1.u8 {d1}, [r2], r3 + + cmp r4, #0 + bne w8_xy_30_mc_luma_loop + pop {r4} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN McHorVer30WidthEq4_neon + push {r4, r5, r6} + ldr r6, [sp, #12] + + sub r0, #2 + vmov.u16 q14, #0x0014 // 20 + vshr.u16 q15, q14, #2 // 5 + +w4_xy_30_mc_luma_loop: + vld1.u8 {d0, d1}, [r0], r1 //only use 9(4+5);d0: 1st row src[-2:5] + pld [r0] + vld1.u8 {d2, d3}, [r0], r1 //d2: 2nd row src[-2:5] + pld [r0] + + vext.8 d4, d0, d1, #1 //d4: 1st row src[-1:6] + vext.8 d5, d2, d3, #1 //d5: 2nd row src[-1:6] + vext.8 q3, q2, q2, #1 //src[0:6 *] + vext.8 q8, q2, q2, #2 //src[1:6 * *] + + vtrn.32 q3, q8 //q3::d6:1st row [0:3]+[1:4]; d7:2nd row [0:3]+[1:4] + vtrn.32 d6, d7 //d6:[0:3]; d7[1:4] + vtrn.32 d0, d2 //d0:[-2:1]; d2[2:5] + vtrn.32 d4, d5 //d4:[-1:2]; d5[3:6] + + FILTER_6TAG_8BITS_AVERAGE_WITH_1 d0, d4, d6, d7, d2, d5, d1, q14, q15 + + vmov r4, r5, d1 + str r4, [r2], r3 + str r5, [r2], r3 + + sub r6, #2 + cmp r6, #0 + 
bne w4_xy_30_mc_luma_loop + + pop {r4, r5, r6} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN McHorVer01WidthEq16_neon + push {r4} + ldr r4, [sp, #4] + + sub r0, r0, r1, lsl #1 //src[-2*src_stride] + pld [r0] + pld [r0, r1] + vmov.u16 q14, #0x0014 // 20 + vld1.u8 {q0}, [r0], r1 //q0=src[-2] + vld1.u8 {q1}, [r0], r1 //q1=src[-1] + + pld [r0] + pld [r0, r1] + vshr.u16 q15, q14, #2 // 5 + vld1.u8 {q2}, [r0], r1 //q2=src[0] + vld1.u8 {q3}, [r0], r1 //q3=src[1] + vld1.u8 {q8}, [r0], r1 //q8=src[2] + +w16_xy_01_luma_loop: + + vld1.u8 {q9}, [r0], r1 //q9=src[3] + + FILTER_6TAG_8BITS_AVERAGE_WITH_0 d0, d2, d4, d6, d16, d18, d20, q14, q15 + pld [r0] + FILTER_6TAG_8BITS_AVERAGE_WITH_0 d1, d3, d5, d7, d17, d19, d21, q14, q15 + vld1.u8 {q0}, [r0], r1 //read 2nd row + vst1.u8 {q10}, [r2], r3 //write 1st 16Byte + + FILTER_6TAG_8BITS_AVERAGE_WITH_0 d2, d4, d6, d16, d18, d0, d20, q14, q15 + pld [r0] + FILTER_6TAG_8BITS_AVERAGE_WITH_0 d3, d5, d7, d17, d19, d1, d21, q14, q15 + vld1.u8 {q1}, [r0], r1 //read 3rd row + vst1.u8 {q10}, [r2], r3 //write 2nd 16Byte + + FILTER_6TAG_8BITS_AVERAGE_WITH_0 d4, d6, d16, d18, d0, d2, d20, q14, q15 + pld [r0] + FILTER_6TAG_8BITS_AVERAGE_WITH_0 d5, d7, d17, d19, d1, d3, d21, q14, q15 + vld1.u8 {q2}, [r0], r1 //read 4th row + vst1.u8 {q10}, [r2], r3 //write 3rd 16Byte + + FILTER_6TAG_8BITS_AVERAGE_WITH_0 d6, d16, d18, d0, d2, d4, d20, q14, q15 + pld [r0] + FILTER_6TAG_8BITS_AVERAGE_WITH_0 d7, d17, d19, d1, d3, d5, d21, q14, q15 + vld1.u8 {q3}, [r0], r1 //read 5th row + vst1.u8 {q10}, [r2], r3 //write 4th 16Byte + + FILTER_6TAG_8BITS_AVERAGE_WITH_0 d16, d18, d0, d2, d4, d6, d20, q14, q15 + pld [r0] + FILTER_6TAG_8BITS_AVERAGE_WITH_0 d17, d19, d1, d3, d5, d7, d21, q14, q15 + vld1.u8 {q8}, [r0], r1 //read 6th row + vst1.u8 {q10}, [r2], r3 //write 5th 16Byte + + FILTER_6TAG_8BITS_AVERAGE_WITH_0 d18, d0, d2, d4, d6, d16, d20, q14, q15 + pld [r0] + FILTER_6TAG_8BITS_AVERAGE_WITH_0 d19, d1, d3, d5, d7, d17, d21, q14, q15 + vld1.u8 {q9}, [r0], r1 //read 7th 
row + vst1.u8 {q10}, [r2], r3 //write 6th 16Byte + + FILTER_6TAG_8BITS_AVERAGE_WITH_0 d0, d2, d4, d6, d16, d18, d20, q14, q15 + pld [r0] + FILTER_6TAG_8BITS_AVERAGE_WITH_0 d1, d3, d5, d7, d17, d19, d21, q14, q15 + vld1.u8 {q0}, [r0], r1 //read 8th row + vst1.u8 {q10}, [r2], r3 //write 7th 16Byte + + FILTER_6TAG_8BITS_AVERAGE_WITH_0 d2, d4, d6, d16, d18, d0, d20, q14, q15 + pld [r0] + FILTER_6TAG_8BITS_AVERAGE_WITH_0 d3, d5, d7, d17, d19, d1, d21, q14, q15 + vst1.u8 {q10}, [r2], r3 //write 8th 16Byte + + //q2, q3, q4, q5, q0 --> q0~q4 + vswp q0, q8 + vswp q0, q2 + vmov q1, q3 + vmov q3, q9 //q0~q4 + + sub r4, #8 + cmp r4, #0 + bne w16_xy_01_luma_loop + pop {r4} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN McHorVer01WidthEq8_neon + push {r4} + ldr r4, [sp, #4] + + sub r0, r0, r1, lsl #1 //src[-2*src_stride] + pld [r0] + pld [r0, r1] + vmov.u16 q14, #0x0014 // 20 + vld1.u8 {d0}, [r0], r1 //d0=src[-2] + vld1.u8 {d1}, [r0], r1 //d1=src[-1] + + pld [r0] + pld [r0, r1] + vshr.u16 q15, q14, #2 // 5 + vld1.u8 {d2}, [r0], r1 //d2=src[0] + vld1.u8 {d3}, [r0], r1 //d3=src[1] + + vld1.u8 {d4}, [r0], r1 //d4=src[2] + vld1.u8 {d5}, [r0], r1 //d5=src[3] + +w8_xy_01_mc_luma_loop: + + pld [r0] + FILTER_6TAG_8BITS_AVERAGE_WITH_0 d0, d1, d2, d3, d4, d5, d16, q14, q15 + vld1.u8 {d0}, [r0], r1 //read 2nd row + vst1.u8 {d16}, [r2], r3 //write 1st 8Byte + + pld [r0] + FILTER_6TAG_8BITS_AVERAGE_WITH_0 d1, d2, d3, d4, d5, d0, d16, q14, q15 + vld1.u8 {d1}, [r0], r1 //read 3rd row + vst1.u8 {d16}, [r2], r3 //write 2nd 8Byte + + pld [r0] + FILTER_6TAG_8BITS_AVERAGE_WITH_0 d2, d3, d4, d5, d0, d1, d16, q14, q15 + vld1.u8 {d2}, [r0], r1 //read 4th row + vst1.u8 {d16}, [r2], r3 //write 3rd 8Byte + + pld [r0] + FILTER_6TAG_8BITS_AVERAGE_WITH_0 d3, d4, d5, d0, d1, d2, d16, q14, q15 + vld1.u8 {d3}, [r0], r1 //read 5th row + vst1.u8 {d16}, [r2], r3 //write 4th 8Byte + + //d4, d5, d0, d1, d2, d3 --> d0, d1, d2, d3, d4, d5 + vswp q0, q2 + vswp q1, q2 + + sub r4, #4 + cmp r4, #0 + bne 
w8_xy_01_mc_luma_loop + + pop {r4} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN McHorVer01WidthEq4_neon + push {r4, r5, r6, r7} + sub r0, r0, r1, lsl #1 //src[-2*src_stride] + pld [r0] + pld [r0, r1] + vmov.u16 q14, #0x0014 // 20 + ldr r4, [r0], r1 //r4=src[-2] + ldr r5, [r0], r1 //r5=src[-1] + + pld [r0] + pld [r0, r1] + vshr.u16 q15, q14, #2 // 5 + ldr r6, [r0], r1 //r6=src[0] + ldr r7, [r0], r1 //r7=src[1] + + vmov d0, r4, r5 + vmov d1, r5, r6 + vmov d2, r6, r7 + + ldr r4, [r0], r1 //r4=src[2] + vmov d3, r7, r4 + ldr r7, [sp, #16] + +w4_xy_01_mc_luma_loop: + +// pld [r0] + //using reserving r4 + ldr r5, [r0], r1 //r5=src[3] + ldr r6, [r0], r1 //r6=src[0] + vmov d4, r4, r5 + vmov d5, r5, r6 //reserved r6 + + FILTER_6TAG_8BITS_AVERAGE_WITH_0 d0, d1, d2, d3, d4, d5, d16, q14, q15 + vmov r4, r5, d16 + str r4, [r2], r3 //write 1st 4Byte + str r5, [r2], r3 //write 2nd 4Byte + + ldr r5, [r0], r1 //r5=src[1] + ldr r4, [r0], r1 //r4=src[2] + vmov d0, r6, r5 + vmov d1, r5, r4 //reserved r4 + + FILTER_6TAG_8BITS_AVERAGE_WITH_0 d2, d3, d4, d5, d0, d1, d16, q14, q15 + vmov r5, r6, d16 + str r5, [r2], r3 //write 3rd 4Byte + str r6, [r2], r3 //write 4th 4Byte + + //d4, d5, d0, d1 --> d0, d1, d2, d3 + vmov q1, q0 + vmov q0, q2 + + sub r7, #4 + cmp r7, #0 + bne w4_xy_01_mc_luma_loop + + pop {r4, r5, r6, r7} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN McHorVer03WidthEq16_neon + push {r4} + ldr r4, [sp, #4] + + sub r0, r0, r1, lsl #1 //src[-2*src_stride] + pld [r0] + pld [r0, r1] + vmov.u16 q14, #0x0014 // 20 + vld1.u8 {q0}, [r0], r1 //q0=src[-2] + vld1.u8 {q1}, [r0], r1 //q1=src[-1] + + pld [r0] + pld [r0, r1] + vshr.u16 q15, q14, #2 // 5 + vld1.u8 {q2}, [r0], r1 //q2=src[0] + vld1.u8 {q3}, [r0], r1 //q3=src[1] + vld1.u8 {q8}, [r0], r1 //q8=src[2] + +w16_xy_03_luma_loop: + + vld1.u8 {q9}, [r0], r1 //q9=src[3] + + FILTER_6TAG_8BITS_AVERAGE_WITH_1 d0, d2, d4, d6, d16, d18, d20, q14, q15 + pld [r0] + FILTER_6TAG_8BITS_AVERAGE_WITH_1 d1, d3, d5, d7, d17, d19, d21, q14, q15 + vld1.u8 
{q0}, [r0], r1 //read 2nd row + vst1.u8 {q10}, [r2], r3 //write 1st 16Byte + + FILTER_6TAG_8BITS_AVERAGE_WITH_1 d2, d4, d6, d16, d18, d0, d20, q14, q15 + pld [r0] + FILTER_6TAG_8BITS_AVERAGE_WITH_1 d3, d5, d7, d17, d19, d1, d21, q14, q15 + vld1.u8 {q1}, [r0], r1 //read 3rd row + vst1.u8 {q10}, [r2], r3 //write 2nd 16Byte + + FILTER_6TAG_8BITS_AVERAGE_WITH_1 d4, d6, d16, d18, d0, d2, d20, q14, q15 + pld [r0] + FILTER_6TAG_8BITS_AVERAGE_WITH_1 d5, d7, d17, d19, d1, d3, d21, q14, q15 + vld1.u8 {q2}, [r0], r1 //read 4th row + vst1.u8 {q10}, [r2], r3 //write 3rd 16Byte + + FILTER_6TAG_8BITS_AVERAGE_WITH_1 d6, d16, d18, d0, d2, d4, d20, q14, q15 + pld [r0] + FILTER_6TAG_8BITS_AVERAGE_WITH_1 d7, d17, d19, d1, d3, d5, d21, q14, q15 + vld1.u8 {q3}, [r0], r1 //read 5th row + vst1.u8 {q10}, [r2], r3 //write 4th 16Byte + + FILTER_6TAG_8BITS_AVERAGE_WITH_1 d16, d18, d0, d2, d4, d6, d20, q14, q15 + pld [r0] + FILTER_6TAG_8BITS_AVERAGE_WITH_1 d17, d19, d1, d3, d5, d7, d21, q14, q15 + vld1.u8 {q8}, [r0], r1 //read 6th row + vst1.u8 {q10}, [r2], r3 //write 5th 16Byte + + FILTER_6TAG_8BITS_AVERAGE_WITH_1 d18, d0, d2, d4, d6, d16, d20, q14, q15 + pld [r0] + FILTER_6TAG_8BITS_AVERAGE_WITH_1 d19, d1, d3, d5, d7, d17, d21, q14, q15 + vld1.u8 {q9}, [r0], r1 //read 7th row + vst1.u8 {q10}, [r2], r3 //write 6th 16Byte + + FILTER_6TAG_8BITS_AVERAGE_WITH_1 d0, d2, d4, d6, d16, d18, d20, q14, q15 + pld [r0] + FILTER_6TAG_8BITS_AVERAGE_WITH_1 d1, d3, d5, d7, d17, d19, d21, q14, q15 + vld1.u8 {q0}, [r0], r1 //read 8th row + vst1.u8 {q10}, [r2], r3 //write 7th 16Byte + + FILTER_6TAG_8BITS_AVERAGE_WITH_1 d2, d4, d6, d16, d18, d0, d20, q14, q15 + pld [r0] + FILTER_6TAG_8BITS_AVERAGE_WITH_1 d3, d5, d7, d17, d19, d1, d21, q14, q15 + vst1.u8 {q10}, [r2], r3 //write 8th 16Byte + + //q2, q3, q8, q9, q0 --> q0~q8 + vswp q0, q8 + vswp q0, q2 + vmov q1, q3 + vmov q3, q9 //q0~q8 + + sub r4, #8 + cmp r4, #0 + bne w16_xy_03_luma_loop + pop {r4} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN 
McHorVer03WidthEq8_neon + push {r4} + ldr r4, [sp, #4] + + sub r0, r0, r1, lsl #1 //src[-2*src_stride] + pld [r0] + pld [r0, r1] + vmov.u16 q14, #0x0014 // 20 + vld1.u8 {d0}, [r0], r1 //d0=src[-2] + vld1.u8 {d1}, [r0], r1 //d1=src[-1] + + pld [r0] + pld [r0, r1] + vshr.u16 q15, q14, #2 // 5 + vld1.u8 {d2}, [r0], r1 //d2=src[0] + vld1.u8 {d3}, [r0], r1 //d3=src[1] + + vld1.u8 {d4}, [r0], r1 //d4=src[2] + vld1.u8 {d5}, [r0], r1 //d5=src[3] + +w8_xy_03_mc_luma_loop: + + pld [r0] + FILTER_6TAG_8BITS_AVERAGE_WITH_1 d0, d1, d2, d3, d4, d5, d16, q14, q15 + vld1.u8 {d0}, [r0], r1 //read 2nd row + vst1.u8 {d16}, [r2], r3 //write 1st 8Byte + + pld [r0] + FILTER_6TAG_8BITS_AVERAGE_WITH_1 d1, d2, d3, d4, d5, d0, d16, q14, q15 + vld1.u8 {d1}, [r0], r1 //read 3rd row + vst1.u8 {d16}, [r2], r3 //write 2nd 8Byte + + pld [r0] + FILTER_6TAG_8BITS_AVERAGE_WITH_1 d2, d3, d4, d5, d0, d1, d16, q14, q15 + vld1.u8 {d2}, [r0], r1 //read 4th row + vst1.u8 {d16}, [r2], r3 //write 3rd 8Byte + + pld [r0] + FILTER_6TAG_8BITS_AVERAGE_WITH_1 d3, d4, d5, d0, d1, d2, d16, q14, q15 + vld1.u8 {d3}, [r0], r1 //read 5th row + vst1.u8 {d16}, [r2], r3 //write 4th 8Byte + + //d4, d5, d0, d1, d2, d3 --> d0, d1, d2, d3, d4, d5 + vswp q0, q2 + vswp q1, q2 + + sub r4, #4 + cmp r4, #0 + bne w8_xy_03_mc_luma_loop + + pop {r4} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN McHorVer03WidthEq4_neon + push {r4, r5, r6, r7} + sub r0, r0, r1, lsl #1 //src[-2*src_stride] + pld [r0] + pld [r0, r1] + vmov.u16 q14, #0x0014 // 20 + ldr r4, [r0], r1 //r4=src[-2] + ldr r5, [r0], r1 //r5=src[-1] + + pld [r0] + pld [r0, r1] + vshr.u16 q15, q14, #2 // 5 + ldr r6, [r0], r1 //r6=src[0] + ldr r7, [r0], r1 //r7=src[1] + + vmov d0, r4, r5 + vmov d1, r5, r6 + vmov d2, r6, r7 + + ldr r4, [r0], r1 //r4=src[2] + vmov d3, r7, r4 + ldr r7, [sp, #16] + +w4_xy_03_mc_luma_loop: + +// pld [r0] + //using reserving r4 + ldr r5, [r0], r1 //r5=src[3] + ldr r6, [r0], r1 //r6=src[0] + vmov d4, r4, r5 + vmov d5, r5, r6 //reserved r6 + + 
FILTER_6TAG_8BITS_AVERAGE_WITH_1 d0, d1, d2, d3, d4, d5, d16, q14, q15 + vmov r4, r5, d16 + str r4, [r2], r3 //write 1st 4Byte + str r5, [r2], r3 //write 2nd 4Byte + + ldr r5, [r0], r1 //r5=src[1] + ldr r4, [r0], r1 //r4=src[2] + vmov d0, r6, r5 + vmov d1, r5, r4 //reserved r4 + + FILTER_6TAG_8BITS_AVERAGE_WITH_1 d2, d3, d4, d5, d0, d1, d16, q14, q15 + vmov r5, r6, d16 + str r5, [r2], r3 //write 3rd 4Byte + str r6, [r2], r3 //write 4th 4Byte + + //d4, d5, d0, d1 --> d0, d1, d2, d3 + vmov q1, q0 + vmov q0, q2 + + sub r7, #4 + cmp r7, #0 + bne w4_xy_03_mc_luma_loop + + pop {r4, r5, r6, r7} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN McHorVer02WidthEq16_neon + push {r4} + ldr r4, [sp, #4] + + sub r0, r0, r1, lsl #1 //src[-2*src_stride] + pld [r0] + pld [r0, r1] + vmov.u16 q14, #0x0014 // 20 + vld1.u8 {q0}, [r0], r1 //q0=src[-2] + vld1.u8 {q1}, [r0], r1 //q1=src[-1] + + pld [r0] + pld [r0, r1] + vshr.u16 q15, q14, #2 // 5 + vld1.u8 {q2}, [r0], r1 //q2=src[0] + vld1.u8 {q3}, [r0], r1 //q3=src[1] + vld1.u8 {q8}, [r0], r1 //q8=src[2] + +w16_v_mc_luma_loop: + + vld1.u8 {q9}, [r0], r1 //q9=src[3] + + FILTER_6TAG_8BITS d0, d2, d4, d6, d16, d18, d20, q14, q15 + pld [r0] + FILTER_6TAG_8BITS d1, d3, d5, d7, d17, d19, d21, q14, q15 + vld1.u8 {q0}, [r0], r1 //read 2nd row + vst1.u8 {q10}, [r2], r3 //write 1st 16Byte + + FILTER_6TAG_8BITS d2, d4, d6, d16, d18, d0, d20, q14, q15 + pld [r0] + FILTER_6TAG_8BITS d3, d5, d7, d17, d19, d1, d21, q14, q15 + vld1.u8 {q1}, [r0], r1 //read 3rd row + vst1.u8 {q10}, [r2], r3 //write 2nd 16Byte + + FILTER_6TAG_8BITS d4, d6, d16, d18, d0, d2, d20, q14, q15 + pld [r0] + FILTER_6TAG_8BITS d5, d7, d17, d19, d1, d3, d21, q14, q15 + vld1.u8 {q2}, [r0], r1 //read 4th row + vst1.u8 {q10}, [r2], r3 //write 3rd 16Byte + + FILTER_6TAG_8BITS d6, d16, d18, d0, d2, d4, d20, q14, q15 + pld [r0] + FILTER_6TAG_8BITS d7, d17, d19, d1, d3, d5, d21, q14, q15 + vld1.u8 {q3}, [r0], r1 //read 5th row + vst1.u8 {q10}, [r2], r3 //write 4th 16Byte + + FILTER_6TAG_8BITS 
d16, d18, d0, d2, d4, d6, d20, q14, q15 + pld [r0] + FILTER_6TAG_8BITS d17, d19, d1, d3, d5, d7, d21, q14, q15 + vld1.u8 {q8}, [r0], r1 //read 6th row + vst1.u8 {q10}, [r2], r3 //write 5th 16Byte + + FILTER_6TAG_8BITS d18, d0, d2, d4, d6, d16, d20, q14, q15 + pld [r0] + FILTER_6TAG_8BITS d19, d1, d3, d5, d7, d17, d21, q14, q15 + vld1.u8 {q9}, [r0], r1 //read 7th row + vst1.u8 {q10}, [r2], r3 //write 6th 16Byte + + FILTER_6TAG_8BITS d0, d2, d4, d6, d16, d18, d20, q14, q15 + pld [r0] + FILTER_6TAG_8BITS d1, d3, d5, d7, d17, d19, d21, q14, q15 + vld1.u8 {q0}, [r0], r1 //read 8th row + vst1.u8 {q10}, [r2], r3 //write 7th 16Byte + + FILTER_6TAG_8BITS d2, d4, d6, d16, d18, d0, d20, q14, q15 + pld [r0] + FILTER_6TAG_8BITS d3, d5, d7, d17, d19, d1, d21, q14, q15 + vst1.u8 {q10}, [r2], r3 //write 8th 16Byte + + //q2, q3, q8, q9, q0 --> q0~q8 + vswp q0, q8 + vswp q0, q2 + vmov q1, q3 + vmov q3, q9 //q0~q8 + + sub r4, #8 + cmp r4, #0 + bne w16_v_mc_luma_loop + pop {r4} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN McHorVer02WidthEq8_neon + push {r4} + ldr r4, [sp, #4] + + sub r0, r0, r1, lsl #1 //src[-2*src_stride] + pld [r0] + pld [r0, r1] + vmov.u16 q14, #0x0014 // 20 + vld1.u8 {d0}, [r0], r1 //d0=src[-2] + vld1.u8 {d1}, [r0], r1 //d1=src[-1] + + pld [r0] + pld [r0, r1] + vshr.u16 q15, q14, #2 // 5 + vld1.u8 {d2}, [r0], r1 //d2=src[0] + vld1.u8 {d3}, [r0], r1 //d3=src[1] + + vld1.u8 {d4}, [r0], r1 //d4=src[2] + vld1.u8 {d5}, [r0], r1 //d5=src[3] + +w8_v_mc_luma_loop: + + pld [r0] + FILTER_6TAG_8BITS d0, d1, d2, d3, d4, d5, d16, q14, q15 + vld1.u8 {d0}, [r0], r1 //read 2nd row + vst1.u8 {d16}, [r2], r3 //write 1st 8Byte + + pld [r0] + FILTER_6TAG_8BITS d1, d2, d3, d4, d5, d0, d16, q14, q15 + vld1.u8 {d1}, [r0], r1 //read 3rd row + vst1.u8 {d16}, [r2], r3 //write 2nd 8Byte + + pld [r0] + FILTER_6TAG_8BITS d2, d3, d4, d5, d0, d1, d16, q14, q15 + vld1.u8 {d2}, [r0], r1 //read 4th row + vst1.u8 {d16}, [r2], r3 //write 3rd 8Byte + + pld [r0] + FILTER_6TAG_8BITS d3, d4, d5, d0, d1, 
d2, d16, q14, q15 + vld1.u8 {d3}, [r0], r1 //read 5th row + vst1.u8 {d16}, [r2], r3 //write 4th 8Byte + + //d4, d5, d0, d1, d2, d3 --> d0, d1, d2, d3, d4, d5 + vswp q0, q2 + vswp q1, q2 + + sub r4, #4 + cmp r4, #0 + bne w8_v_mc_luma_loop + + pop {r4} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN McHorVer02WidthEq4_neon + push {r4, r5, r6, r7} + sub r0, r0, r1, lsl #1 //src[-2*src_stride] + pld [r0] + pld [r0, r1] + vmov.u16 q14, #0x0014 // 20 + ldr r4, [r0], r1 //r4=src[-2] + ldr r5, [r0], r1 //r5=src[-1] + + pld [r0] + pld [r0, r1] + vshr.u16 q15, q14, #2 // 5 + ldr r6, [r0], r1 //r6=src[0] + ldr r7, [r0], r1 //r7=src[1] + + vmov d0, r4, r5 + vmov d1, r5, r6 + vmov d2, r6, r7 + + ldr r4, [r0], r1 //r4=src[2] + vmov d3, r7, r4 + ldr r7, [sp, #16] + +w4_v_mc_luma_loop: + +// pld [r0] + //using reserving r4 + ldr r5, [r0], r1 //r5=src[3] + ldr r6, [r0], r1 //r6=src[0] + vmov d4, r4, r5 + vmov d5, r5, r6 //reserved r6 + + FILTER_6TAG_8BITS d0, d1, d2, d3, d4, d5, d16, q14, q15 + vmov r4, r5, d16 + str r4, [r2], r3 //write 1st 4Byte + str r5, [r2], r3 //write 2nd 4Byte + + ldr r5, [r0], r1 //r5=src[1] + ldr r4, [r0], r1 //r4=src[2] + vmov d0, r6, r5 + vmov d1, r5, r4 //reserved r4 + + FILTER_6TAG_8BITS d2, d3, d4, d5, d0, d1, d16, q14, q15 + vmov r5, r6, d16 + str r5, [r2], r3 //write 3rd 4Byte + str r6, [r2], r3 //write 4th 4Byte + + //d4, d5, d0, d1 --> d0, d1, d2, d3 + vmov q1, q0 + vmov q0, q2 + + sub r7, #4 + cmp r7, #0 + bne w4_v_mc_luma_loop + + pop {r4, r5, r6, r7} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN McHorVer22WidthEq16_neon + push {r4} + vpush {q4-q7} + ldr r4, [sp, #68] + + sub r0, #2 //src[-2] + sub r0, r0, r1, lsl #1 //src[-2*src_stride-2] + pld [r0] + pld [r0, r1] + + vmov.u16 q14, #0x0014 // 20 + vld1.u8 {d0-d2}, [r0], r1 //use 21(16+5), =src[-2] + vld1.u8 {d3-d5}, [r0], r1 //use 21(16+5), =src[-1] + + pld [r0] + pld [r0, r1] + vshr.u16 q15, q14, #2 // 5 + + vld1.u8 {d6-d8}, [r0], r1 //use 21(16+5), =src[0] + vld1.u8 {d9-d11}, [r0], r1 //use 
21(16+5), =src[1] + pld [r0] + pld [r0, r1] + vld1.u8 {d12-d14}, [r0], r1 //use 21(16+5), =src[2] + +w16_hv_mc_luma_loop: + + vld1.u8 {d15-d17}, [r0], r1 //use 21(16+5), =src[3] + //the 1st row + pld [r0] + // vertical filtered into q9/q10 + FILTER_6TAG_8BITS_TO_16BITS d0, d3, d6, d9, d12, d15, q9, q14, q15 // 8 avail + FILTER_6TAG_8BITS_TO_16BITS d1, d4, d7,d10, d13, d16,q10, q14, q15 // 8 avail + // horizon filtered + UNPACK_2_16BITS_TO_ABC q9, q10, q11, q12, q13 + FILTER_3_IN_16BITS_TO_8BITS q11, q12, q13, d0 //output to q0[0] + + // vertical filtered into q10/q11 + FILTER_6TAG_8BITS_TO_16BITS d2, d5, d8,d11, d14, d17,q11, q14, q15 // only 5 avail + // horizon filtered + UNPACK_2_16BITS_TO_ABC q10, q11, q9, q12, q13 + FILTER_3_IN_16BITS_TO_8BITS q9, q12, q13, d1 //output to q0[1] + vst1.u8 {q0}, [r2], r3 //write 16Byte + + + vld1.u8 {d0-d2}, [r0], r1 //read 2nd row + //the 2nd row + pld [r0] + // vertical filtered into q9/q10 + FILTER_6TAG_8BITS_TO_16BITS d3, d6, d9, d12, d15, d0, q9, q14, q15 // 8 avail + FILTER_6TAG_8BITS_TO_16BITS d4, d7,d10, d13, d16, d1,q10, q14, q15 // 8 avail + // horizon filtered + UNPACK_2_16BITS_TO_ABC q9, q10, q11, q12, q13 + FILTER_3_IN_16BITS_TO_8BITS q11, q12, q13, d3 //output to d3 + + // vertical filtered into q10/q11 + FILTER_6TAG_8BITS_TO_16BITS d5, d8,d11, d14, d17, d2,q11, q14, q15 // only 5 avail + // horizon filtered + UNPACK_2_16BITS_TO_ABC q10, q11, q9, q12, q13 + FILTER_3_IN_16BITS_TO_8BITS q9, q12, q13, d4 //output to d4 + + vst1.u8 {d3, d4}, [r2], r3 //write 16Byte + + vld1.u8 {d3-d5}, [r0], r1 //read 3rd row + //the 3rd row + pld [r0] + // vertical filtered into q9/q10 + FILTER_6TAG_8BITS_TO_16BITS d6, d9, d12, d15, d0, d3, q9, q14, q15 // 8 avail + FILTER_6TAG_8BITS_TO_16BITS d7,d10, d13, d16, d1, d4,q10, q14, q15 // 8 avail + // horizon filtered + UNPACK_2_16BITS_TO_ABC q9, q10, q11, q12, q13 + FILTER_3_IN_16BITS_TO_8BITS q11, q12, q13, d6 //output to d6 + + // vertical filtered into q10/q11 + 
FILTER_6TAG_8BITS_TO_16BITS d8,d11, d14, d17, d2, d5,q11, q14, q15 // only 5 avail + // horizon filtered + UNPACK_2_16BITS_TO_ABC q10, q11, q9, q12, q13 + FILTER_3_IN_16BITS_TO_8BITS q9, q12, q13, d7 //output to d7 + vst1.u8 {d6, d7}, [r2], r3 //write 16Byte + + vld1.u8 {d6-d8}, [r0], r1 //read 4th row + //the 4th row + pld [r0] + // vertical filtered into q9/q10 + FILTER_6TAG_8BITS_TO_16BITS d9, d12, d15, d0, d3, d6, q9, q14, q15 // 8 avail + FILTER_6TAG_8BITS_TO_16BITS d10, d13, d16, d1, d4, d7,q10, q14, q15 // 8 avail + // horizon filtered + UNPACK_2_16BITS_TO_ABC q9, q10, q11, q12, q13 + FILTER_3_IN_16BITS_TO_8BITS q11, q12, q13, d9 //output to d9 + // vertical filtered into q10/q11 + FILTER_6TAG_8BITS_TO_16BITS d11, d14, d17, d2, d5, d8,q11, q14, q15 // only 5 avail + // horizon filtered + UNPACK_2_16BITS_TO_ABC q10, q11, q9, q12, q13 + FILTER_3_IN_16BITS_TO_8BITS q9, q12, q13, d10 //output to d10 + vst1.u8 {d9, d10}, [r2], r3 //write 16Byte + + //d12~d17(q6~q8), d0~d8(q0~q3+d8), --> d0~d14 + vswp q0, q6 + vswp q6, q3 + vmov q5, q2 + vmov q2, q8 + + vmov d20,d8 + vmov q4, q1 + vmov q1, q7 + vmov d14,d20 + + sub r4, #4 + cmp r4, #0 + bne w16_hv_mc_luma_loop + vpop {q4-q7} + pop {r4} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN McHorVer22WidthEq8_neon + push {r4} + vpush {q4} + ldr r4, [sp, #20] + + sub r0, #2 //src[-2] + sub r0, r0, r1, lsl #1 //src[-2*src_stride-2] + pld [r0] + pld [r0, r1] + + vmov.u16 q14, #0x0014 // 20 + vld1.u8 {q0}, [r0], r1 //use 13(8+5), =src[-2] + vld1.u8 {q1}, [r0], r1 //use 13(8+5), =src[-1] + + pld [r0] + pld [r0, r1] + vshr.u16 q15, q14, #2 // 5 + + vld1.u8 {q2}, [r0], r1 //use 13(8+5), =src[0] + vld1.u8 {q3}, [r0], r1 //use 13(8+5), =src[1] + pld [r0] + pld [r0, r1] + vld1.u8 {q4}, [r0], r1 //use 13(8+5), =src[2] + +w8_hv_mc_luma_loop: + + vld1.u8 {q8}, [r0], r1 //use 13(8+5), =src[3] + //the 1st row + pld [r0] + // vertical filtered into q9/q10 + FILTER_6TAG_8BITS_TO_16BITS d0, d2, d4, d6, d8, d16, q9, q14, q15 // 8 avail + 
FILTER_6TAG_8BITS_TO_16BITS d1, d3, d5, d7, d9, d17, q10, q14, q15 // 5 avail + // horizon filtered + UNPACK_2_16BITS_TO_ABC q9, q10, q11, q12, q13 + FILTER_3_IN_16BITS_TO_8BITS q11, q12, q13, d18 //output to q9[0] + vst1.u8 d18, [r2], r3 //write 8Byte + + vld1.u8 {q0}, [r0], r1 //read 2nd row + //the 2nd row + pld [r0] + // vertical filtered into q9/q10 + FILTER_6TAG_8BITS_TO_16BITS d2, d4, d6, d8, d16, d0, q9, q14, q15 // 8 avail + FILTER_6TAG_8BITS_TO_16BITS d3, d5, d7, d9, d17, d1, q10, q14, q15 // 5 avail + // horizon filtered + UNPACK_2_16BITS_TO_ABC q9, q10, q11, q12, q13 + FILTER_3_IN_16BITS_TO_8BITS q11, q12, q13, d18 //output to q9[0] + vst1.u8 d18, [r2], r3 //write 8Byte + + vld1.u8 {q1}, [r0], r1 //read 3rd row + //the 3rd row + pld [r0] + // vertical filtered into q9/q10 + FILTER_6TAG_8BITS_TO_16BITS d4, d6, d8, d16, d0, d2, q9, q14, q15 // 8 avail + FILTER_6TAG_8BITS_TO_16BITS d5, d7, d9, d17, d1, d3, q10, q14, q15 // 5 avail + // horizon filtered + UNPACK_2_16BITS_TO_ABC q9, q10, q11, q12, q13 + FILTER_3_IN_16BITS_TO_8BITS q11, q12, q13, d18 //output to q9[0] + vst1.u8 d18, [r2], r3 //write 8Byte + + vld1.u8 {q2}, [r0], r1 //read 4th row + //the 4th row + pld [r0] + // vertical filtered into q9/q10 + FILTER_6TAG_8BITS_TO_16BITS d6, d8, d16, d0, d2, d4, q9, q14, q15 // 8 avail + FILTER_6TAG_8BITS_TO_16BITS d7, d9, d17, d1, d3, d5, q10, q14, q15 // 5 avail + // horizon filtered + UNPACK_2_16BITS_TO_ABC q9, q10, q11, q12, q13 + FILTER_3_IN_16BITS_TO_8BITS q11, q12, q13, d18 //output to q9[0] + vst1.u8 d18, [r2], r3 //write 8Byte + + //q4~q5, q0~q2, --> q0~q4 + vswp q0, q4 + vswp q2, q4 + vmov q3, q1 + vmov q1, q8 + + sub r4, #4 + cmp r4, #0 + bne w8_hv_mc_luma_loop + vpop {q4} + pop {r4} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN McHorVer22WidthEq4_neon + push {r4 ,r5, r6} + vpush {q4-q7} + ldr r6, [sp, #76] + + sub r0, #2 //src[-2] + sub r0, r0, r1, lsl #1 //src[-2*src_stride-2] + pld [r0] + pld [r0, r1] + + vmov.u16 q14, #0x0014 // 20 + vld1.u8 {q0}, 
[r0], r1 //use 9(4+5), =src[-2] + vld1.u8 {q1}, [r0], r1 //use 9(4+5), =src[-1] + + pld [r0] + pld [r0, r1] + vshr.u16 q15, q14, #2 // 5 + + vld1.u8 {q2}, [r0], r1 //use 9(4+5), =src[0] + vld1.u8 {q3}, [r0], r1 //use 9(4+5), =src[1] + pld [r0] + pld [r0, r1] + vld1.u8 {q4}, [r0], r1 //use 9(4+5), =src[2] + +w4_hv_mc_luma_loop: + + vld1.u8 {q5}, [r0], r1 //use 9(4+5), =src[3] + vld1.u8 {q6}, [r0], r1 //use 9(4+5), =src[4] + + //the 1st&2nd row + pld [r0] + pld [r0, r1] + // vertical filtered + FILTER_6TAG_8BITS_TO_16BITS d0, d2, d4, d6, d8, d10, q7, q14, q15 // 8 avail + FILTER_6TAG_8BITS_TO_16BITS d1, d3, d5, d7, d9, d11, q8, q14, q15 // 1 avail + + FILTER_6TAG_8BITS_TO_16BITS d2, d4, d6, d8,d10, d12, q9, q14, q15 // 8 avail + FILTER_6TAG_8BITS_TO_16BITS d3, d5, d7, d9,d11, d13,q10, q14, q15 // 1 avail + // horizon filtered + UNPACK_2_16BITS_TO_ABC q7, q8, q11, q12, q13 //4 avail + UNPACK_2_16BITS_TO_ABC q9,q10, q0, q7, q8 //4 avail + + vmov d23, d0 + vmov d25, d14 + vmov d27, d16 + + FILTER_3_IN_16BITS_TO_8BITS q11, q12, q13, d22 //output to q11[0] + vmov r4 ,r5, d22 + str r4, [r2], r3 //write 4Byte + str r5, [r2], r3 //write 4Byte + + //the 3rd&4th row + vld1.u8 {q0}, [r0], r1 //use 9(4+5), =src[3] + vld1.u8 {q1}, [r0], r1 //use 9(4+5), =src[4] + pld [r0] + pld [r0, r1] + // vertical filtered + FILTER_6TAG_8BITS_TO_16BITS d4, d6, d8, d10, d12, d0, q7, q14, q15 // 8 avail + FILTER_6TAG_8BITS_TO_16BITS d5, d7, d9, d11, d13, d1, q8, q14, q15 // 1 avail + + FILTER_6TAG_8BITS_TO_16BITS d6, d8,d10, d12, d0, d2, q9, q14, q15 // 8 avail + FILTER_6TAG_8BITS_TO_16BITS d7, d9,d11, d13, d1, d3,q10, q14, q15 // 1 avail + // horizon filtered + UNPACK_2_16BITS_TO_ABC q7, q8, q11, q12, q13 //4 avail + UNPACK_2_16BITS_TO_ABC q9,q10, q2, q7, q8 //4 avail + + vmov d23, d4 + vmov d25, d14 + vmov d27, d16 + + FILTER_3_IN_16BITS_TO_8BITS q11, q12, q13, d22 //output to q11[0] + vmov r4 ,r5, d22 + str r4, [r2], r3 //write 4Byte + str r5, [r2], r3 //write 4Byte + + //q4~q6, q0~q1, --> 
q0~q4 + vswp q4, q0 + vmov q3, q4 + vmov q4, q1 + vmov q1, q5 + vmov q2, q6 + + sub r6, #4 + cmp r6, #0 + bne w4_hv_mc_luma_loop + + vpop {q4-q7} + pop {r4, r5, r6} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN McCopyWidthEq16_neon + push {r4} + ldr r4, [sp, #4] +w16_copy_loop: + vld1.u8 {q0}, [r0], r1 + sub r4, #2 + vld1.u8 {q1}, [r0], r1 + vst1.u8 {q0}, [r2], r3 + cmp r4, #0 + vst1.u8 {q1}, [r2], r3 + bne w16_copy_loop + + pop {r4} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN McCopyWidthEq8_neon + push {r4} + ldr r4, [sp, #4] +w8_copy_loop: + vld1.u8 {d0}, [r0], r1 + vld1.u8 {d1}, [r0], r1 + vst1.u8 {d0}, [r2], r3 + vst1.u8 {d1}, [r2], r3 + sub r4, #2 + cmp r4, #0 + bne w8_copy_loop + + pop {r4} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN McCopyWidthEq4_neon + push {r4, r5, r6} + ldr r4, [sp, #12] +w4_copy_loop: + ldr r5, [r0], r1 + ldr r6, [r0], r1 + str r5, [r2], r3 + str r6, [r2], r3 + + sub r4, #2 + cmp r4, #0 + bne w4_copy_loop + + pop {r4, r5, r6} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN PixelAvgWidthEq16_neon + push {r4} + ldr r4, [sp, #4] +w16_pix_avg_loop: + vld1.u8 {q0}, [r2]! + vld1.u8 {q1}, [r3]! + vld1.u8 {q2}, [r2]! + vld1.u8 {q3}, [r3]! + + vld1.u8 {q8}, [r2]! + vld1.u8 {q9}, [r3]! + vld1.u8 {q10}, [r2]! + vld1.u8 {q11}, [r3]! 
+ + AVERAGE_TWO_8BITS d0, d0, d2 + AVERAGE_TWO_8BITS d1, d1, d3 + vst1.u8 {q0}, [r0], r1 + + AVERAGE_TWO_8BITS d4, d4, d6 + AVERAGE_TWO_8BITS d5, d5, d7 + vst1.u8 {q2}, [r0], r1 + + AVERAGE_TWO_8BITS d16, d16, d18 + AVERAGE_TWO_8BITS d17, d17, d19 + vst1.u8 {q8}, [r0], r1 + + AVERAGE_TWO_8BITS d20, d20, d22 + AVERAGE_TWO_8BITS d21, d21, d23 + vst1.u8 {q10}, [r0], r1 + + sub r4, #4 + cmp r4, #0 + bne w16_pix_avg_loop + + pop {r4} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN PixelAvgWidthEq8_neon + push {r4, r5} + ldr r4, [sp, #8] + mov r5, #16 +w8_pix_avg_loop: + + vld1.u8 {d0}, [r2], r5 + vld1.u8 {d2}, [r3], r5 + vld1.u8 {d1}, [r2], r5 + vld1.u8 {d3}, [r3], r5 + + AVERAGE_TWO_8BITS d0, d0, d2 + AVERAGE_TWO_8BITS d1, d1, d3 + vst1.u8 {d0}, [r0], r1 + vst1.u8 {d1}, [r0], r1 + + vld1.u8 {d4}, [r2], r5 + vld1.u8 {d6}, [r3], r5 + vld1.u8 {d5}, [r2], r5 + vld1.u8 {d7}, [r3], r5 + + AVERAGE_TWO_8BITS d4, d4, d6 + AVERAGE_TWO_8BITS d5, d5, d7 + vst1.u8 {d4}, [r0], r1 + vst1.u8 {d5}, [r0], r1 + + sub r4, #4 + cmp r4, #0 + bne w8_pix_avg_loop + + pop {r4, r5} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN PixelAvgWidthEq4_neon + push {r4-r8} + ldr r4, [sp, #20] +w4_pix_avg_loop: + + ldr r5, [r2] + ldr r6, [r2, #16] + ldr r7, [r3] + ldr r8, [r3, #16] + add r2, #32 + add r3, #32 + + vmov d0, r5, r6 + vmov d1, r7, r8 + AVERAGE_TWO_8BITS d0, d0, d1 + vmov r5, r6, d0 + + str r5, [r0], r1 + str r6, [r0], r1 + + sub r4, #2 + cmp r4, #0 + bne w4_pix_avg_loop + + pop {r4-r8} +WELS_ASM_FUNC_END + +WELS_ASM_FUNC_BEGIN McChromaWidthEq8_neon + push {r4, r5} + ldr r4, [sp, #8] + ldr r5, [sp, #12] +// normal case: {cA*src[x] + cB*src[x+1]} + {cC*src[x+stride] + cD*srcp[x+stride+1]} +// we can opti it by adding vert only/ hori only cases, to be continue + vld1.u8 {d31}, [r4] //load A/B/C/D + vld1.u8 {q0}, [r0], r1 //src[x] + + vdup.u8 d28, d31[0] //A + vdup.u8 d29, d31[1] //B + vdup.u8 d30, d31[2] //C + vdup.u8 d31, d31[3] //D + + vext.u8 d1, d0, d1, #1 //src[x+1] + +w8_mc_chroma_loop: // each 
two pxl row + vld1.u8 {q1}, [r0], r1 //src[x+stride] + vld1.u8 {q2}, [r0], r1 //src[x+2*stride] + vext.u8 d3, d2, d3, #1 //src[x+stride+1] + vext.u8 d5, d4, d5, #1 //src[x+2*stride+1] + + vmull.u8 q3, d0, d28 //(src[x] * A) + vmlal.u8 q3, d1, d29 //+=(src[x+1] * B) + vmlal.u8 q3, d2, d30 //+=(src[x+stride] * C) + vmlal.u8 q3, d3, d31 //+=(src[x+stride+1] * D) + vrshrn.u16 d6, q3, #6 + vst1.u8 d6, [r2], r3 + + vmull.u8 q3, d2, d28 //(src[x] * A) + vmlal.u8 q3, d3, d29 //+=(src[x+1] * B) + vmlal.u8 q3, d4, d30 //+=(src[x+stride] * C) + vmlal.u8 q3, d5, d31 //+=(src[x+stride+1] * D) + vrshrn.u16 d6, q3, #6 + vst1.u8 d6, [r2], r3 + + vmov q0, q2 + sub r5, #2 + cmp r5, #0 + bne w8_mc_chroma_loop + + pop {r4, r5} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN McChromaWidthEq4_neon + + push {r4, r5, r6} + ldr r4, [sp, #12] + ldr r6, [sp, #16] +// normal case: {cA*src[x] + cB*src[x+1]} + {cC*src[x+stride] + cD*srcp[x+stride+1]} +// we can opti it by adding vert only/ hori only cases, to be continue + vld1.u8 {d31}, [r4] //load A/B/C/D + + vdup.u8 d28, d31[0] //A + vdup.u8 d29, d31[1] //B + vdup.u8 d30, d31[2] //C + vdup.u8 d31, d31[3] //D + +w4_mc_chroma_loop: // each two pxl row + vld1.u8 {d0}, [r0], r1 //a::src[x] + vld1.u8 {d2}, [r0], r1 //b::src[x+stride] + vld1.u8 {d4}, [r0] //c::src[x+2*stride] + + vshr.u64 d1, d0, #8 + vshr.u64 d3, d2, #8 + vshr.u64 d5, d4, #8 + + vmov q3, q1 //b::[0:7]+b::[1~8] + vtrn.32 q0, q1 //d0{a::[0:3]+b::[0:3]}; d1{a::[1:4]+b::[1:4]} + vtrn.32 q3, q2 //d6{b::[0:3]+c::[0:3]}; d7{b::[1:4]+c::[1:4]} + + vmull.u8 q1, d0, d28 //(src[x] * A) + vmlal.u8 q1, d1, d29 //+=(src[x+1] * B) + vmlal.u8 q1, d6, d30 //+=(src[x+stride] * C) + vmlal.u8 q1, d7, d31 //+=(src[x+stride+1] * D) + + vrshrn.u16 d2, q1, #6 + vmov r4, r5, d2 + str r4, [r2], r3 + str r5, [r2], r3 + + sub r6, #2 + cmp r6, #0 + bne w4_mc_chroma_loop + + pop {r4, r5, r6} +WELS_ASM_FUNC_END + +WELS_ASM_FUNC_BEGIN McHorVer20Width17_neon + push {r4-r5} + mov r4, #20 + mov r5, #1 + sub r4, r4, 
r4, lsl #(16-2) + lsl r5, #16 + ror r4, #16 + vmov d3, r5, r4 // 0x0014FFFB00010000 + + sub r3, #16 + ldr r4, [sp, #8] + + sub r0, #2 + vmov.u16 q14, #0x0014 // 20 + vshr.u16 q15, q14, #2 // 5 + +w17_h_mc_luma_loop: + vld1.u8 {d0,d1,d2}, [r0], r1 //only use 22(17+5); q0=src[-2] + + vext.8 q2, q0, q1, #1 //q2=src[-1] + vext.8 q3, q0, q1, #2 //q3=src[0] + vext.8 q8, q0, q1, #3 //q8=src[1] + vext.8 q9, q0, q1, #4 //q9=src[2] + vext.8 q10, q0, q1, #5 //q10=src[3] + + FILTER_6TAG_8BITS d0, d4, d6, d16, d18, d20, d22, q14, q15 + + FILTER_6TAG_8BITS d1, d5, d7, d17, d19, d21, d23, q14, q15 + + vst1.u8 {d22, d23}, [r2]! //write [0:15] Byte + + vsli.64 d2, d2, #8 // [0][1][2][3][4][5]XO-->O[0][1][2][3][4][5]X + FILTER_SINGLE_TAG_8BITS d2, d3, d22, q11, q1 + + vst1.u8 {d2[0]}, [r2], r3 //write 16th Byte + + sub r4, #1 + cmp r4, #0 + bne w17_h_mc_luma_loop + pop {r4-r5} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN McHorVer20Width9_neon + push {r4-r5} + mov r4, #20 + mov r5, #1 + sub r4, r4, r4, lsl #(16-2) + lsl r5, #16 + ror r4, #16 + vmov d7, r5, r4 // 0x0014FFFB00010000 + + sub r3, #8 + ldr r4, [sp, #8] + + sub r0, #2 + vmov.u16 q14, #0x0014 // 20 + vshr.u16 q15, q14, #2 // 5 + +w9_h_mc_luma_loop: + vld1.u8 {d0,d1}, [r0], r1 //only use 14(9+5); q0=src[-2] + pld [r0] + + vext.8 d2, d0, d1, #1 //d2=src[-1] + vext.8 d3, d0, d1, #2 //d3=src[0] + vext.8 d4, d0, d1, #3 //d4=src[1] + vext.8 d5, d0, d1, #4 //d5=src[2] + vext.8 d6, d0, d1, #5 //d6=src[3] + + FILTER_6TAG_8BITS d0, d2, d3, d4, d5, d6, d16, q14, q15 + + sub r4, #1 + vst1.u8 {d16}, [r2]! 
//write [0:7] Byte + + vsli.64 d2, d1, #8 // [0][1][2][3][4][5]XO-->O[0][1][2][3][4][5]X + FILTER_SINGLE_TAG_8BITS d2, d7, d18, q9, q1 + vst1.u8 {d2[0]}, [r2], r3 //write 8th Byte + + cmp r4, #0 + bne w9_h_mc_luma_loop + pop {r4-r5} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN McHorVer20Width5_neon + push {r4} + sub r3, #4 + sub r0, #2 + ldr r4, [sp, #4] + vmov.u16 q14, #0x0014 // 20 + vshr.u16 q15, q14, #2 // 5 + +w5_h_mc_luma_loop: + vld1.u8 {d0,d1}, [r0], r1 //only use 10(5+5); q0=src[-2] + pld [r0] + + vext.8 d2, d0, d1, #1 //d2=src[-1] + vext.8 d3, d0, d1, #2 //d3=src[0] + vext.8 d4, d0, d1, #3 //d4=src[1] + vext.8 d5, d0, d1, #4 //d5=src[2] + vext.8 d6, d0, d1, #5 //d6=src[3] + + FILTER_6TAG_8BITS d0, d2, d3, d4, d5, d6, d16, q14, q15 + + sub r4, #1 + vst1.u32 {d16[0]}, [r2]! //write [0:3] Byte + vst1.u8 {d16[4]}, [r2], r3 //write 5th Byte + + cmp r4, #0 + bne w5_h_mc_luma_loop + pop {r4} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN McHorVer02Height17_neon + push {r4} + ldr r4, [sp, #4] + + sub r0, r0, r1, lsl #1 //src[-2*src_stride] + pld [r0] + pld [r0, r1] + vmov.u16 q14, #0x0014 // 20 + vld1.u8 {q0}, [r0], r1 //q0=src[-2] + vld1.u8 {q1}, [r0], r1 //q1=src[-1] + + pld [r0] + pld [r0, r1] + vshr.u16 q15, q14, #2 // 5 + vld1.u8 {q2}, [r0], r1 //q2=src[0] + vld1.u8 {q3}, [r0], r1 //q3=src[1] + vld1.u8 {q8}, [r0], r1 //q8=src[2] + +w17_v_mc_luma_loop: + + vld1.u8 {q9}, [r0], r1 //q9=src[3] + + FILTER_6TAG_8BITS d0, d2, d4, d6, d16, d18, d20, q14, q15 + pld [r0] + FILTER_6TAG_8BITS d1, d3, d5, d7, d17, d19, d21, q14, q15 + vld1.u8 {q0}, [r0], r1 //read 2nd row + vst1.u8 {q10}, [r2], r3 //write 1st 16Byte + + FILTER_6TAG_8BITS d2, d4, d6, d16, d18, d0, d20, q14, q15 + pld [r0] + FILTER_6TAG_8BITS d3, d5, d7, d17, d19, d1, d21, q14, q15 + vld1.u8 {q1}, [r0], r1 //read 3rd row + vst1.u8 {q10}, [r2], r3 //write 2nd 16Byte + + FILTER_6TAG_8BITS d4, d6, d16, d18, d0, d2, d20, q14, q15 + pld [r0] + FILTER_6TAG_8BITS d5, d7, d17, d19, d1, d3, d21, q14, q15 + vld1.u8 
{q2}, [r0], r1 //read 4th row + vst1.u8 {q10}, [r2], r3 //write 3rd 16Byte + + FILTER_6TAG_8BITS d6, d16, d18, d0, d2, d4, d20, q14, q15 + pld [r0] + FILTER_6TAG_8BITS d7, d17, d19, d1, d3, d5, d21, q14, q15 + vld1.u8 {q3}, [r0], r1 //read 5th row + vst1.u8 {q10}, [r2], r3 //write 4th 16Byte + + FILTER_6TAG_8BITS d16, d18, d0, d2, d4, d6, d20, q14, q15 + pld [r0] + FILTER_6TAG_8BITS d17, d19, d1, d3, d5, d7, d21, q14, q15 + vld1.u8 {q8}, [r0], r1 //read 6th row + vst1.u8 {q10}, [r2], r3 //write 5th 16Byte + + FILTER_6TAG_8BITS d18, d0, d2, d4, d6, d16, d20, q14, q15 + pld [r0] + FILTER_6TAG_8BITS d19, d1, d3, d5, d7, d17, d21, q14, q15 + vld1.u8 {q9}, [r0], r1 //read 7th row + vst1.u8 {q10}, [r2], r3 //write 6th 16Byte + + FILTER_6TAG_8BITS d0, d2, d4, d6, d16, d18, d20, q14, q15 + pld [r0] + FILTER_6TAG_8BITS d1, d3, d5, d7, d17, d19, d21, q14, q15 + vld1.u8 {q0}, [r0], r1 //read 8th row + vst1.u8 {q10}, [r2], r3 //write 7th 16Byte + + FILTER_6TAG_8BITS d2, d4, d6, d16, d18, d0, d20, q14, q15 + pld [r0] + FILTER_6TAG_8BITS d3, d5, d7, d17, d19, d1, d21, q14, q15 + vst1.u8 {q10}, [r2], r3 //write 8th 16Byte + + //q2, q3, q8, q9, q0 --> q0~q8 + vswp q0, q8 + vswp q0, q2 + vmov q1, q3 + vmov q3, q9 //q0~q8 + + sub r4, #8 + cmp r4, #1 + bne w17_v_mc_luma_loop + // the last 16Bytes + vld1.u8 {q9}, [r0], r1 //q9=src[3] + FILTER_6TAG_8BITS d0, d2, d4, d6, d16, d18, d20, q14, q15 + FILTER_6TAG_8BITS d1, d3, d5, d7, d17, d19, d21, q14, q15 + vst1.u8 {q10}, [r2], r3 //write 1st 16Byte + + pop {r4} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN McHorVer02Height9_neon + push {r4} + ldr r4, [sp, #4] + + sub r0, r0, r1, lsl #1 //src[-2*src_stride] + pld [r0] + pld [r0, r1] + vmov.u16 q14, #0x0014 // 20 + vld1.u8 {d0}, [r0], r1 //d0=src[-2] + vld1.u8 {d1}, [r0], r1 //d1=src[-1] + + pld [r0] + pld [r0, r1] + vshr.u16 q15, q14, #2 // 5 + vld1.u8 {d2}, [r0], r1 //d2=src[0] + vld1.u8 {d3}, [r0], r1 //d3=src[1] + + vld1.u8 {d4}, [r0], r1 //d4=src[2] + vld1.u8 {d5}, [r0], r1 //d5=src[3] 
+ +w9_v_mc_luma_loop: + + pld [r0] + FILTER_6TAG_8BITS d0, d1, d2, d3, d4, d5, d16, q14, q15 + vld1.u8 {d0}, [r0], r1 //read 2nd row + vst1.u8 {d16}, [r2], r3 //write 1st 8Byte + + pld [r0] + FILTER_6TAG_8BITS d1, d2, d3, d4, d5, d0, d16, q14, q15 + vld1.u8 {d1}, [r0], r1 //read 3rd row + vst1.u8 {d16}, [r2], r3 //write 2nd 8Byte + + pld [r0] + FILTER_6TAG_8BITS d2, d3, d4, d5, d0, d1, d16, q14, q15 + vld1.u8 {d2}, [r0], r1 //read 4th row + vst1.u8 {d16}, [r2], r3 //write 3rd 8Byte + + pld [r0] + FILTER_6TAG_8BITS d3, d4, d5, d0, d1, d2, d16, q14, q15 + vld1.u8 {d3}, [r0], r1 //read 5th row + vst1.u8 {d16}, [r2], r3 //write 4th 8Byte + + //d4, d5, d0, d1, d2, d3 --> d0, d1, d2, d3, d4, d5 + vswp q0, q2 + vswp q1, q2 + + sub r4, #4 + cmp r4, #1 + bne w9_v_mc_luma_loop + + FILTER_6TAG_8BITS d0, d1, d2, d3, d4, d5, d16, q14, q15 + vst1.u8 {d16}, [r2], r3 //write last 8Byte + + pop {r4} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN McHorVer02Height5_neon + push {r4} + ldr r4, [sp, #4] + + sub r0, r0, r1, lsl #1 //src[-2*src_stride] + pld [r0] + pld [r0, r1] + vmov.u16 q14, #0x0014 // 20 + vld1.u8 {d0}, [r0], r1 //d0=src[-2] + vld1.u8 {d1}, [r0], r1 //d1=src[-1] + + pld [r0] + pld [r0, r1] + vshr.u16 q15, q14, #2 // 5 + vld1.u8 {d2}, [r0], r1 //d2=src[0] + vld1.u8 {d3}, [r0], r1 //d3=src[1] + + vld1.u8 {d4}, [r0], r1 //d4=src[2] + vld1.u8 {d5}, [r0], r1 //d5=src[3] + +w5_v_mc_luma_loop: + + pld [r0] + FILTER_6TAG_8BITS d0, d1, d2, d3, d4, d5, d16, q14, q15 + vld1.u8 {d0}, [r0], r1 //read 2nd row + vst1.u32 {d16[0]}, [r2], r3 //write 1st 4Byte + + pld [r0] + FILTER_6TAG_8BITS d1, d2, d3, d4, d5, d0, d16, q14, q15 + vld1.u8 {d1}, [r0], r1 //read 3rd row + vst1.u32 {d16[0]}, [r2], r3 //write 2nd 4Byte + + pld [r0] + FILTER_6TAG_8BITS d2, d3, d4, d5, d0, d1, d16, q14, q15 + vld1.u8 {d2}, [r0], r1 //read 4th row + vst1.u32 {d16[0]}, [r2], r3 //write 3rd 4Byte + + pld [r0] + FILTER_6TAG_8BITS d3, d4, d5, d0, d1, d2, d16, q14, q15 + vld1.u8 {d3}, [r0], r1 //read 5th row + 
vst1.u32 {d16[0]}, [r2], r3 //write 4th 8Byte + + //d4, d5, d0, d1, d2, d3 --> d0, d1, d2, d3, d4, d5 + vswp q0, q2 + vswp q1, q2 + + sub r4, #4 + cmp r4, #1 + bne w5_v_mc_luma_loop + + FILTER_6TAG_8BITS d0, d1, d2, d3, d4, d5, d16, q14, q15 + vst1.u32 {d16[0]}, [r2], r3 //write last 4Byte + + pop {r4} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN McHorVer22Width17_neon + push {r4} + vpush {q4-q7} + ldr r4, [sp, #68] + + sub r0, #2 //src[-2] + sub r0, r0, r1, lsl #1 //src[-2*src_stride-2] + pld [r0] + pld [r0, r1] + + vmov.u16 q14, #0x0014 // 20 + vld1.u8 {d0-d2}, [r0], r1 //use 21(17+5), =src[-2] + vld1.u8 {d3-d5}, [r0], r1 //use 21(17+5), =src[-1] + + pld [r0] + pld [r0, r1] + vshr.u16 q15, q14, #2 // 5 + + vld1.u8 {d6-d8}, [r0], r1 //use 21(17+5), =src[0] + vld1.u8 {d9-d11}, [r0], r1 //use 21(17+5), =src[1] + pld [r0] + pld [r0, r1] + vld1.u8 {d12-d14}, [r0], r1 //use 21(17+5), =src[2] + sub r3, #16 + +w17_hv_mc_luma_loop: + + vld1.u8 {d15-d17}, [r0], r1 //use 21(17+5), =src[3] + //the 1st row + pld [r0] + // vertical filtered into q9/q10 + FILTER_6TAG_8BITS_TO_16BITS d0, d3, d6, d9, d12, d15, q9, q14, q15 // 8 avail + FILTER_6TAG_8BITS_TO_16BITS d1, d4, d7,d10, d13, d16,q10, q14, q15 // 8 avail + // horizon filtered + UNPACK_2_16BITS_TO_ABC q9, q10, q11, q12, q13 + FILTER_3_IN_16BITS_TO_8BITS q11, q12, q13, d0 //output to q0[0] + // vertical filtered into q10/q11 + FILTER_6TAG_8BITS_TO_16BITS d2, d5, d8,d11, d14, d17,q11, q14, q15 // only 6 avail + // horizon filtered + UNPACK_2_16BITS_TO_ABC q10, q11, q9, q12, q13 + FILTER_3_IN_16BITS_TO_8BITS q9, q12, q13, d1 //output to q0[1] + vst1.u8 {d0, d1}, [r2]! 
//write 16Byte + UNPACK_1_IN_8x16BITS_TO_8BITS d2, d22, d23, q11 //output to d2[0] + vst1.u8 {d2[0]}, [r2], r3 //write 16th Byte + + vld1.u8 {d0-d2}, [r0], r1 //read 2nd row + //the 2nd row + pld [r0] + // vertical filtered into q9/q10 + FILTER_6TAG_8BITS_TO_16BITS d3, d6, d9, d12, d15, d0, q9, q14, q15 // 8 avail + FILTER_6TAG_8BITS_TO_16BITS d4, d7,d10, d13, d16, d1,q10, q14, q15 // 8 avail + // horizon filtered + UNPACK_2_16BITS_TO_ABC q9, q10, q11, q12, q13 + FILTER_3_IN_16BITS_TO_8BITS q11, q12, q13, d3 //output to d3 + // vertical filtered into q10/q11 + FILTER_6TAG_8BITS_TO_16BITS d5, d8,d11, d14, d17, d2,q11, q14, q15 // only 6 avail + // horizon filtered + UNPACK_2_16BITS_TO_ABC q10, q11, q9, q12, q13 + FILTER_3_IN_16BITS_TO_8BITS q9, q12, q13, d4 //output to d4 + vst1.u8 {d3, d4}, [r2]! //write 16Byte + UNPACK_1_IN_8x16BITS_TO_8BITS d5, d22, d23, q11 //output to d5[0] + vst1.u8 {d5[0]}, [r2], r3 //write 16th Byte + + vld1.u8 {d3-d5}, [r0], r1 //read 3rd row + //the 3rd row + pld [r0] + // vertical filtered into q9/q10 + FILTER_6TAG_8BITS_TO_16BITS d6, d9, d12, d15, d0, d3, q9, q14, q15 // 8 avail + FILTER_6TAG_8BITS_TO_16BITS d7,d10, d13, d16, d1, d4,q10, q14, q15 // 8 avail + // horizon filtered + UNPACK_2_16BITS_TO_ABC q9, q10, q11, q12, q13 + FILTER_3_IN_16BITS_TO_8BITS q11, q12, q13, d6 //output to d6 + // vertical filtered into q10/q11 + FILTER_6TAG_8BITS_TO_16BITS d8,d11, d14, d17, d2, d5,q11, q14, q15 // only 6 avail + // horizon filtered + UNPACK_2_16BITS_TO_ABC q10, q11, q9, q12, q13 + FILTER_3_IN_16BITS_TO_8BITS q9, q12, q13, d7 //output to d7 + vst1.u8 {d6, d7}, [r2]! 
//write 16Byte + UNPACK_1_IN_8x16BITS_TO_8BITS d8, d22, d23, q11 //output to d8[0] + vst1.u8 {d8[0]}, [r2], r3 //write 16th Byte + + vld1.u8 {d6-d8}, [r0], r1 //read 4th row + //the 4th row + pld [r0] + // vertical filtered into q9/q10 + FILTER_6TAG_8BITS_TO_16BITS d9, d12, d15, d0, d3, d6, q9, q14, q15 // 8 avail + FILTER_6TAG_8BITS_TO_16BITS d10, d13, d16, d1, d4, d7,q10, q14, q15 // 8 avail + // horizon filtered + UNPACK_2_16BITS_TO_ABC q9, q10, q11, q12, q13 + FILTER_3_IN_16BITS_TO_8BITS q11, q12, q13, d9 //output to d9 + // vertical filtered into q10/q11 + FILTER_6TAG_8BITS_TO_16BITS d11, d14, d17, d2, d5, d8,q11, q14, q15 // only 6 avail + // horizon filtered + UNPACK_2_16BITS_TO_ABC q10, q11, q9, q12, q13 + FILTER_3_IN_16BITS_TO_8BITS q9, q12, q13, d10 //output to d10 + vst1.u8 {d9, d10}, [r2]! //write 16Byte + UNPACK_1_IN_8x16BITS_TO_8BITS d11, d22, d23, q11 //output to d11[0] + vst1.u8 {d11[0]}, [r2], r3 //write 16th Byte + + //d12~d17(q6~q8), d0~d8(q0~q3+d8), --> d0~d14 + vswp q0, q6 + vswp q6, q3 + vmov q5, q2 + vmov q2, q8 + + vmov d20,d8 + vmov q4, q1 + vmov q1, q7 + vmov d14,d20 + + sub r4, #4 + cmp r4, #1 + bne w17_hv_mc_luma_loop + //the last row + vld1.u8 {d15-d17}, [r0], r1 //use 21(17+5), =src[3] + // vertical filtered into q9/q10 + FILTER_6TAG_8BITS_TO_16BITS d0, d3, d6, d9, d12, d15, q9, q14, q15 // 8 avail + FILTER_6TAG_8BITS_TO_16BITS d1, d4, d7,d10, d13, d16,q10, q14, q15 // 8 avail + // horizon filtered + UNPACK_2_16BITS_TO_ABC q9, q10, q11, q12, q13 + FILTER_3_IN_16BITS_TO_8BITS q11, q12, q13, d0 //output to q0[0] + // vertical filtered into q10/q11 + FILTER_6TAG_8BITS_TO_16BITS d2, d5, d8,d11, d14, d17,q11, q14, q15 // only 6 avail + // horizon filtered + UNPACK_2_16BITS_TO_ABC q10, q11, q9, q12, q13 + FILTER_3_IN_16BITS_TO_8BITS q9, q12, q13, d1 //output to q0[1] + vst1.u8 {q0}, [r2]! 
//write 16Byte + UNPACK_1_IN_8x16BITS_TO_8BITS d2, d22, d23, q11 //output to d2[0] + vst1.u8 {d2[0]}, [r2], r3 //write 16th Byte + + vpop {q4-q7} + pop {r4} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN McHorVer22Width9_neon + push {r4} + vpush {q4} + ldr r4, [sp, #20] + + sub r0, #2 //src[-2] + sub r0, r0, r1, lsl #1 //src[-2*src_stride-2] + pld [r0] + pld [r0, r1] + + vmov.u16 q14, #0x0014 // 20 + vld1.u8 {q0}, [r0], r1 //use 14(9+5), =src[-2] + vld1.u8 {q1}, [r0], r1 //use 14(9+5), =src[-1] + + pld [r0] + pld [r0, r1] + vshr.u16 q15, q14, #2 // 5 + + vld1.u8 {q2}, [r0], r1 //use 14(9+5), =src[0] + vld1.u8 {q3}, [r0], r1 //use 14(9+5), =src[1] + pld [r0] + pld [r0, r1] + vld1.u8 {q4}, [r0], r1 //use 14(9+5), =src[2] + sub r3, #8 + +w9_hv_mc_luma_loop: + + vld1.u8 {q8}, [r0], r1 //use 14(9+5), =src[3] + //the 1st row + pld [r0] + // vertical filtered into q9/q10 + FILTER_6TAG_8BITS_TO_16BITS d0, d2, d4, d6, d8, d16, q9, q14, q15 // 8 avail + FILTER_6TAG_8BITS_TO_16BITS d1, d3, d5, d7, d9, d17, q10, q14, q15 // 6 avail + // horizon filtered + UNPACK_2_16BITS_TO_ABC q9, q10, q11, q12, q13 + FILTER_3_IN_16BITS_TO_8BITS q11, q12, q13, d18 //output to q9[0] + vst1.u8 d18, [r2]! //write 8Byte + UNPACK_1_IN_8x16BITS_TO_8BITS d19, d20, d21, q10 //output to d19[0] + vst1.u8 {d19[0]}, [r2], r3 //write 8th Byte + + vld1.u8 {q0}, [r0], r1 //read 2nd row + //the 2nd row + pld [r0] + // vertical filtered into q9/q10 + FILTER_6TAG_8BITS_TO_16BITS d2, d4, d6, d8, d16, d0, q9, q14, q15 // 8 avail + FILTER_6TAG_8BITS_TO_16BITS d3, d5, d7, d9, d17, d1, q10, q14, q15 // 6 avail + // horizon filtered + UNPACK_2_16BITS_TO_ABC q9, q10, q11, q12, q13 + FILTER_3_IN_16BITS_TO_8BITS q11, q12, q13, d18 //output to q9[0] + vst1.u8 d18, [r2]! 
//write 8Byte + UNPACK_1_IN_8x16BITS_TO_8BITS d19, d20, d21, q10 //output to d19[0] + vst1.u8 {d19[0]}, [r2], r3 //write 8th Byte + + vld1.u8 {q1}, [r0], r1 //read 3rd row + //the 3rd row + pld [r0] + // vertical filtered into q9/q10 + FILTER_6TAG_8BITS_TO_16BITS d4, d6, d8, d16, d0, d2, q9, q14, q15 // 8 avail + FILTER_6TAG_8BITS_TO_16BITS d5, d7, d9, d17, d1, d3, q10, q14, q15 // 6 avail + // horizon filtered + UNPACK_2_16BITS_TO_ABC q9, q10, q11, q12, q13 + FILTER_3_IN_16BITS_TO_8BITS q11, q12, q13, d18 //output to q9[0] + vst1.u8 d18, [r2]! //write 8Byte + UNPACK_1_IN_8x16BITS_TO_8BITS d19, d20, d21, q10 //output to d19[0] + vst1.u8 {d19[0]}, [r2], r3 //write 8th Byte + + vld1.u8 {q2}, [r0], r1 //read 4th row + //the 4th row + pld [r0] + // vertical filtered into q9/q10 + FILTER_6TAG_8BITS_TO_16BITS d6, d8, d16, d0, d2, d4, q9, q14, q15 // 8 avail + FILTER_6TAG_8BITS_TO_16BITS d7, d9, d17, d1, d3, d5, q10, q14, q15 // 6 avail + // horizon filtered + UNPACK_2_16BITS_TO_ABC q9, q10, q11, q12, q13 + FILTER_3_IN_16BITS_TO_8BITS q11, q12, q13, d18 //output to q9[0] + vst1.u8 d18, [r2]! //write 8Byte + UNPACK_1_IN_8x16BITS_TO_8BITS d19, d20, d21, q10 //output to d19[0] + vst1.u8 {d19[0]}, [r2], r3 //write 8th Byte + + //q4~q8, q0~q2, --> q0~q4 + vswp q0, q4 + vswp q2, q4 + vmov q3, q1 + vmov q1, q8 + + sub r4, #4 + cmp r4, #1 + bne w9_hv_mc_luma_loop + //the last row + vld1.u8 {q8}, [r0], r1 //use 14(9+5), =src[3] + // vertical filtered into q9/q10 + FILTER_6TAG_8BITS_TO_16BITS d0, d2, d4, d6, d8, d16, q9, q14, q15 // 8 avail + FILTER_6TAG_8BITS_TO_16BITS d1, d3, d5, d7, d9, d17, q10, q14, q15 // 6 avail + // horizon filtered + UNPACK_2_16BITS_TO_ABC q9, q10, q11, q12, q13 + FILTER_3_IN_16BITS_TO_8BITS q11, q12, q13, d18 //output to q9[0] + vst1.u8 d18, [r2]! 
//write 8Byte + UNPACK_1_IN_8x16BITS_TO_8BITS d19, d20, d21, q10 //output to d19[0] + vst1.u8 {d19[0]}, [r2], r3 //write 8th Byte + vpop {q4} + pop {r4} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN McHorVer22Width5_neon + push {r4} + vpush {q4} + ldr r4, [sp, #20] + + sub r0, #2 //src[-2] + sub r0, r0, r1, lsl #1 //src[-2*src_stride-2] + pld [r0] + pld [r0, r1] + + vmov.u16 q14, #0x0014 // 20 + vld1.u8 {q0}, [r0], r1 //use 10(5+5), =src[-2] + vld1.u8 {q1}, [r0], r1 //use 10(5+5), =src[-1] + + pld [r0] + pld [r0, r1] + vshr.u16 q15, q14, #2 // 5 + + vld1.u8 {q2}, [r0], r1 //use 10(5+5), =src[0] + vld1.u8 {q3}, [r0], r1 //use 10(5+5), =src[1] + pld [r0] + pld [r0, r1] + vld1.u8 {q4}, [r0], r1 //use 10(5+5), =src[2] + sub r3, #4 + +w5_hv_mc_luma_loop: + + vld1.u8 {q8}, [r0], r1 //use 10(5+5), =src[3] + //the 1st row + pld [r0] + // vertical filtered into q9/q10 + FILTER_6TAG_8BITS_TO_16BITS d0, d2, d4, d6, d8, d16, q9, q14, q15 + FILTER_6TAG_8BITS_TO_16BITS d1, d3, d5, d7, d9, d17, q10, q14, q15 + // horizon filtered + UNPACK_2_16BITS_TO_ABC q9, q10, q11, q12, q13 + FILTER_3_IN_16BITS_TO_8BITS q11, q12, q13, d18 + vst1.u32 {d18[0]}, [r2]! //write 4Byte + vst1.u8 {d18[4]}, [r2], r3 //write 5th Byte + + vld1.u8 {q0}, [r0], r1 //read 2nd row + //the 2nd row + pld [r0] + // vertical filtered into q9/q10 + FILTER_6TAG_8BITS_TO_16BITS d2, d4, d6, d8, d16, d0, q9, q14, q15 + FILTER_6TAG_8BITS_TO_16BITS d3, d5, d7, d9, d17, d1, q10, q14, q15 + // horizon filtered + UNPACK_2_16BITS_TO_ABC q9, q10, q11, q12, q13 + FILTER_3_IN_16BITS_TO_8BITS q11, q12, q13, d18 + vst1.u32 {d18[0]}, [r2]! 
//write 4Byte + vst1.u8 {d18[4]}, [r2], r3 //write 5th Byte + + vld1.u8 {q1}, [r0], r1 //read 3rd row + //the 3rd row + pld [r0] + // vertical filtered into q9/q10 + FILTER_6TAG_8BITS_TO_16BITS d4, d6, d8, d16, d0, d2, q9, q14, q15 + FILTER_6TAG_8BITS_TO_16BITS d5, d7, d9, d17, d1, d3, q10, q14, q15 + // horizon filtered + UNPACK_2_16BITS_TO_ABC q9, q10, q11, q12, q13 + FILTER_3_IN_16BITS_TO_8BITS q11, q12, q13, d18 + vst1.u32 {d18[0]}, [r2]! //write 4Byte + vst1.u8 {d18[4]}, [r2], r3 //write 5th Byte + + vld1.u8 {q2}, [r0], r1 //read 4th row + //the 4th row + pld [r0] + // vertical filtered into q9/q10 + FILTER_6TAG_8BITS_TO_16BITS d6, d8, d16, d0, d2, d4, q9, q14, q15 + FILTER_6TAG_8BITS_TO_16BITS d7, d9, d17, d1, d3, d5, q10, q14, q15 + // horizon filtered + UNPACK_2_16BITS_TO_ABC q9, q10, q11, q12, q13 + FILTER_3_IN_16BITS_TO_8BITS q11, q12, q13, d18 + vst1.u32 {d18[0]}, [r2]! //write 4Byte + vst1.u8 {d18[4]}, [r2], r3 //write 5th Byte + + //q4~q8, q0~q2, --> q0~q4 + vswp q0, q4 + vswp q2, q4 + vmov q3, q1 + vmov q1, q8 + + sub r4, #4 + cmp r4, #1 + bne w5_hv_mc_luma_loop + //the last row + vld1.u8 {q8}, [r0], r1 //use 10(5+5), =src[3] + // vertical filtered into q9/q10 + FILTER_6TAG_8BITS_TO_16BITS d0, d2, d4, d6, d8, d16, q9, q14, q15 + FILTER_6TAG_8BITS_TO_16BITS d1, d3, d5, d7, d9, d17, q10, q14, q15 + // horizon filtered + UNPACK_2_16BITS_TO_ABC q9, q10, q11, q12, q13 + FILTER_3_IN_16BITS_TO_8BITS q11, q12, q13, d18 + vst1.u32 {d18[0]}, [r2]! 
//write 4Byte + vst1.u8 {d18[4]}, [r2], r3 //write 5th Byte + vpop {q4} + pop {r4} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN PixStrideAvgWidthEq16_neon + push {r4, r5, r6} + ldr r4, [sp, #12] + ldr r5, [sp, #16] + ldr r6, [sp, #20] + +enc_w16_pix_avg_loop: + vld1.u8 {q0}, [r2], r3 + vld1.u8 {q1}, [r4], r5 + vld1.u8 {q2}, [r2], r3 + vld1.u8 {q3}, [r4], r5 + + vld1.u8 {q8}, [r2], r3 + vld1.u8 {q9}, [r4], r5 + vld1.u8 {q10}, [r2], r3 + vld1.u8 {q11}, [r4], r5 + + AVERAGE_TWO_8BITS d0, d0, d2 + AVERAGE_TWO_8BITS d1, d1, d3 + vst1.u8 {q0}, [r0], r1 + + AVERAGE_TWO_8BITS d4, d4, d6 + AVERAGE_TWO_8BITS d5, d5, d7 + vst1.u8 {q2}, [r0], r1 + + AVERAGE_TWO_8BITS d16, d16, d18 + AVERAGE_TWO_8BITS d17, d17, d19 + vst1.u8 {q8}, [r0], r1 + + AVERAGE_TWO_8BITS d20, d20, d22 + AVERAGE_TWO_8BITS d21, d21, d23 + vst1.u8 {q10}, [r0], r1 + + sub r6, #4 + cmp r6, #0 + bne enc_w16_pix_avg_loop + + pop {r4, r5, r6} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN PixStrideAvgWidthEq8_neon + push {r4, r5, r6} + ldr r4, [sp, #12] + ldr r5, [sp, #16] + ldr r6, [sp, #20] +enc_w8_pix_avg_loop: + + vld1.u8 {d0}, [r2], r3 + vld1.u8 {d2}, [r4], r5 + vld1.u8 {d1}, [r2], r3 + vld1.u8 {d3}, [r4], r5 + + AVERAGE_TWO_8BITS d0, d0, d2 + AVERAGE_TWO_8BITS d1, d1, d3 + vst1.u8 {d0}, [r0], r1 + vst1.u8 {d1}, [r0], r1 + + vld1.u8 {d4}, [r2], r3 + vld1.u8 {d6}, [r4], r5 + vld1.u8 {d5}, [r2], r3 + vld1.u8 {d7}, [r4], r5 + + AVERAGE_TWO_8BITS d4, d4, d6 + AVERAGE_TWO_8BITS d5, d5, d7 + vst1.u8 {d4}, [r0], r1 + vst1.u8 {d5}, [r0], r1 + + sub r6, #4 + cmp r6, #0 + bne enc_w8_pix_avg_loop + + pop {r4, r5, r6} +WELS_ASM_FUNC_END + +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/arm64/arm_arch64_common_macro.S b/TMessagesProj/jni/third_party/openh264/src/codec/common/arm64/arm_arch64_common_macro.S new file mode 100644 index 000000000..e6e2d590f --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/arm64/arm_arch64_common_macro.S @@ -0,0 +1,76 @@ +/*! 
+ * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifdef __APPLE__ + +.text + +.macro WELS_ASM_AARCH64_FUNC_BEGIN +.align 2 +.globl _$0 +_$0: +.endm + +.macro WELS_ASM_AARCH64_FUNC_END +ret +.endm +#else + +#ifdef __ELF__ +.section .note.GNU-stack,"",%progbits // Mark stack as non-executable +#endif +.text + +.macro WELS_ASM_AARCH64_FUNC_BEGIN funcName +.align 2 +.global \funcName +#ifdef __ELF__ +.type \funcName, %function +#endif +#ifndef __clang__ +.func \funcName +#endif +\funcName: +.endm + +.macro WELS_ASM_AARCH64_FUNC_END +ret +#ifndef __clang__ +.endfunc +#endif +.endm + +#endif + +.macro SIGN_EXTENSION arg0, arg1 + sxtw \arg0, \arg1 +.endm diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/arm64/copy_mb_aarch64_neon.S b/TMessagesProj/jni/third_party/openh264/src/codec/common/arm64/copy_mb_aarch64_neon.S new file mode 100644 index 000000000..4d9f1975a --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/arm64/copy_mb_aarch64_neon.S @@ -0,0 +1,202 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifdef HAVE_NEON_AARCH64 +#include "arm_arch64_common_macro.S" + +.macro LOAD_ALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5 +// { // input: $0~$3, src*, src_stride + ld1 {\arg0\().d}[0], [\arg4], \arg5 + ld1 {\arg1\().d}[0], [\arg4], \arg5 + ld1 {\arg2\().d}[0], [\arg4], \arg5 + ld1 {\arg3\().d}[0], [\arg4], \arg5 +// } +.endm + +.macro STORE_ALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5 +// { // input: $0~$3, dst*, dst_stride + st1 {\arg0\().d}[0], [\arg4], \arg5 + st1 {\arg1\().d}[0], [\arg4], \arg5 + st1 {\arg2\().d}[0], [\arg4], \arg5 + st1 {\arg3\().d}[0], [\arg4], \arg5 +// } +.endm + +.macro LOAD_UNALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5 +// { // input: $0~$3, src*, src_stride + ld1 {\arg0\().8b}, [\arg4], \arg5 + ld1 {\arg1\().8b}, [\arg4], \arg5 + ld1 {\arg2\().8b}, [\arg4], \arg5 + ld1 {\arg3\().8b}, [\arg4], \arg5 +// } +.endm + +.macro STORE_UNALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5 +// { // input: $0~$3, dst*, dst_stride + st1 {\arg0\().8b}, [\arg4], \arg5 + st1 {\arg1\().8b}, [\arg4], \arg5 + st1 {\arg2\().8b}, [\arg4], \arg5 + st1 {\arg3\().8b}, [\arg4], \arg5 +// } +.endm + +.macro LOAD16_ALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5 +// { // input: $0~$3, src*, src_stride + ld1 {\arg0\().2d}, [\arg4], \arg5 + ld1 {\arg1\().2d}, [\arg4], \arg5 + ld1 {\arg2\().2d}, [\arg4], \arg5 + ld1 {\arg3\().2d}, [\arg4], \arg5 +// } 
+.endm + +.macro STORE16_ALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5 +// { // input: $0~$3, dst*, dst_stride + st1 {\arg0\().2d}, [\arg4], \arg5 + st1 {\arg1\().2d}, [\arg4], \arg5 + st1 {\arg2\().2d}, [\arg4], \arg5 + st1 {\arg3\().2d}, [\arg4], \arg5 +// } +.endm + +.macro LOAD16_UNALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5 +// { // input: $0~$3, src*, src_stride + ld1 {\arg0\().16b}, [\arg4], \arg5 + ld1 {\arg1\().16b}, [\arg4], \arg5 + ld1 {\arg2\().16b}, [\arg4], \arg5 + ld1 {\arg3\().16b}, [\arg4], \arg5 +// } +.endm + +.macro STORE16_UNALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5 +// { // input: $0~$3, dst*, dst_stride + st1 {\arg0\().16b}, [\arg4], \arg5 + st1 {\arg1\().16b}, [\arg4], \arg5 + st1 {\arg2\().16b}, [\arg4], \arg5 + st1 {\arg3\().16b}, [\arg4], \arg5 +// } +.endm + +//void WelsCopy8x8_AArch64_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); +WELS_ASM_AARCH64_FUNC_BEGIN WelsCopy8x8_AArch64_neon + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + LOAD_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x2, x3 + + STORE_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x0, x1 + + LOAD_UNALIGNED_DATA_WITH_STRIDE v4, v5, v6, v7, x2, x3 + + STORE_UNALIGNED_DATA_WITH_STRIDE v4, v5, v6, v7, x0, x1 + +WELS_ASM_AARCH64_FUNC_END + + +WELS_ASM_AARCH64_FUNC_BEGIN WelsCopy16x16_AArch64_neon + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + LOAD16_ALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x2, x3 + + STORE16_ALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x0, x1 + + LOAD16_ALIGNED_DATA_WITH_STRIDE v16, v17, v18, v19, x2, x3 + + STORE16_ALIGNED_DATA_WITH_STRIDE v16, v17, v18, v19, x0, x1 + + LOAD16_ALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x2, x3 + + STORE16_ALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x0, x1 + + LOAD16_ALIGNED_DATA_WITH_STRIDE v16, v17, v18, v19, x2, x3 + + STORE16_ALIGNED_DATA_WITH_STRIDE v16, v17, v18, v19, x0, x1 + +WELS_ASM_AARCH64_FUNC_END + + +WELS_ASM_AARCH64_FUNC_BEGIN 
WelsCopy16x16NotAligned_AArch64_neon + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + LOAD16_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x2, x3 + + STORE16_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x0, x1 + + LOAD16_UNALIGNED_DATA_WITH_STRIDE v16, v17, v18, v19, x2, x3 + + STORE16_UNALIGNED_DATA_WITH_STRIDE v16, v17, v18, v19, x0, x1 + + LOAD16_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x2, x3 + + STORE16_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x0, x1 + + LOAD16_UNALIGNED_DATA_WITH_STRIDE v16, v17, v18, v19, x2, x3 + + STORE16_UNALIGNED_DATA_WITH_STRIDE v16, v17, v18, v19, x0, x1 + +WELS_ASM_AARCH64_FUNC_END + + +WELS_ASM_AARCH64_FUNC_BEGIN WelsCopy16x8NotAligned_AArch64_neon + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + LOAD16_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x2, x3 + + STORE16_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x0, x1 + + LOAD16_UNALIGNED_DATA_WITH_STRIDE v16, v17, v18, v19, x2, x3 + + STORE16_UNALIGNED_DATA_WITH_STRIDE v16, v17, v18, v19, x0, x1 + +WELS_ASM_AARCH64_FUNC_END + + +WELS_ASM_AARCH64_FUNC_BEGIN WelsCopy8x16_AArch64_neon + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + LOAD_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x2, x3 + + STORE_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x0, x1 + + LOAD_UNALIGNED_DATA_WITH_STRIDE v4, v5, v6, v7, x2, x3 + + STORE_UNALIGNED_DATA_WITH_STRIDE v4, v5, v6, v7, x0, x1 + + LOAD_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x2, x3 + + STORE_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x0, x1 + + LOAD_UNALIGNED_DATA_WITH_STRIDE v4, v5, v6, v7, x2, x3 + + STORE_UNALIGNED_DATA_WITH_STRIDE v4, v5, v6, v7, x0, x1 + +WELS_ASM_AARCH64_FUNC_END + +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/arm64/deblocking_aarch64_neon.S b/TMessagesProj/jni/third_party/openh264/src/codec/common/arm64/deblocking_aarch64_neon.S new file mode 100644 index 000000000..a62c48844 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/arm64/deblocking_aarch64_neon.S @@ -0,0 +1,852 @@ 
+/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifdef HAVE_NEON_AARCH64 + +#include "arm_arch64_common_macro.S" + +.macro MASK_MATRIX arg0, arg1, arg2, arg3, arg4, arg5, arg6 + uabd \arg6\().16b, \arg1\().16b, \arg2\().16b + cmhi \arg6\().16b, \arg4\().16b, \arg6\().16b + + uabd \arg4\().16b, \arg0\().16b, \arg1\().16b + cmhi \arg4\().16b, \arg5\().16b, \arg4\().16b + and \arg6\().16b, \arg6\().16b, \arg4\().16b + + uabd \arg4\().16b, \arg3\().16b, \arg2\().16b + cmhi \arg4\().16b, \arg5\().16b, \arg4\().16b + and \arg6\().16b, \arg6\().16b, \arg4\().16b +.endm + +.macro DIFF_LUMA_LT4_P1_Q1 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9 + //v0, v1, v2, v3, v17(beta), v18(-Tc0), v6(Tc0), v7(flag), v19, v20 + urhadd \arg8\().16b, \arg2\().16b, \arg3\().16b + uhadd \arg8\().16b, \arg0\().16b, \arg8\().16b + usubl \arg9\().8h, \arg8\().8b, \arg1\().8b + sqxtn \arg9\().8b, \arg9\().8h + usubl2 \arg8\().8h, \arg8\().16b, \arg1\().16b + sqxtn2 \arg9\().16b, \arg8\().8h + smax \arg8\().16b, \arg9\().16b, \arg5\().16b + // + smin \arg8\().16b, \arg8\().16b, \arg6\().16b + uabd \arg9\().16b, \arg0\().16b, \arg2\().16b + cmhi \arg9\().16b, \arg4\().16b, \arg9\().16b + and \arg8\().16b, \arg8\().16b, \arg9\().16b + and \arg8\().16b, \arg8\().16b, \arg7\().16b + add \arg8\().16b, \arg1\().16b, \arg8\().16b + abs \arg9\().16b, \arg9\().16b +.endm + +.macro DIFF_LUMA_LT4_P0_Q0_1 arg0, arg1, arg2, arg3, arg4, arg5, arg6 + usubl \arg5\().8h, \arg0\().8b, \arg3\().8b + usubl \arg6\().8h, \arg2\().8b, \arg1\().8b + shl \arg6\().8h, \arg6\().8h, #2 + add \arg5\().8h, \arg5\().8h, \arg6\().8h + sqrshrn \arg4\().8b, \arg5\().8h, #3 +.endm + +.macro DIFF_LUMA_LT4_P0_Q0_2 arg0, arg1, arg2, arg3, arg4, arg5, arg6 + usubl2 \arg5\().8h, \arg0\().16b, \arg3\().16b + usubl2 \arg6\().8h, \arg2\().16b, \arg1\().16b + shl \arg6\().8h, \arg6\().8h, #2 + add \arg5\().8h, \arg5\().8h, \arg6\().8h + sqrshrn2 \arg4\().16b, \arg5\().8h, #3 +.endm + +.macro EXTRACT_DELTA_INTO_TWO_PART arg0, arg1 + cmge \arg1\().16b, 
\arg0\().16b, #0 + and \arg1\().16b, \arg0\().16b, \arg1\().16b + sub \arg0\().16b, \arg1\().16b, \arg0\().16b +.endm + +.macro DIFF_LUMA_EQ4_P2P1P0_1 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9 + uaddl \arg8\().8h, \arg1\().8b, \arg2\().8b + uaddl \arg9\().8h, \arg3\().8b, \arg4\().8b + add \arg9\().8h, \arg9\().8h, \arg8\().8h + + uaddl \arg8\().8h, \arg0\().8b, \arg1\().8b + shl \arg8\().8h, \arg8\().8h, #1 + add \arg8\().8h, \arg9\().8h, \arg8\().8h + + rshrn \arg0\().8b, \arg9\().8h, #2 + rshrn \arg7\().8b, \arg8\().8h, #3 + shl \arg9\().8h, \arg9\().8h, #1 + usubl \arg8\().8h, \arg5\().8b, \arg1\().8b + add \arg9\().8h, \arg8\().8h, \arg9\().8h + + uaddl \arg8\().8h, \arg2\().8b, \arg5\().8b + uaddw \arg8\().8h, \arg8\().8h, \arg2\().8b + uaddw \arg8\().8h, \arg8\().8h, \arg3\().8b + + rshrn \arg9\().8b, \arg9\().8h, #3 + rshrn \arg8\().8b, \arg8\().8h, #2 + bsl \arg6\().8b, \arg9\().8b, \arg8\().8b +.endm + +.macro DIFF_LUMA_EQ4_P2P1P0_2 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9 + uaddl2 \arg8\().8h, \arg1\().16b, \arg2\().16b + uaddl2 \arg9\().8h, \arg3\().16b, \arg4\().16b + add \arg9\().8h, \arg9\().8h, \arg8\().8h + + uaddl2 \arg8\().8h, \arg0\().16b, \arg1\().16b + shl \arg8\().8h, \arg8\().8h, #1 + add \arg8\().8h, \arg9\().8h, \arg8\().8h + + rshrn2 \arg0\().16b, \arg9\().8h, #2 + rshrn2 \arg7\().16b, \arg8\().8h, #3 + shl \arg9\().8h, \arg9\().8h, #1 + usubl2 \arg8\().8h, \arg5\().16b, \arg1\().16b + add \arg9\().8h, \arg8\().8h, \arg9\().8h + + uaddl2 \arg8\().8h, \arg2\().16b, \arg5\().16b + uaddw2 \arg8\().8h, \arg8\().8h, \arg2\().16b + uaddw2 \arg8\().8h, \arg8\().8h, \arg3\().16b + + rshrn2 \arg9\().16b, \arg9\().8h, #3 + rshrn2 \arg8\().16b, \arg8\().8h, #2 + bsl \arg6\().16b, \arg9\().16b, \arg8\().16b +.endm + + +.macro DIFF_CHROMA_EQ4_P0Q0_1 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 + uaddl \arg4\().8h, \arg0\().8b, \arg3\().8b + shl \arg4\().8h, \arg4\().8h, #1 + usubl \arg5\().8h, \arg1\().8b, 
\arg3\().8b + add \arg5\().8h, \arg5\().8h, \arg4\().8h + rshrn \arg6\().8b, \arg5\().8h, #2 + usubl \arg5\().8h, \arg2\().8b, \arg0\().8b + add \arg5\().8h, \arg5\().8h, \arg4\().8h + rshrn \arg7\().8b, \arg5\().8h, #2 +.endm + +.macro DIFF_CHROMA_EQ4_P0Q0_2 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 + uaddl2 \arg4\().8h, \arg0\().16b, \arg3\().16b + shl \arg4\().8h, \arg4\().8h, #1 + usubl2 \arg5\().8h, \arg1\().16b, \arg3\().16b + add \arg5\().8h, \arg5\().8h, \arg4\().8h + rshrn2 \arg6\().16b, \arg5\().8h, #2 + usubl2 \arg5\().8h, \arg2\().16b, \arg0\().16b + add \arg5\().8h, \arg5\().8h, \arg4\().8h + rshrn2 \arg7\().16b, \arg5\().8h, #2 +.endm + +.macro DIFF_LUMA_EQ4_MASK arg0, arg1, arg2, arg3 + mov \arg3\().16b, \arg2\().16b + bsl \arg3\().16b, \arg0\().16b, \arg1\().16b +.endm + +.macro LOAD_LUMA_DATA_3 arg0, arg1, arg2, arg3, arg4, arg5, arg6 + ld3 {\arg0\().b, \arg1\().b, \arg2\().b} [\arg6], [x2], x1 + ld3 {\arg3\().b, \arg4\().b, \arg5\().b} [\arg6], [x0], x1 +.endm + +.macro LOAD_LUMA_DATA_4 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8 + ld4 {\arg0\().b, \arg1\().b, \arg2\().b, \arg3\().b} [\arg8], [x3], x1 + ld4 {\arg4\().b, \arg5\().b, \arg6\().b, \arg7\().b} [\arg8], [x0], x1 +.endm + +.macro STORE_LUMA_DATA_4 arg0, arg1, arg2, arg3, arg4, arg5 + st4 {\arg0\().b, \arg1\().b, \arg2\().b, \arg3\().b} [\arg4], [x0], x1 + st4 {\arg0\().b, \arg1\().b, \arg2\().b, \arg3\().b} [\arg5], [x2], x1 +.endm + +.macro STORE_LUMA_DATA_3 arg0, arg1, arg2, arg3, arg4, arg5, arg6 + st3 {\arg0\().b, \arg1\().b, \arg2\().b} [\arg6], [x3], x1 + st3 {\arg3\().b, \arg4\().b, \arg5\().b} [\arg6], [x0], x1 +.endm + +.macro LOAD_CHROMA_DATA_4 arg0, arg1, arg2, arg3, arg4, arg5 + ld4 {\arg0\().b, \arg1\().b, \arg2\().b, \arg3\().b} [\arg5], [\arg4], x2 +.endm + +.macro STORE_CHROMA_DATA_2 arg0, arg1, arg2, arg3 + st2 {\arg0\().b, \arg1\().b} [\arg3], [\arg2], x2 +.endm + +.macro ZERO_JUMP_END arg0, arg1, arg2, arg3 + mov \arg1, \arg0\().d[0] + mov \arg2, 
\arg0\().d[1] + orr \arg1, \arg1, \arg2 + cbz \arg1, \arg3 +.endm + +.macro BS_NZC_CHECK arg0, arg1, arg2, arg3, arg4 + ld1 {v0.16b}, [\arg0] + //Arrange the input data --- TOP + ands x6, \arg1, #2 + cbz x6, bs_nzc_check_jump0 + sub x6, \arg0, \arg2, lsl #4 + sub x6, x6, \arg2, lsl #3 + add x6, x6, #12 + ld1 {v1.s} [3], [x6] + +bs_nzc_check_jump0: + ext v1.16b, v1.16b, v0.16b, #12 + add \arg3\().16b, v0.16b, v1.16b + + // Arrange the input data --- LEFT + ands x6, \arg1, #1 + cbz x6, bs_nzc_check_jump1 + + sub x6, \arg0, #21 + add x7, x6, #4 + ld1 {v1.b} [12], [x6] + add x6, x7, #4 + ld1 {v1.b} [13], [x7] + add x7, x6, #4 + ld1 {v1.b} [14], [x6] + ld1 {v1.b} [15], [x7] + +bs_nzc_check_jump1: + ins v2.d[0], v0.d[1] + zip1 v0.16b, v0.16b, v2.16b + ins v2.d[0], v0.d[1] + zip1 v0.16b, v0.16b, v2.16b + ext v1.16b, v1.16b, v0.16b, #12 + add \arg4\().16b, v0.16b, v1.16b +.endm + +.macro BS_COMPARE_MV arg0, arg1, arg2, arg3, arg4, arg5 + //in: \arg0,\arg1(const),\arg2(const),\arg3(const),\arg4(const); out:\arg5 + mov w6, #4 + sabd v20.8h, \arg0\().8h, \arg1\().8h + sabd v21.8h, \arg1\().8h, \arg2\().8h + dup \arg0\().8h, w6 + sabd v22.8h, \arg2\().8h, \arg3\().8h + sabd v23.8h, \arg3\().8h, \arg4\().8h + + cmge v20.8h, v20.8h, \arg0\().8h + cmge v21.8h, v21.8h, \arg0\().8h + cmge v22.8h, v22.8h, \arg0\().8h + cmge v23.8h, v23.8h, \arg0\().8h + + addp v20.8h, v20.8h, v21.8h + addp v21.8h, v22.8h, v23.8h + + addhn \arg5\().8b, v20.8h, v20.8h + addhn2 \arg5\().16b, v21.8h, v21.8h +.endm + +.macro BS_MV_CHECK arg0, arg1, arg2, arg3, arg4, arg5, arg6 + ldp q0, q1, [\arg0], #32 + ldp q2, q3, [\arg0] + sub \arg0, \arg0, #32 + // Arrange the input data --- TOP + ands x6, \arg1, #2 + cbz x6, bs_mv_check_jump0 + sub x6, \arg0, \arg2, lsl #6 + add x6, x6, #48 + ld1 {v4.16b}, [x6] +bs_mv_check_jump0: + BS_COMPARE_MV v4, v0, v1, v2, v3, \arg3 + // Arrange the input data --- LEFT + ands x6, \arg1, #1 + cbz x6, bs_mv_check_jump1 + sub x6, \arg0, #52 + add x7, x6, #16 + ld1 {v4.s} [0], 
[x6] + add x6, x7, #16 + ld1 {v4.s} [1], [x7] + add x7, x6, #16 + ld1 {v4.s} [2], [x6] + ld1 {v4.s} [3], [x7] +bs_mv_check_jump1: + zip1 \arg5\().4s, v0.4s, v2.4s + zip2 \arg6\().4s, v0.4s, v2.4s + zip1 v0.4s, v1.4s, v3.4s + zip2 v2.4s, v1.4s, v3.4s + zip2 v1.4s, \arg5\().4s, v0.4s + zip1 v0.4s, \arg5\().4s, v0.4s + zip2 v3.4s, \arg6\().4s, v2.4s + zip1 v2.4s, \arg6\().4s, v2.4s + BS_COMPARE_MV v4, v0, v1, v2, v3, \arg4 +.endm + +WELS_ASM_AARCH64_FUNC_BEGIN WelsNonZeroCount_AArch64_neon + mov w1, #1 + dup v3.8b, w1 + ld1 {v0.8b, v1.8b, v2.8b}, [x0] + umin v0.8b, v0.8b, v3.8b + umin v1.8b, v1.8b, v3.8b + umin v2.8b, v2.8b, v3.8b + st1 {v0.8b, v1.8b, v2.8b}, [x0] +WELS_ASM_AARCH64_FUNC_END + + +WELS_ASM_AARCH64_FUNC_BEGIN DeblockLumaLt4V_AArch64_neon //uint8_t* pPix, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* tc + dup v16.16b, w2 //alpha + dup v17.16b, w3 //beta + SIGN_EXTENSION x1,w1 + add x2, x1, x1, lsl #1 + sub x2, x0, x2 + movi v23.16b, #128 + ld1 {v0.16b}, [x2], x1 + ld1 {v1.16b}, [x2], x1 + ld1 {v2.16b}, [x2] + ld1 {v3.16b}, [x0], x1 + ld1 {v4.16b}, [x0], x1 + ld1 {v5.16b}, [x0] + sub x2, x2, x1 + ld4r {v18.8b, v19.8b, v20.8b, v21.8b}, [x4] + trn1 v18.2s, v18.2s, v19.2s + trn1 v20.2s, v20.2s, v21.2s + trn1 v6.2d, v18.2d, v20.2d // iTc0: 0000, 1111, 2222, 3333 + cmge v7.16b, v6.16b, #0 // iTc0 Flag + + MASK_MATRIX v1, v2, v3, v4, v16, v17, v18 + and v7.16b, v7.16b, v18.16b // need filter flag + + ZERO_JUMP_END v7, x3, x4, DeblockLumaLt4V_AArch64_neon_end + + eor v18.16b, v18.16b, v18.16b + sub v18.16b, v18.16b, v6.16b // -iTc0: 0000, 1111, 2222, 3333 + + DIFF_LUMA_LT4_P1_Q1 v0, v1, v2, v3, v17, v18, v6, v7, v19, v20 + st1 {v19.16b}, [x2], x1 + + DIFF_LUMA_LT4_P1_Q1 v5, v4, v3, v2, v17, v18, v6, v7, v21, v22 + + abs v20.16b, v20.16b + abs v22.16b, v22.16b + add v6.16b, v6.16b, v20.16b + add v6.16b, v6.16b, v22.16b + eor v18.16b, v18.16b, v18.16b + sub v18.16b, v18.16b, v6.16b + + DIFF_LUMA_LT4_P0_Q0_1 v1, v2, v3, v4, v19, v20, v22 + 
DIFF_LUMA_LT4_P0_Q0_2 v1, v2, v3, v4, v19, v20, v22 + + smax v19.16b, v19.16b, v18.16b + smin v19.16b, v19.16b, v6.16b + and v19.16b, v19.16b, v7.16b + + EXTRACT_DELTA_INTO_TWO_PART v19, v20 + uqadd v2.16b, v2.16b, v20.16b + uqsub v2.16b, v2.16b, v19.16b + st1 {v2.16b}, [x2], x1 + uqsub v3.16b, v3.16b, v20.16b + uqadd v3.16b, v3.16b, v19.16b + st1 {v3.16b}, [x2], x1 + st1 {v21.16b}, [x2] +DeblockLumaLt4V_AArch64_neon_end: +WELS_ASM_AARCH64_FUNC_END + + +WELS_ASM_AARCH64_FUNC_BEGIN DeblockLumaEq4V_AArch64_neon + dup v16.16b, w2 //alpha + dup v17.16b, w3 //beta + SIGN_EXTENSION x1,w1 + sub x3, x0, x1, lsl #2 + ld1 {v0.16b}, [x3], x1 + ld1 {v4.16b}, [x0], x1 + ld1 {v1.16b}, [x3], x1 + ld1 {v5.16b}, [x0], x1 + ld1 {v2.16b}, [x3], x1 + ld1 {v6.16b}, [x0], x1 + ld1 {v3.16b}, [x3] + ld1 {v7.16b}, [x0] + + sub x3, x3, x1, lsl #1 + MASK_MATRIX v2, v3, v4, v5, v16, v17, v18 + lsr w2, w2, #2 + add w2, w2, #2 + dup v16.16b, w2 //((alpha >> 2) + 2) + uabd v19.16b, v3.16b, v4.16b + cmhi v20.16b, v16.16b, v19.16b //iDetaP0Q0 < ((iAlpha >> 2) + 2) + + uabd v21.16b, v1.16b, v3.16b + cmhi v21.16b, v17.16b, v21.16b //bDetaP2P0 + and v21.16b, v21.16b, v20.16b //(iDetaP0Q0 < ((iAlpha >> 2) + 2))&&bDetaP2P0 + + uabd v22.16b, v6.16b, v4.16b + cmhi v22.16b, v17.16b, v22.16b //bDetaQ2Q0 + and v22.16b, v22.16b, v20.16b //(iDetaP0Q0 < ((iAlpha >> 2) + 2))&&bDetaQ2Q0 + and v20.16b, v20.16b, v18.16b //(iDetaP0Q0 < iAlpha) && bDetaP1P0 && bDetaQ1Q0&&(iDetaP0Q0 < ((iAlpha >> 2) + 2)) + + mov v23.16b, v21.16b + mov v24.16b, v21.16b + + mov v25.16b, v0.16b + DIFF_LUMA_EQ4_P2P1P0_1 v0, v1, v2, v3, v4, v5, v23, v19, v17, v16 + DIFF_LUMA_EQ4_P2P1P0_2 v25, v1, v2, v3, v4, v5, v24, v19, v17, v16 + ins v0.d[1], v25.d[1] + ins v23.d[1], v24.d[1] + and v21.16b, v20.16b, v21.16b + DIFF_LUMA_EQ4_MASK v19, v1, v21, v17 + st1 {v17.16b}, [x3], x1 + DIFF_LUMA_EQ4_MASK v0, v2, v21, v17 + st1 {v17.16b}, [x3], x1 + DIFF_LUMA_EQ4_MASK v23, v3, v18, v17 + st1 {v17.16b}, [x3], x1 + + + mov v23.16b, v22.16b + mov 
v24.16b, v22.16b + mov v25.16b, v7.16b + DIFF_LUMA_EQ4_P2P1P0_1 v7, v6, v5, v4, v3, v2, v23, v19, v17, v16 + DIFF_LUMA_EQ4_P2P1P0_2 v25, v6, v5, v4, v3, v2, v24, v19, v17, v16 + ins v7.d[1], v25.d[1] + ins v23.d[1], v24.d[1] + and v22.16b, v20.16b, v22.16b + DIFF_LUMA_EQ4_MASK v23, v4, v18, v17 + st1 {v17.16b}, [x3], x1 + DIFF_LUMA_EQ4_MASK v7, v5, v22, v17 + st1 {v17.16b}, [x3], x1 + DIFF_LUMA_EQ4_MASK v19, v6, v22, v17 + st1 {v17.16b}, [x3], x1 +DeblockLumaEq4V_AArch64_neon_end: +WELS_ASM_AARCH64_FUNC_END + + +WELS_ASM_AARCH64_FUNC_BEGIN DeblockLumaLt4H_AArch64_neon //uint8_t* pPix, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* tc + dup v16.16b, w2 //alpha + dup v17.16b, w3 //beta + sub x2, x0, #3 + movi v23.16b, #128 + SIGN_EXTENSION x1,w1 + LOAD_LUMA_DATA_3 v0, v1, v2, v3, v4, v5, 0 + LOAD_LUMA_DATA_3 v0, v1, v2, v3, v4, v5, 1 + LOAD_LUMA_DATA_3 v0, v1, v2, v3, v4, v5, 2 + LOAD_LUMA_DATA_3 v0, v1, v2, v3, v4, v5, 3 + LOAD_LUMA_DATA_3 v0, v1, v2, v3, v4, v5, 4 + LOAD_LUMA_DATA_3 v0, v1, v2, v3, v4, v5, 5 + LOAD_LUMA_DATA_3 v0, v1, v2, v3, v4, v5, 6 + LOAD_LUMA_DATA_3 v0, v1, v2, v3, v4, v5, 7 + + LOAD_LUMA_DATA_3 v0, v1, v2, v3, v4, v5, 8 + LOAD_LUMA_DATA_3 v0, v1, v2, v3, v4, v5, 9 + LOAD_LUMA_DATA_3 v0, v1, v2, v3, v4, v5, 10 + LOAD_LUMA_DATA_3 v0, v1, v2, v3, v4, v5, 11 + LOAD_LUMA_DATA_3 v0, v1, v2, v3, v4, v5, 12 + LOAD_LUMA_DATA_3 v0, v1, v2, v3, v4, v5, 13 + LOAD_LUMA_DATA_3 v0, v1, v2, v3, v4, v5, 14 + LOAD_LUMA_DATA_3 v0, v1, v2, v3, v4, v5, 15 + + sub x0, x0, x1, lsl #4 + + ld4r {v18.8b, v19.8b, v20.8b, v21.8b}, [x4] + trn1 v18.2s, v18.2s, v19.2s + trn1 v20.2s, v20.2s, v21.2s + trn1 v6.2d, v18.2d, v20.2d // iTc0: 0000, 1111, 2222, 3333 + cmge v7.16b, v6.16b, #0 // iTc0 Flag + + MASK_MATRIX v1, v2, v3, v4, v16, v17, v18 + and v7.16b, v7.16b, v18.16b // need filter flag + + ZERO_JUMP_END v7, x3, x4, DeblockLumaLt4H_AArch64_neon_end + + eor v18.16b, v18.16b, v18.16b + sub v18.16b, v18.16b, v6.16b // -iTc0: 0000, 1111, 2222, 3333 + + 
DIFF_LUMA_LT4_P1_Q1 v0, v1, v2, v3, v17, v18, v6, v7, v19, v20 //Use Tmp v23,v24 + mov v25.16b, v19.16b + + DIFF_LUMA_LT4_P1_Q1 v5, v4, v3, v2, v17, v18, v6, v7, v21, v22 //Use Tmp v23,v24 + + abs v20.16b, v20.16b + abs v22.16b, v22.16b + add v6.16b, v6.16b, v20.16b + add v6.16b, v6.16b, v22.16b + eor v18.16b, v18.16b, v18.16b + sub v18.16b, v18.16b, v6.16b + + DIFF_LUMA_LT4_P0_Q0_1 v1, v2, v3, v4, v19, v20, v22 + DIFF_LUMA_LT4_P0_Q0_2 v1, v2, v3, v4, v19, v20, v22 + + smax v19.16b, v19.16b, v18.16b + smin v19.16b, v19.16b, v6.16b + and v19.16b, v19.16b, v7.16b + + EXTRACT_DELTA_INTO_TWO_PART v19, v20 + uqadd v2.16b, v2.16b, v20.16b + uqsub v2.16b, v2.16b, v19.16b + mov v26.16b, v2.16b + uqsub v3.16b, v3.16b, v20.16b + uqadd v3.16b, v3.16b, v19.16b + mov v27.16b, v3.16b + mov v28.16b, v21.16b + + sub x0, x0, #2 + add x2, x0, x1 + lsl x1, x1, #1 + + STORE_LUMA_DATA_4 v25, v26, v27, v28, 0, 1 + STORE_LUMA_DATA_4 v25, v26, v27, v28, 2, 3 + STORE_LUMA_DATA_4 v25, v26, v27, v28, 4, 5 + STORE_LUMA_DATA_4 v25, v26, v27, v28, 6, 7 + + STORE_LUMA_DATA_4 v25, v26, v27, v28, 8, 9 + STORE_LUMA_DATA_4 v25, v26, v27, v28, 10, 11 + STORE_LUMA_DATA_4 v25, v26, v27, v28, 12, 13 + STORE_LUMA_DATA_4 v25, v26, v27, v28, 14, 15 +DeblockLumaLt4H_AArch64_neon_end: +WELS_ASM_AARCH64_FUNC_END + + +WELS_ASM_AARCH64_FUNC_BEGIN DeblockLumaEq4H_AArch64_neon + dup v16.16b, w2 //alpha + dup v17.16b, w3 //beta + sub x3, x0, #4 + SIGN_EXTENSION x1,w1 + LOAD_LUMA_DATA_4 v0, v1, v2, v3, v4, v5, v6, v7, 0 + LOAD_LUMA_DATA_4 v0, v1, v2, v3, v4, v5, v6, v7, 1 + LOAD_LUMA_DATA_4 v0, v1, v2, v3, v4, v5, v6, v7, 2 + LOAD_LUMA_DATA_4 v0, v1, v2, v3, v4, v5, v6, v7, 3 + LOAD_LUMA_DATA_4 v0, v1, v2, v3, v4, v5, v6, v7, 4 + LOAD_LUMA_DATA_4 v0, v1, v2, v3, v4, v5, v6, v7, 5 + LOAD_LUMA_DATA_4 v0, v1, v2, v3, v4, v5, v6, v7, 6 + LOAD_LUMA_DATA_4 v0, v1, v2, v3, v4, v5, v6, v7, 7 + + LOAD_LUMA_DATA_4 v0, v1, v2, v3, v4, v5, v6, v7, 8 + LOAD_LUMA_DATA_4 v0, v1, v2, v3, v4, v5, v6, v7, 9 + LOAD_LUMA_DATA_4 v0, 
v1, v2, v3, v4, v5, v6, v7, 10 + LOAD_LUMA_DATA_4 v0, v1, v2, v3, v4, v5, v6, v7, 11 + LOAD_LUMA_DATA_4 v0, v1, v2, v3, v4, v5, v6, v7, 12 + LOAD_LUMA_DATA_4 v0, v1, v2, v3, v4, v5, v6, v7, 13 + LOAD_LUMA_DATA_4 v0, v1, v2, v3, v4, v5, v6, v7, 14 + LOAD_LUMA_DATA_4 v0, v1, v2, v3, v4, v5, v6, v7, 15 + + sub x0, x0, x1, lsl #4 + sub x3, x0, #3 + MASK_MATRIX v2, v3, v4, v5, v16, v17, v18 + + ZERO_JUMP_END v18, x4, x5, DeblockLumaEq4H_AArch64_neon_end + + lsr w2, w2, #2 + add w2, w2, #2 + dup v16.16b, w2 //((alpha >> 2) + 2) + uabd v19.16b, v3.16b, v4.16b + cmhi v20.16b, v16.16b, v19.16b //iDetaP0Q0 < ((iAlpha >> 2) + 2) + + uabd v21.16b, v1.16b, v3.16b + cmhi v21.16b, v17.16b, v21.16b //bDetaP2P0 + and v21.16b, v21.16b, v20.16b //(iDetaP0Q0 < ((iAlpha >> 2) + 2))&&bDetaP2P0 + + uabd v22.16b, v6.16b, v4.16b + cmhi v22.16b, v17.16b, v22.16b //bDetaQ2Q0 + and v22.16b, v22.16b, v20.16b //(iDetaP0Q0 < ((iAlpha >> 2) + 2))&&bDetaQ2Q0 + and v20.16b, v20.16b, v18.16b //(iDetaP0Q0 < iAlpha) && bDetaP1P0 && bDetaQ1Q0&&(iDetaP0Q0 < ((iAlpha >> 2) + 2)) + + mov v23.16b, v21.16b + mov v24.16b, v21.16b + + mov v25.16b, v0.16b + DIFF_LUMA_EQ4_P2P1P0_1 v0, v1, v2, v3, v4, v5, v23, v19, v17, v16 + DIFF_LUMA_EQ4_P2P1P0_2 v25, v1, v2, v3, v4, v5, v24, v19, v17, v16 + ins v0.d[1], v25.d[1] + ins v23.d[1], v24.d[1] + and v21.16b, v20.16b, v21.16b + DIFF_LUMA_EQ4_MASK v19, v1, v21, v17 + mov v26.16b, v17.16b + DIFF_LUMA_EQ4_MASK v0, v2, v21, v17 + mov v27.16b, v17.16b + DIFF_LUMA_EQ4_MASK v23, v3, v18, v17 + mov v28.16b, v17.16b + + + mov v23.16b, v22.16b + mov v24.16b, v22.16b + mov v25.16b, v7.16b + DIFF_LUMA_EQ4_P2P1P0_1 v7, v6, v5, v4, v3, v2, v23, v19, v17, v16 + DIFF_LUMA_EQ4_P2P1P0_2 v25, v6, v5, v4, v3, v2, v24, v19, v17, v16 + ins v7.d[1], v25.d[1] + ins v23.d[1], v24.d[1] + and v22.16b, v20.16b, v22.16b + DIFF_LUMA_EQ4_MASK v23, v4, v18, v17 + mov v29.16b, v17.16b + DIFF_LUMA_EQ4_MASK v7, v5, v22, v17 + mov v30.16b, v17.16b + DIFF_LUMA_EQ4_MASK v19, v6, v22, v17 + mov v31.16b, 
v17.16b + + STORE_LUMA_DATA_3 v26, v27, v28, v29, v30, v31, 0 + STORE_LUMA_DATA_3 v26, v27, v28, v29, v30, v31, 1 + STORE_LUMA_DATA_3 v26, v27, v28, v29, v30, v31, 2 + STORE_LUMA_DATA_3 v26, v27, v28, v29, v30, v31, 3 + STORE_LUMA_DATA_3 v26, v27, v28, v29, v30, v31, 4 + STORE_LUMA_DATA_3 v26, v27, v28, v29, v30, v31, 5 + STORE_LUMA_DATA_3 v26, v27, v28, v29, v30, v31, 6 + STORE_LUMA_DATA_3 v26, v27, v28, v29, v30, v31, 7 + STORE_LUMA_DATA_3 v26, v27, v28, v29, v30, v31, 8 + STORE_LUMA_DATA_3 v26, v27, v28, v29, v30, v31, 9 + STORE_LUMA_DATA_3 v26, v27, v28, v29, v30, v31, 10 + STORE_LUMA_DATA_3 v26, v27, v28, v29, v30, v31, 11 + STORE_LUMA_DATA_3 v26, v27, v28, v29, v30, v31, 12 + STORE_LUMA_DATA_3 v26, v27, v28, v29, v30, v31, 13 + STORE_LUMA_DATA_3 v26, v27, v28, v29, v30, v31, 14 + STORE_LUMA_DATA_3 v26, v27, v28, v29, v30, v31, 15 +DeblockLumaEq4H_AArch64_neon_end: +WELS_ASM_AARCH64_FUNC_END + + +WELS_ASM_AARCH64_FUNC_BEGIN DeblockChromaLt4V_AArch64_neon //uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStrideX, int32_t iAlpha, int32_t iBeta, int8_t* pTc + dup v16.16b, w3 //alpha + dup v17.16b, w4 //beta + lsl x3, x2, #1 + sub x6, x0, x3 //pPixCb-2*Stride + sub x7, x1, x3 //pPixCr-2*Stride + + ld1 {v0.d} [0], [x6], x2 + ld1 {v1.d} [0], [x6] + ld1 {v2.d} [0], [x0], x2 + ld1 {v3.d} [0], [x0] + ld1 {v0.d} [1], [x7], x2 + ld1 {v1.d} [1], [x7] + ld1 {v2.d} [1], [x1], x2 + ld1 {v3.d} [1], [x1] + + ld4r {v18.8b, v19.8b, v20.8b, v21.8b}, [x5] + trn1 v18.4h, v18.4h, v19.4h //0011,0011, + trn1 v20.4h, v20.4h, v21.4h //2233,2233 + zip1 v6.4s, v18.4s, v20.4s //iTc0: 0011,2233,0011,2233 + cmgt v7.16b, v6.16b, #0 // iTc0 Flag + + MASK_MATRIX v0, v1, v2, v3, v16, v17, v18 + and v7.16b, v7.16b, v18.16b // need filter flag + + ZERO_JUMP_END v7, x4, x5, DeblockChromaLt4V_AArch64_neon_end + + eor v18.16b, v18.16b, v18.16b + sub v18.16b, v18.16b, v6.16b //-iTc0: 0011,2233,0011,2233 + + DIFF_LUMA_LT4_P0_Q0_1 v0, v1, v2, v3, v19, v20, v22 + DIFF_LUMA_LT4_P0_Q0_2 v0, v1, v2, v3, v19, 
v20, v22 + + smax v19.16b, v19.16b, v18.16b + smin v19.16b, v19.16b, v6.16b + and v19.16b, v19.16b, v7.16b + + EXTRACT_DELTA_INTO_TWO_PART v19, v20 + uqadd v1.16b, v1.16b, v20.16b + uqsub v1.16b, v1.16b, v19.16b + st1 {v1.d} [0], [x6], x2 + st1 {v1.d} [1], [x7], x2 + uqsub v2.16b, v2.16b, v20.16b + uqadd v2.16b, v2.16b, v19.16b + st1 {v2.d} [0], [x6] + st1 {v2.d} [1], [x7] +DeblockChromaLt4V_AArch64_neon_end: +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN DeblockChromaLt4H_AArch64_neon //uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStrideX, int32_t iAlpha, int32_t iBeta, int8_t* pTc + dup v16.16b, w3 //alpha + dup v17.16b, w4 //beta + sub x6, x0, #2 //pPixCb-2 + sub x7, x1, #2 //pPixCr-2 + + LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x6, 0 + LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x6, 1 + LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x6, 2 + LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x6, 3 + LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x6, 4 + LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x6, 5 + LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x6, 6 + LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x6, 7 + + LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x7, 8 + LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x7, 9 + LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x7, 10 + LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x7, 11 + LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x7, 12 + LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x7, 13 + LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x7, 14 + LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x7, 15 + + sub x0, x0, #1 + sub x1, x1, #1 + + ld4r {v18.8b, v19.8b, v20.8b, v21.8b}, [x5] + trn1 v18.4h, v18.4h, v19.4h //0011,0011, + trn1 v20.4h, v20.4h, v21.4h //2233,2233 + zip1 v6.4s, v18.4s, v20.4s //iTc0: 0011,2233,0011,2233 + cmgt v7.16b, v6.16b, #0 // iTc0 Flag + + MASK_MATRIX v0, v1, v2, v3, v16, v17, v18 + and v7.16b, v7.16b, v18.16b // need filter flag + + ZERO_JUMP_END v7, x4, x5, DeblockChromaLt4H_AArch64_neon_end + eor v18.16b, v18.16b, v18.16b + sub v18.16b, v18.16b, v6.16b //-iTc0: 0011,2233,0011,2233 + + DIFF_LUMA_LT4_P0_Q0_1 v0, v1, v2, v3, v19, v20, v22 + 
DIFF_LUMA_LT4_P0_Q0_2 v0, v1, v2, v3, v19, v20, v22 + + smax v19.16b, v19.16b, v18.16b + smin v19.16b, v19.16b, v6.16b + and v19.16b, v19.16b, v7.16b + + EXTRACT_DELTA_INTO_TWO_PART v19, v20 + uqadd v1.16b, v1.16b, v20.16b + uqsub v1.16b, v1.16b, v19.16b + uqsub v2.16b, v2.16b, v20.16b + uqadd v2.16b, v2.16b, v19.16b + + STORE_CHROMA_DATA_2 v1, v2, x0, 0 + STORE_CHROMA_DATA_2 v1, v2, x0, 1 + STORE_CHROMA_DATA_2 v1, v2, x0, 2 + STORE_CHROMA_DATA_2 v1, v2, x0, 3 + STORE_CHROMA_DATA_2 v1, v2, x0, 4 + STORE_CHROMA_DATA_2 v1, v2, x0, 5 + STORE_CHROMA_DATA_2 v1, v2, x0, 6 + STORE_CHROMA_DATA_2 v1, v2, x0, 7 + + STORE_CHROMA_DATA_2 v1, v2, x1, 8 + STORE_CHROMA_DATA_2 v1, v2, x1, 9 + STORE_CHROMA_DATA_2 v1, v2, x1, 10 + STORE_CHROMA_DATA_2 v1, v2, x1, 11 + STORE_CHROMA_DATA_2 v1, v2, x1, 12 + STORE_CHROMA_DATA_2 v1, v2, x1, 13 + STORE_CHROMA_DATA_2 v1, v2, x1, 14 + STORE_CHROMA_DATA_2 v1, v2, x1, 15 +DeblockChromaLt4H_AArch64_neon_end: +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN DeblockChromaEq4V_AArch64_neon //uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStrideX, int32_t iAlpha, int32_t iBeta + dup v16.16b, w3 //alpha + dup v17.16b, w4 //beta + lsl x3, x2, #1 + sub x6, x0, x3 //pPixCb-2*Stride + sub x7, x1, x3 //pPixCr-2*Stride + + ld1 {v0.d} [0], [x6], x2 + ld1 {v1.d} [0], [x6] + ld1 {v2.d} [0], [x0], x2 + ld1 {v3.d} [0], [x0] + ld1 {v0.d} [1], [x7], x2 + ld1 {v1.d} [1], [x7] + ld1 {v2.d} [1], [x1], x2 + ld1 {v3.d} [1], [x1] + + MASK_MATRIX v0, v1, v2, v3, v16, v17, v7 + + ZERO_JUMP_END v7, x3, x4, DeblockChromaEq4V_AArch64_neon_end + + DIFF_CHROMA_EQ4_P0Q0_1 v0, v1, v2, v3, v18, v19, v20, v21 + DIFF_CHROMA_EQ4_P0Q0_2 v0, v1, v2, v3, v18, v19, v20, v21 + + mov v6.16b, v7.16b + bsl v6.16b, v20.16b, v1.16b + bsl v7.16b, v21.16b, v2.16b + + st1 {v6.d} [0], [x6], x2 + st1 {v6.d} [1], [x7], x2 + + st1 {v7.d} [0], [x6] + st1 {v7.d} [1], [x7] +DeblockChromaEq4V_AArch64_neon_end: +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN DeblockChromaEq4H_AArch64_neon 
//uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStrideX, int32_t iAlpha, int32_t iBeta + dup v16.16b, w3 //alpha + dup v17.16b, w4 //beta + + sub x6, x0, #2 //pPixCb-2 + sub x7, x1, #2 //pPixCr-2 + + LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x6, 0 + LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x6, 1 + LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x6, 2 + LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x6, 3 + LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x6, 4 + LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x6, 5 + LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x6, 6 + LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x6, 7 + + LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x7, 8 + LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x7, 9 + LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x7, 10 + LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x7, 11 + LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x7, 12 + LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x7, 13 + LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x7, 14 + LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x7, 15 + sub x0, x0, #1 + sub x1, x1, #1 + + MASK_MATRIX v0, v1, v2, v3, v16, v17, v7 + + ZERO_JUMP_END v7, x3, x4, DeblockChromaEq4H_AArch64_neon_end + + DIFF_CHROMA_EQ4_P0Q0_1 v0, v1, v2, v3, v18, v19, v20, v21 + DIFF_CHROMA_EQ4_P0Q0_2 v0, v1, v2, v3, v18, v19, v20, v21 + + mov v6.16b, v7.16b + bsl v6.16b, v20.16b, v1.16b + bsl v7.16b, v21.16b, v2.16b + + STORE_CHROMA_DATA_2 v6, v7, x0, 0 + STORE_CHROMA_DATA_2 v6, v7, x0, 1 + STORE_CHROMA_DATA_2 v6, v7, x0, 2 + STORE_CHROMA_DATA_2 v6, v7, x0, 3 + STORE_CHROMA_DATA_2 v6, v7, x0, 4 + STORE_CHROMA_DATA_2 v6, v7, x0, 5 + STORE_CHROMA_DATA_2 v6, v7, x0, 6 + STORE_CHROMA_DATA_2 v6, v7, x0, 7 + + STORE_CHROMA_DATA_2 v6, v7, x1, 8 + STORE_CHROMA_DATA_2 v6, v7, x1, 9 + STORE_CHROMA_DATA_2 v6, v7, x1, 10 + STORE_CHROMA_DATA_2 v6, v7, x1, 11 + STORE_CHROMA_DATA_2 v6, v7, x1, 12 + STORE_CHROMA_DATA_2 v6, v7, x1, 13 + STORE_CHROMA_DATA_2 v6, v7, x1, 14 + STORE_CHROMA_DATA_2 v6, v7, x1, 15 +DeblockChromaEq4H_AArch64_neon_end: +WELS_ASM_AARCH64_FUNC_END + + +WELS_ASM_AARCH64_FUNC_BEGIN DeblockingBSCalcEnc_AArch64_neon + // Checking the nzc status + BS_NZC_CHECK 
x0, x2, x3, v16, v17 //v16,v17 save the nzc status + // For checking bS[I] = 2 + movi v0.16b, #0 + cmgt v16.16b, v16.16b, v0.16b + cmgt v17.16b, v17.16b, v0.16b + movi v0.16b, #2 + + and v16.16b, v16.16b, v0.16b //v16 save the nzc check result all the time --- for dir is top + and v17.16b, v17.16b, v0.16b //v17 save the nzc check result all the time --- for dir is left + + // Checking the mv status + BS_MV_CHECK x1, x2, x3, v18, v19, v5 , v6 //v18, v19 save the mv status + // For checking bS[I] = 1 + movi v0.16b, #1 + and v18.16b, v18.16b, v0.16b //v18 save the mv check result all the time --- for dir is top + and v19.16b, v19.16b, v0.16b //v19 save the mv check result all the time --- for dir is left + // Check bS[I] is '1' or '2' + umax v1.16b, v18.16b, v16.16b + umax v0.16b, v19.16b, v17.16b + st1 {v0.16b, v1.16b}, [x4] +WELS_ASM_AARCH64_FUNC_END + + +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/arm64/expand_picture_aarch64_neon.S b/TMessagesProj/jni/third_party/openh264/src/codec/common/arm64/expand_picture_aarch64_neon.S new file mode 100644 index 000000000..933de27f0 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/arm64/expand_picture_aarch64_neon.S @@ -0,0 +1,150 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifdef HAVE_NEON_AARCH64 +#include "arm_arch64_common_macro.S" +//void ExpandPictureLuma_AArch64_neon (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicW, const int32_t kiPicH); pads the picture at pDst in place: 32 bytes of each row's first/last pixel go to its left/right, then the padded first and last rows are each copied to the 32 rows beyond them +WELS_ASM_AARCH64_FUNC_BEGIN ExpandPictureLuma_AArch64_neon + SIGN_EXTENSION x1,w1 //macro from arm_arch64_common_macro.S; presumably widens the 32-bit argument to 64 bits (TODO confirm) + SIGN_EXTENSION x2,w2 + SIGN_EXTENSION x3,w3 + mov x7, x0 //x7: first pixel of the current row + mov x8, x3 //x8: rows remaining, starts at kiPicH + add x4, x7, x2 + sub x4, x4, #1 //x4: last pixel of the current row + mov x10, #16 //post-increment for the 16-byte column loads below + //For the left and right expand, one row per iteration +_expand_picture_luma_loop2: + sub x5, x7, #32 //x5: 32 bytes left of the row start + add x6, x4, #1 //x6: first byte right of the row end + ld1r {v0.16b}, [x7], x1 //broadcast the row's first pixel, step to the next row + ld1r {v2.16b}, [x4], x1 //broadcast the row's last pixel, step to the next row + mov v1.16b, v0.16b + mov v3.16b, v2.16b + st2 {v0.16b, v1.16b}, [x5] //writes 32 bytes; the interleave is harmless because both registers hold the same byte + st2 {v2.16b, v3.16b}, [x6] + sub x8, x8, #1 + cbnz x8, _expand_picture_luma_loop2 + //for the top and bottom expand, in 16-byte columns across the padded width (kiPicW plus 64) + add x2, x2, #64 + sub x0, x0, #32 //x0: start of the padded first row + madd x4, x1, x3, x0 + sub x4, x4, x1 //x4: start of the padded last row +_expand_picture_luma_loop0: + mov x5, #32 + msub x5, x5, x1, x0 //x5: same column, 32 rows above the first row + add x6, x4, x1 //x6: same column, first row below the last row + ld1 {v0.16b}, [x0], x10 //16 bytes of the first row, advance to the next column + ld1 {v1.16b}, [x4], x10 //16 bytes of the last row, advance to the next column + mov x8, #32 //32 rows are filled above and below +_expand_picture_luma_loop1: + st1 {v0.16b}, [x5], x1 + st1 {v1.16b}, [x6], x1 + sub x8, x8, #1 + cbnz x8, _expand_picture_luma_loop1 + + sub x2, x2, #16 + cbnz x2, _expand_picture_luma_loop0 //NOTE(review): exits only when x2 hits exactly 0, so kiPicW plus 64 must be a multiple of 16 - confirm callers guarantee this +WELS_ASM_AARCH64_FUNC_END + 
+//void ExpandPictureChroma_AArch64_neon (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicW, +// const int32_t kiPicH); pads the picture at pDst in place: 16 bytes of each row's first/last pixel go to its left/right, then the padded first and last rows are each copied to the 16 rows beyond them +WELS_ASM_AARCH64_FUNC_BEGIN ExpandPictureChroma_AArch64_neon + //Save the dst + SIGN_EXTENSION x1,w1 //macro from arm_arch64_common_macro.S; presumably widens the 32-bit argument to 64 bits (TODO confirm) + SIGN_EXTENSION x2,w2 + SIGN_EXTENSION x3,w3 + mov x7, x0 //x7: first pixel of the current row + mov x8, x3 //x8: rows remaining, starts at kiPicH + mov x10, #16 //post-increment for the 16-byte column loads below + add x4, x7, x2 + sub x4, x4, #1 //x4: last pixel of the current row + //For the left and right expand, one row per iteration +_expand_picture_chroma_loop2: + sub x5, x7, #16 //x5: 16 bytes left of the row start + add x6, x4, #1 //x6: first byte right of the row end + + ld1r {v0.16b}, [x7], x1 //broadcast the row's first pixel, step to the next row + ld1r {v1.16b}, [x4], x1 //broadcast the row's last pixel, step to the next row + + st1 {v0.16b}, [x5] + st1 {v1.16b}, [x6] + sub x8, x8, #1 + cbnz x8, _expand_picture_chroma_loop2 + + //for the top and bottom expand, across the padded width (kiPicW plus 32) + add x2, x2, #32 + //x9 keeps the full padded width; x2 is rounded down to a multiple of 16 for the 16-byte column loop + mov x9, x2 + mov x11, #15 + bic x2, x2, x11 + // + sub x0, x0, #16 //x0: start of the padded first row + madd x4, x1, x3, x0 + sub x4, x4, x1 //x4: start of the padded last row +_expand_picture_chroma_loop0: + mov x5, #16 + msub x5, x5, x1, x0 //x5: same column, 16 rows above the first row + add x6, x4, x1 //x6: same column, first row below the last row + ld1 {v0.16b}, [x0], x10 //16 bytes of the first row, advance to the next column + ld1 {v1.16b}, [x4], x10 //16 bytes of the last row, advance to the next column + + mov x8, #16 //16 rows are filled above and below +_expand_picture_chroma_loop1: + st1 {v0.16b}, [x5], x1 + st1 {v1.16b}, [x6], x1 + sub x8, x8, #1 + cbnz x8, _expand_picture_chroma_loop1 + + sub x2, x2, #16 + cbnz x2, _expand_picture_chroma_loop0 + + and x9, x9, #15 //leftover bytes of the padded width after the 16-byte columns + sub x9, x9, #8 + cbnz x9, _expand_picture_chroma_end //only a leftover of exactly 8 bytes is copied; any other remainder skips the tail + mov x5, #16 + msub x5, x5, x1, x0 + add x6, x4, x1 + ld1 {v0.8b}, [x0] //final 8 bytes of the first row + ld1 {v1.8b}, [x4] //final 8 bytes of the last row + + mov x8, #16 +_expand_picture_chroma_loop3: + st1 {v0.8b}, [x5], x1 + st1 {v1.8b}, [x6], x1 + sub x8, x8, #1 + cbnz x8, _expand_picture_chroma_loop3 +_expand_picture_chroma_end: + +WELS_ASM_AARCH64_FUNC_END +#endif + + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/arm64/intra_pred_common_aarch64_neon.S b/TMessagesProj/jni/third_party/openh264/src/codec/common/arm64/intra_pred_common_aarch64_neon.S new file mode 100644 index 000000000..c18c2d463 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/arm64/intra_pred_common_aarch64_neon.S @@ -0,0 +1,58 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifdef HAVE_NEON_AARCH64 +#include "arm_arch64_common_macro.S" + +//for Luma 16x16 +//void WelsI16x16LumaPredV_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); fills 16 consecutive 16-byte rows of pPred with the 16 bytes located one stride before pRef +WELS_ASM_AARCH64_FUNC_BEGIN WelsI16x16LumaPredV_AArch64_neon + SIGN_EXTENSION x2,w2 //macro from arm_arch64_common_macro.S; presumably widens the 32-bit stride to 64 bits (TODO confirm) + sub x3, x1, x2 //x3: pRef minus one stride, i.e. the row above + ld1 {v0.16b}, [x3] //the 16 bytes of that row, loaded once +.rept 16 + st1 {v0.16b}, [x0], 16 //store the same 16 bytes, pPred advances by 16 per row +.endr +WELS_ASM_AARCH64_FUNC_END + +//void WelsI16x16LumaPredH_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); fills each of the 16 consecutive 16-byte rows of pPred with the byte immediately before the matching row of pRef +WELS_ASM_AARCH64_FUNC_BEGIN WelsI16x16LumaPredH_AArch64_neon + SIGN_EXTENSION x2,w2 + sub x3, x1, #1 //x3: byte immediately left of pRef +.rept 16 + ld1r {v0.16b}, [x3], x2 //broadcast this row's left neighbour, step down one stride + st1 {v0.16b}, [x0], 16 //pPred advances by 16 per row +.endr +WELS_ASM_AARCH64_FUNC_END + +#endif + + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/arm64/mc_aarch64_neon.S b/TMessagesProj/jni/third_party/openh264/src/codec/common/arm64/mc_aarch64_neon.S new file mode 100644 index 000000000..e4e43f998 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/arm64/mc_aarch64_neon.S @@ -0,0 +1,2614 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifdef HAVE_NEON_AARCH64 +#include "arm_arch64_common_macro.S" +.align 4 +filter_para: .short 0, 1, -5, 20, 0, 0, 0, 0 + +.macro FILTER_6TAG_8BITS1 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8 +// { // input:src[-2], src[-1], src[0], src[1], src[2], src[3], dst_d, multiplier a/b; working: v18, v19 + uaddl v18.8h, \arg0\().8b, \arg5\().8b //v18=src[-2]+src[3] + uaddl v19.8h, \arg2\().8b, \arg3\().8b //src[0]+src[1] + mla v18.8h, v19.8h, \arg7\().8h //v18 += 20*(src[0]+src[1]), 2 cycles + uaddl v19.8h, \arg1\().8b, \arg4\().8b //src[-1]+src[2] + mls v18.8h, v19.8h, \arg8\().8h //v18 -= 5*(src[-1]+src[2]), 2 cycles + sqrshrun \arg6\().8b, v18.8h, #5 +// } +.endm + +.macro FILTER_6TAG_8BITS2 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8 +// { // input:src[-2], src[-1], src[0], src[1], src[2], src[3], dst_d, multiplier a/b; working: v18, v19 + uaddl2 v18.8h, \arg0\().16b, \arg5\().16b //v18=src[-2]+src[3] + uaddl2 v19.8h, \arg2\().16b, \arg3\().16b //src[0]+src[1] + mla v18.8h, v19.8h, \arg7\().8h //v18 += 20*(src[0]+src[1]), 2 cycles + uaddl2 v19.8h, \arg1\().16b, \arg4\().16b //src[-1]+src[2] + mls v18.8h, v19.8h, \arg8\().8h //v18 -= 5*(src[-1]+src[2]), 2 cycles + sqrshrun2 \arg6\().16b, v18.8h, #5 +// } +.endm + +.macro FILTER_6TAG_8BITS1_AVERAGE_WITH_0 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8 +// { // input:src[-2], src[-1], src[0], src[1], src[2], src[3], dst_d, multiplier a/b; working: 
v18, v19 + uaddl v18.8h, \arg0\().8b, \arg5\().8b //v18=src[-2]+src[3] + uaddl v19.8h, \arg2\().8b, \arg3\().8b //src[0]+src[1] + mla v18.8h, v19.8h, \arg7\().8h //v18 += 20*(src[0]+src[1]), 2 cycles + uaddl v19.8h, \arg1\().8b, \arg4\().8b //src[-1]+src[2] + mls v18.8h, v19.8h, \arg8\().8h //v18 -= 5*(src[-1]+src[2]), 2 cycles + sqrshrun \arg6\().8b, v18.8h, #5 + uaddl v19.8h, \arg2\().8b, \arg6\().8b + rshrn \arg6\().8b, v19.8h, #1 +// } +.endm + +.macro FILTER_6TAG_8BITS2_AVERAGE_WITH_0 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8 +// { // input:src[-2], src[-1], src[0], src[1], src[2], src[3], dst_d, multiplier a/b; working: v18, v19 + uaddl2 v18.8h, \arg0\().16b, \arg5\().16b //v18=src[-2]+src[3] + uaddl2 v19.8h, \arg2\().16b, \arg3\().16b //src[0]+src[1] + mla v18.8h, v19.8h, \arg7\().8h //v18 += 20*(src[0]+src[1]), 2 cycles + uaddl2 v19.8h, \arg1\().16b, \arg4\().16b //src[-1]+src[2] + mls v18.8h, v19.8h, \arg8\().8h //v18 -= 5*(src[-1]+src[2]), 2 cycles + sqrshrun2 \arg6\().16b, v18.8h, #5 + uaddl2 v19.8h, \arg2\().16b, \arg6\().16b + rshrn2 \arg6\().16b, v19.8h, #1 +// } +.endm + +.macro FILTER_6TAG_8BITS1_AVERAGE_WITH_1 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8 +// { // input:src[-2], src[-1], src[0], src[1], src[2], src[3], dst_d, multiplier a/b; working: v18, v19 + uaddl v18.8h, \arg0\().8b, \arg5\().8b //v18=src[-2]+src[3] + uaddl v19.8h, \arg2\().8b, \arg3\().8b //src[0]+src[1] + mla v18.8h, v19.8h, \arg7\().8h //v18 += 20*(src[0]+src[1]), 2 cycles + uaddl v19.8h, \arg1\().8b, \arg4\().8b //src[-1]+src[2] + mls v18.8h, v19.8h, \arg8\().8h //v18 -= 5*(src[-1]+src[2]), 2 cycles + sqrshrun \arg6\().8b, v18.8h, #5 + uaddl v19.8h, \arg3\().8b, \arg6\().8b + rshrn \arg6\().8b, v19.8h, #1 +// } +.endm + +.macro FILTER_6TAG_8BITS2_AVERAGE_WITH_1 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8 +// { // input:src[-2], src[-1], src[0], src[1], src[2], src[3], dst_d, multiplier a/b; working: v18, v19 + uaddl2 v18.8h, \arg0\().16b, 
\arg5\().16b //v18=src[-2]+src[3] + uaddl2 v19.8h, \arg2\().16b, \arg3\().16b //src[0]+src[1] + mla v18.8h, v19.8h, \arg7\().8h //v18 += 20*(src[0]+src[1]), 2 cycles + uaddl2 v19.8h, \arg1\().16b, \arg4\().16b //src[-1]+src[2] + mls v18.8h, v19.8h, \arg8\().8h //v18 -= 5*(src[-1]+src[2]), 2 cycles + sqrshrun2 \arg6\().16b, v18.8h, #5 + uaddl2 v19.8h, \arg3\().16b, \arg6\().16b + rshrn2 \arg6\().16b, v19.8h, #1 +// } +.endm + +.macro FILTER_6TAG_8BITS_TO_16BITS1 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8 +// { // input:d_src[-2], d_src[-1], d_src[0], d_src[1], d_src[2], d_src[3], dst_q, multiplier a/b; working:v31 + uaddl \arg6\().8h, \arg0\().8b, \arg5\().8b //dst_q=src[-2]+src[3] + uaddl v31.8h, \arg2\().8b, \arg3\().8b //src[0]+src[1] + mla \arg6\().8h, v31.8h, \arg7\().8h //dst_q += 20*(src[0]+src[1]), 2 cycles + uaddl v31.8h, \arg1\().8b, \arg4\().8b //src[-1]+src[2] + mls \arg6\().8h, v31.8h, \arg8\().8h //dst_q -= 5*(src[-1]+src[2]), 2 cycles +// } +.endm + +.macro FILTER_6TAG_8BITS_TO_16BITS2 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8 +// { // input:d_src[-2], d_src[-1], d_src[0], d_src[1], d_src[2], d_src[3], dst_q, multiplier a/b; working:v31 + uaddl2 \arg6\().8h, \arg0\().16b, \arg5\().16b //dst_q=src[-2]+src[3] + uaddl2 v31.8h, \arg2\().16b, \arg3\().16b //src[0]+src[1] + mla \arg6\().8h, v31.8h, \arg7\().8h //dst_q += 20*(src[0]+src[1]), 2 cycles + uaddl2 v31.8h, \arg1\().16b, \arg4\().16b //src[-1]+src[2] + mls \arg6\().8h, v31.8h, \arg8\().8h //dst_q -= 5*(src[-1]+src[2]), 2 cycles +// } +.endm + +.macro FILTER_3_IN_16BITS_TO_8BITS1 arg0, arg1, arg2, arg3 +// { // input:a, b, c, dst_d; + sub \arg0\().8h, \arg0\().8h, \arg1\().8h //a-b + sshr \arg0\().8h, \arg0\().8h, #2 //(a-b)/4 + sub \arg0\().8h, \arg0\().8h, \arg1\().8h //(a-b)/4-b + add \arg0\().8h, \arg0\().8h, \arg2\().8h //(a-b)/4-b+c + sshr \arg0\().8h, \arg0\().8h, #2 //((a-b)/4-b+c)/4 + add \arg0\().8h, \arg0\().8h, \arg2\().8h //((a-b)/4-b+c)/4+c = (a-5*b+20*c)/16 + 
sqrshrun \arg3\().8b, \arg0\().8h, #6 //(+32)>>6 +// } +.endm + +.macro FILTER_3_IN_16BITS_TO_8BITS2 arg0, arg1, arg2, arg3 +// { // input:a, b, c, dst_d; + sub \arg0\().8h, \arg0\().8h, \arg1\().8h //a-b + sshr \arg0\().8h, \arg0\().8h, #2 //(a-b)/4 + sub \arg0\().8h, \arg0\().8h, \arg1\().8h //(a-b)/4-b + add \arg0\().8h, \arg0\().8h, \arg2\().8h //(a-b)/4-b+c + sshr \arg0\().8h, \arg0\().8h, #2 //((a-b)/4-b+c)/4 + add \arg0\().8h, \arg0\().8h, \arg2\().8h //((a-b)/4-b+c)/4+c = (a-5*b+20*c)/16 + sqrshrun2 \arg3\().16b, \arg0\().8h, #6 //(+32)>>6 +// } +.endm + +.macro UNPACK_2_16BITS_TO_ABC arg0, arg1, arg2, arg3, arg4 +// { // input:q_src[-2:5], q_src[6:13](avail 8+5)/q_src[6:**](avail 4+5), dst_a, dst_b, dst_c; + ext \arg4\().16b, \arg0\().16b, \arg1\().16b, #4 //src[0] + ext \arg3\().16b, \arg0\().16b, \arg1\().16b, #6 //src[1] + add \arg4\().8h, \arg4\().8h, \arg3\().8h //c=src[0]+src[1] + + ext \arg3\().16b, \arg0\().16b, \arg1\().16b, #2 //src[-1] + ext \arg2\().16b, \arg0\().16b, \arg1\().16b, #8 //src[2] + add \arg3\().8h, \arg3\().8h, \arg2\().8h //b=src[-1]+src[2] + + ext \arg2\().16b, \arg0\().16b, \arg1\().16b, #10 //src[3] + add \arg2\().8h, \arg2\().8h, \arg0\().8h //a=src[-2]+src[3] +// } +.endm + +.macro AVERAGE_TWO_8BITS1 arg0, arg1, arg2 +// { // input:dst_d, src_d A and B; working: v30; dst = (A + B + 1) >> 1 on the low 8 bytes + uaddl v30.8h, \arg2\().8b, \arg1\().8b + rshrn \arg0\().8b, v30.8h, #1 +// } +.endm + +.macro AVERAGE_TWO_8BITS2 arg0, arg1, arg2 +// { // input:dst_d, src_d A and B; working: v30; dst = (A + B + 1) >> 1 on the high 8 bytes + uaddl2 v30.8h, \arg2\().16b, \arg1\().16b + rshrn2 \arg0\().16b, v30.8h, #1 +// } +.endm + +.macro FILTER_SINGLE_TAG_8BITS arg0, arg1, arg2, arg3 +// when width=17/9, used +// { // input: src_d{Y[0][1][2][3][4][5]X}, + rev64 \arg2\().8b, \arg0\().8b // X[5][4][3][2][1][0]O + uaddl \arg2\().8h, \arg0\().8b, \arg2\().8b // each 16bits, *[50][41][32][23][14][05]* + mul \arg2\().4h, \arg2\().4h, \arg1\().4h // 0+1*[50]-5*[41]+20[32] + addv \arg3, \arg2\().4h + sqrshrun \arg0\().8b, 
\arg0\().8h, #5 +// } +.endm + +.macro UNPACK_FILTER_SINGLE_TAG_16BITS arg0, arg1, arg2, arg3, arg4, arg5 +// { // each 16bits; input: d_dst, d_src[0:5], para, working, working, d(low part of d_dst) + ext \arg3\().16b, \arg1\().16b, \arg1\().16b, #14 // X[0][1][2][3][4][5]O + ext \arg4\().16b, \arg3\().16b, \arg3\().16b, #8 // [3][4][5]OX[0][1][2] + rev64 \arg4\().8h, \arg4\().8h // X[5][4][3][2][1][0]O + add \arg3\().8h, \arg3\().8h, \arg4\().8h // each 16bits, *[50][41][32][23][14][05]* + smull \arg3\().4s, \arg3\().4h, \arg2\().4h // 0+1*[50]-5*[41]+20[32] + saddlv \arg5, \arg3\().4s + //sshr \arg0\().2d, \arg0\().2d, #4 + sqrshrun \arg0\().2s, \arg0\().2d, #10 + uqxtn \arg0\().4h, \arg0\().4s + uqxtn \arg0\().8b, \arg0\().8h + // } +.endm + +.macro VEC4_LD1_8BITS_16ELEMENT arg0, arg1, arg2, arg3, arg4, arg5 +//{//load 16bytes * 4rows + ld1 {\arg2\().16b}, [\arg0], \arg1 + ld1 {\arg3\().16b}, [\arg0], \arg1 + ld1 {\arg4\().16b}, [\arg0], \arg1 + ld1 {\arg5\().16b}, [\arg0], \arg1 +//} +.endm + +.macro VEC4_ST1_8BITS_8ELEMENT arg0, arg1, arg2, arg3, arg4, arg5 +//{ + st1 {\arg2\().8b}, [\arg0], \arg1 + st1 {\arg3\().8b}, [\arg0], \arg1 + st1 {\arg4\().8b}, [\arg0], \arg1 + st1 {\arg5\().8b}, [\arg0], \arg1 +//} +.endm + +.macro VEC4_UADDL_8BITS arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, arg11 +//{ + uaddl \arg8\().8h, \arg0\().8b, \arg1\().8b + uaddl \arg9\().8h, \arg2\().8b, \arg3\().8b + uaddl \arg10\().8h, \arg4\().8b, \arg5\().8b + uaddl \arg11\().8h, \arg6\().8b, \arg7\().8b +//} +.endm + +.macro VEC4_UADDL2_8BITS arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, arg11 +//{ // four widening adds of the UPPER 8 bytes of each .16b source pair: arg8=hi(arg0)+hi(arg1), arg9=hi(arg2)+hi(arg3), arg10=hi(arg4)+hi(arg5), arg11=hi(arg6)+hi(arg7); uaddl2 is required because plain uaddl does not accept .16b sources + uaddl2 \arg8\().8h, \arg0\().16b, \arg1\().16b + uaddl2 \arg9\().8h, \arg2\().16b, \arg3\().16b + uaddl2 \arg10\().8h, \arg4\().16b, \arg5\().16b + uaddl2 \arg11\().8h, \arg6\().16b, \arg7\().16b +//} +.endm + +.macro VEC4_MLS_16BITS arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, arg11 +//{ + mls \arg8\().8h, \arg0\().8h, 
\arg1\().8h + mls \arg9\().8h, \arg2\().8h, \arg3\().8h + mls \arg10\().8h, \arg4\().8h, \arg5\().8h + mls \arg11\().8h, \arg6\().8h, \arg7\().8h +//} +.endm + +.macro VEC4_MLA_16BITS arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, arg11 +//{ + mla \arg8\().8h, \arg0\().8h, \arg1\().8h + mla \arg9\().8h, \arg2\().8h, \arg3\().8h + mla \arg10\().8h, \arg4\().8h, \arg5\().8h + mla \arg11\().8h, \arg6\().8h, \arg7\().8h +//} +.endm + +.macro VEC4_SQRSHRUN_16BITS_SHIFT5 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 +//{ + sqrshrun \arg4\().8b, \arg0\().8h, #5 + sqrshrun \arg5\().8b, \arg1\().8h, #5 + sqrshrun \arg6\().8b, \arg2\().8h, #5 + sqrshrun \arg7\().8b, \arg3\().8h, #5 +//} +.endm + +.macro VEC4_SQRSHRUN2_16BITS_SHIFT5 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 +//{ + sqrshrun2 \arg4\().16b, \arg0\().8h, #5 + sqrshrun2 \arg5\().16b, \arg1\().8h, #5 + sqrshrun2 \arg6\().16b, \arg2\().8h, #5 + sqrshrun2 \arg7\().16b, \arg3\().8h, #5 +//} +.endm + +.macro VEC4_RSHRN_16BITS_SHIFT1 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 +//{ + rshrn \arg4\().8b, \arg0\().8h, #1 + rshrn \arg5\().8b, \arg1\().8h, #1 + rshrn \arg6\().8b, \arg2\().8h, #1 + rshrn \arg7\().8b, \arg3\().8h, #1 +//} +.endm + +//(const uint8_t* pSrc {x0}, int32_t iSrcStride{x1}, uint8_t* pDst{x2}, int32_t iDstStride{x3}, int32_t iHeight{x4}) +WELS_ASM_AARCH64_FUNC_BEGIN McHorVer20WidthEq16_AArch64_neon + sub x0, x0, #2 + movi v0.8h, #20, lsl #0 + movi v1.8h, #5, lsl #0 + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x4,w4 +w16_h_mc_luma_loop: + ld1 {v2.8b, v3.8b, v4.8b}, [x0], x1 //only use 21(16+5); v2=src[-2] + trn1 v2.2d, v2.2d, v3.2d + //prfm pldl1strm, [x0] + ext v5.16b, v2.16b, v4.16b, #1 //v5=src[-1] + ext v6.16b, v2.16b, v4.16b, #2 //v6=src[0] + ext v7.16b, v2.16b, v4.16b, #3 //v7=src[1] + ext v16.16b, v2.16b, v4.16b, #4 //v16=src[2] + ext v17.16b, v2.16b, v4.16b, #5 //v17=src[3] + + FILTER_6TAG_8BITS1 v2, v5, v6, v7, v16, v17, v20, v0, v1 + 
FILTER_6TAG_8BITS2 v2, v5, v6, v7, v16, v17, v20, v0, v1 + + sub x4, x4, #1 + st1 {v20.16b}, [x2], x3 //write 16Byte + cbnz x4, w16_h_mc_luma_loop +WELS_ASM_AARCH64_FUNC_END + +//void McHorVer20WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,int32_t iHeight); +WELS_ASM_AARCH64_FUNC_BEGIN McHorVer20WidthEq8_AArch64_neon + sub x0, x0, #2 + stp d8,d9, [sp,#-16]! + movi v8.8h, #20, lsl #0 + movi v9.8h, #5, lsl #0 + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x4,w4 +w8_h_mc_luma_loop: + VEC4_LD1_8BITS_16ELEMENT x0, x1, v16, v20, v24, v28 //load src[-2] in v16,v20,v24,v28 for 4 row; only use 13(8+5); + sub x4, x4, #4 + + //1st row: + ext v17.16b, v16.16b, v16.16b, #5 //src[3] + ext v18.16b, v16.16b, v16.16b, #1 //src[-1] + ext v19.16b, v16.16b, v16.16b, #4 //src[2] + //2nd row: + ext v21.16b, v20.16b, v20.16b, #5 //src[3] + ext v22.16b, v20.16b, v20.16b, #1 //src[-1] + ext v23.16b, v20.16b, v20.16b, #4 //src[2] + //3rd row: + ext v25.16b, v24.16b, v24.16b, #5 //src[3] + ext v26.16b, v24.16b, v24.16b, #1 //src[-1] + ext v27.16b, v24.16b, v24.16b, #4 //src[2] + //4th row: + ext v29.16b, v28.16b, v28.16b, #5 //src[3] + ext v30.16b, v28.16b, v28.16b, #1 //src[-1] + ext v31.16b, v28.16b, v28.16b, #4 //src[2] + + VEC4_UADDL_8BITS v16, v17, v20, v21, v24, v25, v28, v29, v0, v2, v4, v6 //v0/v2/v4/v6=src[-2]+src[3] + VEC4_UADDL_8BITS v18, v19, v22, v23, v26, v27, v30, v31, v1, v3, v5, v7 //v1/v3/v5/v7=src[-1]+src[2] + VEC4_MLS_16BITS v1, v9, v3, v9, v5, v9, v7, v9, v0, v2, v4, v6 //v0/v2/v4/v6 -= 5*(src[-1]+src[2]) + + //1st row: + ext v18.16b, v16.16b, v16.16b, #2 //src[0] + ext v19.16b, v16.16b, v16.16b, #3 //src[1] + //2nd row: + ext v22.16b, v20.16b, v20.16b, #2 //src[0] + ext v23.16b, v20.16b, v20.16b, #3 //src[1] + //3rd row: + ext v26.16b, v24.16b, v24.16b, #2 //src[0] + ext v27.16b, v24.16b, v24.16b, #3 //src[1] + //4th row: + ext v30.16b, v28.16b, v28.16b, #2 //src[0] + ext v31.16b, v28.16b, v28.16b, #3 
//src[1] + + VEC4_UADDL_8BITS v18, v19, v22, v23, v26, v27, v30, v31, v1, v3, v5, v7 //v1/v3/v5/v7=src[0]+src[1] + VEC4_MLA_16BITS v1, v8, v3, v8, v5, v8, v7, v8, v0, v2, v4, v6 //v0/v2/v4/v6+=20*(src[0]+src[1]) + + VEC4_SQRSHRUN_16BITS_SHIFT5 v0, v2, v4, v6, v1, v3, v5, v7 + + VEC4_ST1_8BITS_8ELEMENT x2, x3, v1, v3, v5, v7 + cbnz x4, w8_h_mc_luma_loop + + ldp d8,d9,[sp],#16 +WELS_ASM_AARCH64_FUNC_END + +//void McHorVer20WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); +WELS_ASM_AARCH64_FUNC_BEGIN McHorVer20WidthEq4_AArch64_neon + sub x0, x0, #2 + movi v0.8h, #20, lsl #0 + movi v1.8h, #5, lsl #0 + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x4,w4 + asr x4, x4, #1 +w4_h_mc_luma_loop: + ld1 {v2.16b}, [x0], x1 //only use 9(4+5); 1st row src[-2:6] + //prfm pldl1strm, [x0] + ld1 {v3.16b}, [x0], x1 //only use 9(4+5); 2nd row src[-2:6] + //prfm pldl1strm, [x0] + + zip1 v4.4s, v2.4s, v3.4s // v4=src[-2] 1st:2nd + ext v17.16b, v4.16b, v4.16b, #8 // v17=src[2:5] 1st:2nd + + ext v2.16b, v2.16b, v4.16b, #1 //1st row src[-1:6] + ext v3.16b, v3.16b, v4.16b, #1 //2nd row src[-1:6] + zip1 v5.4s, v2.4s, v3.4s // v5=src[-1:2] 1st:2nd + ext v7.16b, v5.16b, v4.16b, #8 //v7=src[3:6] 1st:2nd + + ext v2.16b, v2.16b, v4.16b, #1 //1st row src[0:6] + ext v3.16b, v3.16b, v4.16b, #1 //2nd row src[0:6] + zip1 v6.4s, v2.4s, v3.4s // v6=src[0:3] 1st:2nd + + ext v2.16b, v2.16b, v4.16b, #1 //1st row src[1:6] + ext v3.16b, v3.16b, v4.16b, #1 //2nd row src[1:6] + zip1 v16.4s, v2.4s, v3.4s // v16=src[1:4] 1st:2nd + + FILTER_6TAG_8BITS1 v4, v5, v6, v16, v17, v7, v20, v0, v1 + + st1 {v20.s}[0], [x2], x3 //write 4Byte + st1 {v20.s}[1], [x2], x3 //write 4Byte + sub x4, x4, #1 + cbnz x4, w4_h_mc_luma_loop +WELS_ASM_AARCH64_FUNC_END + +//void McHorVer10WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); +WELS_ASM_AARCH64_FUNC_BEGIN 
McHorVer10WidthEq16_AArch64_neon + sub x0, x0, #2 + movi v0.8h, #20, lsl #0 + movi v1.8h, #5, lsl #0 + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x4,w4 +w16_xy_10_mc_luma_loop: + ld1 {v2.8b, v3.8b, v4.8b}, [x0], x1 //only use 21(16+5); v2=src[-2] + trn1 v2.2d, v2.2d, v3.2d + //prfm pldl1strm, [x0] + ext v5.16b, v2.16b, v4.16b, #1 //v5=src[-1] + ext v6.16b, v2.16b, v4.16b, #2 //v6=src[0] + ext v7.16b, v2.16b, v4.16b, #3 //v7=src[1] + ext v16.16b, v2.16b, v4.16b, #4 //v16=src[2] + ext v17.16b, v2.16b, v4.16b, #5 //v17=src[3] + + FILTER_6TAG_8BITS1_AVERAGE_WITH_0 v2, v5, v6, v7, v16, v17, v20, v0, v1 + FILTER_6TAG_8BITS2_AVERAGE_WITH_0 v2, v5, v6, v7, v16, v17, v20, v0, v1 + + sub x4, x4, #1 + st1 {v20.16b}, [x2], x3 //write 16Byte + cbnz x4, w16_xy_10_mc_luma_loop +WELS_ASM_AARCH64_FUNC_END + +//void McHorVer10WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); +WELS_ASM_AARCH64_FUNC_BEGIN McHorVer10WidthEq8_AArch64_neon + sub x0, x0, #2 + stp d8,d9, [sp,#-16]! 
+ movi v8.8h, #20, lsl #0 + movi v9.8h, #5, lsl #0 + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x4,w4 +w8_xy_10_mc_luma_loop: + VEC4_LD1_8BITS_16ELEMENT x0, x1, v16, v20, v24, v28 //load src[-2] in v16,v20,v24,v28 for 4 row; only use 13(8+5); + sub x4, x4, #4 + + //1st row: + ext v17.16b, v16.16b, v16.16b, #5 //src[3] + ext v18.16b, v16.16b, v16.16b, #1 //src[-1] + ext v19.16b, v16.16b, v16.16b, #4 //src[2] + //2nd row: + ext v21.16b, v20.16b, v20.16b, #5 //src[3] + ext v22.16b, v20.16b, v20.16b, #1 //src[-1] + ext v23.16b, v20.16b, v20.16b, #4 //src[2] + //3rd row: + ext v25.16b, v24.16b, v24.16b, #5 //src[3] + ext v26.16b, v24.16b, v24.16b, #1 //src[-1] + ext v27.16b, v24.16b, v24.16b, #4 //src[2] + //4th row: + ext v29.16b, v28.16b, v28.16b, #5 //src[3] + ext v30.16b, v28.16b, v28.16b, #1 //src[-1] + ext v31.16b, v28.16b, v28.16b, #4 //src[2] + + VEC4_UADDL_8BITS v16, v17, v20, v21, v24, v25, v28, v29, v0, v2, v4, v6 //v0/v2/v4/v6=src[-2]+src[3] + VEC4_UADDL_8BITS v18, v19, v22, v23, v26, v27, v30, v31, v1, v3, v5, v7 //v1/v3/v5/v7=src[-1]+src[2] + VEC4_MLS_16BITS v1, v9, v3, v9, v5, v9, v7, v9, v0, v2, v4, v6 //v0/v2/v4/v6 -= 5*(src[-1]+src[2]) + + //1st row: + ext v18.16b, v16.16b, v16.16b, #2 //src[0] + ext v19.16b, v16.16b, v16.16b, #3 //src[1] + //2nd row: + ext v22.16b, v20.16b, v20.16b, #2 //src[0] + ext v23.16b, v20.16b, v20.16b, #3 //src[1] + //3rd row: + ext v26.16b, v24.16b, v24.16b, #2 //src[0] + ext v27.16b, v24.16b, v24.16b, #3 //src[1] + //4th row: + ext v30.16b, v28.16b, v28.16b, #2 //src[0] + ext v31.16b, v28.16b, v28.16b, #3 //src[1] + + VEC4_UADDL_8BITS v18, v19, v22, v23, v26, v27, v30, v31, v1, v3, v5, v7 //v1/v3/v5/v7=src[0]+src[1] + VEC4_MLA_16BITS v1, v8, v3, v8, v5, v8, v7, v8, v0, v2, v4, v6 //v0/v2/v4/v6+=20*(src[0]+src[1]) + VEC4_SQRSHRUN_16BITS_SHIFT5 v0, v2, v4, v6, v1, v3, v5, v7 + + VEC4_UADDL_8BITS v1, v18, v3, v22, v5, v26, v7, v30, v0, v2, v4, v6 //average with src[0] (v18/v22/v26/v30) + VEC4_RSHRN_16BITS_SHIFT1 v0, v2, v4, v6, 
v1, v3, v5, v7 + + VEC4_ST1_8BITS_8ELEMENT x2, x3, v1, v3, v5, v7 + cbnz x4, w8_xy_10_mc_luma_loop + + ldp d8,d9,[sp],#16 +WELS_ASM_AARCH64_FUNC_END + +//void McHorVer10WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); +WELS_ASM_AARCH64_FUNC_BEGIN McHorVer10WidthEq4_AArch64_neon + sub x0, x0, #2 + movi v0.8h, #20, lsl #0 + movi v1.8h, #5, lsl #0 + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x4,w4 + asr x4, x4, #1 +w4_xy_10_mc_luma_loop: + ld1 {v2.16b}, [x0], x1 //only use 9(4+5); 1st row src[-2:6] + //prfm pldl1strm, [x0] + ld1 {v3.16b}, [x0], x1 //only use 9(4+5); 2nd row src[-2:6] + //prfm pldl1strm, [x0] + + zip1 v4.4s, v2.4s, v3.4s // v4=src[-2] 1st:2nd + ext v17.16b, v4.16b, v4.16b, #8 // v17=src[2:5] 1st:2nd + + ext v2.16b, v2.16b, v4.16b, #1 //1st row src[-1:6] + ext v3.16b, v3.16b, v4.16b, #1 //2nd row src[-1:6] + zip1 v5.4s, v2.4s, v3.4s // v5=src[-1:2] 1st:2nd + ext v7.16b, v5.16b, v4.16b, #8 //v7=src[3:6] 1st:2nd + + ext v2.16b, v2.16b, v4.16b, #1 //1st row src[0:6] + ext v3.16b, v3.16b, v4.16b, #1 //2nd row src[0:6] + zip1 v6.4s, v2.4s, v3.4s // v6=src[0:3] 1st:2nd + + ext v2.16b, v2.16b, v4.16b, #1 //1st row src[1:6] + ext v3.16b, v3.16b, v4.16b, #1 //2nd row src[1:6] + zip1 v16.4s, v2.4s, v3.4s // v16=src[1:4] 1st:2nd + + FILTER_6TAG_8BITS1_AVERAGE_WITH_0 v4, v5, v6, v16, v17, v7, v20, v0, v1 + + st1 {v20.s}[0], [x2], x3 //write 4Byte + st1 {v20.s}[1], [x2], x3 //write 4Byte + sub x4, x4, #1 + cbnz x4, w4_xy_10_mc_luma_loop +WELS_ASM_AARCH64_FUNC_END + +//void McHorVer30WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); +WELS_ASM_AARCH64_FUNC_BEGIN McHorVer30WidthEq16_AArch64_neon + sub x0, x0, #2 + movi v0.8h, #20, lsl #0 + movi v1.8h, #5, lsl #0 + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x4,w4 +w16_xy_30_mc_luma_loop: + ld1 {v2.8b, v3.8b, v4.8b}, [x0], x1 //only use 
21(16+5); v2=src[-2] + trn1 v2.2d, v2.2d, v3.2d + //prfm pldl1strm, [x0] + ext v5.16b, v2.16b, v4.16b, #1 //v5=src[-1] + ext v6.16b, v2.16b, v4.16b, #2 //v6=src[0] + ext v7.16b, v2.16b, v4.16b, #3 //v7=src[1] + ext v16.16b, v2.16b, v4.16b, #4 //v16=src[2] + ext v17.16b, v2.16b, v4.16b, #5 //v17=src[3] + + FILTER_6TAG_8BITS1_AVERAGE_WITH_1 v2, v5, v6, v7, v16, v17, v20, v0, v1 + FILTER_6TAG_8BITS2_AVERAGE_WITH_1 v2, v5, v6, v7, v16, v17, v20, v0, v1 + + sub x4, x4, #1 + st1 {v20.16b}, [x2], x3 //write 16Byte + cbnz x4, w16_xy_30_mc_luma_loop +WELS_ASM_AARCH64_FUNC_END + +//void McHorVer30WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); +WELS_ASM_AARCH64_FUNC_BEGIN McHorVer30WidthEq8_AArch64_neon + sub x0, x0, #2 + stp d8,d9, [sp,#-16]! + movi v8.8h, #20, lsl #0 + movi v9.8h, #5, lsl #0 + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x4,w4 +w8_xy_30_mc_luma_loop: + VEC4_LD1_8BITS_16ELEMENT x0, x1, v16, v20, v24, v28 //load src[-2] in v16,v20,v24,v28 for 4 row; only use 13(8+5); + sub x4, x4, #4 + + //1st row: + ext v17.16b, v16.16b, v16.16b, #5 //src[3] + ext v18.16b, v16.16b, v16.16b, #1 //src[-1] + ext v19.16b, v16.16b, v16.16b, #4 //src[2] + //2nd row: + ext v21.16b, v20.16b, v20.16b, #5 //src[3] + ext v22.16b, v20.16b, v20.16b, #1 //src[-1] + ext v23.16b, v20.16b, v20.16b, #4 //src[2] + //3rd row: + ext v25.16b, v24.16b, v24.16b, #5 //src[3] + ext v26.16b, v24.16b, v24.16b, #1 //src[-1] + ext v27.16b, v24.16b, v24.16b, #4 //src[2] + //4th row: + ext v29.16b, v28.16b, v28.16b, #5 //src[3] + ext v30.16b, v28.16b, v28.16b, #1 //src[-1] + ext v31.16b, v28.16b, v28.16b, #4 //src[2] + + VEC4_UADDL_8BITS v16, v17, v20, v21, v24, v25, v28, v29, v0, v2, v4, v6 //v0/v2/v4/v6=src[-2]+src[3] + VEC4_UADDL_8BITS v18, v19, v22, v23, v26, v27, v30, v31, v1, v3, v5, v7 //v1/v3/v5/v7=src[-1]+src[2] + VEC4_MLS_16BITS v1, v9, v3, v9, v5, v9, v7, v9, v0, v2, v4, v6 //v0/v2/v4/v6 -= 
5*(src[-1]+src[2]) + + //1st row: + ext v18.16b, v16.16b, v16.16b, #2 //src[0] + ext v19.16b, v16.16b, v16.16b, #3 //src[1] + //2nd row: + ext v22.16b, v20.16b, v20.16b, #2 //src[0] + ext v23.16b, v20.16b, v20.16b, #3 //src[1] + //3rd row: + ext v26.16b, v24.16b, v24.16b, #2 //src[0] + ext v27.16b, v24.16b, v24.16b, #3 //src[1] + //4th row: + ext v30.16b, v28.16b, v28.16b, #2 //src[0] + ext v31.16b, v28.16b, v28.16b, #3 //src[1] + + VEC4_UADDL_8BITS v18, v19, v22, v23, v26, v27, v30, v31, v1, v3, v5, v7 //v1/v3/v5/v7=src[0]+src[1] + VEC4_MLA_16BITS v1, v8, v3, v8, v5, v8, v7, v8, v0, v2, v4, v6 //v0/v2/v4/v6+=20*(src[0]+src[1]) + VEC4_SQRSHRUN_16BITS_SHIFT5 v0, v2, v4, v6, v1, v3, v5, v7 + + VEC4_UADDL_8BITS v1, v19, v3, v23, v5, v27, v7, v31, v0, v2, v4, v6 //average with src[1] (v19/v23/v27/v31) + VEC4_RSHRN_16BITS_SHIFT1 v0, v2, v4, v6, v1, v3, v5, v7 + + VEC4_ST1_8BITS_8ELEMENT x2, x3, v1, v3, v5, v7 + cbnz x4, w8_xy_30_mc_luma_loop + + ldp d8,d9,[sp],#16 +WELS_ASM_AARCH64_FUNC_END + +//void McHorVer30WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); +WELS_ASM_AARCH64_FUNC_BEGIN McHorVer30WidthEq4_AArch64_neon + sub x0, x0, #2 + movi v0.8h, #20, lsl #0 + movi v1.8h, #5, lsl #0 + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x4,w4 + asr x4, x4, #1 +w4_xy_30_mc_luma_loop: + ld1 {v2.16b}, [x0], x1 //only use 9(4+5); 1st row src[-2:6] + //prfm pldl1strm, [x0] + ld1 {v3.16b}, [x0], x1 //only use 9(4+5); 2nd row src[-2:6] + //prfm pldl1strm, [x0] + + zip1 v4.4s, v2.4s, v3.4s // v4=src[-2] 1st:2nd + ext v17.16b, v4.16b, v4.16b, #8 // v17=src[2:5] 1st:2nd + + ext v2.16b, v2.16b, v4.16b, #1 //1st row src[-1:6] + ext v3.16b, v3.16b, v4.16b, #1 //2nd row src[-1:6] + zip1 v5.4s, v2.4s, v3.4s // v5=src[-1:2] 1st:2nd + ext v7.16b, v5.16b, v4.16b, #8 //v7=src[3:6] 1st:2nd + + ext v2.16b, v2.16b, v4.16b, #1 //1st row src[0:6] + ext v3.16b, v3.16b, v4.16b, #1 //2nd row src[0:6] + zip1 v6.4s, v2.4s, v3.4s // 
v6=src[0:3] 1st:2nd + + ext v2.16b, v2.16b, v4.16b, #1 //1st row src[1:6] + ext v3.16b, v3.16b, v4.16b, #1 //2nd row src[1:6] + zip1 v16.4s, v2.4s, v3.4s // v16=src[1:4] 1st:2nd + + FILTER_6TAG_8BITS1_AVERAGE_WITH_1 v4, v5, v6, v16, v17, v7, v20, v0, v1 + + st1 {v20.s}[0], [x2], x3 //write 4Byte + st1 {v20.s}[1], [x2], x3 //write 4Byte + sub x4, x4, #1 + cbnz x4, w4_xy_30_mc_luma_loop +WELS_ASM_AARCH64_FUNC_END + +//void McHorVer01WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); +WELS_ASM_AARCH64_FUNC_BEGIN McHorVer01WidthEq16_AArch64_neon + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x4,w4 + sub x0, x0, x1, lsl #1 + movi v0.8h, #20, lsl #0 + movi v1.8h, #5, lsl #0 + + //prfm pldl1strm, [x0] + //prfm pldl1strm, [x0, x1] + ld1 {v2.16b}, [x0], x1 // v2=src[-2*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v3.16b}, [x0], x1 // v3=src[-1*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v4.16b}, [x0], x1 // v4=src[0*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v5.16b}, [x0], x1 // v5=src[1*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v6.16b}, [x0], x1 // v6=src[2*stride] + + +w16_xy_01_mc_luma_loop: + //prfm pldl1strm, [x0, x1] + ld1 {v7.16b}, [x0], x1 // v7=src[3*stride] + FILTER_6TAG_8BITS1_AVERAGE_WITH_0 v2, v3, v4, v5, v6, v7, v20, v0, v1 + FILTER_6TAG_8BITS2_AVERAGE_WITH_0 v2, v3, v4, v5, v6, v7, v20, v0, v1 + st1 {v20.16b}, [x2], x3 //write 16Byte : 0 line + + + //prfm pldl1strm, [x0, x1] + ld1 {v2.16b}, [x0], x1 // v2=src[3*stride] + FILTER_6TAG_8BITS1_AVERAGE_WITH_0 v3, v4, v5, v6, v7, v2, v20, v0, v1 + FILTER_6TAG_8BITS2_AVERAGE_WITH_0 v3, v4, v5, v6, v7, v2, v20, v0, v1 + st1 {v20.16b}, [x2], x3 //write 16Byte : 1 line + + + //prfm pldl1strm, [x0, x1] + ld1 {v3.16b}, [x0], x1 // v3=src[3*stride] + FILTER_6TAG_8BITS1_AVERAGE_WITH_0 v4, v5, v6, v7, v2, v3, v20, v0, v1 + FILTER_6TAG_8BITS2_AVERAGE_WITH_0 v4, v5, v6, v7, v2, v3, v20, v0, v1 + st1 {v20.16b}, [x2], x3 //write 16Byte : 2 
line + + + //prfm pldl1strm, [x0, x1] + ld1 {v4.16b}, [x0], x1 // v4=src[3*stride] + FILTER_6TAG_8BITS1_AVERAGE_WITH_0 v5, v6, v7, v2, v3, v4, v20, v0, v1 + FILTER_6TAG_8BITS2_AVERAGE_WITH_0 v5, v6, v7, v2, v3, v4, v20, v0, v1 + st1 {v20.16b}, [x2], x3 //write 16Byte : 3 line + + + //prfm pldl1strm, [x0, x1] + ld1 {v5.16b}, [x0], x1 // v5=src[3*stride] + FILTER_6TAG_8BITS1_AVERAGE_WITH_0 v6, v7, v2, v3, v4, v5, v20, v0, v1 + FILTER_6TAG_8BITS2_AVERAGE_WITH_0 v6, v7, v2, v3, v4, v5, v20, v0, v1 + st1 {v20.16b}, [x2], x3 //write 16Byte : 4 line + + + //prfm pldl1strm, [x0, x1] + ld1 {v6.16b}, [x0], x1 // v6=src[3*stride] + FILTER_6TAG_8BITS1_AVERAGE_WITH_0 v7, v2, v3, v4, v5, v6, v20, v0, v1 + FILTER_6TAG_8BITS2_AVERAGE_WITH_0 v7, v2, v3, v4, v5, v6, v20, v0, v1 + st1 {v20.16b}, [x2], x3 //write 16Byte : 5 line + + //prfm pldl1strm, [x0, x1] + ld1 {v7.16b}, [x0], x1 // v7=src[3*stride] + FILTER_6TAG_8BITS1_AVERAGE_WITH_0 v2, v3, v4, v5, v6, v7, v20, v0, v1 + FILTER_6TAG_8BITS2_AVERAGE_WITH_0 v2, v3, v4, v5, v6, v7, v20, v0, v1 + st1 {v20.16b}, [x2], x3 //write 16Byte : 6 line + + //prfm pldl1strm, [x0, x1] + ld1 {v2.16b}, [x0], x1 // v2=src[3*stride] + FILTER_6TAG_8BITS1_AVERAGE_WITH_0 v3, v4, v5, v6, v7, v2, v20, v0, v1 + FILTER_6TAG_8BITS2_AVERAGE_WITH_0 v3, v4, v5, v6, v7, v2, v20, v0, v1 + st1 {v20.16b}, [x2], x3 //write 16Byte : 7 line + + mov v3.16b, v5.16b + mov v5.16b, v7.16b + mov v7.16b, v2.16b + mov v2.16b, v4.16b + mov v4.16b, v6.16b + mov v6.16b, v7.16b + sub x4, x4, #8 + cbnz x4, w16_xy_01_mc_luma_loop +WELS_ASM_AARCH64_FUNC_END + +//void McHorVer01WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); +WELS_ASM_AARCH64_FUNC_BEGIN McHorVer01WidthEq8_AArch64_neon + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x4,w4 + sub x0, x0, x1, lsl #1 + movi v30.8h, #20, lsl #0 + movi v31.8h, #5, lsl #0 + + ld1 {v16.8b}, [x0], x1 // v16=src[-2*stride] + ld1 {v17.8b}, [x0], x1 // 
v17=src[-1*stride] + ld1 {v18.8b}, [x0], x1 // v18=src[0*stride] + ld1 {v19.8b}, [x0], x1 // v19=src[1*stride] + ld1 {v20.8b}, [x0], x1 // v20=src[2*stride] + +w8_xy_01_mc_luma_loop: + ld1 {v21.8b}, [x0], x1 // v21=src[3*stride] + ld1 {v22.8b}, [x0], x1 // v22=src[4*stride] + ld1 {v23.8b}, [x0], x1 // v23=src[5*stride] + ld1 {v24.8b}, [x0], x1 // v24=src[6*stride] + + VEC4_UADDL_8BITS v16, v21, v17, v22, v18, v23, v19, v24, v0, v2, v4, v6 //v0/v2/v4/v6 =src[-2]+src[3] + VEC4_UADDL_8BITS v17, v20, v18, v21, v19, v22, v20, v23, v1, v3, v5, v7 //v1/v3/v5/v7 =src[-1]+src[2] + VEC4_MLS_16BITS v1, v31, v3, v31, v5, v31, v7, v31, v0, v2, v4, v6 //v0/v2/v4/v6 -=5*(src[-1]+src[2]) + VEC4_UADDL_8BITS v18, v19, v19, v20, v20, v21, v21, v22, v1, v3, v5, v7 //v1/v3/v5/v7 =src[0]+src[1] + VEC4_MLA_16BITS v1, v30, v3, v30, v5, v30, v7, v30, v0, v2, v4, v6 //v0/v2/v4/v6 += 20*(src[0]+src[1]) + VEC4_SQRSHRUN_16BITS_SHIFT5 v0, v2, v4, v6, v1, v3, v5, v7 + + VEC4_UADDL_8BITS v1, v18, v3, v19, v5, v20, v7, v21, v0, v2, v4, v6 //v0/v2/v4/v6 = average with src[0] + VEC4_RSHRN_16BITS_SHIFT1 v0, v2, v4, v6, v1, v3, v5, v7 + + VEC4_ST1_8BITS_8ELEMENT x2, x3, v1, v3, v5, v7 //store 8bytes*4row + + sub x4, x4, #4 + mov v16.16b, v20.16b + mov v17.16b, v21.16b + mov v18.16b, v22.16b + mov v19.16b, v23.16b + mov v20.16b, v24.16b + + cbnz x4, w8_xy_01_mc_luma_loop +WELS_ASM_AARCH64_FUNC_END + +//void McHorVer01WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); +WELS_ASM_AARCH64_FUNC_BEGIN McHorVer01WidthEq4_AArch64_neon + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x4,w4 + sub x0, x0, x1, lsl #1 + movi v0.8h, #20, lsl #0 + movi v1.8h, #5, lsl #0 + + //prfm pldl1strm, [x0] + //prfm pldl1strm, [x0, x1] + ld1 {v2.s}[0], [x0], x1 // v2=src[-2*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v3.s}[0], [x0], x1 // v3=src[-1*stride] + mov v2.s[1], v3.s[0] + //prfm pldl1strm, [x0, x1] + ld1 {v4.s}[0], [x0], x1 // 
v4=src[0*stride] + mov v3.s[1], v4.s[0] + //prfm pldl1strm, [x0, x1] + ld1 {v5.s}[0], [x0], x1 // v5=src[1*stride] + mov v4.s[1], v5.s[0] + //prfm pldl1strm, [x0, x1] + ld1 {v6.s}[0], [x0], x1 // v6=src[2*stride] + mov v5.s[1], v6.s[0] + +w4_xy_01_mc_luma_loop: + //prfm pldl1strm, [x0, x1] + ld1 {v7.s}[0], [x0], x1 // v7=src[3*stride] + mov v6.s[1], v7.s[0] + //prfm pldl1strm, [x0, x1] + ld1 {v7.s}[1], [x0], x1 // v7=src[4*stride] + FILTER_6TAG_8BITS1_AVERAGE_WITH_0 v2, v3, v4, v5, v6, v7, v20, v0, v1 + st1 {v20.s}[0], [x2], x3 //write 4Byte : 0 line + st1 {v20.s}[1], [x2], x3 //write 4Byte : 1 line + mov v2.s[0], v7.s[1] + + //prfm pldl1strm, [x0, x1] + ld1 {v2.s}[1], [x0], x1 // v2=src[5*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v3.s}[1], [x0], x1 // v3=src[6*stride] + mov v3.s[0], v2.s[1] + FILTER_6TAG_8BITS1_AVERAGE_WITH_0 v4, v5, v6, v7, v2, v3, v20, v0, v1 + st1 {v20.s}[0], [x2], x3 //write 4Byte : 2 line + st1 {v20.s}[1], [x2], x3 //write 4Byte : 3 line + mov v4.s[0], v3.s[1] + + mov v21.8b, v6.8b + mov v6.8b, v4.8b + mov v4.8b, v2.8b + mov v2.8b, v21.8b + mov v21.8b, v3.8b + mov v3.8b, v7.8b + mov v7.8b, v5.8b + mov v5.8b, v21.8b + + sub x4, x4, #4 + cbnz x4, w4_xy_01_mc_luma_loop +WELS_ASM_AARCH64_FUNC_END + +//void McHorVer03WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); +WELS_ASM_AARCH64_FUNC_BEGIN McHorVer03WidthEq16_AArch64_neon + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x4,w4 + sub x0, x0, x1, lsl #1 + movi v0.8h, #20, lsl #0 + movi v1.8h, #5, lsl #0 + + //prfm pldl1strm, [x0] + //prfm pldl1strm, [x0, x1] + ld1 {v2.16b}, [x0], x1 // v2=src[-2*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v3.16b}, [x0], x1 // v3=src[-1*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v4.16b}, [x0], x1 // v4=src[0*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v5.16b}, [x0], x1 // v5=src[1*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v6.16b}, [x0], x1 // v6=src[2*stride] + + 
+w16_xy_03_mc_luma_loop: + //prfm pldl1strm, [x0, x1] + ld1 {v7.16b}, [x0], x1 // v7=src[3*stride] + FILTER_6TAG_8BITS1_AVERAGE_WITH_1 v2, v3, v4, v5, v6, v7, v20, v0, v1 + FILTER_6TAG_8BITS2_AVERAGE_WITH_1 v2, v3, v4, v5, v6, v7, v20, v0, v1 + st1 {v20.16b}, [x2], x3 //write 16Byte : 0 line + + + //prfm pldl1strm, [x0, x1] + ld1 {v2.16b}, [x0], x1 // v2=src[4*stride] + FILTER_6TAG_8BITS1_AVERAGE_WITH_1 v3, v4, v5, v6, v7, v2, v20, v0, v1 + FILTER_6TAG_8BITS2_AVERAGE_WITH_1 v3, v4, v5, v6, v7, v2, v20, v0, v1 + st1 {v20.16b}, [x2], x3 //write 16Byte : 1 line + + + //prfm pldl1strm, [x0, x1] + ld1 {v3.16b}, [x0], x1 // v3=src[5*stride] + FILTER_6TAG_8BITS1_AVERAGE_WITH_1 v4, v5, v6, v7, v2, v3, v20, v0, v1 + FILTER_6TAG_8BITS2_AVERAGE_WITH_1 v4, v5, v6, v7, v2, v3, v20, v0, v1 + st1 {v20.16b}, [x2], x3 //write 16Byte : 2 line + + + //prfm pldl1strm, [x0, x1] + ld1 {v4.16b}, [x0], x1 // v4=src[6*stride] + FILTER_6TAG_8BITS1_AVERAGE_WITH_1 v5, v6, v7, v2, v3, v4, v20, v0, v1 + FILTER_6TAG_8BITS2_AVERAGE_WITH_1 v5, v6, v7, v2, v3, v4, v20, v0, v1 + st1 {v20.16b}, [x2], x3 //write 16Byte : 3 line + + + //prfm pldl1strm, [x0, x1] + ld1 {v5.16b}, [x0], x1 // v5=src[7*stride] + FILTER_6TAG_8BITS1_AVERAGE_WITH_1 v6, v7, v2, v3, v4, v5, v20, v0, v1 + FILTER_6TAG_8BITS2_AVERAGE_WITH_1 v6, v7, v2, v3, v4, v5, v20, v0, v1 + st1 {v20.16b}, [x2], x3 //write 16Byte : 4 line + + + //prfm pldl1strm, [x0, x1] + ld1 {v6.16b}, [x0], x1 // v6=src[8*stride] + FILTER_6TAG_8BITS1_AVERAGE_WITH_1 v7, v2, v3, v4, v5, v6, v20, v0, v1 + FILTER_6TAG_8BITS2_AVERAGE_WITH_1 v7, v2, v3, v4, v5, v6, v20, v0, v1 + st1 {v20.16b}, [x2], x3 //write 16Byte : 5 line + + //prfm pldl1strm, [x0, x1] + ld1 {v7.16b}, [x0], x1 // v7=src[9*stride] + FILTER_6TAG_8BITS1_AVERAGE_WITH_1 v2, v3, v4, v5, v6, v7, v20, v0, v1 + FILTER_6TAG_8BITS2_AVERAGE_WITH_1 v2, v3, v4, v5, v6, v7, v20, v0, v1 + st1 {v20.16b}, [x2], x3 //write 16Byte : 6 line + + //prfm pldl1strm, [x0, x1] + ld1 {v2.16b}, [x0], x1 // v2=src[10*stride] 
+ FILTER_6TAG_8BITS1_AVERAGE_WITH_1 v3, v4, v5, v6, v7, v2, v20, v0, v1 + FILTER_6TAG_8BITS2_AVERAGE_WITH_1 v3, v4, v5, v6, v7, v2, v20, v0, v1 + st1 {v20.16b}, [x2], x3 //write 16Byte : 7 line + + mov v3.16b, v5.16b + mov v5.16b, v7.16b + mov v7.16b, v2.16b + mov v2.16b, v4.16b + mov v4.16b, v6.16b + mov v6.16b, v7.16b + sub x4, x4, #8 + cbnz x4, w16_xy_03_mc_luma_loop +WELS_ASM_AARCH64_FUNC_END + +//void McHorVer03WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); +WELS_ASM_AARCH64_FUNC_BEGIN McHorVer03WidthEq8_AArch64_neon + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x4,w4 + sub x0, x0, x1, lsl #1 + movi v30.8h, #20, lsl #0 + movi v31.8h, #5, lsl #0 + + ld1 {v16.8b}, [x0], x1 // v16=src[-2*stride] + ld1 {v17.8b}, [x0], x1 // v17=src[-1*stride] + ld1 {v18.8b}, [x0], x1 // v18=src[0*stride] + ld1 {v19.8b}, [x0], x1 // v19=src[1*stride] + ld1 {v20.8b}, [x0], x1 // v20=src[2*stride] + +w8_xy_03_mc_luma_loop: + ld1 {v21.8b}, [x0], x1 // v21=src[3*stride] + ld1 {v22.8b}, [x0], x1 // v22=src[4*stride] + ld1 {v23.8b}, [x0], x1 // v23=src[5*stride] + ld1 {v24.8b}, [x0], x1 // v24=src[6*stride] + + VEC4_UADDL_8BITS v16, v21, v17, v22, v18, v23, v19, v24, v0, v2, v4, v6 //v0/v2/v4/v6 =src[-2]+src[3] + VEC4_UADDL_8BITS v17, v20, v18, v21, v19, v22, v20, v23, v1, v3, v5, v7 //v1/v3/v5/v7 =src[-1]+src[2] + VEC4_MLS_16BITS v1, v31, v3, v31, v5, v31, v7, v31, v0, v2, v4, v6 //v0/v2/v4/v6 -=5*(src[-1]+src[2]) + VEC4_UADDL_8BITS v18, v19, v19, v20, v20, v21, v21, v22, v1, v3, v5, v7 //v1/v3/v5/v7 =src[0]+src[1] + VEC4_MLA_16BITS v1, v30, v3, v30, v5, v30, v7, v30, v0, v2, v4, v6 //v0/v2/v4/v6 += 20*(src[0]+src[1]) + VEC4_SQRSHRUN_16BITS_SHIFT5 v0, v2, v4, v6, v1, v3, v5, v7 + + VEC4_UADDL_8BITS v1, v19, v3, v20, v5, v21, v7, v22, v0, v2, v4, v6 //v0/v2/v4/v6 = average with src[1] + VEC4_RSHRN_16BITS_SHIFT1 v0, v2, v4, v6, v1, v3, v5, v7 + + VEC4_ST1_8BITS_8ELEMENT x2, x3, v1, v3, v5, v7 
//store 8bytes*4row + + sub x4, x4, #4 + mov v16.16b, v20.16b + mov v17.16b, v21.16b + mov v18.16b, v22.16b + mov v19.16b, v23.16b + mov v20.16b, v24.16b + + cbnz x4, w8_xy_03_mc_luma_loop +WELS_ASM_AARCH64_FUNC_END + +//void McHorVer03WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); +WELS_ASM_AARCH64_FUNC_BEGIN McHorVer03WidthEq4_AArch64_neon + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x4,w4 + sub x0, x0, x1, lsl #1 + movi v0.8h, #20, lsl #0 + movi v1.8h, #5, lsl #0 + + //prfm pldl1strm, [x0] + //prfm pldl1strm, [x0, x1] + ld1 {v2.s}[0], [x0], x1 // v2=src[-2*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v3.s}[0], [x0], x1 // v3=src[-1*stride] + mov v2.s[1], v3.s[0] + //prfm pldl1strm, [x0, x1] + ld1 {v4.s}[0], [x0], x1 // v4=src[0*stride] + mov v3.s[1], v4.s[0] + //prfm pldl1strm, [x0, x1] + ld1 {v5.s}[0], [x0], x1 // v5=src[1*stride] + mov v4.s[1], v5.s[0] + //prfm pldl1strm, [x0, x1] + ld1 {v6.s}[0], [x0], x1 // v6=src[2*stride] + mov v5.s[1], v6.s[0] + +w4_xy_03_mc_luma_loop: + //prfm pldl1strm, [x0, x1] + ld1 {v7.s}[0], [x0], x1 // v7=src[3*stride] + mov v6.s[1], v7.s[0] + //prfm pldl1strm, [x0, x1] + ld1 {v7.s}[1], [x0], x1 // v7=src[4*stride] + FILTER_6TAG_8BITS1_AVERAGE_WITH_1 v2, v3, v4, v5, v6, v7, v20, v0, v1 + st1 {v20.s}[0], [x2], x3 //write 4Byte : 0 line + st1 {v20.s}[1], [x2], x3 //write 4Byte : 1 line + mov v2.s[0], v7.s[1] + + //prfm pldl1strm, [x0, x1] + ld1 {v2.s}[1], [x0], x1 // v2=src[5*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v3.s}[1], [x0], x1 // v3=src[6*stride] + mov v3.s[0], v2.s[1] + FILTER_6TAG_8BITS1_AVERAGE_WITH_1 v4, v5, v6, v7, v2, v3, v20, v0, v1 + st1 {v20.s}[0], [x2], x3 //write 4Byte : 2 line + st1 {v20.s}[1], [x2], x3 //write 4Byte : 3 line + mov v4.s[0], v3.s[1] + + mov v21.8b, v6.8b + mov v6.8b, v4.8b + mov v4.8b, v2.8b + mov v2.8b, v21.8b + mov v21.8b, v3.8b + mov v3.8b, v7.8b + mov v7.8b, v5.8b + mov v5.8b, v21.8b + + sub x4, x4, #4 
+ cbnz x4, w4_xy_03_mc_luma_loop +WELS_ASM_AARCH64_FUNC_END + +//void McHorVer02WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); +WELS_ASM_AARCH64_FUNC_BEGIN McHorVer02WidthEq16_AArch64_neon + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x4,w4 + sub x0, x0, x1, lsl #1 + movi v0.8h, #20, lsl #0 + movi v1.8h, #5, lsl #0 + + //prfm pldl1strm, [x0] + //prfm pldl1strm, [x0, x1] + ld1 {v2.16b}, [x0], x1 // v2=src[-2*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v3.16b}, [x0], x1 // v3=src[-1*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v4.16b}, [x0], x1 // v4=src[0*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v5.16b}, [x0], x1 // v5=src[1*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v6.16b}, [x0], x1 // v6=src[2*stride] + + +w16_xy_02_mc_luma_loop: + //prfm pldl1strm, [x0, x1] + ld1 {v7.16b}, [x0], x1 // v7=src[3*stride] + FILTER_6TAG_8BITS1 v2, v3, v4, v5, v6, v7, v20, v0, v1 + FILTER_6TAG_8BITS2 v2, v3, v4, v5, v6, v7, v20, v0, v1 + st1 {v20.16b}, [x2], x3 //write 16Byte : 0 line + + + //prfm pldl1strm, [x0, x1] + ld1 {v2.16b}, [x0], x1 // v2=src[4*stride] + FILTER_6TAG_8BITS1 v3, v4, v5, v6, v7, v2, v20, v0, v1 + FILTER_6TAG_8BITS2 v3, v4, v5, v6, v7, v2, v20, v0, v1 + st1 {v20.16b}, [x2], x3 //write 16Byte : 1 line + + + //prfm pldl1strm, [x0, x1] + ld1 {v3.16b}, [x0], x1 // v3=src[5*stride] + FILTER_6TAG_8BITS1 v4, v5, v6, v7, v2, v3, v20, v0, v1 + FILTER_6TAG_8BITS2 v4, v5, v6, v7, v2, v3, v20, v0, v1 + st1 {v20.16b}, [x2], x3 //write 16Byte : 2 line + + + //prfm pldl1strm, [x0, x1] + ld1 {v4.16b}, [x0], x1 // v4=src[6*stride] + FILTER_6TAG_8BITS1 v5, v6, v7, v2, v3, v4, v20, v0, v1 + FILTER_6TAG_8BITS2 v5, v6, v7, v2, v3, v4, v20, v0, v1 + st1 {v20.16b}, [x2], x3 //write 16Byte : 3 line + + + //prfm pldl1strm, [x0, x1] + ld1 {v5.16b}, [x0], x1 // v5=src[7*stride] + FILTER_6TAG_8BITS1 v6, v7, v2, v3, v4, v5, v20, v0, v1 + FILTER_6TAG_8BITS2 v6, v7, v2, v3, v4, v5, v20, v0, v1 + st1 
{v20.16b}, [x2], x3 //write 16Byte : 4 line + + + //prfm pldl1strm, [x0, x1] + ld1 {v6.16b}, [x0], x1 // v6=src[8*stride] + FILTER_6TAG_8BITS1 v7, v2, v3, v4, v5, v6, v20, v0, v1 + FILTER_6TAG_8BITS2 v7, v2, v3, v4, v5, v6, v20, v0, v1 + st1 {v20.16b}, [x2], x3 //write 16Byte : 5 line + + //prfm pldl1strm, [x0, x1] + ld1 {v7.16b}, [x0], x1 // v7=src[9*stride] + FILTER_6TAG_8BITS1 v2, v3, v4, v5, v6, v7, v20, v0, v1 + FILTER_6TAG_8BITS2 v2, v3, v4, v5, v6, v7, v20, v0, v1 + st1 {v20.16b}, [x2], x3 //write 16Byte : 6 line + + //prfm pldl1strm, [x0, x1] + ld1 {v2.16b}, [x0], x1 // v2=src[10*stride] + FILTER_6TAG_8BITS1 v3, v4, v5, v6, v7, v2, v20, v0, v1 + FILTER_6TAG_8BITS2 v3, v4, v5, v6, v7, v2, v20, v0, v1 + st1 {v20.16b}, [x2], x3 //write 16Byte : 7 line + + mov v3.16b, v5.16b + mov v5.16b, v7.16b + mov v7.16b, v2.16b + mov v2.16b, v4.16b + mov v4.16b, v6.16b + mov v6.16b, v7.16b + sub x4, x4, #8 + cbnz x4, w16_xy_02_mc_luma_loop +WELS_ASM_AARCH64_FUNC_END + +//void McHorVer02WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); +WELS_ASM_AARCH64_FUNC_BEGIN McHorVer02WidthEq8_AArch64_neon + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x4,w4 + sub x0, x0, x1, lsl #1 + movi v30.8h, #20, lsl #0 + movi v31.8h, #5, lsl #0 + + ld1 {v16.8b}, [x0], x1 // v16=src[-2*stride] + ld1 {v17.8b}, [x0], x1 // v17=src[-1*stride] + ld1 {v18.8b}, [x0], x1 // v18=src[0*stride] + ld1 {v19.8b}, [x0], x1 // v19=src[1*stride] + ld1 {v20.8b}, [x0], x1 // v20=src[2*stride] + +w8_xy_02_mc_luma_loop: + ld1 {v21.8b}, [x0], x1 // v21=src[3*stride] + ld1 {v22.8b}, [x0], x1 // v22=src[4*stride] + ld1 {v23.8b}, [x0], x1 // v23=src[5*stride] + ld1 {v24.8b}, [x0], x1 // v24=src[6*stride] + + VEC4_UADDL_8BITS v16, v21, v17, v22, v18, v23, v19, v24, v0, v2, v4, v6 //v0/v2/v4/v6 =src[-2]+src[3] + VEC4_UADDL_8BITS v17, v20, v18, v21, v19, v22, v20, v23, v1, v3, v5, v7 //v1/v3/v5/v7 =src[-1]+src[2] + VEC4_MLS_16BITS v1, 
v31, v3, v31, v5, v31, v7, v31, v0, v2, v4, v6 //v0/v2/v4/v6 -=5*(src[-1]+src[2]) + VEC4_UADDL_8BITS v18, v19, v19, v20, v20, v21, v21, v22, v1, v3, v5, v7 //v1/v3/v5/v7 =src[0]+src[1] + VEC4_MLA_16BITS v1, v30, v3, v30, v5, v30, v7, v30, v0, v2, v4, v6 //v0/v2/v4/v6 += 20*(src[0]+src[1]) + VEC4_SQRSHRUN_16BITS_SHIFT5 v0, v2, v4, v6, v1, v3, v5, v7 + VEC4_ST1_8BITS_8ELEMENT x2, x3, v1, v3, v5, v7 //store 8bytes*4row + + sub x4, x4, #4 + mov v16.16b, v20.16b + mov v17.16b, v21.16b + mov v18.16b, v22.16b + mov v19.16b, v23.16b + mov v20.16b, v24.16b + + cbnz x4, w8_xy_02_mc_luma_loop +WELS_ASM_AARCH64_FUNC_END + +//void McHorVer02WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); +WELS_ASM_AARCH64_FUNC_BEGIN McHorVer02WidthEq4_AArch64_neon + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x4,w4 + sub x0, x0, x1, lsl #1 + movi v0.8h, #20, lsl #0 + movi v1.8h, #5, lsl #0 + + //prfm pldl1strm, [x0] + //prfm pldl1strm, [x0, x1] + ld1 {v2.s}[0], [x0], x1 // v2=src[-2*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v3.s}[0], [x0], x1 // v3=src[-1*stride] + mov v2.s[1], v3.s[0] + //prfm pldl1strm, [x0, x1] + ld1 {v4.s}[0], [x0], x1 // v4=src[0*stride] + mov v3.s[1], v4.s[0] + //prfm pldl1strm, [x0, x1] + ld1 {v5.s}[0], [x0], x1 // v5=src[1*stride] + mov v4.s[1], v5.s[0] + //prfm pldl1strm, [x0, x1] + ld1 {v6.s}[0], [x0], x1 // v6=src[2*stride] + mov v5.s[1], v6.s[0] + +w4_xy_02_mc_luma_loop: + //prfm pldl1strm, [x0, x1] + ld1 {v7.s}[0], [x0], x1 // v7=src[3*stride] + mov v6.s[1], v7.s[0] + //prfm pldl1strm, [x0, x1] + ld1 {v7.s}[1], [x0], x1 // v7=src[4*stride] + FILTER_6TAG_8BITS1 v2, v3, v4, v5, v6, v7, v20, v0, v1 + st1 {v20.s}[0], [x2], x3 //write 4Byte : 0 line + st1 {v20.s}[1], [x2], x3 //write 4Byte : 1 line + mov v2.s[0], v7.s[1] + + //prfm pldl1strm, [x0, x1] + ld1 {v2.s}[1], [x0], x1 // v2=src[5*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v3.s}[1], [x0], x1 // v3=src[6*stride] + mov 
v3.s[0], v2.s[1] + FILTER_6TAG_8BITS1 v4, v5, v6, v7, v2, v3, v20, v0, v1 + st1 {v20.s}[0], [x2], x3 //write 4Byte : 2 line + st1 {v20.s}[1], [x2], x3 //write 4Byte : 3 line + mov v4.s[0], v3.s[1] + + mov v21.8b, v6.8b + mov v6.8b, v4.8b + mov v4.8b, v2.8b + mov v2.8b, v21.8b + mov v21.8b, v3.8b + mov v3.8b, v7.8b + mov v7.8b, v5.8b + mov v5.8b, v21.8b + + sub x4, x4, #4 + cbnz x4, w4_xy_02_mc_luma_loop +WELS_ASM_AARCH64_FUNC_END + +//void McHorVer22WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); +WELS_ASM_AARCH64_FUNC_BEGIN McHorVer22WidthEq16_AArch64_neon + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x4,w4 + stp d8, d9, [sp,#-16]! + stp d10, d11, [sp,#-16]! + stp d12, d13, [sp,#-16]! + stp d14, d15, [sp,#-16]! + sub x0, x0, #2 + sub x0, x0, x1, lsl #1 + movi v0.8h, #20, lsl #0 + movi v1.8h, #5, lsl #0 + + //prfm pldl1strm, [x0] + //prfm pldl1strm, [x0, x1] + ld1 {v2.8b, v3.8b, v4.8b}, [x0], x1 // v2=src[-2*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v5.8b, v6.8b, v7.8b}, [x0], x1 // v5=src[-1*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v8.8b, v9.8b, v10.8b}, [x0], x1 // v8=src[0*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v11.8b, v12.8b, v13.8b}, [x0], x1 // v11=src[1*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v14.8b, v15.8b, v16.8b}, [x0], x1 // v14=src[2*stride] + +w16_hv_mc_luma_loop: + //prfm pldl1strm, [x0, x1] + ld1 {v17.8b, v18.8b, v19.8b}, [x0], x1 // v17=src[3*stride] + // vertical filtered into v20/v21 + FILTER_6TAG_8BITS_TO_16BITS1 v2, v5, v8, v11, v14, v17, v20, v0, v1 + FILTER_6TAG_8BITS_TO_16BITS1 v3, v6, v9, v12, v15, v18, v21, v0, v1 + // horizon filtered + UNPACK_2_16BITS_TO_ABC v20, v21, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS1 v23, v24, v25, v26 //output to v26[0] + // vertical filtered into v21/v22 + FILTER_6TAG_8BITS_TO_16BITS1 v4, v7, v10, v13, v16, v19, v22, v0, v1 + UNPACK_2_16BITS_TO_ABC v21, v22, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS2 
v23, v24, v25, v26 //output to v26[1] + st1 {v26.16b}, [x2], x3 //write 16Byte : 0 line + + //prfm pldl1strm, [x0, x1] + ld1 {v2.8b, v3.8b, v4.8b}, [x0], x1 // v2=src[4*stride] + // vertical filtered into v20/v21 + FILTER_6TAG_8BITS_TO_16BITS1 v5, v8, v11, v14, v17, v2, v20, v0, v1 + FILTER_6TAG_8BITS_TO_16BITS1 v6, v9, v12, v15, v18, v3, v21, v0, v1 + // horizon filtered + UNPACK_2_16BITS_TO_ABC v20, v21, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS1 v23, v24, v25, v26 //output to v26[0] + // vertical filtered into v21/v22 + FILTER_6TAG_8BITS_TO_16BITS1 v7, v10, v13, v16, v19, v4, v22, v0, v1 + UNPACK_2_16BITS_TO_ABC v21, v22, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS2 v23, v24, v25, v26 //output to v26[1] + st1 {v26.16b}, [x2], x3 //write 16Byte : 1 line + + //prfm pldl1strm, [x0, x1] + ld1 {v5.8b, v6.8b, v7.8b}, [x0], x1 // v5=src[5*stride] + // vertical filtered into v20/v21 + FILTER_6TAG_8BITS_TO_16BITS1 v8, v11, v14, v17, v2, v5, v20, v0, v1 + FILTER_6TAG_8BITS_TO_16BITS1 v9, v12, v15, v18, v3, v6, v21, v0, v1 + // horizon filtered + UNPACK_2_16BITS_TO_ABC v20, v21, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS1 v23, v24, v25, v26 //output to v26[0] + // vertical filtered into v21/v22 + FILTER_6TAG_8BITS_TO_16BITS1 v10, v13, v16, v19, v4, v7, v22, v0, v1 + UNPACK_2_16BITS_TO_ABC v21, v22, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS2 v23, v24, v25, v26 //output to v26[1] + st1 {v26.16b}, [x2], x3 //write 16Byte : 2 line + + //prfm pldl1strm, [x0, x1] + ld1 {v8.8b, v9.8b, v10.8b}, [x0], x1 // v8=src[6*stride] + // vertical filtered into v20/v21 + FILTER_6TAG_8BITS_TO_16BITS1 v11, v14, v17, v2, v5, v8, v20, v0, v1 + FILTER_6TAG_8BITS_TO_16BITS1 v12, v15, v18, v3, v6, v9, v21, v0, v1 + // horizon filtered + UNPACK_2_16BITS_TO_ABC v20, v21, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS1 v23, v24, v25, v26 //output to v26[0] + // vertical filtered into v21/v22 + FILTER_6TAG_8BITS_TO_16BITS1 v13, v16, v19, v4, v7, v10, v22, v0, v1 + UNPACK_2_16BITS_TO_ABC v21, v22, 
v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS2 v23, v24, v25, v26 //output to v26[1] + st1 {v26.16b}, [x2], x3 //write 16Byte : 3 line + + //prfm pldl1strm, [x0, x1] + ld1 {v11.8b, v12.8b, v13.8b}, [x0], x1 // v11=src[7*stride] + // vertical filtered into v20/v21 + FILTER_6TAG_8BITS_TO_16BITS1 v14, v17, v2, v5, v8, v11, v20, v0, v1 + FILTER_6TAG_8BITS_TO_16BITS1 v15, v18, v3, v6, v9, v12, v21, v0, v1 + // horizon filtered + UNPACK_2_16BITS_TO_ABC v20, v21, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS1 v23, v24, v25, v26 //output to v26[0] + // vertical filtered into v21/v22 + FILTER_6TAG_8BITS_TO_16BITS1 v16, v19, v4, v7, v10, v13, v22, v0, v1 + UNPACK_2_16BITS_TO_ABC v21, v22, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS2 v23, v24, v25, v26 //output to v26[1] + st1 {v26.16b}, [x2], x3 //write 16Byte : 4 line + + //prfm pldl1strm, [x0, x1] + ld1 {v14.8b, v15.8b, v16.8b}, [x0], x1 // v14=src[8*stride] + // vertical filtered into v20/v21 + FILTER_6TAG_8BITS_TO_16BITS1 v17, v2, v5, v8, v11, v14, v20, v0, v1 + FILTER_6TAG_8BITS_TO_16BITS1 v18, v3, v6, v9, v12, v15, v21, v0, v1 + // horizon filtered + UNPACK_2_16BITS_TO_ABC v20, v21, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS1 v23, v24, v25, v26 //output to v26[0] + // vertical filtered into v21/v22 + FILTER_6TAG_8BITS_TO_16BITS1 v19, v4, v7, v10, v13, v16, v22, v0, v1 + UNPACK_2_16BITS_TO_ABC v21, v22, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS2 v23, v24, v25, v26 //output to v26[1] + st1 {v26.16b}, [x2], x3 //write 16Byte : 5 line + + //prfm pldl1strm, [x0, x1] + ld1 {v17.8b, v18.8b, v19.8b}, [x0], x1 // v17=src[9*stride] + // vertical filtered into v20/v21 + FILTER_6TAG_8BITS_TO_16BITS1 v2, v5, v8, v11, v14, v17, v20, v0, v1 + FILTER_6TAG_8BITS_TO_16BITS1 v3, v6, v9, v12, v15, v18, v21, v0, v1 + // horizon filtered + UNPACK_2_16BITS_TO_ABC v20, v21, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS1 v23, v24, v25, v26 //output to v26[0] + // vertical filtered into v21/v22 + FILTER_6TAG_8BITS_TO_16BITS1 v4, v7, v10, v13, v16, 
v19, v22, v0, v1 + UNPACK_2_16BITS_TO_ABC v21, v22, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS2 v23, v24, v25, v26 //output to v26[1] + st1 {v26.16b}, [x2], x3 //write 16Byte : 6 line + + //prfm pldl1strm, [x0, x1] + ld1 {v2.8b, v3.8b, v4.8b}, [x0], x1 // v2=src[10*stride] + // vertical filtered into v20/v21 + FILTER_6TAG_8BITS_TO_16BITS1 v5, v8, v11, v14, v17, v2, v20, v0, v1 + FILTER_6TAG_8BITS_TO_16BITS1 v6, v9, v12, v15, v18, v3, v21, v0, v1 + // horizon filtered + UNPACK_2_16BITS_TO_ABC v20, v21, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS1 v23, v24, v25, v26 //output to v26[0] + // vertical filtered into v21/v22 + FILTER_6TAG_8BITS_TO_16BITS1 v7, v10, v13, v16, v19, v4, v22, v0, v1 + UNPACK_2_16BITS_TO_ABC v21, v22, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS2 v23, v24, v25, v26 //output to v26[1] + st1 {v26.16b}, [x2], x3 //write 16Byte : 7 line + + mov v5.16b, v11.16b + mov v11.16b, v17.16b + mov v30.16b, v2.16b + mov v2.16b, v8.16b + mov v8.16b, v14.16b + mov v14.16b, v30.16b + + mov v6.16b, v12.16b + mov v12.16b, v18.16b + mov v30.16b, v3.16b + mov v3.16b, v9.16b + mov v9.16b, v15.16b + mov v15.16b, v30.16b + + mov v7.16b, v13.16b + mov v13.16b, v19.16b + mov v30.16b, v4.16b + mov v4.16b, v10.16b + mov v10.16b, v16.16b + mov v16.16b, v30.16b + + sub x4, x4, #8 + cbnz x4, w16_hv_mc_luma_loop + + ldp d14, d15, [sp], #16 + ldp d12, d13, [sp], #16 + ldp d10, d11, [sp], #16 + ldp d8, d9, [sp], #16 +WELS_ASM_AARCH64_FUNC_END + +//void McHorVer22WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); +WELS_ASM_AARCH64_FUNC_BEGIN McHorVer22WidthEq8_AArch64_neon + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x4,w4 + sub x0, x0, #2 + sub x0, x0, x1, lsl #1 + movi v0.8h, #20, lsl #0 + movi v1.8h, #5, lsl #0 + + //prfm pldl1strm, [x0] + //prfm pldl1strm, [x0, x1] + ld1 {v2.16b}, [x0], x1 // v2=src[-2*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v3.16b}, [x0], x1 // v3=src[-1*stride] + 
//prfm pldl1strm, [x0, x1] + ld1 {v4.16b}, [x0], x1 // v4=src[0*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v5.16b}, [x0], x1 // v5=src[1*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v6.16b}, [x0], x1 // v6=src[2*stride] + +w8_hv_mc_luma_loop: + //prfm pldl1strm, [x0, x1] + ld1 {v7.16b}, [x0], x1 // v7=src[3*stride] + // vertical filtered into v20/v21 + FILTER_6TAG_8BITS_TO_16BITS1 v2, v3, v4, v5, v6, v7, v20, v0, v1 + FILTER_6TAG_8BITS_TO_16BITS2 v2, v3, v4, v5, v6, v7, v21, v0, v1 + // horizon filtered + UNPACK_2_16BITS_TO_ABC v20, v21, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS1 v23, v24, v25, v26 //output to v26[0] + st1 {v26.8b}, [x2], x3 //write 8Byte : 0 line + + //prfm pldl1strm, [x0, x1] + ld1 {v2.16b}, [x0], x1 // v2=src[4*stride] + // vertical filtered into v20/v21 + FILTER_6TAG_8BITS_TO_16BITS1 v3, v4, v5, v6, v7, v2, v20, v0, v1 + FILTER_6TAG_8BITS_TO_16BITS2 v3, v4, v5, v6, v7, v2, v21, v0, v1 + // horizon filtered + UNPACK_2_16BITS_TO_ABC v20, v21, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS1 v23, v24, v25, v26 //output to v26[0] + st1 {v26.8b}, [x2], x3 //write 8Byte : 1 line + + //prfm pldl1strm, [x0, x1] + ld1 {v3.16b}, [x0], x1 // v3=src[5*stride] + // vertical filtered into v20/v21 + FILTER_6TAG_8BITS_TO_16BITS1 v4, v5, v6, v7, v2, v3, v20, v0, v1 + FILTER_6TAG_8BITS_TO_16BITS2 v4, v5, v6, v7, v2, v3, v21, v0, v1 + // horizon filtered + UNPACK_2_16BITS_TO_ABC v20, v21, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS1 v23, v24, v25, v26 //output to v26[0] + st1 {v26.8b}, [x2], x3 //write 8Byte : 2 line + + //prfm pldl1strm, [x0, x1] + ld1 {v4.16b}, [x0], x1 // v4=src[6*stride] + // vertical filtered into v20/v21 + FILTER_6TAG_8BITS_TO_16BITS1 v5, v6, v7, v2, v3, v4, v20, v0, v1 + FILTER_6TAG_8BITS_TO_16BITS2 v5, v6, v7, v2, v3, v4, v21, v0, v1 + // horizon filtered + UNPACK_2_16BITS_TO_ABC v20, v21, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS1 v23, v24, v25, v26 //output to v26[0] + st1 {v26.8b}, [x2], x3 //write 8Byte : 3 line + + + mov v5.16b, 
v3.16b + mov v3.16b, v7.16b + mov v30.16b, v2.16b + mov v2.16b, v6.16b + mov v6.16b, v4.16b + mov v4.16b, v30.16b + + sub x4, x4, #4 + cbnz x4, w8_hv_mc_luma_loop +WELS_ASM_AARCH64_FUNC_END +//void McHorVer22WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); + +WELS_ASM_AARCH64_FUNC_BEGIN McHorVer22WidthEq4_AArch64_neon + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x4,w4 + sub x0, x0, #2 + sub x0, x0, x1, lsl #1 + movi v0.8h, #20, lsl #0 + movi v1.8h, #5, lsl #0 + + //prfm pldl1strm, [x0] + //prfm pldl1strm, [x0, x1] + ld1 {v2.16b}, [x0], x1 // v2=src[-2*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v3.16b}, [x0], x1 // v3=src[-1*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v4.16b}, [x0], x1 // v4=src[0*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v5.16b}, [x0], x1 // v5=src[1*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v6.16b}, [x0], x1 // v6=src[2*stride] + +w4_hv_mc_luma_loop: + //prfm pldl1strm, [x0, x1] + ld1 {v7.16b}, [x0], x1 // v7=src[3*stride] + // vertical filtered into v20/v21 1st line + FILTER_6TAG_8BITS_TO_16BITS1 v2, v3, v4, v5, v6, v7, v20, v0, v1 + FILTER_6TAG_8BITS_TO_16BITS2 v2, v3, v4, v5, v6, v7, v21, v0, v1 + //prfm pldl1strm, [x0, x1] + ld1 {v2.16b}, [x0], x1 // v2=src[4*stride] + // vertical filtered into v22/v23 2nd line + FILTER_6TAG_8BITS_TO_16BITS1 v3, v4, v5, v6, v7, v2, v22, v0, v1 + FILTER_6TAG_8BITS_TO_16BITS2 v3, v4, v5, v6, v7, v2, v23, v0, v1 + // horizon filtered + UNPACK_2_16BITS_TO_ABC v20, v21, v24, v25, v26 + UNPACK_2_16BITS_TO_ABC v22, v23, v28, v29, v30 + zip1 v24.2d, v24.2d, v28.2d + zip1 v25.2d, v25.2d, v29.2d + zip1 v26.2d, v26.2d, v30.2d + FILTER_3_IN_16BITS_TO_8BITS1 v24, v25, v26, v27 //output to v27[0] + st1 {v27.s}[0], [x2], x3 //write 4Byte : 0 line + st1 {v27.s}[1], [x2], x3 //write 4Byte : 1 line + + + //prfm pldl1strm, [x0, x1] + ld1 {v3.16b}, [x0], x1 // v3=src[5*stride] + // vertical filtered into v20/v21 + 
FILTER_6TAG_8BITS_TO_16BITS1 v4, v5, v6, v7, v2, v3, v20, v0, v1 + FILTER_6TAG_8BITS_TO_16BITS2 v4, v5, v6, v7, v2, v3, v21, v0, v1 + //prfm pldl1strm, [x0, x1] + ld1 {v4.16b}, [x0], x1 // v4=src[6*stride] + FILTER_6TAG_8BITS_TO_16BITS1 v5, v6, v7, v2, v3, v4, v22, v0, v1 + FILTER_6TAG_8BITS_TO_16BITS2 v5, v6, v7, v2, v3, v4, v23, v0, v1 + // horizon filtered + UNPACK_2_16BITS_TO_ABC v20, v21, v24, v25, v26 + UNPACK_2_16BITS_TO_ABC v22, v23, v28, v29, v30 + zip1 v24.2d, v24.2d, v28.2d + zip1 v25.2d, v25.2d, v29.2d + zip1 v26.2d, v26.2d, v30.2d + FILTER_3_IN_16BITS_TO_8BITS1 v24, v25, v26, v27 //output to v27[0] + st1 {v27.s}[0], [x2], x3 //write 4Byte : 2 line + st1 {v27.s}[1], [x2], x3 //write 4Byte : 3 line + + mov v5.16b, v3.16b + mov v3.16b, v7.16b + mov v30.16b, v2.16b + mov v2.16b, v6.16b + mov v6.16b, v4.16b + mov v4.16b, v30.16b + + sub x4, x4, #4 + cbnz x4, w4_hv_mc_luma_loop +WELS_ASM_AARCH64_FUNC_END +//void McCopyWidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); +WELS_ASM_AARCH64_FUNC_BEGIN McCopyWidthEq16_AArch64_neon + //prfm pldl1strm, [x0] + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x4,w4 +w16_copy_loop: + //prfm pldl1strm, [x0, x1] + ld1 {v0.16b}, [x0], x1 //read 16Byte : 0 line + st1 {v0.16b}, [x2], x3 //write 16Byte : 0 line + //prfm pldl1strm, [x0, x1] + ld1 {v1.16b}, [x0], x1 //read 16Byte : 1 line + st1 {v1.16b}, [x2], x3 //write 16Byte : 1 line + + sub x4, x4, #2 + cbnz x4, w16_copy_loop +WELS_ASM_AARCH64_FUNC_END +//void McCopyWidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); +WELS_ASM_AARCH64_FUNC_BEGIN McCopyWidthEq8_AArch64_neon + //prfm pldl1strm, [x0] + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x4,w4 +w8_copy_loop: + //prfm pldl1strm, [x0, x1] + ld1 {v0.8b}, [x0], x1 //read 8Byte : 0 line + st1 {v0.8b}, [x2], x3 //write 8Byte : 0 line + //prfm pldl1strm, 
[x0, x1] + ld1 {v1.8b}, [x0], x1 //read 8Byte : 1 line + st1 {v1.8b}, [x2], x3 //write 8Byte : 1 line + + sub x4, x4, #2 + cbnz x4, w8_copy_loop +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN McCopyWidthEq4_AArch64_neon + //prfm pldl1strm, [x0] + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x4,w4 +w4_copy_loop: + //prfm pldl1strm, [x0, x1] + ld1 {v0.s}[0], [x0], x1 //read 4Byte : 0 line + st1 {v0.s}[0], [x2], x3 //write 4Byte : 0 line + //prfm pldl1strm, [x0, x1] + ld1 {v1.s}[0], [x0], x1 //read 4Byte : 1 line + st1 {v1.s}[0], [x2], x3 //write 4Byte : 1 line + + sub x4, x4, #2 + cbnz x4, w4_copy_loop +WELS_ASM_AARCH64_FUNC_END + +//void PixStrideAvgWidthEq16_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcStrideA, +//const uint8_t* pSrcB, int32_t iSrcStrideB, int32_t iHeight); + +WELS_ASM_AARCH64_FUNC_BEGIN PixStrideAvgWidthEq16_AArch64_neon + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x5,w5 + SIGN_EXTENSION x6,w6 +enc_w16_pix_avg_loop: + ld1 {v0.16b}, [x2], x3 //read 16Byte : src0: 0 line + ld1 {v1.16b}, [x4], x5 //read 16Byte : src1: 0 line + ld1 {v2.16b}, [x2], x3 //read 16Byte : src0: 1 line + ld1 {v3.16b}, [x4], x5 //read 16Byte : src1: 1 line + ld1 {v4.16b}, [x2], x3 //read 16Byte : src0: 2 line + ld1 {v5.16b}, [x4], x5 //read 16Byte : src1: 2 line + ld1 {v6.16b}, [x2], x3 //read 16Byte : src0: 3 line + ld1 {v7.16b}, [x4], x5 //read 16Byte : src1: 3 line + AVERAGE_TWO_8BITS1 v16, v0, v1 + AVERAGE_TWO_8BITS2 v16, v0, v1 + st1 {v16.16b}, [x0], x1 //write 16Byte : 0 line + + + AVERAGE_TWO_8BITS1 v16, v2, v3 + AVERAGE_TWO_8BITS2 v16, v2, v3 + st1 {v16.16b}, [x0], x1 //write 16Byte : 1 line + + + AVERAGE_TWO_8BITS1 v16, v4, v5 + AVERAGE_TWO_8BITS2 v16, v4, v5 + st1 {v16.16b}, [x0], x1 //write 16Byte : 2 line + + AVERAGE_TWO_8BITS1 v16, v6, v7 + AVERAGE_TWO_8BITS2 v16, v6, v7 + st1 {v16.16b}, [x0], x1 //write 16Byte : 3 line + + sub x6, x6, #4 + cbnz x6, enc_w16_pix_avg_loop 
+WELS_ASM_AARCH64_FUNC_END + +//void PixStrideAvgWidthEq8_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcStrideA, +// const uint8_t* pSrcB, int32_t iSrcStrideB, int32_t iHeight); +WELS_ASM_AARCH64_FUNC_BEGIN PixStrideAvgWidthEq8_AArch64_neon + //prfm pldl1strm, [x2] + //prfm pldl1strm, [x4] + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x5,w5 + SIGN_EXTENSION x6,w6 +enc_w8_pix_avg_loop: + //prfm pldl1strm, [x2, x3] + //prfm pldl1strm, [x4, x5] + ld1 {v0.8b}, [x2], x3 //read 8Byte : src0: 0 line + ld1 {v1.8b}, [x4], x5 //read 8Byte : src1: 0 line + //prfm pldl1strm, [x2, x3] + //prfm pldl1strm, [x4, x5] + ld1 {v2.8b}, [x2], x3 //read 8Byte : src0: 1 line + ld1 {v3.8b}, [x4], x5 //read 8Byte : src1: 1 line + //prfm pldl1strm, [x2, x3] + //prfm pldl1strm, [x4, x5] + ld1 {v4.8b}, [x2], x3 //read 8Byte : src0: 2 line + ld1 {v5.8b}, [x4], x5 //read 8Byte : src1: 2 line + //prfm pldl1strm, [x2, x3] + //prfm pldl1strm, [x4, x5] + ld1 {v6.8b}, [x2], x3 //read 8Byte : src0: 3 line + ld1 {v7.8b}, [x4], x5 //read 8Byte : src1: 3 line + AVERAGE_TWO_8BITS1 v16, v0, v1 + st1 {v16.8b}, [x0], x1 //write 8Byte : 0 line + + AVERAGE_TWO_8BITS1 v16, v2, v3 + st1 {v16.8b}, [x0], x1 //write 8Byte : 1 line + + + AVERAGE_TWO_8BITS1 v16, v4, v5 + st1 {v16.8b}, [x0], x1 //write 8Byte : 2 line + + AVERAGE_TWO_8BITS1 v16, v6, v7 + st1 {v16.8b}, [x0], x1 //write 8Byte : 3 line + + sub x6, x6, #4 + cbnz x6, enc_w8_pix_avg_loop +WELS_ASM_AARCH64_FUNC_END +//void PixelAvgWidthEq16_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, +// const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight); +WELS_ASM_AARCH64_FUNC_BEGIN PixelAvgWidthEq16_AArch64_neon + //prfm pldl1strm, [x2] + //prfm pldl1strm, [x4] + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x5,w5 + SIGN_EXTENSION x6,w6 +w16_pix_avg_loop: + //prfm pldl1strm, [x2, x3] + //prfm pldl1strm, [x4, x5] + ld1 {v0.16b}, [x2], x3 //read 16Byte 
: src0: 0 line + ld1 {v1.16b}, [x4], x5 //read 16Byte : src1: 0 line + //prfm pldl1strm, [x2, x3] + //prfm pldl1strm, [x4, x5] + ld1 {v2.16b}, [x2], x3 //read 16Byte : src0: 1 line + ld1 {v3.16b}, [x4], x5 //read 16Byte : src1: 1 line + //prfm pldl1strm, [x2, x3] + //prfm pldl1strm, [x4, x5] + ld1 {v4.16b}, [x2], x3 //read 16Byte : src0: 2 line + ld1 {v5.16b}, [x4], x5 //read 16Byte : src1: 2 line + //prfm pldl1strm, [x2, x3] + //prfm pldl1strm, [x4, x5] + ld1 {v6.16b}, [x2], x3 //read 16Byte : src0: 3 line + ld1 {v7.16b}, [x4], x5 //read 16Byte : src1: 3 line + AVERAGE_TWO_8BITS1 v16, v0, v1 + AVERAGE_TWO_8BITS2 v16, v0, v1 + st1 {v16.16b}, [x0], x1 //write 16Byte : 0 line + + + AVERAGE_TWO_8BITS1 v16, v2, v3 + AVERAGE_TWO_8BITS2 v16, v2, v3 + st1 {v16.16b}, [x0], x1 //write 16Byte : 1 line + + + AVERAGE_TWO_8BITS1 v16, v4, v5 + AVERAGE_TWO_8BITS2 v16, v4, v5 + st1 {v16.16b}, [x0], x1 //write 16Byte : 2 line + + AVERAGE_TWO_8BITS1 v16, v6, v7 + AVERAGE_TWO_8BITS2 v16, v6, v7 + st1 {v16.16b}, [x0], x1 //write 16Byte : 3 line + + sub x6, x6, #4 + cbnz x6, w16_pix_avg_loop +WELS_ASM_AARCH64_FUNC_END +//void PixelAvgWidthEq8_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, +// const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight); +WELS_ASM_AARCH64_FUNC_BEGIN PixelAvgWidthEq8_AArch64_neon + //prfm pldl1strm, [x2] + //prfm pldl1strm, [x4] + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x5,w5 + SIGN_EXTENSION x6,w6 +w8_pix_avg_loop: + //prfm pldl1strm, [x2, x3] + //prfm pldl1strm, [x4, x5] + ld1 {v0.8b}, [x2], x3 //read 8Byte : src0: 0 line + ld1 {v1.8b}, [x4], x5 //read 8Byte : src1: 0 line + //prfm pldl1strm, [x2, x3] + //prfm pldl1strm, [x4, x5] + ld1 {v2.8b}, [x2], x3 //read 8Byte : src0: 1 line + ld1 {v3.8b}, [x4], x5 //read 8Byte : src1: 1 line + //prfm pldl1strm, [x2, x3] + //prfm pldl1strm, [x4, x5] + ld1 {v4.8b}, [x2], x3 //read 8Byte : src0: 2 line + ld1 {v5.8b}, [x4], x5 //read 8Byte : src1: 2 
line + //prfm pldl1strm, [x2, x3] + //prfm pldl1strm, [x4, x5] + ld1 {v6.8b}, [x2], x3 //read 8Byte : src0: 3 line + ld1 {v7.8b}, [x4], x5 //read 8Byte : src1: 3 line + AVERAGE_TWO_8BITS1 v16, v0, v1 + st1 {v16.8b}, [x0], x1 //write 8Byte : 0 line + + AVERAGE_TWO_8BITS1 v16, v2, v3 + st1 {v16.8b}, [x0], x1 //write 8Byte : 1 line + + + AVERAGE_TWO_8BITS1 v16, v4, v5 + st1 {v16.8b}, [x0], x1 //write 8Byte : 2 line + + AVERAGE_TWO_8BITS1 v16, v6, v7 + st1 {v16.8b}, [x0], x1 //write 8Byte : 3 line + + sub x6, x6, #4 + cbnz x6, w8_pix_avg_loop +WELS_ASM_AARCH64_FUNC_END + +//void PixelAvgWidthEq4_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, +// const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight); +WELS_ASM_AARCH64_FUNC_BEGIN PixelAvgWidthEq4_AArch64_neon + //prfm pldl1strm, [x2] + //prfm pldl1strm, [x4] + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x5,w5 + SIGN_EXTENSION x6,w6 +w4_pix_avg_loop: + //prfm pldl1strm, [x2, x3] + //prfm pldl1strm, [x4, x5] + ld1 {v0.s}[0], [x2], x3 //read 4Byte : src0: 0 line + ld1 {v1.s}[0], [x4], x5 //read 4Byte : src1: 0 line + //prfm pldl1strm, [x2, x3] + //prfm pldl1strm, [x4, x5] + ld1 {v0.s}[1], [x2], x3 //read 4Byte : src0: 1 line + ld1 {v1.s}[1], [x4], x5 //read 4Byte : src1: 1 line + AVERAGE_TWO_8BITS1 v2, v0, v1 + st1 {v2.s}[0], [x0], x1 //write 4Byte : 0 line + st1 {v2.s}[1], [x0], x1 //write 4Byte : 1 line + + sub x6, x6, #2 + cbnz x6, w4_pix_avg_loop +WELS_ASM_AARCH64_FUNC_END +//void McChromaWidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t* pWeights, int32_t iHeight); +WELS_ASM_AARCH64_FUNC_BEGIN McChromaWidthEq8_AArch64_neon + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x5,w5 + ld4r {v28.8b, v29.8b, v30.8b, v31.8b}, [x4] //load A/B/C/D + ld1 {v16.16b}, [x0], x1 // src[x] + ext v17.16b, v16.16b, v16.16b, #1 // src[x+1] +w8_mc_chroma_loop: + ld1 {v18.16b}, [x0], x1 // 
src[x+stride] + ext v19.16b, v18.16b, v18.16b, #1 // src[x+stride+1] + + ld1 {v20.16b}, [x0], x1 // src[x+2*stride] + ext v21.16b, v20.16b, v20.16b, #1 // src[x+2*stride+1] + + ld1 {v22.16b}, [x0], x1 // src[x+3*stride] + ext v23.16b, v22.16b, v22.16b, #1 // src[x+3*stride+1] + + ld1 {v24.16b}, [x0], x1 // src[x+4*stride] + ext v25.16b, v24.16b, v24.16b, #1 // src[x+4*stride+1] + + umull v0.8h, v16.8b, v28.8b + umull v2.8h, v18.8b, v28.8b + umull v4.8h, v20.8b, v28.8b + umull v6.8h, v22.8b, v28.8b + + umlal v0.8h, v17.8b, v29.8b + umlal v2.8h, v19.8b, v29.8b + umlal v4.8h, v21.8b, v29.8b + umlal v6.8h, v23.8b, v29.8b + + umlal v0.8h, v18.8b, v30.8b + umlal v2.8h, v20.8b, v30.8b + umlal v4.8h, v22.8b, v30.8b + umlal v6.8h, v24.8b, v30.8b + + umlal v0.8h, v19.8b, v31.8b + umlal v2.8h, v21.8b, v31.8b + umlal v4.8h, v23.8b, v31.8b + umlal v6.8h, v25.8b, v31.8b + + rshrn v1.8b, v0.8h, #6 + st1 {v1.8b}, [x2], x3 + + rshrn v3.8b, v2.8h, #6 + st1 {v3.8b}, [x2], x3 + + rshrn v5.8b, v4.8h, #6 + st1 {v5.8b}, [x2], x3 + + rshrn v7.8b, v6.8h, #6 + st1 {v7.8b}, [x2], x3 + + mov v16.16b, v24.16b + mov v17.16b, v25.16b + sub x5, x5, #4 + cbnz x5, w8_mc_chroma_loop +WELS_ASM_AARCH64_FUNC_END +//void McChromaWidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t* pWeights, int32_t iHeight); +WELS_ASM_AARCH64_FUNC_BEGIN McChromaWidthEq4_AArch64_neon + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x5,w5 + ld4r {v4.8b, v5.8b, v6.8b, v7.8b}, [x4] //load A/B/C/D + ld1 {v0.8b}, [x0], x1 // src[x] + ext v1.8b, v0.8b, v0.8b, #1 // src[x+1] +w4_mc_chroma_loop: + ld1 {v2.8b}, [x0], x1 // src[x+stride] + ext v3.8b, v2.8b, v2.8b, #1 // src[x+stride+1] + ld1 {v18.8b}, [x0], x1 // src[x+2*stride] + ext v19.8b, v18.8b, v18.8b, #1 // src[x+2*stride+1] + + zip1 v0.4s, v0.4s, v2.4s + zip1 v1.4s, v1.4s, v3.4s + zip1 v2.4s, v2.4s, v18.4s + zip1 v3.4s, v3.4s, v19.4s + + umull v16.8h, v0.8b, v4.8b + umlal v16.8h, v1.8b, v5.8b + umlal 
v16.8h, v2.8b, v6.8b + umlal v16.8h, v3.8b, v7.8b + rshrn v17.8b, v16.8h, #6 + st1 {v17.s}[0], [x2], x3 + st1 {v17.s}[1], [x2], x3 + + mov v0.8b, v18.8b + mov v1.8b, v19.8b + sub x5, x5, #2 + cbnz x5, w4_mc_chroma_loop +WELS_ASM_AARCH64_FUNC_END + +//void McHorVer20Width17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight);// width+1 +WELS_ASM_AARCH64_FUNC_BEGIN McHorVer20Width17_AArch64_neon + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x4,w4 + sub x0, x0, #2 + sub x3, x3, #16 + mov x5, #16 + movi v0.8h, #20, lsl #0 + movi v1.8h, #5, lsl #0 + ldr q22, filter_para +w17_h_mc_luma_loop: + ld1 {v2.16b, v3.16b}, [x0], x1 //only use 22(17+5); v2=src[-2] + + //prfm pldl1strm, [x0] + ext v5.16b, v2.16b, v3.16b, #1 //v5=src[-1] + ext v6.16b, v2.16b, v3.16b, #2 //v6=src[0] + ext v7.16b, v2.16b, v3.16b, #3 //v7=src[1] + ext v16.16b, v2.16b, v3.16b, #4 //v16=src[2] + ext v17.16b, v2.16b, v3.16b, #5 //v17=src[3] + + FILTER_6TAG_8BITS1 v2, v5, v6, v7, v16, v17, v20, v0, v1 + FILTER_6TAG_8BITS2 v2, v5, v6, v7, v16, v17, v20, v0, v1 + st1 {v20.16b}, [x2], x5 //write 16Byte + + ext v21.8b, v3.8b, v3.8b, #7 // [0][1][2][3][4][5]XY-->O[0][1][2][3][4][5]X + FILTER_SINGLE_TAG_8BITS v21, v22, v23, h21 + st1 {v21.b}[0], [x2], x3 //write 16th Byte + + sub x4, x4, #1 + cbnz x4, w17_h_mc_luma_loop +WELS_ASM_AARCH64_FUNC_END + +//void McHorVer20Width9_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight);// width+1 +WELS_ASM_AARCH64_FUNC_BEGIN McHorVer20Width9_AArch64_neon + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x4,w4 + sub x0, x0, #2 + sub x3, x3, #8 + mov x5, #8 + movi v0.8h, #20, lsl #0 + movi v1.8h, #5, lsl #0 + ldr q22, filter_para +w9_h_mc_luma_loop: + ld1 {v2.16b}, [x0], x1 //only use 14(9+5); v2=src[-2] + mov v3.d[0], v2.d[1] + //prfm pldl1strm, [x0] + ext v5.16b, v2.16b, v4.16b, #1 //v5=src[-1] + ext v6.16b, v2.16b, v4.16b, #2 
//v6=src[0] + ext v7.16b, v2.16b, v4.16b, #3 //v7=src[1] + ext v16.16b, v2.16b, v4.16b, #4 //v16=src[2] + ext v17.16b, v2.16b, v4.16b, #5 //v17=src[3] + + FILTER_6TAG_8BITS1 v2, v5, v6, v7, v16, v17, v20, v0, v1 + st1 {v20.8b}, [x2], x5 //write 8Byte + + ext v21.8b, v3.8b, v3.8b, #7 // [0][1][2][3][4][5]XY-->O[0][1][2][3][4][5]X + FILTER_SINGLE_TAG_8BITS v21, v22, v23, h21 + st1 {v21.b}[0], [x2], x3 //write 9th Byte + + sub x4, x4, #1 + cbnz x4, w9_h_mc_luma_loop +WELS_ASM_AARCH64_FUNC_END + +//void McHorVer20Width5_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight);// width+1 +WELS_ASM_AARCH64_FUNC_BEGIN McHorVer20Width5_AArch64_neon + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x4,w4 + sub x0, x0, #2 + sub x3, x3, #4 + mov x5, #4 + movi v0.8h, #20, lsl #0 + movi v1.8h, #5, lsl #0 +w5_h_mc_luma_loop: + ld1 {v2.16b}, [x0], x1 //only use 10(5+5); v2=src[-2] + + ext v5.16b, v2.16b, v4.16b, #1 //v5=src[-1] + ext v6.16b, v2.16b, v4.16b, #2 //v6=src[0] + ext v7.16b, v2.16b, v4.16b, #3 //v7=src[1] + ext v16.16b, v2.16b, v4.16b, #4 //v16=src[2] + ext v17.16b, v2.16b, v4.16b, #5 //v17=src[3] + + FILTER_6TAG_8BITS1 v2, v5, v6, v7, v16, v17, v20, v0, v1 + st1 {v20.s}[0], [x2], x5 //write 4Byte + st1 {v20.b}[4], [x2], x3 //write 5th Byte + + sub x4, x4, #1 + cbnz x4, w5_h_mc_luma_loop +WELS_ASM_AARCH64_FUNC_END + +//void McHorVer22Width17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); +WELS_ASM_AARCH64_FUNC_BEGIN McHorVer22Width17_AArch64_neon + stp d8, d9, [sp,#-16]! + stp d10, d11, [sp,#-16]! + stp d12, d13, [sp,#-16]! + stp d14, d15, [sp,#-16]! 
+ SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x4,w4 + sub x0, x0, #2 + sub x0, x0, x1, lsl #1 + movi v0.8h, #20, lsl #0 + movi v1.8h, #5, lsl #0 + sub x3, x3, #16 + mov x5, #16 + ldr q29, filter_para + + sub x4, x4, #1 + + //prfm pldl1strm, [x0] + //prfm pldl1strm, [x0, x1] + ld1 {v2.8b, v3.8b, v4.8b}, [x0], x1 // v2=src[-2*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v5.8b, v6.8b, v7.8b}, [x0], x1 // v5=src[-1*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v8.8b, v9.8b, v10.8b}, [x0], x1 // v8=src[0*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v11.8b, v12.8b, v13.8b}, [x0], x1 // v11=src[1*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v14.8b, v15.8b, v16.8b}, [x0], x1 // v14=src[2*stride] + +w17_hv_mc_luma_loop: + //prfm pldl1strm, [x0, x1] + ld1 {v17.8b, v18.8b, v19.8b}, [x0], x1 // v17=src[3*stride] + // vertical filtered into v20/v21 + FILTER_6TAG_8BITS_TO_16BITS1 v2, v5, v8, v11, v14, v17, v20, v0, v1 + FILTER_6TAG_8BITS_TO_16BITS1 v3, v6, v9, v12, v15, v18, v21, v0, v1 + // horizon filtered + UNPACK_2_16BITS_TO_ABC v20, v21, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS1 v23, v24, v25, v26 //output to v26[0] + // vertical filtered into v21/v22 + FILTER_6TAG_8BITS_TO_16BITS1 v4, v7, v10, v13, v16, v19, v22, v0, v1 + UNPACK_2_16BITS_TO_ABC v21, v22, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS2 v23, v24, v25, v26 //output to v26[1] + st1 {v26.16b}, [x2], x5 //write 0:15 Byte : 0 line + UNPACK_FILTER_SINGLE_TAG_16BITS v26, v22, v29, v27, v28, d26 + st1 {v26.b}[0], [x2], x3 //write 16th Byte : 0 line + + //prfm pldl1strm, [x0, x1] + ld1 {v2.8b, v3.8b, v4.8b}, [x0], x1 // v2=src[4*stride] + // vertical filtered into v20/v21 + FILTER_6TAG_8BITS_TO_16BITS1 v5, v8, v11, v14, v17, v2, v20, v0, v1 + FILTER_6TAG_8BITS_TO_16BITS1 v6, v9, v12, v15, v18, v3, v21, v0, v1 + // horizon filtered + UNPACK_2_16BITS_TO_ABC v20, v21, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS1 v23, v24, v25, v26 //output to v26[0] + // vertical filtered into v21/v22 + 
FILTER_6TAG_8BITS_TO_16BITS1 v7, v10, v13, v16, v19, v4, v22, v0, v1 + UNPACK_2_16BITS_TO_ABC v21, v22, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS2 v23, v24, v25, v26 //output to v26[1] + st1 {v26.16b}, [x2], x5 //write 0:15Byte : 1 line + UNPACK_FILTER_SINGLE_TAG_16BITS v26, v22, v29, v27, v28, d26 + st1 {v26.b}[0], [x2], x3 //write 16th Byte : 1 line + + //prfm pldl1strm, [x0, x1] + ld1 {v5.8b, v6.8b, v7.8b}, [x0], x1 // v2=src[5*stride] + // vertical filtered into v20/v21 + FILTER_6TAG_8BITS_TO_16BITS1 v8, v11, v14, v17, v2, v5, v20, v0, v1 + FILTER_6TAG_8BITS_TO_16BITS1 v9, v12, v15, v18, v3, v6, v21, v0, v1 + // horizon filtered + UNPACK_2_16BITS_TO_ABC v20, v21, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS1 v23, v24, v25, v26 //output to v26[0] + // vertical filtered into v21/v22 + FILTER_6TAG_8BITS_TO_16BITS1 v10, v13, v16, v19, v4, v7, v22, v0, v1 + UNPACK_2_16BITS_TO_ABC v21, v22, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS2 v23, v24, v25, v26 //output to v26[1] + st1 {v26.16b}, [x2], x5 //write 0:15Byte : 2 line + UNPACK_FILTER_SINGLE_TAG_16BITS v26, v22, v29, v27, v28, d26 + st1 {v26.b}[0], [x2], x3 //write 16th Byte : 2 line + + //prfm pldl1strm, [x0, x1] + ld1 {v8.8b, v9.8b, v10.8b}, [x0], x1 // v2=src[6*stride] + // vertical filtered into v20/v21 + FILTER_6TAG_8BITS_TO_16BITS1 v11, v14, v17, v2, v5, v8, v20, v0, v1 + FILTER_6TAG_8BITS_TO_16BITS1 v12, v15, v18, v3, v6, v9, v21, v0, v1 + // horizon filtered + UNPACK_2_16BITS_TO_ABC v20, v21, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS1 v23, v24, v25, v26 //output to v26[0] + // vertical filtered into v21/v22 + FILTER_6TAG_8BITS_TO_16BITS1 v13, v16, v19, v4, v7, v10, v22, v0, v1 + UNPACK_2_16BITS_TO_ABC v21, v22, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS2 v23, v24, v25, v26 //output to v26[1] + st1 {v26.16b}, [x2], x5 //write 0:15Byte : 3 line + UNPACK_FILTER_SINGLE_TAG_16BITS v26, v22, v29, v27, v28, d26 + st1 {v26.b}[0], [x2], x3 //write 16th Byte : 3 line + + //prfm pldl1strm, [x0, x1] + ld1 
{v11.8b, v12.8b, v13.8b}, [x0], x1 // v2=src[7*stride] + // vertical filtered into v20/v21 + FILTER_6TAG_8BITS_TO_16BITS1 v14, v17, v2, v5, v8, v11, v20, v0, v1 + FILTER_6TAG_8BITS_TO_16BITS1 v15, v18, v3, v6, v9, v12, v21, v0, v1 + // horizon filtered + UNPACK_2_16BITS_TO_ABC v20, v21, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS1 v23, v24, v25, v26 //output to v26[0] + // vertical filtered into v21/v22 + FILTER_6TAG_8BITS_TO_16BITS1 v16, v19, v4, v7, v10, v13, v22, v0, v1 + UNPACK_2_16BITS_TO_ABC v21, v22, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS2 v23, v24, v25, v26 //output to v26[1] + st1 {v26.16b}, [x2], x5 //write 0:15Byte : 4 line + UNPACK_FILTER_SINGLE_TAG_16BITS v26, v22, v29, v27, v28, d26 + st1 {v26.b}[0], [x2], x3 //write 16th Byte : 4 line + + //prfm pldl1strm, [x0, x1] + ld1 {v14.8b, v15.8b, v16.8b}, [x0], x1 // v2=src[8*stride] + // vertical filtered into v20/v21 + FILTER_6TAG_8BITS_TO_16BITS1 v17, v2, v5, v8, v11, v14, v20, v0, v1 + FILTER_6TAG_8BITS_TO_16BITS1 v18, v3, v6, v9, v12, v15, v21, v0, v1 + // horizon filtered + UNPACK_2_16BITS_TO_ABC v20, v21, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS1 v23, v24, v25, v26 //output to v26[0] + // vertical filtered into v21/v22 + FILTER_6TAG_8BITS_TO_16BITS1 v19, v4, v7, v10, v13, v16, v22, v0, v1 + UNPACK_2_16BITS_TO_ABC v21, v22, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS2 v23, v24, v25, v26 //output to v26[1] + st1 {v26.16b}, [x2], x5 //write 0:15Byte : 5 line + UNPACK_FILTER_SINGLE_TAG_16BITS v26, v22, v29, v27, v28, d26 + st1 {v26.b}[0], [x2], x3 //write 16th Byte : 5 line + + //prfm pldl1strm, [x0, x1] + ld1 {v17.8b, v18.8b, v19.8b}, [x0], x1 // v2=src[9*stride] + // vertical filtered into v20/v21 + FILTER_6TAG_8BITS_TO_16BITS1 v2, v5, v8, v11, v14, v17, v20, v0, v1 + FILTER_6TAG_8BITS_TO_16BITS1 v3, v6, v9, v12, v15, v18, v21, v0, v1 + // horizon filtered + UNPACK_2_16BITS_TO_ABC v20, v21, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS1 v23, v24, v25, v26 //output to v26[0] + // vertical filtered 
into v21/v22 + FILTER_6TAG_8BITS_TO_16BITS1 v4, v7, v10, v13, v16, v19, v22, v0, v1 + UNPACK_2_16BITS_TO_ABC v21, v22, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS2 v23, v24, v25, v26 //output to v26[1] + st1 {v26.16b}, [x2], x5 //write 0:15Byte : 6 line + UNPACK_FILTER_SINGLE_TAG_16BITS v26, v22, v29, v27, v28, d26 + st1 {v26.b}[0], [x2], x3 //write 16th Byte : 6 line + + //prfm pldl1strm, [x0, x1] + ld1 {v2.8b, v3.8b, v4.8b}, [x0], x1 // v2=src[10*stride] + // vertical filtered into v20/v21 + FILTER_6TAG_8BITS_TO_16BITS1 v5, v8, v11, v14, v17, v2, v20, v0, v1 + FILTER_6TAG_8BITS_TO_16BITS1 v6, v9, v12, v15, v18, v3, v21, v0, v1 + // horizon filtered + UNPACK_2_16BITS_TO_ABC v20, v21, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS1 v23, v24, v25, v26 //output to v26[0] + // vertical filtered into v21/v22 + FILTER_6TAG_8BITS_TO_16BITS1 v7, v10, v13, v16, v19, v4, v22, v0, v1 + UNPACK_2_16BITS_TO_ABC v21, v22, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS2 v23, v24, v25, v26 //output to v26[1] + st1 {v26.16b}, [x2], x5 //write 0:15Byte : 7 line + UNPACK_FILTER_SINGLE_TAG_16BITS v26, v22, v29, v27, v28, d26 + st1 {v26.b}[0], [x2], x3 //write 16th Byte : 7 line + + mov v5.16b, v11.16b + mov v11.16b, v17.16b + mov v30.16b, v2.16b + mov v2.16b, v8.16b + mov v8.16b, v14.16b + mov v14.16b, v30.16b + + mov v6.16b, v12.16b + mov v12.16b, v18.16b + mov v30.16b, v3.16b + mov v3.16b, v9.16b + mov v9.16b, v15.16b + mov v15.16b, v30.16b + + mov v7.16b, v13.16b + mov v13.16b, v19.16b + mov v30.16b, v4.16b + mov v4.16b, v10.16b + mov v10.16b, v16.16b + mov v16.16b, v30.16b + + sub x4, x4, #8 + cbnz x4, w17_hv_mc_luma_loop + + //prfm pldl1strm, [x0, x1] + ld1 {v17.8b, v18.8b, v19.8b}, [x0], x1 // v17=src[3*stride] + // vertical filtered into v20/v21 + FILTER_6TAG_8BITS_TO_16BITS1 v2, v5, v8, v11, v14, v17, v20, v0, v1 + FILTER_6TAG_8BITS_TO_16BITS1 v3, v6, v9, v12, v15, v18, v21, v0, v1 + // horizon filtered + UNPACK_2_16BITS_TO_ABC v20, v21, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS1 
v23, v24, v25, v26 //output to v26[0] + // vertical filtered into v21/v22 + FILTER_6TAG_8BITS_TO_16BITS1 v4, v7, v10, v13, v16, v19, v22, v0, v1 + UNPACK_2_16BITS_TO_ABC v21, v22, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS2 v23, v24, v25, v26 //output to v26[1] + st1 {v26.16b}, [x2], x5 //write 0:15 Byte : 0 line + UNPACK_FILTER_SINGLE_TAG_16BITS v26, v22, v29, v27, v28, d26 + st1 {v26.b}[0], [x2], x3 //write 16th Byte : 0 line + + ldp d14, d15, [sp], #16 + ldp d12, d13, [sp], #16 + ldp d10, d11, [sp], #16 + ldp d8, d9, [sp], #16 +WELS_ASM_AARCH64_FUNC_END + +//void McHorVer22Width9_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight);//width+1&&height+1 +WELS_ASM_AARCH64_FUNC_BEGIN McHorVer22Width9_AArch64_neon + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x4,w4 + sub x0, x0, #2 + sub x0, x0, x1, lsl #1 + movi v0.8h, #20, lsl #0 + movi v1.8h, #5, lsl #0 + sub x3, x3, #8 + mov x5, #8 + ldr q29, filter_para + sub x4, x4, #1 + + //prfm pldl1strm, [x0] + //prfm pldl1strm, [x0, x1] + ld1 {v2.16b}, [x0], x1 // v2=src[-2*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v3.16b}, [x0], x1 // v5=src[-1*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v4.16b}, [x0], x1 // v8=src[0*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v5.16b}, [x0], x1 // v11=src[1*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v6.16b}, [x0], x1 // v14=src[2*stride] + +w9_hv_mc_luma_loop: + //prfm pldl1strm, [x0, x1] + ld1 {v7.16b}, [x0], x1 // v7=src[3*stride] + // vertical filtered into v20/v21 + FILTER_6TAG_8BITS_TO_16BITS1 v2, v3, v4, v5, v6, v7, v20, v0, v1 + FILTER_6TAG_8BITS_TO_16BITS2 v2, v3, v4, v5, v6, v7, v21, v0, v1 + // horizon filtered + UNPACK_2_16BITS_TO_ABC v20, v21, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS1 v23, v24, v25, v26 //output to v26[0] + st1 {v26.8b}, [x2], x5 //write 0:7Byte : 0 line + UNPACK_FILTER_SINGLE_TAG_16BITS v26, v21, v29, v27, v28, d26 + st1 {v26.b}[0], [x2], x3 //write 8th Byte : 0 line + + //prfm 
pldl1strm, [x0, x1] + ld1 {v2.16b}, [x0], x1 // v2=src[4*stride] + // vertical filtered into v20/v21 + FILTER_6TAG_8BITS_TO_16BITS1 v3, v4, v5, v6, v7, v2, v20, v0, v1 + FILTER_6TAG_8BITS_TO_16BITS2 v3, v4, v5, v6, v7, v2, v21, v0, v1 + // horizon filtered + UNPACK_2_16BITS_TO_ABC v20, v21, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS1 v23, v24, v25, v26 //output to v26[0] + st1 {v26.8b}, [x2], x5 //write 0:7Byte : 1 line + UNPACK_FILTER_SINGLE_TAG_16BITS v26, v21, v29, v27, v28, d26 + st1 {v26.b}[0], [x2], x3 //write 8th Byte : 1 line + + //prfm pldl1strm, [x0, x1] + ld1 {v3.16b}, [x0], x1 // v3=src[5*stride] + // vertical filtered into v20/v21 + FILTER_6TAG_8BITS_TO_16BITS1 v4, v5, v6, v7, v2, v3, v20, v0, v1 + FILTER_6TAG_8BITS_TO_16BITS2 v4, v5, v6, v7, v2, v3, v21, v0, v1 + // horizon filtered + UNPACK_2_16BITS_TO_ABC v20, v21, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS1 v23, v24, v25, v26 //output to v26[0] + st1 {v26.8b}, [x2], x5 //write 0:7Byte : 2 line + UNPACK_FILTER_SINGLE_TAG_16BITS v26, v21, v29, v27, v28, d26 + st1 {v26.b}[0], [x2], x3 //write 8th Byte : 2 line + + //prfm pldl1strm, [x0, x1] + ld1 {v4.16b}, [x0], x1 // v4=src[6*stride] + // vertical filtered into v20/v21 + FILTER_6TAG_8BITS_TO_16BITS1 v5, v6, v7, v2, v3, v4, v20, v0, v1 + FILTER_6TAG_8BITS_TO_16BITS2 v5, v6, v7, v2, v3, v4, v21, v0, v1 + // horizon filtered + UNPACK_2_16BITS_TO_ABC v20, v21, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS1 v23, v24, v25, v26 //output to v26[0] + st1 {v26.8b}, [x2], x5 //write 0:7Byte : 3 line + UNPACK_FILTER_SINGLE_TAG_16BITS v26, v21, v29, v27, v28, d26 + st1 {v26.b}[0], [x2], x3 //write 8th Byte : 3 line + + + mov v5.16b, v3.16b + mov v3.16b, v7.16b + mov v30.16b, v2.16b + mov v2.16b, v6.16b + mov v6.16b, v4.16b + mov v4.16b, v30.16b + + sub x4, x4, #4 + cbnz x4, w9_hv_mc_luma_loop + + //prfm pldl1strm, [x0, x1] + ld1 {v7.16b}, [x0], x1 // v7=src[3*stride] + // vertical filtered into v20/v21 + FILTER_6TAG_8BITS_TO_16BITS1 v2, v3, v4, v5, v6, v7, v20, 
v0, v1 + FILTER_6TAG_8BITS_TO_16BITS2 v2, v3, v4, v5, v6, v7, v21, v0, v1 + // horizon filtered + UNPACK_2_16BITS_TO_ABC v20, v21, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS1 v23, v24, v25, v26 //output to v26[0] + st1 {v26.8b}, [x2], x5 //write 0:7Byte : 0 line + UNPACK_FILTER_SINGLE_TAG_16BITS v26, v21, v29, v27, v28, d26 + st1 {v26.b}[0], [x2], x3 //write 8th Byte : 0 line +WELS_ASM_AARCH64_FUNC_END + +//void McHorVer22Width5_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight);//width+1&&height+1 +WELS_ASM_AARCH64_FUNC_BEGIN McHorVer22Width5_AArch64_neon + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x4,w4 + sub x0, x0, #2 + sub x0, x0, x1, lsl #1 + movi v0.8h, #20, lsl #0 + movi v1.8h, #5, lsl #0 + sub x3, x3, #4 + mov x5, #4 + ldr q29, filter_para + sub x4, x4, #1 + + //prfm pldl1strm, [x0] + //prfm pldl1strm, [x0, x1] + ld1 {v2.16b}, [x0], x1 // v2=src[-2*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v3.16b}, [x0], x1 // v5=src[-1*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v4.16b}, [x0], x1 // v8=src[0*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v5.16b}, [x0], x1 // v11=src[1*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v6.16b}, [x0], x1 // v14=src[2*stride] + +w5_hv_mc_luma_loop: + //prfm pldl1strm, [x0, x1] + ld1 {v7.16b}, [x0], x1 // v7=src[3*stride] + // vertical filtered into v20/v21 + FILTER_6TAG_8BITS_TO_16BITS1 v2, v3, v4, v5, v6, v7, v20, v0, v1 + FILTER_6TAG_8BITS_TO_16BITS2 v2, v3, v4, v5, v6, v7, v21, v0, v1 + // horizon filtered + UNPACK_2_16BITS_TO_ABC v20, v21, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS1 v23, v24, v25, v26 //output to v26[0] + st1 {v26.s}[0], [x2], x5 //write 0:3Byte : 0 line + st1 {v26.b}[4], [x2], x3 //write 5th Byte : 0 line + + //prfm pldl1strm, [x0, x1] + ld1 {v2.16b}, [x0], x1 // v2=src[4*stride] + // vertical filtered into v20/v21 + FILTER_6TAG_8BITS_TO_16BITS1 v3, v4, v5, v6, v7, v2, v20, v0, v1 + FILTER_6TAG_8BITS_TO_16BITS2 v3, v4, v5, v6, v7, 
v2, v21, v0, v1 + // horizon filtered + UNPACK_2_16BITS_TO_ABC v20, v21, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS1 v23, v24, v25, v26 //output to v26[0] + st1 {v26.s}[0], [x2], x5 //write 0:3Byte : 1 line + st1 {v26.b}[4], [x2], x3 //write 5th Byte : 1 line + + //prfm pldl1strm, [x0, x1] + ld1 {v3.16b}, [x0], x1 // v3=src[5*stride] + // vertical filtered into v20/v21 + FILTER_6TAG_8BITS_TO_16BITS1 v4, v5, v6, v7, v2, v3, v20, v0, v1 + FILTER_6TAG_8BITS_TO_16BITS2 v4, v5, v6, v7, v2, v3, v21, v0, v1 + // horizon filtered + UNPACK_2_16BITS_TO_ABC v20, v21, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS1 v23, v24, v25, v26 //output to v26[0] + st1 {v26.s}[0], [x2], x5 //write 0:3Byte : 2 line + st1 {v26.b}[4], [x2], x3 //write 5th Byte : 2 line + + //prfm pldl1strm, [x0, x1] + ld1 {v4.16b}, [x0], x1 // v4=src[6*stride] + // vertical filtered into v20/v21 + FILTER_6TAG_8BITS_TO_16BITS1 v5, v6, v7, v2, v3, v4, v20, v0, v1 + FILTER_6TAG_8BITS_TO_16BITS2 v5, v6, v7, v2, v3, v4, v21, v0, v1 + // horizon filtered + UNPACK_2_16BITS_TO_ABC v20, v21, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS1 v23, v24, v25, v26 //output to v26[0] + st1 {v26.s}[0], [x2], x5 //write 0:3Byte : 3 line + st1 {v26.b}[4], [x2], x3 //write 5th Byte : 3 line + + + mov v5.16b, v3.16b + mov v3.16b, v7.16b + mov v30.16b, v2.16b + mov v2.16b, v6.16b + mov v6.16b, v4.16b + mov v4.16b, v30.16b + + sub x4, x4, #4 + cbnz x4, w5_hv_mc_luma_loop + + //prfm pldl1strm, [x0, x1] + ld1 {v7.16b}, [x0], x1 // v7=src[3*stride] + // vertical filtered into v20/v21 + FILTER_6TAG_8BITS_TO_16BITS1 v2, v3, v4, v5, v6, v7, v20, v0, v1 + FILTER_6TAG_8BITS_TO_16BITS2 v2, v3, v4, v5, v6, v7, v21, v0, v1 + // horizon filtered + UNPACK_2_16BITS_TO_ABC v20, v21, v23, v24, v25 + FILTER_3_IN_16BITS_TO_8BITS1 v23, v24, v25, v26 //output to v26[0] + st1 {v26.s}[0], [x2], x5 //write 0:3Byte : 0 line + st1 {v26.b}[4], [x2], x3 //write 5th Byte : 0 line +WELS_ASM_AARCH64_FUNC_END + +//void McHorVer02Height17_AArch64_neon (const uint8_t* 
pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight);// height+1 +WELS_ASM_AARCH64_FUNC_BEGIN McHorVer02Height17_AArch64_neon + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x4,w4 + sub x0, x0, x1, lsl #1 + movi v0.8h, #20, lsl #0 + movi v1.8h, #5, lsl #0 + sub x4, x4, #1 + + //prfm pldl1strm, [x0] + //prfm pldl1strm, [x0, x1] + ld1 {v2.16b}, [x0], x1 // v2=src[-2*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v3.16b}, [x0], x1 // v3=src[-1*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v4.16b}, [x0], x1 // v4=src[0*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v5.16b}, [x0], x1 // v5=src[1*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v6.16b}, [x0], x1 // v6=src[2*stride] + + +w17_v_mc_luma_loop: + //prfm pldl1strm, [x0, x1] + ld1 {v7.16b}, [x0], x1 // v7=src[3*stride] + FILTER_6TAG_8BITS1 v2, v3, v4, v5, v6, v7, v20, v0, v1 + FILTER_6TAG_8BITS2 v2, v3, v4, v5, v6, v7, v20, v0, v1 + st1 {v20.16b}, [x2], x3 //write 16Byte : 0 line + + + //prfm pldl1strm, [x0, x1] + ld1 {v2.16b}, [x0], x1 // v2=src[4*stride] + FILTER_6TAG_8BITS1 v3, v4, v5, v6, v7, v2, v20, v0, v1 + FILTER_6TAG_8BITS2 v3, v4, v5, v6, v7, v2, v20, v0, v1 + st1 {v20.16b}, [x2], x3 //write 16Byte : 1 line + + + //prfm pldl1strm, [x0, x1] + ld1 {v3.16b}, [x0], x1 // v3=src[5*stride] + FILTER_6TAG_8BITS1 v4, v5, v6, v7, v2, v3, v20, v0, v1 + FILTER_6TAG_8BITS2 v4, v5, v6, v7, v2, v3, v20, v0, v1 + st1 {v20.16b}, [x2], x3 //write 16Byte : 2 line + + + //prfm pldl1strm, [x0, x1] + ld1 {v4.16b}, [x0], x1 // v4=src[6*stride] + FILTER_6TAG_8BITS1 v5, v6, v7, v2, v3, v4, v20, v0, v1 + FILTER_6TAG_8BITS2 v5, v6, v7, v2, v3, v4, v20, v0, v1 + st1 {v20.16b}, [x2], x3 //write 16Byte : 3 line + + + //prfm pldl1strm, [x0, x1] + ld1 {v5.16b}, [x0], x1 // v5=src[7*stride] + FILTER_6TAG_8BITS1 v6, v7, v2, v3, v4, v5, v20, v0, v1 + FILTER_6TAG_8BITS2 v6, v7, v2, v3, v4, v5, v20, v0, v1 + st1 {v20.16b}, [x2], x3 //write 16Byte : 4 line + + + //prfm pldl1strm, [x0, x1] + ld1 {v6.16b}, 
[x0], x1 // v6=src[8*stride] + FILTER_6TAG_8BITS1 v7, v2, v3, v4, v5, v6, v20, v0, v1 + FILTER_6TAG_8BITS2 v7, v2, v3, v4, v5, v6, v20, v0, v1 + st1 {v20.16b}, [x2], x3 //write 16Byte : 5 line + + //prfm pldl1strm, [x0, x1] + ld1 {v7.16b}, [x0], x1 // v7=src[9*stride] + FILTER_6TAG_8BITS1 v2, v3, v4, v5, v6, v7, v20, v0, v1 + FILTER_6TAG_8BITS2 v2, v3, v4, v5, v6, v7, v20, v0, v1 + st1 {v20.16b}, [x2], x3 //write 16Byte : 6 line + + //prfm pldl1strm, [x0, x1] + ld1 {v2.16b}, [x0], x1 // v2=src[10*stride] + FILTER_6TAG_8BITS1 v3, v4, v5, v6, v7, v2, v20, v0, v1 + FILTER_6TAG_8BITS2 v3, v4, v5, v6, v7, v2, v20, v0, v1 + st1 {v20.16b}, [x2], x3 //write 16Byte : 7 line + + mov v3.16b, v5.16b + mov v5.16b, v7.16b + mov v7.16b, v2.16b + mov v2.16b, v4.16b + mov v4.16b, v6.16b + mov v6.16b, v7.16b + sub x4, x4, #8 + cbnz x4, w17_v_mc_luma_loop + + //prfm pldl1strm, [x0, x1] + ld1 {v7.16b}, [x0], x1 // v7=src[3*stride] + FILTER_6TAG_8BITS1 v2, v3, v4, v5, v6, v7, v20, v0, v1 + FILTER_6TAG_8BITS2 v2, v3, v4, v5, v6, v7, v20, v0, v1 + st1 {v20.16b}, [x2], x3 //write 16Byte : last line +WELS_ASM_AARCH64_FUNC_END +//void McHorVer02Height9_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight);// height+1 +WELS_ASM_AARCH64_FUNC_BEGIN McHorVer02Height9_AArch64_neon + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x4,w4 + sub x0, x0, x1, lsl #1 + movi v0.8h, #20, lsl #0 + movi v1.8h, #5, lsl #0 + sub x4, x4, #1 + + //prfm pldl1strm, [x0] + //prfm pldl1strm, [x0, x1] + ld1 {v2.8b}, [x0], x1 // v2=src[-2*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v3.8b}, [x0], x1 // v3=src[-1*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v4.8b}, [x0], x1 // v4=src[0*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v5.8b}, [x0], x1 // v5=src[1*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v6.8b}, [x0], x1 // v6=src[2*stride] + +w9_v_mc_luma_loop: + //prfm pldl1strm, [x0, x1] + ld1 {v7.8b}, [x0], x1 // v7=src[3*stride] + FILTER_6TAG_8BITS1 
v2, v3, v4, v5, v6, v7, v20, v0, v1 + st1 {v20.8b}, [x2], x3 //write 8Byte : 0 line + + //prfm pldl1strm, [x0, x1] + ld1 {v2.8b}, [x0], x1 // v2=src[4*stride] + FILTER_6TAG_8BITS1 v3, v4, v5, v6, v7, v2, v20, v0, v1 + st1 {v20.8b}, [x2], x3 //write 8Byte : 1 line + + //prfm pldl1strm, [x0, x1] + ld1 {v3.8b}, [x0], x1 // v3=src[5*stride] + FILTER_6TAG_8BITS1 v4, v5, v6, v7, v2, v3, v20, v0, v1 + st1 {v20.8b}, [x2], x3 //write 8Byte : 2 line + + //prfm pldl1strm, [x0, x1] + ld1 {v4.8b}, [x0], x1 // v4=src[6*stride] + FILTER_6TAG_8BITS1 v5, v6, v7, v2, v3, v4, v20, v0, v1 + st1 {v20.8b}, [x2], x3 //write 8Byte : 3 line + + mov v5.16b, v3.16b + mov v3.16b, v7.16b + mov v7.16b, v2.16b + mov v2.16b, v6.16b + mov v6.16b, v4.16b + mov v4.16b, v7.16b + sub x4, x4, #4 + cbnz x4, w9_v_mc_luma_loop + + //prfm pldl1strm, [x0, x1] + ld1 {v7.8b}, [x0], x1 // v7=src[3*stride] + FILTER_6TAG_8BITS1 v2, v3, v4, v5, v6, v7, v20, v0, v1 + st1 {v20.8b}, [x2], x3 //write 8Byte : 0 line +WELS_ASM_AARCH64_FUNC_END + +//void McHorVer02Height5_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight);// height+1 +WELS_ASM_AARCH64_FUNC_BEGIN McHorVer02Height5_AArch64_neon + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x4,w4 + sub x0, x0, x1, lsl #1 + movi v0.8h, #20, lsl #0 + movi v1.8h, #5, lsl #0 + sub x4, x4, #1 + + //prfm pldl1strm, [x0] + //prfm pldl1strm, [x0, x1] + ld1 {v2.8b}, [x0], x1 // v2=src[-2*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v3.8b}, [x0], x1 // v3=src[-1*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v4.8b}, [x0], x1 // v4=src[0*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v5.8b}, [x0], x1 // v5=src[1*stride] + //prfm pldl1strm, [x0, x1] + ld1 {v6.8b}, [x0], x1 // v6=src[2*stride] + +w5_v_mc_luma_loop: + //prfm pldl1strm, [x0, x1] + ld1 {v7.8b}, [x0], x1 // v7=src[3*stride] + FILTER_6TAG_8BITS1 v2, v3, v4, v5, v6, v7, v20, v0, v1 + st1 {v20.s}[0], [x2], x3 //write 4Byte : 0 line + + //prfm pldl1strm, 
[x0, x1] + ld1 {v2.8b}, [x0], x1 // v2=src[4*stride] + FILTER_6TAG_8BITS1 v3, v4, v5, v6, v7, v2, v20, v0, v1 + st1 {v20.s}[0], [x2], x3 //write 4Byte : 1 line + + //prfm pldl1strm, [x0, x1] + ld1 {v3.8b}, [x0], x1 // v3=src[5*stride] + FILTER_6TAG_8BITS1 v4, v5, v6, v7, v2, v3, v20, v0, v1 + st1 {v20.s}[0], [x2], x3 //write 4Byte : 2 line + + //prfm pldl1strm, [x0, x1] + ld1 {v4.8b}, [x0], x1 // v4=src[6*stride] + FILTER_6TAG_8BITS1 v5, v6, v7, v2, v3, v4, v20, v0, v1 + st1 {v20.s}[0], [x2], x3 //write 4Byte : 3 line + + mov v5.16b, v3.16b + mov v3.16b, v7.16b + mov v7.16b, v2.16b + mov v2.16b, v6.16b + mov v6.16b, v4.16b + mov v4.16b, v7.16b + sub x4, x4, #4 + cbnz x4, w5_v_mc_luma_loop + + //prfm pldl1strm, [x0, x1] + ld1 {v7.8b}, [x0], x1 // v7=src[3*stride] + FILTER_6TAG_8BITS1 v2, v3, v4, v5, v6, v7, v20, v0, v1 + st1 {v20.s}[0], [x2], x3 //write 4Byte : 0 line + +WELS_ASM_AARCH64_FUNC_END + +#endif + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/WelsList.h b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/WelsList.h new file mode 100644 index 000000000..c909be5e1 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/WelsList.h @@ -0,0 +1,287 @@ +/*! + * \copy + * Copyright (c) 2009-2015, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file WelsList + * + * \brief for the list function needed in ThreadPool + * + * \date 9/27/2015 Created + * + ************************************************************************************* + */ + + +#ifndef _WELS_LIST_H_ +#define _WELS_LIST_H_ + +#include "typedefs.h" +#include + +namespace WelsCommon { + +template +struct SNode { + TNodeType* pPointer; + SNode* pPrevNode; + SNode* pNextNode; +}; + +template +class CWelsList { + public: + CWelsList() { + m_iCurrentNodeCount = 0; + m_iMaxNodeCount = 50; + + m_pCurrentList = NULL; + m_pFirst = NULL; + m_pCurrent = NULL; + m_pLast = NULL; + }; + ~CWelsList() { + if (m_pCurrentList) { + free (m_pCurrentList); + m_pCurrentList = NULL; + } + + m_pCurrentList = NULL; + m_pFirst = NULL; + m_pCurrent = NULL; + m_pLast = NULL; + }; + + int32_t size() { + return m_iCurrentNodeCount; + } + + bool push_back (TNodeType* pNode) { + if (!pNode) { + return false; + } + + if (NULL == m_pCurrentList) { + m_pCurrentList = static_cast*> (malloc (m_iMaxNodeCount * sizeof (SNode))); + if (NULL == m_pCurrentList) { + return false; + } else { + ResetStorage(); + } + } + + if (NULL == m_pCurrent) { + if (!ExpandList()) { + 
return false; + } + } + + m_pCurrent->pPointer = pNode; + m_pCurrent = m_pCurrent->pNextNode; + m_iCurrentNodeCount++; + + return true; + } + + TNodeType* begin() { + if (m_pFirst) { + return m_pFirst->pPointer; + } + return NULL; + } + + void pop_front() { + if (m_iCurrentNodeCount == 0) { + return; + } + + SNode* pTemp = m_pFirst; + + m_pFirst = m_pFirst->pNextNode; + m_pFirst->pPrevNode = NULL; + + CleanOneNode (pTemp); + + m_pLast->pNextNode = pTemp; + pTemp->pPrevNode = m_pLast; + m_pLast = pTemp; + + if (NULL == m_pCurrent) + m_pCurrent = m_pLast; + + m_iCurrentNodeCount --; + } + + bool erase (TNodeType* pNode) { + if (0 == m_iCurrentNodeCount) { + return false; + } + + SNode* pTemp = m_pFirst; + do { + if (pNode == pTemp->pPointer) { + if (pTemp->pPrevNode) { + pTemp->pPrevNode->pNextNode = pTemp->pNextNode; + } else { + m_pFirst = pTemp->pNextNode; + } + + if (pTemp->pNextNode) { + pTemp->pNextNode->pPrevNode = pTemp->pPrevNode; + } + + CleanOneNode (pTemp); + m_iCurrentNodeCount --; + + m_pLast->pNextNode = pTemp; + pTemp->pPrevNode = m_pLast; + m_pLast = pTemp; + + return true; + } + + pTemp = pTemp->pNextNode; + + } while (pTemp && pTemp->pPointer); + return false; + } + + bool findNode (TNodeType* pNodeTarget) { + if ((m_iCurrentNodeCount > 0) && pNodeTarget) { + SNode* pNode = m_pFirst; + while (pNode) { + if (pNode->pPointer == pNodeTarget) { + return true; + } + pNode = pNode->pNextNode; + } + } + return false; + } + + TNodeType* getNode (int iNodeIdx) { + if ((iNodeIdx > m_iCurrentNodeCount - 1) || (0 == m_iCurrentNodeCount)) { + return NULL; + } + SNode* pNode = m_pFirst; + for (int i = 0; i < iNodeIdx; i++) { + if (pNode->pNextNode) { + pNode = pNode->pNextNode; + } else { + return NULL; + } + } + return pNode->pPointer; + } + + private: + bool ExpandList() { + SNode* tmpCurrentList = static_cast*> (malloc (m_iMaxNodeCount * 2 * sizeof ( + SNode))); + if (tmpCurrentList == NULL) { + return false; + } + InitStorage (tmpCurrentList, 
(m_iMaxNodeCount * 2) - 1); + + SNode* pTemp = m_pFirst; + for (int i = 0; ((i < m_iMaxNodeCount) && pTemp); i++) { + tmpCurrentList[i].pPointer = pTemp->pPointer; + pTemp = pTemp->pNextNode; + } + + free (m_pCurrentList); + m_pCurrentList = tmpCurrentList; + m_iCurrentNodeCount = m_iMaxNodeCount; + m_iMaxNodeCount = m_iMaxNodeCount * 2; + m_pFirst = & (m_pCurrentList[0]); + m_pLast = & (m_pCurrentList[m_iMaxNodeCount - 1]); + m_pCurrent = & (m_pCurrentList[m_iCurrentNodeCount]); + return true; + } + + void InitStorage (SNode* pList, const int32_t iMaxIndex) { + pList[0].pPrevNode = NULL; + pList[0].pPointer = NULL; + pList[0].pNextNode = & (pList[1]); + for (int i = 1; i < iMaxIndex; i++) { + pList[i].pPrevNode = & (pList[i - 1]); + pList[i].pPointer = NULL; + pList[i].pNextNode = & (pList[i + 1]); + } + pList[iMaxIndex].pPrevNode = & (pList[iMaxIndex - 1]); + pList[iMaxIndex].pPointer = NULL; + pList[iMaxIndex].pNextNode = NULL; + } + + + void CleanOneNode (SNode* pSNode) { + pSNode->pPointer = NULL; + pSNode->pPrevNode = NULL; + pSNode->pNextNode = NULL; + } + + void ResetStorage() { + InitStorage (m_pCurrentList, m_iMaxNodeCount - 1); + m_pCurrent = m_pCurrentList; + m_pFirst = & (m_pCurrentList[0]); + m_pLast = & (m_pCurrentList[m_iMaxNodeCount - 1]); + } + + private: + int32_t m_iCurrentNodeCount; + int32_t m_iMaxNodeCount; + SNode* m_pCurrentList; + SNode* m_pFirst; + SNode* m_pLast; + SNode* m_pCurrent; +}; + +template +class CWelsNonDuplicatedList : public CWelsList { + public: + bool push_back (TNodeType* pNode) { + if (0 != this->size()) { + if ((NULL != pNode) && (this->findNode (pNode))) { //not checking NULL for easier testing + return false; + } + } + + return CWelsList::push_back (pNode); + } + +}; + + +} + + +#endif + + + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/WelsLock.h b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/WelsLock.h new file mode 100644 index 000000000..ec5190f1e --- /dev/null +++ 
b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/WelsLock.h @@ -0,0 +1,97 @@ +/*! + * \copy + * Copyright (c) 2009-2015, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file WelsLock.h + * + * \brief class wrapping for locks + * + * \date 5/09/2012 Created + * + ************************************************************************************* + */ + +#ifndef _WELS_LOCK_H_ +#define _WELS_LOCK_H_ + +#include "macros.h" +#include "typedefs.h" +#include "WelsThreadLib.h" + +namespace WelsCommon { + +class CWelsLock { + DISALLOW_COPY_AND_ASSIGN (CWelsLock); + public: + CWelsLock() { + WelsMutexInit (&m_cMutex); + } + + virtual ~CWelsLock() { + WelsMutexDestroy (&m_cMutex); + } + + WELS_THREAD_ERROR_CODE Lock() { + return WelsMutexLock (&m_cMutex); + } + + WELS_THREAD_ERROR_CODE Unlock() { + return WelsMutexUnlock (&m_cMutex); + } + + private: + WELS_MUTEX m_cMutex; +}; + +class CWelsAutoLock { + DISALLOW_COPY_AND_ASSIGN (CWelsAutoLock); + public: + CWelsAutoLock (CWelsLock& cLock) : m_cLock (cLock) { + m_cLock.Lock(); + } + + virtual ~CWelsAutoLock() { + m_cLock.Unlock(); + } + + private: + CWelsLock& m_cLock; +}; + +} + +#endif + + + + + + + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/WelsTask.h b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/WelsTask.h new file mode 100644 index 000000000..9501c9db6 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/WelsTask.h @@ -0,0 +1,75 @@ +/*! + * \copy + * Copyright (c) 2009-2015, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file WelsTask.h + * + * \brief Interfaces introduced in thread pool + * + * \date 5/09/2012 Created + * + ************************************************************************************* + */ + +#ifndef _WELS_TASK_H_ +#define _WELS_TASK_H_ + +#include "codec_def.h" + +namespace WelsCommon { + +class IWelsTaskSink { + public: + virtual int OnTaskExecuted() = 0; + virtual int OnTaskCancelled() = 0; +}; + +class IWelsTask { + public: + IWelsTask (IWelsTaskSink* pSink) { + m_pSink = pSink; + }; + virtual ~IWelsTask() { } + + virtual int Execute() = 0; + + IWelsTaskSink* GetSink() { + return m_pSink; + }; + + protected: + IWelsTaskSink* m_pSink; +}; + +} + +#endif + + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/WelsTaskThread.h b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/WelsTaskThread.h new file mode 100644 index 000000000..24f399e4a --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/WelsTaskThread.h @@ -0,0 +1,83 @@ +/*! + * \copy + * Copyright (c) 2009-2015, Cisco Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file WelsTaskThread.h + * + * \brief connecting task and thread + * + * \date 5/09/2012 Created + * + ************************************************************************************* + */ + + +#ifndef _WELS_TASK_THREAD_H_ +#define _WELS_TASK_THREAD_H_ + + +#include "WelsTask.h" +#include "WelsThread.h" + +namespace WelsCommon { + +class CWelsTaskThread; + +class IWelsTaskThreadSink { + public: + virtual WELS_THREAD_ERROR_CODE OnTaskStart (CWelsTaskThread* pThread, IWelsTask* pTask) = 0; + virtual WELS_THREAD_ERROR_CODE OnTaskStop (CWelsTaskThread* pThread, IWelsTask* pTask) = 0; +}; + +class CWelsTaskThread : public CWelsThread { + public: + CWelsTaskThread (IWelsTaskThreadSink* pSink); + virtual ~CWelsTaskThread(); + + WELS_THREAD_ERROR_CODE SetTask (IWelsTask* pTask); + virtual void ExecuteTask(); + + uintptr_t GetID() const { + return m_uiID; + } + + private: + CWelsLock m_cLockTask; + IWelsTaskThreadSink* m_pSink; + IWelsTask* m_pTask; + uintptr_t m_uiID; + + DISALLOW_COPY_AND_ASSIGN (CWelsTaskThread); +}; + +} + +#endif + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/WelsThread.h b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/WelsThread.h new file mode 100644 index 000000000..c8b6b1c66 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/WelsThread.h @@ -0,0 +1,106 @@ +/*! + * \copy + * Copyright (c) 2009-2015, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file WelsThread.h + * + * \brief Interfaces introduced in threads + * + * \date 5/09/2012 Created + * + ************************************************************************************* + */ + + +#ifndef _WELS_THREAD_H_ +#define _WELS_THREAD_H_ + + +#include "macros.h" +#include "WelsLock.h" +#include "WelsThreadLib.h" + +namespace WelsCommon { + +class CWelsThread { + public: + CWelsThread(); + virtual ~CWelsThread(); + + virtual void Thread(); + virtual void ExecuteTask() = 0; + virtual WELS_THREAD_ERROR_CODE Start(); + virtual void Kill(); + WELS_MUTEX m_hMutex; + protected: + static WELS_THREAD_ROUTINE_TYPE TheThread (void* pParam); + + void SetRunning (bool bRunning) { + CWelsAutoLock cLock (m_cLockStatus); + + m_bRunning = bRunning; + } + void SetEndFlag (bool bEndFlag) { + CWelsAutoLock cLock (m_cLockStatus); + + m_bEndFlag = bEndFlag; + } + + bool GetRunning() const { + return m_bRunning; + } + + bool GetEndFlag() const { + return m_bEndFlag; + } + + void SignalThread() { + WelsEventSignal (&m_hEvent, &m_hMutex, &m_iConVar); + } + + private: + WELS_THREAD_HANDLE m_hThread; + WELS_EVENT m_hEvent; + CWelsLock m_cLockStatus; + bool m_bRunning; + bool 
m_bEndFlag; + int m_iConVar; + + DISALLOW_COPY_AND_ASSIGN (CWelsThread); +}; + + +} + + + +#endif + + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/WelsThreadLib.h b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/WelsThreadLib.h new file mode 100644 index 000000000..cd26dbdd9 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/WelsThreadLib.h @@ -0,0 +1,151 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file WelsThreadLib.h + * + * \brief Interfaces introduced in thread programming + * + * \date 11/17/2009 Created + * + ************************************************************************************* + */ + +#ifndef _WELS_THREAD_API_H_ +#define _WELS_THREAD_API_H_ + +#include "typedefs.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(_WIN32) || defined(__CYGWIN__) + +#include + +typedef HANDLE WELS_THREAD_HANDLE; +typedef LPTHREAD_START_ROUTINE LPWELS_THREAD_ROUTINE; + +typedef CRITICAL_SECTION WELS_MUTEX; +typedef HANDLE WELS_EVENT; + +#define WELS_THREAD_ROUTINE_TYPE DWORD WINAPI +#define WELS_THREAD_ROUTINE_RETURN(rc) return (DWORD)rc; + +#ifdef WINAPI_FAMILY +#if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) +#define WP80 + +#define InitializeCriticalSection(x) InitializeCriticalSectionEx(x, 0, 0) +#define GetSystemInfo(x) GetNativeSystemInfo(x) +#define CreateEvent(attr, reset, init, name) CreateEventEx(attr, name, ((reset) ? CREATE_EVENT_MANUAL_RESET : 0) | ((init) ? 
CREATE_EVENT_INITIAL_SET : 0), EVENT_ALL_ACCESS) +#define CreateSemaphore(a, b, c, d) CreateSemaphoreEx(a, b, c, d, 0, SEMAPHORE_ALL_ACCESS) +#define WaitForSingleObject(a, b) WaitForSingleObjectEx(a, b, FALSE) +#define WaitForMultipleObjects(a, b, c, d) WaitForMultipleObjectsEx(a, b, c, d, FALSE) +#endif +#endif + +#else // NON-WINDOWS + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +typedef pthread_t WELS_THREAD_HANDLE; +typedef void* (*LPWELS_THREAD_ROUTINE) (void*); + +typedef pthread_mutex_t WELS_MUTEX; + +#ifdef __APPLE__ +typedef pthread_cond_t WELS_EVENT; +#else +typedef sem_t* WELS_EVENT; +#endif + +#define WELS_THREAD_ROUTINE_TYPE void * +#define WELS_THREAD_ROUTINE_RETURN(rc) return (void*)(intptr_t)rc; + +#endif//_WIN32 + +typedef int32_t WELS_THREAD_ERROR_CODE; +typedef int32_t WELS_THREAD_ATTR; + +typedef struct _WelsLogicalProcessorInfo { + int32_t ProcessorCount; +} WelsLogicalProcessInfo; + +#define WELS_THREAD_ERROR_OK 0 +#define WELS_THREAD_ERROR_GENERAL ((uint32_t)(-1)) +#define WELS_THREAD_ERROR_WAIT_OBJECT_0 0 +#define WELS_THREAD_ERROR_WAIT_TIMEOUT ((uint32_t)0x00000102L) +#define WELS_THREAD_ERROR_WAIT_FAILED WELS_THREAD_ERROR_GENERAL + +WELS_THREAD_ERROR_CODE WelsMutexInit (WELS_MUTEX* mutex); +WELS_THREAD_ERROR_CODE WelsMutexLock (WELS_MUTEX* mutex); +WELS_THREAD_ERROR_CODE WelsMutexUnlock (WELS_MUTEX* mutex); +WELS_THREAD_ERROR_CODE WelsMutexDestroy (WELS_MUTEX* mutex); + +WELS_THREAD_ERROR_CODE WelsEventOpen (WELS_EVENT* p_event, const char* event_name = NULL); +WELS_THREAD_ERROR_CODE WelsEventClose (WELS_EVENT* event, const char* event_name = NULL); + +WELS_THREAD_ERROR_CODE WelsEventSignal (WELS_EVENT* event,WELS_MUTEX *pMutex, int* iCondition); +WELS_THREAD_ERROR_CODE WelsEventWait (WELS_EVENT* event,WELS_MUTEX *pMutex, int& iCondition); +WELS_THREAD_ERROR_CODE WelsEventWaitWithTimeOut (WELS_EVENT* event, uint32_t dwMilliseconds,WELS_MUTEX *pMutex = NULL); 
+WELS_THREAD_ERROR_CODE WelsMultipleEventsWaitSingleBlocking (uint32_t nCount, WELS_EVENT* event_list, + WELS_EVENT* master_event = NULL,WELS_MUTEX *pMutex = NULL); + +WELS_THREAD_ERROR_CODE WelsThreadCreate (WELS_THREAD_HANDLE* thread, LPWELS_THREAD_ROUTINE routine, + void* arg, WELS_THREAD_ATTR attr); + +WELS_THREAD_ERROR_CODE WelsThreadSetName (const char* thread_name); + +WELS_THREAD_ERROR_CODE WelsThreadJoin (WELS_THREAD_HANDLE thread); + +WELS_THREAD_HANDLE WelsThreadSelf(); + +WELS_THREAD_ERROR_CODE WelsQueryLogicalProcessInfo (WelsLogicalProcessInfo* pInfo); + +void WelsSleep (uint32_t dwMilliSecond); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/WelsThreadPool.h b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/WelsThreadPool.h new file mode 100644 index 000000000..a24309aa0 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/WelsThreadPool.h @@ -0,0 +1,124 @@ +/*! + * \copy + * Copyright (c) 2009-2015, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file WelsThreadPool.h + * + * \brief Interfaces introduced in thread pool + * + * \date 5/09/2012 Created + * + ************************************************************************************* + */ + + +#ifndef _WELS_THREAD_POOL_H_ +#define _WELS_THREAD_POOL_H_ + +#include +#include "WelsTask.h" +#include "WelsTaskThread.h" +#include "WelsList.h" + +namespace WelsCommon { + + +class CWelsThreadPool : public CWelsThread, public IWelsTaskThreadSink { + public: + enum { + DEFAULT_THREAD_NUM = 4, + }; + + static WELS_THREAD_ERROR_CODE SetThreadNum (int32_t iMaxThreadNum); + + static CWelsThreadPool* AddReference(); + void RemoveInstance(); + + static bool IsReferenced(); + + //IWelsTaskThreadSink + virtual WELS_THREAD_ERROR_CODE OnTaskStart (CWelsTaskThread* pThread, IWelsTask* pTask); + virtual WELS_THREAD_ERROR_CODE OnTaskStop (CWelsTaskThread* pThread, IWelsTask* pTask); + + // CWelsThread + virtual void ExecuteTask(); + + WELS_THREAD_ERROR_CODE QueueTask (IWelsTask* pTask); + int32_t GetThreadNum() const { + return m_iMaxThreadNum; + } + + + protected: + WELS_THREAD_ERROR_CODE Init(); + WELS_THREAD_ERROR_CODE Uninit(); + + WELS_THREAD_ERROR_CODE CreateIdleThread(); + void DestroyThread (CWelsTaskThread* pThread); + WELS_THREAD_ERROR_CODE AddThreadToIdleQueue (CWelsTaskThread* pThread); + WELS_THREAD_ERROR_CODE AddThreadToBusyList (CWelsTaskThread* pThread); + WELS_THREAD_ERROR_CODE RemoveThreadFromBusyList 
(CWelsTaskThread* pThread); + bool AddTaskToWaitedList (IWelsTask* pTask); + CWelsTaskThread* GetIdleThread(); + IWelsTask* GetWaitedTask(); + int32_t GetIdleThreadNum(); + int32_t GetBusyThreadNum(); + int32_t GetWaitedTaskNum(); + void ClearWaitedTasks(); + + private: + CWelsThreadPool(); + virtual ~CWelsThreadPool(); + + WELS_THREAD_ERROR_CODE StopAllRunning(); + + static int32_t m_iRefCount; + static int32_t m_iMaxThreadNum; + static CWelsThreadPool* m_pThreadPoolSelf; + + CWelsNonDuplicatedList* m_cWaitedTasks; + CWelsNonDuplicatedList* m_cIdleThreads; + CWelsList* m_cBusyThreads; + + CWelsLock m_cLockPool; + CWelsLock m_cLockWaitedTasks; + CWelsLock m_cLockIdleTasks; + CWelsLock m_cLockBusyTasks; + + DISALLOW_COPY_AND_ASSIGN (CWelsThreadPool); +}; + +} + + +#endif + + + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/asmdefs_mmi.h b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/asmdefs_mmi.h new file mode 100644 index 000000000..69a7ae398 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/asmdefs_mmi.h @@ -0,0 +1,340 @@ +/*! + * \copy + * Copyright (c) 2013, Loongson Technology Co.,Ltd. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef ASMDEFS_MMI_H_ +#define ASMDEFS_MMI_H_ + +#define CACHE_LINE_SIZE 32 + +#if defined(_ABI64) && _MIPS_SIM == _ABI64 +# define mips_reg int64_t +# define PTRSIZE " 8 " +# define PTRLOG " 3 " +# define PTR_ADDU "daddu " +# define PTR_ADDIU "daddiu " +# define PTR_ADDI "daddi " +# define PTR_SUBU "dsubu " +# define PTR_L "ld " +# define PTR_S "sd " +# define PTR_SRA "dsra " +# define PTR_SRL "dsrl " +# define PTR_SLL "dsll " +#else +# define mips_reg int32_t +# define PTRSIZE " 4 " +# define PTRLOG " 2 " +# define PTR_ADDU "addu " +# define PTR_ADDIU "addiu " +# define PTR_ADDI "addi " +# define PTR_SUBU "subu " +# define PTR_L "lw " +# define PTR_S "sw " +# define PTR_SRA "sra " +# define PTR_SRL "srl " +# define PTR_SLL "sll " +#endif + +#define MMI_XSawp_BH(f0, f2, f4, f6, f8, f10) \ + "mov.d "#f8", "#f2" \n\t" \ + "punpckhbh "#f2", "#f0", "#f4" \n\t" \ + "punpcklbh "#f0", "#f0", "#f4" \n\t" \ + "punpckhbh "#f10", "#f8", "#f6" \n\t" \ + "punpcklbh "#f8", "#f8", "#f6" \n\t" + +#define MMI_XSawp_HW(f0, f2, f4, f6, f8, f10) \ + "mov.d "#f8", "#f2" \n\t" \ + "punpckhhw "#f2", "#f0", "#f4" \n\t" \ + "punpcklhw "#f0", "#f0", "#f4" \n\t" \ + "punpckhhw "#f10", "#f8", "#f6" \n\t" \ + "punpcklhw "#f8", "#f8", "#f6" \n\t" + +#define MMI_XSawp_WD(f0, f2, f4, f6, f8, f10) \ + "mov.d "#f8", "#f2" \n\t" \ + "punpckhwd "#f2", "#f0", "#f4" \n\t" \ + "punpcklwd "#f0", "#f0", "#f4" \n\t" \ + "punpckhwd "#f10", "#f8", "#f6" 
\n\t" \ + "punpcklwd "#f8", "#f8", "#f6" \n\t" + +#define MMI_XSawp_DQ(f0, f2, f4, f6, f8, f10) \ + "mov.d "#f8", "#f2" \n\t" \ + "mov.d "#f2", "#f4" \n\t" \ + "mov.d "#f10", "#f6" \n\t" + +#define WELS_AbsH(f0, f2, f4, f6, f8, f10) \ + "xor "#f8", "#f8", "#f8" \n\t" \ + "psubh "#f10", "#f8", "#f6" \n\t" \ + "psubh "#f8", "#f8", "#f4" \n\t" \ + "pmaxsh "#f0", "#f4", "#f8" \n\t" \ + "pmaxsh "#f2", "#f6", "#f10" \n\t" + +#define MMI_SumSub(f0, f2, f4, f6, f8, f10) \ + "mov.d "#f8", "#f4" \n\t" \ + "mov.d "#f10", "#f6" \n\t" \ + "paddh "#f4", "#f4", "#f0" \n\t" \ + "paddh "#f6", "#f6", "#f2" \n\t" \ + "psubh "#f0", "#f0", "#f8" \n\t" \ + "psubh "#f2", "#f2", "#f10" \n\t" + +#define MMI_LoadDiff8P(f0, f2, f4, f6, f8, r0, r1) \ + "gsldlc1 "#f0", 0x7("#r0") \n\t" \ + "gsldlc1 "#f4", 0x7("#r1") \n\t" \ + "gsldrc1 "#f0", 0x0("#r0") \n\t" \ + "gsldrc1 "#f4", 0x0("#r1") \n\t" \ + "punpckhbh "#f2", "#f0", "#f8" \n\t" \ + "punpcklbh "#f0", "#f0", "#f8" \n\t" \ + "punpckhbh "#f6", "#f4", "#f8" \n\t" \ + "punpcklbh "#f4", "#f4", "#f8" \n\t" \ + "psubh "#f0", "#f0", "#f4" \n\t" \ + "psubh "#f2", "#f2", "#f6" \n\t" + +#define MMI_TransTwo4x4H(f0, f2, f4, f6, f8, f10, f12, f14, f16, f18) \ + MMI_XSawp_HW(f0, f2, f4, f6, f16, f18) \ + MMI_XSawp_HW(f8, f10, f12, f14, f4, f6) \ + MMI_XSawp_WD(f0, f2, f8, f10, f12, f14) \ + MMI_XSawp_WD(f16, f18, f4, f6, f8, f10) \ + MMI_XSawp_DQ(f0, f2, f16, f18, f4, f6) \ + MMI_XSawp_DQ(f12, f14, f8, f10, f16, f18) + +#define MMI_TransTwo8x8B(f0, f2, f4, f6, f8, f10, f12, f14, f16, f18, f20, f22, f24, f26, f28, f30, r0, r1) \ + "dmfc1 "#r0", "#f28" \n\t" \ + "dmfc1 "#r1", "#f30" \n\t" \ + MMI_XSawp_BH(f0, f2, f4, f6, f28, f30) \ + MMI_XSawp_BH(f8, f10, f12, f14, f4, f6) \ + MMI_XSawp_BH(f16, f18, f20, f22, f12, f14) \ + "dmtc1 "#r0", "#f20" \n\t" \ + "dmtc1 "#r1", "#f22" \n\t" \ + "dmfc1 "#r0", "#f12" \n\t" \ + "dmfc1 "#r1", "#f14" \n\t" \ + MMI_XSawp_BH(f24, f26, f20, f22, f12, f14) \ + MMI_XSawp_HW(f0, f2, f8, f10, f20, f22) \ + MMI_XSawp_HW(f28, 
f30, f4, f6, f8, f10) \ + MMI_XSawp_HW(f16, f18, f24, f26, f4, f6) \ + "dmtc1 "#r0", "#f24" \n\t" \ + "dmtc1 "#r1", "#f26" \n\t" \ + "dmfc1 "#r0", "#f8" \n\t" \ + "dmfc1 "#r1", "#f10" \n\t" \ + MMI_XSawp_HW(f24, f26, f12, f14, f8, f10) \ + MMI_XSawp_WD(f0, f2, f16, f18, f12, f14) \ + MMI_XSawp_WD(f20, f22, f4, f6, f16, f18) \ + MMI_XSawp_WD(f28, f30, f24, f26, f4, f6) \ + "dmtc1 "#r0", "#f24" \n\t" \ + "dmtc1 "#r1", "#f26" \n\t" \ + "dmfc1 "#r0", "#f16" \n\t" \ + "dmfc1 "#r1", "#f18" \n\t" \ + MMI_XSawp_WD(f24, f26, f8, f10, f16, f18) \ + MMI_XSawp_DQ(f0, f2, f28, f30, f8, f10) \ + MMI_XSawp_DQ(f12, f14, f4, f6, f28, f30) \ + MMI_XSawp_DQ(f20, f22, f24, f26, f4, f6) \ + "dmtc1 "#r0", "#f24" \n\t" \ + "dmtc1 "#r1", "#f26" \n\t" \ + "dmfc1 "#r0", "#f0" \n\t" \ + "dmfc1 "#r1", "#f2" \n\t" \ + MMI_XSawp_DQ(f24, f26, f16, f18, f0, f2) \ + "dmtc1 "#r0", "#f16" \n\t" \ + "dmtc1 "#r1", "#f18" \n\t" + +#define MMI_XSwap_HW_SINGLE(f0, f2, f4) \ + "punpckhhw "#f4", "#f0", "#f2" \n\t" \ + "punpcklhw "#f0", "#f0", "#f2" \n\t" + +#define MMI_XSwap_WD_SINGLE(f0, f2, f4) \ + "punpckhwd "#f4", "#f0", "#f2" \n\t" \ + "punpcklwd "#f0", "#f0", "#f2" \n\t" + +#define MMI_Trans4x4H_SINGLE(f0, f2, f4, f6, f8) \ + MMI_XSwap_HW_SINGLE(f0, f2, f8) \ + MMI_XSwap_HW_SINGLE(f4, f6, f2) \ + MMI_XSwap_WD_SINGLE(f0, f4, f6) \ + MMI_XSwap_WD_SINGLE(f8, f2, f4) + +#define MMI_SumSub_SINGLE(f0, f2, f4) \ + "mov.d "#f4", "#f2" \n\t" \ + "psubh "#f2", "#f2", "#f0" \n\t" \ + "paddh "#f0", "#f0", "#f4" \n\t" + +#define MMI_SumSubMul2_SINGLE(f0, f2, f4, f6) \ + "mov.d "#f4", "#f0" \n\t" \ + "psllh "#f0", "#f0", "#f6" \n\t" \ + "paddh "#f0", "#f0", "#f2" \n\t" \ + "psllh "#f2", "#f2", "#f6" \n\t" \ + "psubh "#f4", "#f4", "#f2" \n\t" + +//f4 should be 0x0 +#define MMI_Copy8Times(f0, f2, f4, r0) \ + "dmtc1 "#r0", "#f0" \n\t" \ + "pshufh "#f0", "#f0", "#f4" \n\t" \ + "mov.d "#f2", "#f0" \n\t" + +//f4 should be 0x0 +#define MMI_Copy16Times(f0, f2, f4, r0) \ + "dmtc1 "#r0", "#f0" \n\t" \ + "punpcklbh "#f0", 
"#f0", "#f0" \n\t" \ + "pshufh "#f0", "#f0", "#f4" \n\t" \ + "mov.d "#f2", "#f0" \n\t" + +#define MMI_SumSubDiv2_SINGLE(f0, f2, f4, f6) \ + "psrah "#f4", "#f2", "#f6" \n\t" \ + "paddh "#f4", "#f4", "#f0" \n\t" \ + "psrah "#f0", "#f0", "#f6" \n\t" \ + "psubh "#f0", "#f0", "#f2" \n\t" + +#define MMI_IDCT_SINGLE(f0, f2, f4, f6, f8, f10, f12) \ + MMI_SumSub_SINGLE(f6, f8, f10) \ + MMI_SumSubDiv2_SINGLE(f4, f2, f0, f12) \ + MMI_SumSub_SINGLE(f0, f6, f10) \ + MMI_SumSub_SINGLE(f4, f8, f10) + +#define MMI_StoreDiff4P_SINGLE(f0, f2, f4, f6, r0, r1, f8) \ + "gsldlc1 "#f2", 0x7("#r1") \n\t" \ + "gsldrc1 "#f2", 0x0("#r1") \n\t" \ + "punpcklbh "#f2", "#f2", "#f6" \n\t" \ + "paddh "#f0", "#f0", "#f4" \n\t" \ + "psrah "#f0", "#f0", "#f8" \n\t" \ + "paddsh "#f0", "#f0", "#f2" \n\t" \ + "packushb "#f0", "#f0", "#f2" \n\t" \ + "gsswlc1 "#f0", 0x3("#r0") \n\t" \ + "gsswrc1 "#f0", 0x0("#r0") \n\t" + +#define SUMH_HORIZON(f0, f2, f4, f6, f8) \ + "paddh "#f0", "#f0", "#f2" \n\t" \ + "punpckhhw "#f2", "#f0", "#f8" \n\t" \ + "punpcklhw "#f0", "#f0", "#f8" \n\t" \ + "paddw "#f0", "#f0", "#f2" \n\t" \ + "punpckhwd "#f2", "#f0", "#f0" \n\t" \ + "paddw "#f0", "#f0", "#f2" \n\t" + +#define LOAD_COLUMN(f0, f2, f4, f6, f8, f10, f12, f14, r0, r1, r2) \ + "daddu "#r2", "#r0", "#r1" \n\t" \ + "gsldlc1 "#f0", 0x7("#r0") \n\t" \ + "gsldlc1 "#f4", 0x7("#r2") \n\t" \ + "gsldrc1 "#f0", 0x0("#r0") \n\t" \ + "gsldrc1 "#f4", 0x0("#r2") \n\t" \ + "punpcklbh "#f0", "#f0", "#f4" \n\t" \ + "daddu "#r0", "#r2", "#r1" \n\t" \ + "daddu "#r2", "#r0", "#r1" \n\t" \ + "gsldlc1 "#f8", 0x7("#r0") \n\t" \ + "gsldlc1 "#f4", 0x7("#r2") \n\t" \ + "gsldrc1 "#f8", 0x0("#r0") \n\t" \ + "gsldrc1 "#f4", 0x0("#r2") \n\t" \ + "punpcklbh "#f8", "#f8", "#f4" \n\t" \ + "punpckhhw "#f2", "#f0", "#f8" \n\t" \ + "punpcklhw "#f0", "#f0", "#f8" \n\t" \ + "daddu "#r0", "#r2", "#r1" \n\t" \ + "daddu "#r2", "#r0", "#r1" \n\t" \ + "gsldlc1 "#f12", 0x7("#r0") \n\t" \ + "gsldlc1 "#f4", 0x7("#r2") \n\t" \ + "gsldrc1 "#f12", 0x0("#r0") \n\t" \ 
+ "gsldrc1 "#f4", 0x0("#r2") \n\t" \ + "punpcklbh "#f12", "#f12", "#f4" \n\t" \ + "daddu "#r0", "#r2", "#r1" \n\t" \ + "daddu "#r2", "#r0", "#r1" \n\t" \ + "gsldlc1 "#f8", 0x7("#r0") \n\t" \ + "gsldlc1 "#f4", 0x7("#r2") \n\t" \ + "gsldrc1 "#f8", 0x0("#r0") \n\t" \ + "gsldrc1 "#f4", 0x0("#r2") \n\t" \ + "punpcklbh "#f8", "#f8", "#f4" \n\t" \ + "punpckhhw "#f14", "#f12", "#f8" \n\t" \ + "punpcklhw "#f12", "#f12", "#f8" \n\t" \ + "daddu "#r0", "#r2", "#r1" \n\t" \ + "punpcklwd "#f0", "#f2", "#f14" \n\t" \ + "punpckhwd "#f2", "#f2", "#f14" \n\t" + +#define LOAD_COLUMN_C(f0, f2, f4, f6, r0, r1, r2) \ + "daddu "#r2", "#r0", "#r1" \n\t" \ + "gsldlc1 "#f0", 0x7("#r0") \n\t" \ + "gsldlc1 "#f2", 0x7("#r2") \n\t" \ + "gsldrc1 "#f0", 0x0("#r0") \n\t" \ + "gsldrc1 "#f2", 0x0("#r2") \n\t" \ + "punpcklbh "#f0", "#f0", "#f2" \n\t" \ + "daddu "#r0", "#r2", "#r1" \n\t" \ + "daddu "#r2", "#r0", "#r1" \n\t" \ + "gsldlc1 "#f4", 0x7("#r0") \n\t" \ + "gsldlc1 "#f2", 0x7("#r2") \n\t" \ + "gsldrc1 "#f4", 0x0("#r0") \n\t" \ + "gsldrc1 "#f2", 0x0("#r2") \n\t" \ + "punpcklbh "#f4", "#f4", "#f2" \n\t" \ + "punpckhhw "#f0", "#f0", "#f4" \n\t" \ + "daddu "#r0", "#r2", "#r1" \n\t" + +/** + * backup register + */ +#define BACKUP_REG \ + double __attribute__((aligned(16))) __back_temp[8]; \ + if (_MIPS_SIM == _ABI64) \ + __asm__ volatile ( \ + "gssqc1 $f25, $f24, 0x00(%[temp]) \n\t" \ + "gssqc1 $f27, $f26, 0x10(%[temp]) \n\t" \ + "gssqc1 $f29, $f28, 0x20(%[temp]) \n\t" \ + "gssqc1 $f31, $f30, 0x30(%[temp]) \n\t" \ + : \ + : [temp]"r"(__back_temp) \ + : "memory" \ + ); \ + else \ + __asm__ volatile ( \ + "gssqc1 $f22, $f20, 0x00(%[temp]) \n\t" \ + "gssqc1 $f26, $f24, 0x10(%[temp]) \n\t" \ + "gssqc1 $f30, $f28, 0x20(%[temp]) \n\t" \ + : \ + : [temp]"r"(__back_temp) \ + : "memory" \ + ); + +/** + * recover register + */ +#define RECOVER_REG \ + if (_MIPS_SIM == _ABI64) \ + __asm__ volatile ( \ + "gslqc1 $f25, $f24, 0x00(%[temp]) \n\t" \ + "gslqc1 $f27, $f26, 0x10(%[temp]) \n\t" \ + "gslqc1 $f29, $f28, 
0x20(%[temp]) \n\t" \ + "gslqc1 $f31, $f30, 0x30(%[temp]) \n\t" \ + : \ + : [temp]"r"(__back_temp) \ + : "memory" \ + ); \ + else \ + __asm__ volatile ( \ + "gslqc1 $f22, $f20, 0x00(%[temp]) \n\t" \ + "gslqc1 $f26, $f24, 0x10(%[temp]) \n\t" \ + "gslqc1 $f30, $f28, 0x20(%[temp]) \n\t" \ + : \ + : [temp]"r"(__back_temp) \ + : "memory" \ + ); + +# define OK 1 +# define NOTOK 0 + +#endif /* ASMDEFS_MMI_H_ */ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/copy_mb.h b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/copy_mb.h new file mode 100644 index 000000000..532702a9e --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/copy_mb.h @@ -0,0 +1,96 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef WELS_COPY_MB_H_ +#define WELS_COPY_MB_H_ + +#include "typedefs.h" + +/**************************************************************************** + * Copy functions + ****************************************************************************/ +void WelsCopy4x4_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); +void WelsCopy8x4_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); +void WelsCopy4x8_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); +void WelsCopy8x8_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); +void WelsCopy8x16_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); // +void WelsCopy16x8_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); // +void WelsCopy16x16_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); + +#if defined(__cplusplus) +extern "C" { +#endif//__cplusplus + +#if defined (X86_ASM) + +void WelsCopy8x8_mmx (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); +void WelsCopy8x16_mmx (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); +void WelsCopy16x8NotAligned_sse2 (uint8_t* Dst, int32_t iStrideD, uint8_t* Src, int32_t iStrideS); +void WelsCopy16x16_sse2 (uint8_t* Dst, int32_t iStrideD, uint8_t* Src, int32_t iStrideS); +void WelsCopy16x16NotAligned_sse2 (uint8_t* Dst, int32_t iStrideD, uint8_t* Src, int32_t 
iStrideS); +#endif//X86_ASM + +#if defined (HAVE_NEON) +void WelsCopy8x8_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); +void WelsCopy16x16_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); +void WelsCopy16x16NotAligned_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); +void WelsCopy16x8NotAligned_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); +void WelsCopy8x16_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); +#endif + +#if defined (HAVE_NEON_AARCH64) +void WelsCopy8x8_AArch64_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); +void WelsCopy16x16_AArch64_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); +void WelsCopy16x16NotAligned_AArch64_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); +void WelsCopy16x8NotAligned_AArch64_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); +void WelsCopy8x16_AArch64_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); +#endif + +#if defined (HAVE_MMI) +void WelsCopy8x8_mmi (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); +void WelsCopy8x16_mmi (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); +void WelsCopy16x8NotAligned_mmi (uint8_t* Dst, int32_t iStrideD, uint8_t* Src, int32_t iStrideS); +void WelsCopy16x16_mmi (uint8_t* Dst, int32_t iStrideD, uint8_t* Src, int32_t iStrideS); +void WelsCopy16x16NotAligned_mmi (uint8_t* Dst, int32_t iStrideD, uint8_t* Src, int32_t iStrideS); +#endif//HAVE_MMI + +#if defined (HAVE_MSA) +void WelsCopy8x8_msa (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); +void WelsCopy8x16_msa (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); +void WelsCopy16x8_msa (uint8_t* Dst, int32_t iStrideD, uint8_t* Src, int32_t iStrideS); +void WelsCopy16x16_msa (uint8_t* Dst, int32_t iStrideD, uint8_t* Src, int32_t 
iStrideS); +#endif//HAVE_MSA +#if defined(__cplusplus) +} +#endif//__cplusplus + +#endif //WELS_COPY_MB_H_ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/cpu.h b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/cpu.h new file mode 100644 index 000000000..0cbcdb44f --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/cpu.h @@ -0,0 +1,80 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE.
+ * + * + * \file cpu.h + * + * \brief CPU feature compatibility detection + * + * \date 04/29/2009 Created + * + ************************************************************************************* + */ +#if !defined(WELS_CPU_DETECTION_H__) +#define WELS_CPU_DETECTION_H__ + +#include "typedefs.h" +#include "cpu_core.h" + + +#if defined(__cplusplus) +extern "C" { +#endif//__cplusplus + +#if defined(X86_ASM) +/* + * cpuid support verify routine + * return 0 if cpuid is not supported by cpu + */ +int32_t WelsCPUIdVerify(); + +void WelsCPUId (uint32_t uiIndex, uint32_t* pFeatureA, uint32_t* pFeatureB, uint32_t* pFeatureC, uint32_t* pFeatureD); + +int32_t WelsCPUSupportAVX (uint32_t eax, uint32_t ecx); +int32_t WelsCPUSupportFMA (uint32_t eax, uint32_t ecx); + +void WelsEmms(); + +/* + * clear FPU registers states for potential float based calculation if support + */ +void WelsCPURestore (const uint32_t kuiCPU); + +#else +#define WelsEmms() +#endif + +uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors); + +#if defined(__cplusplus) +} +#endif//__cplusplus + +#endif//WELS_CPU_DETECTION_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/cpu_core.h b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/cpu_core.h new file mode 100644 index 000000000..f25787b04 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/cpu_core.h @@ -0,0 +1,95 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file cpu_core.h + * + * \brief cpu core feature detection + * + * \date 4/24/2009 Created + * + ************************************************************************************* + */ +#if !defined(WELS_CPU_CORE_FEATURE_DETECTION_H__) +#define WELS_CPU_CORE_FEATURE_DETECTION_H__ + +/* + * WELS CPU feature flags + */ +#define WELS_CPU_MMX 0x00000001 /* mmx */ +#define WELS_CPU_MMXEXT 0x00000002 /* mmx-ext*/ +#define WELS_CPU_SSE 0x00000004 /* sse */ +#define WELS_CPU_SSE2 0x00000008 /* sse 2 */ +#define WELS_CPU_SSE3 0x00000010 /* sse 3 */ +#define WELS_CPU_SSE41 0x00000020 /* sse 4.1 */ +#define WELS_CPU_3DNOW 0x00000040 /* 3dnow! */ +#define WELS_CPU_3DNOWEXT 0x00000080 /* 3dnow! 
ext */ +#define WELS_CPU_ALTIVEC 0x00000100 /* altivec */ +#define WELS_CPU_SSSE3 0x00000200 /* ssse3 */ +#define WELS_CPU_SSE42 0x00000400 /* sse 4.2 */ + +/* CPU features application extensive */ +#define WELS_CPU_FPU 0x00001000 /* x87-FPU on chip */ +#define WELS_CPU_HTT 0x00002000 /* Hyper-Threading Technology (HTT), Multi-threading enabled feature: + physical processor package is capable of supporting more than one logical processor + */ +#define WELS_CPU_CMOV 0x00004000 /* Conditional Move Instructions, + also if x87-FPU is present as indicated by the CPUID.FPU feature bit, then FCOMI and FCMOV are supported + */ +#define WELS_CPU_MOVBE 0x00008000 /* MOVBE instruction */ +#define WELS_CPU_AES 0x00010000 /* AES instruction extensions */ +#define WELS_CPU_FMA 0x00020000 /* AVX VEX FMA instruction sets */ +#define WELS_CPU_AVX 0x00000800 /* Advanced Vector eXtensions */ + +#ifdef HAVE_AVX2 +#define WELS_CPU_AVX2 0x00040000 /* AVX2 */ +#else +#define WELS_CPU_AVX2 0x00000000 /* !AVX2 */ +#endif + +#define WELS_CPU_CACHELINE_16 0x10000000 /* CacheLine Size 16 */ +#define WELS_CPU_CACHELINE_32 0x20000000 /* CacheLine Size 32 */ +#define WELS_CPU_CACHELINE_64 0x40000000 /* CacheLine Size 64 */ +#define WELS_CPU_CACHELINE_128 0x80000000 /* CacheLine Size 128 */ + +/* For the android OS */ +#define WELS_CPU_ARMv7 0x000001 /* ARMv7 */ +#define WELS_CPU_VFPv3 0x000002 /* VFPv3 */ +#define WELS_CPU_NEON 0x000004 /* NEON */ + +/* For loongson */ +#define WELS_CPU_MMI 0x00000001 /* mmi */ +#define WELS_CPU_MSA 0x00000002 /* msa */ + +/* + * Interfaces for CPU core feature detection as below + */ + +#endif//WELS_CPU_CORE_FEATURE_DETECTION_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/crt_util_safe_x.h b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/crt_util_safe_x.h new file mode 100644 index 000000000..47068c256 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/crt_util_safe_x.h @@ -0,0 +1,101 @@ +/*!
+ * \copy + * Copyright (c) 2010-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file crt_util_safe_x.h + * + * \brief Safe CRT-like utilities for cross-platform support + * + * \date 06/04/2010 Created + * + ************************************************************************************* + */ +#ifndef WELS_CRT_UTIL_SAFE_CROSS_PLATFORMS_H__ +#define WELS_CRT_UTIL_SAFE_CROSS_PLATFORMS_H__ + +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <stdarg.h> +#include <math.h> +#include <time.h> + +#if defined(_WIN32) +#include <windows.h> +#include <sys/types.h> +#include <sys/timeb.h> +#else +#include <sys/time.h> +#include "typedefs.h" +#endif//_WIN32 + +#include "typedefs.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define WELS_FILE_SEEK_SET SEEK_SET +#define WELS_FILE_SEEK_CUR SEEK_CUR +#define WESL_FILE_SEEK_END SEEK_END + +typedef FILE WelsFileHandle; + +#ifdef _WIN32 +typedef struct _timeb SWelsTime; +#else +typedef struct TagWelsTime { + time_t time; + unsigned short millitm; +} SWelsTime; +#endif + +int32_t WelsSnprintf (char* buffer, int32_t sizeOfBuffer, const char* format, ...); +char* WelsStrncpy (char* dest, int32_t sizeInBytes, const char* src); +char* WelsStrcat (char* dest, uint32_t sizeInBytes, const char* src); +int32_t WelsVsnprintf (char* buffer, int32_t sizeOfBuffer, const char* format, va_list argptr); + +WelsFileHandle* WelsFopen (const char* filename, const char* mode); +int32_t WelsFclose (WelsFileHandle* fp); +int32_t WelsFread (void* buffer, int32_t size, int32_t count, WelsFileHandle* fp); +int32_t WelsFwrite (const void* buffer, int32_t size, int32_t count, WelsFileHandle* fp); +int32_t WelsFseek (WelsFileHandle* fp, int32_t offset, int32_t origin); +int32_t WelsFflush (WelsFileHandle* fp); + +int32_t WelsGetTimeOfDay (SWelsTime* tp); +int32_t WelsStrftime (char* buffer, int32_t size, const char* format, const SWelsTime* tp); +uint16_t WelsGetMillisecond (const SWelsTime* tp); + + +#ifdef __cplusplus +} +#endif + +#endif//WELS_CRT_UTIL_SAFE_CROSS_PLATFORMS_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/deblocking_common.h
b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/deblocking_common.h new file mode 100644 index 000000000..3ec9b2e5d --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/deblocking_common.h @@ -0,0 +1,112 @@ +#ifndef WELS_DEBLOCKING_COMMON_H__ +#define WELS_DEBLOCKING_COMMON_H__ +#include "typedefs.h" +void DeblockLumaLt4V_c (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc); +void DeblockLumaEq4V_c (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta); + +void DeblockLumaLt4H_c (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc); +void DeblockLumaEq4H_c (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta); + +void DeblockChromaLt4V_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta, + int8_t* pTc); +void DeblockChromaEq4V_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta); + +void DeblockChromaLt4H_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta, + int8_t* pTc); +void DeblockChromaEq4H_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta); + +void DeblockChromaLt4V2_c (uint8_t* pPixCbCr, int32_t iStride, int32_t iAlpha, int32_t iBeta, + int8_t* pTc); +void DeblockChromaEq4V2_c (uint8_t* pPixCbCr, int32_t iStride, int32_t iAlpha, int32_t iBeta); + +void DeblockChromaLt4H2_c (uint8_t* pPixCbCr, int32_t iStride, int32_t iAlpha, int32_t iBeta, + int8_t* pTc); +void DeblockChromaEq4H2_c (uint8_t* pPixCbCr,int32_t iStride, int32_t iAlpha, int32_t iBeta); + +void WelsNonZeroCount_c (int8_t* pNonZeroCount); + +#if defined(__cplusplus) +extern "C" { +#endif//__cplusplus + +#ifdef X86_ASM +void DeblockLumaLt4V_ssse3 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc); +void DeblockLumaEq4V_ssse3 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta); +void DeblockLumaTransposeH2V_sse2 
(uint8_t* pPixY, int32_t iStride, uint8_t* pDst); +void DeblockLumaTransposeV2H_sse2 (uint8_t* pPixY, int32_t iStride, uint8_t* pSrc); +void DeblockLumaLt4H_ssse3 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc); +void DeblockLumaEq4H_ssse3 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta); +void DeblockChromaEq4V_ssse3 (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta); +void DeblockChromaLt4V_ssse3 (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta, + int8_t* pTC); +void DeblockChromaEq4H_ssse3 (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta); +void DeblockChromaLt4H_ssse3 (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta, + int8_t* pTC); +void WelsNonZeroCount_sse2 (int8_t* pNonZeroCount); +#endif + +#if defined(HAVE_NEON) +void DeblockLumaLt4V_neon (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc); +void DeblockLumaEq4V_neon (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta); + +void DeblockLumaLt4H_neon (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc); +void DeblockLumaEq4H_neon (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta); + +void DeblockChromaLt4V_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta, + int8_t* pTC); +void DeblockChromaEq4V_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta); + +void DeblockChromaLt4H_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta, + int8_t* pTC); +void DeblockChromaEq4H_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta); +void WelsNonZeroCount_neon (int8_t* pNonZeroCount); +#endif + +#if defined(HAVE_NEON_AARCH64) +void DeblockLumaLt4V_AArch64_neon (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* 
pTc); +void DeblockLumaEq4V_AArch64_neon (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta); +void DeblockLumaLt4H_AArch64_neon (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc); +void DeblockLumaEq4H_AArch64_neon (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta); +void DeblockChromaLt4V_AArch64_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta, + int8_t* pTC); +void DeblockChromaEq4V_AArch64_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta); +void DeblockChromaLt4H_AArch64_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta, + int8_t* pTC); +void DeblockChromaEq4H_AArch64_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta); +void WelsNonZeroCount_AArch64_neon (int8_t* pNonZeroCount); +#endif + +#if defined(HAVE_MMI) +void DeblockLumaLt4V_mmi (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc); +void DeblockLumaEq4V_mmi (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta); +void DeblockLumaTransposeH2V_mmi (uint8_t* pPixY, int32_t iStride, uint8_t* pDst); +void DeblockLumaTransposeV2H_mmi (uint8_t* pPixY, int32_t iStride, uint8_t* pSrc); +void DeblockLumaLt4H_mmi (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc); +void DeblockLumaEq4H_mmi (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta); +void DeblockChromaEq4V_mmi (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta); +void DeblockChromaLt4V_mmi (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta, + int8_t* pTC); +void DeblockChromaEq4H_mmi (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta); +void DeblockChromaLt4H_mmi (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta, + int8_t* pTC); +void WelsNonZeroCount_mmi 
(int8_t* pNonZeroCount); +#endif//HAVE_MMI + +#if defined(HAVE_MSA) +void DeblockLumaLt4V_msa (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc); +void DeblockLumaEq4V_msa (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta); +void DeblockLumaLt4H_msa (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc); +void DeblockLumaEq4H_msa (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta); +void DeblockChromaEq4V_msa (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta); +void DeblockChromaLt4V_msa (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta, + int8_t* pTC); +void DeblockChromaEq4H_msa (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta); +void DeblockChromaLt4H_msa (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta, + int8_t* pTC); +void WelsNonZeroCount_msa (int8_t* pNonZeroCount); +#endif//HAVE_MSA +#if defined(__cplusplus) +} +#endif//__cplusplus + +#endif //WELS_DEBLOCKING_COMMON_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/expand_pic.h b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/expand_pic.h new file mode 100644 index 000000000..2b06d9e47 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/expand_pic.h @@ -0,0 +1,107 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ *
+ *
+ * \file expand_pic.h
+ *
+ * \brief Interface for expanding reconstructed picture to be used for reference
+ *
+ * \date 06/08/2009
+ *************************************************************************************
+ */
+
+#ifndef EXPAND_PICTURE_H
+#define EXPAND_PICTURE_H
+
+#include "typedefs.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif//__cplusplus
+
+// Lengths of the reference extension (padding) for luma and chroma.
+// NOTE(review): units are not stated in this header - presumably pixels; confirm.
+#define PADDING_LENGTH 32 // reference extension
+#define CHROMA_PADDING_LENGTH 16 // chroma reference extension
+
+// Architecture-specific picture-expansion routines. Each group is declared only
+// when the corresponding build macro is defined; all share one signature.
+#if defined(X86_ASM)
+void ExpandPictureLuma_sse2 (uint8_t* pDst,
+                             const int32_t kiStride,
+                             const int32_t kiPicW,
+                             const int32_t kiPicH);
+void ExpandPictureChromaAlign_sse2 (uint8_t* pDst,
+                                    const int32_t kiStride,
+                                    const int32_t kiPicW,
+                                    const int32_t kiPicH);
+void ExpandPictureChromaUnalign_sse2 (uint8_t* pDst,
+                                      const int32_t kiStride,
+                                      const int32_t kiPicW,
+                                      const int32_t kiPicH);
+#endif//X86_ASM
+
+#if defined(HAVE_NEON)
+void ExpandPictureLuma_neon (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicW, const int32_t kiPicH);
+void ExpandPictureChroma_neon (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicW, const int32_t kiPicH);
+#endif
+#if defined(HAVE_NEON_AARCH64)
+void ExpandPictureLuma_AArch64_neon (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicW, const int32_t kiPicH);
+void ExpandPictureChroma_AArch64_neon (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicW,
+                                       const int32_t kiPicH);
+#endif
+
+#if defined(HAVE_MMI)
+void ExpandPictureLuma_mmi (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicW,
+                            const int32_t kiPicH);
+void ExpandPictureChromaAlign_mmi (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicW,
+                                   const int32_t kiPicH);
+void ExpandPictureChromaUnalign_mmi (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicW,
+                                     const int32_t kiPicH);
+#endif//HAVE_MMI
+
+// Common signature of the ExpandPicture* routines declared above:
+// (plane pointer, stride, picture width, picture height).
+typedef void (*PExpandPictureFunc) (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicW, const int32_t kiPicH);
+
+// Function-pointer table: one luma expander and two chroma expander slots.
+// NOTE(review): the two chroma slots presumably hold the unaligned/aligned
+// variants - confirm the index meaning against InitExpandPictureFunc.
+typedef struct TagExpandPicFunc {
+  PExpandPictureFunc pfExpandLumaPicture;
+  PExpandPictureFunc pfExpandChromaPicture[2];
+} SExpandPicFunc;
+
+// Per-macroblock padding helpers. They take C++ reference parameters, so these
+// two declarations are usable from C++ only, even though they sit inside the
+// extern "C" region.
+void PadMBLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW, const int32_t& kiPicH,
+                  const int32_t& kiMbX, const int32_t& kiMbY, const int32_t& kiMBWidth, const int32_t& kiMBHeight);
+void PadMBChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW, const int32_t& kiPicH,
+                    const int32_t& kiMbX, const int32_t& kiMbY, const int32_t& kiMBWidth, const int32_t& kiMBHeight);
+
+// Expands a three-plane picture (pData[3] with per-plane iStride[3]) using the
+// supplied luma expander and pair of chroma expanders.
+void ExpandReferencingPicture (uint8_t* pData[3], int32_t iWidth, int32_t iHeight, int32_t iStride[3],
+                               PExpandPictureFunc pExpLuma, PExpandPictureFunc pExpChrom[2]);
+
+// Fills pExpandPicFunc; presumably selects implementations from kuiCPUFlags
+// (the definition is not part of this header - confirm).
+void InitExpandPictureFunc (SExpandPicFunc* pExpandPicFunc, const uint32_t kuiCPUFlags);
+
+#if defined(__cplusplus)
+}
+#endif//__cplusplus
+
+#endif
diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/golomb_common.h b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/golomb_common.h
new file mode 100644
index 000000000..04f87b004
--- /dev/null
+++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/golomb_common.h
@@ -0,0 +1,166 @@
+/*!
+ * \copy
+ * Copyright (c) 2009-2013, Cisco Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file golomb_common.h + * + * \brief Exponential Golomb entropy coding/decoding routine + * + * \date 03/12/2015 Created + * + ************************************************************************************* + */ +#ifndef EXPONENTIAL_GOLOMB_ENTROPY_CODING_COMMON_H__ +#define EXPONENTIAL_GOLOMB_ENTROPY_CODING_COMMON_H__ + +#include "typedefs.h" + +namespace WelsCommon { + +#define WRITE_BE_32(ptr, val) do { \ + (ptr)[0] = (val) >> 24; \ + (ptr)[1] = (val) >> 16; \ + (ptr)[2] = (val) >> 8; \ + (ptr)[3] = (val) >> 0; \ + } while (0) +/************************************************************************/ +/* GOLOMB CODIMG FOR WELS COMMON */ +/************************************************************************/ + + +/*! 
+ * \brief initialize bitstream writing + * + * \param pBs Bit string auxiliary pointer + * \param pBuf bit-stream pBuffer + * \param iSize iSize in bits for decoder; iSize in bytes for encoder + * + * \return iSize of pBuffer pData in byte; failed in -1 return + */ +static inline int32_t InitBits (SBitStringAux* pBs, const uint8_t* kpBuf, const int32_t kiSize) { + uint8_t* ptr = (uint8_t*)kpBuf; + + pBs->pStartBuf = ptr; + pBs->pCurBuf = ptr; + pBs->pEndBuf = ptr + kiSize; + pBs->iLeftBits = 32; + pBs->uiCurBits = 0; + + return kiSize; +} + +static inline int32_t BsWriteBits (PBitStringAux pBitString, int32_t iLen, const uint32_t kuiValue) { + if (iLen < pBitString->iLeftBits) { + pBitString->uiCurBits = (pBitString->uiCurBits << iLen) | kuiValue; + pBitString->iLeftBits -= iLen; + } else { + iLen -= pBitString->iLeftBits; + pBitString->uiCurBits = (pBitString->uiCurBits << pBitString->iLeftBits) | (kuiValue >> iLen); + WRITE_BE_32 (pBitString->pCurBuf, pBitString->uiCurBits); + pBitString->pCurBuf += 4; + pBitString->uiCurBits = kuiValue & ((1 << iLen) - 1); + pBitString->iLeftBits = 32 - iLen; + } + return 0; +} + +/* + * Write 1 bit + */ +static inline int32_t BsWriteOneBit (PBitStringAux pBitString, const uint32_t kuiValue) { + BsWriteBits (pBitString, 1, kuiValue); + return 0; +} + +static inline int32_t BsFlush (PBitStringAux pBitString) { + WRITE_BE_32 (pBitString->pCurBuf, pBitString->uiCurBits << pBitString->iLeftBits); + pBitString->pCurBuf += 4 - pBitString->iLeftBits / 8; + pBitString->iLeftBits = 32; + pBitString->uiCurBits = 0; + return 0; +} + +/* + * Write unsigned exp golomb codes + */ + +static inline int32_t BsWriteUE (PBitStringAux pBitString, const uint32_t kuiValue) { + uint32_t iTmpValue = kuiValue + 1; + if (256 > kuiValue) { + BsWriteBits (pBitString, g_kuiGolombUELength[kuiValue], kuiValue + 1); + } else { + uint32_t n = 0; + if (iTmpValue & 0xffff0000) { + iTmpValue >>= 16; + n += 16; + } + if (iTmpValue & 0xff00) { + iTmpValue >>= 8; + 
n += 8; + } + + //n += (g_kuiGolombUELength[iTmpValue] >> 1); + + n += (g_kuiGolombUELength[iTmpValue - 1] >> 1); + BsWriteBits (pBitString, (n << 1) + 1, kuiValue + 1); + } + return 0; +} + +/* + * Write signed exp golomb codes + */ +static inline int32_t BsWriteSE (PBitStringAux pBitString, const int32_t kiValue) { + uint32_t iTmpValue; + if (0 == kiValue) { + BsWriteOneBit (pBitString, 1); + } else if (0 < kiValue) { + iTmpValue = (kiValue << 1) - 1; + BsWriteUE (pBitString, iTmpValue); + } else { + iTmpValue = ((-kiValue) << 1); + BsWriteUE (pBitString, iTmpValue); + } + return 0; +} + + +/* + * Write RBSP trailing bits + */ +static inline int32_t BsRbspTrailingBits (PBitStringAux pBitString) { + BsWriteOneBit (pBitString, 1); + BsFlush (pBitString); + + return 0; +} + +} +#endif//EXPONENTIAL_GOLOMB_ENTROPY_CODING_COMMON_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/intra_pred_common.h b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/intra_pred_common.h new file mode 100644 index 000000000..87cf023fa --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/intra_pred_common.h @@ -0,0 +1,81 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * \file intra_pred_common.h
+ *
+ * \brief interfaces for intra predictor about 16x16.
+ *
+ * \date 4/2/2014 Created
+ *
+ *************************************************************************************
+ */
+
+#ifndef INTRA_PRED_COMMON_H
+#define INTRA_PRED_COMMON_H
+
+#include "typedefs.h"
+
+
+// 16x16 luma intra predictors with C++ linkage (declared outside the extern "C"
+// region below). NOTE(review): the V/H suffixes presumably mean vertical and
+// horizontal prediction, and _c the plain C/C++ implementation - confirm.
+void WelsI16x16LumaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
+void WelsI16x16LumaPredH_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
+
+
+// Architecture-specific variants with C linkage. Each pair is declared only when
+// the corresponding build macro is defined and has the same signature as the
+// _c functions above.
+#if defined(__cplusplus)
+extern "C" {
+#endif//__cplusplus
+
+#if defined(X86_ASM)
+//for intra-prediction ASM functions
+void WelsI16x16LumaPredV_sse2 (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
+void WelsI16x16LumaPredH_sse2 (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
+#endif//X86_ASM
+
+#if defined(HAVE_NEON)
+void WelsI16x16LumaPredV_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
+void WelsI16x16LumaPredH_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
+#endif//HAVE_NEON
+
+#if defined(HAVE_NEON_AARCH64)
+void WelsI16x16LumaPredV_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
+void WelsI16x16LumaPredH_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
+#endif//HAVE_NEON_AARCH64
+
+#if defined(HAVE_MMI)
+void WelsI16x16LumaPredV_mmi (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
+void WelsI16x16LumaPredH_mmi (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
+#endif//HAVE_MMI
+#if defined(__cplusplus)
+}
+#endif//__cplusplus
+#endif//INTRA_PRED_COMMON_H
+
+
+
diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/ls_defines.h b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/ls_defines.h
new file mode 100644
index 000000000..7b2914ea3
--- /dev/null
+++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/ls_defines.h
@@ -0,0 +1,130 @@
+/*!
+ * \copy
+ * Copyright (c) 2013, Cisco Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef ___LD_ST_MACROS___ +#define ___LD_ST_MACROS___ + +#include +#include "typedefs.h" + +#ifdef __GNUC__ + +struct tagUnaligned_64 { + uint64_t l; +} __attribute__ ((packed)) __attribute__ ((may_alias)); +struct tagUnaligned_32 { + uint32_t l; +} __attribute__ ((packed)) __attribute__ ((may_alias)); +struct tagUnaligned_16 { + uint16_t l; +} __attribute__ ((packed)) __attribute__ ((may_alias)); + +#define LD16(a) (((struct tagUnaligned_16 *) (a))->l) +#define LD32(a) (((struct tagUnaligned_32 *) (a))->l) +#define LD64(a) (((struct tagUnaligned_64 *) (a))->l) + +#define STRUCTA(size, align) struct tagUnaligned_##size##_##align {\ + uint##size##_t l; \ +} __attribute__ ((aligned(align))) __attribute__ ((may_alias)) +STRUCTA (16, 2); +STRUCTA (32, 2); +STRUCTA (32, 4); +STRUCTA (64, 2); +STRUCTA (64, 4); +STRUCTA (64, 8); +//#define _USE_STRUCT_INT_CVT +//#ifdef _USE_STRUCT_INT_CVT +#define ST16(a, b) (((struct tagUnaligned_16 *) (a))->l) = (b) +#define ST32(a, b) (((struct tagUnaligned_32 *) (a))->l) = (b) +#define ST64(a, b) (((struct tagUnaligned_64 *) (a))->l) = (b) + +#define LDA(a, size, align) (((struct tagUnaligned_##size##_##align *) (a))->l) +#define STA(a, b, size, align) (((struct tagUnaligned_##size##_##align *) (a))->l) = (b) +#define LD16A2(a) LDA(a, 16, 2) +#define LD32A2(a) LDA(a, 32, 2) +#define LD32A4(a) LDA(a, 32, 4) +#define LD64A2(a) LDA(a, 64, 2) +#define LD64A4(a) LDA(a, 64, 4) +#define 
LD64A8(a) LDA(a, 64, 8) +#define ST16A2(a, b) STA(a, b, 16, 2) +#define ST32A2(a, b) STA(a, b, 32, 2) +#define ST32A4(a, b) STA(a, b, 32, 4) +#define ST64A2(a, b) STA(a, b, 64, 2) +#define ST64A4(a, b) STA(a, b, 64, 4) +#define ST64A8(a, b) STA(a, b, 64, 8) +//#else +//inline void __ST16(void *dst, uint16_t v) { memcpy(dst, &v, 2); } +//inline void __ST32(void *dst, uint32_t v) { memcpy(dst, &v, 4); } +//inline void __ST64(void *dst, uint64_t v) { memcpy(dst, &v, 8); } +//#endif + +#else + +//#define INTD16(a) (*((int16_t*)(a))) +//#define INTD32(a) (*((int32_t*)(a))) +//#define INTD64(a) (*((int64_t*)(a))) + +#define LD16(a) (*((uint16_t*)(a))) +#define LD32(a) (*((uint32_t*)(a))) +#define LD64(a) (*((uint64_t*)(a))) + +#define ST16(a, b) *((uint16_t*)(a)) = (b) +#define ST32(a, b) *((uint32_t*)(a)) = (b) +#define ST64(a, b) *((uint64_t*)(a)) = (b) +#define LD16A2 LD16 +#define LD32A2 LD32 +#define LD32A4 LD32 +#define LD64A2 LD64 +#define LD64A4 LD64 +#define LD64A8 LD64 +#define ST16A2 ST16 +#define ST32A2 ST32 +#define ST32A4 ST32 +#define ST64A2 ST64 +#define ST64A4 ST64 +#define ST64A8 ST64 + +#endif /* !__GNUC__ */ + +#ifndef INTD16 +#define INTD16 LD16 +#endif//INTD16 + +#ifndef INTD32 +#define INTD32 LD32 +#endif//INTD32 + +#ifndef INTD64 +#define INTD64 LD64 +#endif//INTD64 + +#endif//___LD_ST_MACROS___ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/macros.h b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/macros.h new file mode 100644 index 000000000..95a7c9298 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/macros.h @@ -0,0 +1,329 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file macros.h + * + * \brief MACRO based tool utilization + * + * \date 3/13/2009 Created + * + ************************************************************************************* + */ +#ifndef WELS_MACRO_UTILIZATIONS_H__ +#define WELS_MACRO_UTILIZATIONS_H__ + +#include +#include +#include +#include "typedefs.h" + + +/* +* ENFORCE_STACK_ALIGN_1D: force 1 dimension local data aligned in stack +* _tp: type +* _nm: var name +* _sz: size +* _al: align bytes +* auxiliary var: _nm ## _tEmP +*/ +#define ENFORCE_STACK_ALIGN_1D(_tp, _nm, _sz, _al) \ + _tp _nm ## _tEmP[(_sz)+(_al)-1]; \ + _tp *_nm = _nm ## _tEmP + ((_al)-1) - (((uintptr_t)(_nm ## _tEmP + ((_al)-1)) & ((_al)-1))/sizeof(_tp)); + + +#define ENFORCE_STACK_ALIGN_2D(_tp, _nm, _cx, _cy, _al) \ + assert( ((_al) && !((_al) & ((_al) - 1))) && ((_al) >= sizeof(_tp)) ); /*_al should be power-of-2 and >= sizeof(_tp)*/\ + _tp _nm ## _tEmP[(_cx)*(_cy)+(_al)/sizeof(_tp)-1]; \ + _tp *_nm ## _tEmP_al = _nm ## _tEmP + ((_al)/sizeof(_tp)-1); \ + _nm ## _tEmP_al -= (((uintptr_t)_nm ## _tEmP_al & ((_al)-1))/sizeof(_tp)); \ + _tp (*_nm)[(_cy)] = (_tp (*)[(_cy)])_nm ## _tEmP_al; + + +#if defined(_MSC_VER) + +#if(_MSC_VER < 1700) +#define inline __inline +#endif + +#define ALIGNED_DECLARE( type, var, n ) __declspec(align(n)) type var + +#elif defined(__GNUC__) + +#define ALIGNED_DECLARE( type, var, n ) type var __attribute__((aligned(n))) +#endif//_MSC_VER + + +#ifndef WELS_ALIGN +#define WELS_ALIGN(x, n) (((x)+(n)-1)&~((n)-1)) +#endif//WELS_ALIGN + + +#if 1 // Alternative implementation of WELS_MAX and WELS_MIN +#ifndef WELS_MAX +#define WELS_MAX(x, y) ((x) > (y) ? (x) : (y)) +#endif//WELS_MAX + +#ifndef WELS_MIN +#define WELS_MIN(x, y) ((x) < (y) ? (x) : (y)) +#endif//WELS_MIN +#ifndef WELS_MIN_POSITIVE +#define WELS_MIN_POSITIVE(x, y) (x >= 0 && y >= 0) ? 
WELS_MIN(x, y) : WELS_MAX(x, y); +#endif//WELS_MIN_POSITIVE +#else // Alternative implementation of WELS_MAX and WELS_MIN +#ifndef WELS_MAX +#define WELS_MAX(x, y) ((x) - (((x)-(y))&(((x)-(y))>>31))) +#endif//WELS_MAX + +#ifndef WELS_MIN +#define WELS_MIN(x, y) ((y) + (((x)-(y))&(((x)-(y))>>31))) +#endif//WELS_MIN +#endif // Alternative implementation of WELS_MAX and WELS_MIN + + +#ifndef WELS_CEIL +#define WELS_CEIL(x) ceil(x) // FIXME: low complexity instead of math library used +#endif//WELS_CEIL + +#ifndef WELS_FLOOR +#define WELS_FLOOR(x) floor(x) // FIXME: low complexity instead of math library used +#endif//WELS_FLOOR + +#ifndef WELS_ROUND +#define WELS_ROUND(x) ((int32_t)(0.5+(x))) +#endif//WELS_ROUND + +#ifndef WELS_ROUND64 +#define WELS_ROUND64(x) ((int64_t)(0.5+(x))) +#endif//WELS_ROUND + +#ifndef WELS_DIV_ROUND +#define WELS_DIV_ROUND(x,y) ((int32_t)((y)==0?((x)/((y)+1)):(((y)/2+(x))/(y)))) +#endif//WELS_DIV_ROUND + +#ifndef WELS_DIV_ROUND64 +#define WELS_DIV_ROUND64(x,y) ((int64_t)((y)==0?((x)/((y)+1)):(((y)/2+(x))/(y)))) +#endif//WELS_DIV_ROUND64 + +#define WELS_NON_ZERO_COUNT_AVERAGE(nC,nA,nB) { \ + nC = nA + nB + 1; \ + nC >>= (uint8_t)( nA != -1 && nB != -1); \ + nC += (uint8_t)(nA == -1 && nB == -1); \ +} + +static inline int32_t CeilLog2 (int32_t i) { + int32_t s = 0; + i--; + while (i > 0) { + s++; + i >>= 1; + } + return s; +} +/* +the second path will degrades the performance +*/ +#if 1 +static inline int32_t WelsMedian (int32_t iX, int32_t iY, int32_t iZ) { + int32_t iMin = iX, iMax = iX; + + if (iY < iMin) + iMin = iY; + else + iMax = iY; + + if (iZ < iMin) + iMin = iZ; + else if (iZ > iMax) + iMax = iZ; + + return (iX + iY + iZ) - (iMin + iMax); +} +#else +static inline int32_t WelsMedian (int32_t iX, int32_t iY, int32_t iZ) { + int32_t iTmp = (iX - iY) & ((iX - iY) >> 31); + iX -= iTmp; + iY += iTmp; + iY -= (iY - iZ) & ((iY - iZ) >> 31); + iY += (iX - iY) & ((iX - iY) >> 31); + return iY; +} + +#endif + +#ifndef NEG_NUM +//#define 
NEG_NUM( num ) (-num) +#define NEG_NUM(iX) (1+(~(iX))) +#endif// NEG_NUM + +static inline uint8_t WelsClip1 (int32_t iX) { + uint8_t uiTmp = (uint8_t) (((iX) & ~255) ? (- (iX) >> 31) : (iX)); + return uiTmp; +} + +#ifndef WELS_SIGN +#define WELS_SIGN(iX) ((int32_t)(iX) >> 31) +#endif //WELS_SIGN +#ifndef WELS_ABS +#if 1 +#define WELS_ABS(iX) ((iX)>0 ? (iX) : -(iX)) +#else +#define WELS_ABS(iX) ((WELS_SIGN(iX) ^ (int32_t)(iX)) - WELS_SIGN(iX)) +#endif +#endif //WELS_ABS + +// WELS_CLIP3 +#ifndef WELS_CLIP3 +#define WELS_CLIP3(iX, iY, iZ) ((iX) < (iY) ? (iY) : ((iX) > (iZ) ? (iZ) : (iX))) +#endif //WELS_CLIP3 + +template T WelsClip3(T iX, T iY, T iZ) { + if (iX < iY) + return iY; + if (iX > iZ) + return iZ; + return iX; +} + +#define DISALLOW_COPY_AND_ASSIGN(cclass) \ +private: \ +cclass(const cclass &); \ +cclass& operator=(const cclass &); + +/* + * Description: to check variable validation and return the specified result + * iResult: value to be checked + * iExpected: the expected value + */ +#ifndef WELS_VERIFY_RETURN_IFNEQ +#define WELS_VERIFY_RETURN_IFNEQ(iResult, iExpected) \ + if (iResult != iExpected) { \ + return iResult; \ + } +#endif//#if WELS_VERIFY_RETURN_IF + +/* + * Description: to check variable validation and return the specified result + * iResult: value to be return + * bCaseIf: negative condition to be verified + */ +#ifndef WELS_VERIFY_RETURN_IF +#define WELS_VERIFY_RETURN_IF(iResult, bCaseIf) \ + if (bCaseIf) { \ + return iResult; \ + } +#endif//#if WELS_VERIFY_RETURN_IF + +/* + * Description: to check variable validation and return the specified result + * with correspoinding process advance. 
+ * result: value to be return + * case_if: negative condition to be verified + * proc: process need perform + */ +#ifndef WELS_VERIFY_RETURN_PROC_IF +#define WELS_VERIFY_RETURN_PROC_IF(iResult, bCaseIf, fProc) \ + if (bCaseIf) { \ + fProc; \ + return iResult; \ + } +#endif//#if WELS_VERIFY_RETURN_PROC_IF + +static inline int32_t WELS_LOG2 (uint32_t v) { + int32_t r = 0; + while (v >>= 1) { + ++r; + } + return r; + +} + +#define CLIP3_QP_0_51(q) WELS_CLIP3(q, 0, 51) // ((q) < (0) ? (0) : ((q) > (51) ? (51) : (q))) +#define CALC_BI_STRIDE(width,bitcount) ((((width * bitcount) + 31) & ~31) >> 3) + + + + +#ifndef BUTTERFLY1x2 +#define BUTTERFLY1x2(b) (((b)<<8) | (b)) +#endif//BUTTERFLY1x2 + +#ifndef BUTTERFLY2x4 +#define BUTTERFLY2x4(wd) (((uint32_t)(wd)<<16) |(wd)) +#endif//BUTTERFLY2x4 + +#ifndef BUTTERFLY4x8 +#define BUTTERFLY4x8(dw) (((uint64_t)(dw)<<32) | (dw)) +#endif//BUTTERFLY4x8 + +static inline bool WELS_POWER2_IF (uint32_t v) { + return (v && ! (v & (v - 1))); +} + +#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ > 4) +#define WELS_GCC_UNUSED __attribute__((__unused__)) +#else +#define WELS_GCC_UNUSED +#endif + +inline bool CheckInRangeCloseOpen (const int16_t kiCurrent, const int16_t kiMin, const int16_t kiMax) { + return ((kiCurrent >= kiMin) && (kiCurrent < kiMax)); +} + +static inline void WelsSetMemUint32_c (uint32_t* pDst, uint32_t iValue, int32_t iSizeOfData) { + for (int i = 0; i < iSizeOfData; i++) { + pDst[i] = iValue; + } +} + +static inline void WelsSetMemUint16_c (uint16_t* pDst, uint16_t iValue, int32_t iSizeOfData) { + for (int i = 0; i < iSizeOfData; i++) { + pDst[i] = iValue; + } +} + +inline void WelsSetMemMultiplebytes_c (void* pDst, uint32_t iValue, int32_t iSizeOfData, int32_t iDataLengthOfData) { + assert (4 == iDataLengthOfData || 2 == iDataLengthOfData || 1 == iDataLengthOfData); + + // TODO: consider add assembly for these functions + if (0 != iValue) { + if (4 == iDataLengthOfData) { + WelsSetMemUint32_c (static_cast (pDst), 
static_cast (iValue), iSizeOfData); + } else if (2 == iDataLengthOfData) { + WelsSetMemUint16_c (static_cast (pDst), static_cast (iValue), iSizeOfData); + } else { + memset (static_cast (pDst), static_cast (iValue), iSizeOfData); + } + } else { + memset (static_cast (pDst), 0, iSizeOfData * iDataLengthOfData); + } +} + +#endif//WELS_MACRO_UTILIZATIONS_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/mc.h b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/mc.h new file mode 100644 index 000000000..8de1ed4c2 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/mc.h @@ -0,0 +1,363 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef MC_H
+#define MC_H
+
+#include "typedefs.h"
+
+// Signature of a motion-vector driven routine: source and destination buffers
+// with their strides, the vector (iMvX, iMvY) and the block width and height.
+typedef void (*PWelsMcFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+                             int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight);
+
+// Signature of the luma half-pel routines: same buffers and block size as
+// above, without a motion vector.
+typedef void (*PWelsLumaHalfpelMcFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+                                        int32_t iWidth, int32_t iHeight);
+// Parameter names are not given here. NOTE(review): presumably
+// (dst, dstStride, srcA, srcAStride, srcB, srcBStride, width, height) - confirm.
+typedef void (*PWelsSampleAveragingFunc) (uint8_t*, int32_t, const uint8_t*, int32_t, const uint8_t*, int32_t,
+                                          int32_t, int32_t);
+
+// Function-pointer table filled in by WelsCommon::InitMcFunc.
+// NOTE(review): Hor/Ver/Cen presumably stand for the horizontal, vertical and
+// centre half-pel positions - confirm.
+typedef struct TagMcFunc {
+  PWelsLumaHalfpelMcFunc pfLumaHalfpelHor;
+  PWelsLumaHalfpelMcFunc pfLumaHalfpelVer;
+  PWelsLumaHalfpelMcFunc pfLumaHalfpelCen;
+  PWelsMcFunc pMcChromaFunc;
+
+  PWelsMcFunc pMcLumaFunc;
+  PWelsSampleAveragingFunc pfSampleAveraging;
+} SMcFunc;
+
+namespace WelsCommon {
+
+// Initializes pMcFunc; presumably selects implementations from the CPU feature
+// flags in iCpu (the definition is not part of this header - confirm).
+void InitMcFunc (SMcFunc* pMcFunc, uint32_t iCpu);
+
+} // namespace WelsCommon
+
+
+#if defined(__cplusplus)
+extern "C" {
+#endif//__cplusplus
+
+// NEON routines with C linkage, declared only when HAVE_NEON is defined.
+#if defined(HAVE_NEON)
+void McCopyWidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight);
+
+void McCopyWidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight);
+
+void McCopyWidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight);
+
+void McChromaWidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t*
pDst, int32_t iDstStride, + int32_t* pWeights, int32_t iHeight); + +void McChromaWidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t* pWeights, int32_t iHeight); + +void PixelAvgWidthEq16_neon (uint8_t* pDst, int32_t iDstStride, uint8_t* pSrcA, uint8_t* pSrcB, int32_t iHeight); +void PixelAvgWidthEq8_neon (uint8_t* pDst, int32_t iDstStride, uint8_t* pSrcA, uint8_t* pSrcB, int32_t iHeight); +void PixelAvgWidthEq4_neon (uint8_t* pDst, int32_t iDstStride, uint8_t* pSrcA, uint8_t* pSrcB, int32_t iHeight); + +void McHorVer01WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer01WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer01WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer03WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer03WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer03WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); + +void McHorVer10WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer10WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer10WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer30WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer30WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer30WidthEq4_neon (const 
uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); + +//horizontal filter to gain half sample, that is (2, 0) location in quarter sample +void McHorVer20WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer20WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer20WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); + +//vertical filter to gain half sample, that is (0, 2) location in quarter sample +void McHorVer02WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer02WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer02WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); + +//horizontal and vertical filter to gain half sample, that is (2, 2) location in quarter sample +void McHorVer22WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer22WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer22WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); + +void PixStrideAvgWidthEq16_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcStrideA, + const uint8_t* pSrcB, int32_t iSrcStrideB, int32_t iHeight); +void PixStrideAvgWidthEq8_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcStrideA, + const uint8_t* pSrcB, int32_t iSrcStrideB, int32_t iHeight); + +void McHorVer20Width17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight);// width+1 +void 
McHorVer20Width9_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight);// width+1 +void McHorVer20Width5_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight);// width+1 + +void McHorVer02Height17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight);// height+1 +void McHorVer02Height9_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight);// height+1 +void McHorVer02Height5_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight);// height+1 + +void McHorVer22Width17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight);//width+1&&height+1 +void McHorVer22Width9_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight);//width+1&&height+1 +void McHorVer22Width5_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight);//width+1&&height+1 +#endif + +#if defined(HAVE_NEON_AARCH64) +void McCopyWidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McCopyWidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McCopyWidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McChromaWidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t* pWeights, int32_t iHeight); +void McChromaWidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t* pWeights, int32_t iHeight); +void PixelAvgWidthEq16_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, + const uint8_t* pSrcB, 
int32_t iSrcBStride, int32_t iHeight); +void PixelAvgWidthEq8_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, + const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight); +void PixelAvgWidthEq4_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, + const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight); +void McHorVer01WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer01WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer01WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer03WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer03WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer03WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer10WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer10WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer10WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer30WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer30WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer30WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 
+ int32_t iHeight); +//horizontal filter to gain half sample, that is (2, 0) location in quarter sample +void McHorVer20WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer20WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer20WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +//vertical filter to gain half sample, that is (0, 2) location in quarter sample +void McHorVer02WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer02WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer02WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +//horizontal and vertical filter to gain half sample, that is (2, 2) location in quarter sample +void McHorVer22WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer22WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer22WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void PixStrideAvgWidthEq16_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcStrideA, + const uint8_t* pSrcB, int32_t iSrcStrideB, int32_t iHeight); +void PixStrideAvgWidthEq8_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcStrideA, + const uint8_t* pSrcB, int32_t iSrcStrideB, int32_t iHeight); +void McHorVer20Width17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t 
iHeight);// width+1 +void McHorVer20Width9_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight);// width+1 +void McHorVer20Width5_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight);// width+1 +void McHorVer02Height17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight);// height+1 +void McHorVer02Height9_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight);// height+1 +void McHorVer02Height5_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight);// height+1 +void McHorVer22Width17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight);//width+1&&height+1 +void McHorVer22Width9_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight);//width+1&&height+1 +void McHorVer22Width5_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight);//width+1&&height+1 +#endif + +#if defined(X86_ASM) +//***************************************************************************// +// MMXEXT definition // +//***************************************************************************// +void McHorVer20WidthEq4_mmx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McChromaWidthEq4_mmx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + const uint8_t* kpABCD, int32_t iHeight); +void McCopyWidthEq8_mmx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void PixelAvgWidthEq4_mmx (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, + const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight); +void 
PixelAvgWidthEq8_mmx (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, + const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight); + +//***************************************************************************// +// SSE2 definition // +//***************************************************************************// +void McChromaWidthEq8_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + const uint8_t* kpABCD, int32_t iHeight); +void McCopyWidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer20WidthEq8_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer20WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer02WidthEq8_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer22Width8HorFirst_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); +void McHorVer22Width8VerLastAlign_sse2 (const uint8_t* pTap, int32_t iTapStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight); +void McHorVer22Width8VerLastUnAlign_sse2 (const uint8_t* pTap, int32_t iTapStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight); + +void PixelAvgWidthEq16_sse2 (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, + const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight); + +void McHorVer20Width9Or17_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, + int32_t iHeight); +void McHorVer20Width5_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight); + +void McHorVer02Height9Or17_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, 
int32_t iDstStride, + int32_t iWidth, + int32_t iHeight); +void McHorVer02Height5_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight); + +void McHorVer22HorFirst_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pTap, int32_t iTapStride, + int32_t iWidth, + int32_t iHeight); +void McHorVer22Width5HorFirst_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pTap, int32_t iTapStride, + int32_t iWidth, int32_t iHeight); +void McHorVer22Width4VerLastAlign_sse2 (const uint8_t* pTap, int32_t iTapStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight); +void McHorVer22Width4VerLastUnAlign_sse2 (const uint8_t* pTap, int32_t iTapStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight); + +//***************************************************************************// +// SSE3 definition // +//***************************************************************************// +void McCopyWidthEq16_sse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight); + +//***************************************************************************// +// SSSE3 definition // +//***************************************************************************// +void McChromaWidthEq8_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + const uint8_t* kpABCD, int32_t iHeight); +void McHorVer02_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight); +void McHorVer02Width4S16ToU8_ssse3 (const int16_t* pSrc, uint8_t* pDst, int32_t iDstStride, int32_t iHeight); +void McHorVer02Width5S16ToU8_ssse3 (const int16_t* pSrc, int32_t iSrcStride, + uint8_t* pDst, int32_t iDstStride, int32_t iHeight); +void McHorVer02WidthGe8S16ToU8_ssse3 (const int16_t* pSrc, int32_t iSrcStride, + uint8_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight); +void 
McHorVer20_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight); +void McHorVer20Width4U8ToS16_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, int16_t* pDst, int32_t iHeight); +void McHorVer20Width5Or9Or17_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, + uint8_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight); +void McHorVer20Width8U8ToS16_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, + int16_t* pDst, int32_t iDstStride, int32_t iHeight); +void McHorVer20Width9Or17U8ToS16_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, + int16_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight); + +//***************************************************************************// +// AVX2 definition // +//***************************************************************************// +#ifdef HAVE_AVX2 +void McHorVer02_avx2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight); +void McHorVer02Width4S16ToU8_avx2 (const int16_t* pSrc, uint8_t* pDst, int32_t iDstStride, int32_t iHeight); +void McHorVer02Width5S16ToU8_avx2 (const int16_t* pSrc, uint8_t* pDst, int32_t iDstStride, int32_t iHeight); +void McHorVer02Width8S16ToU8_avx2 (const int16_t* pSrc, uint8_t* pDst, int32_t iDstStride, int32_t iHeight); +void McHorVer02Width9S16ToU8_avx2 (const int16_t* pSrc, uint8_t* pDst, int32_t iDstStride, int32_t iHeight); +void McHorVer02Width16Or17S16ToU8_avx2 (const int16_t* pSrc, int32_t iSrcStride, + uint8_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight); +void McHorVer20_avx2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight); +void McHorVer20Width5Or9Or17_avx2 (const uint8_t* pSrc, int32_t iSrcStride, + uint8_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight); +void McHorVer20Width4U8ToS16_avx2 (const uint8_t* pSrc, int32_t iSrcStride, int16_t* pDst, int32_t 
iHeight); +void McHorVer20Width8U8ToS16_avx2 (const uint8_t* pSrc, int32_t iSrcStride, int16_t* pDst, int32_t iHeight); +void McHorVer20Width16U8ToS16_avx2 (const uint8_t* pSrc, int32_t iSrcStride, int16_t* pDst, int32_t iHeight); +void McHorVer20Width17U8ToS16_avx2 (const uint8_t* pSrc, int32_t iSrcStride, int16_t* pDst, int32_t iHeight); +#endif //HAVE_AVX2 + +#endif //X86_ASM + +#if defined(__cplusplus) +} +#endif//__cplusplus + +#endif//MC_H diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/measure_time.h b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/measure_time.h new file mode 100644 index 000000000..e38280897 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/measure_time.h @@ -0,0 +1,88 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file measure_time.h + * + * \brief time cost measure utilization + * + * \date 04/28/2009 Created + * + ************************************************************************************* + */ +#ifndef WELS_TIME_COST_MEASURE_UTIL_H__ +#define WELS_TIME_COST_MEASURE_UTIL_H__ + +#include + +#include "typedefs.h" +#ifndef _WIN32 +#include +#else +#include +#endif +#include + +#ifdef __cplusplus +extern "C" { +#endif//__cplusplus + +/*! + * \brief time cost measure utilization + * \param void + * \return time elapsed since run (unit: microsecond) + */ + +static inline int64_t WelsTime (void) { +#ifndef _WIN32 + struct timeval tv_date; + + gettimeofday (&tv_date, NULL); + return ((int64_t) tv_date.tv_sec * 1000000 + (int64_t) tv_date.tv_usec); +#else + static int64_t iMtimeFreq = 0; + int64_t iMtimeCur = 0; + int64_t iResult = 0; + if (!iMtimeFreq) { + QueryPerformanceFrequency ((LARGE_INTEGER*)&iMtimeFreq); + if (!iMtimeFreq) + iMtimeFreq = 1; + } + QueryPerformanceCounter ((LARGE_INTEGER*)&iMtimeCur); + iResult = (int64_t) ((double)iMtimeCur * 1e6 / (double)iMtimeFreq + 0.5); + return iResult; +#endif//_WIN32 +} + +#ifdef __cplusplus +} +#endif + +#endif//WELS_TIME_COST_MEASURE_UTIL_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/memory_align.h b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/memory_align.h new file mode 100644 index 000000000..c70c8a292 --- /dev/null +++ 
b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/memory_align.h @@ -0,0 +1,116 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#if !defined(WELS_COMMON_MEMORY_ALIGN_H__) +#define WELS_COMMON_MEMORY_ALIGN_H__ + +#include "typedefs.h" + +// NOTE: please do not clean below lines even comment, turn on for potential memory leak verify and memory usage monitor etc. 
+//#define MEMORY_CHECK +#define MEMORY_MONITOR +#ifdef MEMORY_CHECK +#ifndef MEMORY_MONITOR +#define MEMORY_MONITOR +#endif//MEMORY_MONITOR +#endif//MEMORY_CHECK + + +#ifdef MEMORY_CHECK +#include +#endif//MEMORY_CHECK + +namespace WelsCommon { + +class CMemoryAlign { + public: +CMemoryAlign (const uint32_t kuiCacheLineSize); +virtual ~CMemoryAlign(); + +void* WelsMallocz (const uint32_t kuiSize, const char* kpTag); +void* WelsMalloc (const uint32_t kuiSize, const char* kpTag); +void WelsFree (void* pPointer, const char* kpTag); +const uint32_t WelsGetCacheLineSize() const; +const uint32_t WelsGetMemoryUsage() const; + + private: +// private copy & assign constructors adding to fix klocwork scan issues +CMemoryAlign (const CMemoryAlign& kcMa); +CMemoryAlign& operator= (const CMemoryAlign& kcMa); + + protected: +uint32_t m_nCacheLineSize; + +#ifdef MEMORY_MONITOR +uint32_t m_nMemoryUsageInBytes; +#endif//MEMORY_MONITOR +}; + +/*! +************************************************************************************* +* \brief malloc with zero filled utilization in Wels +* +* \param kuiSize size of memory block required +* +* \return allocated memory pointer exactly, failed in case of NULL return +* +* \note N/A +************************************************************************************* +*/ +void* WelsMallocz (const uint32_t kuiSize, const char* kpTag); + +/*! +************************************************************************************* +* \brief free utilization in Wels +* +* \param pPtr data pointer to be free. +* i.e, uint8_t *pPtr = actual data to be free, argv = &pPtr. 
+* + * \return NONE +* +* \note N/A +************************************************************************************* +*/ +void WelsFree (void* pPtr, const char* kpTag); + +#define WELS_SAFE_FREE(pPtr, pTag) if (pPtr) { WelsFree(pPtr, pTag); pPtr = NULL; } + +#define WELS_NEW_OP(object, type) \ + (type*)(new object); +// Deletes p and resets it to NULL. Expands to ONE braced statement so both steps stay together under an unbraced if/for/while. +#define WELS_DELETE_OP(p) \ + { if (p) delete (p); \ + (p) = NULL; } + +} + +#endif//WELS_COMMON_MEMORY_ALIGN_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/msa_macros.h b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/msa_macros.h new file mode 100644 index 000000000..2eef0e5b8 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/msa_macros.h @@ -0,0 +1,2393 @@ +/* + * Copyright © 2020 Loongson Technology Co. Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE.
+ * + * Author: Yin Shiyou (yinshiyou-hf@loongson.cn) + * Gu Xiwei (guxiwei-hf@loongson.cn) + */ + +/* + * This header file is copied from loongson LSOM project. + * MSA macros is implemented with msa intrinsics in msa.h, + * and used for simplifing MSA optimization. + */ + +#ifndef _MSA_MACROS_H +#define _MSA_MACROS_H 1 +#define MSA_MACROS_VERSION 18 +#include + +#if (__mips_isa_rev >= 6) + #define LH(psrc) \ + ( { \ + uint16_t val_lh_m = *(uint16_t *)(psrc); \ + val_lh_m; \ + } ) + + #define LW(psrc) \ + ( { \ + uint32_t val_lw_m = *(uint32_t *)(psrc); \ + val_lw_m; \ + } ) + + #if (__mips == 64) + #define LD(psrc) \ + ( { \ + uint64_t val_ld_m = *(uint64_t *)(psrc); \ + val_ld_m; \ + } ) + #else // !(__mips == 64) + #define LD(psrc) \ + ( { \ + uint8_t *psrc_ld_m = (uint8_t *) (psrc); \ + uint32_t val0_ld_m, val1_ld_m; \ + uint64_t val_ld_m = 0; \ + \ + val0_ld_m = LW(psrc_ld_m); \ + val1_ld_m = LW(psrc_ld_m + 4); \ + \ + val_ld_m = (uint64_t) (val1_ld_m); \ + val_ld_m = (uint64_t) ((val_ld_m << 32) & 0xFFFFFFFF00000000); \ + val_ld_m = (uint64_t) (val_ld_m | (uint64_t) val0_ld_m); \ + \ + val_ld_m; \ + } ) + #endif // (__mips == 64) + + #define SH(val, pdst) *(uint16_t *)(pdst) = (val); + #define SW(val, pdst) *(uint32_t *)(pdst) = (val); + #define SD(val, pdst) *(uint64_t *)(pdst) = (val); + +#else // !(__mips_isa_rev >= 6) + #define LH(psrc) \ + ( { \ + uint8_t *psrc_lh_m = (uint8_t *) (psrc); \ + uint16_t val_lh_m; \ + \ + __asm__ volatile ( \ + "ulh %[val_lh_m], %[psrc_lh_m] \n\t" \ + \ + : [val_lh_m] "=r" (val_lh_m) \ + : [psrc_lh_m] "m" (*psrc_lh_m) \ + ); \ + \ + val_lh_m; \ + } ) + + #define LW(psrc) \ + ( { \ + uint8_t *psrc_lw_m = (uint8_t *) (psrc); \ + uint32_t val_lw_m; \ + \ + __asm__ volatile ( \ + "ulw %[val_lw_m], %[psrc_lw_m] \n\t" \ + \ + : [val_lw_m] "=r" (val_lw_m) \ + : [psrc_lw_m] "m" (*psrc_lw_m) \ + ); \ + \ + val_lw_m; \ + } ) + + #if (__mips == 64) + #define LD(psrc) \ + ( { \ + uint8_t *psrc_ld_m = (uint8_t *) (psrc); \ + uint64_t 
val_ld_m = 0; \ + \ + __asm__ volatile ( \ + "uld %[val_ld_m], %[psrc_ld_m] \n\t" \ + \ + : [val_ld_m] "=r" (val_ld_m) \ + : [psrc_ld_m] "m" (*psrc_ld_m) \ + ); \ + \ + val_ld_m; \ + } ) + #else // !(__mips == 64) + #define LD(psrc) \ + ( { \ + uint8_t *psrc_ld_m = (uint8_t *) (psrc); \ + uint32_t val0_ld_m, val1_ld_m; \ + uint64_t val_ld_m = 0; \ + \ + val0_ld_m = LW(psrc_ld_m); \ + val1_ld_m = LW(psrc_ld_m + 4); \ + \ + val_ld_m = (uint64_t) (val1_ld_m); \ + val_ld_m = (uint64_t) ((val_ld_m << 32) & 0xFFFFFFFF00000000); \ + val_ld_m = (uint64_t) (val_ld_m | (uint64_t) val0_ld_m); \ + \ + val_ld_m; \ + } ) + #endif // (__mips == 64) + + #define SH(val, pdst) \ + { \ + uint8_t *pdst_sh_m = (uint8_t *) (pdst); \ + uint16_t val_sh_m = (val); \ + \ + __asm__ volatile ( \ + "ush %[val_sh_m], %[pdst_sh_m] \n\t" \ + \ + : [pdst_sh_m] "=m" (*pdst_sh_m) \ + : [val_sh_m] "r" (val_sh_m) \ + ); \ + } + + #define SW(val, pdst) \ + { \ + uint8_t *pdst_sw_m = (uint8_t *) (pdst); \ + uint32_t val_sw_m = (val); \ + \ + __asm__ volatile ( \ + "usw %[val_sw_m], %[pdst_sw_m] \n\t" \ + \ + : [pdst_sw_m] "=m" (*pdst_sw_m) \ + : [val_sw_m] "r" (val_sw_m) \ + ); \ + } + + #define SD(val, pdst) \ + { \ + uint8_t *pdst_sd_m = (uint8_t *) (pdst); \ + uint32_t val0_sd_m, val1_sd_m; \ + \ + val0_sd_m = (uint32_t) ((val) & 0x00000000FFFFFFFF); \ + val1_sd_m = (uint32_t) (((val) >> 32) & 0x00000000FFFFFFFF); \ + \ + SW(val0_sd_m, pdst_sd_m); \ + SW(val1_sd_m, pdst_sd_m + 4); \ + } +#endif // (__mips_isa_rev >= 6) + + + + + + +/* Description : Load vector elements with stride. + * Arguments : Inputs - psrc (source pointer to load from) + * - stride + * Outputs - out0, out1... + * Return Type - as per RTYPE + * Details : Loads elements in 'out0' from (psrc). + * Loads elements in 'out1' from (psrc + stride). 
+ */ +#define MSA_LD_V(RTYPE, psrc, out) (out) = *((RTYPE *)(psrc)); + +#define MSA_LD_V2(RTYPE, psrc, stride, out0, out1) \ +{ \ + MSA_LD_V(RTYPE, (psrc), out0); \ + MSA_LD_V(RTYPE, (psrc) + (stride), out1); \ +} + +#define MSA_LD_V4(RTYPE, psrc, stride, out0, out1, out2, out3) \ +{ \ + MSA_LD_V2(RTYPE, (psrc), stride, out0, out1); \ + MSA_LD_V2(RTYPE, (psrc) + 2 * (stride) , stride, out2, out3); \ +} + +#define MSA_LD_V8(RTYPE, psrc, stride, out0, out1, out2, out3, \ + out4, out5, out6, out7) \ +{ \ + MSA_LD_V4(RTYPE, (psrc), stride, out0, out1, out2, out3); \ + MSA_LD_V4(RTYPE, (psrc) + 4 * (stride), stride, out4, out5, out6, out7); \ +} + +/* Description : Store vectors with stride. + * Arguments : Inputs - in0, in1... (source vector to be stored) + * - stride + * Outputs - pdst (destination pointer to store to) + * Details : Stores elements from 'in0' to (pdst). + * Stores elements from 'in1' to (pdst + stride). + */ +#define MSA_ST_V(RTYPE, in, pdst) *((RTYPE *)(pdst)) = (in); + +#define MSA_ST_V2(RTYPE, in0, in1, pdst, stride) \ +{ \ + MSA_ST_V(RTYPE, in0, (pdst)); \ + MSA_ST_V(RTYPE, in1, (pdst) + (stride)); \ +} + +#define MSA_ST_V4(RTYPE, in0, in1, in2, in3, pdst, stride) \ +{ \ + MSA_ST_V2(RTYPE, in0, in1, (pdst), stride); \ + MSA_ST_V2(RTYPE, in2, in3, (pdst) + 2 * (stride), stride); \ +} + +#define MSA_ST_V8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride) \ +{ \ + MSA_ST_V4(RTYPE, in0, in1, in2, in3, (pdst), stride); \ + MSA_ST_V4(RTYPE, in4, in5, in6, in7, (pdst) + 4 * (stride), stride); \ +} + +/* Description : Store half word elements of vector with stride. + * Arguments : Inputs - in (source vector) + * - pdst (destination pointer to store to) + * - stride + * Details : Stores half word 'idx0' from 'in' to (pdst). + * Stores half word 'idx1' from 'in' to (pdst + stride). + * Similar for other elements. 
+ */ +#define MSA_ST_H(in, idx, pdst) \ +{ \ + uint16_t out0_m; \ + out0_m = __msa_copy_u_h((v8i16) (in), idx); \ + SH(out0_m, (pdst)); \ +} +#define MSA_ST_H2(in, idx0, idx1, pdst, stride) \ +{ \ + uint16_t out0_m, out1_m; \ + out0_m = __msa_copy_u_h((v8i16) (in), idx0); \ + out1_m = __msa_copy_u_h((v8i16) (in), idx1); \ + SH(out0_m, (pdst)); \ + SH(out1_m, (pdst) + (stride)); \ +} +#define MSA_ST_H4(in, idx0, idx1, idx2, idx3, pdst, stride) \ +{ \ + uint16_t out0_m, out1_m, out2_m, out3_m; \ + out0_m = __msa_copy_u_h((v8i16) (in), idx0); \ + out1_m = __msa_copy_u_h((v8i16) (in), idx1); \ + out2_m = __msa_copy_u_h((v8i16) (in), idx2); \ + out3_m = __msa_copy_u_h((v8i16) (in), idx3); \ + SH(out0_m, (pdst)); \ + SH(out1_m, (pdst) + (stride)); \ + SH(out2_m, (pdst) + 2 * (stride)); \ + SH(out3_m, (pdst) + 3 * (stride)); \ +} +#define MSA_ST_H8(in, idx0, idx1, idx2, idx3, idx4, idx5, \ + idx6, idx7, pdst, stride) \ +{ \ + MSA_ST_H4(in, idx0, idx1, idx2, idx3, pdst, stride) \ + MSA_ST_H4(in, idx4, idx5, idx6, idx7, (pdst) + 4 * (stride), stride) \ +} + +/* Description : Store word elements of vector with stride. + * Arguments : Inputs - in (source vector) + * - pdst (destination pointer to store to) + * - stride + * Details : Stores word 'idx0' from 'in' to (pdst). + * Stores word 'idx1' from 'in' to (pdst + stride). + * Similar for other elements.
+ */ +#define MSA_ST_W(in, idx, pdst) \ +{ \ + uint32_t out0_m; \ + out0_m = __msa_copy_u_w((v4i32) (in), idx); \ + SW(out0_m, (pdst)); \ +} +#define MSA_ST_W2(in, idx0, idx1, pdst, stride) \ +{ \ + uint32_t out0_m, out1_m; \ + out0_m = __msa_copy_u_w((v4i32) (in), idx0); \ + out1_m = __msa_copy_u_w((v4i32) (in), idx1); \ + SW(out0_m, (pdst)); \ + SW(out1_m, (pdst) + (stride)); \ +} +#define MSA_ST_W4(in, idx0, idx1, idx2, idx3, pdst, stride) \ +{ \ + uint32_t out0_m, out1_m, out2_m, out3_m; \ + out0_m = __msa_copy_u_w((v4i32) (in), idx0); \ + out1_m = __msa_copy_u_w((v4i32) (in), idx1); \ + out2_m = __msa_copy_u_w((v4i32) (in), idx2); \ + out3_m = __msa_copy_u_w((v4i32) (in), idx3); \ + SW(out0_m, (pdst)); \ + SW(out1_m, (pdst) + (stride)); \ + SW(out2_m, (pdst) + 2 * (stride)); \ + SW(out3_m, (pdst) + 3 * (stride)); \ +} +#define MSA_ST_W8(in0, in1, idx0, idx1, idx2, idx3, \ + idx4, idx5, idx6, idx7, pdst, stride) \ +{ \ + MSA_ST_W4(in0, idx0, idx1, idx2, idx3, pdst, stride) \ + MSA_ST_W4(in1, idx4, idx5, idx6, idx7, (pdst) + 4 * (stride), stride) \ +} + +/* Description : Store double word elements of vector with stride. + * Arguments : Inputs - in (source vector) + * - pdst (destination pointer to store to) + * - stride + * Details : Stores double word 'idx0' from 'in' to (pdst). + * Stores double word 'idx1' from 'in' to (pdst + stride). + * Similar for other elements.
+ */ +#define MSA_ST_D(in, idx, pdst) \ +{ \ + uint64_t out0_m; \ + out0_m = __msa_copy_u_d((v2i64) (in), idx); \ + SD(out0_m, (pdst)); \ +} +#define MSA_ST_D2(in, idx0, idx1, pdst, stride) \ +{ \ + uint64_t out0_m, out1_m; \ + out0_m = __msa_copy_u_d((v2i64) (in), idx0); \ + out1_m = __msa_copy_u_d((v2i64) (in), idx1); \ + SD(out0_m, (pdst)); \ + SD(out1_m, (pdst) + (stride)); \ +} +#define MSA_ST_D4(in0, in1, idx0, idx1, idx2, idx3, pdst, stride) \ +{ \ + uint64_t out0_m, out1_m, out2_m, out3_m; \ + out0_m = __msa_copy_u_d((v2i64) (in0), idx0); \ + out1_m = __msa_copy_u_d((v2i64) (in0), idx1); \ + out2_m = __msa_copy_u_d((v2i64) (in1), idx2); \ + out3_m = __msa_copy_u_d((v2i64) (in1), idx3); \ + SD(out0_m, (pdst)); \ + SD(out1_m, (pdst) + (stride)); \ + SD(out2_m, (pdst) + 2 * (stride)); \ + SD(out3_m, (pdst) + 3 * (stride)); \ +} +#define MSA_ST_D8(in0, in1, in2, in3, idx0, idx1, idx2, idx3, \ + idx4, idx5, idx6, idx7, pdst, stride) \ +{ \ + MSA_ST_D4(in0, in1, idx0, idx1, idx2, idx3, pdst, stride) \ + MSA_ST_D4(in2, in3, idx4, idx5, idx6, idx7, (pdst) + 4 * (stride), stride) \ +} + +/* Description : Shuffle byte vector elements as per mask vector. + * Arguments : Inputs - in0, in1 (source vectors) + * - mask (mask vectors) + * Outputs - out (destination vectors) + * Return Type - as per RTYPE + * Details : Selective byte elements from 'in0' & 'in1' are copied to 'out' as + * per control vector 'mask'.
+ */ +#define MSA_VSHF_B(RTYPE, in0, in1, mask, out) \ +{ \ + out = (RTYPE) __msa_vshf_b((v16i8) mask, (v16i8) in0, (v16i8) in1); \ +} + +#define MSA_VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \ +{ \ + MSA_VSHF_B(RTYPE, in0, in1, mask0, out0) \ + MSA_VSHF_B(RTYPE, in2, in3, mask1, out1) \ +} + +#define MSA_VSHF_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + mask0, mask1, mask2, mask3, out0, out1, out2, out3) \ +{ \ + MSA_VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1); \ + MSA_VSHF_B2(RTYPE, in4, in5, in6, in7, mask2, mask3, out2, out3); \ +} + +/* Description : Shuffle halfword vector elements as per mask vector. + * Arguments : Inputs - in0, in1 (source vectors) + * - mask (mask vectors) + * Outputs - out (destination vectors) + * Return Type - as per RTYPE + * Details : Selective halfword elements from 'in0' & 'in1' are copied to 'out' as + * per control vector 'mask'. + */ +#define MSA_VSHF_H(RTYPE, in0, in1, mask, out) \ +{ \ + out = (RTYPE) __msa_vshf_h((v8i16) mask, (v8i16) in0, (v8i16) in1); \ +} + +#define MSA_VSHF_H2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \ +{ \ + MSA_VSHF_H(RTYPE, in0, in1, mask0, out0) \ + MSA_VSHF_H(RTYPE, in2, in3, mask1, out1) \ +} + +#define MSA_VSHF_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + mask0, mask1, mask2, mask3, out0, out1, out2, out3) \ +{ \ + MSA_VSHF_H2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1); \ + MSA_VSHF_H2(RTYPE, in4, in5, in6, in7, mask2, mask3, out2, out3); \ +} + +/* Description : Shuffle word vector elements as per mask vector. + * Arguments : Inputs - in0, in1 (source vectors) + * - mask (mask vectors) + * Outputs - out (destination vectors) + * Return Type - as per RTYPE + * Details : Selective word elements from 'in0' & 'in1' are copied to 'out' as + * per control vector 'mask'.
+ */ +#define MSA_VSHF_W(RTYPE, in0, in1, mask, out) \ +{ \ + out = (RTYPE) __msa_vshf_w((v4i32) mask, (v4i32) in0, (v4i32) in1); \ +} + +/* MSA_VSHF_W2 / MSA_VSHF_W4: MSA_VSHF_W applied to two / four (in, in, mask, out) groups. */ +#define MSA_VSHF_W2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \ +{ \ + MSA_VSHF_W(RTYPE, in0, in1, mask0, out0) \ + MSA_VSHF_W(RTYPE, in2, in3, mask1, out1) \ +} + +#define MSA_VSHF_W4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + mask0, mask1, mask2, mask3, out0, out1, out2, out3) \ +{ \ + MSA_VSHF_W2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1); \ + MSA_VSHF_W2(RTYPE, in4, in5, in6, in7, mask2, mask3, out2, out3); \ +} + +/* Description : Interleave even byte elements from vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Even byte elements of 'in0' and even byte + * elements of 'in1' are interleaved and copied to 'out'. + */ +#define MSA_ILVEV_B(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_ilvev_b((v16i8) in0, (v16i8) in1); \ +} + +/* MSA_ILVEV_B2 / MSA_ILVEV_B4: MSA_ILVEV_B applied to two / four (in, in, out) groups. */ +#define MSA_ILVEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_ILVEV_B(RTYPE, in0, in1, out0); \ + MSA_ILVEV_B(RTYPE, in2, in3, out1); \ +} + +#define MSA_ILVEV_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVEV_B2(RTYPE, in0, in1, in2, in3, out0, out1); \ + MSA_ILVEV_B2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} + +/* Description : Interleave even half word elements from vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Even half word elements of 'in0' and even half word + * elements of 'in1' are interleaved and copied to 'out'.
+ */ +#define MSA_ILVEV_H(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_ilvev_h((v8i16) in0, (v8i16) in1); \ +} + +/* MSA_ILVEV_H2 / MSA_ILVEV_H4: MSA_ILVEV_H applied to two / four (in, in, out) groups. */ +#define MSA_ILVEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_ILVEV_H(RTYPE, in0, in1, out0); \ + MSA_ILVEV_H(RTYPE, in2, in3, out1); \ +} + +#define MSA_ILVEV_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVEV_H2(RTYPE, in0, in1, in2, in3, out0, out1); \ + MSA_ILVEV_H2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} + +/* Description : Interleave even word elements from vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Even word elements of 'in0' and even word + * elements of 'in1' are interleaved and copied to 'out'. + * Note : Operands are cast to v4i32, the word vector type used by the + * other *_W macros in this file (previously v2i64). + */ +#define MSA_ILVEV_W(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_ilvev_w((v4i32) in0, (v4i32) in1); \ +} + +/* MSA_ILVEV_W2 / MSA_ILVEV_W4: MSA_ILVEV_W applied to two / four (in, in, out) groups. */ +#define MSA_ILVEV_W2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_ILVEV_W(RTYPE, in0, in1, out0); \ + MSA_ILVEV_W(RTYPE, in2, in3, out1); \ +} + +#define MSA_ILVEV_W4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVEV_W2(RTYPE, in0, in1, in2, in3, out0, out1); \ + MSA_ILVEV_W2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} + +/* Description : Interleave even double word elements from vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Even double word elements of 'in0' and even double word + * elements of 'in1' are interleaved and copied to 'out'.
+ */ +#define MSA_ILVEV_D(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_ilvev_d((v2i64) in0, (v2i64) in1); \ +} + +/* MSA_ILVEV_D2 / MSA_ILVEV_D4: MSA_ILVEV_D applied to two / four (in, in, out) groups. */ +#define MSA_ILVEV_D2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_ILVEV_D(RTYPE, in0, in1, out0); \ + MSA_ILVEV_D(RTYPE, in2, in3, out1); \ +} + +#define MSA_ILVEV_D4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVEV_D2(RTYPE, in0, in1, in2, in3, out0, out1); \ + MSA_ILVEV_D2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} + +/* Description : Interleave odd byte elements from vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Odd byte elements of 'in0' and odd byte + * elements of 'in1' are interleaved and copied to 'out'. + */ +#define MSA_ILVOD_B(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_ilvod_b((v16i8) in0, (v16i8) in1); \ +} + +/* MSA_ILVOD_B2 / MSA_ILVOD_B4: MSA_ILVOD_B applied to two / four (in, in, out) groups. */ +#define MSA_ILVOD_B2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_ILVOD_B(RTYPE, in0, in1, out0); \ + MSA_ILVOD_B(RTYPE, in2, in3, out1); \ +} + +#define MSA_ILVOD_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVOD_B2(RTYPE, in0, in1, in2, in3, out0, out1); \ + MSA_ILVOD_B2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} + +/* Description : Interleave odd half word elements from vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Odd half word elements of 'in0' and odd half word + * elements of 'in1' are interleaved and copied to 'out'.
+ */ +#define MSA_ILVOD_H(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_ilvod_h((v8i16) in0, (v8i16) in1); \ +} + +/* MSA_ILVOD_H2 / MSA_ILVOD_H4: MSA_ILVOD_H applied to two / four (in, in, out) groups. */ +#define MSA_ILVOD_H2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_ILVOD_H(RTYPE, in0, in1, out0); \ + MSA_ILVOD_H(RTYPE, in2, in3, out1); \ +} + +#define MSA_ILVOD_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVOD_H2(RTYPE, in0, in1, in2, in3, out0, out1); \ + MSA_ILVOD_H2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} + +/* Description : Interleave odd word elements from vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Odd word elements of 'in0' and odd word + * elements of 'in1' are interleaved and copied to 'out'. + */ +#define MSA_ILVOD_W(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_ilvod_w((v4i32) in0, (v4i32) in1); \ +} + +/* MSA_ILVOD_W2 / MSA_ILVOD_W4: MSA_ILVOD_W applied to two / four (in, in, out) groups. */ +#define MSA_ILVOD_W2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_ILVOD_W(RTYPE, in0, in1, out0); \ + MSA_ILVOD_W(RTYPE, in2, in3, out1); \ +} + +#define MSA_ILVOD_W4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVOD_W2(RTYPE, in0, in1, in2, in3, out0, out1); \ + MSA_ILVOD_W2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} + +/* Description : Interleave odd double word elements from vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Odd double word elements of 'in0' and odd double word + * elements of 'in1' are interleaved and copied to 'out'.
+ */ +#define MSA_ILVOD_D(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_ilvod_d((v2i64) in0, (v2i64) in1); \ +} + +/* MSA_ILVOD_D2 / MSA_ILVOD_D4: MSA_ILVOD_D applied to two / four (in, in, out) groups. */ +#define MSA_ILVOD_D2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_ILVOD_D(RTYPE, in0, in1, out0); \ + MSA_ILVOD_D(RTYPE, in2, in3, out1); \ +} + +#define MSA_ILVOD_D4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVOD_D2(RTYPE, in0, in1, in2, in3, out0, out1); \ + MSA_ILVOD_D2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} + +/* Description : Interleave left half of byte elements from vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Left half of byte elements of 'in0' and left half of byte + * elements of 'in1' are interleaved and copied to 'out'. + */ +#define MSA_ILVL_B(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_ilvl_b((v16i8) in0, (v16i8) in1); \ +} + +/* MSA_ILVL_B2 / MSA_ILVL_B4: MSA_ILVL_B applied to two / four (in, in, out) groups. */ +#define MSA_ILVL_B2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_ILVL_B(RTYPE, in0, in1, out0); \ + MSA_ILVL_B(RTYPE, in2, in3, out1); \ +} + +#define MSA_ILVL_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVL_B2(RTYPE, in0, in1, in2, in3, out0, out1); \ + MSA_ILVL_B2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} + +/* Description : Interleave left half of halfword elements from vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Left half of halfword elements of 'in0' and left half of halfword + * elements of 'in1' are interleaved and copied to 'out'.
+ */ +#define MSA_ILVL_H(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_ilvl_h((v8i16) in0, (v8i16) in1); \ +} + +/* MSA_ILVL_H2 / MSA_ILVL_H4: MSA_ILVL_H applied to two / four (in, in, out) groups. */ +#define MSA_ILVL_H2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_ILVL_H(RTYPE, in0, in1, out0); \ + MSA_ILVL_H(RTYPE, in2, in3, out1); \ +} + +#define MSA_ILVL_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVL_H2(RTYPE, in0, in1, in2, in3, out0, out1); \ + MSA_ILVL_H2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} + +/* Description : Interleave left half of word elements from vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Left half of word elements of 'in0' and left half of word + * elements of 'in1' are interleaved and copied to 'out'. + */ +#define MSA_ILVL_W(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_ilvl_w((v4i32) in0, (v4i32) in1); \ +} + +/* MSA_ILVL_W2 / MSA_ILVL_W4: MSA_ILVL_W applied to two / four (in, in, out) groups. */ +#define MSA_ILVL_W2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_ILVL_W(RTYPE, in0, in1, out0); \ + MSA_ILVL_W(RTYPE, in2, in3, out1); \ +} + +#define MSA_ILVL_W4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVL_W2(RTYPE, in0, in1, in2, in3, out0, out1); \ + MSA_ILVL_W2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} + +/* Description : Interleave left half of double word elements from vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Left half of double word elements of 'in0' and left half of + * double word elements of 'in1' are interleaved and copied to 'out'.
+ */ +#define MSA_ILVL_D(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_ilvl_d((v2i64) in0, (v2i64) in1); \ +} + +/* MSA_ILVL_D2 / MSA_ILVL_D4: MSA_ILVL_D applied to two / four (in, in, out) groups. */ +#define MSA_ILVL_D2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_ILVL_D(RTYPE, in0, in1, out0); \ + MSA_ILVL_D(RTYPE, in2, in3, out1); \ +} + +#define MSA_ILVL_D4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVL_D2(RTYPE, in0, in1, in2, in3, out0, out1); \ + MSA_ILVL_D2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} + +/* Description : Interleave right half of byte elements from vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Right half of byte elements of 'in0' and right half of byte + * elements of 'in1' are interleaved and copied to 'out'. + */ +#define MSA_ILVR_B(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_ilvr_b((v16i8) in0, (v16i8) in1); \ +} + +/* MSA_ILVR_B2 / MSA_ILVR_B4: MSA_ILVR_B applied to two / four (in, in, out) groups. */ +#define MSA_ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_ILVR_B(RTYPE, in0, in1, out0); \ + MSA_ILVR_B(RTYPE, in2, in3, out1); \ +} + +#define MSA_ILVR_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1); \ + MSA_ILVR_B2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} + +/* Description : Interleave right half of halfword elements from vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Right half of halfword elements of 'in0' and right half of halfword + * elements of 'in1' are interleaved and copied to 'out'.
+ */ +#define MSA_ILVR_H(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_ilvr_h((v8i16) in0, (v8i16) in1); \ +} + +/* MSA_ILVR_H2 / MSA_ILVR_H4: MSA_ILVR_H applied to two / four (in, in, out) groups. */ +#define MSA_ILVR_H2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_ILVR_H(RTYPE, in0, in1, out0); \ + MSA_ILVR_H(RTYPE, in2, in3, out1); \ +} + +#define MSA_ILVR_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVR_H2(RTYPE, in0, in1, in2, in3, out0, out1); \ + MSA_ILVR_H2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} + +/* Description : Interleave right half of word elements from vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Right half of word elements of 'in0' and right half of word + * elements of 'in1' are interleaved and copied to 'out'. + */ +#define MSA_ILVR_W(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_ilvr_w((v4i32) in0, (v4i32) in1); \ +} + +/* MSA_ILVR_W2 / MSA_ILVR_W4: MSA_ILVR_W applied to two / four (in, in, out) groups. */ +#define MSA_ILVR_W2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_ILVR_W(RTYPE, in0, in1, out0); \ + MSA_ILVR_W(RTYPE, in2, in3, out1); \ +} + +#define MSA_ILVR_W4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVR_W2(RTYPE, in0, in1, in2, in3, out0, out1); \ + MSA_ILVR_W2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} + +/* Description : Interleave right half of double word elements from vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Right half of double word elements of 'in0' and right half of + * double word elements of 'in1' are interleaved and copied to 'out'.
+ */ +#define MSA_ILVR_D(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_ilvr_d((v2i64) in0, (v2i64) in1); \ +} + +/* MSA_ILVR_D2 / MSA_ILVR_D4: MSA_ILVR_D applied to two / four (in, in, out) groups. */ +#define MSA_ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_ILVR_D(RTYPE, in0, in1, out0); \ + MSA_ILVR_D(RTYPE, in2, in3, out1); \ +} + +#define MSA_ILVR_D4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1); \ + MSA_ILVR_D2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} + +/* Description : Interleave both left and right half of input vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out0, out1 + * Return Type - as per RTYPE + * Details : Right half of byte elements from 'in0' and 'in1' are + * interleaved and stored to 'out0'. + * Left half of byte elements from 'in0' and 'in1' are + * interleaved and stored to 'out1'. + */ +#define MSA_ILVRL_B2(RTYPE, in0, in1, out0, out1) \ +{ \ + MSA_ILVR_B(RTYPE, in0, in1, out0); \ + MSA_ILVL_B(RTYPE, in0, in1, out1); \ +} + +/* MSA_ILVRL_B4: MSA_ILVRL_B2 applied to the pairs (in0, in1) and (in2, in3). */ +#define MSA_ILVRL_B4(RTYPE, in0, in1, in2, in3, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVRL_B2(RTYPE, in0, in1, out0, out1); \ + MSA_ILVRL_B2(RTYPE, in2, in3, out2, out3); \ +} + +/* Description : Interleave both left and right half of input vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out0, out1 + * Return Type - as per RTYPE + * Details : Right half of halfword elements from 'in0' and 'in1' are + * interleaved and stored to 'out0'. + * Left half of halfword elements from 'in0' and 'in1' are + * interleaved and stored to 'out1'. + */ +#define MSA_ILVRL_H2(RTYPE, in0, in1, out0, out1) \ +{ \ + MSA_ILVR_H(RTYPE, in0, in1, out0); \ + MSA_ILVL_H(RTYPE, in0, in1, out1); \ +} + +/* MSA_ILVRL_H4: MSA_ILVRL_H2 applied to the pairs (in0, in1) and (in2, in3). */ +#define MSA_ILVRL_H4(RTYPE, in0, in1, in2, in3, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVRL_H2(RTYPE, in0, in1, out0, out1); \ + MSA_ILVRL_H2(RTYPE, in2, in3, out2, out3); \ +} + +/* Description : Interleave both left and right half of input vectors.
+ * Arguments : Inputs - in0, in1 + * Outputs - out0, out1 + * Return Type - as per RTYPE + * Details : Right half of word elements from 'in0' and 'in1' are + * interleaved and stored to 'out0'. + * Left half of word elements from 'in0' and 'in1' are + * interleaved and stored to 'out1'. + */ +#define MSA_ILVRL_W2(RTYPE, in0, in1, out0, out1) \ +{ \ + MSA_ILVR_W(RTYPE, in0, in1, out0); \ + MSA_ILVL_W(RTYPE, in0, in1, out1); \ +} + +/* MSA_ILVRL_W4: MSA_ILVRL_W2 applied to the pairs (in0, in1) and (in2, in3). */ +#define MSA_ILVRL_W4(RTYPE, in0, in1, in2, in3, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVRL_W2(RTYPE, in0, in1, out0, out1); \ + MSA_ILVRL_W2(RTYPE, in2, in3, out2, out3); \ +} + +/* Description : Interleave both left and right half of input vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out0, out1 + * Return Type - as per RTYPE + * Details : Right half of double word elements from 'in0' and 'in1' are + * interleaved and stored to 'out0'. + * Left half of double word elements from 'in0' and 'in1' are + * interleaved and stored to 'out1'. + */ +#define MSA_ILVRL_D2(RTYPE, in0, in1, out0, out1) \ +{ \ + MSA_ILVR_D(RTYPE, in0, in1, out0); \ + MSA_ILVL_D(RTYPE, in0, in1, out1); \ +} + +/* MSA_ILVRL_D4: MSA_ILVRL_D2 applied to the pairs (in0, in1) and (in2, in3). */ +#define MSA_ILVRL_D4(RTYPE, in0, in1, in2, in3, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVRL_D2(RTYPE, in0, in1, out0, out1); \ + MSA_ILVRL_D2(RTYPE, in2, in3, out2, out3); \ +} + +/* Description : Indexed byte elements are replicated to all elements in + * output vector. + * Arguments : Inputs - in, idx + * Outputs - out + * Return Type - as per RTYPE + * Details : 'idx' element value from 'in' vector is replicated to all + * elements in 'out' vector. + * Valid index range for byte operation is 0-15.
+ */ +#define MSA_SPLATI_B(RTYPE, in, idx, out) \ +{ \ + out = (RTYPE) __msa_splati_b((v16i8) in, idx); \ +} + +/* MSA_SPLATI_B2 / MSA_SPLATI_B4: MSA_SPLATI_B applied to the same 'in' with two / four indices. */ +#define MSA_SPLATI_B2(RTYPE, in, idx0, idx1, out0, out1) \ +{ \ + MSA_SPLATI_B(RTYPE, in, idx0, out0) \ + MSA_SPLATI_B(RTYPE, in, idx1, out1) \ +} + +#define MSA_SPLATI_B4(RTYPE, in, idx0, idx1, idx2, idx3, \ + out0, out1, out2, out3) \ +{ \ + MSA_SPLATI_B2(RTYPE, in, idx0, idx1, out0, out1) \ + MSA_SPLATI_B2(RTYPE, in, idx2, idx3, out2, out3) \ +} + +/* Description : Indexed halfword elements are replicated to all elements in + * output vector. + * Arguments : Inputs - in, idx + * Outputs - out + * Return Type - as per RTYPE + * Details : 'idx' element value from 'in' vector is replicated to all + * elements in 'out' vector. + * Valid index range for halfword operation is 0-7. + */ +#define MSA_SPLATI_H(RTYPE, in, idx, out) \ +{ \ + out = (RTYPE) __msa_splati_h((v8i16) in, idx); \ +} + +/* MSA_SPLATI_H2 / MSA_SPLATI_H4: MSA_SPLATI_H applied to the same 'in' with two / four indices. */ +#define MSA_SPLATI_H2(RTYPE, in, idx0, idx1, out0, out1) \ +{ \ + MSA_SPLATI_H(RTYPE, in, idx0, out0) \ + MSA_SPLATI_H(RTYPE, in, idx1, out1) \ +} + +#define MSA_SPLATI_H4(RTYPE, in, idx0, idx1, idx2, idx3, \ + out0, out1, out2, out3) \ +{ \ + MSA_SPLATI_H2(RTYPE, in, idx0, idx1, out0, out1) \ + MSA_SPLATI_H2(RTYPE, in, idx2, idx3, out2, out3) \ +} + +/* Description : Indexed word elements are replicated to all elements in + * output vector. + * Arguments : Inputs - in, idx + * Outputs - out + * Return Type - as per RTYPE + * Details : 'idx' element value from 'in' vector is replicated to all + * elements in 'out' vector. + * Valid index range for word operation is 0-3.
+ */ +#define MSA_SPLATI_W(RTYPE, in, idx, out) \ +{ \ + out = (RTYPE) __msa_splati_w((v4i32) in, idx); \ +} + +/* MSA_SPLATI_W2 / MSA_SPLATI_W4: MSA_SPLATI_W applied to the same 'in' with two / four indices. */ +#define MSA_SPLATI_W2(RTYPE, in, idx0, idx1, out0, out1) \ +{ \ + MSA_SPLATI_W(RTYPE, in, idx0, out0) \ + MSA_SPLATI_W(RTYPE, in, idx1, out1) \ +} + +#define MSA_SPLATI_W4(RTYPE, in, idx0, idx1, idx2, idx3, \ + out0, out1, out2, out3) \ +{ \ + MSA_SPLATI_W2(RTYPE, in, idx0, idx1, out0, out1) \ + MSA_SPLATI_W2(RTYPE, in, idx2, idx3, out2, out3) \ +} + +/* Description : Pack even byte elements of vector pairs. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Even byte elements of 'in0' are copied to the left half of + * 'out' & even byte elements of 'in1' are copied to the right + * half of 'out'. + */ +#define MSA_PCKEV_B(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_pckev_b((v16i8) in0, (v16i8) in1); \ +} + +/* MSA_PCKEV_B2 / MSA_PCKEV_B4: MSA_PCKEV_B applied to two / four (in, in, out) groups. */ +#define MSA_PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_PCKEV_B(RTYPE, in0, in1, out0) \ + MSA_PCKEV_B(RTYPE, in2, in3, out1) \ +} + +#define MSA_PCKEV_B4(RTYPE, in0, in1, in2, in3, in4, in5, \ + in6, in7, out0, out1, out2, out3) \ +{ \ + MSA_PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) \ + MSA_PCKEV_B2(RTYPE, in4, in5, in6, in7, out2, out3) \ +} + +/* Description : Pack even halfword elements of vector pairs. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Even halfword elements of 'in0' are copied to the left half of + * 'out' & even halfword elements of 'in1' are copied to the right + * half of 'out'.
+ */ +#define MSA_PCKEV_H(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_pckev_h((v8i16) in0, (v8i16) in1); \ +} + +/* MSA_PCKEV_H2 / MSA_PCKEV_H4: MSA_PCKEV_H applied to two / four (in, in, out) groups. */ +#define MSA_PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_PCKEV_H(RTYPE, in0, in1, out0) \ + MSA_PCKEV_H(RTYPE, in2, in3, out1) \ +} + +#define MSA_PCKEV_H4(RTYPE, in0, in1, in2, in3, in4, in5, \ + in6, in7, out0, out1, out2, out3) \ +{ \ + MSA_PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) \ + MSA_PCKEV_H2(RTYPE, in4, in5, in6, in7, out2, out3) \ +} + +/* Description : Pack even word elements of vector pairs. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Even word elements of 'in0' are copied to the left half of + * 'out' & even word elements of 'in1' are copied to the right + * half of 'out'. + */ +#define MSA_PCKEV_W(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_pckev_w((v4i32) in0, (v4i32) in1); \ +} + +/* MSA_PCKEV_W2 / MSA_PCKEV_W4: MSA_PCKEV_W applied to two / four (in, in, out) groups. */ +#define MSA_PCKEV_W2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_PCKEV_W(RTYPE, in0, in1, out0) \ + MSA_PCKEV_W(RTYPE, in2, in3, out1) \ +} + +#define MSA_PCKEV_W4(RTYPE, in0, in1, in2, in3, in4, in5, \ + in6, in7, out0, out1, out2, out3) \ +{ \ + MSA_PCKEV_W2(RTYPE, in0, in1, in2, in3, out0, out1) \ + MSA_PCKEV_W2(RTYPE, in4, in5, in6, in7, out2, out3) \ +} + +/* Description : Pack even double word elements of vector pairs. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Even double word elements of 'in0' are copied to the left + * half of 'out' & even double word elements of 'in1' are + * copied to the right half of 'out'.
+ */ +#define MSA_PCKEV_D(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_pckev_d((v2i64) in0, (v2i64) in1); \ +} + +/* MSA_PCKEV_D2 / MSA_PCKEV_D4: MSA_PCKEV_D applied to two / four (in, in, out) groups. */ +#define MSA_PCKEV_D2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_PCKEV_D(RTYPE, in0, in1, out0) \ + MSA_PCKEV_D(RTYPE, in2, in3, out1) \ +} + +#define MSA_PCKEV_D4(RTYPE, in0, in1, in2, in3, in4, in5, \ + in6, in7, out0, out1, out2, out3) \ +{ \ + MSA_PCKEV_D2(RTYPE, in0, in1, in2, in3, out0, out1) \ + MSA_PCKEV_D2(RTYPE, in4, in5, in6, in7, out2, out3) \ +} + +/* Description : Pack odd byte elements of vector pairs. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Odd byte elements of 'in0' are copied to the left half of + * 'out' & odd byte elements of 'in1' are copied to the right + * half of 'out'. + */ +#define MSA_PCKOD_B(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_pckod_b((v16i8) in0, (v16i8) in1); \ +} + +/* MSA_PCKOD_B2 / MSA_PCKOD_B4: MSA_PCKOD_B applied to two / four (in, in, out) groups. */ +#define MSA_PCKOD_B2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_PCKOD_B(RTYPE, in0, in1, out0) \ + MSA_PCKOD_B(RTYPE, in2, in3, out1) \ +} + +#define MSA_PCKOD_B4(RTYPE, in0, in1, in2, in3, in4, in5, \ + in6, in7, out0, out1, out2, out3) \ +{ \ + MSA_PCKOD_B2(RTYPE, in0, in1, in2, in3, out0, out1) \ + MSA_PCKOD_B2(RTYPE, in4, in5, in6, in7, out2, out3) \ +} + +/* Description : Pack odd halfword elements of vector pairs. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Odd halfword elements of 'in0' are copied to the left half of + * 'out' & odd halfword elements of 'in1' are copied to the right + * half of 'out'.
+ */ +#define MSA_PCKOD_H(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_pckod_h((v8i16) in0, (v8i16) in1); \ +} + +/* MSA_PCKOD_H2 / MSA_PCKOD_H4: MSA_PCKOD_H applied to two / four (in, in, out) groups. */ +#define MSA_PCKOD_H2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_PCKOD_H(RTYPE, in0, in1, out0) \ + MSA_PCKOD_H(RTYPE, in2, in3, out1) \ +} + +#define MSA_PCKOD_H4(RTYPE, in0, in1, in2, in3, in4, in5, \ + in6, in7, out0, out1, out2, out3) \ +{ \ + MSA_PCKOD_H2(RTYPE, in0, in1, in2, in3, out0, out1) \ + MSA_PCKOD_H2(RTYPE, in4, in5, in6, in7, out2, out3) \ +} + +/* Description : Pack odd word elements of vector pairs. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Odd word elements of 'in0' are copied to the left half of + * 'out' & odd word elements of 'in1' are copied to the right + * half of 'out'. + */ +#define MSA_PCKOD_W(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_pckod_w((v4i32) in0, (v4i32) in1); \ +} + +/* MSA_PCKOD_W2 / MSA_PCKOD_W4: MSA_PCKOD_W applied to two / four (in, in, out) groups. */ +#define MSA_PCKOD_W2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_PCKOD_W(RTYPE, in0, in1, out0) \ + MSA_PCKOD_W(RTYPE, in2, in3, out1) \ +} + +#define MSA_PCKOD_W4(RTYPE, in0, in1, in2, in3, in4, in5, \ + in6, in7, out0, out1, out2, out3) \ +{ \ + MSA_PCKOD_W2(RTYPE, in0, in1, in2, in3, out0, out1) \ + MSA_PCKOD_W2(RTYPE, in4, in5, in6, in7, out2, out3) \ +} + +/* Description : Pack odd double word elements of vector pairs. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Odd double word elements of 'in0' are copied to the left + * half of 'out' & odd double word elements of 'in1' are + * copied to the right half of 'out'.
+ */ +#define MSA_PCKOD_D(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_pckod_d((v2i64) in0, (v2i64) in1); \ +} + +/* MSA_PCKOD_D2 / MSA_PCKOD_D4: MSA_PCKOD_D applied to two / four (in, in, out) groups. */ +#define MSA_PCKOD_D2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_PCKOD_D(RTYPE, in0, in1, out0) \ + MSA_PCKOD_D(RTYPE, in2, in3, out1) \ +} + +#define MSA_PCKOD_D4(RTYPE, in0, in1, in2, in3, in4, in5, \ + in6, in7, out0, out1, out2, out3) \ +{ \ + MSA_PCKOD_D2(RTYPE, in0, in1, in2, in3, out0, out1) \ + MSA_PCKOD_D2(RTYPE, in4, in5, in6, in7, out2, out3) \ +} + +/* Description : Dot product of unsigned byte vector elements. + * Arguments : Inputs - mult + * cnst + * Outputs - out + * Return Type - as per RTYPE + * Details : Unsigned byte elements from 'mult' are multiplied with + * unsigned byte elements from 'cnst' producing a result + * twice the size of input i.e. unsigned halfword. + * The products of adjacent odd-even element pairs are then + * added together and stored to the 'out' vector. + */ +#define MSA_DOTP_UB(RTYPE, mult, cnst, out) \ +{ \ + out = (RTYPE) __msa_dotp_u_h((v16u8) mult, (v16u8) cnst); \ +} + +/* MSA_DOTP_UB2 / MSA_DOTP_UB4: MSA_DOTP_UB applied to two / four (mult, cnst, out) groups. */ +#define MSA_DOTP_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \ +{ \ + MSA_DOTP_UB(RTYPE, mult0, cnst0, out0) \ + MSA_DOTP_UB(RTYPE, mult1, cnst1, out1) \ +} + +#define MSA_DOTP_UB4(RTYPE, mult0, mult1, mult2, mult3, \ + cnst0, cnst1, cnst2, cnst3, \ + out0, out1, out2, out3) \ +{ \ + MSA_DOTP_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \ + MSA_DOTP_UB2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \ +} + +/* Description : Dot product of signed byte vector elements. + * Arguments : Inputs - mult + * cnst + * Outputs - out + * Return Type - as per RTYPE + * Details : Signed byte elements from 'mult' are multiplied with + * signed byte elements from 'cnst' producing a result + * twice the size of input i.e. signed halfword. + * The products of adjacent odd-even element pairs are then + * added together and stored to the 'out' vector.
+ */ +#define MSA_DOTP_SB(RTYPE, mult, cnst, out) \ +{ \ + out = (RTYPE) __msa_dotp_s_h((v16i8) mult, (v16i8) cnst); \ +} + +/* MSA_DOTP_SB2 / MSA_DOTP_SB4: MSA_DOTP_SB applied to two / four (mult, cnst, out) groups. */ +#define MSA_DOTP_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \ +{ \ + MSA_DOTP_SB(RTYPE, mult0, cnst0, out0) \ + MSA_DOTP_SB(RTYPE, mult1, cnst1, out1) \ +} + +#define MSA_DOTP_SB4(RTYPE, mult0, mult1, mult2, mult3, \ + cnst0, cnst1, cnst2, cnst3, \ + out0, out1, out2, out3) \ +{ \ + MSA_DOTP_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \ + MSA_DOTP_SB2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \ +} + +/* Description : Dot product of unsigned halfword vector elements. + * Arguments : Inputs - mult + * cnst + * Outputs - out + * Return Type - as per RTYPE + * Details : Unsigned halfword elements from 'mult' are multiplied with + * unsigned halfword elements from 'cnst' producing a result + * twice the size of input i.e. unsigned word. + * The products of adjacent odd-even element pairs are then + * added together and stored to the 'out' vector. + */ +#define MSA_DOTP_UH(RTYPE, mult, cnst, out) \ +{ \ + out = (RTYPE) __msa_dotp_u_w((v8u16) mult, (v8u16) cnst); \ +} + +/* MSA_DOTP_UH2 / MSA_DOTP_UH4: MSA_DOTP_UH applied to two / four (mult, cnst, out) groups. */ +#define MSA_DOTP_UH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \ +{ \ + MSA_DOTP_UH(RTYPE, mult0, cnst0, out0) \ + MSA_DOTP_UH(RTYPE, mult1, cnst1, out1) \ +} + +#define MSA_DOTP_UH4(RTYPE, mult0, mult1, mult2, mult3, \ + cnst0, cnst1, cnst2, cnst3, \ + out0, out1, out2, out3) \ +{ \ + MSA_DOTP_UH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \ + MSA_DOTP_UH2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \ +} + +/* Description : Dot product of signed halfword vector elements. + * Arguments : Inputs - mult + * cnst + * Outputs - out + * Return Type - as per RTYPE + * Details : Signed halfword elements from 'mult' are multiplied with + * signed halfword elements from 'cnst' producing a result + * twice the size of input i.e. signed word.
+ * The products of adjacent odd-even element pairs are then + * added together and stored to the 'out' vector. + */ +#define MSA_DOTP_SH(RTYPE, mult, cnst, out) \ +{ \ + out = (RTYPE) __msa_dotp_s_w((v8i16) mult, (v8i16) cnst); \ +} + +/* MSA_DOTP_SH2 / MSA_DOTP_SH4: MSA_DOTP_SH applied to two / four (mult, cnst, out) groups. */ +#define MSA_DOTP_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \ +{ \ + MSA_DOTP_SH(RTYPE, mult0, cnst0, out0) \ + MSA_DOTP_SH(RTYPE, mult1, cnst1, out1) \ +} + +#define MSA_DOTP_SH4(RTYPE, mult0, mult1, mult2, mult3, \ + cnst0, cnst1, cnst2, cnst3, \ + out0, out1, out2, out3) \ +{ \ + MSA_DOTP_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \ + MSA_DOTP_SH2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \ +} + +/* Description : Dot product & addition of unsigned byte vector elements. + * Arguments : Inputs - mult + * cnst + * Outputs - out (also read as the accumulator input) + * Return Type - as per RTYPE + * Details : Unsigned byte elements from 'mult' are multiplied with + * unsigned byte elements from 'cnst' producing a result + * twice the size of input i.e. unsigned halfword. + * The products of adjacent odd-even element pairs are then + * added to the 'out' vector. + */ +#define MSA_DPADD_UB(RTYPE, mult, cnst, out) \ +{ \ + out = (RTYPE) __msa_dpadd_u_h((v8u16) out, \ + (v16u8) mult, (v16u8) cnst); \ +} + +/* MSA_DPADD_UB2 / MSA_DPADD_UB4: MSA_DPADD_UB applied to two / four (mult, cnst, out) groups. */ +#define MSA_DPADD_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \ +{ \ + MSA_DPADD_UB(RTYPE, mult0, cnst0, out0) \ + MSA_DPADD_UB(RTYPE, mult1, cnst1, out1) \ +} + +#define MSA_DPADD_UB4(RTYPE, mult0, mult1, mult2, mult3, \ + cnst0, cnst1, cnst2, cnst3, out0, out1, out2, out3) \ +{ \ + MSA_DPADD_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \ + MSA_DPADD_UB2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \ +} + +/* Description : Dot product & addition of signed byte vector elements. + * Arguments : Inputs - mult + * cnst + * Outputs - out (also read as the accumulator input) + * Return Type - as per RTYPE + * Details : Signed byte elements from 'mult' are multiplied with + * signed byte elements from 'cnst' producing a result + * twice the size of input i.e.
signed halfword. + * Then this multiplication results of adjacent odd-even elements + * are added to the out vector. + */ +#define MSA_DPADD_SB(RTYPE, mult, cnst, out) \ +{ \ + out = (RTYPE) __msa_dpadd_s_h((v8i16) out, \ + (v16i8) mult, (v16i8) cnst); \ +} + +#define MSA_DPADD_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \ +{ \ + MSA_DPADD_SB(RTYPE, mult0, cnst0, out0) \ + MSA_DPADD_SB(RTYPE, mult1, cnst1, out1) \ +} + +#define MSA_DPADD_SB4(RTYPE, mult0, mult1, mult2, mult3, \ + cnst0, cnst1, cnst2, cnst3, out0, out1, out2, out3) \ +{ \ + MSA_DPADD_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \ + MSA_DPADD_SB2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \ +} + +/* Description : Dot product & addition of unsigned halfword vector elements. + * Arguments : Inputs - mult + * cnst + * Outputs - out + * Return Type - as per RTYPE + * Details : Unsigned halfword elements from 'mult' are multiplied with + * unsigned halfword elements from 'cnst' producing a result + * twice the size of input i.e. unsigned word. + * Then this multiplication results of adjacent odd-even elements + * are added to the out vector. + */ +#define MSA_DPADD_UH(RTYPE, mult, cnst, out) \ +{ \ + out = (RTYPE) __msa_dpadd_u_w((v4u32) out, \ + (v8u16) mult, (v8u16) cnst); \ +} + +#define MSA_DPADD_UH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \ +{ \ + MSA_DPADD_UH(RTYPE, mult0, cnst0, out0) \ + MSA_DPADD_UH(RTYPE, mult1, cnst1, out1) \ +} + +#define MSA_DPADD_UH4(RTYPE, mult0, mult1, mult2, mult3, \ + cnst0, cnst1, cnst2, cnst3, out0, out1, out2, out3) \ +{ \ + MSA_DPADD_UH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \ + MSA_DPADD_UH2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \ +} + +/* Description : Dot product & addition of signed halfword vector elements. 
+ * Arguments : Inputs - mult + * cnst + * Outputs - out + * Return Type - as per RTYPE + * Details : Signed halfword elements from 'mult' are multiplied with + * signed halfword elements from 'cnst' producing a result + * twice the size of input i.e. signed word. + * Then this multiplication results of adjacent odd-even elements + * are added to the out vector. + */ +#define MSA_DPADD_SH(RTYPE, mult, cnst, out) \ +{ \ + out = (RTYPE) __msa_dpadd_s_w((v4i32) out, \ + (v8i16) mult, (v8i16) cnst); \ +} + +#define MSA_DPADD_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \ +{ \ + MSA_DPADD_SH(RTYPE, mult0, cnst0, out0) \ + MSA_DPADD_SH(RTYPE, mult1, cnst1, out1) \ +} + +#define MSA_DPADD_SH4(RTYPE, mult0, mult1, mult2, mult3, \ + cnst0, cnst1, cnst2, cnst3, out0, out1, out2, out3) \ +{ \ + MSA_DPADD_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \ + MSA_DPADD_SH2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \ +} + +/* Description : Clip all signed halfword elements of input vector between min & max. + * out = ((in) < (min)) ? (min) : (((in) > (max)) ? (max) : (in)). + * Arguments : Inputs - in (input vector) + * - min (min threshold) + * - max (max threshold) + * Outputs - in (output vector with clipped elements) + * Note : type of 'in' must be v8i16. + */ +#define MSA_CLIP_SH(in, min, max) \ +{ \ + in = __msa_max_s_h((v8i16) min, (v8i16) in); \ + in = __msa_min_s_h((v8i16) max, (v8i16) in); \ +} + +/* Description : Clip all signed halfword elements of input vector between 0 & 255. + * Arguments : Inputs - in (input vector) + * Outputs - in (output vector with clipped elements) + * Note : type of 'in' must be v8i16. 
+ */ +#define MSA_CLIP_SH_0_255(in) \ +{ \ + in = __msa_maxi_s_h((v8i16) in, 0); \ + in = (v8i16) __msa_sat_u_h((v8u16) in, 7); \ +} + +#define MSA_CLIP_SH2_0_255(in0, in1) \ +{ \ + MSA_CLIP_SH_0_255(in0); \ + MSA_CLIP_SH_0_255(in1); \ +} + +#define MSA_CLIP_SH4_0_255(in0, in1, in2, in3) \ +{ \ + MSA_CLIP_SH2_0_255(in0, in1); \ + MSA_CLIP_SH2_0_255(in2, in3); \ +} + +#define MSA_CLIP_SH8_0_255(in0, in1, in2, in3, \ + in4, in5, in6, in7) \ +{ \ + MSA_CLIP_SH4_0_255(in0, in1, in2, in3); \ + MSA_CLIP_SH4_0_255(in4, in5, in6, in7); \ +} + +/* Description : Clip all signed word elements of input vector between 0 & 255. + * Arguments : Inputs - in (input vector) + * Outputs - in (output vector with clipped elements) + * Note : type of 'in' must be v4i32. + */ +#define MSA_CLIP_SW_0_255(in) \ +{ \ + in = __msa_maxi_s_w((v4i32) in, 0); \ + in = (v4i32) __msa_sat_u_w((v4u32) in, 7); \ +} + +#define MSA_CLIP_SW2_0_255(in0, in1) \ +{ \ + MSA_CLIP_SW_0_255(in0); \ + MSA_CLIP_SW_0_255(in1); \ +} + +#define MSA_CLIP_SW4_0_255(in0, in1, in2, in3) \ +{ \ + MSA_CLIP_SW2_0_255(in0, in1); \ + MSA_CLIP_SW2_0_255(in2, in3); \ +} + +#define MSA_CLIP_SW8_0_255(in0, in1, in2, in3, \ + in4, in5, in6, in7) \ +{ \ + MSA_CLIP_SW4_0_255(in0, in1, in2, in3); \ + MSA_CLIP_SW4_0_255(in4, in5, in6, in7); \ +} + +/* Description : Addition of 16 unsigned byte elements. + * 16 unsigned byte elements of input vector are added + * together and resulted integer sum is returned. 
+ * Arguments : Inputs - in (unsigned byte vector) + * Outputs - sum_m (u32 sum) + * Return Type - unsigned word + */ +#define MSA_HADD_UB_U32(in, sum_m) \ +{ \ + v8u16 res_m; \ + v4u32 res0_m; \ + v2u64 res1_m, res2_m; \ + \ + res_m = __msa_hadd_u_h((v16u8) in, (v16u8) in); \ + res0_m = __msa_hadd_u_w(res_m, res_m); \ + res1_m = __msa_hadd_u_d(res0_m, res0_m); \ + res2_m = (v2u64) __msa_splati_d((v2i64) res1_m, 1); \ + res1_m += res2_m; \ + sum_m = __msa_copy_u_w((v4i32) res1_m, 0); \ +} + +/* Description : Addition of 8 unsigned halfword elements. + * 8 unsigned halfword elements of input vector are added + * together and resulted integer sum is returned. + * Arguments : Inputs - in (unsigned halfword vector) + * Outputs - sum_m (u32 sum) + * Return Type - unsigned word + */ +#define MSA_HADD_UH_U32(in, sum_m) \ +{ \ + v4u32 res_m; \ + v2u64 res0_m, res1_m; \ + \ + res_m = __msa_hadd_u_w((v8u16) in, (v8u16) in); \ + res0_m = __msa_hadd_u_d(res_m, res_m); \ + res1_m = (v2u64) __msa_splati_d((v2i64) res0_m, 1); \ + res0_m += res1_m; \ + sum_m = __msa_copy_u_w((v4i32) res0_m, 0); \ +} + +/* Description : Addition of 4 unsigned word elements. + * 4 unsigned word elements of input vector are added together and + * resulted integer sum is returned. + * Arguments : Inputs - in (unsigned word vector) + * Outputs - sum_m (u32 sum) + * Return Type - unsigned word + */ +#define MSA_HADD_UW_U32(in, sum_m) \ +{ \ + v2u64 res0_m, res1_m; \ + \ + res0_m = __msa_hadd_u_d((v4u32) in, (v4u32) in); \ + res1_m = (v2u64) __msa_splati_d((v2i64) res0_m, 1); \ + res0_m += res1_m; \ + sum_m = __msa_copy_u_w((v4i32) res0_m, 0); \ +} + +/* Description : Addition of 16 signed byte elements. + * 16 signed byte elements of input vector are added + * together and resulted integer sum is returned. 
+ * Arguments : Inputs - in (signed byte vector) + * Outputs - sum_m (i32 sum) + * Return Type - signed word + */ +#define MSA_HADD_SB_S32(in, sum_m) \ +{ \ + v8i16 res_m; \ + v4i32 res0_m; \ + v2i64 res1_m, res2_m; \ + \ + res_m = __msa_hadd_s_h((v16i8) in, (v16i8) in); \ + res0_m = __msa_hadd_s_w(res_m, res_m); \ + res1_m = __msa_hadd_s_d(res0_m, res0_m); \ + res2_m = __msa_splati_d(res1_m, 1); \ + res1_m += res2_m; \ + sum_m = __msa_copy_s_w((v4i32) res1_m, 0); \ +} + +/* Description : Addition of 8 signed halfword elements. + * 8 signed halfword elements of input vector are added + * together and resulted integer sum is returned. + * Arguments : Inputs - in (signed halfword vector) + * Outputs - sum_m (i32 sum) + * Return Type - signed word + */ +#define MSA_HADD_SH_S32(in, sum_m) \ +{ \ + v4i32 res_m; \ + v2i64 res0_m, res1_m; \ + \ + res_m = __msa_hadd_s_w((v8i16) in, (v8i16) in); \ + res0_m = __msa_hadd_s_d(res_m, res_m); \ + res1_m = __msa_splati_d(res0_m, 1); \ + res0_m += res1_m; \ + sum_m = __msa_copy_s_w((v4i32) res0_m, 0); \ +} + +/* Description : Addition of 4 signed word elements. + * 4 signed word elements of input vector are added together and + * resulted integer sum is returned. + * Arguments : Inputs - in (signed word vector) + * Outputs - sum_m (i32 sum) + * Return Type - signed word + */ +#define MSA_HADD_SW_S32(in, sum_m) \ +{ \ + v2i64 res0_m, res1_m; \ + \ + res0_m = __msa_hadd_s_d((v4i32) in, (v4i32) in); \ + res1_m = __msa_splati_d(res0_m, 1); \ + res0_m += res1_m; \ + sum_m = __msa_copy_s_w((v4i32) res0_m, 0); \ +} + +/* Description : Saturate the unsigned halfword element values to the max + * unsigned value of (sat_val+1 bits). + * The element data width remains unchanged. + * Arguments : Inputs - in, sat_val + * Outputs - in (in place) + * Return Type - v8u16 + * Details : Each unsigned halfword element from 'in' is saturated to the + * value generated with (sat_val+1) bit range. + * Results are in placed to original vectors. 
+ */ +#define MSA_SAT_UH(in, sat_val) \ +{ \ + in = __msa_sat_u_h(in, sat_val); \ +} + +#define MSA_SAT_UH2(in0, in1, sat_val) \ +{ \ + MSA_SAT_UH(in0, sat_val) \ + MSA_SAT_UH(in1, sat_val) \ +} + +#define MSA_SAT_UH4(in0, in1, in2, in3, sat_val) \ +{ \ + MSA_SAT_UH2(in0, in1, sat_val) \ + MSA_SAT_UH2(in2, in3, sat_val) \ +} + +/* Description : Saturate the signed halfword element values to the max + * signed value of (sat_val+1 bits). + * The element data width remains unchanged. + * Arguments : Inputs - in, sat_val + * Outputs - in (in place) + * Return Type - v8i16 + * Details : Each signed halfword element from 'in' is saturated to the + * signed range representable in (sat_val+1) bits. + * The result is written in place to the original vector. + * No cast is applied here, so 'in' must already be a v8i16. + * The SH2/SH4 variants apply the same sat_val to each vector. + */ +#define MSA_SAT_SH(in, sat_val) \ +{ \ + in = __msa_sat_s_h(in, sat_val); \ +} + +#define MSA_SAT_SH2(in0, in1, sat_val) \ +{ \ + MSA_SAT_SH(in0, sat_val) \ + MSA_SAT_SH(in1, sat_val) \ +} + +#define MSA_SAT_SH4(in0, in1, in2, in3, sat_val) \ +{ \ + MSA_SAT_SH2(in0, in1, sat_val) \ + MSA_SAT_SH2(in2, in3, sat_val) \ +} + +/* Description : Saturate the unsigned word element values to the max + * unsigned value of (sat_val+1 bits). + * The element data width remains unchanged. + * Arguments : Inputs - in, sat_val + * Outputs - in (in place) + * Return Type - v4u32 + * Details : Each unsigned word element from 'in' is saturated to the + * unsigned range representable in (sat_val+1) bits. + * The result is written in place to the original vector. + * No cast is applied here, so 'in' must already be a v4u32. + * The UW2/UW4 variants apply the same sat_val to each vector. + */ +#define MSA_SAT_UW(in, sat_val) \ +{ \ + in = __msa_sat_u_w(in, sat_val); \ +} + +#define MSA_SAT_UW2(in0, in1, sat_val) \ +{ \ + MSA_SAT_UW(in0, sat_val) \ + MSA_SAT_UW(in1, sat_val) \ +} + +#define MSA_SAT_UW4(in0, in1, in2, in3, sat_val) \ +{ \ + MSA_SAT_UW2(in0, in1, sat_val) \ + MSA_SAT_UW2(in2, in3, sat_val) \ +} + +/* Description : Saturate the signed word element values to the max + * signed value of (sat_val+1 bits). + * The element data width remains unchanged.
+ * Arguments : Inputs - in, sat_val + * Outputs - in (in place) + * Return Type - v4i32 + * Details : Each signed word element from 'in' is saturated to the + * value generated with (sat_val+1) bit range. + * Results are in placed to original vectors. + */ +#define MSA_SAT_SW(in, sat_val) \ +{ \ + in = __msa_sat_s_w(in, sat_val); \ +} + +#define MSA_SAT_SW2(in0, in1, sat_val) \ +{ \ + MSA_SAT_SW(in0, sat_val) \ + MSA_SAT_SW(in1, sat_val) \ +} + +#define MSA_SAT_SW4(in0, in1, in2, in3, sat_val) \ +{ \ + MSA_SAT_SW2(in0, in1, sat_val) \ + MSA_SAT_SW2(in2, in3, sat_val) \ +} + +/* Description : Each byte element is logically xor'ed with immediate 128. + * Arguments : Inputs - in + * Outputs - in (in-place) + * Return Type - as per RTYPE + * Details : Each unsigned byte element from input vector 'in' is + * logically xor'ed with 128 and result is in-place stored in + * 'in' vector. + */ +#define MSA_XORI_B_128(RTYPE, in) \ +{ \ + in = (RTYPE) __msa_xori_b((v16u8) in, 128); \ +} + +#define MSA_XORI_B2_128(RTYPE, in0, in1) \ +{ \ + MSA_XORI_B_128(RTYPE, in0); \ + MSA_XORI_B_128(RTYPE, in1); \ +} + +#define MSA_XORI_B4_128(RTYPE, in0, in1, in2, in3) \ +{ \ + MSA_XORI_B2_128(RTYPE, in0, in1); \ + MSA_XORI_B2_128(RTYPE, in2, in3); \ +} + +/* Description : Shift right logical all byte elements of vector. + * Arguments : Inputs - in, shift + * Outputs - in (in place) + * Return Type - as per RTYPE + * Details : Each element of vector 'in' is shifted right logical by + * number of bits respective element holds in vector 'shift' and + * result is in place written to 'in'. + * Here, 'shift' is a vector passed in. 
+ */ +#define MSA_SRL_B(RTYPE, in, shift) \ +{ \ + in = (RTYPE) __msa_srl_b((v16i8) in, (v16i8) shift); \ +} + +#define MSA_SRL_B2(RTYPE, in0, in1, shift) \ +{ \ + MSA_SRL_B(RTYPE, in0, shift); \ + MSA_SRL_B(RTYPE, in1, shift); \ +} + +#define MSA_SRL_B4(RTYPE, in0, in1, in2, in3, shift) \ +{ \ + MSA_SRL_B2(RTYPE, in0, in1, shift); \ + MSA_SRL_B2(RTYPE, in2, in3, shift); \ +} + +/* Description : Shift right logical all halfword elements of vector. + * Arguments : Inputs - in, shift + * Outputs - in (in place) + * Return Type - as per RTYPE + * Details : Each element of vector 'in' is shifted right logical by + * number of bits respective element holds in vector 'shift' and + * result is in place written to 'in'. + * Here, 'shift' is a vector passed in. + */ +#define MSA_SRL_H(RTYPE, in, shift) \ +{ \ + in = (RTYPE) __msa_srl_h((v8i16) in, (v8i16) shift); \ +} + +#define MSA_SRL_H2(RTYPE, in0, in1, shift) \ +{ \ + MSA_SRL_H(RTYPE, in0, shift); \ + MSA_SRL_H(RTYPE, in1, shift); \ +} + +#define MSA_SRL_H4(RTYPE, in0, in1, in2, in3, shift) \ +{ \ + MSA_SRL_H2(RTYPE, in0, in1, shift); \ + MSA_SRL_H2(RTYPE, in2, in3, shift); \ +} + +/* Description : Shift right logical all word elements of vector. + * Arguments : Inputs - in, shift + * Outputs - in (in place) + * Return Type - as per RTYPE + * Details : Each element of vector 'in' is shifted right logical by + * number of bits respective element holds in vector 'shift' and + * result is in place written to 'in'. + * Here, 'shift' is a vector passed in. + */ +#define MSA_SRL_W(RTYPE, in, shift) \ +{ \ + in = (RTYPE) __msa_srl_w((v4i32) in, (v4i32) shift); \ +} + +#define MSA_SRL_W2(RTYPE, in0, in1, shift) \ +{ \ + MSA_SRL_W(RTYPE, in0, shift); \ + MSA_SRL_W(RTYPE, in1, shift); \ +} + +#define MSA_SRL_W4(RTYPE, in0, in1, in2, in3, shift) \ +{ \ + MSA_SRL_W2(RTYPE, in0, in1, shift); \ + MSA_SRL_W2(RTYPE, in2, in3, shift); \ +} + +/* Description : Shift right logical all double word elements of vector. 
+ * Arguments : Inputs - in, shift + * Outputs - in (in place) + * Return Type - as per RTYPE + * Details : Each element of vector 'in' is shifted right logical by + * number of bits respective element holds in vector 'shift' and + * result is in place written to 'in'. + * Here, 'shift' is a vector passed in. + */ +#define MSA_SRL_D(RTYPE, in, shift) \ +{ \ + in = (RTYPE) __msa_srl_d((v2i64) in, (v2i64) shift); \ +} + +#define MSA_SRL_D2(RTYPE, in0, in1, shift) \ +{ \ + MSA_SRL_D(RTYPE, in0, shift); \ + MSA_SRL_D(RTYPE, in1, shift); \ +} + +#define MSA_SRL_D4(RTYPE, in0, in1, in2, in3, shift) \ +{ \ + MSA_SRL_D2(RTYPE, in0, in1, shift); \ + MSA_SRL_D2(RTYPE, in2, in3, shift); \ +} + +/* Description : Shift right logical rounded all byte elements of vector. + * Arguments : Inputs - in, shift + * Outputs - in (in place) + * Return Type - as per RTYPE + * Details : Each element of vector 'in' is shifted right logical rounded + * by number of bits respective element holds in vector 'shift' + * and result is in place written to 'in'. + * Here, 'shift' is a vector passed in. + */ +#define MSA_SRLR_B(RTYPE, in, shift) \ +{ \ + in = (RTYPE) __msa_srlr_b((v16i8) in, (v16i8) shift); \ +} + +#define MSA_SRLR_B2(RTYPE, in0, in1, shift) \ +{ \ + MSA_SRLR_B(RTYPE, in0, shift); \ + MSA_SRLR_B(RTYPE, in1, shift); \ +} + +#define MSA_SRLR_B4(RTYPE, in0, in1, in2, in3, shift) \ +{ \ + MSA_SRLR_B2(RTYPE, in0, in1, shift); \ + MSA_SRLR_B2(RTYPE, in2, in3, shift); \ +} + +/* Description : Shift right logical rounded all halfword elements of vector. + * Arguments : Inputs - in, shift + * Outputs - in (in place) + * Return Type - as per RTYPE + * Details : Each element of vector 'in' is shifted right logical rounded + * by number of bits respective element holds in vector 'shift' + * and result is in place written to 'in'. + * Here, 'shift' is a vector passed in. 
+ */ +#define MSA_SRLR_H(RTYPE, in, shift) \ +{ \ + in = (RTYPE) __msa_srlr_h((v8i16) in, (v8i16) shift); \ +} + +#define MSA_SRLR_H2(RTYPE, in0, in1, shift) \ +{ \ + MSA_SRLR_H(RTYPE, in0, shift); \ + MSA_SRLR_H(RTYPE, in1, shift); \ +} + +#define MSA_SRLR_H4(RTYPE, in0, in1, in2, in3, shift) \ +{ \ + MSA_SRLR_H2(RTYPE, in0, in1, shift); \ + MSA_SRLR_H2(RTYPE, in2, in3, shift); \ +} + +/* Description : Shift right logical rounded all word elements of vector. + * Arguments : Inputs - in, shift + * Outputs - in (in place) + * Return Type - as per RTYPE + * Details : Each element of vector 'in' is shifted right logical rounded + * by number of bits respective element holds in vector 'shift' + * and result is in place written to 'in'. + * Here, 'shift' is a vector passed in. + */ +#define MSA_SRLR_W(RTYPE, in, shift) \ +{ \ + in = (RTYPE) __msa_srlr_w((v4i32) in, (v4i32) shift); \ +} + +#define MSA_SRLR_W2(RTYPE, in0, in1, shift) \ +{ \ + MSA_SRLR_W(RTYPE, in0, shift); \ + MSA_SRLR_W(RTYPE, in1, shift); \ +} + +#define MSA_SRLR_W4(RTYPE, in0, in1, in2, in3, shift) \ +{ \ + MSA_SRLR_W2(RTYPE, in0, in1, shift); \ + MSA_SRLR_W2(RTYPE, in2, in3, shift); \ +} + +/* Description : Shift right logical rounded all double word elements of vector. + * Arguments : Inputs - in, shift + * Outputs - in (in place) + * Return Type - as per RTYPE + * Details : Each element of vector 'in' is shifted right logical rounded + * by number of bits respective element holds in vector 'shift' + * and result is in place written to 'in'. + * Here, 'shift' is a vector passed in. 
+ */ +#define MSA_SRLR_D(RTYPE, in, shift) \ +{ \ + in = (RTYPE) __msa_srlr_d((v2i64) in, (v2i64) shift); \ +} + +#define MSA_SRLR_D2(RTYPE, in0, in1, shift) \ +{ \ + MSA_SRLR_D(RTYPE, in0, shift); \ + MSA_SRLR_D(RTYPE, in1, shift); \ +} + +#define MSA_SRLR_D4(RTYPE, in0, in1, in2, in3, shift) \ +{ \ + MSA_SRLR_D2(RTYPE, in0, in1, shift); \ + MSA_SRLR_D2(RTYPE, in2, in3, shift); \ +} + +/* Description : Shift right arithmetic rounded all byte elements of vector. + * Arguments : Inputs - in, shift + * Outputs - in (in place) + * Return Type - as per RTYPE + * Details : Each element of vector 'in' is shifted right arithmetic + * rounded by number of bits respective element holds in + * vector 'shift' and result is in place written to 'in'. + * Here, 'shift' is a vector passed in. + */ +#define MSA_SRAR_B(RTYPE, in, shift) \ +{ \ + in = (RTYPE) __msa_srar_b((v16i8) in, (v16i8) shift); \ +} + +#define MSA_SRAR_B2(RTYPE, in0, in1, shift) \ +{ \ + MSA_SRAR_B(RTYPE, in0, shift); \ + MSA_SRAR_B(RTYPE, in1, shift); \ +} + +#define MSA_SRAR_B4(RTYPE, in0, in1, in2, in3, shift) \ +{ \ + MSA_SRAR_B2(RTYPE, in0, in1, shift); \ + MSA_SRAR_B2(RTYPE, in2, in3, shift); \ +} + +/* Description : Shift right arithmetic rounded all halfword elements of vector. + * Arguments : Inputs - in, shift + * Outputs - in (in place) + * Return Type - as per RTYPE + * Details : Each element of vector 'in' is shifted right arithmetic + * rounded by number of bits respective element holds in + * vector 'shift' and result is in place written to 'in'. + * Here, 'shift' is a vector passed in. 
+ */ +#define MSA_SRAR_H(RTYPE, in, shift) \ +{ \ + in = (RTYPE) __msa_srar_h((v8i16) in, (v8i16) shift); \ +} + +#define MSA_SRAR_H2(RTYPE, in0, in1, shift) \ +{ \ + MSA_SRAR_H(RTYPE, in0, shift); \ + MSA_SRAR_H(RTYPE, in1, shift); \ +} + +#define MSA_SRAR_H4(RTYPE, in0, in1, in2, in3, shift) \ +{ \ + MSA_SRAR_H2(RTYPE, in0, in1, shift); \ + MSA_SRAR_H2(RTYPE, in2, in3, shift); \ +} + +/* Description : Shift right arithmetic rounded all word elements of vector. + * Arguments : Inputs - in, shift + * Outputs - in (in place) + * Return Type - as per RTYPE + * Details : Each element of vector 'in' is shifted right arithmetic + * rounded by number of bits respective element holds in + * vector 'shift' and result is in place written to 'in'. + * Here, 'shift' is a vector passed in. + */ +#define MSA_SRAR_W(RTYPE, in, shift) \ +{ \ + in = (RTYPE) __msa_srar_w((v4i32) in, (v4i32) shift); \ +} + +#define MSA_SRAR_W2(RTYPE, in0, in1, shift) \ +{ \ + MSA_SRAR_W(RTYPE, in0, shift); \ + MSA_SRAR_W(RTYPE, in1, shift); \ +} + +#define MSA_SRAR_W4(RTYPE, in0, in1, in2, in3, shift) \ +{ \ + MSA_SRAR_W2(RTYPE, in0, in1, shift); \ + MSA_SRAR_W2(RTYPE, in2, in3, shift); \ +} + +/* Description : Shift right arithmetic rounded all double word elements + * of vector. + * Arguments : Inputs - in, shift + * Outputs - in (in place) + * Return Type - as per RTYPE + * Details : Each element of vector 'in' is shifted right arithmetic + * rounded by number of bits respective element holds in + * vector 'shift' and result is in place written to 'in'. + * Here, 'shift' is a vector passed in. 
+ */ +#define MSA_SRAR_D(RTYPE, in, shift) \ +{ \ + in = (RTYPE) __msa_srar_d((v2i64) in, (v2i64) shift); \ +} + +#define MSA_SRAR_D2(RTYPE, in0, in1, shift) \ +{ \ + MSA_SRAR_D(RTYPE, in0, shift); \ + MSA_SRAR_D(RTYPE, in1, shift); \ +} + +#define MSA_SRAR_D4(RTYPE, in0, in1, in2, in3, shift) \ +{ \ + MSA_SRAR_D2(RTYPE, in0, in1, shift); \ + MSA_SRAR_D2(RTYPE, in2, in3, shift); \ +} + +/* Description : Shift right arithmetic rounded all byte elements of vector + * by an immediate bit count. + * Arguments : Inputs - in, shift + * Outputs - in (in place) + * Return Type - as per RTYPE + * Details : Each element of vector 'in' is shifted right arithmetic + * rounded by 'shift' bits and the result is written in place + * to 'in'. + * Here, 'shift' is an immediate number, not a vector: it is + * handed to __msa_srari_b unchanged (no vector cast), so it + * must be a compile-time constant accepted by that intrinsic. + * The B2/B4 variants apply the same 'shift' to every vector. + */ +#define MSA_SRARI_B(RTYPE, in, shift) \ +{ \ + in = (RTYPE) __msa_srari_b((v16i8) in, shift); \ +} + +#define MSA_SRARI_B2(RTYPE, in0, in1, shift) \ +{ \ + MSA_SRARI_B(RTYPE, in0, shift); \ + MSA_SRARI_B(RTYPE, in1, shift); \ +} + +#define MSA_SRARI_B4(RTYPE, in0, in1, in2, in3, shift) \ +{ \ + MSA_SRARI_B2(RTYPE, in0, in1, shift); \ + MSA_SRARI_B2(RTYPE, in2, in3, shift); \ +} + +/* Description : Shift right arithmetic rounded all halfword elements of vector. + * Arguments : Inputs - in, shift + * Outputs - in (in place) + * Return Type - as per RTYPE + * Details : Each element of vector 'in' is shifted right arithmetic + * rounded by number of bits respective element holds in vector + * 'shift' and result is in place written to 'in'. + * Here, 'shift' is an immediate number passed in.
+ */ +/* NOTE: 'shift' is forwarded to __msa_srari_h unchanged (an immediate, not a + * vector), so it must be a compile-time constant accepted by that + * intrinsic. The H2/H4 variants apply the same 'shift' to every vector. + */ +#define MSA_SRARI_H(RTYPE, in, shift) \ +{ \ + in = (RTYPE) __msa_srari_h((v8i16) in, shift); \ +} + +#define MSA_SRARI_H2(RTYPE, in0, in1, shift) \ +{ \ + MSA_SRARI_H(RTYPE, in0, shift); \ + MSA_SRARI_H(RTYPE, in1, shift); \ +} + +#define MSA_SRARI_H4(RTYPE, in0, in1, in2, in3, shift) \ +{ \ + MSA_SRARI_H2(RTYPE, in0, in1, shift); \ + MSA_SRARI_H2(RTYPE, in2, in3, shift); \ +} + +/* Description : Shift right arithmetic rounded all word elements of vector + * by an immediate bit count. + * Arguments : Inputs - in, shift + * Outputs - in (in place) + * Return Type - as per RTYPE + * Details : Each element of vector 'in' is shifted right arithmetic + * rounded by 'shift' bits and the result is written in place + * to 'in'. + * Here, 'shift' is an immediate number, not a vector: it is + * handed to __msa_srari_w unchanged (no vector cast), so it + * must be a compile-time constant accepted by that intrinsic. + * The W2/W4 variants apply the same 'shift' to every vector. + */ +#define MSA_SRARI_W(RTYPE, in, shift) \ +{ \ + in = (RTYPE) __msa_srari_w((v4i32) in, shift); \ +} + +#define MSA_SRARI_W2(RTYPE, in0, in1, shift) \ +{ \ + MSA_SRARI_W(RTYPE, in0, shift); \ + MSA_SRARI_W(RTYPE, in1, shift); \ +} + +#define MSA_SRARI_W4(RTYPE, in0, in1, in2, in3, shift) \ +{ \ + MSA_SRARI_W2(RTYPE, in0, in1, shift); \ + MSA_SRARI_W2(RTYPE, in2, in3, shift); \ +} + +/* Description : Shift right arithmetic rounded all double word elements + * of vector. + * Arguments : Inputs - in, shift + * Outputs - in (in place) + * Return Type - as per RTYPE + * Details : Each element of vector 'in' is shifted right arithmetic + * rounded by number of bits respective element holds in + * vector 'shift' and result is in place written to 'in'. + * Here, 'shift' is an immediate number passed in.
+ */ +/* NOTE: 'shift' is forwarded to __msa_srari_d unchanged (an immediate, not a + * vector), so it must be a compile-time constant accepted by that + * intrinsic. The D2/D4 variants apply the same 'shift' to every vector. + */ +#define MSA_SRARI_D(RTYPE, in, shift) \ +{ \ + in = (RTYPE) __msa_srari_d((v2i64) in, shift); \ +} + +#define MSA_SRARI_D2(RTYPE, in0, in1, shift) \ +{ \ + MSA_SRARI_D(RTYPE, in0, shift); \ + MSA_SRARI_D(RTYPE, in1, shift); \ +} + +#define MSA_SRARI_D4(RTYPE, in0, in1, in2, in3, shift) \ +{ \ + MSA_SRARI_D2(RTYPE, in0, in1, shift); \ + MSA_SRARI_D2(RTYPE, in2, in3, shift); \ +} + +/* Description : Transposes input 4x4 byte block. + * Arguments : Inputs - in0, in1, in2, in3 (input 4x4 byte block) + * Outputs - out0, out1, out2, out3 (output 4x4 byte block) + * Return Type - RTYPE + * Details : + */ +#define MSA_TRANSPOSE4x4_B(RTYPE, in0, in1, in2, in3, \ + out0, out1, out2, out3) \ +{ \ + v16i8 zero_m = { 0 }; \ + \ + MSA_ILVR_B2(RTYPE, in2, in0, in3, in1, out2, out3); \ + out0 = (RTYPE) __msa_ilvr_b((v16i8) out3, (v16i8) out2); \ + out1 = (RTYPE) __msa_sldi_b(zero_m, (v16i8) out0, 4); \ + out2 = (RTYPE) __msa_sldi_b(zero_m, (v16i8) out1, 4); \ + out3 = (RTYPE) __msa_sldi_b(zero_m, (v16i8) out2, 4); \ +} + +/* Description : Transposes input 8x4 byte block into 4x8. + * Arguments : Inputs - in0, in1, in2 ~ in7 (input 8x4 byte block) + * Outputs - out0, out1, out2, out3 (output 4x8 byte block) + * Return Type - RTYPE + * Details : + */ +#define MSA_TRANSPOSE8x4_B(RTYPE, in0, in1, in2, in3, in4, in5, \ + in6, in7, out0, out1, out2, out3) \ +{ \ + v16i8 zero_m = { 0 }; \ + \ + MSA_ILVR_B4(RTYPE, in2, in0, in3, in1, in6, in4, in7, in5, \ + out0, out1, out2, out3); \ + MSA_ILVR_H2(RTYPE, out2, out0, out3, out1, out2, out3); \ + out0 = (RTYPE) __msa_ilvr_b((v16i8) out3, (v16i8) out2); \ + out1 = (RTYPE) __msa_sldi_b(zero_m, (v16i8) out0, 8); \ + out2 = (RTYPE) __msa_ilvl_b((v16i8) out3, (v16i8) out2); \ + out3 = (RTYPE) __msa_sldi_b(zero_m, (v16i8) out2, 8); \ +} + +/* Description : Transposes 16x4 block into 4x16 with byte elements in vectors.
+ * Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7, + * in8, in9, in10, in11, in12, in13, in14, in15 + * Outputs - out0, out1, out2, out3 + * Return Type - RTYPE + * Details : + */ +#define MSA_TRANSPOSE16x4_B(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + in8, in9, in10, in11, in12, in13, in14, in15, \ + out0, out1, out2, out3) \ +{ \ + v2i64 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ + \ + MSA_ILVR_B4(RTYPE, in2, in0, in3, in1, in6, in4, in7, in5, \ + out0, out1, out2, out3); \ + MSA_ILVR_H2(RTYPE, out2, out0, out3, out1, out2, out3); \ + MSA_ILVRL_B2(v2i64, out3, out2, tmp0_m, tmp1_m); \ + \ + MSA_ILVR_B4(RTYPE, in10, in8, in11, in9, in14, in12, in15, in13, \ + out0, out1, out2, out3); \ + MSA_ILVR_H2(RTYPE, out2, out0, out3, out1, out2, out3); \ + MSA_ILVRL_B2(v2i64, out3, out2, tmp2_m, tmp3_m); \ + \ + MSA_ILVRL_D4(RTYPE, tmp2_m, tmp0_m, tmp3_m, tmp1_m, \ + out0, out1, out2, out3); \ +} + +/* Description : Transposes input 8x8 byte block. + * Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7 + * (input 8x8 byte block) + * Outputs - out0, out1, out2, out3, out4, out5, out6, out7 + * (output 8x8 byte block) + * Return Type - RTYPE + * Details : + */ +#define MSA_TRANSPOSE8x8_B(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3, out4, out5, out6, out7) \ +{ \ + v16i8 zero_m = {0}; \ + \ + MSA_ILVR_B4(RTYPE, in2, in0, in3, in1, in6, in4, in7, in5, \ + out0, out1, out2, out3); \ + MSA_ILVRL_B4(RTYPE, out1, out0, out3, out2, out4, out5, out6, out7); \ + MSA_ILVRL_W4(RTYPE, out6, out4, out7, out5, out0, out2, out4, out6); \ + out1 = (RTYPE) __msa_sldi_b(zero_m, (v16i8) out0, 8); \ + out3 = (RTYPE) __msa_sldi_b(zero_m, (v16i8) out2, 8); \ + out5 = (RTYPE) __msa_sldi_b(zero_m, (v16i8) out4, 8); \ + out7 = (RTYPE) __msa_sldi_b(zero_m, (v16i8) out6, 8); \ +} + +/* Description : Transposes 16x8 block into 8x16 with byte elements in vectors. 
+ * Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7, + * in8, in9, in10, in11, in12, in13, in14, in15 + * Outputs - out0, out1, out2, out3, out4, out5, out6, out7 + * Return Type - RTYPE + * Details : + */ +#define MSA_TRANSPOSE16x8_B(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + in8, in9, in10, in11, in12, in13, in14, in15, \ + out0, out1, out2, out3, out4, out5, out6, out7) \ +{ \ + v16i8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ + \ + MSA_ILVEV_D4(RTYPE, in8, in0, in9, in1, in10, in2, in11, in3, \ + out7, out6, out5, out4); \ + MSA_ILVEV_D4(RTYPE, in12, in4, in13, in5, in14, in6, in15, in7, \ + out3, out2, out1, out0); \ + \ + tmp0_m = __msa_ilvev_b((v16i8) out6, (v16i8) out7); \ + tmp1_m = __msa_ilvod_b((v16i8) out6, (v16i8) out7); \ + out6 = (RTYPE) __msa_ilvev_b((v16i8) out4, (v16i8) out5); \ + out5 = (RTYPE) __msa_ilvod_b((v16i8) out4, (v16i8) out5); \ + tmp2_m = __msa_ilvev_b((v16i8) out2, (v16i8) out3); \ + tmp3_m = __msa_ilvod_b((v16i8) out2, (v16i8) out3); \ + out2 = (RTYPE) __msa_ilvev_b((v16i8) out0, (v16i8) out1); \ + out1 = (RTYPE) __msa_ilvod_b((v16i8) out0, (v16i8) out1); \ + \ + MSA_ILVEV_H2(RTYPE, out6, tmp0_m, out2, tmp2_m, out3, out7); \ + out0 = (RTYPE) __msa_ilvev_w((v4i32) out7, (v4i32) out3); \ + out4 = (RTYPE) __msa_ilvod_w((v4i32) out7, (v4i32) out3); \ + \ + MSA_ILVOD_H2(RTYPE, out6, tmp0_m, out2, tmp2_m, out3, out7); \ + out2 = (RTYPE) __msa_ilvev_w((v4i32) out7, (v4i32) out3); \ + out6 = (RTYPE) __msa_ilvod_w((v4i32) out7, (v4i32) out3); \ + \ + MSA_ILVOD_H2(v16i8, out5, tmp1_m, out1, tmp3_m, tmp0_m, tmp2_m); \ + out3 = (RTYPE) __msa_ilvev_w((v4i32) tmp2_m, (v4i32) tmp0_m); \ + out7 = (RTYPE) __msa_ilvod_w((v4i32) tmp2_m, (v4i32) tmp0_m); \ + \ + MSA_ILVEV_H2(v16i8, out5, tmp1_m, out1, tmp3_m, tmp0_m, tmp2_m); \ + out1 = (RTYPE) __msa_ilvev_w((v4i32) tmp2_m, (v4i32) tmp0_m); \ + out5 = (RTYPE) __msa_ilvod_w((v4i32) tmp2_m, (v4i32) tmp0_m); \ +} + +/* Description : Transposes 4x4 block with half word elements in vectors. 
+ * Arguments : Inputs - in0, in1, in2, in3 + * Outputs - out0, out1, out2, out3 + * Return Type - RTYPE + * Details : + */ +/* NOTE: MSA_TRANSPOSE4x4_H runs three steps, in order: MSA_ILVR_H2 on the + * pairs (in1, in0) and (in3, in2) into out1/out3, MSA_ILVRL_W2 on + * (out3, out1) into out0/out2, then MSA_ILVL_D2 on (out0, out0) and + * (out2, out2) into out1/out3. The outputs double as scratch between + * steps. The ILV helper macros are defined outside this part of the + * file; see their own headers for the exact lane order. + */ +#define MSA_TRANSPOSE4x4_H(RTYPE, in0, in1, in2, in3, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVR_H2(RTYPE, in1, in0, in3, in2, out1, out3); \ + MSA_ILVRL_W2(RTYPE, out3, out1, out0, out2); \ + MSA_ILVL_D2(RTYPE, out0, out0, out2, out2, out1, out3); \ +} + +/* Description : Transposes 8x4 block with half word elements in vectors. + * Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7 + * Outputs - out0, out1, out2, out3 + * Return Type - RTYPE + * Details : in4..in7 are combined with MSA_ILVR_H2 followed by + * MSA_ILVRL_H2 into tmp0_m/tmp1_m, and in0..in3 the same way + * into tmp2_m/tmp3_m. MSA_PCKEV_D2 then produces out0/out2 + * and MSA_PCKOD_D2 produces out1/out3 from those + * intermediates. The ILV/PCK helper macros are defined + * outside this part of the file. + * Note : The macro declares locals s0_m, s1_m and tmp0_m..tmp3_m + * inside its brace block, so callers must not pass arguments + * with those names. + */ +#define MSA_TRANSPOSE8x4_H(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) \ +{ \ + v8i16 s0_m, s1_m; \ + v8i16 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ + \ + MSA_ILVR_H2(v8i16, in6, in4, in7, in5, s0_m, s1_m); \ + MSA_ILVRL_H2(v8i16, s1_m, s0_m, tmp0_m, tmp1_m); \ + MSA_ILVR_H2(v8i16, in2, in0, in3, in1, s0_m, s1_m); \ + MSA_ILVRL_H2(v8i16, s1_m, s0_m, tmp2_m, tmp3_m); \ + MSA_PCKEV_D2(RTYPE, tmp0_m, tmp2_m, tmp1_m, tmp3_m, out0, out2); \ + MSA_PCKOD_D2(RTYPE, tmp0_m, tmp2_m, tmp1_m, tmp3_m, out1, out3); \ +} + +/* Description : Transposes 8x8 block with half word elements in vectors.
+ * Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7 + * Outputs - out0, out1, out2, out3, out4, out5, out6, out7 + * Return Type - RTYPE + * Details : + */ +#define MSA_TRANSPOSE8x8_H(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3, out4, out5, out6, out7) \ +{ \ + v8i16 s0_m, s1_m; \ + v8i16 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ + v8i16 tmp4_m, tmp5_m, tmp6_m, tmp7_m; \ + \ + MSA_ILVR_H2(v8i16, in6, in4, in7, in5, s0_m, s1_m); \ + MSA_ILVRL_H2(v8i16, s1_m, s0_m, tmp0_m, tmp1_m); \ + MSA_ILVL_H2(v8i16, in6, in4, in7, in5, s0_m, s1_m); \ + MSA_ILVRL_H2(v8i16, s1_m, s0_m, tmp2_m, tmp3_m); \ + MSA_ILVR_H2(v8i16, in2, in0, in3, in1, s0_m, s1_m); \ + MSA_ILVRL_H2(v8i16, s1_m, s0_m, tmp4_m, tmp5_m); \ + MSA_ILVL_H2(v8i16, in2, in0, in3, in1, s0_m, s1_m); \ + MSA_ILVRL_H2(v8i16, s1_m, s0_m, tmp6_m, tmp7_m); \ + MSA_PCKEV_D4(RTYPE, tmp0_m, tmp4_m, tmp1_m, tmp5_m, tmp2_m, tmp6_m, \ + tmp3_m, tmp7_m, out0, out2, out4, out6); \ + MSA_PCKOD_D4(RTYPE, tmp0_m, tmp4_m, tmp1_m, tmp5_m, tmp2_m, tmp6_m, \ + tmp3_m, tmp7_m, out1, out3, out5, out7); \ +} + +#endif /* _MSA_MACROS_H */ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/sad_common.h b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/sad_common.h new file mode 100644 index 000000000..01ada25b5 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/sad_common.h @@ -0,0 +1,124 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifndef WELS_SAD_COMMON_H_ +#define WELS_SAD_COMMON_H_ + +#include "typedefs.h" + + +//===================SAD=====================// +int32_t WelsSampleSad16x16_c (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSad16x8_c (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSad8x16_c (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSad8x8_c (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSad8x4_c( uint8_t *, int32_t, uint8_t *, int32_t ); +int32_t WelsSampleSad4x8_c( uint8_t *, int32_t, uint8_t *, int32_t ); +int32_t WelsSampleSad4x4_c (uint8_t*, int32_t, uint8_t*, int32_t); + + + +void WelsSampleSadFour16x16_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSample2, int32_t iStride2, int32_t* pSad); +void WelsSampleSadFour16x8_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSample2, int32_t iStride2, int32_t* pSad); +void WelsSampleSadFour8x16_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSample2, int32_t iStride2, int32_t* pSad); +void WelsSampleSadFour8x8_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSample2, int32_t iStride2, int32_t* pSad); +void WelsSampleSadFour4x4_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSample2, int32_t iStride2, int32_t* pSad); +void WelsSampleSadFour8x4_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSample2, int32_t iStride2, int32_t* pSad); +void WelsSampleSadFour4x8_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSample2, int32_t iStride2, int32_t* pSad); + +#if defined(__cplusplus) +extern "C" { +#endif//__cplusplus + +#if defined (X86_ASM) + +int32_t WelsSampleSad4x4_mmx (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSad16x16_sse2 (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSad16x8_sse2 (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSad8x16_sse2 (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSad8x8_sse21 (uint8_t*, int32_t, uint8_t*, int32_t); + +void WelsSampleSadFour16x16_sse2 (uint8_t*, int32_t, uint8_t*, 
int32_t, int32_t*); +void WelsSampleSadFour16x8_sse2 (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*); +void WelsSampleSadFour8x16_sse2 (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*); +void WelsSampleSadFour8x8_sse2 (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*); +void WelsSampleSadFour4x4_sse2 (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*); + +#endif//X86_ASM + +#if defined (HAVE_NEON) + +int32_t WelsSampleSad4x4_neon (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSad16x16_neon (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSad16x8_neon (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSad8x16_neon (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSad8x8_neon (uint8_t*, int32_t, uint8_t*, int32_t); + +void WelsSampleSadFour16x16_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*); +void WelsSampleSadFour16x8_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*); +void WelsSampleSadFour8x16_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*); +void WelsSampleSadFour8x8_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*); +void WelsSampleSadFour4x4_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*); + +#endif + +#if defined (HAVE_NEON_AARCH64) +int32_t WelsSampleSad4x4_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSad16x16_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSad16x8_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSad8x16_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSad8x8_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t); + +void WelsSampleSadFour16x16_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*); +void WelsSampleSadFour16x8_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*); +void WelsSampleSadFour8x16_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*); +void WelsSampleSadFour8x8_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*); +void 
WelsSampleSadFour4x4_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*); +#endif + +#if defined (HAVE_MMI) +int32_t WelsSampleSad4x4_mmi (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSad16x16_mmi (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSad16x8_mmi (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSad8x16_mmi (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSad8x8_mmi (uint8_t*, int32_t, uint8_t*, int32_t); + +void WelsSampleSadFour16x16_mmi (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*); +void WelsSampleSadFour16x8_mmi (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*); +void WelsSampleSadFour8x16_mmi (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*); +void WelsSampleSadFour8x8_mmi (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*); +#endif//HAVE_MMI +#if defined(__cplusplus) +} +#endif//__cplusplus + +#endif //WELS_SAD_COMMON_H_ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/typedefs.h b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/typedefs.h new file mode 100644 index 000000000..500241101 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/typedefs.h @@ -0,0 +1,86 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +// typedef.h +#ifndef WELS_TYPE_DEFINES_H__ +#define WELS_TYPE_DEFINES_H__ + +#include +#include + +//////////////////////////////////////////////////////////////////////////// +// NOTICE : ALL internal implement MUST use the data type defined as below +// ONLY except with the interface file !!!!! +//////////////////////////////////////////////////////////////////////////// + +#ifndef _MSC_VER + +#define __STDC_FORMAT_MACROS +#include +#include + +#ifdef __LP64__ +typedef int64_t intX_t; +#else +typedef int32_t intX_t; +#endif + +#else + +// FIXME: all singed type should be declared explicit, for example, int8_t should be declared as signed char. +typedef signed char int8_t ; +typedef unsigned char uint8_t ; +typedef short int16_t ; +typedef unsigned short uint16_t; +typedef int int32_t ; +typedef unsigned int uint32_t; +typedef __int64 int64_t ; +typedef unsigned __int64 uint64_t; +#define PRId64 "I64d" + +#ifdef _WIN64 +typedef int64_t intX_t; +#else +typedef int32_t intX_t; +#endif + +#endif // _MSC_VER defined + +// The 'float' type is portable and usable without any need for any extra typedefs. 
+ +#ifdef EPSN +#undef EPSN +#endif//EPSN +#define EPSN (0.000001f) // (1e-6) // desired float precision + +#endif //WELS_TYPE_DEFINES_H__ + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/utils.h b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/utils.h new file mode 100644 index 000000000..f70bd93ee --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/utils.h @@ -0,0 +1,95 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \brief Tool kits for decoder + * ( malloc, realloc, free, log output and PSNR calculation and so on ) + * + * \date 03/10/2009 Created + * + ************************************************************************************* + */ +#ifndef WELS_UTILS_H__ +#define WELS_UTILS_H__ + +#include +#include "typedefs.h" + +#define MAX_LOG_SIZE 1024 +#define MAX_MBS_PER_FRAME 36864 //in accordance with max level support in Rec +/* + * Function pointer declaration for various tool sets + */ +// wels log output +typedef void (*PWelsLogCallbackFunc) (void* pCtx, const int32_t iLevel, const char* kpFmt, va_list argv); + +typedef struct TagLogContext { + PWelsLogCallbackFunc pfLog; + void* pLogCtx; + void* pCodecInstance; +} SLogContext; + + +#ifdef __GNUC__ +extern void WelsLog (SLogContext* pCtx, int32_t iLevel, const char* kpFmt, ...) __attribute__ ((__format__ (__printf__, + 3, + 4))); +#else +extern void WelsLog (SLogContext* pCtx, int32_t iLevel, const char* kpFmt, ...); +#endif + +/* + * PSNR calculation routines + */ +/*! 
+ ************************************************************************************* + * \brief PSNR calculation utilization in Wels + * + * \param kpTarPic target picture to be calculated in Picture pData format + * \param kiTarStride stride of target picture pData pBuffer + * \param kpRefPic base referencing picture samples + * \param kiRefStride stride of reference picture pData pBuffer + * \param kiWidth picture iWidth in pixel + * \param kiHeight picture iHeight in pixel + * + * \return actual PSNR result; + * + * \note N/A + ************************************************************************************* + */ +float WelsCalcPsnr (const void* kpTarPic, + const int32_t kiTarStride, + const void* kpRefPic, + const int32_t kiRefStride, + const int32_t kiWidth, + const int32_t kiHeight); + + +#endif//WELS_UTILS_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/version.h b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/version.h new file mode 100644 index 000000000..9fab1b0ee --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/version.h @@ -0,0 +1,10 @@ +#ifndef VERSION_H +#define VERSION_H + +#ifdef GENERATED_VERSION_HEADER +#include "version_gen.h" +#else +#define VERSION_NUMBER "openh264 default: 1.4" +#endif + +#endif // VERSION_H diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/version_gen.h.template b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/version_gen.h.template new file mode 100644 index 000000000..397726d1f --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/version_gen.h.template @@ -0,0 +1,6 @@ +#ifndef VERSION_GEN_H +#define VERSION_GEN_H + +#define VERSION_NUMBER $FULL_VERSION + +#endif // VERSION_GEN_H diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/welsCodecTrace.h b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/welsCodecTrace.h new file mode 100644 index 
000000000..b6c323f91 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/welsCodecTrace.h @@ -0,0 +1,64 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifndef WELS_CODEC_TRACE +#define WELS_CODEC_TRACE + +#include +#include "typedefs.h" +#include "utils.h" +#include "codec_app_def.h" +#include "codec_api.h" + +class welsCodecTrace { + public: + welsCodecTrace(); + ~welsCodecTrace(); + + void SetCodecInstance (void* pCodecInstance); + void SetTraceLevel (const int32_t kiLevel); + void SetTraceCallback (WelsTraceCallback func); + void SetTraceCallbackContext (void* pCtx); + + private: + static void StaticCodecTrace (void* pCtx, const int32_t kiLevel, const char* kpStrFormat, va_list vl); + void CodecTrace (const int32_t kiLevel, const char* kpStrFormat, va_list vl); + + int32_t m_iTraceLevel; + WelsTraceCallback m_fpTrace; + void* m_pTraceCtx; + public: + + SLogContext m_sLogCtx; +}; + +#endif //WELS_CODEC_TRACE diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/wels_common_defs.h b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/wels_common_defs.h new file mode 100644 index 000000000..96ba01a79 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/wels_common_defs.h @@ -0,0 +1,373 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +//wels_common_defs.h +#ifndef WELS_COMMON_DEFS_H__ +#define WELS_COMMON_DEFS_H__ + +#include "typedefs.h" +#include "macros.h" +#include "codec_app_def.h" + + +namespace WelsCommon { +/*common use table*/ + +#define CTX_NA 0 +#define WELS_CONTEXT_COUNT 460 +#define LEVEL_NUMBER 17 +typedef struct TagLevelLimits { + ELevelIdc uiLevelIdc; // level idc + uint32_t uiMaxMBPS; // Max macroblock processing rate(MB/s) + uint32_t uiMaxFS; // Max frame size(MBs) + uint32_t uiMaxDPBMbs;// Max decoded picture buffer size(MBs) + uint32_t uiMaxBR; // Max video bit rate + uint32_t uiMaxCPB; // Max CPB size + int16_t iMinVmv; // Vertical MV component range lower bound + int16_t iMaxVmv; // Vertical MV component range upper bound + uint16_t uiMinCR; // Min compression ratio + int16_t iMaxMvsPer2Mb; // Max number of motion vectors per two consecutive MBs +} SLevelLimits; + +#define CpbBrNalFactor 1200 //baseline,main,and extended profiles. 
+extern const SLevelLimits g_ksLevelLimits[LEVEL_NUMBER]; +extern const uint32_t g_kuiLevelMaps[LEVEL_NUMBER]; +extern const uint8_t g_kuiMbCountScan4Idx[24]; +extern const uint8_t g_kuiCache30ScanIdx[16]; +extern const uint8_t g_kuiCache48CountScan4Idx[24]; + +extern const uint8_t g_kuiMatrixV[6][8][8]; + +extern const uint8_t g_kuiDequantScaling4x4Default[2][16]; +extern const uint8_t g_kuiDequantScaling8x8Default[2][64]; +extern const ALIGNED_DECLARE (uint16_t, g_kuiDequantCoeff[52][8], 16); +extern const ALIGNED_DECLARE (uint16_t, g_kuiDequantCoeff8x8[52][64], 16); +extern const uint8_t g_kuiChromaQpTable[52]; + +extern const uint8_t g_kuiCabacRangeLps[64][4]; +extern const int8_t g_kiCabacGlobalContextIdx[WELS_CONTEXT_COUNT][4][2]; +extern const uint8_t g_kuiStateTransTable[64][2]; +extern const uint32_t g_kuiGolombUELength[256]; +/* + * NAL Unit Type (5 Bits) + */ +enum EWelsNalUnitType { + NAL_UNIT_UNSPEC_0 = 0, + NAL_UNIT_CODED_SLICE = 1, + NAL_UNIT_CODED_SLICE_DPA = 2, + NAL_UNIT_CODED_SLICE_DPB = 3, + NAL_UNIT_CODED_SLICE_DPC = 4, + NAL_UNIT_CODED_SLICE_IDR = 5, + NAL_UNIT_SEI = 6, + NAL_UNIT_SPS = 7, + NAL_UNIT_PPS = 8, + NAL_UNIT_AU_DELIMITER = 9, + NAL_UNIT_END_OF_SEQ = 10, + NAL_UNIT_END_OF_STR = 11, + NAL_UNIT_FILLER_DATA = 12, + NAL_UNIT_SPS_EXT = 13, + NAL_UNIT_PREFIX = 14, + NAL_UNIT_SUBSET_SPS = 15, + NAL_UNIT_DEPTH_PARAM = 16, // NAL_UNIT_RESV_16 + NAL_UNIT_RESV_17 = 17, + NAL_UNIT_RESV_18 = 18, + NAL_UNIT_AUX_CODED_SLICE = 19, + NAL_UNIT_CODED_SLICE_EXT = 20, + NAL_UNIT_MVC_SLICE_EXT = 21, // NAL_UNIT_RESV_21 + NAL_UNIT_RESV_22 = 22, + NAL_UNIT_RESV_23 = 23, + NAL_UNIT_UNSPEC_24 = 24, + NAL_UNIT_UNSPEC_25 = 25, + NAL_UNIT_UNSPEC_26 = 26, + NAL_UNIT_UNSPEC_27 = 27, + NAL_UNIT_UNSPEC_28 = 28, + NAL_UNIT_UNSPEC_29 = 29, + NAL_UNIT_UNSPEC_30 = 30, + NAL_UNIT_UNSPEC_31 = 31 +}; + +/* + * NAL Reference IDC (2 Bits) + */ + +enum EWelsNalRefIdc { + NRI_PRI_LOWEST = 0, + NRI_PRI_LOW = 1, + NRI_PRI_HIGH = 2, + NRI_PRI_HIGHEST = 3 +}; + +/* + * VCL TYPE + 
*/ + +enum EVclType { + NON_VCL = 0, + VCL = 1, + NOT_APP = 2 +}; + +/* + * vcl type map for given NAL unit type and corresponding H264 type (0: AVC; 1: SVC). + */ +extern const EVclType g_keTypeMap[32][2]; + +#define IS_VCL_NAL(t, ext_idx) (g_keTypeMap[t][ext_idx] == VCL) +#define IS_PARAM_SETS_NALS(t) ( (t) == NAL_UNIT_SPS || (t) == NAL_UNIT_PPS || (t) == NAL_UNIT_SUBSET_SPS ) +#define IS_SPS_NAL(t) ( (t) == NAL_UNIT_SPS ) +#define IS_SUBSET_SPS_NAL(t) ( (t) == NAL_UNIT_SUBSET_SPS ) +#define IS_PPS_NAL(t) ( (t) == NAL_UNIT_PPS ) +#define IS_SEI_NAL(t) ( (t) == NAL_UNIT_SEI ) +#define IS_AU_DELIMITER_NAL(t) ( (t) == NAL_UNIT_AU_DELIMITER ) +#define IS_PREFIX_NAL(t) ( (t) == NAL_UNIT_PREFIX ) +#define IS_SUBSET_SPS_USED(t) ( (t) == NAL_UNIT_SUBSET_SPS || (t) == NAL_UNIT_CODED_SLICE_EXT ) +#define IS_VCL_NAL_AVC_BASE(t) ( (t) == NAL_UNIT_CODED_SLICE || (t) == NAL_UNIT_CODED_SLICE_IDR ) +#define IS_NEW_INTRODUCED_SVC_NAL(t) ( (t) == NAL_UNIT_PREFIX || (t) == NAL_UNIT_CODED_SLICE_EXT ) + + +/* Base SSlice Types + * Invalid in case of eSliceType exceeds 9, + * Need trim when eSliceType > 4 as fixed SliceType(eSliceType-4), + * meaning mapped version after eSliceType minus 4. 
+ */ + +enum EWelsSliceType { + P_SLICE = 0, + B_SLICE = 1, + I_SLICE = 2, + SP_SLICE = 3, + SI_SLICE = 4, + UNKNOWN_SLICE = 5 +}; + +/* SSlice Types in scalable extension */ +enum ESliceTypeExt { + EP_SLICE = 0, // EP_SLICE: 0, 5 + EB_SLICE = 1, // EB_SLICE: 1, 6 + EI_SLICE = 2 // EI_SLICE: 2, 7 +}; + +/* List Index */ +enum EListIndex { + LIST_0 = 0, + LIST_1 = 1, + LIST_A = 2 +}; + + + +/* Motion Vector components */ +enum EMvComp { + MV_X = 0, + MV_Y = 1, + MV_A = 2 +}; + +/* Chroma Components */ + +enum EChromaComp { + CHROMA_CB = 0, + CHROMA_CR = 1, + CHROMA_A = 2 +}; + + + +/* + * Memory Management Control Operation (MMCO) code + */ +enum EMmcoCode { + MMCO_END = 0, + MMCO_SHORT2UNUSED = 1, + MMCO_LONG2UNUSED = 2, + MMCO_SHORT2LONG = 3, + MMCO_SET_MAX_LONG = 4, + MMCO_RESET = 5, + MMCO_LONG = 6 +}; + +enum EVuiVideoFormat { + VUI_COMPONENT = 0, + VUI_PAL = 1, + VUI_NTSC = 2, + VUI_SECAM = 3, + VUI_MAC = 4, + VUI_UNSPECIFIED = 5, + VUI_RESERVED1 = 6, + VUI_RESERVED2 = 7 +}; + +/* + * Bit-stream auxiliary reading / writing + */ +typedef struct TagBitStringAux { + uint8_t* pStartBuf; // buffer to start position + uint8_t* pEndBuf; // buffer + length + int32_t iBits; // count bits of overall bitstreaming input + + intX_t iIndex; //only for cavlc usage + uint8_t* pCurBuf; // current reading position + uint32_t uiCurBits; + int32_t iLeftBits; // count number of available bits left ([1, 8]), + // need pointer to next byte start position in case 0 bit left then 8 instead +} SBitStringAux, *PBitStringAux; + +/* NAL Unit Header in AVC, refer to Page 56 in JVT X201wcm */ +typedef struct TagNalUnitHeader { + uint8_t uiForbiddenZeroBit; + uint8_t uiNalRefIdc; + EWelsNalUnitType eNalUnitType; + uint8_t uiReservedOneByte; // only padding usage +} SNalUnitHeader, *PNalUnitHeader; + +/* NAL Unit Header in scalable extension syntax, refer to Page 390 in JVT X201wcm */ +typedef struct TagNalUnitHeaderExt { + SNalUnitHeader sNalUnitHeader; + + // uint8_t reserved_one_bit; + 
bool bIdrFlag; + uint8_t uiPriorityId; + int8_t iNoInterLayerPredFlag; // change as int8_t to support 3 values probably in encoder + uint8_t uiDependencyId; + + uint8_t uiQualityId; + uint8_t uiTemporalId; + bool bUseRefBasePicFlag; + bool bDiscardableFlag; + + bool bOutputFlag; + uint8_t uiReservedThree2Bits; + // Derived variable(s) + uint8_t uiLayerDqId; + bool bNalExtFlag; +} SNalUnitHeaderExt, *PNalUnitHeaderExt; + +/* AVC MB types*/ +#define MB_TYPE_INTRA4x4 0x00000001 +#define MB_TYPE_INTRA16x16 0x00000002 +#define MB_TYPE_INTRA8x8 0x00000004 +#define MB_TYPE_16x16 0x00000008 +#define MB_TYPE_16x8 0x00000010 +#define MB_TYPE_8x16 0x00000020 +#define MB_TYPE_8x8 0x00000040 +#define MB_TYPE_8x8_REF0 0x00000080 +#define MB_TYPE_SKIP 0x00000100 +#define MB_TYPE_INTRA_PCM 0x00000200 +#define MB_TYPE_INTRA_BL 0x00000400 +#define MB_TYPE_DIRECT 0x00000800 +#define MB_TYPE_P0L0 0x00001000 +#define MB_TYPE_P1L0 0x00002000 +#define MB_TYPE_P0L1 0x00004000 +#define MB_TYPE_P1L1 0x00008000 +#define MB_TYPE_L0 (MB_TYPE_P0L0 | MB_TYPE_P1L0) +#define MB_TYPE_L1 (MB_TYPE_P0L1 | MB_TYPE_P1L1) + +#define SUB_MB_TYPE_8x8 0x00000001 +#define SUB_MB_TYPE_8x4 0x00000002 +#define SUB_MB_TYPE_4x8 0x00000004 +#define SUB_MB_TYPE_4x4 0x00000008 + +#define MB_TYPE_INTRA (MB_TYPE_INTRA4x4 | MB_TYPE_INTRA16x16 | MB_TYPE_INTRA8x8 | MB_TYPE_INTRA_PCM) +#define MB_TYPE_INTER (MB_TYPE_16x16 | MB_TYPE_16x8 | MB_TYPE_8x16 | MB_TYPE_8x8 | MB_TYPE_8x8_REF0 | MB_TYPE_SKIP | MB_TYPE_DIRECT) +#define IS_INTRA4x4(type) ( MB_TYPE_INTRA4x4 == (type) ) +#define IS_INTRA8x8(type) ( MB_TYPE_INTRA8x8 == (type) ) +#define IS_INTRANxN(type) ( MB_TYPE_INTRA4x4 == (type) || MB_TYPE_INTRA8x8 == (type) ) +#define IS_INTRA16x16(type) ( MB_TYPE_INTRA16x16 == (type) ) +#define IS_INTRA(type) ( (type)&MB_TYPE_INTRA ) +#define IS_INTER(type) ( (type)&MB_TYPE_INTER ) +#define IS_INTER_16x16(type) ( (type)&MB_TYPE_16x16 ) +#define IS_INTER_16x8(type) ( (type)&MB_TYPE_16x8 ) +#define IS_INTER_8x16(type) ( 
(type)&MB_TYPE_8x16 ) +#define IS_TYPE_L0(type) ( (type)&MB_TYPE_L0 ) +#define IS_TYPE_L1(type) ( (type)&MB_TYPE_L1 ) +#define IS_DIR(a, part, list) ((a) & (MB_TYPE_P0L0<<((part)+2*(list)))) + + +#define IS_SKIP(type) ( ((type)&MB_TYPE_SKIP) != 0 ) +#define IS_DIRECT(type) ( ((type)&MB_TYPE_DIRECT) != 0 ) +#define IS_SVC_INTER(type) IS_INTER(type) +#define IS_I_BL(type) ( (type) == MB_TYPE_INTRA_BL ) +#define IS_SVC_INTRA(type) ( IS_I_BL(type) || IS_INTRA(type) ) +#define IS_Inter_8x8(type) ( ((type)&MB_TYPE_8x8) != 0) +#define IS_SUB_8x8(sub_type) (((sub_type)&SUB_MB_TYPE_8x8) != 0) +#define IS_SUB_8x4(sub_type) (((sub_type)&SUB_MB_TYPE_8x4) != 0) +#define IS_SUB_4x8(sub_type) (((sub_type)&SUB_MB_TYPE_4x8) != 0) +#define IS_SUB_4x4(sub_type) (((sub_type)&SUB_MB_TYPE_4x4) != 0) + +#define REF_NOT_AVAIL -2 +#define REF_NOT_IN_LIST -1 //intra + +/////////intra16x16 Luma +#define I16_PRED_INVALID -1 +#define I16_PRED_V 0 +#define I16_PRED_H 1 +#define I16_PRED_DC 2 +#define I16_PRED_P 3 + +#define I16_PRED_DC_L 4 +#define I16_PRED_DC_T 5 +#define I16_PRED_DC_128 6 +#define I16_PRED_DC_A 7 +//////////intra4x4 Luma +// Here, I8x8 also use these definitions +#define I4_PRED_INVALID 0 +#define I4_PRED_V 0 +#define I4_PRED_H 1 +#define I4_PRED_DC 2 +#define I4_PRED_DDL 3 //diagonal_down_left +#define I4_PRED_DDR 4 //diagonal_down_right +#define I4_PRED_VR 5 //vertical_right +#define I4_PRED_HD 6 //horizon_down +#define I4_PRED_VL 7 //vertical_left +#define I4_PRED_HU 8 //horizon_up + +#define I4_PRED_DC_L 9 +#define I4_PRED_DC_T 10 +#define I4_PRED_DC_128 11 + +#define I4_PRED_DDL_TOP 12 //right-top replacing by padding rightmost pixel of top +#define I4_PRED_VL_TOP 13 //right-top replacing by padding rightmost pixel of top +#define I4_PRED_A 14 + +//////////intra Chroma +#define C_PRED_INVALID -1 +#define C_PRED_DC 0 +#define C_PRED_H 1 +#define C_PRED_V 2 +#define C_PRED_P 3 + +#define C_PRED_DC_L 4 +#define C_PRED_DC_T 5 +#define C_PRED_DC_128 6 +#define C_PRED_A 7 +} 
+#endif//WELS_COMMON_DEFS_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/wels_const_common.h b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/wels_const_common.h new file mode 100644 index 000000000..4d22d93db --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/inc/wels_const_common.h @@ -0,0 +1,63 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifndef WELS_CONST_COMMON_H__ +#define WELS_CONST_COMMON_H__ + +// Miscellaneous sizing infos +#ifndef MAX_FNAME_LEN +#define MAX_FNAME_LEN 256 // maximal length of file name in char size +#endif//MAX_FNAME_LEN + +#ifndef WELS_LOG_BUF_SIZE +#define WELS_LOG_BUF_SIZE 4096 +#endif//WELS_LOG_BUF_SIZE + +#ifndef MAX_TRACE_LOG_SIZE +#define MAX_TRACE_LOG_SIZE (50 * (1<<20)) // max trace log size: 50 MB, overwrite occur if log file size exceeds this size +#endif//MAX_TRACE_LOG_SIZE + +/* MB width in pixels for specified colorspace I420 usually used in codec */ +#define MB_WIDTH_LUMA 16 +#define MB_WIDTH_CHROMA (MB_WIDTH_LUMA>>1) +/* MB height in pixels for specified colorspace I420 usually used in codec */ +#define MB_HEIGHT_LUMA 16 +#define MB_HEIGHT_CHROMA (MB_HEIGHT_LUMA>>1) +#define MB_COEFF_LIST_SIZE (256+((MB_WIDTH_CHROMA*MB_HEIGHT_CHROMA)<<1)) +#define MB_PARTITION_SIZE 4 // Macroblock partition size in 8x8 sub-blocks +#define MB_BLOCK4x4_NUM 16 +#define MB_BLOCK8x8_NUM 4 +#define MAX_SPS_COUNT 32 // Count number of SPS +#define BASE_QUALITY_ID 0 + + +#endif//WELS_CONST_COMMON_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/src/WelsTaskThread.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/common/src/WelsTaskThread.cpp new file mode 100644 index 000000000..44f4e7406 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/src/WelsTaskThread.cpp @@ -0,0 +1,88 @@ +/*! + * \copy + * Copyright (c) 2009-2015, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ *
+ *
+ * \file WelsTaskThread.cpp
+ *
+ * \brief functions for TaskThread
+ *
+ * \date 5/09/2012 Created
+ *
+ *************************************************************************************
+ */
+#include "WelsTaskThread.h"
+
+namespace WelsCommon {
+
+// Stores the sink that ExecuteTask() notifies around every task.
+// pSink may be NULL, in which case the notifications are skipped.
+CWelsTaskThread::CWelsTaskThread (IWelsTaskThreadSink* pSink) : m_pSink (pSink) {
+  WelsThreadSetName ("CWelsTaskThread");
+
+  m_pTask = NULL;               // no task assigned yet
+  m_uiID = (uintptr_t) (this);  // the object address is used as the thread id
+}
+
+
+// Nothing to release here.
+CWelsTaskThread::~CWelsTaskThread() {
+}
+
+// Runs the currently assigned task (if any) while holding m_cLockTask,
+// reporting start and stop to the sink, then clears the assignment.
+void CWelsTaskThread::ExecuteTask() {
+  CWelsAutoLock cTaskGuard (m_cLockTask);
+
+  if (NULL != m_pSink) {
+    m_pSink->OnTaskStart (this, m_pTask);
+  }
+
+  if (NULL != m_pTask) {
+    m_pTask->Execute();
+  }
+
+  if (NULL != m_pSink) {
+    m_pSink->OnTaskStop (this, m_pTask);
+  }
+
+  m_pTask = NULL;
+}
+
+// Hands pTask to this thread and signals it.
+// Returns WELS_THREAD_ERROR_GENERAL when the thread is not running,
+// WELS_THREAD_ERROR_OK otherwise.
+WELS_THREAD_ERROR_CODE CWelsTaskThread::SetTask (WelsCommon::IWelsTask* pTask) {
+  CWelsAutoLock cTaskGuard (m_cLockTask);
+
+  if (GetRunning()) {
+    // m_pTask is published under m_hMutex before the thread is signalled
+    WelsMutexLock(&m_hMutex);
+    m_pTask = pTask;
+    WelsMutexUnlock(&m_hMutex);
+    SignalThread();
+
+    return WELS_THREAD_ERROR_OK;
+  }
+
+  return WELS_THREAD_ERROR_GENERAL;
+}
+
+
+}
+
diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/src/WelsThread.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/common/src/WelsThread.cpp
new file mode 100644
index 000000000..37ad319c2
--- /dev/null
+++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/src/WelsThread.cpp
@@ -0,0 +1,126 @@
+/*!
+ * \copy
+ * Copyright (c) 2009-2015, Cisco Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ *
+ *
+ * \file WelsThread.cpp
+ *
+ * \brief functions for the CWelsThread base class
+ *
+ * \date 5/09/2012 Created
+ *
+ *************************************************************************************
+ */
+
+#include "WelsThread.h"
+
+namespace WelsCommon {
+
+// Creates the wake-up event and its mutex; the OS thread itself is only
+// created by Start(). m_iConVar starts at 1, i.e. "not signalled yet".
+CWelsThread::CWelsThread() :
+  m_hThread (0),
+  m_bRunning (false),
+  m_bEndFlag (false) {
+
+  WelsEventOpen (&m_hEvent);
+  WelsMutexInit(&m_hMutex);
+  m_iConVar = 1;
+}
+
+// Stops the thread (if running) before releasing the event and the mutex.
+CWelsThread::~CWelsThread() {
+  Kill();
+  WelsEventClose (&m_hEvent);
+  WelsMutexDestroy(&m_hMutex);
+}
+
+// Thread main loop: sleep on the event, leave when the end flag is set,
+// otherwise run one ExecuteTask() round. Clears the running flag on exit.
+void CWelsThread::Thread() {
+  while (true) {
+    WelsEventWait (&m_hEvent,&m_hMutex,m_iConVar);
+
+    if (GetEndFlag()) {
+      break;
+    }
+
+    m_iConVar = 1;
+    ExecuteTask();//in ExecuteTask there will be OnTaskStop which opens the potential new Signaling of next run, so the setting of m_iConVar = 1 should be before ExecuteTask()
+  }
+
+  SetRunning (false);
+}
+
+// Creates the OS thread and blocks (polling every 1 ms) until it has marked
+// itself running. Returns WELS_THREAD_ERROR_OK if already running, the
+// WelsThreadCreate error code on failure, and on non-Apple platforms
+// WELS_THREAD_ERROR_GENERAL when the event could not be opened.
+WELS_THREAD_ERROR_CODE CWelsThread::Start() {
+#ifndef __APPLE__
+  if (NULL == m_hEvent) {
+    return WELS_THREAD_ERROR_GENERAL;
+  }
+#endif
+  if (GetRunning()) {
+    return WELS_THREAD_ERROR_OK;
+  }
+
+  SetEndFlag (false);
+
+  WELS_THREAD_ERROR_CODE rc = WelsThreadCreate (&m_hThread,
+                              (LPWELS_THREAD_ROUTINE)TheThread, this, 0);
+
+  if (WELS_THREAD_ERROR_OK != rc) {
+    return rc;
+  }
+
+  while (!GetRunning()) {
+    WelsSleep (1);
+  }
+
+  return WELS_THREAD_ERROR_OK;
+}
+
+// Asks the thread to exit and joins it; a no-op when it is not running.
+void CWelsThread::Kill() {
+  if (!GetRunning()) {
+    return;
+  }
+
+  SetEndFlag (true);
+
+  // wake the loop in Thread() so that it can observe the end flag
+  WelsEventSignal (&m_hEvent,&m_hMutex,&m_iConVar);
+  WelsThreadJoin (m_hThread);
+  return;
+}
+
+// Static entry point handed to WelsThreadCreate(); pParam is the CWelsThread
+// instance that Start() passed as the routine argument.
+WELS_THREAD_ROUTINE_TYPE CWelsThread::TheThread (void* pParam) {
+  CWelsThread* pThis = static_cast<CWelsThread*> (pParam);
+
+  pThis->SetRunning (true);
+
+  pThis->Thread();
+
+  WELS_THREAD_ROUTINE_RETURN (NULL);
+}
+
+}
+
+
diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/src/WelsThreadLib.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/common/src/WelsThreadLib.cpp
new file mode 100644
index 000000000..4477e34e3
---
/dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/src/WelsThreadLib.cpp @@ -0,0 +1,535 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ *
+ *
+ * \file WelsThreadLib.cpp
+ *
+ * \brief Interfaces introduced in thread programming
+ *
+ * \date 11/17/2009 Created
+ *
+ *************************************************************************************
+ */
+
+
+#ifdef __linux__
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <sched.h>
+#elif !defined(_WIN32) && !defined(__CYGWIN__)
+#include <sys/types.h>
+#include <sys/param.h>
+#include <unistd.h>
+#ifndef __Fuchsia__
+#include <sys/sysctl.h>
+#endif
+#ifdef __APPLE__
+#define HW_NCPU_NAME "hw.logicalcpu"
+#else
+#define HW_NCPU_NAME "hw.ncpu"
+#endif
+#endif
+#ifdef ANDROID_NDK
+#include <cpu-features.h>
+#endif
+#ifdef __ANDROID__
+#include <android/api-level.h>
+#endif
+
+#include "WelsThreadLib.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+
+#if defined(_WIN32) || defined(__CYGWIN__)
+
+// Mutex wrappers (Windows): thin shims over CRITICAL_SECTION, always return OK.
+WELS_THREAD_ERROR_CODE WelsMutexInit (WELS_MUTEX* mutex) {
+  InitializeCriticalSection (mutex);
+
+  return WELS_THREAD_ERROR_OK;
+}
+
+WELS_THREAD_ERROR_CODE WelsMutexLock (WELS_MUTEX* mutex) {
+  EnterCriticalSection (mutex);
+
+  return WELS_THREAD_ERROR_OK;
+}
+
+WELS_THREAD_ERROR_CODE WelsMutexUnlock (WELS_MUTEX* mutex) {
+  LeaveCriticalSection (mutex);
+
+  return WELS_THREAD_ERROR_OK;
+}
+
+WELS_THREAD_ERROR_CODE WelsMutexDestroy (WELS_MUTEX* mutex) {
+  DeleteCriticalSection (mutex);
+
+  return WELS_THREAD_ERROR_OK;
+}
+
+#else /* _WIN32 */
+
+// Mutex wrappers (POSIX): forward to pthread_mutex_* and return its result.
+WELS_THREAD_ERROR_CODE WelsMutexInit (WELS_MUTEX* mutex) {
+  return pthread_mutex_init (mutex, NULL);
+}
+
+WELS_THREAD_ERROR_CODE WelsMutexLock (WELS_MUTEX* mutex) {
+  return pthread_mutex_lock (mutex);
+}
+
+WELS_THREAD_ERROR_CODE WelsMutexUnlock (WELS_MUTEX* mutex) {
+  return pthread_mutex_unlock (mutex);
+}
+
+WELS_THREAD_ERROR_CODE WelsMutexDestroy (WELS_MUTEX* mutex) {
+  return pthread_mutex_destroy (mutex);
+}
+
+#endif /* !_WIN32 */
+
+#if defined(_WIN32) || defined(__CYGWIN__)
+
+// Creates an unnamed auto-reset event; *event is NULL on failure.
+// event_name is not used.
+WELS_THREAD_ERROR_CODE WelsEventOpen (WELS_EVENT* event, const char* event_name) {
+  WELS_EVENT h = CreateEvent (NULL, FALSE, FALSE, NULL);
+  *event = h;
+  if (h == NULL) {
+    return WELS_THREAD_ERROR_GENERAL;
+  }
+  return WELS_THREAD_ERROR_OK;
+}
+
+// Decrements *iCondition and sets the event once it has dropped to zero or
+// below. Returns OK only when the event was actually set. pMutex is not used.
+WELS_THREAD_ERROR_CODE WelsEventSignal (WELS_EVENT* event, WELS_MUTEX *pMutex, int* iCondition) {
+  (*iCondition) --;
+  if ((*iCondition) <= 0) {
+    if (SetEvent (*event)) {
+      return WELS_THREAD_ERROR_OK;
+    }
+  }
+  return WELS_THREAD_ERROR_GENERAL;
+}
+
+// Blocks until the event is set. pMutex and iCondition are not used here.
+WELS_THREAD_ERROR_CODE WelsEventWait (WELS_EVENT* event, WELS_MUTEX* pMutex, int& iCondition) {
+  return WaitForSingleObject (*event, INFINITE);
+}
+
+// Blocks until the event is set or dwMilliseconds elapse.
+WELS_THREAD_ERROR_CODE WelsEventWaitWithTimeOut (WELS_EVENT* event, uint32_t dwMilliseconds, WELS_MUTEX* pMutex) {
+  return WaitForSingleObject (*event, dwMilliseconds);
+}
+
+// Blocks until any one of the nCount events in event_list is set.
+WELS_THREAD_ERROR_CODE WelsMultipleEventsWaitSingleBlocking (uint32_t nCount,
+    WELS_EVENT* event_list, WELS_EVENT* master_even, WELS_MUTEX* pMutext) {
+  // Don't need/use the master event for anything, since windows has got WaitForMultipleObjects
+  return WaitForMultipleObjects (nCount, event_list, FALSE, INFINITE);
+}
+
+// Closes the event handle and resets *event to NULL.
+WELS_THREAD_ERROR_CODE WelsEventClose (WELS_EVENT* event, const char* event_name) {
+  CloseHandle (*event);
+
+  *event = NULL;
+  return WELS_THREAD_ERROR_OK;
+}
+
+#ifndef WP80
+// Suspends the calling thread for dwMilliSecond milliseconds.
+void WelsSleep (uint32_t dwMilliSecond) {
+  ::Sleep (dwMilliSecond);
+}
+#else
+// WP80 variant: sleeps by waiting with a timeout on a lazily created,
+// process-wide event that this function never signals.
+void WelsSleep (uint32_t dwMilliSecond) {
+  static WELS_EVENT hSleepEvent = NULL;
+  if (!hSleepEvent) {
+    WELS_EVENT hLocalSleepEvent = NULL;
+    WELS_THREAD_ERROR_CODE ret = WelsEventOpen (&hLocalSleepEvent);
+    if (WELS_THREAD_ERROR_OK != ret) {
+      return;
+    }
+    WELS_EVENT hPreviousEvent = InterlockedCompareExchangePointerRelease (&hSleepEvent, hLocalSleepEvent, NULL);
+    if (hPreviousEvent) {
+      // another thread installed its event first; drop ours
+      WelsEventClose (&hLocalSleepEvent);
+    }
+    //On this singleton usage idea of using InterlockedCompareExchangePointerRelease:
+    // similar idea of can be found at msdn blog when introducing InterlockedCompareExchangePointerRelease
+  }
+
+  WaitForSingleObject (hSleepEvent, dwMilliSecond);
+}
+#endif
+
+// Starts a thread running routine(arg); attr is not used.
+// *thread is written only on success.
+WELS_THREAD_ERROR_CODE WelsThreadCreate (WELS_THREAD_HANDLE* thread, LPWELS_THREAD_ROUTINE routine,
+    void* arg, WELS_THREAD_ATTR attr) {
+  WELS_THREAD_HANDLE h = CreateThread (NULL, 0, routine, arg, 0, NULL);
+
+  if (h == NULL) {
+    return WELS_THREAD_ERROR_GENERAL;
+  }
+  *thread = h;
+
+  return WELS_THREAD_ERROR_OK;
+}
+
+WELS_THREAD_ERROR_CODE WelsThreadSetName (const char* thread_name) {
+  // do nothing
+  return WELS_THREAD_ERROR_OK;
+}
+
+
+// Waits for the thread to finish and closes its handle.
+WELS_THREAD_ERROR_CODE WelsThreadJoin (WELS_THREAD_HANDLE thread) {
+  WaitForSingleObject (thread, INFINITE);
+  CloseHandle (thread);
+
+  return WELS_THREAD_ERROR_OK;
+}
+
+
+WELS_THREAD_HANDLE WelsThreadSelf() {
+  return GetCurrentThread();
+}
+
+// Fills pInfo->ProcessorCount from GetSystemInfo().
+WELS_THREAD_ERROR_CODE WelsQueryLogicalProcessInfo (WelsLogicalProcessInfo* pInfo) {
+  SYSTEM_INFO si;
+
+  GetSystemInfo (&si);
+
+  pInfo->ProcessorCount = si.dwNumberOfProcessors;
+
+  return WELS_THREAD_ERROR_OK;
+}
+
+#else //platform: #ifdef _WIN32
+
+// Starts a thread running routine(arg); attr is not used.
+// Outside Android/Fuchsia the thread is requested with system contention
+// scope and the SCHED_FIFO policy. Returns 0 or the first pthread error.
+// The attribute object is destroyed on every path once it was initialised.
+WELS_THREAD_ERROR_CODE WelsThreadCreate (WELS_THREAD_HANDLE* thread, LPWELS_THREAD_ROUTINE routine,
+    void* arg, WELS_THREAD_ATTR attr) {
+  pthread_attr_t at;
+  WELS_THREAD_ERROR_CODE err = pthread_attr_init (&at);
+  if (err)
+    return err;
+#if !defined(__ANDROID__) && !defined(__Fuchsia__)
+  err = pthread_attr_setscope (&at, PTHREAD_SCOPE_SYSTEM);
+  if (!err)
+    err = pthread_attr_setschedpolicy (&at, SCHED_FIFO);
+#endif
+  if (!err)
+    err = pthread_create (thread, &at, routine, arg);
+
+  pthread_attr_destroy (&at);
+
+  return err;
+}
+
+// Names the calling thread where the platform offers pthread_setname_np;
+// elsewhere this does nothing. Always returns OK.
+WELS_THREAD_ERROR_CODE WelsThreadSetName (const char* thread_name) {
+#ifdef APPLE_IOS
+  pthread_setname_np (thread_name);
+#endif
+#if defined(__ANDROID__) && __ANDROID_API__ >= 9
+  pthread_setname_np (pthread_self(), thread_name);
+#endif
+  // do nothing
+  return WELS_THREAD_ERROR_OK;
+}
+
+WELS_THREAD_ERROR_CODE WelsThreadJoin (WELS_THREAD_HANDLE thread) {
+  return pthread_join (thread, NULL);
+}
+
+WELS_THREAD_HANDLE WelsThreadSelf() {
+  return pthread_self();
+}
+
+// unnamed semaphores aren't supported on OS X
+
+// Apple: initialises *p_event as a condition variable.
+// Elsewhere: allocates a semaphore with count 0 and stores it in *p_event;
+// *p_event is NULL when allocation or sem_init fails. event_name is not used.
+WELS_THREAD_ERROR_CODE WelsEventOpen (WELS_EVENT* p_event, const char* event_name) {
+#ifdef __APPLE__
+  WELS_THREAD_ERROR_CODE err= pthread_cond_init (p_event, NULL);
+  return err;
+#else
+  WELS_EVENT event = (WELS_EVENT) malloc (sizeof (*event));
+  if (event == NULL){
+    *p_event = NULL;
+    return WELS_THREAD_ERROR_GENERAL;
+  }
+  WELS_THREAD_ERROR_CODE err = sem_init (event, 0, 0);
+  if (!err) {
+    *p_event = event;
+    return err;
+  }
+  free (event);
+  *p_event = NULL;
+  return err;
+#endif
+}
+// Releases what WelsEventOpen() created. On the semaphore path a NULL event
+// (i.e. a failed WelsEventOpen) is reported as WELS_THREAD_ERROR_GENERAL
+// instead of being handed to sem_destroy().
+WELS_THREAD_ERROR_CODE WelsEventClose (WELS_EVENT* event, const char* event_name) {
+  //printf("event_close:%x, %s\n", event, event_name);
+#ifdef __APPLE__
+  WELS_THREAD_ERROR_CODE err = pthread_cond_destroy (event);
+  return err;
+#else
+  if (event == NULL || *event == NULL) {
+    return WELS_THREAD_ERROR_GENERAL;
+  }
+  WELS_THREAD_ERROR_CODE err = sem_destroy (*event); // match with sem_init
+  free (*event);
+  *event = NULL;
+  return err;
+#endif
+}
+
+// Suspends the calling thread for dwMilliSecond milliseconds.
+void WelsSleep (uint32_t dwMilliSecond) {
+  usleep (dwMilliSecond * 1000);
+}
+
+// Decrements *iCondition and wakes a waiter once it has dropped to zero or
+// below. Returns 0, or the error of pthread_cond_signal / sem_post.
+WELS_THREAD_ERROR_CODE WelsEventSignal (WELS_EVENT* event, WELS_MUTEX *pMutex, int* iCondition) {
+  WELS_THREAD_ERROR_CODE err = 0;
+  //fprintf( stderr, "before signal it, event=%x iCondition= %d..\n", event, *iCondition );
+#ifdef __APPLE__
+  WelsMutexLock (pMutex);
+  (*iCondition) --;
+  // sample the counter while the mutex is still held, so the decision below
+  // is based on the value this call produced
+  const int iRemaining = *iCondition;
+  WelsMutexUnlock (pMutex);
+  if (iRemaining <= 0) {
+    err = pthread_cond_signal (event);
+    //fprintf( stderr, "signal it, event=%x iCondition= %d..\n",event, *iCondition );
+
+  }
+#else
+  (*iCondition) --;
+  if ((*iCondition) <= 0) {
+//    int32_t val = 0;
+//    sem_getvalue(event, &val);
+//    fprintf( stderr, "before signal it, val= %d..\n",val );
+    // *event is NULL when WelsEventOpen() failed; do not post to it
+    if (event != NULL && *event != NULL)
+      err = sem_post (*event);
+//    sem_getvalue(event, &val);
+    //fprintf( stderr, "signal it, event=%x iCondition= %d..\n",event, *iCondition );
+  }
+#endif
+  //fprintf( stderr, "after signal it, event=%x iCondition= %d..\n",event, *iCondition );
+  return err;
+}
+
+// Apple: waits on the condition variable, under pMutex, until iCondition is
+// no longer positive. Elsewhere: blocks in sem_wait(); pMutex and iCondition
+// are not used on that path.
+WELS_THREAD_ERROR_CODE WelsEventWait (WELS_EVENT* event, WELS_MUTEX* pMutex, int& iCondition) {
+#ifdef __APPLE__
+  int err = 0;
+  WelsMutexLock(pMutex);
+  //fprintf( stderr, "WelsEventWait event %x %d..\n", event, iCondition );
+  while (iCondition>0) {
+    err = pthread_cond_wait (event, pMutex);
+  }
+  WelsMutexUnlock(pMutex);
+  return err;
+#else
+  return sem_wait (*event); // blocking until signaled
+#endif
+}
+
+// Waits for the event for at most dwMilliseconds.
+// (uint32_t)-1 means "no timeout" and blocks until signalled; this is the
+// same value the Windows implementation forwards to WaitForSingleObject.
+// Any other value is converted into an absolute deadline for the timed wait.
+// NOTE(review): on Apple pMutex is not locked here - presumably the caller
+// holds it, as pthread_cond_wait/timedwait require; confirm against callers.
+WELS_THREAD_ERROR_CODE WelsEventWaitWithTimeOut (WELS_EVENT* event, uint32_t dwMilliseconds, WELS_MUTEX* pMutex) {
+
+  if (dwMilliseconds == (uint32_t) - 1) {
+#if defined(__APPLE__)
+    return pthread_cond_wait (event, pMutex);
+#else
+    return sem_wait (*event);
+#endif
+  } else {
+    struct timespec ts;
+    struct timeval tv;
+
+    gettimeofday (&tv, 0);
+
+    // 64-bit arithmetic: dwMilliseconds * 1000000 does not fit in 32 bits
+    // for timeouts above ~4.2 seconds
+    const uint64_t uiNanoSec = (uint64_t) tv.tv_usec * 1000 + (uint64_t) dwMilliseconds * 1000000;
+    ts.tv_sec = tv.tv_sec + (time_t) (uiNanoSec / 1000000000);
+    ts.tv_nsec = (long) (uiNanoSec % 1000000000);
+
+#if defined(__APPLE__)
+    return pthread_cond_timedwait (event, pMutex, &ts);
+#else
+    return sem_timedwait (*event, &ts);
+#endif
+  }
+
+}
+
+// Waits until one of the nCount events in event_list is signalled and returns
+// WELS_THREAD_ERROR_WAIT_OBJECT_0 + its index. When master_event is given it
+// is waited on first, then the list is polled. Returns
+// WELS_THREAD_ERROR_WAIT_FAILED for nCount == 0.
+WELS_THREAD_ERROR_CODE WelsMultipleEventsWaitSingleBlocking (uint32_t nCount,
+    WELS_EVENT* event_list, WELS_EVENT* master_event, WELS_MUTEX* pMutex) {
+  uint32_t nIdx = 0;
+  uint32_t uiAccessTime = 2; // 2 us once
+
+  if (nCount == 0)
+    return WELS_THREAD_ERROR_WAIT_FAILED;
+#if defined(__APPLE__)
+  if (master_event != NULL) {
+    // This design relies on the events actually being semaphores;
+    // if multiple events in the list have been signalled, the master
+    // event should have a similar count (events in windows can't keep
+    // track of the actual count, but the master event isn't needed there
+    // since it uses WaitForMultipleObjects).
+    int32_t err = pthread_cond_wait (master_event, pMutex);
+    if (err != WELS_THREAD_ERROR_OK)
+      return err;
+    uiAccessTime = 0; // no blocking, just quickly loop through all to find the one that was signalled
+  }
+
+  while (1) {
+    nIdx = 0; // access each event by order
+    while (nIdx < nCount) {
+      int32_t err = 0;
+      int32_t wait_count = 0;
+
+      /*
+       * although such interface is not used in __GNUC__ like platform, to use
+       * pthread_cond_timedwait() might be better choice if need
+       */
+      do {
+        err = pthread_cond_wait (&event_list[nIdx], pMutex);
+        if (WELS_THREAD_ERROR_OK == err)
+          return WELS_THREAD_ERROR_WAIT_OBJECT_0 + nIdx;
+        else if (wait_count > 0 || uiAccessTime == 0)
+          break;
+        usleep (uiAccessTime);
+        ++ wait_count;
+      } while (1);
+      // we do need access next event next time
+      ++ nIdx;
+    }
+    usleep (1); // switch to working threads
+    if (master_event != NULL) {
+      // A master event was used and was signalled, but none of the events in the
+      // list was found to be signalled, thus wait a little more when rechecking
+      // the list to avoid busylooping here.
+      // If we ever hit this codepath it's mostly a bug in the code that signals
+      // the events.
+      uiAccessTime = 2;
+    }
+  }
+#else
+  if (master_event != NULL) {
+    // This design relies on the events actually being semaphores;
+    // if multiple events in the list have been signalled, the master
+    // event should have a similar count (events in windows can't keep
+    // track of the actual count, but the master event isn't needed there
+    // since it uses WaitForMultipleObjects).
+    int32_t err = sem_wait (*master_event);
+    if (err != WELS_THREAD_ERROR_OK)
+      return err;
+    uiAccessTime = 0; // no blocking, just quickly loop through all to find the one that was signalled
+  }
+
+  while (1) {
+    nIdx = 0; // access each event by order
+    while (nIdx < nCount) {
+      int32_t err = 0;
+      int32_t wait_count = 0;
+
+      /*
+       * although such interface is not used in __GNUC__ like platform, to use
+       * pthread_cond_timedwait() might be better choice if need
+       */
+      do {
+        err = sem_trywait (event_list[nIdx]);
+        if (WELS_THREAD_ERROR_OK == err)
+          return WELS_THREAD_ERROR_WAIT_OBJECT_0 + nIdx;
+        else if (wait_count > 0 || uiAccessTime == 0)
+          break;
+        usleep (uiAccessTime);
+        ++ wait_count;
+      } while (1);
+      // we do need access next event next time
+      ++ nIdx;
+    }
+    usleep (1); // switch to working threads
+    if (master_event != NULL) {
+      // A master event was used and was signalled, but none of the events in the
+      // list was found to be signalled, thus wait a little more when rechecking
+      // the list to avoid busylooping here.
+      // If we ever hit this codepath it's mostly a bug in the code that signals
+      // the events.
+      uiAccessTime = 2;
+    }
+  }
+
+#endif
+  return WELS_THREAD_ERROR_WAIT_FAILED;
+}
+
+// Stores the number of usable processors in pInfo->ProcessorCount, falling
+// back to 1 where the platform query fails or is unavailable. Always returns OK.
+WELS_THREAD_ERROR_CODE WelsQueryLogicalProcessInfo (WelsLogicalProcessInfo* pInfo) {
+#ifdef ANDROID_NDK
+  pInfo->ProcessorCount = android_getCpuCount();
+  return WELS_THREAD_ERROR_OK;
+#elif defined(__linux__)
+
+  cpu_set_t cpuset;
+
+  CPU_ZERO (&cpuset);
+
+  // count the CPUs in this process' affinity mask
+  if (!sched_getaffinity (0, sizeof (cpuset), &cpuset)) {
+#ifdef CPU_COUNT
+    pInfo->ProcessorCount = CPU_COUNT (&cpuset);
+#else
+    int32_t count = 0;
+    for (int i = 0; i < CPU_SETSIZE; i++) {
+      if (CPU_ISSET (i, &cpuset)) {
+        count++;
+      }
+    }
+    pInfo->ProcessorCount = count;
+#endif
+  } else {
+    pInfo->ProcessorCount = 1;
+  }
+
+  return WELS_THREAD_ERROR_OK;
+
+#elif defined(__EMSCRIPTEN__)
+
+  // There is not yet a way to determine CPU count in emscripten JS environment.
+  pInfo->ProcessorCount = 1;
+  return WELS_THREAD_ERROR_OK;
+
+#elif defined(__Fuchsia__)
+
+  pInfo->ProcessorCount = sysconf(_SC_NPROCESSORS_ONLN);
+  return WELS_THREAD_ERROR_OK;
+#else
+
+  size_t len = sizeof (pInfo->ProcessorCount);
+
+#if defined(__OpenBSD__)
+  int scname[] = { CTL_HW, HW_NCPU };
+  if (sysctl (scname, 2, &pInfo->ProcessorCount, &len, NULL, 0) == -1)
+#else
+  if (sysctlbyname (HW_NCPU_NAME, &pInfo->ProcessorCount, &len, NULL, 0) == -1)
+#endif
+    pInfo->ProcessorCount = 1;
+
+  return WELS_THREAD_ERROR_OK;
+
+#endif//__linux__
+}
+
+#endif
diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/src/WelsThreadPool.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/common/src/WelsThreadPool.cpp
new file mode 100644
index 000000000..302643cc8
--- /dev/null
+++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/src/WelsThreadPool.cpp
@@ -0,0 +1,380 @@
+/*!
+ * \copy
+ * Copyright (c) 2009-2015, Cisco Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * \file WelsThreadPool.cpp
+ *
+ * \brief functions for Thread Pool
+ *
+ * \date 5/09/2012 Created
+ *
+ *************************************************************************************
+ */
+#include "typedefs.h"
+#include "memory_align.h"
+#include "WelsThreadPool.h"
+
+namespace WelsCommon {
+
+namespace {
+
+// Lock guarding the static pool state (m_iRefCount, m_iMaxThreadNum,
+// m_pThreadPoolSelf). It is allocated on first use and never deleted.
+CWelsLock& GetInitLock() {
+  static CWelsLock *initLock = new CWelsLock;
+  return *initLock;
+}
+
+}
+
+int32_t CWelsThreadPool::m_iRefCount = 0;
+int32_t CWelsThreadPool::m_iMaxThreadNum = DEFAULT_THREAD_NUM;
+CWelsThreadPool* CWelsThreadPool::m_pThreadPoolSelf = NULL;
+
+// The three lists are only allocated by Init().
+CWelsThreadPool::CWelsThreadPool() :
+  m_cWaitedTasks (NULL), m_cIdleThreads (NULL), m_cBusyThreads (NULL) {
+}
+
+
+// Tears the pool down if it is destroyed while references are outstanding.
+CWelsThreadPool::~CWelsThreadPool() {
+  //fprintf(stdout, "CWelsThreadPool::~CWelsThreadPool: delete %x, %x, %x\n", m_cWaitedTasks, m_cIdleThreads, m_cBusyThreads);
+  if (0 != m_iRefCount) {
+    m_iRefCount = 0;
+    Uninit();
+  }
+}
+
+// Sets the number of worker threads the next Init() will create.
+// Values <= 0 are clamped to 1. Fails with WELS_THREAD_ERROR_GENERAL while
+// the pool is referenced.
+WELS_THREAD_ERROR_CODE CWelsThreadPool::SetThreadNum (int32_t iMaxThreadNum) {
+  CWelsAutoLock cLock (GetInitLock());
+
+  if (m_iRefCount != 0) {
+    return WELS_THREAD_ERROR_GENERAL;
+  }
+
+  if (iMaxThreadNum <= 0) {
+    iMaxThreadNum = 1;
+  }
+  m_iMaxThreadNum = iMaxThreadNum;
+  return WELS_THREAD_ERROR_OK;
+}
+
+
+// Returns the shared pool, creating and initialising it for the first
+// reference, and bumps the reference count. Returns NULL when Init() fails.
+CWelsThreadPool* CWelsThreadPool::AddReference() {
+  CWelsAutoLock cLock (GetInitLock());
+  if (m_pThreadPoolSelf == NULL) {
+    m_pThreadPoolSelf = new CWelsThreadPool();
+    if (!m_pThreadPoolSelf) {
+      return NULL;
+    }
+  }
+
+  if (m_iRefCount == 0) {
+    if (WELS_THREAD_ERROR_OK != m_pThreadPoolSelf->Init()) {
+      m_pThreadPoolSelf->Uninit();
+      delete m_pThreadPoolSelf;
+      m_pThreadPoolSelf = NULL;
+      return NULL;
+    }
+  }
+
+  ////fprintf(stdout, "m_iRefCount=%d, iMaxThreadNum=%d\n", m_iRefCount, m_iMaxThreadNum);
+
+  ++ m_iRefCount;
+  //fprintf(stdout, "m_iRefCount2=%d\n", m_iRefCount);
+  return m_pThreadPoolSelf;
+}
+
+// Drops one reference; when the count reaches zero the pool is stopped,
+// uninitialised and the shared instance deleted.
+void CWelsThreadPool::RemoveInstance() {
+  CWelsAutoLock cLock (GetInitLock());
+  //fprintf(stdout, "m_iRefCount=%d\n", m_iRefCount);
+  -- m_iRefCount;
+  if (0 == m_iRefCount) {
+    StopAllRunning();
+    Uninit();
+    if (m_pThreadPoolSelf) {
+      delete m_pThreadPoolSelf;
+      m_pThreadPoolSelf = NULL;
+    }
+    //fprintf(stdout, "m_iRefCount=%d, IdleThreadNum=%d, BusyThreadNum=%d, WaitedTask=%d\n", m_iRefCount, GetIdleThreadNum(), GetBusyThreadNum(), GetWaitedTaskNum());
+  }
+}
+
+
+// True while at least one reference is outstanding.
+bool CWelsThreadPool::IsReferenced() {
+  CWelsAutoLock cLock (GetInitLock());
+  return (m_iRefCount > 0);
+}
+
+
+// Sink callback from a worker thread: records it as busy.
+WELS_THREAD_ERROR_CODE CWelsThreadPool::OnTaskStart (CWelsTaskThread* pThread, IWelsTask* pTask) {
+  AddThreadToBusyList (pThread);
+  //fprintf(stdout, "CWelsThreadPool::AddThreadToBusyList: Task %x at Thread %x\n", pTask, pThread);
+  return WELS_THREAD_ERROR_OK;
+}
+
+// Sink callback from a worker thread: moves it from the busy list back to the
+// idle queue, notifies the task's own sink, then signals the pool thread.
+WELS_THREAD_ERROR_CODE CWelsThreadPool::OnTaskStop (CWelsTaskThread* pThread, IWelsTask* pTask) {
+  //fprintf(stdout, "CWelsThreadPool::OnTaskStop 0: Task %x at Thread %x Finished\n", pTask, pThread);
+
+  RemoveThreadFromBusyList (pThread);
+  AddThreadToIdleQueue (pThread);
+
+  if (pTask && pTask->GetSink()) {
+    //fprintf(stdout, "CWelsThreadPool::OnTaskStop 1: Task %x at Thread %x Finished, m_pSink=%x\n", pTask, pThread, pTask->GetSink());
+    pTask->GetSink()->OnTaskExecuted();
+    ////fprintf(stdout, "CWelsThreadPool::OnTaskStop 1: Task %x at Thread %x Finished, m_pSink=%x\n", pTask, pThread, pTask->GetSink());
+  }
+  //if (m_pSink) {
+  //  m_pSink->OnTaskExecuted (pTask);
+  //}
+  //fprintf(stdout, "CWelsThreadPool::OnTaskStop 2: Task %x at Thread %x Finished\n", pTask, pThread);
+
+  SignalThread();
+
+  //fprintf(stdout, "ThreadPool: Task %x at Thread %x Finished\n", pTask, pThread);
+  return WELS_THREAD_ERROR_OK;
+}
+
+// Allocates the three lists, creates m_iMaxThreadNum idle worker threads and
+// starts the pool's own thread. Returns WELS_THREAD_ERROR_GENERAL on the
+// first failure.
+WELS_THREAD_ERROR_CODE CWelsThreadPool::Init() {
+  //fprintf(stdout, "Enter WelsThreadPool Init\n");
+
+  CWelsAutoLock cLock (m_cLockPool);
+
+  m_cWaitedTasks = new CWelsNonDuplicatedList<IWelsTask>();
+  m_cIdleThreads = new CWelsNonDuplicatedList<CWelsTaskThread>();
+  m_cBusyThreads = new CWelsList<CWelsTaskThread>();
+  if (NULL == m_cWaitedTasks || NULL == m_cIdleThreads || NULL == m_cBusyThreads) {
+    return WELS_THREAD_ERROR_GENERAL;
+  }
+
+  for (int32_t i = 0; i < m_iMaxThreadNum; i++) {
+    if (WELS_THREAD_ERROR_OK != CreateIdleThread()) {
+      return WELS_THREAD_ERROR_GENERAL;
+    }
+  }
+
+  if (WELS_THREAD_ERROR_OK != Start()) {
+    return WELS_THREAD_ERROR_GENERAL;
+  }
+
+  return WELS_THREAD_ERROR_OK;
+}
+
+// Cancels every queued task and polls (10 ms steps) until no worker is busy.
+// Returns WELS_THREAD_ERROR_GENERAL if the idle count then differs from
+// m_iMaxThreadNum.
+WELS_THREAD_ERROR_CODE CWelsThreadPool::StopAllRunning() {
+  WELS_THREAD_ERROR_CODE iReturn = WELS_THREAD_ERROR_OK;
+
+  ClearWaitedTasks();
+
+  while (GetBusyThreadNum() > 0) {
+    //WELS_INFO_TRACE ("CWelsThreadPool::Uninit - Waiting all thread to exit");
+    WelsSleep (10);
+  }
+
+  if (GetIdleThreadNum() != m_iMaxThreadNum) {
+    iReturn = WELS_THREAD_ERROR_GENERAL;
+  }
+
+  return iReturn;
+}
+
+// Stops all work, destroys every idle worker thread, kills the pool thread
+// and frees the three lists. Returns early, releasing nothing, when
+// StopAllRunning() reports an error.
+WELS_THREAD_ERROR_CODE CWelsThreadPool::Uninit() {
+  WELS_THREAD_ERROR_CODE iReturn = WELS_THREAD_ERROR_OK;
+  CWelsAutoLock cLock (m_cLockPool);
+
+  iReturn = StopAllRunning();
+  if (WELS_THREAD_ERROR_OK != iReturn) {
+    return iReturn;
+  }
+
+  m_cLockIdleTasks.Lock();
+  while (m_cIdleThreads->size() > 0) {
+    DestroyThread (m_cIdleThreads->begin());
+    m_cIdleThreads->pop_front();
+  }
+  m_cLockIdleTasks.Unlock();
+
+  Kill();
+
+  WELS_DELETE_OP (m_cWaitedTasks);
+  WELS_DELETE_OP (m_cIdleThreads);
+  WELS_DELETE_OP (m_cBusyThreads);
+
+  return iReturn;
+}
+
+// Pool thread body: pairs queued tasks with idle workers until either the
+// queue is empty or no worker is idle.
+void CWelsThreadPool::ExecuteTask() {
+  //fprintf(stdout, "ThreadPool: scheduled tasks: ExecuteTask\n");
+  CWelsTaskThread* pThread = NULL;
+  IWelsTask* pTask = NULL;
+  while (GetWaitedTaskNum() > 0) {
+    //fprintf(stdout, "ThreadPool: ExecuteTask: waiting task %d\n", GetWaitedTaskNum());
+    pThread = GetIdleThread();
+    if (pThread == NULL) {
+      //fprintf(stdout, "ThreadPool: ExecuteTask: no IdleThread\n");
+
+      break;
+    }
+    pTask = GetWaitedTask();
+    //fprintf(stdout, "ThreadPool: ExecuteTask = %x at thread %x\n", pTask, pThread);
+    if (pTask) {
+      pThread->SetTask (pTask);
+    } else {
+      // the queue was emptied in the meantime; give the worker back
+      AddThreadToIdleQueue (pThread);
+    }
+  }
+}
+
+// Runs pTask on an idle worker right away when nothing is queued and a worker
+// is free; otherwise appends it to the waiting list and signals the pool
+// thread. Returns WELS_THREAD_ERROR_GENERAL when the list rejects the task.
+WELS_THREAD_ERROR_CODE CWelsThreadPool::QueueTask (IWelsTask* pTask) {
+  CWelsAutoLock cLock (m_cLockPool);
+
+  //fprintf(stdout, "CWelsThreadPool::QueueTask: %d, pTask=%x\n", m_iRefCount, pTask);
+  if (GetWaitedTaskNum() == 0) {
+    CWelsTaskThread* pThread = GetIdleThread();
+
+    if (pThread != NULL) {
+      //fprintf(stdout, "ThreadPool: ExecuteTask = %x at thread %x\n", pTask, pThread);
+      pThread->SetTask (pTask);
+
+      return WELS_THREAD_ERROR_OK;
+    }
+  }
+  //fprintf(stdout, "ThreadPool: AddTaskToWaitedList: %x\n", pTask);
+  if (false == AddTaskToWaitedList (pTask)) {
+    return WELS_THREAD_ERROR_GENERAL;
+  }
+
+  //fprintf(stdout, "ThreadPool: SignalThread: %x\n", pTask);
+  SignalThread();
+  return WELS_THREAD_ERROR_OK;
+}
+
+// Creates and starts one worker thread with this pool as its sink and puts
+// it on the idle queue.
+WELS_THREAD_ERROR_CODE CWelsThreadPool::CreateIdleThread() {
+  CWelsTaskThread* pThread = new CWelsTaskThread (this);
+
+  if (NULL == pThread) {
+    return WELS_THREAD_ERROR_GENERAL;
+  }
+
+  if (WELS_THREAD_ERROR_OK != pThread->Start()) {
+    // NOTE(review): pThread is not released on this path - looks like a leak;
+    // confirm that deleting a thread whose Start() failed is safe before fixing.
+    return WELS_THREAD_ERROR_GENERAL;
+  }
+  //fprintf(stdout, "ThreadPool: AddThreadToIdleQueue: %x\n", pThread);
+  AddThreadToIdleQueue (pThread);
+
+  return WELS_THREAD_ERROR_OK;
+}
+
+// Stops the worker thread and deletes it.
+void CWelsThreadPool::DestroyThread (CWelsTaskThread* pThread) {
+  pThread->Kill();
+  WELS_DELETE_OP (pThread);
+
+  return;
+}
+
+// Appends pThread to the idle queue under m_cLockIdleTasks.
+WELS_THREAD_ERROR_CODE CWelsThreadPool::AddThreadToIdleQueue (CWelsTaskThread* pThread) {
+  CWelsAutoLock cLock (m_cLockIdleTasks);
+  m_cIdleThreads->push_back (pThread);
+  return WELS_THREAD_ERROR_OK;
+}
+
+// Appends pThread to the busy list under m_cLockBusyTasks.
+WELS_THREAD_ERROR_CODE CWelsThreadPool::AddThreadToBusyList (CWelsTaskThread* pThread) {
+  CWelsAutoLock cLock (m_cLockBusyTasks);
+  m_cBusyThreads->push_back (pThread);
+  return WELS_THREAD_ERROR_OK;
+}
+
+// Removes pThread from the busy list; WELS_THREAD_ERROR_GENERAL if erase() fails.
+WELS_THREAD_ERROR_CODE CWelsThreadPool::RemoveThreadFromBusyList (CWelsTaskThread* pThread) {
+  CWelsAutoLock cLock (m_cLockBusyTasks);
+  if (m_cBusyThreads->erase (pThread)) {
+    return WELS_THREAD_ERROR_OK;
+  } else {
+    return WELS_THREAD_ERROR_GENERAL;
+  }
+}
+
+// Appends pTask to the waiting list; returns what the list's push_back returns.
+bool CWelsThreadPool::AddTaskToWaitedList (IWelsTask* pTask) {
+  CWelsAutoLock cLock (m_cLockWaitedTasks);
+
+  return m_cWaitedTasks->push_back (pTask);
+}
+
+// Pops and returns the first idle worker, or NULL when there is none.
+CWelsTaskThread* CWelsThreadPool::GetIdleThread() {
+  CWelsAutoLock cLock (m_cLockIdleTasks);
+
+  if (NULL == m_cIdleThreads || m_cIdleThreads->size() == 0) {
+    return NULL;
+  }
+
+  //fprintf(stdout, "CWelsThreadPool::GetIdleThread=%d\n", m_cIdleThreads->size());
+
+  CWelsTaskThread* pThread = m_cIdleThreads->begin();
+  m_cIdleThreads->pop_front();
+  return pThread;
+}
+
+// The three counters below read the list size without taking the list's lock
+// and report 0 when the list has not been allocated.
+int32_t CWelsThreadPool::GetBusyThreadNum() {
+  return (m_cBusyThreads?m_cBusyThreads->size():0);
+}
+
+int32_t CWelsThreadPool::GetIdleThreadNum() {
+  return (m_cIdleThreads?m_cIdleThreads->size():0);
+}
+
+int32_t CWelsThreadPool::GetWaitedTaskNum() {
+  return (m_cWaitedTasks?m_cWaitedTasks->size():0);
+}
+
+// Pops and returns the first waiting task, or NULL when the queue is empty.
+IWelsTask* CWelsThreadPool::GetWaitedTask() {
+  CWelsAutoLock lock (m_cLockWaitedTasks);
+
+  if (NULL==m_cWaitedTasks || m_cWaitedTasks->size() == 0) {
+    return NULL;
+  }
+
+  IWelsTask* pTask = m_cWaitedTasks->begin();
+
+  m_cWaitedTasks->pop_front();
+
+  return pTask;
+}
+
+// Empties the waiting list, telling each task's sink (if any) that the task
+// was cancelled.
+void CWelsThreadPool::ClearWaitedTasks() {
+  CWelsAutoLock cLock (m_cLockWaitedTasks);
+  if (NULL == m_cWaitedTasks) {
+    return;
+  }
+  IWelsTask* pTask = NULL;
+  while (0 != m_cWaitedTasks->size()) {
+    pTask = m_cWaitedTasks->begin();
+    if (pTask->GetSink()) {
+      pTask->GetSink()->OnTaskCancelled();
+    }
+    m_cWaitedTasks->pop_front();
+  }
+}
+
+}
diff --git
a/TMessagesProj/jni/third_party/openh264/src/codec/common/src/common_tables.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/common/src/common_tables.cpp new file mode 100644 index 000000000..117b28d08 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/src/common_tables.cpp @@ -0,0 +1,906 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include "wels_common_defs.h" + +namespace WelsCommon { +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +//////pNonZeroCount[16+8] mapping scan index +const uint8_t g_kuiMbCountScan4Idx[24] = { + // 0 1 | 4 5 luma 8*8 block pNonZeroCount[16+8] + 0, 1, 4, 5, // 2 3 | 6 7 0 | 1 0 1 2 3 + 2, 3, 6, 7, //--------------- --------- 4 5 6 7 + 8, 9, 12, 13, // 8 9 | 12 13 2 | 3 8 9 10 11 + 10, 11, 14, 15, // 10 11 | 14 15-----------------------------> 12 13 14 15 + 16, 17, 20, 21, //---------------- chroma 8*8 block 16 17 18 19 + 18, 19, 22, 23 // 16 17 | 20 21 0 1 20 21 22 23 +}; + +const uint8_t g_kuiCache48CountScan4Idx[24] = { + /* Luma */ + 9, 10, 17, 18, // 1+1*8, 2+1*8, 1+2*8, 2+2*8, + 11, 12, 19, 20, // 3+1*8, 4+1*8, 3+2*8, 4+2*8, + 25, 26, 33, 34, // 1+3*8, 2+3*8, 1+4*8, 2+4*8, + 27, 28, 35, 36, // 3+3*8, 4+3*8, 3+4*8, 4+4*8, + /* Cb */ + 14, 15, // 6+1*8, 7+1*8, + 22, 23, // 6+2*8, 7+2*8, + + /* Cr */ + 38, 39, // 6+4*8, 7+4*8, + 46, 47, // 6+5*8, 7+5*8, +}; + +const uint8_t g_kuiMatrixV[6][8][8] = { // generated from equation 8-317, 8-318 + { + {20, 19, 25, 19, 20, 19, 25, 19}, + {19, 18, 24, 18, 19, 18, 24, 18}, + {25, 24, 32, 24, 25, 24, 32, 24}, + {19, 18, 24, 18, 19, 18, 24, 18}, + {20, 19, 25, 19, 20, 19, 25, 19}, + {19, 18, 24, 18, 19, 18, 24, 18}, + {25, 24, 32, 24, 25, 24, 32, 24}, + {19, 18, 24, 18, 19, 18, 24, 18} + }, + { + {22, 21, 28, 21, 22, 21, 28, 21}, + {21, 19, 26, 19, 21, 19, 26, 19}, + {28, 26, 35, 26, 28, 26, 35, 26}, + {21, 19, 26, 19, 21, 19, 26, 19}, + {22, 21, 28, 21, 22, 21, 28, 21}, + {21, 19, 26, 19, 21, 19, 26, 19}, + {28, 26, 35, 26, 28, 26, 35, 26}, + {21, 19, 26, 19, 21, 19, 26, 19} + }, + { + {26, 24, 33, 24, 26, 24, 33, 24}, + {24, 23, 31, 23, 24, 23, 31, 23}, + {33, 31, 42, 31, 33, 31, 42, 31}, + {24, 23, 31, 23, 24, 23, 31, 23}, + {26, 24, 33, 24, 26, 24, 33, 24}, + {24, 23, 31, 23, 24, 23, 31, 23}, + {33, 31, 42, 31, 33, 31, 42, 
31}, + {24, 23, 31, 23, 24, 23, 31, 23} + }, + { + {28, 26, 35, 26, 28, 26, 35, 26}, + {26, 25, 33, 25, 26, 25, 33, 25}, + {35, 33, 45, 33, 35, 33, 45, 33}, + {26, 25, 33, 25, 26, 25, 33, 25}, + {28, 26, 35, 26, 28, 26, 35, 26}, + {26, 25, 33, 25, 26, 25, 33, 25}, + {35, 33, 45, 33, 35, 33, 45, 33}, + {26, 25, 33, 25, 26, 25, 33, 25} + }, + { + {32, 30, 40, 30, 32, 30, 40, 30}, + {30, 28, 38, 28, 30, 28, 38, 28}, + {40, 38, 51, 38, 40, 38, 51, 38}, + {30, 28, 38, 28, 30, 28, 38, 28}, + {32, 30, 40, 30, 32, 30, 40, 30}, + {30, 28, 38, 28, 30, 28, 38, 28}, + {40, 38, 51, 38, 40, 38, 51, 38}, + {30, 28, 38, 28, 30, 28, 38, 28} + }, + { + {36, 34, 46, 34, 36, 34, 46, 34}, + {34, 32, 43, 32, 34, 32, 43, 32}, + {46, 43, 58, 43, 46, 43, 58, 43}, + {34, 32, 43, 32, 34, 32, 43, 32}, + {36, 34, 46, 34, 36, 34, 46, 34}, + {34, 32, 43, 32, 34, 32, 43, 32}, + {46, 43, 58, 43, 46, 43, 58, 43}, + {34, 32, 43, 32, 34, 32, 43, 32} + } +}; + +//cache element equal to 30 +const uint8_t g_kuiCache30ScanIdx[16] = { //mv or uiRefIndex cache scan index, 4*4 block as basic unit + 7, 8, 13, 14, + 9, 10, 15, 16, + 19, 20, 25, 26, + 21, 22, 27, 28 +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + +// extern at wels_common_defs.h +const uint8_t g_kuiChromaQpTable[52] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, + 28, 29, 29, 30, 31, 32, 32, 33, 34, 34, 35, 35, 36, 36, 37, 37, + 37, 38, 38, 38, 39, 39, 39, 39 +}; + +/* + * vcl type map for given NAL unit type and corresponding H264 type (0: AVC; 1: SVC). 
+ */ +const EVclType g_keTypeMap[32][2] = { + { NON_VCL, NON_VCL }, // 0: NAL_UNIT_UNSPEC_0 + { VCL, VCL, }, // 1: NAL_UNIT_CODED_SLICE + { VCL, NOT_APP }, // 2: NAL_UNIT_CODED_SLICE_DPA + { VCL, NOT_APP }, // 3: NAL_UNIT_CODED_SLICE_DPB + { VCL, NOT_APP }, // 4: NAL_UNIT_CODED_SLICE_DPC + { VCL, VCL }, // 5: NAL_UNIT_CODED_SLICE_IDR + { NON_VCL, NON_VCL }, // 6: NAL_UNIT_SEI + { NON_VCL, NON_VCL }, // 7: NAL_UNIT_SPS + { NON_VCL, NON_VCL }, // 8: NAL_UNIT_PPS + { NON_VCL, NON_VCL }, // 9: NAL_UNIT_AU_DELIMITER + { NON_VCL, NON_VCL }, // 10: NAL_UNIT_END_OF_SEQ + { NON_VCL, NON_VCL }, // 11: NAL_UNIT_END_OF_STR + { NON_VCL, NON_VCL }, // 12: NAL_UNIT_FILLER_DATA + { NON_VCL, NON_VCL }, // 13: NAL_UNIT_SPS_EXT + { NON_VCL, NON_VCL }, // 14: NAL_UNIT_PREFIX, NEED associate succeeded NAL to make a VCL + { NON_VCL, NON_VCL }, // 15: NAL_UNIT_SUBSET_SPS + { NON_VCL, NON_VCL }, // 16: NAL_UNIT_DEPTH_PARAM + { NON_VCL, NON_VCL }, // 17: NAL_UNIT_RESV_17 + { NON_VCL, NON_VCL }, // 18: NAL_UNIT_RESV_18 + { NON_VCL, NON_VCL }, // 19: NAL_UNIT_AUX_CODED_SLICE + { NON_VCL, VCL }, // 20: NAL_UNIT_CODED_SLICE_EXT + { NON_VCL, NON_VCL }, // 21: NAL_UNIT_MVC_SLICE_EXT + { NON_VCL, NON_VCL }, // 22: NAL_UNIT_RESV_22 + { NON_VCL, NON_VCL }, // 23: NAL_UNIT_RESV_23 + { NON_VCL, NON_VCL }, // 24: NAL_UNIT_UNSPEC_24 + { NON_VCL, NON_VCL }, // 25: NAL_UNIT_UNSPEC_25 + { NON_VCL, NON_VCL }, // 26: NAL_UNIT_UNSPEC_26 + { NON_VCL, NON_VCL }, // 27: NAL_UNIT_UNSPEC_27 + { NON_VCL, NON_VCL }, // 28: NAL_UNIT_UNSPEC_28 + { NON_VCL, NON_VCL }, // 29: NAL_UNIT_UNSPEC_29 + { NON_VCL, NON_VCL }, // 30: NAL_UNIT_UNSPEC_30 + { NON_VCL, NON_VCL } // 31: NAL_UNIT_UNSPEC_31 +}; +//default scaling list matrix value of 4x4 +const uint8_t g_kuiDequantScaling4x4Default[2][16]={ + { 6, 13, 20, 28, 13, 20, 28, 32, 20, 28, 32, 37, 28, 32, 37, 42 }, + { 10, 14, 20, 24, 14, 20, 24, 27, 20, 24, 27, 30, 24, 27, 30, 34 } +}; +//default scaling list matrix value of 8x8 +const uint8_t 
g_kuiDequantScaling8x8Default[2][64]={ + { 6, 10, 13, 16, 18, 23, 25, 27, 10, 11, 16, 18, 23, 25, 27, 29, + 13, 16, 18, 23, 25, 27, 29, 31, + 16, 18, 23, 25, 27, 29, 31, 33, + 18, 23, 25, 27, 29, 31, 33, 36, + 23, 25, 27, 29, 31, 33, 36, 38, + 25, 27, 29, 31, 33, 36, 38, 40, + 27, 29, 31, 33, 36, 38, 40, 42 }, + { 9, 13, 15, 17, 19, 21, 22, 24, + 13, 13, 17, 19, 21, 22, 24, 25, + 15, 17, 19, 21, 22, 24, 25, 27, + 17, 19, 21, 22, 24, 25, 27, 28, + 19, 21, 22, 24, 25, 27, 28, 30, + 21, 22, 24, 25, 27, 28, 30, 32, + 22, 24, 25, 27, 28, 30, 32, 33, + 24, 25, 27, 28, 30, 32, 33, 35 } +}; +ALIGNED_DECLARE (const uint16_t, g_kuiDequantCoeff[52][8], 16) = { + /* 0*/{ 10, 13, 10, 13, 13, 16, 13, 16 }, /* 1*/{ 11, 14, 11, 14, 14, 18, 14, 18 }, + /* 2*/{ 13, 16, 13, 16, 16, 20, 16, 20 }, /* 3*/{ 14, 18, 14, 18, 18, 23, 18, 23 }, + /* 4*/{ 16, 20, 16, 20, 20, 25, 20, 25 }, /* 5*/{ 18, 23, 18, 23, 23, 29, 23, 29 }, + /* 6*/{ 20, 26, 20, 26, 26, 32, 26, 32 }, /* 7*/{ 22, 28, 22, 28, 28, 36, 28, 36 }, + /* 8*/{ 26, 32, 26, 32, 32, 40, 32, 40 }, /* 9*/{ 28, 36, 28, 36, 36, 46, 36, 46 }, + /*10*/{ 32, 40, 32, 40, 40, 50, 40, 50 }, /*11*/{ 36, 46, 36, 46, 46, 58, 46, 58 }, + /*12*/{ 40, 52, 40, 52, 52, 64, 52, 64 }, /*13*/{ 44, 56, 44, 56, 56, 72, 56, 72 }, + /*14*/{ 52, 64, 52, 64, 64, 80, 64, 80 }, /*15*/{ 56, 72, 56, 72, 72, 92, 72, 92 }, + /*16*/{ 64, 80, 64, 80, 80, 100, 80, 100 }, /*17*/{ 72, 92, 72, 92, 92, 116, 92, 116 }, + /*18*/{ 80, 104, 80, 104, 104, 128, 104, 128 }, /*19*/{ 88, 112, 88, 112, 112, 144, 112, 144 }, + /*20*/{ 104, 128, 104, 128, 128, 160, 128, 160 }, /*21*/{ 112, 144, 112, 144, 144, 184, 144, 184 }, + /*22*/{ 128, 160, 128, 160, 160, 200, 160, 200 }, /*23*/{ 144, 184, 144, 184, 184, 232, 184, 232 }, + /*24*/{ 160, 208, 160, 208, 208, 256, 208, 256 }, /*25*/{ 176, 224, 176, 224, 224, 288, 224, 288 }, + /*26*/{ 208, 256, 208, 256, 256, 320, 256, 320 }, /*27*/{ 224, 288, 224, 288, 288, 368, 288, 368 }, + /*28*/{ 256, 320, 256, 320, 320, 400, 320, 400 }, 
/*29*/{ 288, 368, 288, 368, 368, 464, 368, 464 }, + /*30*/{ 320, 416, 320, 416, 416, 512, 416, 512 }, /*31*/{ 352, 448, 352, 448, 448, 576, 448, 576 }, + /*32*/{ 416, 512, 416, 512, 512, 640, 512, 640 }, /*33*/{ 448, 576, 448, 576, 576, 736, 576, 736 }, + /*34*/{ 512, 640, 512, 640, 640, 800, 640, 800 }, /*35*/{ 576, 736, 576, 736, 736, 928, 736, 928 }, + /*36*/{ 640, 832, 640, 832, 832, 1024, 832, 1024 }, /*37*/{ 704, 896, 704, 896, 896, 1152, 896, 1152 }, + /*38*/{ 832, 1024, 832, 1024, 1024, 1280, 1024, 1280 }, /*39*/{ 896, 1152, 896, 1152, 1152, 1472, 1152, 1472 }, + /*40*/{ 1024, 1280, 1024, 1280, 1280, 1600, 1280, 1600 }, /*41*/{ 1152, 1472, 1152, 1472, 1472, 1856, 1472, 1856 }, + /*42*/{ 1280, 1664, 1280, 1664, 1664, 2048, 1664, 2048 }, /*43*/{ 1408, 1792, 1408, 1792, 1792, 2304, 1792, 2304 }, + /*44*/{ 1664, 2048, 1664, 2048, 2048, 2560, 2048, 2560 }, /*45*/{ 1792, 2304, 1792, 2304, 2304, 2944, 2304, 2944 }, + /*46*/{ 2048, 2560, 2048, 2560, 2560, 3200, 2560, 3200 }, /*47*/{ 2304, 2944, 2304, 2944, 2944, 3712, 2944, 3712 }, + /*48*/{ 2560, 3328, 2560, 3328, 3328, 4096, 3328, 4096 }, /*49*/{ 2816, 3584, 2816, 3584, 3584, 4608, 3584, 4608 }, + /*50*/{ 3328, 4096, 3328, 4096, 4096, 5120, 4096, 5120 }, /*51*/{ 3584, 4608, 3584, 4608, 4608, 5888, 4608, 5888 }, +}; + +ALIGNED_DECLARE (const uint16_t, g_kuiDequantCoeff8x8[52][64], 16) = { +/* QP == 0 */ +{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288 }, +/* QP == 1 */ +{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 
336, 304, 416, 304, 336, 304, 416, 304 }, +/* QP == 2 */ +{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 }, +/* QP == 3 */ +{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 }, +/* QP == 4 */ +{ 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448, 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448 }, +/* QP == 5 */ +{ 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512, 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512 }, +/* QP == 6 */ +{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288 }, +/* QP == 7 */ +{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 
448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304 }, +/* QP == 8 */ +{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 }, +/* QP == 9 */ +{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 }, +/* QP == 10 */ +{ 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448, 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448 }, +/* QP == 11 */ +{ 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512, 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512 }, +/* QP == 12 */ +{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288 }, +/* QP == 13 */ +{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 
336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304 }, +/* QP == 14 */ +{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 }, +/* QP == 15 */ +{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 }, +/* QP == 16 */ +{ 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448, 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448 }, +/* QP == 17 */ +{ 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512, 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512 }, +/* QP == 18 */ +{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288 }, +/* QP == 19 */ +{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 
304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304 }, +/* QP == 20 */ +{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 }, +/* QP == 21 */ +{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 }, +/* QP == 22 */ +{ 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448, 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448 }, +/* QP == 23 */ +{ 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512, 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512 }, +/* QP == 24 */ +{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288 }, +/* QP == 25 */ +{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 
416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304 }, +/* QP == 26 */ +{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 }, +/* QP == 27 */ +{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 }, +/* QP == 28 */ +{ 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448, 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448 }, +/* QP == 29 */ +{ 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512, 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512 }, +/* QP == 30 */ +{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288 }, +/* QP == 31 */ +{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 
304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304 }, +/* QP == 32 */ +{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 }, +/* QP == 33 */ +{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 }, +/* QP == 34 */ +{ 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448, 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448 }, +/* QP == 35 */ +{ 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512, 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512 }, +/* QP == 36 */ +{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288 }, +/* QP == 37 */ 
+{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304 }, +/* QP == 38 */ +{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 }, +/* QP == 39 */ +{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 }, +/* QP == 40 */ +{ 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448, 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448 }, +/* QP == 41 */ +{ 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512, 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512 }, +/* QP == 42 */ +{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 
384, 288, 304, 288, 384, 288 }, +/* QP == 43 */ +{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304 }, +/* QP == 44 */ +{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 }, +/* QP == 45 */ +{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 }, +/* QP == 46 */ +{ 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448, 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448 }, +/* QP == 47 */ +{ 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512, 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512 }, +/* QP == 48 */ +{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 
400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288 }, +/* QP == 49 */ +{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304 }, +/* QP == 50 */ +{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 }, +/* QP == 51 */ +{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 }, +}; + +// table A-1 - Level limits +const SLevelLimits g_ksLevelLimits[LEVEL_NUMBER] = { + {LEVEL_1_0, 1485, 99, 396, 64, 175, -256, 255, 2, 0x7fff}, /* level 1 */ + {LEVEL_1_B, 1485, 99, 396, 128, 350, -256, 255, 2, 0x7fff}, /* level 1.b */ + {LEVEL_1_1, 3000, 396, 900, 192, 500, -512, 511, 2, 0x7fff}, /* level 1.1 */ + {LEVEL_1_2, 6000, 396, 2376, 384, 1000, -512, 511, 2, 0x7fff}, /* level 1.2 */ + {LEVEL_1_3, 11880, 396, 2376, 768, 2000, -512, 511, 2, 0x7fff}, /* level 1.3 */ + + {LEVEL_2_0, 11880, 396, 2376, 2000, 2000, -512, 511, 2, 0x7fff}, /* level 2 */ + {LEVEL_2_1, 19800, 792, 4752, 4000, 4000, -1024, 1023, 2, 0x7fff}, /* level 2.1 */ + {LEVEL_2_2, 20250, 1620, 8100, 4000, 4000, -1024, 1023, 2, 0x7fff}, /* level 2.2 */ + + {LEVEL_3_0, 40500, 1620, 8100, 10000, 10000, -1024, 1023, 2, 32 }, /* level 3 */ + {LEVEL_3_1, 108000, 3600, 18000, 14000, 14000, -2048, 2047, 4, 16}, 
/* level 3.1 */ + {LEVEL_3_2, 216000, 5120, 20480, 20000, 20000, -2048, 2047, 4, 16}, /* level 3.2 */ + + {LEVEL_4_0, 245760, 8192, 32768, 20000, 25000, -2048, 2047, 4, 16}, /* level 4 */ + {LEVEL_4_1, 245760, 8192, 32768, 50000, 62500, -2048, 2047, 2, 16}, /* level 4.1 */ + {LEVEL_4_2, 522240, 8704, 34816, 50000, 62500, -2048, 2047, 2, 16}, /* level 4.2 */ + + {LEVEL_5_0, 589824, 22080, 110400, 135000, 135000, -2048, 2047, 2, 16}, /* level 5 */ + {LEVEL_5_1, 983040, 36864, 184320, 240000, 240000, -2048, 2047, 2, 16}, /* level 5.1 */ + {LEVEL_5_2, 2073600, 36864, 184320, 240000, 240000, -2048, 2047, 2, 16} /* level 5.2 */ +}; +const uint32_t g_kuiLevelMaps[LEVEL_NUMBER] = { + 10, 9, 11, 12, 13, 20, 21, 22, 30, 31, 32, 40, 41, 42, 50, 51, 52 +}; +//for cabac +/* this table is from Table9-12 to Table 9-24 */ + +const int8_t g_kiCabacGlobalContextIdx[WELS_CONTEXT_COUNT][4][2] = { + //0-10 Table 9-12 + {{20, -15}, {20, -15}, {20, -15}, {20, -15}}, + {{2, 54}, {2, 54}, {2, 54}, {2, 54}}, + {{3, 74}, {3, 74}, {3, 74}, {3, 74}}, + {{20, -15}, {20, -15}, {20, -15}, {20, -15}}, + {{2, 54}, {2, 54}, {2, 54}, {2, 54}}, + {{3, 74}, {3, 74}, {3, 74}, {3, 74}}, + {{ -28, 127}, { -28, 127}, { -28, 127}, { -28, 127}}, + {{ -23, 104}, { -23, 104}, { -23, 104}, { -23, 104}}, + {{ -6, 53}, { -6, 53}, { -6, 53}, { -6, 53}}, + {{ -1, 54}, { -1, 54}, { -1, 54}, { -1, 54}}, + {{7, 51}, {7, 51}, {7, 51}, {7, 51}}, + //11-23 Table 9-13 + {{CTX_NA, CTX_NA}, {23, 33}, {22, 25}, {29, 16}}, + {{CTX_NA, CTX_NA}, {23, 2}, {34, 0}, {25, 0}}, + {{CTX_NA, CTX_NA}, {21, 0}, {16, 0}, {14, 0}}, + {{CTX_NA, CTX_NA}, {1, 9}, { -2, 9}, { -10, 51}}, + {{CTX_NA, CTX_NA}, {0, 49}, {4, 41}, { -3, 62}}, + {{CTX_NA, CTX_NA}, { -37, 118}, { -29, 118}, { -27, 99}}, + {{CTX_NA, CTX_NA}, {5, 57}, {2, 65}, {26, 16}}, + {{CTX_NA, CTX_NA}, { -13, 78}, { -6, 71}, { -4, 85}}, + {{CTX_NA, CTX_NA}, { -11, 65}, { -13, 79}, { -24, 102}}, + {{CTX_NA, CTX_NA}, {1, 62}, {5, 52}, {5, 57}}, + {{CTX_NA, CTX_NA}, {12, 49}, {9, 
50}, {6, 57}}, + {{CTX_NA, CTX_NA}, { -4, 73}, { -3, 70}, { -17, 73}}, + {{CTX_NA, CTX_NA}, {17, 50}, {10, 54}, {14, 57}}, + //24-39 Table9-14 + {{CTX_NA, CTX_NA}, {18, 64}, {26, 34}, {20, 40}}, + {{CTX_NA, CTX_NA}, {9, 43}, {19, 22}, {20, 10}}, + {{CTX_NA, CTX_NA}, {29, 0}, {40, 0}, {29, 0}}, + {{CTX_NA, CTX_NA}, {26, 67}, {57, 2}, {54, 0}}, + {{CTX_NA, CTX_NA}, {16, 90}, {41, 36}, {37, 42}}, + {{CTX_NA, CTX_NA}, {9, 104}, {26, 69}, {12, 97}}, + {{CTX_NA, CTX_NA}, { -46, 127}, { -45, 127}, { -32, 127}}, + {{CTX_NA, CTX_NA}, { -20, 104}, { -15, 101}, { -22, 117}}, + {{CTX_NA, CTX_NA}, {1, 67}, { -4, 76}, { -2, 74}}, + {{CTX_NA, CTX_NA}, { -13, 78}, { -6, 71}, { -4, 85}}, + {{CTX_NA, CTX_NA}, { -11, 65}, { -13, 79}, { -24, 102}}, + {{CTX_NA, CTX_NA}, {1, 62}, {5, 52}, {5, 57}}, + {{CTX_NA, CTX_NA}, { -6, 86}, {6, 69}, { -6, 93}}, + {{CTX_NA, CTX_NA}, { -17, 95}, { -13, 90}, { -14, 88}}, + {{CTX_NA, CTX_NA}, { -6, 61}, {0, 52}, { -6, 44}}, + {{CTX_NA, CTX_NA}, {9, 45}, {8, 43}, {4, 55}}, + //40-53 Table 9-15 + {{CTX_NA, CTX_NA}, { -3, 69}, { -2, 69}, { -11, 89}}, + {{CTX_NA, CTX_NA}, { -6, 81}, { -5, 82}, { -15, 103}}, + {{CTX_NA, CTX_NA}, { -11, 96}, { -10, 96}, { -21, 116}}, + {{CTX_NA, CTX_NA}, {6, 55}, {2, 59}, {19, 57}}, + {{CTX_NA, CTX_NA}, {7, 67}, {2, 75}, {20, 58}}, + {{CTX_NA, CTX_NA}, { -5, 86}, { -3, 87}, {4, 84}}, + {{CTX_NA, CTX_NA}, {2, 88}, { -3, 100}, {6, 96}}, + {{CTX_NA, CTX_NA}, {0, 58}, {1, 56}, {1, 63}}, + {{CTX_NA, CTX_NA}, { -3, 76}, { -3, 74}, { -5, 85}}, + {{CTX_NA, CTX_NA}, { -10, 94}, { -6, 85}, { -13, 106}}, + {{CTX_NA, CTX_NA}, {5, 54}, {0, 59}, {5, 63}}, + {{CTX_NA, CTX_NA}, {4, 69}, { -3, 81}, {6, 75}}, + {{CTX_NA, CTX_NA}, { -3, 81}, { -7, 86}, { -3, 90}}, + {{CTX_NA, CTX_NA}, {0, 88}, { -5, 95}, { -1, 101}}, + //54-59 Table 9-16 + {{CTX_NA, CTX_NA}, { -7, 67}, { -1, 66}, {3, 55}}, + {{CTX_NA, CTX_NA}, { -5, 74}, { -1, 77}, { -4, 79}}, + {{CTX_NA, CTX_NA}, { -4, 74}, {1, 70}, { -2, 75}}, + {{CTX_NA, CTX_NA}, { -5, 80}, { -2, 86}, { 
-12, 97}}, + {{CTX_NA, CTX_NA}, { -7, 72}, { -5, 72}, { -7, 50}}, + {{CTX_NA, CTX_NA}, {1, 58}, {0, 61}, {1, 60}}, + //60-69 Table 9-17 + {{0, 41}, {0, 41}, {0, 41}, {0, 41}}, + {{0, 63}, {0, 63}, {0, 63}, {0, 63}}, + {{0, 63}, {0, 63}, {0, 63}, {0, 63}}, + {{0, 63}, {0, 63}, {0, 63}, {0, 63}}, + {{ -9, 83}, { -9, 83}, { -9, 83}, { -9, 83}}, + {{4, 86}, {4, 86}, {4, 86}, {4, 86}}, + {{0, 97}, {0, 97}, {0, 97}, {0, 97}}, + {{ -7, 72}, { -7, 72}, { -7, 72}, { -7, 72}}, + {{13, 41}, {13, 41}, {13, 41}, {13, 41}}, + {{3, 62}, {3, 62}, {3, 62}, {3, 62}}, + //70-104 Table 9-18 + {{0, 11}, {0, 45}, {13, 15}, {7, 34}}, + {{1, 55}, { -4, 78}, {7, 51}, { -9, 88}}, + {{0, 69}, { -3, 96}, {2, 80}, { -20, 127}}, + {{ -17, 127}, { -27, 126}, { -39, 127}, { -36, 127}}, + {{ -13, 102}, { -28, 98}, { -18, 91}, { -17, 91}}, + {{0, 82}, { -25, 101}, { -17, 96}, { -14, 95}}, + {{ -7, 74}, { -23, 67}, { -26, 81}, { -25, 84}}, + {{ -21, 107}, { -28, 82}, { -35, 98}, { -25, 86}}, + {{ -27, 127}, { -20, 94}, { -24, 102}, { -12, 89}}, + {{ -31, 127}, { -16, 83}, { -23, 97}, { -17, 91}}, + {{ -24, 127}, { -22, 110}, { -27, 119}, { -31, 127}}, + {{ -18, 95}, { -21, 91}, { -24, 99}, { -14, 76}}, + {{ -27, 127}, { -18, 102}, { -21, 110}, { -18, 103}}, + {{ -21, 114}, { -13, 93}, { -18, 102}, { -13, 90}}, + {{ -30, 127}, { -29, 127}, { -36, 127}, { -37, 127}}, + {{ -17, 123}, { -7, 92}, {0, 80}, {11, 80}}, + {{ -12, 115}, { -5, 89}, { -5, 89}, {5, 76}}, + {{ -16, 122}, { -7, 96}, { -7, 94}, {2, 84}}, + {{ -11, 115}, { -13, 108}, { -4, 92}, {5, 78}}, + {{ -12, 63}, { -3, 46}, {0, 39}, { -6, 55}}, + {{ -2, 68}, { -1, 65}, {0, 65}, {4, 61}}, + {{ -15, 84}, { -1, 57}, { -15, 84}, { -14, 83}}, + {{ -13, 104}, { -9, 93}, { -35, 127}, { -37, 127}}, + {{ -3, 70}, { -3, 74}, { -2, 73}, { -5, 79}}, + {{ -8, 93}, { -9, 92}, { -12, 104}, { -11, 104}}, + {{ -10, 90}, { -8, 87}, { -9, 91}, { -11, 91}}, + {{ -30, 127}, { -23, 126}, { -31, 127}, { -30, 127}}, + {{ -1, 74}, {5, 54}, {3, 55}, {0, 65}}, + {{ -6, 
97}, {6, 60}, {7, 56}, { -2, 79}}, + {{ -7, 91}, {6, 59}, {7, 55}, {0, 72}}, + {{ -20, 127}, {6, 69}, {8, 61}, { -4, 92}}, + {{ -4, 56}, { -1, 48}, { -3, 53}, { -6, 56}}, + {{ -5, 82}, {0, 68}, {0, 68}, {3, 68}}, + {{ -7, 76}, { -4, 69}, { -7, 74}, { -8, 71}}, + {{ -22, 125}, { -8, 88}, { -9, 88}, { -13, 98}}, + //105-165 Table 9-19 + {{ -7, 93}, { -2, 85}, { -13, 103}, { -4, 86}}, + {{ -11, 87}, { -6, 78}, { -13, 91}, { -12, 88}}, + {{ -3, 77}, { -1, 75}, { -9, 89}, { -5, 82}}, + {{ -5, 71}, { -7, 77}, { -14, 92}, { -3, 72}}, + {{ -4, 63}, {2, 54}, { -8, 76}, { -4, 67}}, + {{ -4, 68}, {5, 50}, { -12, 87}, { -8, 72}}, + {{ -12, 84}, { -3, 68}, { -23, 110}, { -16, 89}}, + {{ -7, 62}, {1, 50}, { -24, 105}, { -9, 69}}, + {{ -7, 65}, {6, 42}, { -10, 78}, { -1, 59}}, + {{8, 61}, { -4, 81}, { -20, 112}, {5, 66}}, + {{5, 56}, {1, 63}, { -17, 99}, {4, 57}}, + {{ -2, 66}, { -4, 70}, { -78, 127}, { -4, 71}}, + {{1, 64}, {0, 67}, { -70, 127}, { -2, 71}}, + {{0, 61}, {2, 57}, { -50, 127}, {2, 58}}, + {{ -2, 78}, { -2, 76}, { -46, 127}, { -1, 74}}, + {{1, 50}, {11, 35}, { -4, 66}, { -4, 44}}, + {{7, 52}, {4, 64}, { -5, 78}, { -1, 69}}, + {{10, 35}, {1, 61}, { -4, 71}, {0, 62}}, + {{0, 44}, {11, 35}, { -8, 72}, { -7, 51}}, + {{11, 38}, {18, 25}, {2, 59}, { -4, 47}}, + {{1, 45}, {12, 24}, { -1, 55}, { -6, 42}}, + {{0, 46}, {13, 29}, { -7, 70}, { -3, 41}}, + {{5, 44}, {13, 36}, { -6, 75}, { -6, 53}}, + {{31, 17}, { -10, 93}, { -8, 89}, {8, 76}}, + {{1, 51}, { -7, 73}, { -34, 119}, { -9, 78}}, + {{7, 50}, { -2, 73}, { -3, 75}, { -11, 83}}, + {{28, 19}, {13, 46}, {32, 20}, {9, 52}}, + {{16, 33}, {9, 49}, {30, 22}, {0, 67}}, + {{14, 62}, { -7, 100}, { -44, 127}, { -5, 90}}, + {{ -13, 108}, {9, 53}, {0, 54}, {1, 67}}, + {{ -15, 100}, {2, 53}, { -5, 61}, { -15, 72}}, + {{ -13, 101}, {5, 53}, {0, 58}, { -5, 75}}, + {{ -13, 91}, { -2, 61}, { -1, 60}, { -8, 80}}, + {{ -12, 94}, {0, 56}, { -3, 61}, { -21, 83}}, + {{ -10, 88}, {0, 56}, { -8, 67}, { -21, 64}}, + {{ -16, 84}, { -13, 63}, { 
-25, 84}, { -13, 31}}, + {{ -10, 86}, { -5, 60}, { -14, 74}, { -25, 64}}, + {{ -7, 83}, { -1, 62}, { -5, 65}, { -29, 94}}, + {{ -13, 87}, {4, 57}, {5, 52}, {9, 75}}, + {{ -19, 94}, { -6, 69}, {2, 57}, {17, 63}}, + {{1, 70}, {4, 57}, {0, 61}, { -8, 74}}, + {{0, 72}, {14, 39}, { -9, 69}, { -5, 35}}, + {{ -5, 74}, {4, 51}, { -11, 70}, { -2, 27}}, + {{18, 59}, {13, 68}, {18, 55}, {13, 91}}, + {{ -8, 102}, {3, 64}, { -4, 71}, {3, 65}}, + {{ -15, 100}, {1, 61}, {0, 58}, { -7, 69}}, + {{0, 95}, {9, 63}, {7, 61}, {8, 77}}, + {{ -4, 75}, {7, 50}, {9, 41}, { -10, 66}}, + {{2, 72}, {16, 39}, {18, 25}, {3, 62}}, + {{ -11, 75}, {5, 44}, {9, 32}, { -3, 68}}, + {{ -3, 71}, {4, 52}, {5, 43}, { -20, 81}}, + {{15, 46}, {11, 48}, {9, 47}, {0, 30}}, + {{ -13, 69}, { -5, 60}, {0, 44}, {1, 7}}, + {{0, 62}, { -1, 59}, {0, 51}, { -3, 23}}, + {{0, 65}, {0, 59}, {2, 46}, { -21, 74}}, + {{21, 37}, {22, 33}, {19, 38}, {16, 66}}, + {{ -15, 72}, {5, 44}, { -4, 66}, { -23, 124}}, + {{9, 57}, {14, 43}, {15, 38}, {17, 37}}, + {{16, 54}, { -1, 78}, {12, 42}, {44, -18}}, + {{0, 62}, {0, 60}, {9, 34}, {50, -34}}, + {{12, 72}, {9, 69}, {0, 89}, { -22, 127}}, + //166-226 Table 9-20 + {{24, 0}, {11, 28}, {4, 45}, {4, 39}}, + {{15, 9}, {2, 40}, {10, 28}, {0, 42}}, + {{8, 25}, {3, 44}, {10, 31}, {7, 34}}, + {{13, 18}, {0, 49}, {33, -11}, {11, 29}}, + {{15, 9}, {0, 46}, {52, -43}, {8, 31}}, + {{13, 19}, {2, 44}, {18, 15}, {6, 37}}, + {{10, 37}, {2, 51}, {28, 0}, {7, 42}}, + {{12, 18}, {0, 47}, {35, -22}, {3, 40}}, + {{6, 29}, {4, 39}, {38, -25}, {8, 33}}, + {{20, 33}, {2, 62}, {34, 0}, {13, 43}}, + {{15, 30}, {6, 46}, {39, -18}, {13, 36}}, + {{4, 45}, {0, 54}, {32, -12}, {4, 47}}, + {{1, 58}, {3, 54}, {102, -94}, {3, 55}}, + {{0, 62}, {2, 58}, {0, 0}, {2, 58}}, + {{7, 61}, {4, 63}, {56, -15}, {6, 60}}, + {{12, 38}, {6, 51}, {33, -4}, {8, 44}}, + {{11, 45}, {6, 57}, {29, 10}, {11, 44}}, + {{15, 39}, {7, 53}, {37, -5}, {14, 42}}, + {{11, 42}, {6, 52}, {51, -29}, {7, 48}}, + {{13, 44}, {6, 55}, {39, -9}, {4, 
56}}, + {{16, 45}, {11, 45}, {52, -34}, {4, 52}}, + {{12, 41}, {14, 36}, {69, -58}, {13, 37}}, + {{10, 49}, {8, 53}, {67, -63}, {9, 49}}, + {{30, 34}, { -1, 82}, {44, -5}, {19, 58}}, + {{18, 42}, {7, 55}, {32, 7}, {10, 48}}, + {{10, 55}, { -3, 78}, {55, -29}, {12, 45}}, + {{17, 51}, {15, 46}, {32, 1}, {0, 69}}, + {{17, 46}, {22, 31}, {0, 0}, {20, 33}}, + {{0, 89}, { -1, 84}, {27, 36}, {8, 63}}, + {{26, -19}, {25, 7}, {33, -25}, {35, -18}}, + {{22, -17}, {30, -7}, {34, -30}, {33, -25}}, + {{26, -17}, {28, 3}, {36, -28}, {28, -3}}, + {{30, -25}, {28, 4}, {38, -28}, {24, 10}}, + {{28, -20}, {32, 0}, {38, -27}, {27, 0}}, + {{33, -23}, {34, -1}, {34, -18}, {34, -14}}, + {{37, -27}, {30, 6}, {35, -16}, {52, -44}}, + {{33, -23}, {30, 6}, {34, -14}, {39, -24}}, + {{40, -28}, {32, 9}, {32, -8}, {19, 17}}, + {{38, -17}, {31, 19}, {37, -6}, {31, 25}}, + {{33, -11}, {26, 27}, {35, 0}, {36, 29}}, + {{40, -15}, {26, 30}, {30, 10}, {24, 33}}, + {{41, -6}, {37, 20}, {28, 18}, {34, 15}}, + {{38, 1}, {28, 34}, {26, 25}, {30, 20}}, + {{41, 17}, {17, 70}, {29, 41}, {22, 73}}, + {{30, -6}, {1, 67}, {0, 75}, {20, 34}}, + {{27, 3}, {5, 59}, {2, 72}, {19, 31}}, + {{26, 22}, {9, 67}, {8, 77}, {27, 44}}, + {{37, -16}, {16, 30}, {14, 35}, {19, 16}}, + {{35, -4}, {18, 32}, {18, 31}, {15, 36}}, + {{38, -8}, {18, 35}, {17, 35}, {15, 36}}, + {{38, -3}, {22, 29}, {21, 30}, {21, 28}}, + {{37, 3}, {24, 31}, {17, 45}, {25, 21}}, + {{38, 5}, {23, 38}, {20, 42}, {30, 20}}, + {{42, 0}, {18, 43}, {18, 45}, {31, 12}}, + {{35, 16}, {20, 41}, {27, 26}, {27, 16}}, + {{39, 22}, {11, 63}, {16, 54}, {24, 42}}, + {{14, 48}, {9, 59}, {7, 66}, {0, 93}}, + {{27, 37}, {9, 64}, {16, 56}, {14, 56}}, + {{21, 60}, { -1, 94}, {11, 73}, {15, 57}}, + {{12, 68}, { -2, 89}, {10, 67}, {26, 38}}, + {{2, 97}, { -9, 108}, { -10, 116}, { -24, 127}}, + //227-275 Table 9-21 + {{ -3, 71}, { -6, 76}, { -23, 112}, { -24, 115}}, + {{ -6, 42}, { -2, 44}, { -15, 71}, { -22, 82}}, + {{ -5, 50}, {0, 45}, { -7, 61}, { -9, 62}}, + {{ -3, 
54}, {0, 52}, {0, 53}, {0, 53}}, + {{ -2, 62}, { -3, 64}, { -5, 66}, {0, 59}}, + {{0, 58}, { -2, 59}, { -11, 77}, { -14, 85}}, + {{1, 63}, { -4, 70}, { -9, 80}, { -13, 89}}, + {{ -2, 72}, { -4, 75}, { -9, 84}, { -13, 94}}, + {{ -1, 74}, { -8, 82}, { -10, 87}, { -11, 92}}, + {{ -9, 91}, { -17, 102}, { -34, 127}, { -29, 127}}, + {{ -5, 67}, { -9, 77}, { -21, 101}, { -21, 100}}, + {{ -5, 27}, {3, 24}, { -3, 39}, { -14, 57}}, + {{ -3, 39}, {0, 42}, { -5, 53}, { -12, 67}}, + {{ -2, 44}, {0, 48}, { -7, 61}, { -11, 71}}, + {{0, 46}, {0, 55}, { -11, 75}, { -10, 77}}, + {{ -16, 64}, { -6, 59}, { -15, 77}, { -21, 85}}, + {{ -8, 68}, { -7, 71}, { -17, 91}, { -16, 88}}, + {{ -10, 78}, { -12, 83}, { -25, 107}, { -23, 104}}, + {{ -6, 77}, { -11, 87}, { -25, 111}, { -15, 98}}, + {{ -10, 86}, { -30, 119}, { -28, 122}, { -37, 127}}, + {{ -12, 92}, {1, 58}, { -11, 76}, { -10, 82}}, + {{ -15, 55}, { -3, 29}, { -10, 44}, { -8, 48}}, + {{ -10, 60}, { -1, 36}, { -10, 52}, { -8, 61}}, + {{ -6, 62}, {1, 38}, { -10, 57}, { -8, 66}}, + {{ -4, 65}, {2, 43}, { -9, 58}, { -7, 70}}, + {{ -12, 73}, { -6, 55}, { -16, 72}, { -14, 75}}, + {{ -8, 76}, {0, 58}, { -7, 69}, { -10, 79}}, + {{ -7, 80}, {0, 64}, { -4, 69}, { -9, 83}}, + {{ -9, 88}, { -3, 74}, { -5, 74}, { -12, 92}}, + {{ -17, 110}, { -10, 90}, { -9, 86}, { -18, 108}}, + {{ -11, 97}, {0, 70}, {2, 66}, { -4, 79}}, + {{ -20, 84}, { -4, 29}, { -9, 34}, { -22, 69}}, + {{ -11, 79}, {5, 31}, {1, 32}, { -16, 75}}, + {{ -6, 73}, {7, 42}, {11, 31}, { -2, 58}}, + {{ -4, 74}, {1, 59}, {5, 52}, {1, 58}}, + {{ -13, 86}, { -2, 58}, { -2, 55}, { -13, 78}}, + {{ -13, 96}, { -3, 72}, { -2, 67}, { -9, 83}}, + {{ -11, 97}, { -3, 81}, {0, 73}, { -4, 81}}, + {{ -19, 117}, { -11, 97}, { -8, 89}, { -13, 99}}, + {{ -8, 78}, {0, 58}, {3, 52}, { -13, 81}}, + {{ -5, 33}, {8, 5}, {7, 4}, { -6, 38}}, + {{ -4, 48}, {10, 14}, {10, 8}, { -13, 62}}, + {{ -2, 53}, {14, 18}, {17, 8}, { -6, 58}}, + {{ -3, 62}, {13, 27}, {16, 19}, { -2, 59}}, + {{ -13, 71}, {2, 40}, {3, 37}, 
{ -16, 73}}, + {{ -10, 79}, {0, 58}, { -1, 61}, { -10, 76}}, + {{ -12, 86}, { -3, 70}, { -5, 73}, { -13, 86}}, + {{ -13, 90}, { -6, 79}, { -1, 70}, { -9, 83}}, + {{ -14, 97}, { -8, 85}, { -4, 78}, { -10, 87}}, + //276 no use + {{CTX_NA, CTX_NA}, {CTX_NA, CTX_NA}, {CTX_NA, CTX_NA}, {CTX_NA, CTX_NA}}, + //277-337 Table 9-22 + {{ -6, 93}, { -13, 106}, { -21, 126}, { -22, 127}}, + {{ -6, 84}, { -16, 106}, { -23, 124}, { -25, 127}}, + {{ -8, 79}, { -10, 87}, { -20, 110}, { -25, 120}}, + {{0, 66}, { -21, 114}, { -26, 126}, { -27, 127}}, + {{ -1, 71}, { -18, 110}, { -25, 124}, { -19, 114}}, + {{0, 62}, { -14, 98}, { -17, 105}, { -23, 117}}, + {{ -2, 60}, { -22, 110}, { -27, 121}, { -25, 118}}, + {{ -2, 59}, { -21, 106}, { -27, 117}, { -26, 117}}, + {{ -5, 75}, { -18, 103}, { -17, 102}, { -24, 113}}, + {{ -3, 62}, { -21, 107}, { -26, 117}, { -28, 118}}, + {{ -4, 58}, { -23, 108}, { -27, 116}, { -31, 120}}, + {{ -9, 66}, { -26, 112}, { -33, 122}, { -37, 124}}, + {{ -1, 79}, { -10, 96}, { -10, 95}, { -10, 94}}, + {{0, 71}, { -12, 95}, { -14, 100}, { -15, 102}}, + {{3, 68}, { -5, 91}, { -8, 95}, { -10, 99}}, + {{10, 44}, { -9, 93}, { -17, 111}, { -13, 106}}, + {{ -7, 62}, { -22, 94}, { -28, 114}, { -50, 127}}, + {{15, 36}, { -5, 86}, { -6, 89}, { -5, 92}}, + {{14, 40}, {9, 67}, { -2, 80}, {17, 57}}, + {{16, 27}, { -4, 80}, { -4, 82}, { -5, 86}}, + {{12, 29}, { -10, 85}, { -9, 85}, { -13, 94}}, + {{1, 44}, { -1, 70}, { -8, 81}, { -12, 91}}, + {{20, 36}, {7, 60}, { -1, 72}, { -2, 77}}, + {{18, 32}, {9, 58}, {5, 64}, {0, 71}}, + {{5, 42}, {5, 61}, {1, 67}, { -1, 73}}, + {{1, 48}, {12, 50}, {9, 56}, {4, 64}}, + {{10, 62}, {15, 50}, {0, 69}, { -7, 81}}, + {{17, 46}, {18, 49}, {1, 69}, {5, 64}}, + {{9, 64}, {17, 54}, {7, 69}, {15, 57}}, + {{ -12, 104}, {10, 41}, { -7, 69}, {1, 67}}, + {{ -11, 97}, {7, 46}, { -6, 67}, {0, 68}}, + {{ -16, 96}, { -1, 51}, { -16, 77}, { -10, 67}}, + {{ -7, 88}, {7, 49}, { -2, 64}, {1, 68}}, + {{ -8, 85}, {8, 52}, {2, 61}, {0, 77}}, + {{ -7, 85}, {9, 
41}, { -6, 67}, {2, 64}}, + {{ -9, 85}, {6, 47}, { -3, 64}, {0, 68}}, + {{ -13, 88}, {2, 55}, {2, 57}, { -5, 78}}, + {{4, 66}, {13, 41}, { -3, 65}, {7, 55}}, + {{ -3, 77}, {10, 44}, { -3, 66}, {5, 59}}, + {{ -3, 76}, {6, 50}, {0, 62}, {2, 65}}, + {{ -6, 76}, {5, 53}, {9, 51}, {14, 54}}, + {{10, 58}, {13, 49}, { -1, 66}, {15, 44}}, + {{ -1, 76}, {4, 63}, { -2, 71}, {5, 60}}, + {{ -1, 83}, {6, 64}, { -2, 75}, {2, 70}}, + {{ -7, 99}, { -2, 69}, { -1, 70}, { -2, 76}}, + {{ -14, 95}, { -2, 59}, { -9, 72}, { -18, 86}}, + {{2, 95}, {6, 70}, {14, 60}, {12, 70}}, + {{0, 76}, {10, 44}, {16, 37}, {5, 64}}, + {{ -5, 74}, {9, 31}, {0, 47}, { -12, 70}}, + {{0, 70}, {12, 43}, {18, 35}, {11, 55}}, + {{ -11, 75}, {3, 53}, {11, 37}, {5, 56}}, + {{1, 68}, {14, 34}, {12, 41}, {0, 69}}, + {{0, 65}, {10, 38}, {10, 41}, {2, 65}}, + {{ -14, 73}, { -3, 52}, {2, 48}, { -6, 74}}, + {{3, 62}, {13, 40}, {12, 41}, {5, 54}}, + {{4, 62}, {17, 32}, {13, 41}, {7, 54}}, + {{ -1, 68}, {7, 44}, {0, 59}, { -6, 76}}, + {{ -13, 75}, {7, 38}, {3, 50}, { -11, 82}}, + {{11, 55}, {13, 50}, {19, 40}, { -2, 77}}, + {{5, 64}, {10, 57}, {3, 66}, { -2, 77}}, + {{12, 70}, {26, 43}, {18, 50}, {25, 42}}, + //338-398 Table9-23 + {{15, 6}, {14, 11}, {19, -6}, {17, -13}}, + {{6, 19}, {11, 14}, {18, -6}, {16, -9}}, + {{7, 16}, {9, 11}, {14, 0}, {17, -12}}, + {{12, 14}, {18, 11}, {26, -12}, {27, -21}}, + {{18, 13}, {21, 9}, {31, -16}, {37, -30}}, + {{13, 11}, {23, -2}, {33, -25}, {41, -40}}, + {{13, 15}, {32, -15}, {33, -22}, {42, -41}}, + {{15, 16}, {32, -15}, {37, -28}, {48, -47}}, + {{12, 23}, {34, -21}, {39, -30}, {39, -32}}, + {{13, 23}, {39, -23}, {42, -30}, {46, -40}}, + {{15, 20}, {42, -33}, {47, -42}, {52, -51}}, + {{14, 26}, {41, -31}, {45, -36}, {46, -41}}, + {{14, 44}, {46, -28}, {49, -34}, {52, -39}}, + {{17, 40}, {38, -12}, {41, -17}, {43, -19}}, + {{17, 47}, {21, 29}, {32, 9}, {32, 11}}, + {{24, 17}, {45, -24}, {69, -71}, {61, -55}}, + {{21, 21}, {53, -45}, {63, -63}, {56, -46}}, + {{25, 22}, {48, -26}, 
{66, -64}, {62, -50}}, + {{31, 27}, {65, -43}, {77, -74}, {81, -67}}, + {{22, 29}, {43, -19}, {54, -39}, {45, -20}}, + {{19, 35}, {39, -10}, {52, -35}, {35, -2}}, + {{14, 50}, {30, 9}, {41, -10}, {28, 15}}, + {{10, 57}, {18, 26}, {36, 0}, {34, 1}}, + {{7, 63}, {20, 27}, {40, -1}, {39, 1}}, + {{ -2, 77}, {0, 57}, {30, 14}, {30, 17}}, + {{ -4, 82}, { -14, 82}, {28, 26}, {20, 38}}, + {{ -3, 94}, { -5, 75}, {23, 37}, {18, 45}}, + {{9, 69}, { -19, 97}, {12, 55}, {15, 54}}, + {{ -12, 109}, { -35, 125}, {11, 65}, {0, 79}}, + {{36, -35}, {27, 0}, {37, -33}, {36, -16}}, + {{36, -34}, {28, 0}, {39, -36}, {37, -14}}, + {{32, -26}, {31, -4}, {40, -37}, {37, -17}}, + {{37, -30}, {27, 6}, {38, -30}, {32, 1}}, + {{44, -32}, {34, 8}, {46, -33}, {34, 15}}, + {{34, -18}, {30, 10}, {42, -30}, {29, 15}}, + {{34, -15}, {24, 22}, {40, -24}, {24, 25}}, + {{40, -15}, {33, 19}, {49, -29}, {34, 22}}, + {{33, -7}, {22, 32}, {38, -12}, {31, 16}}, + {{35, -5}, {26, 31}, {40, -10}, {35, 18}}, + {{33, 0}, {21, 41}, {38, -3}, {31, 28}}, + {{38, 2}, {26, 44}, {46, -5}, {33, 41}}, + {{33, 13}, {23, 47}, {31, 20}, {36, 28}}, + {{23, 35}, {16, 65}, {29, 30}, {27, 47}}, + {{13, 58}, {14, 71}, {25, 44}, {21, 62}}, + {{29, -3}, {8, 60}, {12, 48}, {18, 31}}, + {{26, 0}, {6, 63}, {11, 49}, {19, 26}}, + {{22, 30}, {17, 65}, {26, 45}, {36, 24}}, + {{31, -7}, {21, 24}, {22, 22}, {24, 23}}, + {{35, -15}, {23, 20}, {23, 22}, {27, 16}}, + {{34, -3}, {26, 23}, {27, 21}, {24, 30}}, + {{34, 3}, {27, 32}, {33, 20}, {31, 29}}, + {{36, -1}, {28, 23}, {26, 28}, {22, 41}}, + {{34, 5}, {28, 24}, {30, 24}, {22, 42}}, + {{32, 11}, {23, 40}, {27, 34}, {16, 60}}, + {{35, 5}, {24, 32}, {18, 42}, {15, 52}}, + {{34, 12}, {28, 29}, {25, 39}, {14, 60}}, + {{39, 11}, {23, 42}, {18, 50}, {3, 78}}, + {{30, 29}, {19, 57}, {12, 70}, { -16, 123}}, + {{34, 26}, {22, 53}, {21, 54}, {21, 53}}, + {{29, 39}, {22, 61}, {14, 71}, {22, 56}}, + {{19, 66}, {11, 86}, {11, 83}, {25, 61}}, + {{31, 21}, {12, 40}, {25, 32}, {21, 33}}, + {{31, 31}, 
{11, 51}, {21, 49}, {19, 50}}, + {{25, 50}, {14, 59}, {21, 54}, {17, 61}}, + //402-459 Table 9-24 + {{ -17, 120}, { -4, 79}, { -5, 85}, { -3, 78}}, + {{ -20, 112}, { -7, 71}, { -6, 81}, { -8, 74}}, + {{ -18, 114}, { -5, 69}, { -10, 77}, { -9, 72}}, + {{ -11, 85}, { -9, 70}, { -7, 81}, { -10, 72}}, + {{ -15, 92}, { -8, 66}, { -17, 80}, { -18, 75}}, + {{ -14, 89}, { -10, 68}, { -18, 73}, { -12, 71}}, + {{ -26, 71}, { -19, 73}, { -4, 74}, { -11, 63}}, + {{ -15, 81}, { -12, 69}, { -10, 83}, { -5, 70}}, + {{ -14, 80}, { -16, 70}, { -9, 71}, { -17, 75}}, + {{0, 68}, { -15, 67}, { -9, 67}, { -14, 72}}, + {{ -14, 70}, { -20, 62}, { -1, 61}, { -16, 67}}, + {{ -24, 56}, { -19, 70}, { -8, 66}, { -8, 53}}, + {{ -23, 68}, { -16, 66}, { -14, 66}, { -14, 59}}, + {{ -24, 50}, { -22, 65}, {0, 59}, { -9, 52}}, + {{ -11, 74}, { -20, 63}, {2, 59}, { -11, 68}}, + {{23, -13}, {9, -2}, {17, -10}, {9, -2}}, + {{26, -13}, {26, -9}, {32, -13}, {30, -10}}, + {{40, -15}, {33, -9}, {42, -9}, {31, -4}}, + {{49, -14}, {39, -7}, {49, -5}, {33, -1}}, + {{44, 3}, {41, -2}, {53, 0}, {33, 7}}, + {{45, 6}, {45, 3}, {64, 3}, {31, 12}}, + {{44, 34}, {49, 9}, {68, 10}, {37, 23}}, + {{33, 54}, {45, 27}, {66, 27}, {31, 38}}, + {{19, 82}, {36, 59}, {47, 57}, {20, 64}}, + {{ -3, 75}, { -6, 66}, { -5, 71}, { -9, 71}}, + {{ -1, 23}, { -7, 35}, {0, 24}, { -7, 37}}, + {{1, 34}, { -7, 42}, { -1, 36}, { -8, 44}}, + {{1, 43}, { -8, 45}, { -2, 42}, { -11, 49}}, + {{0, 54}, { -5, 48}, { -2, 52}, { -10, 56}}, + {{ -2, 55}, { -12, 56}, { -9, 57}, { -12, 59}}, + {{0, 61}, { -6, 60}, { -6, 63}, { -8, 63}}, + {{1, 64}, { -5, 62}, { -4, 65}, { -9, 67}}, + {{0, 68}, { -8, 66}, { -4, 67}, { -6, 68}}, + {{ -9, 92}, { -8, 76}, { -7, 82}, { -10, 79}}, + {{ -14, 106}, { -5, 85}, { -3, 81}, { -3, 78}}, + {{ -13, 97}, { -6, 81}, { -3, 76}, { -8, 74}}, + {{ -15, 90}, { -10, 77}, { -7, 72}, { -9, 72}}, + {{ -12, 90}, { -7, 81}, { -6, 78}, { -10, 72}}, + {{ -18, 88}, { -17, 80}, { -12, 72}, { -18, 75}}, + {{ -10, 73}, { -18, 73}, { 
-14, 68}, { -12, 71}}, + {{ -9, 79}, { -4, 74}, { -3, 70}, { -11, 63}}, + {{ -14, 86}, { -10, 83}, { -6, 76}, { -5, 70}}, + {{ -10, 73}, { -9, 71}, { -5, 66}, { -17, 75}}, + {{ -10, 70}, { -9, 67}, { -5, 62}, { -14, 72}}, + {{ -10, 69}, { -1, 61}, {0, 57}, { -16, 67}}, + {{ -5, 66}, { -8, 66}, { -4, 61}, { -8, 53}}, + {{ -9, 64}, { -14, 66}, { -9, 60}, { -14, 59}}, + {{ -5, 58}, {0, 59}, {1, 54}, { -9, 52}}, + {{2, 59}, {2, 59}, {2, 58}, { -11, 68}}, + {{21, -10}, {21, -13}, {17, -10}, {9, -2}}, + {{24, -11}, {33, -14}, {32, -13}, {30, -10}}, + {{28, -8}, {39, -7}, {42, -9}, {31, -4}}, + {{28, -1}, {46, -2}, {49, -5}, {33, -1}}, + {{29, 3}, {51, 2}, {53, 0}, {33, 7}}, + {{29, 9}, {60, 6}, {64, 3}, {31, 12}}, + {{35, 20}, {61, 17}, {68, 10}, {37, 23}}, + {{29, 36}, {55, 34}, {66, 27}, {31, 38}}, + {{14, 67}, {42, 62}, {47, 57}, {20, 64}}, +}; + +/*Table 9-44 – Specification of rangeTabLPS depending on pStateIdx and qCodIRangeIdx */ + +const uint8_t g_kuiCabacRangeLps[64][4] = { + { 128, 176, 208, 240}, { 128, 167, 197, 227}, { 128, 158, 187, 216}, { 123, 150, 178, 205}, { 116, 142, 169, 195}, { 111, 135, 160, 185}, { 105, 128, 152, 175}, { 100, 122, 144, 166}, + { 95, 116, 137, 158}, { 90, 110, 130, 150}, { 85, 104, 123, 142}, { 81, 99, 117, 135}, { 77, 94, 111, 128}, { 73, 89, 105, 122}, { 69, 85, 100, 116}, { 66, 80, 95, 110}, + { 62, 76, 90, 104}, { 59, 72, 86, 99}, { 56, 69, 81, 94}, { 53, 65, 77, 89}, { 51, 62, 73, 85}, { 48, 59, 69, 80}, { 46, 56, 66, 76}, { 43, 53, 63, 72}, + { 41, 50, 59, 69}, { 39, 48, 56, 65}, { 37, 45, 54, 62}, { 35, 43, 51, 59}, { 33, 41, 48, 56}, { 32, 39, 46, 53}, { 30, 37, 43, 50}, { 29, 35, 41, 48}, + { 27, 33, 39, 45}, { 26, 31, 37, 43}, { 24, 30, 35, 41}, { 23, 28, 33, 39}, { 22, 27, 32, 37}, { 21, 26, 30, 35}, { 20, 24, 29, 33}, { 19, 23, 27, 31}, + { 18, 22, 26, 30}, { 17, 21, 25, 28}, { 16, 20, 23, 27}, { 15, 19, 22, 25}, { 14, 18, 21, 24}, { 14, 17, 20, 23}, { 13, 16, 19, 22}, { 12, 15, 18, 21}, + { 12, 14, 17, 20}, { 11, 14, 
16, 19}, { 11, 13, 15, 18}, { 10, 12, 15, 17}, { 10, 12, 14, 16}, { 9, 11, 13, 15}, { 9, 11, 12, 14}, { 8, 10, 12, 14}, + { 8, 9, 11, 13}, { 7, 9, 11, 12}, { 7, 9, 10, 12}, { 7, 8, 10, 11}, { 6, 8, 9, 11}, { 6, 7, 9, 10}, { 6, 7, 8, 9}, { 2, 2, 2, 2} +}; + +/*Table 9-45 – State transition table*/ + +const uint8_t g_kuiStateTransTable[64][2] = { + + {0, 1}, {0, 2}, {1, 3}, {2, 4}, {2, 5}, {4, 6}, {4, 7}, {5, 8}, {6, 9}, {7, 10}, + + {8, 11}, {9, 12}, {9, 13}, {11, 14}, {11, 15}, {12, 16}, {13, 17}, {13, 18}, {15, 19}, {15, 20}, + + {16, 21}, {16, 22}, {18, 23}, {18, 24}, {19, 25}, {19, 26}, {21, 27}, {21, 28}, {22, 29}, {22, 30}, + + {23, 31}, {24, 32}, {24, 33}, {25, 34}, {26, 35}, {26, 36}, {27, 37}, {27, 38}, {28, 39}, {29, 40}, + + {29, 41}, {30, 42}, {30, 43}, {30, 44}, {31, 45}, {32, 46}, {32, 47}, {33, 48}, {33, 49}, {33, 50}, + + {34, 51}, {34, 52}, {35, 53}, {35, 54}, {35, 55}, {36, 56}, {36, 57}, {36, 58}, {37, 59}, {37, 60}, + + {37, 61}, {38, 62}, {38, 62}, {63, 63} + +}; + +// extern at svc_enc_golomb.h, golomb_common.h + +const uint32_t g_kuiGolombUELength[256] = { + 1, 3, 3, 5, 5, 5, 5, 7, 7, 7, 7, 7, 7, 7, 7, //14 + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, //30 + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,//46 + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,//62 + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,// + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 17 +}; +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +} diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/src/copy_mb.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/common/src/copy_mb.cpp new file mode 100644 index 000000000..96b2099d0 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/src/copy_mb.cpp @@ -0,0 +1,111 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file copy_mb.cpp + * + * \brief copy MB YUV data + * + * \date 2014.04.14 Created + * + ************************************************************************************* + */ + +#include "copy_mb.h" +#include "macros.h" +#include "ls_defines.h" + +/**************************************************************************** + * Copy functions + ****************************************************************************/ +void WelsCopy4x4_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS) { + const int32_t kiSrcStride2 = iStrideS << 1; + const int32_t kiSrcStride3 = iStrideS + kiSrcStride2; + const int32_t kiDstStride2 = iStrideD << 1; + const int32_t kiDstStride3 = iStrideD + kiDstStride2; + + ST32 (pDst, LD32 (pSrc)); + ST32 (pDst + iStrideD, LD32 (pSrc + iStrideS)); + ST32 (pDst + kiDstStride2, LD32 (pSrc + kiSrcStride2)); + ST32 (pDst + kiDstStride3, LD32 (pSrc + kiSrcStride3)); +} +void WelsCopy8x4_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS) { + WelsCopy4x4_c (pDst, iStrideD, pSrc, iStrideS); + WelsCopy4x4_c (pDst + 4, iStrideD, pSrc + 4, iStrideS); +} +void WelsCopy4x8_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS) { + WelsCopy4x4_c (pDst, iStrideD, pSrc, iStrideS); + WelsCopy4x4_c (pDst + (iStrideD << 2), iStrideD, pSrc + (iStrideS << 2), iStrideS); +} +void WelsCopy8x8_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS) { + 
int32_t i; + for (i = 0; i < 4; i++) { + ST32 (pDst, LD32 (pSrc)); + ST32 (pDst + 4 , LD32 (pSrc + 4)); + ST32 (pDst + iStrideD, LD32 (pSrc + iStrideS)); + ST32 (pDst + iStrideD + 4 , LD32 (pSrc + iStrideS + 4)); + pDst += iStrideD << 1; + pSrc += iStrideS << 1; + } +} +void WelsCopy8x16_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS) { + int32_t i; + for (i = 0; i < 8; ++i) { + ST32 (pDst, LD32 (pSrc)); + ST32 (pDst + 4 , LD32 (pSrc + 4)); + ST32 (pDst + iStrideD, LD32 (pSrc + iStrideS)); + ST32 (pDst + iStrideD + 4 , LD32 (pSrc + iStrideS + 4)); + pDst += iStrideD << 1; + pSrc += iStrideS << 1; + } +} +void WelsCopy16x8_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS) { + int32_t i; + for (i = 0; i < 8; i++) { + ST32 (pDst, LD32 (pSrc)); + ST32 (pDst + 4 , LD32 (pSrc + 4)); + ST32 (pDst + 8 , LD32 (pSrc + 8)); + ST32 (pDst + 12 , LD32 (pSrc + 12)); + pDst += iStrideD ; + pSrc += iStrideS; + } +} +void WelsCopy16x16_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS) { + int32_t i; + for (i = 0; i < 16; i++) { + ST32 (pDst, LD32 (pSrc)); + ST32 (pDst + 4 , LD32 (pSrc + 4)); + ST32 (pDst + 8 , LD32 (pSrc + 8)); + ST32 (pDst + 12 , LD32 (pSrc + 12)); + pDst += iStrideD ; + pSrc += iStrideS; + } +} + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/src/cpu.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/common/src/cpu.cpp new file mode 100644 index 000000000..fb5d3dae4 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/src/cpu.cpp @@ -0,0 +1,359 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
#define    CPU_Vendor_AMD    "AuthenticAMD"
#define    CPU_Vendor_INTEL  "GenuineIntel"
#define    CPU_Vendor_CYRIX  "CyrixInstead"

/*
 * WelsCPUFeatureDetect() has one implementation per build configuration
 * (X86_ASM, HAVE_NEON, HAVE_NEON_AARCH64, mips, generic fallback). All of them
 * return a bit mask of WELS_CPU_* flags. Only the x86 and Android variants
 * write to *pNumberOfLogicProcessors; every other variant leaves the caller's
 * value untouched.
 */

#if defined(X86_ASM)

/*!
 * \brief   Detects x86 CPU features through CPUID.
 *
 * \param   pNumberOfLogicProcessors  optional (may be NULL). When CPUID is
 *          usable and MMX is present, receives the logical processor count
 *          reported by CPUID, or 0 when no count is reported. It is not
 *          written on the early-return paths.
 * \return  bit mask of WELS_CPU_* flags; 0 when CPUID is unavailable, reports
 *          no basic leaves, or the CPU lacks MMX.
 */
uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
  uint32_t uiCPU = 0;
  uint32_t uiFeatureA = 0, uiFeatureB = 0, uiFeatureC = 0, uiFeatureD = 0;
  // Leaf 0 returns the vendor string in EBX, EDX, ECX order; collect the
  // registers as words and copy them out as bytes instead of casting a byte
  // array to uint32_t*.
  uint32_t uiVendorWords[3] = { 0 };
  char chVendorName[16] = { 0 };

  if (!WelsCPUIdVerify()) {
    /* cpuid is not supported in cpu */
    return 0;
  }

  WelsCPUId (0, &uiFeatureA, &uiVendorWords[0], &uiVendorWords[2], &uiVendorWords[1]);
  memcpy (chVendorName, uiVendorWords, sizeof (uiVendorWords));
  const uint32_t kuiMaxCpuidLevel = uiFeatureA;
  if (kuiMaxCpuidLevel == 0) {
    /* maximum input value for basic cpuid information */
    return 0;
  }

  const bool kbIsIntel = !strcmp (chVendorName, CPU_Vendor_INTEL); // confirmed_safe_unsafe_usage
  const bool kbIsAmd   = !strcmp (chVendorName, CPU_Vendor_AMD);   // confirmed_safe_unsafe_usage
  const bool kbIsCyrix = !strcmp (chVendorName, CPU_Vendor_CYRIX); // confirmed_safe_unsafe_usage

  WelsCPUId (1, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
  // Later CPUID calls reuse the same output variables, so keep the leaf-1
  // values that are needed again further down (processor count, family/model,
  // cache line size).
  const uint32_t kuiLeaf1Eax = uiFeatureA;
  const uint32_t kuiLeaf1Ebx = uiFeatureB;
  if ((uiFeatureD & 0x00800000) == 0) {
    /* Basic MMX technology is not support in cpu, mean nothing for us so return here */
    return 0;
  }

  uiCPU = WELS_CPU_MMX;
  if (uiFeatureD & 0x02000000) {
    /* SSE technology is identical to AMD MMX extensions */
    uiCPU |= WELS_CPU_MMXEXT | WELS_CPU_SSE;
  }
  if (uiFeatureD & 0x04000000) {
    /* SSE2 support here */
    uiCPU |= WELS_CPU_SSE2;
  }
  if (uiFeatureD & 0x00000001) {
    /* x87 FPU on-chip checking */
    uiCPU |= WELS_CPU_FPU;
  }
  if (uiFeatureD & 0x00008000) {
    /* CMOV instruction checking */
    uiCPU |= WELS_CPU_CMOV;
  }
  if ((kbIsIntel || kbIsAmd) && (uiFeatureD & 0x10000000)) {
    /* Multi-Threading checking: contains of multiple logic processors */
    uiCPU |= WELS_CPU_HTT;
  }

  if (uiFeatureC & 0x00000001) {
    /* SSE3 support here */
    uiCPU |= WELS_CPU_SSE3;
  }
  if (uiFeatureC & 0x00000200) {
    /* SSSE3 support here */
    uiCPU |= WELS_CPU_SSSE3;
  }
  if (uiFeatureC & 0x00080000) {
    /* SSE4.1 support here, 45nm Penryn processor */
    uiCPU |= WELS_CPU_SSE41;
  }
  if (uiFeatureC & 0x00100000) {
    /* SSE4.2 support here, next generation Nehalem processor */
    uiCPU |= WELS_CPU_SSE42;
  }
  if (WelsCPUSupportAVX (uiFeatureA, uiFeatureC)) {
    /* AVX supported */
    uiCPU |= WELS_CPU_AVX;
  }
  if (WelsCPUSupportFMA (uiFeatureA, uiFeatureC)) {
    /* AVX FMA supported */
    uiCPU |= WELS_CPU_FMA;
  }
  if (uiFeatureC & 0x02000000) {
    /* AES checking */
    uiCPU |= WELS_CPU_AES;
  }
  if (uiFeatureC & 0x00400000) {
    /* MOVBE checking */
    uiCPU |= WELS_CPU_MOVBE;
  }

  if (kuiMaxCpuidLevel >= 7) {
    // NOTE(review): uiFeatureC is zeroed before the call as in the original;
    // presumably WelsCPUId passes *pFeatureC in as the ECX sub-leaf -- confirm
    // against the assembly implementation.
    uiFeatureC = 0;
    WelsCPUId (7, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
    if ((uiCPU & WELS_CPU_AVX) && (uiFeatureB & 0x00000020)) {
      /* AVX2 supported */
      uiCPU |= WELS_CPU_AVX2;
    }
  }

  if (pNumberOfLogicProcessors != NULL) {
    if (uiCPU & WELS_CPU_HTT) {
      // feature bits: 23-16 on the EBX returned by leaf 1. This must not read
      // uiFeatureB, which holds leaf-7 EBX once the AVX2 probe above has run.
      *pNumberOfLogicProcessors = (kuiLeaf1Ebx & 0x00ff0000) >> 16;
    } else {
      *pNumberOfLogicProcessors = 0;
    }
    if (kbIsIntel && kuiMaxCpuidLevel >= 4) {
      uiFeatureC = 0;
      WelsCPUId (0x4, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
      if (uiFeatureA != 0) {
        *pNumberOfLogicProcessors = ((uiFeatureA & 0xfc000000) >> 26) + 1;
      }
    }
  }

  // Highest supported extended leaf is returned in EAX.
  WelsCPUId (0x80000000, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);

  if (kbIsAmd && (uiFeatureA >= 0x80000001)) {
    WelsCPUId (0x80000001, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
    if (uiFeatureD & 0x00400000) {
      uiCPU |= WELS_CPU_MMXEXT;
    }
    if (uiFeatureD & 0x80000000) {
      uiCPU |= WELS_CPU_3DNOW;
    }
  }

  if (kbIsIntel) {
    const int32_t kiFamily = ((kuiLeaf1Eax >> 8) & 0xf) + ((kuiLeaf1Eax >> 20) & 0xff);
    const int32_t kiModel  = ((kuiLeaf1Eax >> 4) & 0xf) + ((kuiLeaf1Eax >> 12) & 0xf0);

    // SSE2/SSE3 are deliberately masked out on family 6, models 9/13/14
    // (presumably Pentium M / early Core parts where they are not worth using
    // -- TODO confirm the rationale upstream).
    if ((kiFamily == 6) && (kiModel == 9 || kiModel == 13 || kiModel == 14)) {
      uiCPU &= ~ (WELS_CPU_SSE2 | WELS_CPU_SSE3);
    }
  }

  // get cache line size
  if (kbIsIntel || kbIsCyrix) {
    // ((clflush_line_size >> 8) << 3), CLFLUSH_line_size * 8 = CacheLineSize_in_byte
    const int32_t kiCacheLineSize = (kuiLeaf1Ebx & 0xff00) >> 5;

    if (kiCacheLineSize == 128) {
      uiCPU |= WELS_CPU_CACHELINE_128;
    } else if (kiCacheLineSize == 64) {
      uiCPU |= WELS_CPU_CACHELINE_64;
    } else if (kiCacheLineSize == 32) {
      uiCPU |= WELS_CPU_CACHELINE_32;
    } else if (kiCacheLineSize == 16) {
      uiCPU |= WELS_CPU_CACHELINE_16;
    }
  }

  return uiCPU;
}


/*!
 * \brief   Calls WelsEmms() when the given feature mask contains any of the
 *          MMX / MMXEXT / 3DNow! / 3DNow!Ext flags.
 * \param   kuiCPU  WELS_CPU_* feature mask, e.g. as returned by WelsCPUFeatureDetect()
 */
void WelsCPURestore (const uint32_t kuiCPU) {
  if (kuiCPU & (WELS_CPU_MMX | WELS_CPU_MMXEXT | WELS_CPU_3DNOW | WELS_CPU_3DNOWEXT)) {
    WelsEmms();
  }
}

#elif defined(HAVE_NEON) //For supporting both android platform and iOS platform
#if defined(ANDROID_NDK)
/*!
 * \brief   ARM feature detection through the Android cpufeatures library.
 * \param   pNumberOfLogicProcessors  optional (may be NULL); receives android_getCpuCount()
 * \return  WELS_CPU_ARMv7 / WELS_CPU_VFPv3 / WELS_CPU_NEON as reported for the
 *          ARM CPU family, 0 for any other family
 */
uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
  uint32_t         uiCPU = 0;
  AndroidCpuFamily cpuFamily = ANDROID_CPU_FAMILY_UNKNOWN;
  uint64_t         uiFeatures = 0;
  cpuFamily = android_getCpuFamily();
  if (cpuFamily == ANDROID_CPU_FAMILY_ARM) {
    uiFeatures = android_getCpuFeatures();
    if (uiFeatures & ANDROID_CPU_ARM_FEATURE_ARMv7) {
      uiCPU |= WELS_CPU_ARMv7;
    }
    if (uiFeatures & ANDROID_CPU_ARM_FEATURE_VFPv3) {
      uiCPU |= WELS_CPU_VFPv3;
    }
    if (uiFeatures & ANDROID_CPU_ARM_FEATURE_NEON) {
      uiCPU |= WELS_CPU_NEON;
    }
  }

  if (pNumberOfLogicProcessors != NULL) {
    *pNumberOfLogicProcessors = android_getCpuCount();
  }

  return uiCPU;
}

#elif defined(__APPLE__)
/*!
 * \brief   Compile-time ARM feature selection for Apple targets.
 * \param   pNumberOfLogicProcessors  not used; the pointee is left untouched
 * \return  ARMv7 | VFPv3 | NEON when built with __ARM_NEON__, otherwise 0
 */
uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
  uint32_t    uiCPU = 0;

#if defined(__ARM_NEON__)
  uiCPU |= WELS_CPU_ARMv7;
  uiCPU |= WELS_CPU_VFPv3;
  uiCPU |= WELS_CPU_NEON;
#endif
  return uiCPU;
}
#elif defined(__linux__)

/* Generic arm/linux cpu feature detection */
/*!
 * \brief   Derives NEON / VFPv3 support from the first "Features" line of /proc/cpuinfo.
 * \param   pNumberOfLogicProcessors  not used; the pointee is left untouched
 * \return  WELS_CPU_NEON and/or WELS_CPU_VFPv3. When /proc/cpuinfo cannot be
 *          opened, only what is known at compile time (NEON on __chromeos__).
 */
uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
  uint32_t flags = 0;
  FILE* f = fopen ("/proc/cpuinfo", "r");

#if defined(__chromeos__)
  flags |= WELS_CPU_NEON;
#endif

  if (!f) {
    return flags;
  }

  char buf[200];
  while (fgets (buf, sizeof (buf), f)) {
    if (!strncmp (buf, "Features", strlen ("Features"))) {
      // The asimd and fp features are listed on 64 bit ARMv8 kernels
      if (strstr (buf, " neon ") || strstr (buf, " asimd "))
        flags |= WELS_CPU_NEON;
      if (strstr (buf, " vfpv3 ") || strstr (buf, " fp "))
        flags |= WELS_CPU_VFPv3;
      break;
    }
  }
  fclose (f);
  return flags;
}

#else /* HAVE_NEON enabled but no runtime detection */

/* No runtime feature detection available, but built with HAVE_NEON - assume
 * that NEON and all associated features are available. */

uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
  return WELS_CPU_ARMv7 |
         WELS_CPU_VFPv3 |
         WELS_CPU_NEON;
}
#endif
#elif defined(HAVE_NEON_AARCH64)

/* For AArch64, no runtime detection actually is necessary for now, since
 * NEON and VFPv3 is mandatory on all such CPUs. (/proc/cpuinfo doesn't
 * contain neon, and the android cpufeatures library doesn't return it
 * either.) */

uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
  return WELS_CPU_VFPv3 |
         WELS_CPU_NEON;
}

#elif defined(mips)
/* for loongson */
/*!
 * \brief   Reads MMI / MSA support from /proc/cpuinfo (Linux only).
 *
 * Examines the first "model name" line (Loongson-3A/3B/2K imply MMI) and the
 * first "ASEs implemented" line (loongson-mmi together with loongson-ext imply
 * MMI, msa implies MSA). Both keys are matched in a single pass so that the
 * ASE line is still evaluated when the file has no "model name" line or lists
 * the two keys in the opposite order.
 *
 * \return  WELS_CPU_MMI and/or WELS_CPU_MSA; 0 when the file cannot be opened
 *          or when not built for Linux
 */
static uint32_t get_cpu_flags_from_cpuinfo(void)
{
  uint32_t flags = 0;

# ifdef __linux__
  FILE* fp = fopen("/proc/cpuinfo", "r");
  if (!fp)
    return flags;

  char buf[200];
  bool bModelSeen = false;
  bool bAsesSeen = false;
  while (!(bModelSeen && bAsesSeen) && fgets(buf, sizeof(buf), fp)) {
    if (!bModelSeen && !strncmp(buf, "model name", strlen("model name"))) {
      if (strstr(buf, "Loongson-3A") || strstr(buf, "Loongson-3B") ||
          strstr(buf, "Loongson-2K")) {
        flags |= WELS_CPU_MMI;
      }
      bModelSeen = true;
    } else if (!bAsesSeen && !strncmp(buf, "ASEs implemented", strlen("ASEs implemented"))) {
      if (strstr(buf, "loongson-mmi") && strstr(buf, "loongson-ext")) {
        flags |= WELS_CPU_MMI;
      }
      if (strstr(buf, "msa")) {
        flags |= WELS_CPU_MSA;
      }
      bAsesSeen = true;
    }
  }
  fclose(fp);
# endif

  return flags;
}

/*!
 * \brief   MIPS feature detection; forwards to get_cpu_flags_from_cpuinfo().
 * \param   pNumberOfLogicProcessors  not used; the pointee is left untouched
 */
uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
  return get_cpu_flags_from_cpuinfo();
}

#else /* Neither X86_ASM, HAVE_NEON, HAVE_NEON_AARCH64 nor mips */

/*!
 * \brief   Fallback for builds without any platform-specific detection.
 * \param   pNumberOfLogicProcessors  not used; the pointee is left untouched
 * \return  always 0 (no optional CPU features)
 */
uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
  return 0;
}

#endif
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file crt_util_safe_x.cpp + * + * \brief common tool/function utilization + * + * \date 03/10/2009 Created + * + ************************************************************************************* + */ + +#include +#include +#include +#include +#if defined(_WIN32) +#include +#include +#include +#ifndef _MSC_VER +#include +#endif //!_MSC_VER +#else +#include +#endif //_WIN32 + +#include "macros.h" +#include "crt_util_safe_x.h" // Safe CRT routines like utils for cross platforms + +#if defined(_WIN32) && defined(_MSC_VER) + +#if defined(_MSC_VER) && (_MSC_VER>=1500) + +int32_t WelsSnprintf (char* pBuffer, int32_t iSizeOfBuffer, const char* kpFormat, ...) 
{ + va_list pArgPtr; + int32_t iRc; + + va_start (pArgPtr, kpFormat); + + iRc = vsnprintf_s (pBuffer, iSizeOfBuffer, _TRUNCATE, kpFormat, pArgPtr); + if (iRc < 0) + iRc = iSizeOfBuffer; + + va_end (pArgPtr); + + return iRc; +} + +char* WelsStrncpy (char* pDest, int32_t iSizeInBytes, const char* kpSrc) { + strncpy_s (pDest, iSizeInBytes, kpSrc, _TRUNCATE); + + return pDest; +} + +int32_t WelsVsnprintf (char* pBuffer, int32_t iSizeOfBuffer, const char* kpFormat, va_list pArgPtr) { + int32_t iRc = vsnprintf_s (pBuffer, iSizeOfBuffer, _TRUNCATE, kpFormat, pArgPtr); + if (iRc < 0) + iRc = iSizeOfBuffer; + return iRc; +} + +WelsFileHandle* WelsFopen (const char* kpFilename, const char* kpMode) { + WelsFileHandle* pFp = NULL; + if (fopen_s (&pFp, kpFilename, kpMode) != 0) { + return NULL; + } + + return pFp; +} + +int32_t WelsFclose (WelsFileHandle* pFp) { + return fclose (pFp); +} + +int32_t WelsGetTimeOfDay (SWelsTime* pTp) { + return _ftime_s (pTp); +} + +int32_t WelsStrftime (char* pBuffer, int32_t iSize, const char* kpFormat, const SWelsTime* kpTp) { + struct tm sTimeNow; + int32_t iRc; + + localtime_s (&sTimeNow, &kpTp->time); + + iRc = (int32_t)strftime (pBuffer, iSize, kpFormat, &sTimeNow); + if (iRc == 0) + pBuffer[0] = '\0'; + return iRc; +} + +#else + +int32_t WelsSnprintf (char* pBuffer, int32_t iSizeOfBuffer, const char* kpFormat, ...) 
{ + va_list pArgPtr; + int32_t iRc; + + va_start (pArgPtr, kpFormat); + + iRc = vsnprintf (pBuffer, iSizeOfBuffer, kpFormat, pArgPtr); //confirmed_safe_unsafe_usage + if (iRc < 0) { + pBuffer[iSizeOfBuffer - 1] = '\0'; + iRc = iSizeOfBuffer; + } + + va_end (pArgPtr); + + return iRc; +} + +char* WelsStrncpy (char* pDest, int32_t iSizeInBytes, const char* kpSrc) { + strncpy (pDest, kpSrc, iSizeInBytes); //confirmed_safe_unsafe_usage + pDest[iSizeInBytes - 1] = '\0'; + + return pDest; +} + +int32_t WelsVsnprintf (char* pBuffer, int32_t iSizeOfBuffer, const char* kpFormat, va_list pArgPtr) { + int32_t iRc = vsnprintf (pBuffer, iSizeOfBuffer, kpFormat, pArgPtr); //confirmed_safe_unsafe_usage + if (iRc < 0) { + pBuffer[iSizeOfBuffer - 1] = '\0'; + iRc = iSizeOfBuffer; + } + return iRc; +} + + +WelsFileHandle* WelsFopen (const char* kpFilename, const char* kpMode) { + return fopen (kpFilename, kpMode); +} + +int32_t WelsFclose (WelsFileHandle* pFp) { + return fclose (pFp); +} + +int32_t WelsGetTimeOfDay (SWelsTime* pTp) { + _ftime (pTp); + return 0; +} + +int32_t WelsStrftime (char* pBuffer, int32_t iSize, const char* kpFormat, const SWelsTime* kpTp) { + struct tm* pTnow; + int32_t iRc; + + pTnow = localtime (&kpTp->time); + + iRc = strftime (pBuffer, iSize, kpFormat, pTnow); + if (iRc == 0) + pBuffer[0] = '\0'; + return iRc; +} + + +#endif // _MSC_VER + +#else //GCC + +int32_t WelsSnprintf (char* pBuffer, int32_t iSizeOfBuffer, const char* kpFormat, ...) 
{ + va_list pArgPtr; + int32_t iRc; + + va_start (pArgPtr, kpFormat); + + iRc = vsnprintf (pBuffer, iSizeOfBuffer, kpFormat, pArgPtr); + + va_end (pArgPtr); + + return iRc; +} + +char* WelsStrncpy (char* pDest, int32_t iSizeInBytes, const char* kpSrc) { + strncpy (pDest, kpSrc, iSizeInBytes); //confirmed_safe_unsafe_usage + pDest[iSizeInBytes - 1] = '\0'; + return pDest; +} + +int32_t WelsVsnprintf (char* pBuffer, int32_t iSizeOfBuffer, const char* kpFormat, va_list pArgPtr) { + return vsnprintf (pBuffer, iSizeOfBuffer, kpFormat, pArgPtr); //confirmed_safe_unsafe_usage +} + +WelsFileHandle* WelsFopen (const char* kpFilename, const char* kpMode) { + return fopen (kpFilename, kpMode); +} + +int32_t WelsFclose (WelsFileHandle* pFp) { + return fclose (pFp); +} + +int32_t WelsGetTimeOfDay (SWelsTime* pTp) { + struct timeval sTv; + + if (gettimeofday (&sTv, NULL)) { + return -1; + } + + pTp->time = sTv.tv_sec; + pTp->millitm = (uint16_t) (sTv.tv_usec / 1000); // divide before narrowing: tv_usec can reach 999999 and would wrap if cast to 16 bits first + + return 0; +} + +int32_t WelsStrftime (char* pBuffer, int32_t iSize, const char* kpFormat, const SWelsTime* kpTp) { + struct tm* pTnow; + int32_t iRc; + + pTnow = localtime (&kpTp->time); + + iRc = (int32_t) strftime (pBuffer, iSize, kpFormat, pTnow); + if (iRc == 0) + pBuffer[0] = '\0'; + return iRc; +} + +#endif + + +char* WelsStrcat (char* pDest, uint32_t uiSizeInBytes, const char* kpSrc) { + uint32_t uiCurLen = (uint32_t) strlen (pDest); + if (uiSizeInBytes > uiCurLen) + return WelsStrncpy (pDest + uiCurLen, uiSizeInBytes - uiCurLen, kpSrc); + return pDest; +} + +int32_t WelsFwrite (const void* kpBuffer, int32_t iSize, int32_t iCount, WelsFileHandle* pFp) { + return (int32_t)fwrite (kpBuffer, iSize, iCount, pFp); +} + +uint16_t WelsGetMillisecond (const SWelsTime* kpTp) { + return kpTp->millitm; +} + +int32_t WelsFseek (WelsFileHandle* fp, int32_t offset, int32_t origin) { + return fseek (fp, offset, origin); +} + +int32_t WelsFflush (WelsFileHandle* pFp) { + return fflush (pFp); +} diff --git 
a/TMessagesProj/jni/third_party/openh264/src/codec/common/src/deblocking_common.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/common/src/deblocking_common.cpp new file mode 100644 index 000000000..096859b3a --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/src/deblocking_common.cpp @@ -0,0 +1,295 @@ +#include "deblocking_common.h" +#include "macros.h" + +// C code only +void DeblockLumaLt4_c (uint8_t* pPix, int32_t iStrideX, int32_t iStrideY, int32_t iAlpha, int32_t iBeta, + int8_t* pTc) { + for (int32_t i = 0; i < 16; i++) { + int32_t iTc0 = pTc[i >> 2]; + if (iTc0 >= 0) { + int32_t p0 = pPix[-iStrideX]; + int32_t p1 = pPix[-2 * iStrideX]; + int32_t p2 = pPix[-3 * iStrideX]; + int32_t q0 = pPix[0]; + int32_t q1 = pPix[iStrideX]; + int32_t q2 = pPix[2 * iStrideX]; + bool bDetaP0Q0 = WELS_ABS (p0 - q0) < iAlpha; + bool bDetaP1P0 = WELS_ABS (p1 - p0) < iBeta; + bool bDetaQ1Q0 = WELS_ABS (q1 - q0) < iBeta; + int32_t iTc = iTc0; + if (bDetaP0Q0 && bDetaP1P0 && bDetaQ1Q0) { + bool bDetaP2P0 = WELS_ABS (p2 - p0) < iBeta; + bool bDetaQ2Q0 = WELS_ABS (q2 - q0) < iBeta; + if (bDetaP2P0) { + pPix[-2 * iStrideX] = p1 + WELS_CLIP3 ((p2 + ((p0 + q0 + 1) >> 1) - (p1 * (1 << 1))) >> 1, -iTc0, iTc0); + iTc++; + } + if (bDetaQ2Q0) { + pPix[iStrideX] = q1 + WELS_CLIP3 ((q2 + ((p0 + q0 + 1) >> 1) - (q1 * (1 << 1))) >> 1, -iTc0, iTc0); + iTc++; + } + int32_t iDeta = WELS_CLIP3 ((((q0 - p0) * (1 << 2)) + (p1 - q1) + 4) >> 3, -iTc, iTc); + pPix[-iStrideX] = WelsClip1 (p0 + iDeta); /* p0' */ + pPix[0] = WelsClip1 (q0 - iDeta); /* q0' */ + } + } + pPix += iStrideY; + } +} +void DeblockLumaEq4_c (uint8_t* pPix, int32_t iStrideX, int32_t iStrideY, int32_t iAlpha, int32_t iBeta) { + int32_t p0, p1, p2, q0, q1, q2; + int32_t iDetaP0Q0; + bool bDetaP1P0, bDetaQ1Q0; + for (int32_t i = 0; i < 16; i++) { + p0 = pPix[-iStrideX]; + p1 = pPix[-2 * iStrideX]; + p2 = pPix[-3 * iStrideX]; + q0 = pPix[0]; + q1 = pPix[iStrideX]; + q2 = pPix[2 * iStrideX]; + iDetaP0Q0 = 
WELS_ABS (p0 - q0); + bDetaP1P0 = WELS_ABS (p1 - p0) < iBeta; + bDetaQ1Q0 = WELS_ABS (q1 - q0) < iBeta; + if ((iDetaP0Q0 < iAlpha) && bDetaP1P0 && bDetaQ1Q0) { + if (iDetaP0Q0 < ((iAlpha >> 2) + 2)) { + bool bDetaP2P0 = WELS_ABS (p2 - p0) < iBeta; + bool bDetaQ2Q0 = WELS_ABS (q2 - q0) < iBeta; + if (bDetaP2P0) { + const int32_t p3 = pPix[-4 * iStrideX]; + pPix[-iStrideX] = (p2 + (p1 * (1 << 1)) + (p0 * (1 << 1)) + (q0 * (1 << 1)) + q1 + 4) >> 3; //p0 + pPix[-2 * iStrideX] = (p2 + p1 + p0 + q0 + 2) >> 2; //p1 + pPix[-3 * iStrideX] = ((p3 * (1 << 1)) + p2 + (p2 * (1 << 1)) + p1 + p0 + q0 + 4) >> 3; //p2 + } else { + pPix[-1 * iStrideX] = ((p1 * (1 << 1)) + p0 + q1 + 2) >> 2; //p0 + } + if (bDetaQ2Q0) { + const int32_t q3 = pPix[3 * iStrideX]; + pPix[0] = (p1 + (p0 * (1 << 1)) + (q0 * (1 << 1)) + (q1 * (1 << 1)) + q2 + 4) >> 3; //q0 + pPix[iStrideX] = (p0 + q0 + q1 + q2 + 2) >> 2; //q1 + pPix[2 * iStrideX] = ((q3 * (1 << 1)) + q2 + (q2 * (1 << 1)) + q1 + q0 + p0 + 4) >> 3; //q2 + } else { + pPix[0] = ((q1 * (1 << 1)) + q0 + p1 + 2) >> 2; //q0 + } + } else { + pPix[-iStrideX] = ((p1 * (1 << 1)) + p0 + q1 + 2) >> 2; //p0 + pPix[ 0] = ((q1 * (1 << 1)) + q0 + p1 + 2) >> 2; //q0 + } + } + pPix += iStrideY; + } +} +void DeblockLumaLt4V_c (uint8_t* pPix, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* tc) { + DeblockLumaLt4_c (pPix, iStride, 1, iAlpha, iBeta, tc); +} +void DeblockLumaLt4H_c (uint8_t* pPix, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* tc) { + DeblockLumaLt4_c (pPix, 1, iStride, iAlpha, iBeta, tc); +} +void DeblockLumaEq4V_c (uint8_t* pPix, int32_t iStride, int32_t iAlpha, int32_t iBeta) { + DeblockLumaEq4_c (pPix, iStride, 1, iAlpha, iBeta); +} +void DeblockLumaEq4H_c (uint8_t* pPix, int32_t iStride, int32_t iAlpha, int32_t iBeta) { + DeblockLumaEq4_c (pPix, 1, iStride, iAlpha, iBeta); +} +void DeblockChromaLt4_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStrideX, int32_t iStrideY, int32_t iAlpha, + int32_t iBeta, int8_t* pTc) { + int32_t 
p0, p1, q0, q1, iDeta; + bool bDetaP0Q0, bDetaP1P0, bDetaQ1Q0; + + for (int32_t i = 0; i < 8; i++) { + int32_t iTc0 = pTc[i >> 1]; + if (iTc0 > 0) { + p0 = pPixCb[-iStrideX]; + p1 = pPixCb[-2 * iStrideX]; + q0 = pPixCb[0]; + q1 = pPixCb[iStrideX]; + + bDetaP0Q0 = WELS_ABS (p0 - q0) < iAlpha; + bDetaP1P0 = WELS_ABS (p1 - p0) < iBeta; + bDetaQ1Q0 = WELS_ABS (q1 - q0) < iBeta; + if (bDetaP0Q0 && bDetaP1P0 && bDetaQ1Q0) { + iDeta = WELS_CLIP3 ((((q0 - p0) * (1 << 2)) + (p1 - q1) + 4) >> 3, -iTc0, iTc0); + pPixCb[-iStrideX] = WelsClip1 (p0 + iDeta); /* p0' */ + pPixCb[0] = WelsClip1 (q0 - iDeta); /* q0' */ + } + + + p0 = pPixCr[-iStrideX]; + p1 = pPixCr[-2 * iStrideX]; + q0 = pPixCr[0]; + q1 = pPixCr[iStrideX]; + + bDetaP0Q0 = WELS_ABS (p0 - q0) < iAlpha; + bDetaP1P0 = WELS_ABS (p1 - p0) < iBeta; + bDetaQ1Q0 = WELS_ABS (q1 - q0) < iBeta; + + if (bDetaP0Q0 && bDetaP1P0 && bDetaQ1Q0) { + iDeta = WELS_CLIP3 ((((q0 - p0) * (1 << 2)) + (p1 - q1) + 4) >> 3, -iTc0, iTc0); + pPixCr[-iStrideX] = WelsClip1 (p0 + iDeta); /* p0' */ + pPixCr[0] = WelsClip1 (q0 - iDeta); /* q0' */ + } + } + pPixCb += iStrideY; + pPixCr += iStrideY; + } +} +void DeblockChromaEq4_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStrideX, int32_t iStrideY, int32_t iAlpha, + int32_t iBeta) { + int32_t p0, p1, q0, q1; + bool bDetaP0Q0, bDetaP1P0, bDetaQ1Q0; + for (int32_t i = 0; i < 8; i++) { + //cb + p0 = pPixCb[-iStrideX]; + p1 = pPixCb[-2 * iStrideX]; + q0 = pPixCb[0]; + q1 = pPixCb[iStrideX]; + bDetaP0Q0 = WELS_ABS (p0 - q0) < iAlpha; + bDetaP1P0 = WELS_ABS (p1 - p0) < iBeta; + bDetaQ1Q0 = WELS_ABS (q1 - q0) < iBeta; + if (bDetaP0Q0 && bDetaP1P0 && bDetaQ1Q0) { + pPixCb[-iStrideX] = ((p1 * (1 << 1)) + p0 + q1 + 2) >> 2; /* p0' */ + pPixCb[0] = ((q1 * (1 << 1)) + q0 + p1 + 2) >> 2; /* q0' */ + } + + //cr + p0 = pPixCr[-iStrideX]; + p1 = pPixCr[-2 * iStrideX]; + q0 = pPixCr[0]; + q1 = pPixCr[iStrideX]; + bDetaP0Q0 = WELS_ABS (p0 - q0) < iAlpha; + bDetaP1P0 = WELS_ABS (p1 - p0) < iBeta; + bDetaQ1Q0 = 
WELS_ABS (q1 - q0) < iBeta; + if (bDetaP0Q0 && bDetaP1P0 && bDetaQ1Q0) { + pPixCr[-iStrideX] = ((p1 * (1 << 1)) + p0 + q1 + 2) >> 2; /* p0' */ + pPixCr[0] = ((q1 * (1 << 1)) + q0 + p1 + 2) >> 2; /* q0' */ + } + pPixCr += iStrideY; + pPixCb += iStrideY; + } +} +void DeblockChromaLt4V_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta, + int8_t* tc) { + DeblockChromaLt4_c (pPixCb, pPixCr, iStride, 1, iAlpha, iBeta, tc); +} +void DeblockChromaLt4H_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta, + int8_t* tc) { + DeblockChromaLt4_c (pPixCb, pPixCr, 1, iStride, iAlpha, iBeta, tc); +} +void DeblockChromaEq4V_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta) { + DeblockChromaEq4_c (pPixCb, pPixCr, iStride, 1, iAlpha, iBeta); +} +void DeblockChromaEq4H_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta) { + DeblockChromaEq4_c (pPixCb, pPixCr, 1, iStride, iAlpha, iBeta); +} + +void DeblockChromaLt42_c (uint8_t* pPixCbCr, int32_t iStrideX, int32_t iStrideY, int32_t iAlpha, + int32_t iBeta, int8_t* pTc) { + int32_t p0, p1, q0, q1, iDeta; + bool bDetaP0Q0, bDetaP1P0, bDetaQ1Q0; + + for (int32_t i = 0; i < 8; i++) { + int32_t iTc0 = pTc[i >> 1]; + if (iTc0 > 0) { + p0 = pPixCbCr[-iStrideX]; + p1 = pPixCbCr[-2 * iStrideX]; + q0 = pPixCbCr[0]; + q1 = pPixCbCr[iStrideX]; + + bDetaP0Q0 = WELS_ABS (p0 - q0) < iAlpha; + bDetaP1P0 = WELS_ABS (p1 - p0) < iBeta; + bDetaQ1Q0 = WELS_ABS (q1 - q0) < iBeta; + if (bDetaP0Q0 && bDetaP1P0 && bDetaQ1Q0) { + iDeta = WELS_CLIP3 ((((q0 - p0) * (1 << 2)) + (p1 - q1) + 4) >> 3, -iTc0, iTc0); + pPixCbCr[-iStrideX] = WelsClip1 (p0 + iDeta); /* p0' */ + pPixCbCr[0] = WelsClip1 (q0 - iDeta); /* q0' */ + } + + + } + pPixCbCr += iStrideY; + } +} +void DeblockChromaEq42_c (uint8_t* pPixCbCr, int32_t iStrideX, int32_t iStrideY, int32_t iAlpha, + int32_t iBeta) { + int32_t p0, p1, q0, q1; + bool bDetaP0Q0, bDetaP1P0, 
bDetaQ1Q0; + for (int32_t i = 0; i < 8; i++) { + p0 = pPixCbCr[-iStrideX]; + p1 = pPixCbCr[-2 * iStrideX]; + q0 = pPixCbCr[0]; + q1 = pPixCbCr[iStrideX]; + bDetaP0Q0 = WELS_ABS (p0 - q0) < iAlpha; + bDetaP1P0 = WELS_ABS (p1 - p0) < iBeta; + bDetaQ1Q0 = WELS_ABS (q1 - q0) < iBeta; + if (bDetaP0Q0 && bDetaP1P0 && bDetaQ1Q0) { + pPixCbCr[-iStrideX] = ((p1 * (1 << 1)) + p0 + q1 + 2) >> 2; /* p0' */ + pPixCbCr[0] = ((q1 * (1 << 1)) + q0 + p1 + 2) >> 2; /* q0' */ + } + + pPixCbCr += iStrideY; + } +} + +void DeblockChromaLt4V2_c (uint8_t* pPixCbCr, int32_t iStride, int32_t iAlpha, int32_t iBeta, + int8_t* tc) { + DeblockChromaLt42_c (pPixCbCr, iStride, 1, iAlpha, iBeta, tc); +} +void DeblockChromaLt4H2_c (uint8_t* pPixCbCr, int32_t iStride, int32_t iAlpha, int32_t iBeta, + int8_t* tc) { + + DeblockChromaLt42_c (pPixCbCr, 1, iStride, iAlpha, iBeta, tc); +} +void DeblockChromaEq4V2_c (uint8_t* pPixCbCr, int32_t iStride, int32_t iAlpha, int32_t iBeta) { + DeblockChromaEq42_c (pPixCbCr, iStride, 1, iAlpha, iBeta); +} +void DeblockChromaEq4H2_c (uint8_t* pPixCbCr, int32_t iStride, int32_t iAlpha, int32_t iBeta) { + DeblockChromaEq42_c (pPixCbCr, 1, iStride, iAlpha, iBeta); +} + +void WelsNonZeroCount_c (int8_t* pNonZeroCount) { + int32_t i; + for (i = 0; i < 24; i++) { + pNonZeroCount[i] = !!pNonZeroCount[i]; + } +} + +#ifdef X86_ASM +extern "C" { + void DeblockLumaLt4H_ssse3 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc) { + ENFORCE_STACK_ALIGN_1D (uint8_t, uiBuf, 16 * 8, 16); + + DeblockLumaTransposeH2V_sse2 (pPixY - 4, iStride, &uiBuf[0]); + DeblockLumaLt4V_ssse3 (&uiBuf[4 * 16], 16, iAlpha, iBeta, pTc); + DeblockLumaTransposeV2H_sse2 (pPixY - 4, iStride, &uiBuf[0]); + } + + void DeblockLumaEq4H_ssse3 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta) { + ENFORCE_STACK_ALIGN_1D (uint8_t, uiBuf, 16 * 8, 16); + + DeblockLumaTransposeH2V_sse2 (pPixY - 4, iStride, &uiBuf[0]); + DeblockLumaEq4V_ssse3 (&uiBuf[4 * 16], 16, iAlpha, 
iBeta); + DeblockLumaTransposeV2H_sse2 (pPixY - 4, iStride, &uiBuf[0]); + } + +} + +#endif + +#ifdef HAVE_MMI +extern "C" { + void DeblockLumaLt4H_mmi (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc) { + ENFORCE_STACK_ALIGN_1D (uint8_t, uiBuf, 16 * 8, 16); + + DeblockLumaTransposeH2V_mmi (pPixY - 4, iStride, &uiBuf[0]); + DeblockLumaLt4V_mmi (&uiBuf[4 * 16], 16, iAlpha, iBeta, pTc); + DeblockLumaTransposeV2H_mmi (pPixY - 4, iStride, &uiBuf[0]); + } + + void DeblockLumaEq4H_mmi (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta) { + ENFORCE_STACK_ALIGN_1D (uint8_t, uiBuf, 16 * 8, 16); + + DeblockLumaTransposeH2V_mmi (pPixY - 4, iStride, &uiBuf[0]); + DeblockLumaEq4V_mmi (&uiBuf[4 * 16], 16, iAlpha, iBeta); + DeblockLumaTransposeV2H_mmi (pPixY - 4, iStride, &uiBuf[0]); + } +} +#endif//HAVE_MMI diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/src/expand_pic.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/common/src/expand_pic.cpp new file mode 100644 index 000000000..fcd96540e --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/src/expand_pic.cpp @@ -0,0 +1,415 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ +#include +#include "expand_pic.h" +#include "cpu_core.h" + +static inline void MBPadTopLeftLuma_c (uint8_t*& pDst, const int32_t& kiStride) { + const uint8_t kuiTL = pDst[0]; + int32_t i = 0; + uint8_t* pTopLeft = pDst; + do { + pTopLeft -= kiStride; + // pad pTop + memcpy (pTopLeft, pDst, 16); // confirmed_safe_unsafe_usage + memset (pTopLeft - PADDING_LENGTH, kuiTL, PADDING_LENGTH); //pTop left + } while (++i < PADDING_LENGTH); +} + +static inline void MBPadTopLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbX) { + uint8_t* pTopLine = pDst + (kiMbX << 4); + int32_t i = 0; + uint8_t* pTop = pTopLine; + do { + pTop -= kiStride; + // pad pTop + memcpy (pTop, pTopLine, 16); // confirmed_safe_unsafe_usage + } while (++i < PADDING_LENGTH); +} + +static inline void MBPadBottomLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbX, + const int32_t& kiPicH) { + uint8_t* pBottomLine = pDst + (kiPicH - 1) * kiStride + (kiMbX << 4); + int32_t i = 0; + uint8_t* pBottom = pBottomLine; + do { + pBottom += kiStride; + // pad pBottom + memcpy (pBottom, pBottomLine, 16); // confirmed_safe_unsafe_usage + } while (++i < PADDING_LENGTH); +} + 
+static inline void MBPadTopRightLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW) { + uint8_t* pTopRight = pDst + kiPicW; + const uint8_t kuiTR = pTopRight[-1]; + int32_t i = 0; + uint8_t* pTop = pTopRight; + do { + pTop -= kiStride; + // pad pTop + memcpy (pTop - 16, pTopRight - 16, 16); // confirmed_safe_unsafe_usage + memset (pTop, kuiTR, PADDING_LENGTH); //pTop Right + } while (++i < PADDING_LENGTH); +} + +static inline void MBPadBottomLeftLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicH) { + uint8_t* pDstLastLine = pDst + (kiPicH - 1) * kiStride; + const uint8_t kuiBL = pDstLastLine[0]; + int32_t i = 0; + uint8_t* pBottom = pDstLastLine; + do { + pBottom += kiStride; + // pad pBottom + memcpy (pBottom, pDstLastLine, 16); // confirmed_safe_unsafe_usage + memset (pBottom - PADDING_LENGTH, kuiBL, PADDING_LENGTH); //pBottom left + } while (++i < PADDING_LENGTH); +} + +static inline void MBPadBottomRightLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW, + const int32_t& kiPicH) { + uint8_t* pDstLastLine = pDst + (kiPicH - 1) * kiStride + kiPicW; + const uint8_t kuiBR = pDstLastLine[-1]; + int32_t i = 0; + uint8_t* pBottom = pDstLastLine; + do { + pBottom += kiStride; + // pad pBottom + memcpy (pBottom - 16, pDstLastLine - 16, 16); // confirmed_safe_unsafe_usage + memset (pBottom, kuiBR, PADDING_LENGTH); //pBottom Right + } while (++i < PADDING_LENGTH); +} + +static inline void MBPadLeftLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbY) { + uint8_t* pTmp = pDst + (kiMbY << 4) * kiStride; + for (int32_t i = 0; i < 16; ++i) { + // pad left + memset (pTmp - PADDING_LENGTH, pTmp[0], PADDING_LENGTH); + pTmp += kiStride; + } +} + +static inline void MBPadRightLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbY, + const int32_t& kiPicW) { + uint8_t* pTmp = pDst + (kiMbY << 4) * kiStride + kiPicW; + for (int32_t i = 0; i < 16; ++i) { + // pad right + memset (pTmp, 
pTmp[-1], PADDING_LENGTH); + pTmp += kiStride; + } +} + +static inline void MBPadTopChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbX) { + uint8_t* pTopLine = pDst + (kiMbX << 3); + int32_t i = 0; + uint8_t* pTop = pTopLine; + do { + pTop -= kiStride; + // pad pTop + memcpy (pTop, pTopLine, 8); // confirmed_safe_unsafe_usage + } while (++i < CHROMA_PADDING_LENGTH); +} + +static inline void MBPadBottomChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbX, + const int32_t& kiPicH) { + uint8_t* pBottomLine = pDst + (kiPicH - 1) * kiStride + (kiMbX << 3); + int32_t i = 0; + uint8_t* pBottom = pBottomLine; + do { + pBottom += kiStride; + // pad pBottom + memcpy (pBottom, pBottomLine, 8); // confirmed_safe_unsafe_usage + } while (++i < CHROMA_PADDING_LENGTH); +} + +static inline void MBPadTopLeftChroma_c (uint8_t*& pDst, const int32_t& kiStride) { + const uint8_t kuiTL = pDst[0]; + int32_t i = 0; + uint8_t* pTopLeft = pDst; + do { + pTopLeft -= kiStride; + // pad pTop + memcpy (pTopLeft, pDst, 8); // confirmed_safe_unsafe_usage + memset (pTopLeft - CHROMA_PADDING_LENGTH, kuiTL, CHROMA_PADDING_LENGTH); //pTop left + } while (++i < CHROMA_PADDING_LENGTH); +} + +static inline void MBPadTopRightChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW) { + uint8_t* pTopRight = pDst + kiPicW; + const uint8_t kuiTR = pTopRight[-1]; + int32_t i = 0; + uint8_t* pTop = pTopRight; + do { + pTop -= kiStride; + // pad pTop + memcpy (pTop - 8, pTopRight - 8, 8); // confirmed_safe_unsafe_usage + memset (pTop, kuiTR, CHROMA_PADDING_LENGTH); //pTop Right + } while (++i < CHROMA_PADDING_LENGTH); +} + +static inline void MBPadBottomLeftChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicH) { + uint8_t* pDstLastLine = pDst + (kiPicH - 1) * kiStride; + const uint8_t kuiBL = pDstLastLine[0]; + int32_t i = 0; + uint8_t* pBottom = pDstLastLine; + do { + pBottom += kiStride; + // pad pBottom + memcpy (pBottom, 
pDstLastLine, 8); // confirmed_safe_unsafe_usage + memset (pBottom - CHROMA_PADDING_LENGTH, kuiBL, CHROMA_PADDING_LENGTH); //pBottom left + } while (++i < CHROMA_PADDING_LENGTH); +} + +static inline void MBPadBottomRightChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW, + const int32_t kiPicH) { + uint8_t* pDstLastLine = pDst + (kiPicH - 1) * kiStride + kiPicW; + const uint8_t kuiBR = pDstLastLine[-1]; + int32_t i = 0; + uint8_t* pBottom = pDstLastLine; + do { + pBottom += kiStride; + // pad pBottom + memcpy (pBottom - 8, pDstLastLine - 8, 8); // confirmed_safe_unsafe_usage + memset (pBottom, kuiBR, CHROMA_PADDING_LENGTH); //pBottom Right + } while (++i < CHROMA_PADDING_LENGTH); +} + +static inline void MBPadLeftChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbY) { + uint8_t* pTmp = pDst + (kiMbY << 3) * kiStride; + for (int32_t i = 0; i < 8; ++i) { + // pad left + memset (pTmp - CHROMA_PADDING_LENGTH, pTmp[0], CHROMA_PADDING_LENGTH); + pTmp += kiStride; + } +} + +static inline void MBPadRightChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbY, + const int32_t& kiPicW) { + uint8_t* pTmp = pDst + (kiMbY << 3) * kiStride + kiPicW; + for (int32_t i = 0; i < 8; ++i) { + // pad right + memset (pTmp, pTmp[-1], CHROMA_PADDING_LENGTH); + pTmp += kiStride; + } +} + +void PadMBLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW, const int32_t& kiPicH, + const int32_t& kiMbX, const int32_t& kiMbY, const int32_t& kiMBWidth, const int32_t& kiMBHeight) { + if (kiMbX == 0 && kiMbY == 0) { + MBPadTopLeftLuma_c (pDst, kiStride); + } else if (kiMbY == 0 && kiMbX == kiMBWidth - 1) { + MBPadTopRightLuma_c (pDst, kiStride, kiPicW); + } else if (kiMbY == kiMBHeight - 1 && kiMbX == 0) { + MBPadBottomLeftLuma_c (pDst, kiStride, kiPicH); + } else if (kiMbY == kiMBHeight - 1 && kiMbX == kiMBWidth - 1) { + MBPadBottomRightLuma_c (pDst, kiStride, kiPicW, kiPicH); + } + if (kiMbX == 0) { + MBPadLeftLuma_c (pDst, 
kiStride, kiMbY); + } else if (kiMbX == kiMBWidth - 1) { + MBPadRightLuma_c (pDst, kiStride, kiMbY, kiPicW); + } + if (kiMbY == 0 && kiMbX > 0 && kiMbX < kiMBWidth - 1) { + MBPadTopLuma_c (pDst, kiStride, kiMbX); + } else if (kiMbY == kiMBHeight - 1 && kiMbX > 0 && kiMbX < kiMBWidth - 1) { + MBPadBottomLuma_c (pDst, kiStride, kiMbX, kiPicH); + } +} + +void PadMBChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW, const int32_t& kiPicH, + const int32_t& kiMbX, const int32_t& kiMbY, const int32_t& kiMBWidth, const int32_t& kiMBHeight) { + if (kiMbX == 0 && kiMbY == 0) { + MBPadTopLeftChroma_c (pDst, kiStride); + } else if (kiMbY == 0 && kiMbX == kiMBWidth - 1) { + MBPadTopRightChroma_c (pDst, kiStride, kiPicW); + } else if (kiMbY == kiMBHeight - 1 && kiMbX == 0) { + MBPadBottomLeftChroma_c (pDst, kiStride, kiPicH); + } else if (kiMbY == kiMBHeight - 1 && kiMbX == kiMBWidth - 1) { + MBPadBottomRightChroma_c (pDst, kiStride, kiPicW, kiPicH); + } + if (kiMbX == 0) { + MBPadLeftChroma_c (pDst, kiStride, kiMbY); + } else if (kiMbX == kiMBWidth - 1) { + MBPadRightChroma_c (pDst, kiStride, kiMbY, kiPicW); + } + if (kiMbY == 0 && kiMbX > 0 && kiMbX < kiMBWidth - 1) { + MBPadTopChroma_c (pDst, kiStride, kiMbX); + } else if (kiMbY == kiMBHeight - 1 && kiMbX > 0 && kiMbX < kiMBWidth - 1) { + MBPadBottomChroma_c (pDst, kiStride, kiMbX, kiPicH); + } +} + +// rewrite it (split into luma & chroma) that is helpful for mmx/sse2 optimization perform, 9/27/2009 +static inline void ExpandPictureLuma_c (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicW, + const int32_t kiPicH) { + uint8_t* pTmp = pDst; + uint8_t* pDstLastLine = pTmp + (kiPicH - 1) * kiStride; + const int32_t kiPaddingLen = PADDING_LENGTH; + const uint8_t kuiTL = pTmp[0]; + const uint8_t kuiTR = pTmp[kiPicW - 1]; + const uint8_t kuiBL = pDstLastLine[0]; + const uint8_t kuiBR = pDstLastLine[kiPicW - 1]; + int32_t i = 0; + + do { + const int32_t kiStrides = (1 + i) * kiStride; + uint8_t* pTop 
= pTmp - kiStrides; + uint8_t* pBottom = pDstLastLine + kiStrides; + + // pad pTop and pBottom + memcpy (pTop, pTmp, kiPicW); // confirmed_safe_unsafe_usage + memcpy (pBottom, pDstLastLine, kiPicW); // confirmed_safe_unsafe_usage + + // pad corners + memset (pTop - kiPaddingLen, kuiTL, kiPaddingLen); //pTop left + memset (pTop + kiPicW, kuiTR, kiPaddingLen); //pTop right + memset (pBottom - kiPaddingLen, kuiBL, kiPaddingLen); //pBottom left + memset (pBottom + kiPicW, kuiBR, kiPaddingLen); //pBottom right + + ++ i; + } while (i < kiPaddingLen); + + // pad left and right + i = 0; + do { + memset (pTmp - kiPaddingLen, pTmp[0], kiPaddingLen); + memset (pTmp + kiPicW, pTmp[kiPicW - 1], kiPaddingLen); + + pTmp += kiStride; + ++ i; + } while (i < kiPicH); +} + +static inline void ExpandPictureChroma_c (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicW, + const int32_t kiPicH) { + uint8_t* pTmp = pDst; + uint8_t* pDstLastLine = pTmp + (kiPicH - 1) * kiStride; + const int32_t kiPaddingLen = (PADDING_LENGTH >> 1); + const uint8_t kuiTL = pTmp[0]; + const uint8_t kuiTR = pTmp[kiPicW - 1]; + const uint8_t kuiBL = pDstLastLine[0]; + const uint8_t kuiBR = pDstLastLine[kiPicW - 1]; + int32_t i = 0; + + do { + const int32_t kiStrides = (1 + i) * kiStride; + uint8_t* pTop = pTmp - kiStrides; + uint8_t* pBottom = pDstLastLine + kiStrides; + + // pad pTop and pBottom + memcpy (pTop, pTmp, kiPicW); // confirmed_safe_unsafe_usage + memcpy (pBottom, pDstLastLine, kiPicW); // confirmed_safe_unsafe_usage + + // pad corners + memset (pTop - kiPaddingLen, kuiTL, kiPaddingLen); //pTop left + memset (pTop + kiPicW, kuiTR, kiPaddingLen); //pTop right + memset (pBottom - kiPaddingLen, kuiBL, kiPaddingLen); //pBottom left + memset (pBottom + kiPicW, kuiBR, kiPaddingLen); //pBottom right + + ++ i; + } while (i < kiPaddingLen); + + // pad left and right + i = 0; + do { + memset (pTmp - kiPaddingLen, pTmp[0], kiPaddingLen); + memset (pTmp + kiPicW, pTmp[kiPicW - 1], kiPaddingLen); + + 
pTmp += kiStride; + ++ i; + } while (i < kiPicH); +} + +void InitExpandPictureFunc (SExpandPicFunc* pExpandPicFunc, const uint32_t kuiCPUFlag) { + pExpandPicFunc->pfExpandLumaPicture = ExpandPictureLuma_c; + pExpandPicFunc->pfExpandChromaPicture[0] = ExpandPictureChroma_c; + pExpandPicFunc->pfExpandChromaPicture[1] = ExpandPictureChroma_c; + +#if defined(X86_ASM) + if ((kuiCPUFlag & WELS_CPU_SSE2) == WELS_CPU_SSE2) { + pExpandPicFunc->pfExpandLumaPicture = ExpandPictureLuma_sse2; + pExpandPicFunc->pfExpandChromaPicture[0] = ExpandPictureChromaUnalign_sse2; + pExpandPicFunc->pfExpandChromaPicture[1] = ExpandPictureChromaAlign_sse2; + } +#endif//X86_ASM +#if defined(HAVE_NEON) + if (kuiCPUFlag & WELS_CPU_NEON) { + pExpandPicFunc->pfExpandLumaPicture = ExpandPictureLuma_neon; + pExpandPicFunc->pfExpandChromaPicture[0] = ExpandPictureChroma_neon; + pExpandPicFunc->pfExpandChromaPicture[1] = ExpandPictureChroma_neon; + } +#endif//HAVE_NEON +#if defined(HAVE_NEON_AARCH64) + if (kuiCPUFlag & WELS_CPU_NEON) { + pExpandPicFunc->pfExpandLumaPicture = ExpandPictureLuma_AArch64_neon; + pExpandPicFunc->pfExpandChromaPicture[0] = ExpandPictureChroma_AArch64_neon; + pExpandPicFunc->pfExpandChromaPicture[1] = ExpandPictureChroma_AArch64_neon; + } +#endif//HAVE_NEON_AARCH64 +#if defined(HAVE_MMI) + if (kuiCPUFlag & WELS_CPU_MMI) { + pExpandPicFunc->pfExpandLumaPicture = ExpandPictureLuma_mmi; + pExpandPicFunc->pfExpandChromaPicture[0] = ExpandPictureChromaUnalign_mmi; + pExpandPicFunc->pfExpandChromaPicture[1] = ExpandPictureChromaAlign_mmi; + } +#endif//HAVE_MMI +} + + +//void ExpandReferencingPicture (SPicture* pPic, PExpandPictureFunc pExpLuma, PExpandPictureFunc pExpChrom[2]) { +void ExpandReferencingPicture (uint8_t* pData[3], int32_t iWidth, int32_t iHeight, int32_t iStride[3], + PExpandPictureFunc pExpLuma, PExpandPictureFunc pExpChrom[2]) { + /*local variable*/ + uint8_t* pPicY = pData[0]; + uint8_t* pPicCb = pData[1]; + uint8_t* pPicCr = pData[2]; + const int32_t kiWidthY 
= iWidth; + const int32_t kiHeightY = iHeight; + const int32_t kiWidthUV = kiWidthY >> 1; + const int32_t kiHeightUV = kiHeightY >> 1; + + + + pExpLuma (pPicY, iStride[0], kiWidthY, kiHeightY); + if (kiWidthUV >= 16) { + // fix coding picture size as 16x16 + const bool kbChrAligned = /*(iWidthUV >= 16) && */ ((kiWidthUV & 0x0F) == 0); // chroma planes: (16+iWidthUV) & 15 + pExpChrom[kbChrAligned] (pPicCb, iStride[1], kiWidthUV, kiHeightUV); + pExpChrom[kbChrAligned] (pPicCr, iStride[2], kiWidthUV, kiHeightUV); + } else { + // fix coding picture size as 16x16 + ExpandPictureChroma_c (pPicCb, iStride[1], kiWidthUV, kiHeightUV); + ExpandPictureChroma_c (pPicCr, iStride[2], kiWidthUV, kiHeightUV); + } + + + +} diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/src/intra_pred_common.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/common/src/intra_pred_common.cpp new file mode 100644 index 000000000..f35ceccbc --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/src/intra_pred_common.cpp @@ -0,0 +1,77 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file get_intra_predictor.c + * + * \brief implementation for get intra predictor about 16x16, 4x4, chroma. + * + * \date 4/2/2009 Created + * 9/14/2009 C level based optimization with high performance gained. + * [const, using ST32/ST64 to replace memset, memcpy and memmove etc.] + * + ************************************************************************************* + */ +#include "ls_defines.h" +#include "cpu_core.h" +#include "intra_pred_common.h" + + +void WelsI16x16LumaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { + uint8_t i = 15; + const int8_t* kpSrc = (int8_t*)&pRef[-kiStride]; + const uint64_t kuiT1 = LD64 (kpSrc); + const uint64_t kuiT2 = LD64 (kpSrc + 8); + uint8_t* pDst = pPred; + + do { + ST64 (pDst , kuiT1); + ST64 (pDst + 8, kuiT2); + pDst += 16; + } while (i-- > 0); +} + +void WelsI16x16LumaPredH_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { + int32_t iStridex15 = (kiStride << 4) - kiStride; + int32_t iPredStride = 16; + int32_t iPredStridex15 = 240; //(iPredStride<<4)-iPredStride; + uint8_t i = 15; + + do { + const uint8_t kuiSrc8 = pRef[iStridex15 - 1]; + const uint64_t kuiV64 = (uint64_t) (0x0101010101010101ULL * kuiSrc8); + ST64 (&pPred[iPredStridex15], kuiV64); + ST64 (&pPred[iPredStridex15 + 8], kuiV64); + + iStridex15 -= kiStride; + iPredStridex15 -= iPredStride; + } while (i-- > 0); +} + diff --git 
a/TMessagesProj/jni/third_party/openh264/src/codec/common/src/mc.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/common/src/mc.cpp new file mode 100644 index 000000000..455fe5cfe --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/src/mc.cpp @@ -0,0 +1,4266 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file mc.c + * + * \brief Interfaces implementation for motion compensation + * + * \date 03/17/2009 Created + * + ************************************************************************************* + */ + +#include "mc.h" + +#include "cpu_core.h" +#include "ls_defines.h" +#include "macros.h" +#include "asmdefs_mmi.h" + +namespace { + +typedef void (*PMcChromaWidthExtFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + const uint8_t* kpABCD, int32_t iHeight); +typedef void (*PWelsSampleWidthAveragingFunc) (uint8_t*, int32_t, const uint8_t*, int32_t, const uint8_t*, + int32_t, int32_t); +typedef void (*PWelsMcWidthHeightFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight); + +/*------------------weight for chroma fraction pixel interpolation------------------*/ +//iA = (8 - dx) * (8 - dy); +//iB = dx * (8 - dy); +//iC = (8 - dx) * dy; +//iD = dx * dy +static const uint8_t g_kuiABCD[8][8][4] = { //g_kA[dy][dx], g_kB[dy][dx], g_kC[dy][dx], g_kD[dy][dx] + { + {64, 0, 0, 0}, {56, 8, 0, 0}, {48, 16, 0, 0}, {40, 24, 0, 0}, + {32, 32, 0, 0}, {24, 40, 0, 0}, {16, 48, 0, 0}, {8, 56, 0, 0} + }, + { + {56, 0, 8, 0}, {49, 7, 7, 1}, {42, 14, 6, 2}, {35, 21, 5, 3}, + {28, 28, 4, 4}, {21, 35, 3, 5}, {14, 42, 2, 6}, {7, 49, 1, 7} + }, + { + {48, 0, 16, 0}, {42, 6, 14, 2}, {36, 12, 12, 4}, {30, 18, 10, 6}, + {24, 24, 8, 8}, {18, 30, 6, 10}, {12, 36, 4, 12}, {6, 42, 2, 14} + }, + { + {40, 0, 24, 0}, {35, 5, 21, 3}, {30, 10, 18, 6}, {25, 15, 15, 9}, + {20, 20, 12, 12}, {15, 25, 9, 15}, {10, 30, 6, 18}, {5, 35, 3, 21} + }, + { + {32, 0, 32, 0}, {28, 4, 28, 4}, {24, 8, 24, 8}, {20, 12, 20, 12}, + {16, 16, 16, 16}, {12, 20, 12, 20}, {8, 24, 8, 24}, {4, 28, 4, 28} + }, + { + {24, 0, 40, 0}, {21, 3, 35, 5}, {18, 6, 30, 10}, {15, 9, 25, 15}, + {12, 12, 20, 20}, {9, 15, 15, 25}, {6, 18, 10, 30}, {3, 21, 5, 35} + }, + { + {16, 0, 48, 0}, {14, 2, 42, 6}, {12, 4, 36, 12}, {10, 6, 30, 
18}, + {8, 8, 24, 24}, {6, 10, 18, 30}, {4, 12, 12, 36}, {2, 14, 6, 42} + }, + { + {8, 0, 56, 0}, {7, 1, 49, 7}, {6, 2, 42, 14}, {5, 3, 35, 21}, + {4, 4, 28, 28}, {3, 5, 21, 35}, {2, 6, 14, 42}, {1, 7, 7, 49} + } +}; + +//***************************************************************************// +// C code implementation // +//***************************************************************************// +static inline void McCopyWidthEq2_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight) { + int32_t i; + for (i = 0; i < iHeight; i++) { // iWidth == 2 only for chroma + ST16A2 (pDst, LD16 (pSrc)); + pDst += iDstStride; + pSrc += iSrcStride; + } +} + +static inline void McCopyWidthEq4_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight) { + int32_t i; + for (i = 0; i < iHeight; i++) { + ST32A4 (pDst, LD32 (pSrc)); + pDst += iDstStride; + pSrc += iSrcStride; + } +} + +static inline void McCopyWidthEq8_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight) { + int32_t i; + for (i = 0; i < iHeight; i++) { + ST64A8 (pDst, LD64 (pSrc)); + pDst += iDstStride; + pSrc += iSrcStride; + } +} + +static inline void McCopyWidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight) { + int32_t i; + for (i = 0; i < iHeight; i++) { + ST64A8 (pDst , LD64 (pSrc)); + ST64A8 (pDst + 8, LD64 (pSrc + 8)); + pDst += iDstStride; + pSrc += iSrcStride; + } +} + +//--------------------Luma sample MC------------------// + +static inline int32_t HorFilterInput16bit_c (const int16_t* pSrc) { + int32_t iPix05 = pSrc[0] + pSrc[5]; + int32_t iPix14 = pSrc[1] + pSrc[4]; + int32_t iPix23 = pSrc[2] + pSrc[3]; + + return (iPix05 - (iPix14 * 5) + (iPix23 * 20)); +} +// h: iOffset=1 / v: iOffset=iSrcStride +static inline int32_t FilterInput8bitWithStride_c (const uint8_t* pSrc, const int32_t kiOffset) { + const int32_t 
kiOffset1 = kiOffset; + const int32_t kiOffset2 = (kiOffset << 1); + const int32_t kiOffset3 = kiOffset + kiOffset2; + const uint32_t kuiPix05 = * (pSrc - kiOffset2) + * (pSrc + kiOffset3); + const uint32_t kuiPix14 = * (pSrc - kiOffset1) + * (pSrc + kiOffset2); + const uint32_t kuiPix23 = * (pSrc) + * (pSrc + kiOffset1); + + return (kuiPix05 - ((kuiPix14 << 2) + kuiPix14) + (kuiPix23 << 4) + (kuiPix23 << 2)); +} + +static inline void PixelAvg_c (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, + const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) { + int32_t i, j; + for (i = 0; i < iHeight; i++) { + for (j = 0; j < iWidth; j++) { + pDst[j] = (pSrcA[j] + pSrcB[j] + 1) >> 1; + } + pDst += iDstStride; + pSrcA += iSrcAStride; + pSrcB += iSrcBStride; + } +} +static inline void McCopy_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth, + int32_t iHeight) { + if (iWidth == 16) + McCopyWidthEq16_c (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 8) + McCopyWidthEq8_c (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 4) + McCopyWidthEq4_c (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else //here iWidth == 2 + McCopyWidthEq2_c (pSrc, iSrcStride, pDst, iDstStride, iHeight); +} + +//horizontal filter to gain half sample, that is (2, 0) location in quarter sample +static inline void McHorVer20_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, + int32_t iHeight) { + int32_t i, j; + for (i = 0; i < iHeight; i++) { + for (j = 0; j < iWidth; j++) { + pDst[j] = WelsClip1 ((FilterInput8bitWithStride_c (pSrc + j, 1) + 16) >> 5); + } + pDst += iDstStride; + pSrc += iSrcStride; + } +} + +//vertical filter to gain half sample, that is (0, 2) location in quarter sample +static inline void McHorVer02_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, + int32_t 
iHeight) { + int32_t i, j; + for (i = 0; i < iHeight; i++) { + for (j = 0; j < iWidth; j++) { + pDst[j] = WelsClip1 ((FilterInput8bitWithStride_c (pSrc + j, iSrcStride) + 16) >> 5); + } + pDst += iDstStride; + pSrc += iSrcStride; + } +} + +//horizontal and vertical filter to gain half sample, that is (2, 2) location in quarter sample +static inline void McHorVer22_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, + int32_t iHeight) { + int16_t iTmp[17 + 5]; + int32_t i, j, k; + + for (i = 0; i < iHeight; i++) { + for (j = 0; j < iWidth + 5; j++) { + iTmp[j] = FilterInput8bitWithStride_c (pSrc - 2 + j, iSrcStride); + } + for (k = 0; k < iWidth; k++) { + pDst[k] = WelsClip1 ((HorFilterInput16bit_c (&iTmp[k]) + 512) >> 10); + } + pSrc += iSrcStride; + pDst += iDstStride; + } +} + +/////////////////////luma MC////////////////////////// +static inline void McHorVer01_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, + int32_t iHeight) { + uint8_t uiTmp[256]; + McHorVer02_c (pSrc, iSrcStride, uiTmp, 16, iWidth, iHeight); + PixelAvg_c (pDst, iDstStride, pSrc, iSrcStride, uiTmp, 16, iWidth, iHeight); +} +static inline void McHorVer03_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, + int32_t iHeight) { + uint8_t uiTmp[256]; + McHorVer02_c (pSrc, iSrcStride, uiTmp, 16, iWidth, iHeight); + PixelAvg_c (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, uiTmp, 16, iWidth, iHeight); +} +static inline void McHorVer10_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, + int32_t iHeight) { + uint8_t uiTmp[256]; + McHorVer20_c (pSrc, iSrcStride, uiTmp, 16, iWidth, iHeight); + PixelAvg_c (pDst, iDstStride, pSrc, iSrcStride, uiTmp, 16, iWidth, iHeight); +} +static inline void McHorVer11_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, + int32_t iHeight) 
{ + uint8_t uiHorTmp[256]; + uint8_t uiVerTmp[256]; + McHorVer20_c (pSrc, iSrcStride, uiHorTmp, 16, iWidth, iHeight); + McHorVer02_c (pSrc, iSrcStride, uiVerTmp, 16, iWidth, iHeight); + PixelAvg_c (pDst, iDstStride, uiHorTmp, 16, uiVerTmp, 16, iWidth, iHeight); +} +static inline void McHorVer12_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, + int32_t iHeight) { + uint8_t uiVerTmp[256]; + uint8_t uiCtrTmp[256]; + McHorVer02_c (pSrc, iSrcStride, uiVerTmp, 16, iWidth, iHeight); + McHorVer22_c (pSrc, iSrcStride, uiCtrTmp, 16, iWidth, iHeight); + PixelAvg_c (pDst, iDstStride, uiVerTmp, 16, uiCtrTmp, 16, iWidth, iHeight); +} +static inline void McHorVer13_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, + int32_t iHeight) { + uint8_t uiHorTmp[256]; + uint8_t uiVerTmp[256]; + McHorVer20_c (pSrc + iSrcStride, iSrcStride, uiHorTmp, 16, iWidth, iHeight); + McHorVer02_c (pSrc, iSrcStride, uiVerTmp, 16, iWidth, iHeight); + PixelAvg_c (pDst, iDstStride, uiHorTmp, 16, uiVerTmp, 16, iWidth, iHeight); +} +static inline void McHorVer21_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, + int32_t iHeight) { + uint8_t uiHorTmp[256]; + uint8_t uiCtrTmp[256]; + McHorVer20_c (pSrc, iSrcStride, uiHorTmp, 16, iWidth, iHeight); + McHorVer22_c (pSrc, iSrcStride, uiCtrTmp, 16, iWidth, iHeight); + PixelAvg_c (pDst, iDstStride, uiHorTmp, 16, uiCtrTmp, 16, iWidth, iHeight); +} +static inline void McHorVer23_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, + int32_t iHeight) { + uint8_t uiHorTmp[256]; + uint8_t uiCtrTmp[256]; + McHorVer20_c (pSrc + iSrcStride, iSrcStride, uiHorTmp, 16, iWidth, iHeight); + McHorVer22_c (pSrc, iSrcStride, uiCtrTmp, 16, iWidth, iHeight); + PixelAvg_c (pDst, iDstStride, uiHorTmp, 16, uiCtrTmp, 16, iWidth, iHeight); +} +static inline void McHorVer30_c (const uint8_t* pSrc, 
int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, + int32_t iHeight) { + uint8_t uiHorTmp[256]; + McHorVer20_c (pSrc, iSrcStride, uiHorTmp, 16, iWidth, iHeight); + PixelAvg_c (pDst, iDstStride, pSrc + 1, iSrcStride, uiHorTmp, 16, iWidth, iHeight); +} +static inline void McHorVer31_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, + int32_t iHeight) { + uint8_t uiHorTmp[256]; + uint8_t uiVerTmp[256]; + McHorVer20_c (pSrc, iSrcStride, uiHorTmp, 16, iWidth, iHeight); + McHorVer02_c (pSrc + 1, iSrcStride, uiVerTmp, 16, iWidth, iHeight); + PixelAvg_c (pDst, iDstStride, uiHorTmp, 16, uiVerTmp, 16, iWidth, iHeight); +} +static inline void McHorVer32_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, + int32_t iHeight) { + uint8_t uiVerTmp[256]; + uint8_t uiCtrTmp[256]; + McHorVer02_c (pSrc + 1, iSrcStride, uiVerTmp, 16, iWidth, iHeight); + McHorVer22_c (pSrc, iSrcStride, uiCtrTmp, 16, iWidth, iHeight); + PixelAvg_c (pDst, iDstStride, uiVerTmp, 16, uiCtrTmp, 16, iWidth, iHeight); +} +static inline void McHorVer33_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, + int32_t iHeight) { + uint8_t uiHorTmp[256]; + uint8_t uiVerTmp[256]; + McHorVer20_c (pSrc + iSrcStride, iSrcStride, uiHorTmp, 16, iWidth, iHeight); + McHorVer02_c (pSrc + 1, iSrcStride, uiVerTmp, 16, iWidth, iHeight); + PixelAvg_c (pDst, iDstStride, uiHorTmp, 16, uiVerTmp, 16, iWidth, iHeight); +} + +void McLuma_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) +//pSrc has been added the offset of mv +{ + static const PWelsMcWidthHeightFunc pWelsMcFunc[4][4] = { //[x][y] + {McCopy_c, McHorVer01_c, McHorVer02_c, McHorVer03_c}, + {McHorVer10_c, McHorVer11_c, McHorVer12_c, McHorVer13_c}, + {McHorVer20_c, McHorVer21_c, McHorVer22_c, McHorVer23_c}, + {McHorVer30_c, 
McHorVer31_c, McHorVer32_c, McHorVer33_c}, + }; + + pWelsMcFunc[iMvX & 0x03][iMvY & 0x03] (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight); +} + +static inline void McChromaWithFragMv_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) { + int32_t i, j; + int32_t iA, iB, iC, iD; + const uint8_t* pSrcNext = pSrc + iSrcStride; + const uint8_t* pABCD = g_kuiABCD[iMvY & 0x07][iMvX & 0x07]; + iA = pABCD[0]; + iB = pABCD[1]; + iC = pABCD[2]; + iD = pABCD[3]; + for (i = 0; i < iHeight; i++) { + for (j = 0; j < iWidth; j++) { + pDst[j] = (iA * pSrc[j] + iB * pSrc[j + 1] + iC * pSrcNext[j] + iD * pSrcNext[j + 1] + 32) >> 6; + } + pDst += iDstStride; + pSrc = pSrcNext; + pSrcNext += iSrcStride; + } +} + +void McChroma_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) +//pSrc has been added the offset of mv +{ + const int32_t kiD8x = iMvX & 0x07; + const int32_t kiD8y = iMvY & 0x07; + if (0 == kiD8x && 0 == kiD8y) + McCopy_c (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight); + else + McChromaWithFragMv_c (pSrc, iSrcStride, pDst, iDstStride, iMvX, iMvY, iWidth, iHeight); +} + +#if defined(X86_ASM) +//***************************************************************************// +// SSE2 implement // +//***************************************************************************// +static inline void McHorVer22WidthEq8_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight) { + ENFORCE_STACK_ALIGN_2D (int16_t, iTap, 21, 8, 16) + McHorVer22Width8HorFirst_sse2 (pSrc - 2, iSrcStride, (uint8_t*)iTap, 16, iHeight + 5); + McHorVer22Width8VerLastAlign_sse2 ((uint8_t*)iTap, 16, pDst, iDstStride, 8, iHeight); +} + +static inline void McHorVer02WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight) { + 
McHorVer02WidthEq8_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight); + McHorVer02WidthEq8_sse2 (&pSrc[8], iSrcStride, &pDst[8], iDstStride, iHeight); +} + +static inline void McHorVer22WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iHeight) { + McHorVer22WidthEq8_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight); + McHorVer22WidthEq8_sse2 (&pSrc[8], iSrcStride, &pDst[8], iDstStride, iHeight); +} + +void McHorVer20Width5Or9Or17_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + if (iWidth == 17 || iWidth == 9) + McHorVer20Width9Or17_sse2 (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight); + else //if (iWidth == 5) + McHorVer20Width5_sse2 (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight); +} + +void McHorVer02Height5Or9Or17_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + if (iWidth == 16 || iWidth == 8) + McHorVer02Height9Or17_sse2 (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight); + else //if (iWidth == 4) + McHorVer02Height5_sse2 (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight); +} + +void McHorVer22Width5Or9Or17Height5Or9Or17_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_2D (int16_t, pTap, 22, 24, 16) + if (iWidth == 17 || iWidth == 9){ + int32_t tmp1 = 2 * (iWidth - 8); + McHorVer22HorFirst_sse2 (pSrc - 2, iSrcStride, (uint8_t*)pTap, 48, iWidth, iHeight + 5); + McHorVer22Width8VerLastAlign_sse2 ((uint8_t*)pTap, 48, pDst, iDstStride, iWidth - 1, iHeight); + McHorVer22Width8VerLastUnAlign_sse2 ((uint8_t*)pTap + tmp1, 48, pDst + iWidth - 8, iDstStride, 8, iHeight); + } + else{ //if(iWidth == 5) + int32_t tmp1 = 2 * (iWidth - 4); + McHorVer22Width5HorFirst_sse2 (pSrc - 2, iSrcStride, (uint8_t*)pTap, 48, iWidth, iHeight + 5); + McHorVer22Width4VerLastAlign_sse2 
((uint8_t*)pTap, 48, pDst, iDstStride, iWidth - 1, iHeight); + McHorVer22Width4VerLastUnAlign_sse2 ((uint8_t*)pTap + tmp1, 48, pDst + iWidth - 4, iDstStride, 4, iHeight); + } + +} + +static inline void McCopy_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, + int32_t iHeight) { + if (iWidth == 16) + McCopyWidthEq16_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 8) + McCopyWidthEq8_mmx (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 4) + McCopyWidthEq4_c (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else + McCopyWidthEq2_c (pSrc, iSrcStride, pDst, iDstStride, iHeight); +} + +static inline void McHorVer20_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + if (iWidth == 16) + McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 8) + McHorVer20WidthEq8_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else + McHorVer20WidthEq4_mmx (pSrc, iSrcStride, pDst, iDstStride, iHeight); +} + +static inline void McHorVer02_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + if (iWidth == 16) + McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 8) + McHorVer02WidthEq8_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else + McHorVer02_c (pSrc, iSrcStride, pDst, iDstStride, 4, iHeight); +} + +static inline void McHorVer22_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + if (iWidth == 16) + McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 8) + McHorVer22WidthEq8_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else + McHorVer22_c (pSrc, iSrcStride, pDst, iDstStride, 4, iHeight); +} + +static inline void McHorVer01_sse2 (const uint8_t* pSrc, 
int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16); + if (iWidth == 16) { + McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight); + PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight); + } else if (iWidth == 8) { + McHorVer02WidthEq8_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight); + PixelAvgWidthEq8_mmx (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight); + } else { + McHorVer02_c (pSrc, iSrcStride, pTmp, 16, 4, iHeight); + PixelAvgWidthEq4_mmx (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight); + } +} +static inline void McHorVer03_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16); + if (iWidth == 16) { + McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight); + PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight); + } else if (iWidth == 8) { + McHorVer02WidthEq8_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight); + PixelAvgWidthEq8_mmx (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight); + } else { + McHorVer02_c (pSrc, iSrcStride, pTmp, 16, 4, iHeight); + PixelAvgWidthEq4_mmx (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight); + } +} +static inline void McHorVer10_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16); + if (iWidth == 16) { + McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight); + PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight); + } else if (iWidth == 8) { + McHorVer20WidthEq8_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight); + PixelAvgWidthEq8_mmx (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight); + } else { + McHorVer20WidthEq4_mmx (pSrc, iSrcStride, pTmp, 16, iHeight); + 
PixelAvgWidthEq4_mmx (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight); + } +} +static inline void McHorVer11_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); + if (iWidth == 16) { + McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq16_sse2 (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } else if (iWidth == 8) { + McHorVer20WidthEq8_sse2 (pSrc, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq8_sse2 (pSrc, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq8_mmx (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } else { + McHorVer20WidthEq4_mmx (pSrc, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02_c (pSrc, iSrcStride, pVerTmp, 16, 4, iHeight); + PixelAvgWidthEq4_mmx (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } +} +static inline void McHorVer12_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16); + if (iWidth == 16) { + McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pVerTmp, 16, iHeight); + McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq16_sse2 (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight); + } else if (iWidth == 8) { + McHorVer02WidthEq8_sse2 (pSrc, iSrcStride, pVerTmp, 16, iHeight); + McHorVer22WidthEq8_sse2 (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq8_mmx (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight); + } else { + McHorVer02_c (pSrc, iSrcStride, pVerTmp, 16, 4, iHeight); + McHorVer22_c (pSrc, iSrcStride, pCtrTmp, 16, 4, iHeight); + PixelAvgWidthEq4_mmx (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight); + 
} +} +static inline void McHorVer13_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); + if (iWidth == 16) { + McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq16_sse2 (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } else if (iWidth == 8) { + McHorVer20WidthEq8_sse2 (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq8_sse2 (pSrc, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq8_mmx (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } else { + McHorVer20WidthEq4_mmx (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02_c (pSrc, iSrcStride, pVerTmp, 16, 4 , iHeight); + PixelAvgWidthEq4_mmx (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } +} +static inline void McHorVer21_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16); + if (iWidth == 16) { + McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pHorTmp, 16, iHeight); + McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq16_sse2 (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight); + } else if (iWidth == 8) { + McHorVer20WidthEq8_sse2 (pSrc, iSrcStride, pHorTmp, 16, iHeight); + McHorVer22WidthEq8_sse2 (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq8_mmx (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight); + } else { + McHorVer20WidthEq4_mmx (pSrc, iSrcStride, pHorTmp, 16, iHeight); + McHorVer22_c (pSrc, iSrcStride, pCtrTmp, 16, 4, iHeight); + PixelAvgWidthEq4_mmx (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight); + } +} +static inline void 
McHorVer23_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16); + if (iWidth == 16) { + McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); + McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq16_sse2 (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight); + } else if (iWidth == 8) { + McHorVer20WidthEq8_sse2 (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); + McHorVer22WidthEq8_sse2 (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq8_mmx (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight); + } else { + McHorVer20WidthEq4_mmx (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); + McHorVer22_c (pSrc, iSrcStride, pCtrTmp, 16, 4, iHeight); + PixelAvgWidthEq4_mmx (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight); + } +} +static inline void McHorVer30_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); + if (iWidth == 16) { + McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pHorTmp, 16, iHeight); + PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc + 1, iSrcStride, pHorTmp, 16, iHeight); + } else if (iWidth == 8) { + McHorVer20WidthEq8_sse2 (pSrc, iSrcStride, pHorTmp, 16, iHeight); + PixelAvgWidthEq8_mmx (pDst, iDstStride, pSrc + 1, iSrcStride, pHorTmp, 16, iHeight); + } else { + McHorVer20WidthEq4_mmx (pSrc, iSrcStride, pHorTmp, 16, iHeight); + PixelAvgWidthEq4_mmx (pDst, iDstStride, pSrc + 1, iSrcStride, pHorTmp, 16, iHeight); + } +} +static inline void McHorVer31_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); + if (iWidth == 16) 
{ + McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq16_sse2 (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } else if (iWidth == 8) { + McHorVer20WidthEq8_sse2 (pSrc, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq8_sse2 (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq8_mmx (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } else { + McHorVer20WidthEq4_mmx (pSrc, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02_c (pSrc + 1, iSrcStride, pVerTmp, 16, 4, iHeight); + PixelAvgWidthEq4_mmx (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } +} +static inline void McHorVer32_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16); + if (iWidth == 16) { + McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight); + McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq16_sse2 (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight); + } else if (iWidth == 8) { + McHorVer02WidthEq8_sse2 (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight); + McHorVer22WidthEq8_sse2 (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq8_mmx (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight); + } else { + McHorVer02_c (pSrc + 1, iSrcStride, pVerTmp, 16, 4, iHeight); + McHorVer22_c (pSrc, iSrcStride, pCtrTmp, 16, 4, iHeight); + PixelAvgWidthEq4_mmx (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight); + } +} +static inline void McHorVer33_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); + if (iWidth == 16) { + McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, 
iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq16_sse2 (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } else if (iWidth == 8) { + McHorVer20WidthEq8_sse2 (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq8_sse2 (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq8_mmx (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } else { + McHorVer20WidthEq4_mmx (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02_c (pSrc + 1, iSrcStride, pVerTmp, 16, 4, iHeight); + PixelAvgWidthEq4_mmx (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } +} + +void McLuma_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) +//pSrc has been added the offset of mv +{ + static const PWelsMcWidthHeightFunc pWelsMcFunc[4][4] = { //[x][y] + {McCopy_sse2, McHorVer01_sse2, McHorVer02_sse2, McHorVer03_sse2}, + {McHorVer10_sse2, McHorVer11_sse2, McHorVer12_sse2, McHorVer13_sse2}, + {McHorVer20_sse2, McHorVer21_sse2, McHorVer22_sse2, McHorVer23_sse2}, + {McHorVer30_sse2, McHorVer31_sse2, McHorVer32_sse2, McHorVer33_sse2}, + }; + + pWelsMcFunc[iMvX & 0x03][iMvY & 0x03] (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight); +} + +void McChroma_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) { + static const PMcChromaWidthExtFunc kpMcChromaWidthFuncs[2] = { + McChromaWidthEq4_mmx, + McChromaWidthEq8_sse2 + }; + const int32_t kiD8x = iMvX & 0x07; + const int32_t kiD8y = iMvY & 0x07; + if (kiD8x == 0 && kiD8y == 0) { + McCopy_sse2 (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight); + return; + } + if (iWidth != 2) { + kpMcChromaWidthFuncs[iWidth >> 3] (pSrc, iSrcStride, pDst, iDstStride, g_kuiABCD[kiD8y][kiD8x], iHeight); + } else + McChromaWithFragMv_c (pSrc, 
iSrcStride, pDst, iDstStride, iMvX, iMvY, iWidth, iHeight); +} + +//***************************************************************************// +// SSSE3 implementation // +//***************************************************************************// + +void PixelAvgWidth4Or8Or16_sse2 (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, + const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) { + if (iWidth < 8) { + PixelAvgWidthEq4_mmx (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight); + } else if (iWidth == 8) { + PixelAvgWidthEq8_mmx (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight); + } else { + PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight); + } +} + +void McCopy_sse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + switch (iWidth) { + case 16: return McCopyWidthEq16_sse3 (pSrc, iSrcStride, pDst, iDstStride, iHeight); + case 8: return McCopyWidthEq8_mmx (pSrc, iSrcStride, pDst, iDstStride, iHeight); + case 4: return McCopyWidthEq4_c (pSrc, iSrcStride, pDst, iDstStride, iHeight); + } + return McCopyWidthEq2_c (pSrc, iSrcStride, pDst, iDstStride, iHeight); +} + +void McHorVer22_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_2D (int16_t, pTmp, 16 + 5, 8, 16); + if (iWidth < 8) { + McHorVer20Width4U8ToS16_ssse3 (pSrc, iSrcStride, &pTmp[0][0], iHeight + 5); + McHorVer02Width4S16ToU8_ssse3 (&pTmp[0][0], pDst, iDstStride, iHeight); + } else if (iWidth == 8) { + McHorVer20Width8U8ToS16_ssse3 (pSrc, iSrcStride, &pTmp[0][0], sizeof *pTmp, iHeight + 5); + McHorVer02WidthGe8S16ToU8_ssse3 (&pTmp[0][0], sizeof *pTmp, pDst, iDstStride, iWidth, iHeight); + } else { + McHorVer20Width8U8ToS16_ssse3 (pSrc, iSrcStride, &pTmp[0][0], sizeof *pTmp, iHeight + 5); + McHorVer02WidthGe8S16ToU8_ssse3 
(&pTmp[0][0], sizeof *pTmp, pDst, iDstStride, 8, iHeight); + McHorVer20Width8U8ToS16_ssse3 (pSrc + 8, iSrcStride, &pTmp[0][0], sizeof *pTmp, iHeight + 5); + McHorVer02WidthGe8S16ToU8_ssse3 (&pTmp[0][0], sizeof *pTmp, pDst + 8, iDstStride, 8, iHeight); + } +} + +void McHorVer01_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_2D (uint8_t, pTmp, 16, 16, 16); + McHorVer02_ssse3 (pSrc, iSrcStride, &pTmp[0][0], sizeof *pTmp, iWidth, iHeight); + PixelAvgWidth4Or8Or16_sse2 (pDst, iDstStride, pSrc, iSrcStride, + &pTmp[0][0], sizeof *pTmp, iWidth, iHeight); +} + +void McHorVer03_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_2D (uint8_t, pTmp, 16, 16, 16); + McHorVer02_ssse3 (pSrc, iSrcStride, &pTmp[0][0], sizeof *pTmp, iWidth, iHeight); + PixelAvgWidth4Or8Or16_sse2 (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, + &pTmp[0][0], sizeof *pTmp, iWidth, iHeight); +} + +void McHorVer10_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_2D (uint8_t, pTmp, 16, 16, 16); + McHorVer20_ssse3 (pSrc, iSrcStride, &pTmp[0][0], sizeof *pTmp, iWidth, iHeight); + PixelAvgWidth4Or8Or16_sse2 (pDst, iDstStride, pSrc, iSrcStride, + &pTmp[0][0], sizeof *pTmp, iWidth, iHeight); +} + +void McHorVer11_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_2D (uint8_t, pHorTmp, 16, 16, 16); + ENFORCE_STACK_ALIGN_2D (uint8_t, pVerTmp, 16, 16, 16); + McHorVer20_ssse3 (pSrc, iSrcStride, &pHorTmp[0][0], sizeof *pHorTmp, iWidth, iHeight); + McHorVer02_ssse3 (pSrc, iSrcStride, &pVerTmp[0][0], sizeof *pVerTmp, iWidth, iHeight); + PixelAvgWidth4Or8Or16_sse2 (pDst, iDstStride, &pHorTmp[0][0], sizeof *pHorTmp, + &pVerTmp[0][0], sizeof *pVerTmp, 
iWidth, iHeight); +} + +void McHorVer12_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_2D (uint8_t, pVerTmp, 16, 16, 16); + ENFORCE_STACK_ALIGN_2D (uint8_t, pCtrTmp, 16, 16, 16); + McHorVer02_ssse3 (pSrc, iSrcStride, &pVerTmp[0][0], sizeof *pVerTmp, iWidth, iHeight); + McHorVer22_ssse3 (pSrc, iSrcStride, &pCtrTmp[0][0], sizeof *pCtrTmp, iWidth, iHeight); + PixelAvgWidth4Or8Or16_sse2 (pDst, iDstStride, &pVerTmp[0][0], sizeof *pVerTmp, + &pCtrTmp[0][0], sizeof *pCtrTmp, iWidth, iHeight); +} + +void McHorVer13_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_2D (uint8_t, pHorTmp, 16, 16, 16); + ENFORCE_STACK_ALIGN_2D (uint8_t, pVerTmp, 16, 16, 16); + McHorVer20_ssse3 (pSrc + iSrcStride, iSrcStride, &pHorTmp[0][0], sizeof *pHorTmp, iWidth, iHeight); + McHorVer02_ssse3 (pSrc, iSrcStride, &pVerTmp[0][0], sizeof *pVerTmp, iWidth, iHeight); + PixelAvgWidth4Or8Or16_sse2 (pDst, iDstStride, &pHorTmp[0][0], sizeof *pHorTmp, + &pVerTmp[0][0], sizeof *pVerTmp, iWidth, iHeight); +} + +void McHorVer21_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_2D (uint8_t, pHorTmp, 16, 16, 16); + ENFORCE_STACK_ALIGN_2D (uint8_t, pCtrTmp, 16, 16, 16); + McHorVer20_ssse3 (pSrc, iSrcStride, &pHorTmp[0][0], sizeof *pHorTmp, iWidth, iHeight); + McHorVer22_ssse3 (pSrc, iSrcStride, &pCtrTmp[0][0], sizeof *pCtrTmp, iWidth, iHeight); + PixelAvgWidth4Or8Or16_sse2 (pDst, iDstStride, &pHorTmp[0][0], sizeof *pHorTmp, + &pCtrTmp[0][0], sizeof *pCtrTmp, iWidth, iHeight); +} + +void McHorVer23_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_2D (uint8_t, pHorTmp, 16, 16, 16); + ENFORCE_STACK_ALIGN_2D (uint8_t, pCtrTmp, 16, 16, 16); + 
McHorVer20_ssse3 (pSrc + iSrcStride, iSrcStride, &pHorTmp[0][0], sizeof *pHorTmp, iWidth, iHeight); + McHorVer22_ssse3 (pSrc, iSrcStride, &pCtrTmp[0][0], sizeof *pCtrTmp, iWidth, iHeight); + PixelAvgWidth4Or8Or16_sse2 (pDst, iDstStride, &pHorTmp[0][0], sizeof *pHorTmp, + &pCtrTmp[0][0], sizeof *pCtrTmp, iWidth, iHeight); +} + +void McHorVer30_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_2D (uint8_t, pTmp, 16, 16, 16); + McHorVer20_ssse3 (pSrc, iSrcStride, &pTmp[0][0], sizeof *pTmp, iWidth, iHeight); + PixelAvgWidth4Or8Or16_sse2 (pDst, iDstStride, pSrc + 1, iSrcStride, &pTmp[0][0], sizeof *pTmp, iWidth, iHeight); +} + +void McHorVer31_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_2D (uint8_t, pHorTmp, 16, 16, 16); + ENFORCE_STACK_ALIGN_2D (uint8_t, pVerTmp, 16, 16, 16); + McHorVer20_ssse3 (pSrc, iSrcStride, &pHorTmp[0][0], sizeof *pHorTmp, iWidth, iHeight); + McHorVer02_ssse3 (pSrc + 1, iSrcStride, &pVerTmp[0][0], sizeof *pVerTmp, iWidth, iHeight); + PixelAvgWidth4Or8Or16_sse2 (pDst, iDstStride, &pHorTmp[0][0], sizeof *pHorTmp, + &pVerTmp[0][0], sizeof *pVerTmp, iWidth, iHeight); +} + +void McHorVer32_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_2D (uint8_t, pVerTmp, 16, 16, 16); + ENFORCE_STACK_ALIGN_2D (uint8_t, pCtrTmp, 16, 16, 16); + McHorVer02_ssse3 (pSrc + 1, iSrcStride, &pVerTmp[0][0], sizeof *pVerTmp, iWidth, iHeight); + McHorVer22_ssse3 (pSrc, iSrcStride, &pCtrTmp[0][0], sizeof *pCtrTmp, iWidth, iHeight); + PixelAvgWidth4Or8Or16_sse2 (pDst, iDstStride, &pVerTmp[0][0], sizeof *pVerTmp, + &pCtrTmp[0][0], sizeof *pCtrTmp, iWidth, iHeight); +} + +void McHorVer33_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t 
iHeight) { + ENFORCE_STACK_ALIGN_2D (uint8_t, pHorTmp, 16, 16, 16); + ENFORCE_STACK_ALIGN_2D (uint8_t, pVerTmp, 16, 16, 16); + McHorVer20_ssse3 (pSrc + iSrcStride, iSrcStride, &pHorTmp[0][0], sizeof *pHorTmp, iWidth, iHeight); + McHorVer02_ssse3 (pSrc + 1, iSrcStride, &pVerTmp[0][0], sizeof *pVerTmp, iWidth, iHeight); + PixelAvgWidth4Or8Or16_sse2 (pDst, iDstStride, &pHorTmp[0][0], sizeof *pHorTmp, + &pVerTmp[0][0], sizeof *pVerTmp, iWidth, iHeight); +} + +void McHorVer22Width5Or9Or17_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_2D (int16_t, pTmp, 17 + 5, WELS_ALIGN(17, 16 / sizeof (int16_t)), 16) + if (iWidth > 5) { + McHorVer20Width9Or17U8ToS16_ssse3 (pSrc, iSrcStride, &pTmp[0][0], sizeof *pTmp, iWidth, iHeight + 5); + McHorVer02WidthGe8S16ToU8_ssse3 (&pTmp[0][0], sizeof *pTmp, pDst, iDstStride, iWidth, iHeight); + } else { + McHorVer20Width8U8ToS16_ssse3 (pSrc, iSrcStride, &pTmp[0][0], sizeof *pTmp, iHeight + 5); + McHorVer02Width5S16ToU8_ssse3 (&pTmp[0][0], sizeof *pTmp, pDst, iDstStride, iHeight); + } +} + +void McLuma_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) { + static const PWelsMcWidthHeightFunc pWelsMcFunc[4][4] = { + {McCopy_sse3, McHorVer01_ssse3, McHorVer02_ssse3, McHorVer03_ssse3}, + {McHorVer10_ssse3, McHorVer11_ssse3, McHorVer12_ssse3, McHorVer13_ssse3}, + {McHorVer20_ssse3, McHorVer21_ssse3, McHorVer22_ssse3, McHorVer23_ssse3}, + {McHorVer30_ssse3, McHorVer31_ssse3, McHorVer32_ssse3, McHorVer33_ssse3}, + }; + + pWelsMcFunc[iMvX & 0x03][iMvY & 0x03] (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight); +} + +void McChroma_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) { + static const PMcChromaWidthExtFunc kpMcChromaWidthFuncs[2] = { + 
McChromaWidthEq4_mmx, + McChromaWidthEq8_ssse3 + }; + const int32_t kiD8x = iMvX & 0x07; + const int32_t kiD8y = iMvY & 0x07; + if (kiD8x == 0 && kiD8y == 0) { + McCopy_sse2 (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight); + return; + } + if (iWidth != 2) { + kpMcChromaWidthFuncs[iWidth >> 3] (pSrc, iSrcStride, pDst, iDstStride, g_kuiABCD[kiD8y][kiD8x], iHeight); + } else + McChromaWithFragMv_c (pSrc, iSrcStride, pDst, iDstStride, iMvX, iMvY, iWidth, iHeight); +} + +//***************************************************************************// +// AVX2 implementation // +//***************************************************************************// + +#ifdef HAVE_AVX2 + +void McHorVer22_avx2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_2D (int16_t, pTmp, 16 + 5, 16, 32); + if (iWidth < 8) { + McHorVer20Width4U8ToS16_avx2 (pSrc, iSrcStride, &pTmp[0][0], iHeight + 5); + McHorVer02Width4S16ToU8_avx2 (&pTmp[0][0], pDst, iDstStride, iHeight); + } else if (iWidth == 8) { + McHorVer20Width8U8ToS16_avx2 (pSrc, iSrcStride, &pTmp[0][0], iHeight + 5); + McHorVer02Width8S16ToU8_avx2 (&pTmp[0][0], pDst, iDstStride, iHeight); + } else { + McHorVer20Width16U8ToS16_avx2 (pSrc, iSrcStride, &pTmp[0][0], iHeight + 5); + McHorVer02Width16Or17S16ToU8_avx2 (&pTmp[0][0], sizeof *pTmp, pDst, iDstStride, iWidth, iHeight); + } +} + +void McHorVer01_avx2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_2D (uint8_t, pTmp, 16, 16, 16); + McHorVer02_avx2 (pSrc, iSrcStride, &pTmp[0][0], sizeof *pTmp, iWidth, iHeight); + PixelAvgWidth4Or8Or16_sse2 (pDst, iDstStride, pSrc, iSrcStride, + &pTmp[0][0], sizeof *pTmp, iWidth, iHeight); +} + +void McHorVer03_avx2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_2D (uint8_t, pTmp, 16, 
16, 16); + McHorVer02_avx2 (pSrc, iSrcStride, &pTmp[0][0], sizeof *pTmp, iWidth, iHeight); + PixelAvgWidth4Or8Or16_sse2 (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, + &pTmp[0][0], sizeof *pTmp, iWidth, iHeight); +} + +void McHorVer10_avx2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_2D (uint8_t, pTmp, 16, 16, 16); + McHorVer20_avx2 (pSrc, iSrcStride, &pTmp[0][0], sizeof *pTmp, iWidth, iHeight); + PixelAvgWidth4Or8Or16_sse2 (pDst, iDstStride, pSrc, iSrcStride, + &pTmp[0][0], sizeof *pTmp, iWidth, iHeight); +} + +void McHorVer11_avx2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_2D (uint8_t, pHorTmp, 16, 16, 16); + ENFORCE_STACK_ALIGN_2D (uint8_t, pVerTmp, 16, 16, 16); + McHorVer20_avx2 (pSrc, iSrcStride, &pHorTmp[0][0], sizeof *pHorTmp, iWidth, iHeight); + McHorVer02_avx2 (pSrc, iSrcStride, &pVerTmp[0][0], sizeof *pVerTmp, iWidth, iHeight); + PixelAvgWidth4Or8Or16_sse2 (pDst, iDstStride, &pHorTmp[0][0], sizeof *pHorTmp, + &pVerTmp[0][0], sizeof *pVerTmp, iWidth, iHeight); +} + +void McHorVer12_avx2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_2D (uint8_t, pVerTmp, 16, 16, 16); + ENFORCE_STACK_ALIGN_2D (uint8_t, pCtrTmp, 16, 16, 16); + McHorVer02_avx2 (pSrc, iSrcStride, &pVerTmp[0][0], sizeof *pVerTmp, iWidth, iHeight); + McHorVer22_avx2 (pSrc, iSrcStride, &pCtrTmp[0][0], sizeof *pCtrTmp, iWidth, iHeight); + PixelAvgWidth4Or8Or16_sse2 (pDst, iDstStride, &pVerTmp[0][0], sizeof *pVerTmp, + &pCtrTmp[0][0], sizeof *pCtrTmp, iWidth, iHeight); +} + +void McHorVer13_avx2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_2D (uint8_t, pHorTmp, 16, 16, 16); + ENFORCE_STACK_ALIGN_2D (uint8_t, pVerTmp, 16, 16, 16); + 
McHorVer20_avx2 (pSrc + iSrcStride, iSrcStride, &pHorTmp[0][0], sizeof *pHorTmp, iWidth, iHeight); + McHorVer02_avx2 (pSrc, iSrcStride, &pVerTmp[0][0], sizeof *pVerTmp, iWidth, iHeight); + PixelAvgWidth4Or8Or16_sse2 (pDst, iDstStride, &pHorTmp[0][0], sizeof *pHorTmp, + &pVerTmp[0][0], sizeof *pVerTmp, iWidth, iHeight); +} + +void McHorVer21_avx2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_2D (uint8_t, pHorTmp, 16, 16, 16); + ENFORCE_STACK_ALIGN_2D (uint8_t, pCtrTmp, 16, 16, 16); + McHorVer20_avx2 (pSrc, iSrcStride, &pHorTmp[0][0], sizeof *pHorTmp, iWidth, iHeight); + McHorVer22_avx2 (pSrc, iSrcStride, &pCtrTmp[0][0], sizeof *pCtrTmp, iWidth, iHeight); + PixelAvgWidth4Or8Or16_sse2 (pDst, iDstStride, &pHorTmp[0][0], sizeof *pHorTmp, + &pCtrTmp[0][0], sizeof *pCtrTmp, iWidth, iHeight); +} + +void McHorVer23_avx2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_2D (uint8_t, pHorTmp, 16, 16, 16); + ENFORCE_STACK_ALIGN_2D (uint8_t, pCtrTmp, 16, 16, 16); + McHorVer20_avx2 (pSrc + iSrcStride, iSrcStride, &pHorTmp[0][0], sizeof *pHorTmp, iWidth, iHeight); + McHorVer22_avx2 (pSrc, iSrcStride, &pCtrTmp[0][0], sizeof *pCtrTmp, iWidth, iHeight); + PixelAvgWidth4Or8Or16_sse2 (pDst, iDstStride, &pHorTmp[0][0], sizeof *pHorTmp, + &pCtrTmp[0][0], sizeof *pCtrTmp, iWidth, iHeight); +} + +void McHorVer30_avx2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_2D (uint8_t, pTmp, 16, 16, 16); + McHorVer20_avx2 (pSrc, iSrcStride, &pTmp[0][0], sizeof *pTmp, iWidth, iHeight); + PixelAvgWidth4Or8Or16_sse2 (pDst, iDstStride, pSrc + 1, iSrcStride, &pTmp[0][0], sizeof *pTmp, iWidth, iHeight); +} + +void McHorVer31_avx2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + 
ENFORCE_STACK_ALIGN_2D (uint8_t, pHorTmp, 16, 16, 16); + ENFORCE_STACK_ALIGN_2D (uint8_t, pVerTmp, 16, 16, 16); + McHorVer20_avx2 (pSrc, iSrcStride, &pHorTmp[0][0], sizeof *pHorTmp, iWidth, iHeight); + McHorVer02_avx2 (pSrc + 1, iSrcStride, &pVerTmp[0][0], sizeof *pVerTmp, iWidth, iHeight); + PixelAvgWidth4Or8Or16_sse2 (pDst, iDstStride, &pHorTmp[0][0], sizeof *pHorTmp, + &pVerTmp[0][0], sizeof *pVerTmp, iWidth, iHeight); +} + +void McHorVer32_avx2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_2D (uint8_t, pVerTmp, 16, 16, 16); + ENFORCE_STACK_ALIGN_2D (uint8_t, pCtrTmp, 16, 16, 16); + McHorVer02_avx2 (pSrc + 1, iSrcStride, &pVerTmp[0][0], sizeof *pVerTmp, iWidth, iHeight); + McHorVer22_avx2 (pSrc, iSrcStride, &pCtrTmp[0][0], sizeof *pCtrTmp, iWidth, iHeight); + PixelAvgWidth4Or8Or16_sse2 (pDst, iDstStride, &pVerTmp[0][0], sizeof *pVerTmp, + &pCtrTmp[0][0], sizeof *pCtrTmp, iWidth, iHeight); +} + +void McHorVer33_avx2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_2D (uint8_t, pHorTmp, 16, 16, 16); + ENFORCE_STACK_ALIGN_2D (uint8_t, pVerTmp, 16, 16, 16); + McHorVer20_avx2 (pSrc + iSrcStride, iSrcStride, &pHorTmp[0][0], sizeof *pHorTmp, iWidth, iHeight); + McHorVer02_avx2 (pSrc + 1, iSrcStride, &pVerTmp[0][0], sizeof *pVerTmp, iWidth, iHeight); + PixelAvgWidth4Or8Or16_sse2 (pDst, iDstStride, &pHorTmp[0][0], sizeof *pHorTmp, + &pVerTmp[0][0], sizeof *pVerTmp, iWidth, iHeight); +} + +void McHorVer22Width5Or9Or17_avx2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + if (iWidth < 9) { + ENFORCE_STACK_ALIGN_2D (int16_t, pTmp, 9 + 5, WELS_ALIGN(5, 16 / sizeof (int16_t)), 16) + McHorVer20Width8U8ToS16_avx2 (pSrc, iSrcStride, &pTmp[0][0], iHeight + 5); + McHorVer02Width5S16ToU8_avx2 (&pTmp[0][0], pDst, iDstStride, iHeight); + } else 
if (iWidth == 9) { + ENFORCE_STACK_ALIGN_2D (int16_t, pTmp, 17 + 5, 16, 32) + McHorVer20Width16U8ToS16_avx2 (pSrc, iSrcStride, &pTmp[0][0], iHeight + 5); + McHorVer02Width9S16ToU8_avx2 (&pTmp[0][0], pDst, iDstStride, iHeight); + } else { + ENFORCE_STACK_ALIGN_2D (int16_t, pTmp, 17 + 5, WELS_ALIGN(17, 32 / sizeof (int16_t)), 32) + McHorVer20Width17U8ToS16_avx2 (pSrc, iSrcStride, &pTmp[0][0], iHeight + 5); + McHorVer02Width16Or17S16ToU8_avx2 (&pTmp[0][0], sizeof *pTmp, pDst, iDstStride, iWidth, iHeight); + } +} + +void McLuma_avx2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) { + static const PWelsMcWidthHeightFunc pWelsMcFunc[4][4] = { + {McCopy_sse3, McHorVer01_avx2, McHorVer02_avx2, McHorVer03_avx2}, + {McHorVer10_avx2, McHorVer11_avx2, McHorVer12_avx2, McHorVer13_avx2}, + {McHorVer20_avx2, McHorVer21_avx2, McHorVer22_avx2, McHorVer23_avx2}, + {McHorVer30_avx2, McHorVer31_avx2, McHorVer32_avx2, McHorVer33_avx2}, + }; + + pWelsMcFunc[iMvX & 0x03][iMvY & 0x03] (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight); +} + +#endif //HAVE_AVX2 + +void PixelAvg_sse2 (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, + const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) { + static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = { + PixelAvgWidthEq8_mmx, + PixelAvgWidthEq16_sse2 + }; + kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight); +} + +#endif //X86_ASM +//***************************************************************************// +// NEON implementation // +//***************************************************************************// +#if defined(HAVE_NEON) +void McHorVer20Width5Or9Or17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + if (iWidth == 17) + McHorVer20Width17_neon (pSrc, iSrcStride, pDst, 
iDstStride, iHeight); + else if (iWidth == 9) + McHorVer20Width9_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else //if (iWidth == 5) + McHorVer20Width5_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); +} +void McHorVer02Height5Or9Or17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + if (iWidth == 16) + McHorVer02Height17_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 8) + McHorVer02Height9_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else //if (iWidth == 4) + McHorVer02Height5_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); +} +void McHorVer22Width5Or9Or17Height5Or9Or17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + if (iWidth == 17) + McHorVer22Width17_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 9) + McHorVer22Width9_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else //if (iWidth == 5) + McHorVer22Width5_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); +} +void McCopy_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + if (16 == iWidth) + McCopyWidthEq16_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (8 == iWidth) + McCopyWidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (4 == iWidth) + McCopyWidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else + McCopyWidthEq2_c (pSrc, iSrcStride, pDst, iDstStride, iHeight); +} +void McHorVer20_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + if (iWidth == 16) + McHorVer20WidthEq16_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 8) + McHorVer20WidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 4) + McHorVer20WidthEq4_neon (pSrc, iSrcStride, pDst, 
iDstStride, iHeight); +} +void McHorVer02_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + if (iWidth == 16) + McHorVer02WidthEq16_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 8) + McHorVer02WidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 4) + McHorVer02WidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); +} +void McHorVer22_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + if (iWidth == 16) + McHorVer22WidthEq16_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 8) + McHorVer22WidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 4) + McHorVer22WidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); +} + +void McHorVer01_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + if (iWidth == 16) + McHorVer01WidthEq16_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 8) + McHorVer01WidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 4) + McHorVer01WidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); +} +void McHorVer03_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + if (iWidth == 16) + McHorVer03WidthEq16_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 8) + McHorVer03WidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 4) + McHorVer03WidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); +} +void McHorVer10_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + if (iWidth == 16) + McHorVer10WidthEq16_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 8) + 
McHorVer10WidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 4) + McHorVer10WidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); +} +void McHorVer11_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); + if (iWidth == 16) { + McHorVer20WidthEq16_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq16_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq16_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight); + } else if (iWidth == 8) { + McHorVer20WidthEq8_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq8_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq8_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight); + } else if (iWidth == 4) { + McHorVer20WidthEq4_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq4_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq4_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight); + } +} +void McHorVer12_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16); + if (iWidth == 16) { + McHorVer02WidthEq16_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight); + McHorVer22WidthEq16_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq16_neon (pDst, iDstStride, pVerTmp, pCtrTmp, iHeight); + } else if (iWidth == 8) { + McHorVer02WidthEq8_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight); + McHorVer22WidthEq8_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq8_neon (pDst, iDstStride, pVerTmp, pCtrTmp, iHeight); + } else if (iWidth == 4) { + McHorVer02WidthEq4_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight); + McHorVer22WidthEq4_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight); 
+ PixelAvgWidthEq4_neon (pDst, iDstStride, pVerTmp, pCtrTmp, iHeight); + } +} +void McHorVer13_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); + if (iWidth == 16) { + McHorVer20WidthEq16_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq16_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq16_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight); + } else if (iWidth == 8) { + McHorVer20WidthEq8_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq8_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq8_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight); + } else if (iWidth == 4) { + McHorVer20WidthEq4_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq4_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq4_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight); + } +} +void McHorVer21_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16); + if (iWidth == 16) { + McHorVer20WidthEq16_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight); + McHorVer22WidthEq16_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq16_neon (pDst, iDstStride, pHorTmp, pCtrTmp, iHeight); + } else if (iWidth == 8) { + McHorVer20WidthEq8_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight); + McHorVer22WidthEq8_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq8_neon (pDst, iDstStride, pHorTmp, pCtrTmp, iHeight); + } else if (iWidth == 4) { + McHorVer20WidthEq4_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight); + McHorVer22WidthEq4_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq4_neon (pDst, iDstStride, pHorTmp, 
pCtrTmp, iHeight); + } +} +void McHorVer23_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16); + if (iWidth == 16) { + McHorVer20WidthEq16_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); + McHorVer22WidthEq16_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq16_neon (pDst, iDstStride, pHorTmp, pCtrTmp, iHeight); + } else if (iWidth == 8) { + McHorVer20WidthEq8_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); + McHorVer22WidthEq8_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq8_neon (pDst, iDstStride, pHorTmp, pCtrTmp, iHeight); + } else if (iWidth == 4) { + McHorVer20WidthEq4_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); + McHorVer22WidthEq4_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq4_neon (pDst, iDstStride, pHorTmp, pCtrTmp, iHeight); + } +} +void McHorVer30_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + if (iWidth == 16) + McHorVer30WidthEq16_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 8) + McHorVer30WidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 4) + McHorVer30WidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); +} +void McHorVer31_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); + if (iWidth == 16) { + McHorVer20WidthEq16_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq16_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq16_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight); + } else if (iWidth == 8) { + McHorVer20WidthEq8_neon (pSrc, iSrcStride, 
pHorTmp, 16, iHeight); + McHorVer02WidthEq8_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq8_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight); + } else if (iWidth == 4) { + McHorVer20WidthEq4_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq4_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq4_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight); + } +} +void McHorVer32_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16); + if (iWidth == 16) { + McHorVer02WidthEq16_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight); + McHorVer22WidthEq16_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq16_neon (pDst, iDstStride, pVerTmp, pCtrTmp, iHeight); + } else if (iWidth == 8) { + McHorVer02WidthEq8_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight); + McHorVer22WidthEq8_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq8_neon (pDst, iDstStride, pVerTmp, pCtrTmp, iHeight); + } else if (iWidth == 4) { + McHorVer02WidthEq4_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight); + McHorVer22WidthEq4_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq4_neon (pDst, iDstStride, pVerTmp, pCtrTmp, iHeight); + } +} +void McHorVer33_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); + if (iWidth == 16) { + McHorVer20WidthEq16_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq16_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq16_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight); + } else if (iWidth == 8) { + McHorVer20WidthEq8_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); + 
McHorVer02WidthEq8_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq8_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight); + } else if (iWidth == 4) { + McHorVer20WidthEq4_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq4_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq4_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight); + } +} + +void McLuma_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) { + static const PWelsMcWidthHeightFunc pWelsMcFunc[4][4] = { //[x][y] + {McCopy_neon, McHorVer01_neon, McHorVer02_neon, McHorVer03_neon}, + {McHorVer10_neon, McHorVer11_neon, McHorVer12_neon, McHorVer13_neon}, + {McHorVer20_neon, McHorVer21_neon, McHorVer22_neon, McHorVer23_neon}, + {McHorVer30_neon, McHorVer31_neon, McHorVer32_neon, McHorVer33_neon}, + }; + // pSrc += (iMvY >> 2) * iSrcStride + (iMvX >> 2); + pWelsMcFunc[iMvX & 0x03][iMvY & 0x03] (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight); +} +void McChroma_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) { + if (0 == iMvX && 0 == iMvY) { + if (8 == iWidth) + McCopyWidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 4) + McCopyWidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else //here iWidth == 2 + McCopyWidthEq2_c (pSrc, iSrcStride, pDst, iDstStride, iHeight); + } else { + const int32_t kiD8x = iMvX & 0x07; + const int32_t kiD8y = iMvY & 0x07; + if (8 == iWidth) + McChromaWidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, (int32_t*) (g_kuiABCD[kiD8y][kiD8x]), iHeight); + else if (4 == iWidth) + McChromaWidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, (int32_t*) (g_kuiABCD[kiD8y][kiD8x]), iHeight); + else //here iWidth == 2 + McChromaWithFragMv_c (pSrc, iSrcStride, pDst, iDstStride, iMvX, iMvY, iWidth, iHeight); 
+ } +} +void PixelAvg_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, + const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) { + static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = { + PixStrideAvgWidthEq8_neon, + PixStrideAvgWidthEq16_neon + }; + kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight); +} +#endif +#if defined(HAVE_NEON_AARCH64) +void McHorVer20Width5Or9Or17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + if (iWidth == 17) + McHorVer20Width17_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 9) + McHorVer20Width9_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else //if (iWidth == 5) + McHorVer20Width5_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); +} +void McHorVer02Height5Or9Or17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + if (iWidth == 16) + McHorVer02Height17_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 8) + McHorVer02Height9_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else //if (iWidth == 4) + McHorVer02Height5_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); +} +void McHorVer22Width5Or9Or17Height5Or9Or17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, + int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + if (iWidth == 17) + McHorVer22Width17_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 9) + McHorVer22Width9_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else //if (iWidth == 5) + McHorVer22Width5_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); +} +void McCopy_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t 
iHeight) { + if (16 == iWidth) + McCopyWidthEq16_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (8 == iWidth) + McCopyWidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (4 == iWidth) + McCopyWidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else + McCopyWidthEq2_c (pSrc, iSrcStride, pDst, iDstStride, iHeight); +} +void McHorVer20_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + if (iWidth == 16) + McHorVer20WidthEq16_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 8) + McHorVer20WidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 4) + McHorVer20WidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); +} +void McHorVer02_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + if (iWidth == 16) + McHorVer02WidthEq16_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 8) + McHorVer02WidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 4) + McHorVer02WidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); +} +void McHorVer22_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + if (iWidth == 16) + McHorVer22WidthEq16_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 8) + McHorVer22WidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 4) + McHorVer22WidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); +} + +void McHorVer01_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + if (iWidth == 16) + McHorVer01WidthEq16_AArch64_neon (pSrc, iSrcStride, pDst, 
iDstStride, iHeight); + else if (iWidth == 8) + McHorVer01WidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 4) + McHorVer01WidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); +} +void McHorVer03_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + if (iWidth == 16) + McHorVer03WidthEq16_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 8) + McHorVer03WidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 4) + McHorVer03WidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); +} +void McHorVer10_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + if (iWidth == 16) + McHorVer10WidthEq16_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 8) + McHorVer10WidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 4) + McHorVer10WidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); +} +void McHorVer11_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); + if (iWidth == 16) { + McHorVer20WidthEq16_AArch64_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq16_AArch64_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } else if (iWidth == 8) { + McHorVer20WidthEq8_AArch64_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq8_AArch64_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq8_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } else if (iWidth == 4) { + McHorVer20WidthEq4_AArch64_neon 
(pSrc, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq4_AArch64_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq4_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } +} +void McHorVer12_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16); + if (iWidth == 16) { + McHorVer02WidthEq16_AArch64_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight); + McHorVer22WidthEq16_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight); + } else if (iWidth == 8) { + McHorVer02WidthEq8_AArch64_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight); + McHorVer22WidthEq8_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq8_AArch64_neon (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight); + } else if (iWidth == 4) { + McHorVer02WidthEq4_AArch64_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight); + McHorVer22WidthEq4_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq4_AArch64_neon (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight); + } +} +void McHorVer13_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); + if (iWidth == 16) { + McHorVer20WidthEq16_AArch64_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq16_AArch64_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } else if (iWidth == 8) { + McHorVer20WidthEq8_AArch64_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq8_AArch64_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight); + 
PixelAvgWidthEq8_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } else if (iWidth == 4) { + McHorVer20WidthEq4_AArch64_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq4_AArch64_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq4_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } +} +void McHorVer21_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16); + if (iWidth == 16) { + McHorVer20WidthEq16_AArch64_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight); + McHorVer22WidthEq16_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight); + } else if (iWidth == 8) { + McHorVer20WidthEq8_AArch64_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight); + McHorVer22WidthEq8_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq8_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight); + } else if (iWidth == 4) { + McHorVer20WidthEq4_AArch64_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight); + McHorVer22WidthEq4_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq4_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight); + } +} +void McHorVer23_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16); + if (iWidth == 16) { + McHorVer20WidthEq16_AArch64_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); + McHorVer22WidthEq16_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight); + } else if (iWidth == 8) { + 
McHorVer20WidthEq8_AArch64_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); + McHorVer22WidthEq8_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq8_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight); + } else if (iWidth == 4) { + McHorVer20WidthEq4_AArch64_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); + McHorVer22WidthEq4_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq4_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight); + } +} +void McHorVer30_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + if (iWidth == 16) + McHorVer30WidthEq16_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 8) + McHorVer30WidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 4) + McHorVer30WidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); +} +void McHorVer31_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); + if (iWidth == 16) { + McHorVer20WidthEq16_AArch64_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq16_AArch64_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } else if (iWidth == 8) { + McHorVer20WidthEq8_AArch64_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq8_AArch64_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq8_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } else if (iWidth == 4) { + McHorVer20WidthEq4_AArch64_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq4_AArch64_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight); + 
PixelAvgWidthEq4_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } +} +void McHorVer32_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16); + if (iWidth == 16) { + McHorVer02WidthEq16_AArch64_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight); + McHorVer22WidthEq16_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight); + } else if (iWidth == 8) { + McHorVer02WidthEq8_AArch64_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight); + McHorVer22WidthEq8_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq8_AArch64_neon (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight); + } else if (iWidth == 4) { + McHorVer02WidthEq4_AArch64_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight); + McHorVer22WidthEq4_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq4_AArch64_neon (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight); + } +} +void McHorVer33_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); + if (iWidth == 16) { + McHorVer20WidthEq16_AArch64_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq16_AArch64_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } else if (iWidth == 8) { + McHorVer20WidthEq8_AArch64_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq8_AArch64_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq8_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } else if (iWidth 
== 4) { + McHorVer20WidthEq4_AArch64_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq4_AArch64_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq4_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } +} + +void McLuma_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) { + static const PWelsMcWidthHeightFunc pWelsMcFunc[4][4] = { //[x][y] + {McCopy_AArch64_neon, McHorVer01_AArch64_neon, McHorVer02_AArch64_neon, McHorVer03_AArch64_neon}, + {McHorVer10_AArch64_neon, McHorVer11_AArch64_neon, McHorVer12_AArch64_neon, McHorVer13_AArch64_neon}, + {McHorVer20_AArch64_neon, McHorVer21_AArch64_neon, McHorVer22_AArch64_neon, McHorVer23_AArch64_neon}, + {McHorVer30_AArch64_neon, McHorVer31_AArch64_neon, McHorVer32_AArch64_neon, McHorVer33_AArch64_neon}, + }; + // pSrc += (iMvY >> 2) * iSrcStride + (iMvX >> 2); + pWelsMcFunc[iMvX & 0x03][iMvY & 0x03] (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight); +} +void McChroma_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) { + if (0 == iMvX && 0 == iMvY) { + if (8 == iWidth) + McCopyWidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 4) + McCopyWidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else //here iWidth == 2 + McCopyWidthEq2_c (pSrc, iSrcStride, pDst, iDstStride, iHeight); + } else { + const int32_t kiD8x = iMvX & 0x07; + const int32_t kiD8y = iMvY & 0x07; + if (8 == iWidth) + McChromaWidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, (int32_t*) (g_kuiABCD[kiD8y][kiD8x]), iHeight); + else if (4 == iWidth) + McChromaWidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, (int32_t*) (g_kuiABCD[kiD8y][kiD8x]), iHeight); + else //here iWidth == 2 + McChromaWithFragMv_c (pSrc, iSrcStride, 
pDst, iDstStride, iMvX, iMvY, iWidth, iHeight); + } +} +void PixelAvg_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, + const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) { + static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = { + PixStrideAvgWidthEq8_AArch64_neon, + PixStrideAvgWidthEq16_AArch64_neon + }; + kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight); +} +#endif + +#if defined(HAVE_MMI) +#define MMI_LOAD_8P(f0, f2, f4, r0) \ + "gsldlc1 "#f0", 0x7("#r0") \n\t" \ + "gsldrc1 "#f0", 0x0("#r0") \n\t" \ + "punpckhbh "#f2", "#f0", "#f4" \n\t" \ + "punpcklbh "#f0", "#f0", "#f4" \n\t" + +#define FILTER_HV_W4(f0, f2, f4, f6, f8, f10, f12, f14, f16, f18, \ + f20, f22, f24, f26, f28, f30, r0, r1, r2) \ + "paddh "#f0", "#f0", "#f20" \n\t" \ + "paddh "#f2", "#f2", "#f22" \n\t" \ + "mov.d "#f28", "#f8" \n\t" \ + "mov.d "#f30", "#f10" \n\t" \ + "mov.d "#f24", "#f4" \n\t" \ + "mov.d "#f26", "#f6" \n\t" \ + "dmfc1 "#r2", "#f8" \n\t" \ + "dli "#r1", 0x0010001000100010 \n\t" \ + "dmtc1 "#r1", "#f8" \n\t" \ + "paddh "#f0", "#f0", "#f8" \n\t" \ + "paddh "#f2", "#f2", "#f8" \n\t" \ + "paddh "#f28", "#f28", "#f12" \n\t" \ + "paddh "#f30", "#f30", "#f14" \n\t" \ + "paddh "#f24", "#f24", "#f16" \n\t" \ + "paddh "#f26", "#f26", "#f18" \n\t" \ + "dli "#r1", 0x2 \n\t" \ + "dmtc1 "#r1", "#f8" \n\t" \ + "psllh "#f28", "#f28", "#f8" \n\t" \ + "psllh "#f30", "#f30", "#f8" \n\t" \ + "psubh "#f28", "#f28", "#f24" \n\t" \ + "psubh "#f30", "#f30", "#f26" \n\t" \ + "paddh "#f0", "#f0", "#f28" \n\t" \ + "paddh "#f2", "#f2", "#f30" \n\t" \ + "psllh "#f28", "#f28", "#f8" \n\t" \ + "psllh "#f30", "#f30", "#f8" \n\t" \ + "paddh "#f0", "#f0", "#f28" \n\t" \ + "paddh "#f2", "#f2", "#f30" \n\t" \ + "dli "#r1", 0x5 \n\t" \ + "dmtc1 "#r1", "#f8" \n\t" \ + "psrah "#f0", "#f0", "#f8" \n\t" \ + "psrah "#f2", "#f2", "#f8" \n\t" \ + "xor "#f28", "#f28", "#f28" \n\t" \ + "packushb "#f0", "#f0", 
"#f2" \n\t" \ + "gsswlc1 "#f0", 0x3("#r0") \n\t" \ + "gsswrc1 "#f0", 0x0("#r0") \n\t" \ + "dmtc1 "#r2", "#f8" \n\t" + +#define FILTER_HV_W8(f0, f2, f4, f6, f8, f10, f12, f14, f16, f18, \ + f20, f22, f24, f26, f28, f30, r0, r1, r2) \ + "paddh "#f0", "#f0", "#f20" \n\t" \ + "paddh "#f2", "#f2", "#f22" \n\t" \ + "mov.d "#f28", "#f8" \n\t" \ + "mov.d "#f30", "#f10" \n\t" \ + "mov.d "#f24", "#f4" \n\t" \ + "mov.d "#f26", "#f6" \n\t" \ + "dmfc1 "#r2", "#f8" \n\t" \ + "dli "#r1", 0x0010001000100010 \n\t" \ + "dmtc1 "#r1", "#f8" \n\t" \ + "paddh "#f0", "#f0", "#f8" \n\t" \ + "paddh "#f2", "#f2", "#f8" \n\t" \ + "paddh "#f28", "#f28", "#f12" \n\t" \ + "paddh "#f30", "#f30", "#f14" \n\t" \ + "paddh "#f24", "#f24", "#f16" \n\t" \ + "paddh "#f26", "#f26", "#f18" \n\t" \ + "dli "#r1", 0x2 \n\t" \ + "dmtc1 "#r1", "#f8" \n\t" \ + "psllh "#f28", "#f28", "#f8" \n\t" \ + "psllh "#f30", "#f30", "#f8" \n\t" \ + "psubh "#f28", "#f28", "#f24" \n\t" \ + "psubh "#f30", "#f30", "#f26" \n\t" \ + "paddh "#f0", "#f0", "#f28" \n\t" \ + "paddh "#f2", "#f2", "#f30" \n\t" \ + "psllh "#f28", "#f28", "#f8" \n\t" \ + "psllh "#f30", "#f30", "#f8" \n\t" \ + "paddh "#f0", "#f0", "#f28" \n\t" \ + "paddh "#f2", "#f2", "#f30" \n\t" \ + "dli "#r1", 0x5 \n\t" \ + "dmtc1 "#r1", "#f8" \n\t" \ + "psrah "#f0", "#f0", "#f8" \n\t" \ + "psrah "#f2", "#f2", "#f8" \n\t" \ + "xor "#f28", "#f28", "#f28" \n\t" \ + "packushb "#f0", "#f0", "#f2" \n\t" \ + "gssdlc1 "#f0", 0x7("#r0") \n\t" \ + "gssdrc1 "#f0", 0x0("#r0") \n\t" \ + "dmtc1 "#r2", "#f8" \n\t" + +#define FILTER_VER_ALIGN(f0, f2, f4, f6, f8, f10, f12, f14, f16, f18, \ + f20, f22, f24, f26, f28, f30, r0, r1, r2, r3, r4) \ + "paddh "#f0", "#f0", "#f20" \n\t" \ + "paddh "#f2", "#f2", "#f22" \n\t" \ + "mov.d "#f24", "#f4" \n\t" \ + "mov.d "#f26", "#f6" \n\t" \ + "mov.d "#f28", "#f8" \n\t" \ + "mov.d "#f30", "#f10" \n\t" \ + "dli "#r2", 0x2 \n\t" \ + "paddh "#f24", "#f24", "#f16" \n\t" \ + "paddh "#f26", "#f26", "#f18" \n\t" \ + "dmfc1 "#r3", "#f8" \n\t" \ + "paddh 
"#f28", "#f28", "#f12" \n\t" \ + "paddh "#f30", "#f30", "#f14" \n\t" \ + "dmtc1 "#r2", "#f8" \n\t" \ + "psubh "#f0", "#f0", "#f24" \n\t" \ + "psubh "#f2", "#f2", "#f26" \n\t" \ + "psrah "#f0", "#f0", "#f8" \n\t" \ + "psrah "#f2", "#f2", "#f8" \n\t" \ + "paddh "#f0", "#f0", "#f28" \n\t" \ + "paddh "#f2", "#f2", "#f30" \n\t" \ + "psubh "#f0", "#f0", "#f24" \n\t" \ + "psubh "#f2", "#f2", "#f26" \n\t" \ + "psrah "#f0", "#f0", "#f8" \n\t" \ + "psrah "#f2", "#f2", "#f8" \n\t" \ + "dmtc1 "#r4", "#f8" \n\t" \ + "paddh "#f28", "#f28", "#f0" \n\t" \ + "paddh "#f30", "#f30", "#f2" \n\t" \ + "dli "#r2", 0x6 \n\t" \ + "paddh "#f28", "#f28", "#f8" \n\t" \ + "paddh "#f30", "#f30", "#f8" \n\t" \ + "dmtc1 "#r2", "#f8" \n\t" \ + "psrah "#f28", "#f28", "#f8" \n\t" \ + "psrah "#f30", "#f30", "#f8" \n\t" \ + "packushb "#f28", "#f28", "#f30" \n\t" \ + "gssdxc1 "#f28", 0x0("#r0", "#r1") \n\t" \ + "dmtc1 "#r3", "#f8" \n\t" + +#define FILTER_VER_UNALIGN(f0, f2, f4, f6, f8, f10, f12, f14, f16, f18, \ + f20, f22, f24, f26, f28, f30, r0, r1, r2, r3) \ + "paddh "#f0", "#f0", "#f20" \n\t" \ + "paddh "#f2", "#f2", "#f22" \n\t" \ + "mov.d "#f24", "#f4" \n\t" \ + "mov.d "#f26", "#f6" \n\t" \ + "mov.d "#f28", "#f8" \n\t" \ + "mov.d "#f30", "#f10" \n\t" \ + "dli "#r1", 0x2 \n\t" \ + "paddh "#f24", "#f24", "#f16" \n\t" \ + "paddh "#f26", "#f26", "#f18" \n\t" \ + "dmfc1 "#r2", "#f8" \n\t" \ + "paddh "#f28", "#f28", "#f12" \n\t" \ + "paddh "#f30", "#f30", "#f14" \n\t" \ + "dmtc1 "#r1", "#f8" \n\t" \ + "psubh "#f0", "#f0", "#f24" \n\t" \ + "psubh "#f2", "#f2", "#f26" \n\t" \ + "psrah "#f0", "#f0", "#f8" \n\t" \ + "psrah "#f2", "#f2", "#f8" \n\t" \ + "paddh "#f0", "#f0", "#f28" \n\t" \ + "paddh "#f2", "#f2", "#f30" \n\t" \ + "psubh "#f0", "#f0", "#f24" \n\t" \ + "psubh "#f2", "#f2", "#f26" \n\t" \ + "psrah "#f0", "#f0", "#f8" \n\t" \ + "psrah "#f2", "#f2", "#f8" \n\t" \ + "dmtc1 "#r3", "#f8" \n\t" \ + "paddh "#f28", "#f28", "#f0" \n\t" \ + "paddh "#f30", "#f30", "#f2" \n\t" \ + "dli "#r1", 0x6 \n\t" \ + 
"paddh "#f28", "#f28", "#f8" \n\t" \ + "paddh "#f30", "#f30", "#f8" \n\t" \ + "dmtc1 "#r1", "#f8" \n\t" \ + "psrah "#f28", "#f28", "#f8" \n\t" \ + "psrah "#f30", "#f30", "#f8" \n\t" \ + "packushb "#f28", "#f28", "#f30" \n\t" \ + "gssdlc1 "#f28", 0x7("#r0") \n\t" \ + "gssdrc1 "#f28", 0x0("#r0") \n\t" \ + "dmtc1 "#r2", "#f8" \n\t" + +void McHorVer20Width5_mmi(const uint8_t *pSrc, int32_t iSrcStride, uint8_t *pDst, + int32_t iDstStride, int32_t iWidth, int32_t iHeight) { + BACKUP_REG; + __asm__ volatile ( + ".set arch=loongson3a \n\t" + "xor $f28, $f28, $f28 \n\t" + PTR_ADDIU "%[pSrc], %[pSrc], -0x2 \n\t" + "dli $8, 0x2 \n\t" + "dli $10, 0x0010001000100010 \n\t" + "dli $11, 0x5 \n\t" + "1: \n\t" + "xor $f28, $f28, $f28 \n\t" + "gsldlc1 $f0, 0x7(%[pSrc]) \n\t" + "gsldlc1 $f4, 0xc(%[pSrc]) \n\t" + "gsldlc1 $f8, 0x8(%[pSrc]) \n\t" + "gsldlc1 $f12, 0xb(%[pSrc]) \n\t" + "gsldlc1 $f16, 0x9(%[pSrc]) \n\t" + "gsldlc1 $f20, 0xa(%[pSrc]) \n\t" + "gsldrc1 $f0, 0x0(%[pSrc]) \n\t" + "gsldrc1 $f4, 0x5(%[pSrc]) \n\t" + "gsldrc1 $f8, 0x1(%[pSrc]) \n\t" + "gsldrc1 $f12, 0x4(%[pSrc]) \n\t" + "gsldrc1 $f16, 0x2(%[pSrc]) \n\t" + "gsldrc1 $f20, 0x3(%[pSrc]) \n\t" + "punpckhbh $f2, $f0, $f28 \n\t" + "punpckhbh $f6, $f4, $f28 \n\t" + "punpckhbh $f10, $f8, $f28 \n\t" + "punpckhbh $f14, $f12, $f28 \n\t" + "punpckhbh $f18, $f16, $f28 \n\t" + "punpckhbh $f22, $f20, $f28 \n\t" + "punpcklbh $f0, $f0, $f28 \n\t" + "punpcklbh $f4, $f4, $f28 \n\t" + "punpcklbh $f8, $f8, $f28 \n\t" + "punpcklbh $f12, $f12, $f28 \n\t" + "punpcklbh $f16, $f16, $f28 \n\t" + "punpcklbh $f20, $f20, $f28 \n\t" + + "mov.d $f28, $f8 \n\t" + "mov.d $f30, $f10 \n\t" + "paddh $f28, $f28, $f12 \n\t" + "paddh $f30, $f30, $f14 \n\t" + "mov.d $f24, $f16 \n\t" + "mov.d $f26, $f18 \n\t" + "paddh $f24, $f24, $f20 \n\t" + "paddh $f26, $f26, $f22 \n\t" + "dmfc1 $9, $f12 \n\t" + "dmtc1 $8, $f12 \n\t" + "psllh $f24, $f24, $f12 \n\t" + "psllh $f26, $f26, $f12 \n\t" + "psubh $f24, $f24, $f28 \n\t" + "psubh $f26, $f26, $f30 \n\t" + "paddh 
$f0, $f0, $f4 \n\t" + "paddh $f2, $f2, $f6 \n\t" + "paddh $f0, $f0, $f24 \n\t" + "paddh $f2, $f2, $f26 \n\t" + "psllh $f24, $f24, $f12 \n\t" + "psllh $f26, $f26, $f12 \n\t" + "paddh $f0, $f0, $f24 \n\t" + "paddh $f2, $f2, $f26 \n\t" + + "dmtc1 $10, $f12 \n\t" + "paddh $f0, $f0, $f12 \n\t" + "paddh $f2, $f2, $f12 \n\t" + "dmtc1 $11, $f12 \n\t" + "psrah $f0, $f0, $f12 \n\t" + "psrah $f2, $f2, $f12 \n\t" + "packushb $f0, $f0, $f2 \n\t" + + "gsswlc1 $f0, 0x3(%[pDst]) \n\t" + "gsswrc1 $f0, 0x0(%[pDst]) \n\t" + + "gsldlc1 $f0, 0xd(%[pSrc]) \n\t" + "xor $f28, $f28, $f28 \n\t" + "gsldrc1 $f0, 0x6(%[pSrc]) \n\t" + "punpckhbh $f2, $f0, $f28 \n\t" + "punpcklbh $f0, $f0, $f28 \n\t" + "dmtc1 $9, $f12 \n\t" + "dmtc1 $8, $f24 \n\t" + + "paddh $f16, $f16, $f4 \n\t" + "paddh $f18, $f18, $f6 \n\t" + "paddh $f20, $f20, $f12 \n\t" + "paddh $f22, $f22, $f14 \n\t" + "psllh $f20, $f20, $f24 \n\t" + "psllh $f22, $f22, $f24 \n\t" + "psubh $f20, $f20, $f16 \n\t" + "psubh $f22, $f22, $f18 \n\t" + "paddh $f8, $f8, $f0 \n\t" + "paddh $f10, $f10, $f2 \n\t" + "paddh $f8, $f8, $f20 \n\t" + "paddh $f10, $f10, $f22 \n\t" + "psllh $f20, $f20, $f24 \n\t" + "psllh $f22, $f22, $f24 \n\t" + "paddh $f8, $f8, $f20 \n\t" + "paddh $f10, $f10, $f22 \n\t" + + "dmtc1 $10, $f24 \n\t" + "paddh $f8, $f8, $f24 \n\t" + "paddh $f10, $f10, $f24 \n\t" + "dmtc1 $11, $f24 \n\t" + "psrah $f8, $f8, $f24 \n\t" + "psrah $f10, $f10, $f24 \n\t" + "packushb $f8, $f8, $f10 \n\t" + "gsswlc1 $f8, 0x4(%[pDst]) \n\t" + "gsswrc1 $f8, 0x1(%[pDst]) \n\t" + + PTR_ADDU "%[pSrc], %[pSrc], %[iSrcStride] \n\t" + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "bnez %[iHeight], 1b \n\t" + : [pSrc]"+&r"((unsigned char *)pSrc), [pDst]"+&r"((unsigned char *)pDst), + [iWidth]"+&r"((int)iWidth), [iHeight]"+&r"((int)iHeight) + : [iSrcStride]"r"((int)iSrcStride), [iDstStride]"r"((int)iDstStride) + : "memory", "$8", "$9", "$10", "$11", "$f0", "$f2", "$f4", "$f6", "$f8", + "$f10", "$f12", "$f14", 
"$f16", "$f18", "$f20", "$f22", "$f24", "$f26", + "$f28", "$f30" + ); + RECOVER_REG; +} + +void McHorVer20Width9Or17_mmi(const uint8_t *pSrc, int32_t iSrcStride, uint8_t *pDst, + int32_t iDstStride, int32_t iWidth, int32_t iHeight) { + BACKUP_REG; + __asm__ volatile ( + ".set arch=loongson3a \n\t" + PTR_ADDIU "%[pSrc], %[pSrc], -0x2 \n\t" + "xor $f28, $f28, $f28 \n\t" + "dli $8, 0x2 \n\t" + "dli $9, 0x9 \n\t" + "dli $10, 0x0010001000100010 \n\t" + "dli $11, 0x5 \n\t" + "bne %[iWidth], $9, 2f \n\t" + "1: \n\t" + "xor $f28, $f28, $f28 \n\t" + "gsldlc1 $f0, 0x7(%[pSrc]) \n\t" + "gsldlc1 $f4, 0xc(%[pSrc]) \n\t" + "gsldlc1 $f8, 0x8(%[pSrc]) \n\t" + "gsldlc1 $f12, 0xb(%[pSrc]) \n\t" + "gsldlc1 $f16, 0x9(%[pSrc]) \n\t" + "gsldlc1 $f20, 0xa(%[pSrc]) \n\t" + "gsldrc1 $f0, 0x0(%[pSrc]) \n\t" + "gsldrc1 $f4, 0x5(%[pSrc]) \n\t" + "gsldrc1 $f8, 0x1(%[pSrc]) \n\t" + "gsldrc1 $f12, 0x4(%[pSrc]) \n\t" + "gsldrc1 $f16, 0x2(%[pSrc]) \n\t" + "gsldrc1 $f20, 0x3(%[pSrc]) \n\t" + "punpckhbh $f2, $f0, $f28 \n\t" + "punpckhbh $f6, $f4, $f28 \n\t" + "punpckhbh $f10, $f8, $f28 \n\t" + "punpckhbh $f14, $f12, $f28 \n\t" + "punpckhbh $f18, $f16, $f28 \n\t" + "punpckhbh $f22, $f20, $f28 \n\t" + "punpcklbh $f0, $f0, $f28 \n\t" + "punpcklbh $f4, $f4, $f28 \n\t" + "punpcklbh $f8, $f8, $f28 \n\t" + "punpcklbh $f12, $f12, $f28 \n\t" + "punpcklbh $f16, $f16, $f28 \n\t" + "punpcklbh $f20, $f20, $f28 \n\t" + + "mov.d $f28, $f8 \n\t" + "mov.d $f30, $f10 \n\t" + "paddh $f28, $f28, $f12 \n\t" + "paddh $f30, $f30, $f14 \n\t" + "mov.d $f24, $f16 \n\t" + "mov.d $f26, $f18 \n\t" + "paddh $f24, $f24, $f20 \n\t" + "paddh $f26, $f26, $f22 \n\t" + "dmfc1 $9, $f12 \n\t" + "dmtc1 $8, $f12 \n\t" + "psllh $f24, $f24, $f12 \n\t" + "psllh $f26, $f26, $f12 \n\t" + "psubh $f24, $f24, $f28 \n\t" + "psubh $f26, $f26, $f30 \n\t" + "paddh $f0, $f0, $f4 \n\t" + "paddh $f2, $f2, $f6 \n\t" + "paddh $f0, $f0, $f24 \n\t" + "paddh $f2, $f2, $f26 \n\t" + "psllh $f24, $f24, $f12 \n\t" + "psllh $f26, $f26, $f12 \n\t" + "paddh $f0, 
$f0, $f24 \n\t" + "paddh $f2, $f2, $f26 \n\t" + + "dmtc1 $10, $f12 \n\t" + "paddh $f0, $f0, $f12 \n\t" + "paddh $f2, $f2, $f12 \n\t" + "dmtc1 $11, $f12 \n\t" + "psrah $f0, $f0, $f12 \n\t" + "psrah $f2, $f2, $f12 \n\t" + "packushb $f0, $f0, $f2 \n\t" + + "gsswlc1 $f0, 0x3(%[pDst]) \n\t" + "gsswrc1 $f0, 0x0(%[pDst]) \n\t" + + "gsldlc1 $f0, 0xd(%[pSrc]) \n\t" + "xor $f28, $f28, $f28 \n\t" + "gsldrc1 $f0, 0x6(%[pSrc]) \n\t" + "punpckhbh $f2, $f0, $f28 \n\t" + "punpcklbh $f0, $f0, $f28 \n\t" + "dmtc1 $9, $f12 \n\t" + "dmtc1 $8, $f24 \n\t" + + "paddh $f16, $f16, $f4 \n\t" + "paddh $f18, $f18, $f6 \n\t" + "paddh $f20, $f20, $f12 \n\t" + "paddh $f22, $f22, $f14 \n\t" + "psllh $f20, $f20, $f24 \n\t" + "psllh $f22, $f22, $f24 \n\t" + "psubh $f20, $f20, $f16 \n\t" + "psubh $f22, $f22, $f18 \n\t" + "paddh $f8, $f8, $f0 \n\t" + "paddh $f10, $f10, $f2 \n\t" + "paddh $f8, $f8, $f20 \n\t" + "paddh $f10, $f10, $f22 \n\t" + "psllh $f20, $f20, $f24 \n\t" + "psllh $f22, $f22, $f24 \n\t" + "paddh $f8, $f8, $f20 \n\t" + "paddh $f10, $f10, $f22 \n\t" + + "dmtc1 $10, $f24 \n\t" + "paddh $f8, $f8, $f24 \n\t" + "paddh $f10, $f10, $f24 \n\t" + "dmtc1 $11, $f24 \n\t" + "psrah $f8, $f8, $f24 \n\t" + "psrah $f10, $f10, $f24 \n\t" + "packushb $f8, $f8, $f10 \n\t" + "gssdlc1 $f8, 0x8(%[pDst]) \n\t" + "gssdrc1 $f8, 0x1(%[pDst]) \n\t" + + PTR_ADDU "%[pSrc], %[pSrc], %[iSrcStride] \n\t" + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "bnez %[iHeight], 1b \n\t" + "j 3f \n\t" + + "2: \n\t" + "xor $f28, $f28, $f28 \n\t" + "gsldlc1 $f0, 0x7(%[pSrc]) \n\t" + "gsldlc1 $f4, 0xc(%[pSrc]) \n\t" + "gsldlc1 $f8, 0x8(%[pSrc]) \n\t" + "gsldlc1 $f12, 0xb(%[pSrc]) \n\t" + "gsldlc1 $f16, 0x9(%[pSrc]) \n\t" + "gsldlc1 $f20, 0xa(%[pSrc]) \n\t" + "gsldrc1 $f0, 0x0(%[pSrc]) \n\t" + "gsldrc1 $f4, 0x5(%[pSrc]) \n\t" + "gsldrc1 $f8, 0x1(%[pSrc]) \n\t" + "gsldrc1 $f12, 0x4(%[pSrc]) \n\t" + "gsldrc1 $f16, 0x2(%[pSrc]) \n\t" + "gsldrc1 $f20, 0x3(%[pSrc]) \n\t" + "punpckhbh 
$f2, $f0, $f28 \n\t" + "punpckhbh $f6, $f4, $f28 \n\t" + "punpckhbh $f10, $f8, $f28 \n\t" + "punpckhbh $f14, $f12, $f28 \n\t" + "punpckhbh $f18, $f16, $f28 \n\t" + "punpckhbh $f22, $f20, $f28 \n\t" + "punpcklbh $f0, $f0, $f28 \n\t" + "punpcklbh $f4, $f4, $f28 \n\t" + "punpcklbh $f8, $f8, $f28 \n\t" + "punpcklbh $f12, $f12, $f28 \n\t" + "punpcklbh $f16, $f16, $f28 \n\t" + "punpcklbh $f20, $f20, $f28 \n\t" + + "dmtc1 $8, $f30 \n\t" + "paddh $f8, $f8, $f12 \n\t" + "paddh $f10, $f10, $f14 \n\t" + "paddh $f16, $f16, $f20 \n\t" + "paddh $f18, $f18, $f22 \n\t" + "psllh $f16, $f16, $f30 \n\t" + "psllh $f18, $f18, $f30 \n\t" + "psubh $f16, $f16, $f8 \n\t" + "psubh $f18, $f18, $f10 \n\t" + "paddh $f0, $f0, $f4 \n\t" + "paddh $f2, $f2, $f6 \n\t" + "paddh $f0, $f0, $f16 \n\t" + "paddh $f2, $f2, $f18 \n\t" + "psllh $f16, $f16, $f30 \n\t" + "psllh $f18, $f18, $f30 \n\t" + "paddh $f0, $f0, $f16 \n\t" + "paddh $f2, $f2, $f18 \n\t" + + "dmtc1 $10, $f30 \n\t" + "paddh $f0, $f0, $f30 \n\t" + "paddh $f2, $f2, $f30 \n\t" + "dmtc1 $11, $f30 \n\t" + "psrah $f0, $f0, $f30 \n\t" + "psrah $f2, $f2, $f30 \n\t" + "packushb $f0, $f0, $f2 \n\t" + "gssdlc1 $f0, 0x7(%[pDst]) \n\t" + "gssdrc1 $f0, 0x0(%[pDst]) \n\t" + + "gsldlc1 $f0, 15(%[pSrc]) \n\t" + "gsldlc1 $f4, 0x14(%[pSrc]) \n\t" + "gsldlc1 $f8, 0x10(%[pSrc]) \n\t" + "gsldlc1 $f12, 0x13(%[pSrc]) \n\t" + "gsldlc1 $f16, 0x11(%[pSrc]) \n\t" + "gsldlc1 $f20, 0x12(%[pSrc]) \n\t" + "gsldrc1 $f0, 8(%[pSrc]) \n\t" + "gsldrc1 $f4, 0xd(%[pSrc]) \n\t" + "gsldrc1 $f8, 0x9(%[pSrc]) \n\t" + "gsldrc1 $f12, 0xc(%[pSrc]) \n\t" + "gsldrc1 $f16, 0xa(%[pSrc]) \n\t" + "gsldrc1 $f20, 0xb(%[pSrc]) \n\t" + "punpckhbh $f2, $f0, $f28 \n\t" + "punpckhbh $f6, $f4, $f28 \n\t" + "punpckhbh $f10, $f8, $f28 \n\t" + "punpckhbh $f14, $f12, $f28 \n\t" + "punpckhbh $f18, $f16, $f28 \n\t" + "punpckhbh $f22, $f20, $f28 \n\t" + "punpcklbh $f0, $f0, $f28 \n\t" + "punpcklbh $f4, $f4, $f28 \n\t" + "punpcklbh $f8, $f8, $f28 \n\t" + "punpcklbh $f12, $f12, $f28 \n\t" + "punpcklbh 
$f16, $f16, $f28 \n\t" + "punpcklbh $f20, $f20, $f28 \n\t" + + "mov.d $f28, $f8 \n\t" + "mov.d $f30, $f10 \n\t" + "paddh $f28, $f28, $f12 \n\t" + "paddh $f30, $f30, $f14 \n\t" + "mov.d $f24, $f16 \n\t" + "mov.d $f26, $f18 \n\t" + "paddh $f24, $f24, $f20 \n\t" + "paddh $f26, $f26, $f22 \n\t" + "dmfc1 $9, $f12 \n\t" + "dmtc1 $8, $f12 \n\t" + "psllh $f24, $f24, $f12 \n\t" + "psllh $f26, $f26, $f12 \n\t" + "psubh $f24, $f24, $f28 \n\t" + "psubh $f26, $f26, $f30 \n\t" + "paddh $f0, $f0, $f4 \n\t" + "paddh $f2, $f2, $f6 \n\t" + "paddh $f0, $f0, $f24 \n\t" + "paddh $f2, $f2, $f26 \n\t" + "psllh $f24, $f24, $f12 \n\t" + "psllh $f26, $f26, $f12 \n\t" + "paddh $f0, $f0, $f24 \n\t" + "paddh $f2, $f2, $f26 \n\t" + + "dmtc1 $10, $f30 \n\t" + "paddh $f0, $f0, $f30 \n\t" + "paddh $f2, $f2, $f30 \n\t" + "dmtc1 $11, $f30 \n\t" + "psrah $f0, $f0, $f30 \n\t" + "psrah $f2, $f2, $f30 \n\t" + "packushb $f0, $f0, $f2 \n\t" + "gsswlc1 $f0, 0xb(%[pDst]) \n\t" + "gsswrc1 $f0, 0x8(%[pDst]) \n\t" + + "dmtc1 $9, $f12 \n\t" + "xor $f28, $f28, $f28 \n\t" + "dli $9, 0x20 \n\t" + "gsldlc1 $f0, 0x15(%[pSrc]) \n\t" + "dmtc1 $9, $f30 \n\t" + "gsldrc1 $f0, 0xE(%[pSrc]) \n\t" + "punpckhbh $f2, $f0, $f28 \n\t" + "punpcklbh $f0, $f0, $f28 \n\t" + "dmtc1 $8, $f24 \n\t" + + "paddh $f16, $f16, $f4 \n\t" + "paddh $f18, $f18, $f6 \n\t" + "paddh $f20, $f20, $f12 \n\t" + "paddh $f22, $f22, $f14 \n\t" + "psllh $f20, $f20, $f24 \n\t" + "psllh $f22, $f22, $f24 \n\t" + "psubh $f20, $f20, $f16 \n\t" + "psubh $f22, $f22, $f18 \n\t" + "paddh $f8, $f8, $f0 \n\t" + "paddh $f10, $f10, $f2 \n\t" + "paddh $f8, $f8, $f20 \n\t" + "paddh $f10, $f10, $f22 \n\t" + "psllh $f20, $f20, $f24 \n\t" + "psllh $f22, $f22, $f24 \n\t" + "paddh $f8, $f8, $f20 \n\t" + "paddh $f10, $f10, $f22 \n\t" + + "dmtc1 $10, $f24 \n\t" + "paddh $f8, $f8, $f24 \n\t" + "paddh $f10, $f10, $f24 \n\t" + "dmtc1 $11, $f24 \n\t" + "psrah $f8, $f8, $f24 \n\t" + "psrah $f10, $f10, $f24 \n\t" + "packushb $f8, $f8, $f10 \n\t" + "gssdlc1 $f8, 0x10(%[pDst]) \n\t" + 
"gssdrc1 $f8, 0x9(%[pDst]) \n\t" + + PTR_ADDU "%[pSrc], %[pSrc], %[iSrcStride] \n\t" + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "bnez %[iHeight], 2b \n\t" + "3: \n\t" + : [pSrc]"+&r"((unsigned char *)pSrc), [pDst]"+&r"((unsigned char *)pDst), + [iWidth]"+&r"((int)iWidth), [iHeight]"+&r"((int)iHeight) + : [iSrcStride]"r"((int)iSrcStride), [iDstStride]"r"((int)iDstStride) + : "memory", "$8", "$9", "$10", "$11", "$f0", "$f2", "$f4", "$f6", "$f8", + "$f10", "$f12", "$f14", "$f16", "$f18", "$f20", "$f22", "$f24", "$f26", + "$f28", "$f30" + ); + RECOVER_REG; +} + +//Horizontal filter producing the half sample at quarter-sample position (2, 0). Widths 17 and 9 are handled by McHorVer20Width9Or17_mmi; every other width (5 is the expected one) goes to McHorVer20Width5_mmi. +static inline void McHorVer20Width5Or9Or17_mmi(const uint8_t* pSrc, int32_t iSrcStride, + uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + if (iWidth == 17 || iWidth == 9) + McHorVer20Width9Or17_mmi(pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight); + else //if (iWidth == 5) + McHorVer20Width5_mmi(pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight); +} +/* Vertical filter over 4-byte-wide column strips. iWidth is shifted right by 2 to give the strip count, pSrc is moved two rows up, six source rows are loaded with MMI_LOAD_8P and every output row is written by one FILTER_HV_W4 invocation (both macros are defined outside this chunk). $12, $13 and $14 keep the entry values of pSrc, pDst and iHeight so the next strip can restart from the top, 4 bytes further right. At least two rows are written per strip because the first row counter decrement is not tested. NOTE(review): the bases saved in $12 and $13 are never advanced, so a third strip would repeat the second; presumably callers only pass iWidth 4 here - confirm. */ +void McHorVer02Height5_mmi(const uint8_t *pSrc, int32_t iSrcStride, uint8_t *pDst, + int32_t iDstStride, int32_t iWidth, int32_t iHeight) { + BACKUP_REG; + __asm__ volatile ( + ".set arch=loongson3a \n\t" + "move $12, %[pSrc] \n\t" + "move $13, %[pDst] \n\t" + "move $14, %[iHeight] \n\t" + + "dsrl %[iWidth], %[iWidth], 0x2 \n\t" + PTR_ADDU "$10, %[iSrcStride], %[iSrcStride] \n\t" + PTR_SUBU "%[pSrc], %[pSrc], $10 \n\t" + + "1: \n\t" + "xor $f28, $f28, $f28 \n\t" + MMI_LOAD_8P($f0, $f2, $f28, %[pSrc]) + PTR_ADDU "$8, %[pSrc], %[iSrcStride] \n\t" + MMI_LOAD_8P($f4, $f6, $f28, $8) + + PTR_ADDU "%[pSrc], %[pSrc], $10 \n\t" + MMI_LOAD_8P($f8, $f10, $f28, %[pSrc]) + PTR_ADDU "$8, %[pSrc], %[iSrcStride] \n\t" + MMI_LOAD_8P($f12, $f14, $f28, $8) + PTR_ADDU "%[pSrc], %[pSrc], $10 \n\t" + MMI_LOAD_8P($f16, $f18, $f28, %[pSrc]) + PTR_ADDU "$8, %[pSrc], %[iSrcStride] \n\t" + MMI_LOAD_8P($f20, $f22, $f28, 
$8) + FILTER_HV_W4($f0, $f2, $f4, $f6, $f8, $f10, $f12, $f14, $f16, $f18, $f20, + $f22, $f24, $f26, $f28, $f30, %[pDst], $8, $9) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + PTR_ADDU "%[pSrc], %[pSrc], $10 \n\t" + MMI_LOAD_8P($f24, $f26, $f28, %[pSrc]) + "mov.d $f0, $f4 \n\t" + "mov.d $f2, $f6 \n\t" + "mov.d $f4, $f8 \n\t" + "mov.d $f6, $f10 \n\t" + "mov.d $f8, $f12 \n\t" + "mov.d $f10, $f14 \n\t" + "mov.d $f12, $f16 \n\t" + "mov.d $f14, $f18 \n\t" + "mov.d $f16, $f20 \n\t" + "mov.d $f18, $f22 \n\t" + "mov.d $f20, $f24 \n\t" + "mov.d $f22, $f26 \n\t" + + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + PTR_SUBU "%[pSrc], %[pSrc], %[iSrcStride] \n\t" + + "2: \n\t" + FILTER_HV_W4($f0, $f2, $f4, $f6, $f8, $f10, $f12, $f14, $f16, $f18, $f20, + $f22, $f24, $f26, $f28, $f30, %[pDst], $8, $9) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 3f \n\t" + + PTR_ADDU "%[pSrc], %[pSrc], $10 \n\t" + MMI_LOAD_8P($f24, $f26, $f28, %[pSrc]) + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + FILTER_HV_W4($f4, $f6, $f8, $f10, $f12, $f14, $f16, $f18, $f20, $f22, $f24, + $f26, $f28, $f30, $f0, $f2, %[pDst], $8, $9) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 3f \n\t" + + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + PTR_ADDU "$8, %[pSrc], %[iSrcStride] \n\t" + MMI_LOAD_8P($f28, $f30, $f0, $8) + FILTER_HV_W4($f8, $f10, $f12, $f14, $f16, $f18, $f20, $f22, $f24, $f26, $f28, + $f30, $f0, $f2, $f4, $f6, %[pDst], $8, $9) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 3f \n\t" + + PTR_ADDU "%[pSrc], %[pSrc], $10 \n\t" + MMI_LOAD_8P($f0, $f2, $f4, %[pSrc]) + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + FILTER_HV_W4($f12, $f14, $f16, $f18, $f20, $f22, $f24, $f26, $f28, $f30, $f0, + $f2, $f4, $f6, $f8, $f10, %[pDst], $8, $9) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 3f \n\t" + + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + PTR_ADDU "$8, %[pSrc], %[iSrcStride] \n\t" + MMI_LOAD_8P($f4, 
$f6, $f8, $8) + FILTER_HV_W4($f16, $f18, $f20, $f22, $f24, $f26, $f28, $f30, $f0, $f2, $f4, $f6, + $f8, $f10, $f12, $f14, %[pDst], $8, $9) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 3f \n\t" + + PTR_ADDU "%[pSrc], %[pSrc], $10 \n\t" + MMI_LOAD_8P($f8, $f10, $f12, %[pSrc]) + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + FILTER_HV_W4($f20, $f22, $f24, $f26, $f28, $f30, $f0, $f2, $f4, $f6, $f8, $f10, + $f12, $f14, $f16, $f18, %[pDst], $8, $9) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 3f \n\t" + + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + PTR_ADDU "$8, %[pSrc], %[iSrcStride] \n\t" + MMI_LOAD_8P($f12, $f14, $f16, $8) + FILTER_HV_W4($f24, $f26, $f28, $f30, $f0, $f2, $f4, $f6, $f8, $f10, $f12, $f14, + $f16, $f18, $f20, $f22, %[pDst], $8, $9) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 3f \n\t" + + PTR_ADDU "%[pSrc], %[pSrc], $10 \n\t" + MMI_LOAD_8P($f16, $f18, $f20, %[pSrc]) + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + FILTER_HV_W4($f28, $f30, $f0, $f2, $f4, $f6, $f8, $f10, $f12, $f14, $f16, $f18, + $f20, $f22, $f24, $f26, %[pDst], $8, $9) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 3f \n\t" + + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + PTR_ADDU "$8, %[pSrc], %[iSrcStride] \n\t" + MMI_LOAD_8P($f20, $f22, $f24, $8) + "j 2b \n\t" + + "3: \n\t" + PTR_ADDIU "%[iWidth], %[iWidth], -0x1 \n\t" + "beqz %[iWidth], 4f \n\t" + "move %[pSrc], $12 \n\t" + "move %[pDst], $13 \n\t" + "move %[iHeight], $14 \n\t" + PTR_SUBU "%[pSrc], %[pSrc], $10 \n\t" + PTR_ADDIU "%[pSrc], %[pSrc], 0x4 \n\t" + PTR_ADDIU "%[pDst], %[pDst], 0x4 \n\t" + "j 1b \n\t" + "4: \n\t" + : [pSrc]"+&r"((unsigned char *)pSrc), [pDst]"+&r"((unsigned char *)pDst), + [iWidth]"+&r"(iWidth), [iHeight]"+&r"(iHeight) + : [iSrcStride]"r"(iSrcStride), [iDstStride]"r"(iDstStride) + : "memory", "$8", "$9", "$10", "$12", "$13", "$14", "$f0", "$f2", "$f4", + "$f6", "$f8", "$f10", "$f12", "$f14", "$f16", 
"$f18", "$f20", "$f22", + "$f24", "$f26", "$f28", "$f30" + ); + RECOVER_REG; +} +/* Vertical filter over 8-byte-wide column strips. iWidth is shifted right by 3 to give the strip count, pSrc is moved two rows up, six source rows are loaded with MMI_LOAD_8P and every output row is written by one FILTER_HV_W8 invocation (both macros are defined outside this chunk). $f30 is loaded with 0x20 at the top of each strip. $12, $13 and $14 keep the entry values of pSrc, pDst and iHeight so the next strip can restart from the top, 8 bytes further right. NOTE(review): the bases saved in $12 and $13 are never advanced, so at most two strips (iWidth 8 or 16) are covered - confirm no caller passes more. */ +void McHorVer02Height9Or17_mmi(const uint8_t *pSrc, int32_t iSrcStride, uint8_t *pDst, + int32_t iDstStride, int32_t iWidth, int32_t iHeight) { + BACKUP_REG; + __asm__ volatile ( + ".set arch=loongson3a \n\t" + "move $12, %[pSrc] \n\t" + "move $13, %[pDst] \n\t" + "move $14, %[iHeight] \n\t" + + "dsrl %[iWidth], %[iWidth], 0x3 \n\t" + PTR_ADDU "$10, %[iSrcStride], %[iSrcStride] \n\t" + PTR_SUBU "%[pSrc], %[pSrc], $10 \n\t" + + "1: \n\t" + "dli $8, 0x20 \n\t" + "xor $f28, $f28, $f28 \n\t" + "dmtc1 $8, $f30 \n\t" + + MMI_LOAD_8P($f0, $f2, $f28, %[pSrc]) + PTR_ADDU "$8, %[pSrc], %[iSrcStride] \n\t" + MMI_LOAD_8P($f4, $f6, $f28, $8) + PTR_ADDU "%[pSrc], %[pSrc], $10 \n\t" + MMI_LOAD_8P($f8, $f10, $f28, %[pSrc]) + PTR_ADDU "$8, %[pSrc], %[iSrcStride] \n\t" + MMI_LOAD_8P($f12, $f14, $f28, $8) + PTR_ADDU "%[pSrc], %[pSrc], $10 \n\t" + MMI_LOAD_8P($f16, $f18, $f28, %[pSrc]) + PTR_ADDU "$8, %[pSrc], %[iSrcStride] \n\t" + MMI_LOAD_8P($f20, $f22, $f28, $8) + FILTER_HV_W8($f0, $f2, $f4, $f6, $f8, $f10, $f12, $f14, $f16, $f18, $f20, + $f22, $f24, $f26, $f28, $f30, %[pDst], $8, $9) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + PTR_ADDU "%[pSrc], %[pSrc], $10 \n\t" + MMI_LOAD_8P($f24, $f26, $f28, %[pSrc]) + "mov.d $f0, $f4 \n\t" + "mov.d $f2, $f6 \n\t" + "mov.d $f4, $f8 \n\t" + "mov.d $f6, $f10 \n\t" + "mov.d $f8, $f12 \n\t" + "mov.d $f10, $f14 \n\t" + "mov.d $f12, $f16 \n\t" + "mov.d $f14, $f18 \n\t" + "mov.d $f16, $f20 \n\t" + "mov.d $f18, $f22 \n\t" + "mov.d $f20, $f24 \n\t" + "mov.d $f22, $f26 \n\t" + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + PTR_SUBU "%[pSrc], %[pSrc], %[iSrcStride] \n\t" + + "2: \n\t" + FILTER_HV_W8($f0, $f2, $f4, $f6, $f8, $f10, $f12, $f14, $f16, $f18, $f20, + $f22, $f24, $f26, $f28, $f30, %[pDst], $8, $9) + "dmtc1 $9, $f8 \n\t" + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 3f \n\t" + + PTR_ADDU "%[pSrc], %[pSrc], $10 \n\t" + 
MMI_LOAD_8P($f24, $f26, $f28, %[pSrc]) + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + FILTER_HV_W8($f4, $f6, $f8, $f10, $f12, $f14, $f16, $f18, $f20, $f22, $f24, + $f26, $f28, $f30, $f0, $f2, %[pDst], $8, $9) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 3f \n\t" + + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + PTR_ADDU "$8, %[pSrc], %[iSrcStride] \n\t" + MMI_LOAD_8P($f28, $f30, $f0, $8) + FILTER_HV_W8($f8, $f10, $f12, $f14, $f16, $f18, $f20, $f22, $f24, $f26, $f28, + $f30, $f0, $f2, $f4, $f6, %[pDst], $8, $9) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 3f \n\t" + + PTR_ADDU "%[pSrc], %[pSrc], $10 \n\t" + MMI_LOAD_8P($f0, $f2, $f4, %[pSrc]) + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + FILTER_HV_W8($f12, $f14, $f16, $f18, $f20, $f22, $f24, $f26, $f28, $f30, $f0, + $f2, $f4, $f6, $f8, $f10, %[pDst], $8, $9) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 3f \n\t" + + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + PTR_ADDU "$8, %[pSrc], %[iSrcStride] \n\t" + MMI_LOAD_8P($f4, $f6, $f8, $8) + FILTER_HV_W8($f16, $f18, $f20, $f22, $f24, $f26, $f28, $f30, $f0, $f2, $f4, + $f6, $f8, $f10, $f12, $f14, %[pDst], $8, $9) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 3f \n\t" + + PTR_ADDU "%[pSrc], %[pSrc], $10 \n\t" + MMI_LOAD_8P($f8, $f10, $f12, %[pSrc]) + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + FILTER_HV_W8($f20, $f22, $f24, $f26, $f28, $f30, $f0, $f2, $f4, $f6, $f8, + $f10, $f12, $f14, $f16, $f18, %[pDst], $8, $9) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 3f \n\t" + + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + PTR_ADDU "$8, %[pSrc], %[iSrcStride] \n\t" + MMI_LOAD_8P($f12, $f14, $f16, $8) + FILTER_HV_W8($f24, $f26, $f28, $f30, $f0, $f2, $f4, $f6, $f8, $f10, $f12, + $f14, $f16, $f18, $f20, $f22, %[pDst], $8, $9) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 3f \n\t" + + PTR_ADDU "%[pSrc], %[pSrc], $10 \n\t" 
+ MMI_LOAD_8P($f16, $f18, $f20, %[pSrc]) + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + FILTER_HV_W8($f28, $f30, $f0, $f2, $f4, $f6, $f8, $f10, $f12, $f14, $f16, + $f18, $f20, $f22, $f24, $f26, %[pDst], $8, $9) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 3f \n\t" + + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + PTR_ADDU "$8, %[pSrc], %[iSrcStride] \n\t" + MMI_LOAD_8P($f20, $f22, $f24, $8) + "j 2b \n\t" + + "3: \n\t" + PTR_ADDIU "%[iWidth], %[iWidth], -0x1 \n\t" + "beqz %[iWidth], 4f \n\t" + + "move %[pSrc], $12 \n\t" + "move %[pDst], $13 \n\t" + "move %[iHeight], $14 \n\t" + PTR_SUBU "%[pSrc], %[pSrc], $10 \n\t" + PTR_ADDIU "%[pSrc], %[pSrc], 0x8 \n\t" + PTR_ADDIU "%[pDst], %[pDst], 0x8 \n\t" + "j 1b \n\t" + "4: \n\t" + : [pSrc]"+&r"((unsigned char *)pSrc), [pDst]"+&r"((unsigned char *)pDst), + [iWidth]"+&r"(iWidth), [iHeight]"+&r"(iHeight) + : [iSrcStride]"r"(iSrcStride), [iDstStride]"r"(iDstStride) + : "memory", "$8", "$9", "$10", "$12", "$13", "$14", "$f0", "$f2", "$f4", + "$f6", "$f8", "$f10", "$f12", "$f14", "$f16", "$f18", "$f20", "$f22", + "$f24", "$f26", "$f28", "$f30" + ); + RECOVER_REG; +} + +//Vertical filter producing the half sample at quarter-sample position (0, 2). Widths 16 and 8 are handled by McHorVer02Height9Or17_mmi; every other width goes to McHorVer02Height5_mmi. +static inline void McHorVer02Height5Or9Or17_mmi(const uint8_t* pSrc, int32_t iSrcStride, + uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + if (iWidth == 16 || iWidth == 8) + McHorVer02Height9Or17_mmi(pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight ); + else + McHorVer02Height5_mmi (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight); +} +/* Horizontal first pass of the (2, 2) filter. Starts two rows above pSrc and, for each of iHeight rows, stores the 16-bit filter sums to pTap with no rounding, shifting or packing to bytes: 9 halfwords (bytes 0x0 to 0x11) when iWidth is 9, otherwise 17 halfwords (bytes 0x0 to 0x21). pSrc advances by iSrcStride and pTap by iTapStride bytes per row. The row loops test iHeight after decrementing, so at least one row is always processed. */ +static inline void McHorVer22HorFirst_mmi(const uint8_t *pSrc, int32_t iSrcStride, + uint8_t * pTap, int32_t iTapStride, + int32_t iWidth, int32_t iHeight) { + BACKUP_REG; + __asm__ volatile ( + ".set arch=loongson3a \n\t" + "dli $8, 0x9 \n\t" + PTR_SUBU "%[pSrc], %[pSrc], %[iSrcStride] \n\t" + PTR_SUBU "%[pSrc], %[pSrc], %[iSrcStride] \n\t" + "bne %[iWidth], $8, 2f \n\t" + + "1: \n\t" + 
"gsldlc1 $f0, 0x7(%[pSrc]) \n\t" + "xor $f28, $f28, $f28 \n\t" + "gsldrc1 $f0, 0x0(%[pSrc]) \n\t" + "punpckhbh $f2, $f0, $f28 \n\t" + "gsldlc1 $f4, 0xc(%[pSrc]) \n\t" + "punpcklbh $f0, $f0, $f28 \n\t" + "gsldrc1 $f4, 0x5(%[pSrc]) \n\t" + "punpckhbh $f6, $f4, $f28 \n\t" + "gsldlc1 $f8, 0x8(%[pSrc]) \n\t" + "punpcklbh $f4, $f4, $f28 \n\t" + "gsldrc1 $f8, 0x1(%[pSrc]) \n\t" + "punpckhbh $f10, $f8, $f28 \n\t" + "gsldlc1 $f12, 0xb(%[pSrc]) \n\t" + "punpcklbh $f8, $f8, $f28 \n\t" + "gsldrc1 $f12, 0x4(%[pSrc]) \n\t" + "punpckhbh $f14, $f12, $f28 \n\t" + "gsldlc1 $f16, 0x9(%[pSrc]) \n\t" + "punpcklbh $f12, $f12, $f28 \n\t" + "gsldrc1 $f16, 0x2(%[pSrc]) \n\t" + "punpckhbh $f18, $f16, $f28 \n\t" + "gsldlc1 $f20, 0xa(%[pSrc]) \n\t" + "punpcklbh $f16, $f16, $f28 \n\t" + "gsldrc1 $f20, 0x3(%[pSrc]) \n\t" + "punpckhbh $f22, $f20, $f28 \n\t" + "punpcklbh $f20, $f20, $f28 \n\t" + + "mov.d $f28, $f8 \n\t" + "mov.d $f30, $f10 \n\t" + "paddh $f28, $f28, $f12 \n\t" + "paddh $f30, $f30, $f14 \n\t" + "mov.d $f24, $f16 \n\t" + "mov.d $f26, $f18 \n\t" + "paddh $f24, $f24, $f20 \n\t" + "paddh $f26, $f26, $f22 \n\t" + "dli $8, 0x2 \n\t" + "dmfc1 $9, $f12 \n\t" + "dmtc1 $8, $f12 \n\t" + "psllh $f24, $f24, $f12 \n\t" + "psllh $f26, $f26, $f12 \n\t" + "psubh $f24, $f24, $f28 \n\t" + "psubh $f26, $f26, $f30 \n\t" + "paddh $f0, $f0, $f4 \n\t" + "paddh $f2, $f2, $f6 \n\t" + "paddh $f0, $f0, $f24 \n\t" + "paddh $f2, $f2, $f26 \n\t" + "psllh $f24, $f24, $f12 \n\t" + "psllh $f26, $f26, $f12 \n\t" + "paddh $f0, $f0, $f24 \n\t" + "paddh $f2, $f2, $f26 \n\t" + "gsswlc1 $f0, 0x3(%[pTap]) \n\t" + "gsswrc1 $f0, 0x0(%[pTap]) \n\t" + + "gsldlc1 $f0, 0xd(%[pSrc]) \n\t" + "xor $f28, $f28, $f28 \n\t" + "gsldrc1 $f0, 0x6(%[pSrc]) \n\t" + "punpckhbh $f2, $f0, $f28 \n\t" + "punpcklbh $f0, $f0, $f28 \n\t" + "dli $8, 0x2 \n\t" + "dmtc1 $9, $f12 \n\t" + "dmtc1 $8, $f24 \n\t" + + "paddh $f16, $f16, $f4 \n\t" + "paddh $f18, $f18, $f6 \n\t" + "paddh $f20, $f20, $f12 \n\t" + "paddh $f22, $f22, $f14 \n\t" + "psllh $f20, 
$f20, $f24 \n\t" + "psllh $f22, $f22, $f24 \n\t" + "psubh $f20, $f20, $f16 \n\t" + "psubh $f22, $f22, $f18 \n\t" + "paddh $f8, $f8, $f0 \n\t" + "paddh $f10, $f10, $f2 \n\t" + "paddh $f8, $f8, $f20 \n\t" + "paddh $f10, $f10, $f22 \n\t" + "psllh $f20, $f20, $f24 \n\t" + "psllh $f22, $f22, $f24 \n\t" + "paddh $f8, $f8, $f20 \n\t" + "paddh $f10, $f10, $f22 \n\t" + "gssdlc1 $f8, 0x9(%[pTap]) \n\t" + "gssdlc1 $f10, 0x11(%[pTap]) \n\t" + "gssdrc1 $f8, 0x2(%[pTap]) \n\t" + "gssdrc1 $f10, 0xa(%[pTap]) \n\t" + + PTR_ADDU "%[pSrc], %[pSrc], %[iSrcStride] \n\t" + PTR_ADDU "%[pTap], %[pTap], %[iTapStride] \n\t" + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "bnez %[iHeight], 1b \n\t" + "j 3f \n\t" + + "2: \n\t" + "gsldlc1 $f0, 0x7(%[pSrc]) \n\t" + "xor $f28, $f28, $f28 \n\t" + "gsldrc1 $f0, 0x0(%[pSrc]) \n\t" + "punpckhbh $f2, $f0, $f28 \n\t" + "gsldlc1 $f4, 0xc(%[pSrc]) \n\t" + "punpcklbh $f0, $f0, $f28 \n\t" + "gsldrc1 $f4, 0x5(%[pSrc]) \n\t" + "punpckhbh $f6, $f4, $f28 \n\t" + "gsldlc1 $f8, 0x8(%[pSrc]) \n\t" + "punpcklbh $f4, $f4, $f28 \n\t" + "gsldrc1 $f8, 0x1(%[pSrc]) \n\t" + "punpckhbh $f10, $f8, $f28 \n\t" + "gsldlc1 $f12, 0xb(%[pSrc]) \n\t" + "punpcklbh $f8, $f8, $f28 \n\t" + "gsldrc1 $f12, 0x4(%[pSrc]) \n\t" + "punpckhbh $f14, $f12, $f28 \n\t" + "gsldlc1 $f16, 0x9(%[pSrc]) \n\t" + "punpcklbh $f12, $f12, $f28 \n\t" + "gsldrc1 $f16, 0x2(%[pSrc]) \n\t" + "punpckhbh $f18, $f16, $f28 \n\t" + "gsldlc1 $f20, 0xa(%[pSrc]) \n\t" + "punpcklbh $f16, $f16, $f28 \n\t" + "gsldrc1 $f20, 0x3(%[pSrc]) \n\t" + "punpckhbh $f22, $f20, $f28 \n\t" + "dli $8, 0x2 \n\t" + "punpcklbh $f20, $f20, $f28 \n\t" + + "dmtc1 $8, $f30 \n\t" + "paddh $f8, $f8, $f12 \n\t" + "paddh $f10, $f10, $f14 \n\t" + "paddh $f16, $f16, $f20 \n\t" + "paddh $f18, $f18, $f22 \n\t" + "psllh $f16, $f16, $f30 \n\t" + "psllh $f18, $f18, $f30 \n\t" + "psubh $f16, $f16, $f8 \n\t" + "psubh $f18, $f18, $f10 \n\t" + "paddh $f0, $f0, $f4 \n\t" + "paddh $f2, $f2, $f6 \n\t" + "paddh $f0, $f0, $f16 \n\t" + "paddh $f2, $f2, $f18 
\n\t" + "psllh $f16, $f16, $f30 \n\t" + "psllh $f18, $f18, $f30 \n\t" + "paddh $f0, $f0, $f16 \n\t" + "paddh $f2, $f2, $f18 \n\t" + "gssqc1 $f2, $f0, 0x0(%[pTap]) \n\t" + + "gsldlc1 $f0, 15(%[pSrc]) \n\t" + "gsldrc1 $f0, 8(%[pSrc]) \n\t" + "punpckhbh $f2, $f0, $f28 \n\t" + "gsldlc1 $f4, 0x14(%[pSrc]) \n\t" + "punpcklbh $f0, $f0, $f28 \n\t" + "gsldrc1 $f4, 0xd(%[pSrc]) \n\t" + "punpckhbh $f6, $f4, $f28 \n\t" + "gsldlc1 $f8, 0x10(%[pSrc]) \n\t" + "punpcklbh $f4, $f4, $f28 \n\t" + "gsldrc1 $f8, 0x9(%[pSrc]) \n\t" + "punpckhbh $f10, $f8, $f28 \n\t" + "gsldlc1 $f12, 0x13(%[pSrc]) \n\t" + "punpcklbh $f8, $f8, $f28 \n\t" + "gsldrc1 $f12, 0xc(%[pSrc]) \n\t" + "punpckhbh $f14, $f12, $f28 \n\t" + "gsldlc1 $f16, 0x11(%[pSrc]) \n\t" + "punpcklbh $f12, $f12, $f28 \n\t" + "gsldrc1 $f16, 0xa(%[pSrc]) \n\t" + "punpckhbh $f18, $f16, $f28 \n\t" + "gsldlc1 $f20, 0x12(%[pSrc]) \n\t" + "punpcklbh $f16, $f16, $f28 \n\t" + "gsldrc1 $f20, 0xb(%[pSrc]) \n\t" + "punpckhbh $f22, $f20, $f28 \n\t" + "punpcklbh $f20, $f20, $f28 \n\t" + + "mov.d $f28, $f8 \n\t" + "mov.d $f30, $f10 \n\t" + "paddh $f28, $f28, $f12 \n\t" + "paddh $f30, $f30, $f14 \n\t" + "mov.d $f24, $f16 \n\t" + "mov.d $f26, $f18 \n\t" + "dli $8, 0x2 \n\t" + "paddh $f24, $f24, $f20 \n\t" + "paddh $f26, $f26, $f22 \n\t" + "dmfc1 $9, $f12 \n\t" + "dmtc1 $8, $f12 \n\t" + "psllh $f24, $f24, $f12 \n\t" + "psllh $f26, $f26, $f12 \n\t" + "psubh $f24, $f24, $f28 \n\t" + "psubh $f26, $f26, $f30 \n\t" + "paddh $f0, $f0, $f4 \n\t" + "paddh $f2, $f2, $f6 \n\t" + "paddh $f0, $f0, $f24 \n\t" + "paddh $f2, $f2, $f26 \n\t" + "psllh $f24, $f24, $f12 \n\t" + "psllh $f26, $f26, $f12 \n\t" + "paddh $f0, $f0, $f24 \n\t" + "paddh $f2, $f2, $f26 \n\t" + "gsswlc1 $f0, 0x13(%[pTap]) \n\t" + "gsswrc1 $f0, 0x10(%[pTap]) \n\t" + + "gsldlc1 $f0, 0x15(%[pSrc]) \n\t" + "xor $f28, $f28, $f28 \n\t" + "gsldrc1 $f0, 0xE(%[pSrc]) \n\t" + "punpckhbh $f2, $f0, $f28 \n\t" + "punpcklbh $f0, $f0, $f28 \n\t" + "dli $8, 0x2 \n\t" + "dmtc1 $9, $f12 \n\t" + "dmtc1 $8, $f24 
\n\t" + + "paddh $f16, $f16, $f4 \n\t" + "paddh $f18, $f18, $f6 \n\t" + "paddh $f20, $f20, $f12 \n\t" + "paddh $f22, $f22, $f14 \n\t" + "psllh $f20, $f20, $f24 \n\t" + "psllh $f22, $f22, $f24 \n\t" + "psubh $f20, $f20, $f16 \n\t" + "psubh $f22, $f22, $f18 \n\t" + "paddh $f8, $f8, $f0 \n\t" + "paddh $f10, $f10, $f2 \n\t" + "paddh $f8, $f8, $f20 \n\t" + "paddh $f10, $f10, $f22 \n\t" + "psllh $f20, $f20, $f24 \n\t" + "psllh $f22, $f22, $f24 \n\t" + "paddh $f8, $f8, $f20 \n\t" + "paddh $f10, $f10, $f22 \n\t" + "gssdlc1 $f8, 0x19(%[pTap]) \n\t" + "gssdlc1 $f10, 0x21(%[pTap]) \n\t" + "gssdrc1 $f8, 0x12(%[pTap]) \n\t" + "gssdrc1 $f10, 0x1a(%[pTap]) \n\t" + + PTR_ADDU "%[pSrc], %[pSrc], %[iSrcStride] \n\t" + PTR_ADDU "%[pTap], %[pTap], %[iTapStride] \n\t" + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "bnez %[iHeight], 2b \n\t" + "3: \n\t" + : [pSrc]"+&r"(pSrc), [pTap]"+&r"(pTap), [iWidth]"+&r"(iWidth), + [iHeight]"+&r"(iHeight) + : [iSrcStride]"r"(iSrcStride), [iTapStride]"r"(iTapStride) + : "memory", "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12", + "$f14", "$f16", "$f18", "$f20", "$f22", "$f24", "$f26", "$f28", "$f30" + ); + RECOVER_REG; +} + +static inline void McHorVer22Width8VerLastAlign_mmi(const uint8_t *pTap, + int32_t iTapStride, uint8_t * pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + BACKUP_REG; + __asm__ volatile ( + ".set arch=loongson3a \n\t" + "move $10, %[pTap] \n\t" + "move $11, %[pDst] \n\t" + "move $12, %[iHeight] \n\t" + "dsrl %[iWidth], 0x3 \n\t" + PTR_ADDU "$13, %[iTapStride], %[iTapStride] \n\t" + PTR_ADDU "$14, %[iDstStride], %[iDstStride] \n\t" + "dli $15, 0x0020002000200020 \n\t" + + "4: \n\t" + "gslqc1 $f2, $f0, 0x0(%[pTap]) \n\t" + PTR_ADDU "$8, %[pTap], %[iTapStride] \n\t" + "gslqc1 $f6, $f4, 0x0($8) \n\t" + PTR_ADDU "%[pTap], %[pTap], $13 \n\t" + "gslqc1 $f10, $f8, 0x0(%[pTap]) \n\t" + PTR_ADDU "$8, %[pTap], %[iTapStride] \n\t" + "gslqc1 $f14, $f12, 0x0($8) \n\t" + PTR_ADDU "%[pTap], %[pTap], $13 \n\t" + 
"gslqc1 $f18, $f16, 0x0(%[pTap]) \n\t" + PTR_ADDU "$8, %[pTap], %[iTapStride] \n\t" + "gslqc1 $f22, $f20, 0x0($8) \n\t" + + FILTER_VER_ALIGN($f0, $f2, $f4, $f6, $f8, $f10, $f12, $f14, $f16, $f18, $f20, + $f22, $f24, $f26, $f28, $f30, %[pDst], $0, $8, $9, $15) + + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + PTR_ADDU "%[pTap], %[pTap], $13 \n\t" + "gslqc1 $f26, $f24, 0x0(%[pTap]) \n\t" + "mov.d $f0, $f4 \n\t" + "mov.d $f2, $f6 \n\t" + "mov.d $f4, $f8 \n\t" + "mov.d $f6, $f10 \n\t" + "mov.d $f8, $f12 \n\t" + "mov.d $f10, $f14 \n\t" + "mov.d $f12, $f16 \n\t" + "mov.d $f14, $f18 \n\t" + "mov.d $f16, $f20 \n\t" + "mov.d $f18, $f22 \n\t" + "mov.d $f20, $f24 \n\t" + "mov.d $f22, $f26 \n\t" + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + PTR_SUBU "%[pTap], %[pTap], %[iTapStride] \n\t" + + "5: \n\t" + FILTER_VER_ALIGN($f0, $f2, $f4, $f6, $f8, $f10, $f12, $f14, $f16, $f18, $f20, + $f22, $f24, $f26, $f28, $f30, %[pDst], $0, $8, $9, $15) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 6f \n\t" + PTR_ADDU "%[pTap], %[pTap], $13 \n\t" + "gslqc1 $f26, $f24, 0x0(%[pTap]) \n\t" + + FILTER_VER_ALIGN($f4, $f6, $f8, $f10, $f12, $f14, $f16, $f18, $f20, $f22, $f24, + $f26, $f28, $f30, $f0, $f2, %[pDst], %[iDstStride], $8, $9, $15) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 6f \n\t" + PTR_ADDU "%[pDst], %[pDst], $14 \n\t" + PTR_ADDU "$8, %[pTap], %[iTapStride] \n\t" + "gslqc1 $f30, $f28, 0x0($8) \n\t" + + FILTER_VER_ALIGN($f8, $f10, $f12, $f14, $f16, $f18, $f20, $f22, $f24, $f26, $f28, + $f30, $f0, $f2, $f4, $f6, %[pDst], $0, $8, $9, $15) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 6f \n\t" + PTR_ADDU "%[pTap], %[pTap], $13 \n\t" + "gslqc1 $f2, $f0, 0x0(%[pTap]) \n\t" + + FILTER_VER_ALIGN($f12, $f14, $f16, $f18, $f20, $f22, $f24, $f26, $f28, $f30, $f0, + $f2, $f4, $f6, $f8, $f10, %[pDst], %[iDstStride], $8, $9, $15) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 6f \n\t" + PTR_ADDU "%[pDst], 
%[pDst], $14 \n\t" + PTR_ADDU "$8, %[pTap], %[iTapStride] \n\t" + "gslqc1 $f6, $f4, 0x0($8) \n\t" + + FILTER_VER_ALIGN($f16, $f18, $f20, $f22, $f24, $f26, $f28, $f30, $f0, $f2, $f4, + $f6, $f8, $f10, $f12, $f14, %[pDst], $0, $8, $9, $15) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 6f \n\t" + PTR_ADDU "%[pTap], %[pTap], $13 \n\t" + "gslqc1 $f10, $f8, 0x0(%[pTap]) \n\t" + + FILTER_VER_ALIGN($f20, $f22, $f24, $f26, $f28, $f30, $f0, $f2, $f4, $f6, $f8, + $f10, $f12, $f14, $f16, $f18, %[pDst], %[iDstStride], $8, $9, $15) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 6f \n\t" + PTR_ADDU "%[pDst], %[pDst], $14 \n\t" + PTR_ADDU "$8, %[pTap], %[iTapStride] \n\t" + "gslqc1 $f14, $f12, 0x0($8) \n\t" + + FILTER_VER_ALIGN($f24, $f26, $f28, $f30, $f0, $f2, $f4, $f6, $f8, $f10, $f12, + $f14, $f16, $f18, $f20, $f22, %[pDst], $0, $8, $9, $15) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 6f \n\t" + PTR_ADDU "%[pTap], %[pTap], $13 \n\t" + "gslqc1 $f18, $f16, 0x0(%[pTap]) \n\t" + + FILTER_VER_ALIGN($f28, $f30, $f0, $f2, $f4, $f6, $f8, $f10, $f12, $f14, $f16, + $f18, $f20, $f22, $f24, $f26, %[pDst], %[iDstStride], $8, $9, $15) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 6f \n\t" + PTR_ADDU "%[pDst], %[pDst], $14 \n\t" + PTR_ADDU "$8, %[pTap], %[iTapStride] \n\t" + "gslqc1 $f22, $f20, 0x0($8) \n\t" + "j 5b \n\t" + + "6: \n\t" + PTR_ADDIU "%[iWidth], %[iWidth], -0x1 \n\t" + "beqz %[iWidth], 7f \n\t" + "move %[pTap], $10 \n\t" + "move %[pDst], $11 \n\t" + "move %[iHeight], $12 \n\t" + PTR_ADDIU "%[pTap], %[pTap], 0x10 \n\t" + PTR_ADDIU "%[pDst], %[pDst], 0x8 \n\t" + "j 4b \n\t" + "7: \n\t" + : [pTap]"+&r"((unsigned char *)pTap), [pDst]"+&r"((unsigned char *)pDst), + [iWidth]"+&r"((int)iWidth), [iHeight]"+&r"((int)iHeight) + : [iTapStride]"r"((int)iTapStride), [iDstStride]"r"((int)iDstStride) + : "memory", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "$f0", + "$f2", "$f4", "$f6", "$f8", 
"$f10", "$f12", "$f14", "$f16", "$f18", + "$f20", "$f22", "$f24", "$f26", "$f28", "$f30" + ); + RECOVER_REG; +} +/* Vertical last pass of the (2, 2) filter for tap rows read with gsldlc1/gsldrc1 pairs (the unaligned 64-bit load pair). Works in strips of 8 outputs: iWidth is shifted right by 3 to give the strip count and each further strip restarts from the saved bases ($10, $11, $12) with pTap advanced by 0x10 bytes and pDst by 0x8 bytes. $14 holds 0x0020 in each halfword and is passed to FILTER_VER_UNALIGN (macro defined outside this chunk), which receives pDst. NOTE(review): presumably 0x20 is the rounding term of the final shift - confirm in the macro. */ +static inline void McHorVer22Width8VerLastUnAlign_mmi(const uint8_t *pTap, + int32_t iTapStride, uint8_t * pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + BACKUP_REG; + __asm__ volatile ( + ".set arch=loongson3a \n\t" + "move $10, %[pTap] \n\t" + "move $11, %[pDst] \n\t" + "move $12, %[iHeight] \n\t" + "dsrl %[iWidth], 0x3 \n\t" + PTR_ADDU "$13, %[iTapStride], %[iTapStride] \n\t" + "dli $14, 0x0020002000200020 \n\t" + + "4: \n\t" + PTR_ADDU "$8, %[pTap], %[iTapStride] \n\t" + "gsldlc1 $f0, 0x7(%[pTap]) \n\t" + "gsldlc1 $f2, 0xF(%[pTap]) \n\t" + "gsldlc1 $f4, 0x7($8) \n\t" + "gsldlc1 $f6, 0xF($8) \n\t" + "gsldrc1 $f0, 0x0(%[pTap]) \n\t" + "gsldrc1 $f2, 0x8(%[pTap]) \n\t" + "gsldrc1 $f4, 0x0($8) \n\t" + "gsldrc1 $f6, 0x8($8) \n\t" + PTR_ADDU "%[pTap], %[pTap], $13 \n\t" + PTR_ADDU "$8, %[pTap], %[iTapStride] \n\t" + "gsldlc1 $f8, 0x7(%[pTap]) \n\t" + "gsldlc1 $f10, 0xF(%[pTap]) \n\t" + "gsldlc1 $f12, 0x7($8) \n\t" + "gsldlc1 $f14, 0xF($8) \n\t" + "gsldrc1 $f8, 0x0(%[pTap]) \n\t" + "gsldrc1 $f10, 0x8(%[pTap]) \n\t" + "gsldrc1 $f12, 0x0($8) \n\t" + "gsldrc1 $f14, 0x8($8) \n\t" + PTR_ADDU "%[pTap], %[pTap], $13 \n\t" + PTR_ADDU "$8, %[pTap], %[iTapStride] \n\t" + "gsldlc1 $f16, 0x7(%[pTap]) \n\t" + "gsldlc1 $f18, 0xF(%[pTap]) \n\t" + "gsldlc1 $f20, 0x7($8) \n\t" + "gsldlc1 $f22, 0xF($8) \n\t" + "gsldrc1 $f16, 0x0(%[pTap]) \n\t" + "gsldrc1 $f18, 0x8(%[pTap]) \n\t" + "gsldrc1 $f20, 0x0($8) \n\t" + "gsldrc1 $f22, 0x8($8) \n\t" + + FILTER_VER_UNALIGN($f0, $f2, $f4, $f6, $f8, $f10, $f12, $f14, $f16, $f18, + $f20, $f22, $f24, $f26, $f28, $f30, %[pDst], $8, $9, $14) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + PTR_ADDU "%[pTap], %[pTap], $13 \n\t" + "gsldlc1 $f24, 0x7(%[pTap]) \n\t" + "gsldlc1 $f26, 0xF(%[pTap]) \n\t" + "gsldrc1 $f24, 0x0(%[pTap]) \n\t" + "gsldrc1 $f26, 0x8(%[pTap]) \n\t" + "mov.d $f0, $f4 \n\t" + "mov.d $f2, $f6 \n\t" + 
"mov.d $f4, $f8 \n\t" + "mov.d $f6, $f10 \n\t" + "mov.d $f8, $f12 \n\t" + "mov.d $f10, $f14 \n\t" + "mov.d $f12, $f16 \n\t" + "mov.d $f14, $f18 \n\t" + "mov.d $f16, $f20 \n\t" + "mov.d $f18, $f22 \n\t" + "mov.d $f20, $f24 \n\t" + "mov.d $f22, $f26 \n\t" + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + PTR_SUBU "%[pTap], %[pTap], %[iTapStride] \n\t" + + "5: \n\t" + FILTER_VER_UNALIGN($f0, $f2, $f4, $f6, $f8, $f10, $f12, $f14, $f16, $f18, + $f20, $f22, $f24, $f26, $f28, $f30, %[pDst], $8, $9, $14) + + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 6f \n\t" + PTR_ADDU "%[pTap], %[pTap], $13 \n\t" + "gsldlc1 $f24, 0x7(%[pTap]) \n\t" + "gsldlc1 $f26, 0xF(%[pTap]) \n\t" + "gsldrc1 $f24, 0x0(%[pTap]) \n\t" + "gsldrc1 $f26, 0x8(%[pTap]) \n\t" + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + + FILTER_VER_UNALIGN($f4, $f6, $f8, $f10, $f12, $f14, $f16, $f18, $f20, $f22, + $f24, $f26, $f28, $f30, $f0, $f2, %[pDst], $8, $9, $14) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 6f \n\t" + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + PTR_ADDU "$8, %[pTap], %[iTapStride] \n\t" + "gsldlc1 $f28, 0x7($8) \n\t" + "gsldlc1 $f30, 0xF($8) \n\t" + "gsldrc1 $f28, 0x0($8) \n\t" + "gsldrc1 $f30, 0x8($8) \n\t" + + FILTER_VER_UNALIGN($f8, $f10, $f12, $f14, $f16, $f18, $f20, $f22, $f24, $f26, + $f28, $f30, $f0, $f2, $f4, $f6, %[pDst], $8, $9, $14) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 6f \n\t" + PTR_ADDU "%[pTap], %[pTap], $13 \n\t" + "gsldlc1 $f0, 0x7(%[pTap]) \n\t" + "gsldlc1 $f2, 0xF(%[pTap]) \n\t" + "gsldrc1 $f0, 0x0(%[pTap]) \n\t" + "gsldrc1 $f2, 0x8(%[pTap]) \n\t" + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + + FILTER_VER_UNALIGN($f12, $f14, $f16, $f18, $f20, $f22, $f24, $f26, $f28, + $f30, $f0, $f2, $f4, $f6, $f8, $f10, %[pDst], $8, $9, $14) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 6f \n\t" + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + PTR_ADDU "$8, %[pTap], %[iTapStride] 
\n\t" + "gsldlc1 $f4, 0x7($8) \n\t" + "gsldlc1 $f6, 0xF($8) \n\t" + "gsldrc1 $f4, 0x0($8) \n\t" + "gsldrc1 $f6, 0x8($8) \n\t" + + FILTER_VER_UNALIGN($f16, $f18, $f20, $f22, $f24, $f26, $f28, $f30, $f0, $f2, + $f4, $f6, $f8, $f10, $f12, $f14, %[pDst], $8, $9, $14) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 6f \n\t" + PTR_ADDU "%[pTap], %[pTap], $13 \n\t" + "gsldlc1 $f8, 0x7(%[pTap]) \n\t" + "gsldlc1 $f10, 0xF(%[pTap]) \n\t" + "gsldrc1 $f8, 0x0(%[pTap]) \n\t" + "gsldrc1 $f10, 0x8(%[pTap]) \n\t" + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + + FILTER_VER_UNALIGN($f20, $f22, $f24, $f26, $f28, $f30, $f0, $f2, $f4, $f6, + $f8, $f10, $f12, $f14, $f16, $f18, %[pDst], $8, $9, $14) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 6f \n\t" + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + PTR_ADDU "$8, %[pTap], %[iTapStride] \n\t" + "gsldlc1 $f12, 0x7($8) \n\t" + "gsldlc1 $f14, 0xF($8) \n\t" + "gsldrc1 $f12, 0x0($8) \n\t" + "gsldrc1 $f14, 0x8($8) \n\t" + + FILTER_VER_UNALIGN($f24, $f26, $f28, $f30, $f0, $f2, $f4, $f6, $f8, $f10, + $f12, $f14, $f16, $f18, $f20, $f22, %[pDst], $8, $9, $14) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 6f \n\t" + PTR_ADDU "%[pTap], %[pTap], $13 \n\t" + "gsldlc1 $f16, 0x7(%[pTap]) \n\t" + "gsldlc1 $f18, 0xF(%[pTap]) \n\t" + "gsldrc1 $f16, 0x0(%[pTap]) \n\t" + "gsldrc1 $f18, 0x8(%[pTap]) \n\t" + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + + FILTER_VER_UNALIGN($f28, $f30, $f0, $f2, $f4, $f6, $f8, $f10, $f12, $f14, + $f16, $f18, $f20, $f22, $f24, $f26, %[pDst], $8, $9, $14) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 6f \n\t" + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + PTR_ADDU "$8, %[pTap], %[iTapStride] \n\t" + "gsldlc1 $f20, 0x7($8) \n\t" + "gsldlc1 $f22, 0xF($8) \n\t" + "gsldrc1 $f20, 0x0($8) \n\t" + "gsldrc1 $f22, 0x8($8) \n\t" + "j 5b \n\t" + + "6: \n\t" + PTR_ADDIU "%[iWidth], %[iWidth], -0x1 \n\t" + "beqz %[iWidth], 7f \n\t" + "move 
%[pTap], $10 \n\t" + "move %[pDst], $11 \n\t" + "move %[iHeight], $12 \n\t" + PTR_ADDIU "%[pTap], %[pTap], 0x10 \n\t" + PTR_ADDIU "%[pDst], %[pDst], 0x8 \n\t" + "j 4b \n\t" + + "7: \n\t" + : [pTap]"+&r"((unsigned char *)pTap), [pDst]"+&r"((unsigned char *)pDst), + [iWidth]"+&r"((int)iWidth), [iHeight]"+&r"((int)iHeight) + : [iTapStride]"r"((int)iTapStride), [iDstStride]"r"((int)iDstStride) + : "memory", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$f0", "$f2", + "$f4", "$f6", "$f8", "$f10", "$f12", "$f14", "$f16", "$f18", "$f20", + "$f22", "$f24", "$f26", "$f28", "$f30" + ); + RECOVER_REG; +} + +//horizontal and vertical filter to gain half sample, that is (2, 2) location in quarter sample +static inline void McHorVer22Width5Or9Or17Height5Or9Or17_mmi(const uint8_t* pSrc, + int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_2D (int16_t, pTap, 22, 24, 16) + + if (iWidth == 17 || iWidth == 9){ + int32_t tmp1 = 2 * (iWidth - 8); + McHorVer22HorFirst_mmi(pSrc - 2, iSrcStride, (uint8_t*)pTap, 48, iWidth, iHeight + 5); + + McHorVer22Width8VerLastAlign_mmi((uint8_t*)pTap, 48, pDst, iDstStride, iWidth - 1, iHeight); + + McHorVer22Width8VerLastUnAlign_mmi((uint8_t*)pTap + tmp1, 48, pDst + iWidth - 8, + iDstStride, 8, iHeight); + } else { + int16_t iTmp[17 + 5]; + int32_t i, j, k; + + for (i = 0; i < iHeight; i++) { + for (j = 0; j < iWidth + 5; j++) { + iTmp[j] = FilterInput8bitWithStride_c (pSrc - 2 + j, iSrcStride); + } + for (k = 0; k < iWidth; k++) { + pDst[k] = WelsClip1 ((HorFilterInput16bit_c (&iTmp[k]) + 512) >> 10); + } + pSrc += iSrcStride; + pDst += iDstStride; + } + } +} + +void McCopyWidthEq4_mmi(const uint8_t *pSrc, int iSrcStride, + uint8_t *pDst, int iDstStride, int iHeight) { + __asm__ volatile ( + ".set arch=loongson3a \n\t" + "1: \n\t" + "lwl $8, 0x3(%[pSrc]) \n\t" + "lwr $8, 0x0(%[pSrc]) \n\t" + "swl $8, 0x3(%[pDst]) \n\t" + "swr $8, 0x0(%[pDst]) \n\t" + PTR_ADDU "%[pSrc], %[pSrc], 
%[iSrcStride] \n\t" + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + PTR_ADDIU "%[iHeight], %[iHeight], -1 \n\t" + "bnez %[iHeight], 1b \n\t" + : [pSrc]"+&r"(pSrc), [pDst]"+&r"(pDst), [iHeight]"+&r"(iHeight) + : [iSrcStride]"r"(iSrcStride), [iDstStride]"r"(iDstStride) + : "memory", "$8" + ); +} + +void McCopyWidthEq8_mmi(const uint8_t *pSrc, int iSrcStride, + uint8_t *pDst, int iDstStride, int iHeight) { + __asm__ volatile ( + ".set arch=loongson3a \n\t" + "1: \n\t" + "ldl $8, 0x7(%[pSrc]) \n\t" + "ldr $8, 0x0(%[pSrc]) \n\t" + "sdl $8, 0x7(%[pDst]) \n\t" + "sdr $8, 0x0(%[pDst]) \n\t" + PTR_ADDU "%[pSrc], %[pSrc], %[iSrcStride] \n\t" + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + PTR_ADDIU "%[iHeight], %[iHeight], -1 \n\t" + "bnez %[iHeight], 1b \n\t" + : [pSrc]"+&r"(pSrc), [pDst]"+&r"(pDst), [iHeight]"+&r"(iHeight) + : [iSrcStride]"r"(iSrcStride), [iDstStride]"r"(iDstStride) + : "memory", "$8" + ); +} + +void McCopyWidthEq16_mmi(const uint8_t *pSrc, int iSrcStride, + uint8_t *pDst, int iDstStride, int iHeight) { + __asm__ volatile ( + ".set arch=loongson3a \n\t" + "1: \n\t" + "ldl $8, 0x7(%[pSrc]) \n\t" + "ldl $9, 0xF(%[pSrc]) \n\t" + "ldr $8, 0x0(%[pSrc]) \n\t" + "ldr $9, 0x8(%[pSrc]) \n\t" + "sdl $8, 0x7(%[pDst]) \n\t" + "sdl $9, 0xF(%[pDst]) \n\t" + "sdr $8, 0x0(%[pDst]) \n\t" + "sdr $9, 0x8(%[pDst]) \n\t" + PTR_ADDU "%[pSrc], %[pSrc], %[iSrcStride] \n\t" + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + PTR_ADDIU "%[iHeight], %[iHeight], -1 \n\t" + "bnez %[iHeight], 1b \n\t" + : [pSrc]"+&r"(pSrc), [pDst]"+&r"(pDst), [iHeight]"+&r"(iHeight) + : [iSrcStride]"r"(iSrcStride), [iDstStride]"r"(iDstStride) + : "memory", "$8", "$9" + ); +} + +static inline void McCopy_mmi(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, + int32_t iDstStride, int32_t iWidth, int32_t iHeight) { + if (iWidth == 16) + McCopyWidthEq16_mmi (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 8) + McCopyWidthEq8_mmi (pSrc, iSrcStride, pDst, iDstStride, 
iHeight); + else if (iWidth == 4) + McCopyWidthEq4_mmi (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else + McCopyWidthEq2_c (pSrc, iSrcStride, pDst, iDstStride, iHeight); +} + +void McChromaWidthEq4_mmi(const uint8_t *pSrc, int32_t iSrcStride, uint8_t *pDst, + int32_t iDstStride, const uint8_t *pABCD, int32_t iHeight) { + __asm__ volatile ( + ".set arch=loongson3a \n\t" + "gsldlc1 $f6, 0x7(%[pABCD]) \n\t" + "gsldrc1 $f6, 0x0(%[pABCD]) \n\t" + "xor $f14, $f14, $f14 \n\t" + "punpcklbh $f6, $f6, $f6 \n\t" + "mov.d $f8, $f6 \n\t" + "punpcklhw $f6, $f6, $f6 \n\t" + "punpckhhw $f8, $f8, $f8 \n\t" + "mov.d $f10, $f6 \n\t" + "punpcklbh $f6, $f6, $f14 \n\t" + "punpckhbh $f10, $f10, $f14 \n\t" + + "mov.d $f12, $f8 \n\t" + "punpcklbh $f8, $f8, $f14 \n\t" + "punpckhbh $f12, $f12, $f14 \n\t" + PTR_ADDU "%[pABCD], %[pSrc], %[iSrcStride] \n\t" + "dli $8, 0x6 \n\t" + "gsldlc1 $f0, 0x7(%[pSrc]) \n\t" + "gsldlc1 $f2, 0x8(%[pSrc]) \n\t" + "dmtc1 $8, $f16 \n\t" + "gsldrc1 $f0, 0x0(%[pSrc]) \n\t" + "gsldrc1 $f2, 0x1(%[pSrc]) \n\t" + "dli $8, 0x0020002000200020 \n\t" + "punpcklbh $f0, $f0, $f14 \n\t" + "punpcklbh $f2, $f2, $f14 \n\t" + + "dmtc1 $8, $f18 \n\t" + "1: \n\t" + "pmullh $f0, $f0, $f6 \n\t" + "pmullh $f2, $f2, $f10 \n\t" + "paddh $f0, $f0, $f2 \n\t" + + "gsldlc1 $f2, 0x7(%[pABCD]) \n\t" + "gsldrc1 $f2, 0x0(%[pABCD]) \n\t" + "punpcklbh $f2, $f2, $f14 \n\t" + "mov.d $f4, $f2 \n\t" + "pmullh $f2, $f2, $f8 \n\t" + "paddh $f0, $f0, $f2 \n\t" + "gsldlc1 $f2, 0x8(%[pABCD]) \n\t" + "gsldrc1 $f2, 0x1(%[pABCD]) \n\t" + "punpcklbh $f2, $f2, $f14 \n\t" + "mov.d $f14, $f2 \n\t" + "pmullh $f2, $f2, $f12 \n\t" + "paddh $f0, $f0, $f2 \n\t" + "mov.d $f2, $f14 \n\t" + "paddh $f0, $f0, $f18 \n\t" + "psrlh $f0, $f0, $f16 \n\t" + "xor $f14, $f14, $f14 \n\t" + "packushb $f0, $f0, $f14 \n\t" + "gsswlc1 $f0, 0x3(%[pDst]) \n\t" + "gsswrc1 $f0, 0x0(%[pDst]) \n\t" + "mov.d $f0, $f4 \n\t" + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + PTR_ADDU "%[pABCD], %[pABCD], %[iSrcStride] \n\t" + PTR_ADDIU 
"%[iHeight], %[iHeight], -1 \n\t" + "bnez %[iHeight], 1b \n\t" + : [pSrc]"+&r"((unsigned char *)pSrc), [pDst]"+&r"((unsigned char *)pDst), + [pABCD]"+&r"((unsigned char *)pABCD), [iHeight]"+&r"((int)iHeight) + : [iSrcStride]"r"((int)iSrcStride), [iDstStride]"r"((int)iDstStride) + : "memory", "$8", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12", + "$f14", "$f16", "$f18" + ); +} + +void McChromaWidthEq8_mmi(const uint8_t *pSrc, int32_t iSrcStride, uint8_t *pDst, + int32_t iDstStride, const uint8_t *pABCD, int32_t iHeight) { + BACKUP_REG; + __asm__ volatile ( + ".set arch=loongson3a \n\t" + "gsldlc1 $f12, 0x7(%[pABCD]) \n\t" + "xor $f28, $f28, $f28 \n\t" + "gsldrc1 $f12, 0x0(%[pABCD]) \n\t" + "punpcklbh $f12, $f12, $f12 \n\t" + "punpckhhw $f14, $f12, $f12 \n\t" + "punpcklhw $f12, $f12, $f12 \n\t" + + "mov.d $f16, $f14 \n\t" + "punpckhwd $f14, $f12, $f12 \n\t" + "punpcklwd $f12, $f12, $f12 \n\t" + "punpckhwd $f18, $f16, $f16 \n\t" + "punpcklwd $f16, $f16, $f16 \n\t" + "mov.d $f20, $f14 \n\t" + "mov.d $f24, $f18 \n\t" + + "punpckhbh $f14, $f12, $f28 \n\t" + "punpcklbh $f12, $f12, $f28 \n\t" + "punpckhbh $f22, $f20, $f28 \n\t" + "punpcklbh $f20, $f20, $f28 \n\t" + "punpckhbh $f18, $f16, $f28 \n\t" + "punpcklbh $f16, $f16, $f28 \n\t" + "punpckhbh $f26, $f24, $f28 \n\t" + "punpcklbh $f24, $f24, $f28 \n\t" + + PTR_ADDU "%[pABCD], %[pSrc], %[iSrcStride] \n\t" + "gsldlc1 $f0, 0x7(%[pSrc]) \n\t" + "gsldlc1 $f4, 0x8(%[pSrc]) \n\t" + "gsldrc1 $f0, 0x0(%[pSrc]) \n\t" + "gsldrc1 $f4, 0x1(%[pSrc]) \n\t" + "punpckhbh $f2, $f0, $f28 \n\t" + "punpcklbh $f0, $f0, $f28 \n\t" + "punpckhbh $f6, $f4, $f28 \n\t" + "punpcklbh $f4, $f4, $f28 \n\t" + "1: \n\t" + "dli $8, 0x20 \n\t" + "dmtc1 $8, $f30 \n\t" + + "pmullh $f0, $f0, $f12 \n\t" + "pmullh $f2, $f2, $f14 \n\t" + "pmullh $f4, $f4, $f20 \n\t" + "pmullh $f6, $f6, $f22 \n\t" + "paddh $f0, $f0, $f4 \n\t" + "paddh $f2, $f2, $f6 \n\t" + + "gsldlc1 $f4, 0x7(%[pABCD]) \n\t" + "gsldrc1 $f4, 0x0(%[pABCD]) \n\t" + "punpckhbh $f6, $f4, $f28 
\n\t" + "punpcklbh $f4, $f4, $f28 \n\t" + "mov.d $f8, $f4 \n\t" + "mov.d $f10, $f6 \n\t" + "pmullh $f4, $f4, $f16 \n\t" + "pmullh $f6, $f6, $f18 \n\t" + "paddh $f0, $f0, $f4 \n\t" + "paddh $f2, $f2, $f6 \n\t" + + "gsldlc1 $f4, 0x8(%[pABCD]) \n\t" + "gsldrc1 $f4, 0x1(%[pABCD]) \n\t" + "punpckhbh $f6, $f4, $f28 \n\t" + "punpcklbh $f4, $f4, $f28 \n\t" + "mov.d $f28, $f4 \n\t" + "mov.d $f30, $f6 \n\t" + "pmullh $f4, $f4, $f24 \n\t" + "pmullh $f6, $f6, $f26 \n\t" + "paddh $f0, $f0, $f4 \n\t" + "paddh $f2, $f2, $f6 \n\t" + "mov.d $f4, $f28 \n\t" + "mov.d $f6, $f30 \n\t" + + "dli $8, 0x0020002000200020 \n\t" + "dmfc1 $9, $f20 \n\t" + "dmtc1 $8, $f20 \n\t" + "dli $8, 0x6 \n\t" + "paddh $f0, $f0, $f20 \n\t" + "paddh $f2, $f2, $f20 \n\t" + "dmtc1 $8, $f20 \n\t" + "psrlh $f0, $f0, $f20 \n\t" + "psrlh $f2, $f2, $f20 \n\t" + + "xor $f28, $f28, $f28 \n\t" + "packushb $f0, $f0, $f2 \n\t" + "gssdlc1 $f0, 0x7(%[pDst]) \n\t" + "gssdrc1 $f0, 0x0(%[pDst]) \n\t" + + "mov.d $f0, $f8 \n\t" + "mov.d $f2, $f10 \n\t" + "dmtc1 $9, $f20 \n\t" + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + PTR_ADDU "%[pABCD], %[pABCD], %[iSrcStride] \n\t" + + PTR_ADDIU "%[iHeight], %[iHeight], -1 \n\t" + "bnez %[iHeight], 1b \n\t" + : [pSrc]"+&r"(pSrc), [pDst]"+&r"(pDst), [pABCD]"+&r"(pABCD), + [iHeight]"+&r"(iHeight) + : [iSrcStride]"r"(iSrcStride), [iDstStride]"r"(iDstStride) + : "memory", "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12", + "$f14", "$f16", "$f18", "$f20", "$f22", "$f24", "$f26", "$f28", "$f30" + ); + RECOVER_REG; +} + +void McChroma_mmi(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, + int32_t iDstStride, int16_t iMvX, int16_t iMvY, + int32_t iWidth, int32_t iHeight) { + static const PMcChromaWidthExtFunc kpMcChromaWidthFuncs[2] = { + McChromaWidthEq4_mmi, + McChromaWidthEq8_mmi + }; + const int32_t kiD8x = iMvX & 0x07; + const int32_t kiD8y = iMvY & 0x07; + if (kiD8x == 0 && kiD8y == 0) { + McCopy_mmi (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight); + return; 
+ } + if (iWidth != 2) { + kpMcChromaWidthFuncs[iWidth >> 3] (pSrc, iSrcStride, pDst, iDstStride, + g_kuiABCD[kiD8y][kiD8x], iHeight); + } else + McChromaWithFragMv_c (pSrc, iSrcStride, pDst, iDstStride, iMvX, iMvY, + iWidth, iHeight); +} + +void McHorVer20WidthEq8_mmi(const uint8_t *pSrc, int iSrcStride, uint8_t *pDst, + int iDstStride, int iHeight) { + BACKUP_REG; + __asm__ volatile ( + ".set arch=loongson3a \n\t" + PTR_ADDIU "%[pSrc], %[pSrc], -0x2 \n\t" + "xor $f28, $f28, $f28 \n\t" + "dli $8, 0x0010001000100010 \n\t" + "dmtc1 $8, $f24 \n\t" + "dli $8, 0x2 \n\t" + "dmtc1 $8, $f26 \n\t" + "dli $8, 0x5 \n\t" + "dmtc1 $8, $f30 \n\t" + "1: \n\t" + "gsldlc1 $f0, 0x7(%[pSrc]) \n\t" + "gsldlc1 $f4, 0xc(%[pSrc]) \n\t" + "gsldlc1 $f8, 0x8(%[pSrc]) \n\t" + "gsldlc1 $f12, 0xb(%[pSrc]) \n\t" + "gsldlc1 $f16, 0x9(%[pSrc]) \n\t" + "gsldlc1 $f20, 0xa(%[pSrc]) \n\t" + "gsldrc1 $f0, 0x0(%[pSrc]) \n\t" + "gsldrc1 $f4, 0x5(%[pSrc]) \n\t" + "gsldrc1 $f8, 0x1(%[pSrc]) \n\t" + "gsldrc1 $f12, 0x4(%[pSrc]) \n\t" + "gsldrc1 $f16, 0x2(%[pSrc]) \n\t" + "gsldrc1 $f20, 0x3(%[pSrc]) \n\t" + "punpckhbh $f2, $f0, $f28 \n\t" + "punpckhbh $f6, $f4, $f28 \n\t" + "punpckhbh $f10, $f8, $f28 \n\t" + "punpckhbh $f14, $f12, $f28 \n\t" + "punpckhbh $f18, $f16, $f28 \n\t" + "punpckhbh $f22, $f20, $f28 \n\t" + "punpcklbh $f0, $f0, $f28 \n\t" + "punpcklbh $f4, $f4, $f28 \n\t" + "punpcklbh $f8, $f8, $f28 \n\t" + "punpcklbh $f12, $f12, $f28 \n\t" + "punpcklbh $f16, $f16, $f28 \n\t" + "punpcklbh $f20, $f20, $f28 \n\t" + "paddh $f8, $f8, $f12 \n\t" + "paddh $f10, $f10, $f14 \n\t" + "paddh $f16, $f16, $f20 \n\t" + "paddh $f18, $f18, $f22 \n\t" + "psllh $f16, $f16, $f26 \n\t" + "psllh $f18, $f18, $f26 \n\t" + "psubh $f16, $f16, $f8 \n\t" + "psubh $f18, $f18, $f10 \n\t" + "paddh $f0, $f0, $f4 \n\t" + "paddh $f2, $f2, $f6 \n\t" + "paddh $f0, $f0, $f16 \n\t" + "paddh $f2, $f2, $f18 \n\t" + "psllh $f16, $f16, $f26 \n\t" + "psllh $f18, $f18, $f26 \n\t" + "paddh $f0, $f0, $f16 \n\t" + "paddh $f2, $f2, $f18 \n\t" + 
"paddh $f0, $f0, $f24 \n\t" + "paddh $f2, $f2, $f24 \n\t" + "psrah $f0, $f0, $f30 \n\t" + "psrah $f2, $f2, $f30 \n\t" + "packushb $f0, $f0, $f2 \n\t" + "gssdlc1 $f0, 0x7(%[pDst]) \n\t" + "gssdrc1 $f0, 0x0(%[pDst]) \n\t" + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + PTR_ADDU "%[pSrc], %[pSrc], %[iSrcStride] \n\t" + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "bnez %[iHeight], 1b \n\t" + : [pSrc]"+&r"(pSrc), [pDst]"+&r"(pDst), [iHeight]"+&r"(iHeight) + : [iSrcStride]"r"(iSrcStride), [iDstStride]"r"(iDstStride) + : "memory", "$8", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12", + "$f14", "$f16", "$f18", "$f20", "$f22", "$f24", "$f26", "$f28", "$f30" + ); + RECOVER_REG; +} + +void McHorVer20WidthEq16_mmi(const uint8_t *pSrc, int iSrcStride, uint8_t *pDst, + int iDstStride, int iHeight) { + BACKUP_REG; + __asm__ volatile ( + ".set arch=loongson3a \n\t" + PTR_ADDIU "%[pSrc], %[pSrc], -0x2 \n\t" + "dli $8, 0x0010001000100010 \n\t" + "dmtc1 $8, $f24 \n\t" + "dli $8, 0x2 \n\t" + "dmtc1 $8, $f26 \n\t" + "dli $8, 0x5 \n\t" + "dmtc1 $8, $f30 \n\t" + "1: \n\t" + "xor $f28, $f28, $f28 \n\t" + "gsldlc1 $f0, 0x7(%[pSrc]) \n\t" + "gsldlc1 $f4, 0xc(%[pSrc]) \n\t" + "gsldlc1 $f8, 0x8(%[pSrc]) \n\t" + "gsldlc1 $f12, 0xb(%[pSrc]) \n\t" + "gsldlc1 $f16, 0x9(%[pSrc]) \n\t" + "gsldlc1 $f20, 0xa(%[pSrc]) \n\t" + "gsldrc1 $f0, 0x0(%[pSrc]) \n\t" + "gsldrc1 $f4, 0x5(%[pSrc]) \n\t" + "gsldrc1 $f8, 0x1(%[pSrc]) \n\t" + "gsldrc1 $f12, 0x4(%[pSrc]) \n\t" + "gsldrc1 $f16, 0x2(%[pSrc]) \n\t" + "gsldrc1 $f20, 0x3(%[pSrc]) \n\t" + "punpckhbh $f2, $f0, $f28 \n\t" + "punpckhbh $f6, $f4, $f28 \n\t" + "punpckhbh $f10, $f8, $f28 \n\t" + "punpckhbh $f14, $f12, $f28 \n\t" + "punpckhbh $f18, $f16, $f28 \n\t" + "punpckhbh $f22, $f20, $f28 \n\t" + "punpcklbh $f0, $f0, $f28 \n\t" + "punpcklbh $f4, $f4, $f28 \n\t" + "punpcklbh $f8, $f8, $f28 \n\t" + "punpcklbh $f12, $f12, $f28 \n\t" + "punpcklbh $f16, $f16, $f28 \n\t" + "punpcklbh $f20, $f20, $f28 \n\t" + "paddh $f8, $f8, $f12 \n\t" + "paddh $f10, 
$f10, $f14 \n\t" + "paddh $f16, $f16, $f20 \n\t" + "paddh $f18, $f18, $f22 \n\t" + "psllh $f16, $f16, $f26 \n\t" + "psllh $f18, $f18, $f26 \n\t" + "psubh $f16, $f16, $f8 \n\t" + "psubh $f18, $f18, $f10 \n\t" + "paddh $f0, $f0, $f4 \n\t" + "paddh $f2, $f2, $f6 \n\t" + "paddh $f0, $f0, $f16 \n\t" + "paddh $f2, $f2, $f18 \n\t" + "psllh $f16, $f16, $f26 \n\t" + "psllh $f18, $f18, $f26 \n\t" + "paddh $f0, $f0, $f16 \n\t" + "paddh $f2, $f2, $f18 \n\t" + "paddh $f0, $f0, $f24 \n\t" + "paddh $f2, $f2, $f24 \n\t" + "psrah $f0, $f0, $f30 \n\t" + "psrah $f2, $f2, $f30 \n\t" + "packushb $f0, $f0, $f2 \n\t" + "gssdlc1 $f0, 0x7(%[pDst]) \n\t" + "gssdrc1 $f0, 0x0(%[pDst]) \n\t" + "gsldlc1 $f0, 0xF(%[pSrc]) \n\t" + "gsldlc1 $f4, 0x14(%[pSrc]) \n\t" + "gsldlc1 $f8, 0x10(%[pSrc]) \n\t" + "gsldlc1 $f12, 0x13(%[pSrc]) \n\t" + "gsldlc1 $f16, 0x11(%[pSrc]) \n\t" + "gsldlc1 $f20, 0x12(%[pSrc]) \n\t" + "gsldrc1 $f0, 0x8(%[pSrc]) \n\t" + "gsldrc1 $f4, 0xd(%[pSrc]) \n\t" + "gsldrc1 $f8, 0x9(%[pSrc]) \n\t" + "gsldrc1 $f12, 0xc(%[pSrc]) \n\t" + "gsldrc1 $f16, 0xa(%[pSrc]) \n\t" + "gsldrc1 $f20, 0xb(%[pSrc]) \n\t" + "punpckhbh $f2, $f0, $f28 \n\t" + "punpckhbh $f6, $f4, $f28 \n\t" + "punpckhbh $f10, $f8, $f28 \n\t" + "punpckhbh $f14, $f12, $f28 \n\t" + "punpckhbh $f18, $f16, $f28 \n\t" + "punpckhbh $f22, $f20, $f28 \n\t" + "punpcklbh $f0, $f0, $f28 \n\t" + "punpcklbh $f4, $f4, $f28 \n\t" + "punpcklbh $f8, $f8, $f28 \n\t" + "punpcklbh $f12, $f12, $f28 \n\t" + "punpcklbh $f16, $f16, $f28 \n\t" + "punpcklbh $f20, $f20, $f28 \n\t" + "paddh $f8, $f8, $f12 \n\t" + "paddh $f10, $f10, $f14 \n\t" + "paddh $f16, $f16, $f20 \n\t" + "paddh $f18, $f18, $f22 \n\t" + "psllh $f16, $f16, $f26 \n\t" + "psllh $f18, $f18, $f26 \n\t" + "psubh $f16, $f16, $f8 \n\t" + "psubh $f18, $f18, $f10 \n\t" + "paddh $f0, $f0, $f4 \n\t" + "paddh $f2, $f2, $f6 \n\t" + "paddh $f0, $f0, $f16 \n\t" + "paddh $f2, $f2, $f18 \n\t" + "psllh $f16, $f16, $f26 \n\t" + "psllh $f18, $f18, $f26 \n\t" + "paddh $f0, $f0, $f16 \n\t" + "paddh 
$f2, $f2, $f18 \n\t" + "paddh $f0, $f0, $f24 \n\t" + "paddh $f2, $f2, $f24 \n\t" + "psrah $f0, $f0, $f30 \n\t" + "psrah $f2, $f2, $f30 \n\t" + "packushb $f0, $f0, $f2 \n\t" + "gssdlc1 $f0, 0xF(%[pDst]) \n\t" + "gssdrc1 $f0, 0x8(%[pDst]) \n\t" + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + PTR_ADDU "%[pSrc], %[pSrc], %[iSrcStride] \n\t" + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "bnez %[iHeight], 1b \n\t" + : [pSrc]"+&r"(pSrc), [pDst]"+&r"(pDst), [iHeight]"+&r"(iHeight) + : [iSrcStride]"r"(iSrcStride), [iDstStride]"r"(iDstStride) + : "memory", "$8", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12", + "$f14", "$f16", "$f18", "$f20", "$f22", "$f24", "$f26", "$f28", "$f30" + ); + RECOVER_REG; +} + +void McHorVer20WidthEq4_mmi(const uint8_t *pSrc, int iSrcStride, uint8_t *pDst, + int iDstStride, int iHeight) { + __asm__ volatile ( + ".set arch=loongson3a \n\t" + "1: \n\t" + PTR_ADDIU "%[pSrc], %[pSrc], -0x2 \n\t" + "xor $f14, $f14, $f14 \n\t" + "dli $8, 0x0010001000100010 \n\t" + "dmtc1 $8, $f12 \n\t" + "1: \n\t" + "gsldlc1 $f0, 0x7(%[pSrc]) \n\t" + "gsldlc1 $f2, 0xc(%[pSrc]) \n\t" + "gsldlc1 $f4, 0x8(%[pSrc]) \n\t" + "gsldlc1 $f6, 0xb(%[pSrc]) \n\t" + "gsldlc1 $f8, 0x9(%[pSrc]) \n\t" + "gsldlc1 $f10, 0xa(%[pSrc]) \n\t" + "gsldrc1 $f0, 0x0(%[pSrc]) \n\t" + "gsldrc1 $f2, 0x5(%[pSrc]) \n\t" + "gsldrc1 $f4, 0x1(%[pSrc]) \n\t" + "gsldrc1 $f6, 0x4(%[pSrc]) \n\t" + "gsldrc1 $f8, 0x2(%[pSrc]) \n\t" + "gsldrc1 $f10, 0x3(%[pSrc]) \n\t" + "dli $8, 0x2 \n\t" + "punpcklbh $f0, $f0, $f14 \n\t" + "punpcklbh $f2, $f2, $f14 \n\t" + "punpcklbh $f4, $f4, $f14 \n\t" + "punpcklbh $f6, $f6, $f14 \n\t" + "punpcklbh $f8, $f8, $f14 \n\t" + "punpcklbh $f10, $f10, $f14 \n\t" + "dmtc1 $8, $f16 \n\t" + "paddh $f4, $f4, $f6 \n\t" + "paddh $f8, $f8, $f10 \n\t" + "psllh $f8, $f8, $f16 \n\t" + "psubh $f8, $f8, $f4 \n\t" + "paddh $f0, $f0, $f2 \n\t" + "paddh $f0, $f0, $f8 \n\t" + "dli $8, 0x5 \n\t" + "psllh $f8, $f8, $f16 \n\t" + "paddh $f0, $f0, $f8 \n\t" + "paddh $f0, $f0, $f12 \n\t" + 
"dmtc1 $8, $f16 \n\t" + "psrah $f0, $f0, $f16 \n\t" + "packushb $f0, $f0, $f14 \n\t" + "gsswlc1 $f0, 0x3(%[pDst]) \n\t" + "gsswrc1 $f0, 0x0(%[pDst]) \n\t" + PTR_ADDU "%[pSrc], %[pSrc], %[iSrcStride] \n\t" + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "bnez %[iHeight], 1b \n\t" + : [pSrc]"+&r"(pSrc), [pDst]"+&r"(pDst), [iHeight]"+&r"(iHeight) + : [iSrcStride]"r"(iSrcStride), [iDstStride]"r"(iDstStride) + : "memory", "$8", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12", + "$f14", "$f16" + ); +} + +static inline void McHorVer20_mmi(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, + int32_t iDstStride, int32_t iWidth, int32_t iHeight) { + if (iWidth == 16) + McHorVer20WidthEq16_mmi (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 8) + McHorVer20WidthEq8_mmi (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else + McHorVer20WidthEq4_mmi (pSrc, iSrcStride, pDst, iDstStride, iHeight); +} + +void McHorVer02WidthEq8_mmi(const uint8_t *pSrc, int iSrcStride, uint8_t *pDst, + int iDstStride, int iHeight) { + BACKUP_REG; + __asm__ volatile ( + ".set arch=loongson3a \n\t" + PTR_SUBU "%[pSrc], %[pSrc], %[iSrcStride] \n\t" + PTR_SUBU "%[pSrc], %[pSrc], %[iSrcStride] \n\t" + "xor $f28, $f28, $f28 \n\t" + MMI_LOAD_8P($f0, $f2, $f28, %[pSrc]) + PTR_ADDU "$8, %[pSrc], %[iSrcStride] \n\t" + MMI_LOAD_8P($f4, $f6, $f28, $8) + PTR_ADDU "%[pSrc], %[pSrc], %[iSrcStride] \n\t" + PTR_ADDU "%[pSrc], %[pSrc], %[iSrcStride] \n\t" + MMI_LOAD_8P($f8, $f10, $f28, %[pSrc]) + PTR_ADDU "$8, %[pSrc], %[iSrcStride] \n\t" + MMI_LOAD_8P($f12, $f14, $f28, $8) + PTR_ADDU "%[pSrc], %[pSrc], %[iSrcStride] \n\t" + PTR_ADDU "%[pSrc], %[pSrc], %[iSrcStride] \n\t" + MMI_LOAD_8P($f16, $f18, $f28, %[pSrc]) + PTR_ADDU "$8, %[pSrc], %[iSrcStride] \n\t" + MMI_LOAD_8P($f20, $f22, $f28, $8) + + "1: \n\t" + FILTER_HV_W8($f0, $f2, $f4, $f6, $f8, $f10, $f12, $f14, $f16, $f18, $f20, + $f22, $f24, $f26, $f28, $f30, %[pDst], $8, $9) + PTR_ADDIU 
"%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 2f \n\t" + PTR_ADDU "%[pSrc], %[pSrc], %[iSrcStride] \n\t" + PTR_ADDU "%[pSrc], %[pSrc], %[iSrcStride] \n\t" + MMI_LOAD_8P($f24, $f26, $f28, %[pSrc]) + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + FILTER_HV_W8($f4, $f6, $f8, $f10, $f12, $f14, $f16, $f18, $f20, $f22, $f24, + $f26, $f28, $f30, $f0, $f2, %[pDst], $8, $9) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 2f \n\t" + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + PTR_ADDU "$8, %[pSrc], %[iSrcStride] \n\t" + MMI_LOAD_8P($f28, $f30, $f0, $8) + FILTER_HV_W8($f8, $f10, $f12, $f14, $f16, $f18, $f20, $f22, $f24, $f26, $f28, + $f30, $f0, $f2, $f4, $f6, %[pDst], $8, $9) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 2f \n\t" + PTR_ADDU "%[pSrc], %[pSrc], %[iSrcStride] \n\t" + PTR_ADDU "%[pSrc], %[pSrc], %[iSrcStride] \n\t" + MMI_LOAD_8P($f0, $f2, $f4, %[pSrc]) + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + FILTER_HV_W8($f12, $f14, $f16, $f18, $f20, $f22, $f24, $f26, $f28, $f30, $f0, + $f2, $f4, $f6, $f8, $f10, %[pDst], $8, $9) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 2f \n\t" + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + PTR_ADDU "$8, %[pSrc], %[iSrcStride] \n\t" + MMI_LOAD_8P($f4, $f6, $f8, $8) + FILTER_HV_W8($f16, $f18, $f20, $f22, $f24, $f26, $f28, $f30, $f0, $f2, $f4, + $f6, $f8, $f10, $f12, $f14, %[pDst], $8, $9) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 2f \n\t" + PTR_ADDU "%[pSrc], %[pSrc], %[iSrcStride] \n\t" + PTR_ADDU "%[pSrc], %[pSrc], %[iSrcStride] \n\t" + MMI_LOAD_8P($f8, $f10, $f12, %[pSrc]) + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + FILTER_HV_W8($f20, $f22, $f24, $f26, $f28, $f30, $f0, $f2, $f4, $f6, $f8, + $f10, $f12, $f14, $f16, $f18, %[pDst], $8, $9) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 2f \n\t" + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + PTR_ADDU "$8, %[pSrc], %[iSrcStride] \n\t" + 
MMI_LOAD_8P($f12, $f14, $f16, $8) + FILTER_HV_W8($f24, $f26, $f28, $f30, $f0, $f2, $f4, $f6, $f8, $f10, $f12, + $f14, $f16, $f18, $f20, $f22, %[pDst], $8, $9) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 2f \n\t" + PTR_ADDU "%[pSrc], %[pSrc], %[iSrcStride] \n\t" + PTR_ADDU "%[pSrc], %[pSrc], %[iSrcStride] \n\t" + MMI_LOAD_8P($f16, $f18, $f20, %[pSrc]) + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + FILTER_HV_W8($f28, $f30, $f0, $f2, $f4, $f6, $f8, $f10, $f12, $f14, $f16, + $f18, $f20, $f22, $f24, $f26, %[pDst], $8, $9) + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "beqz %[iHeight], 2f \n\t" + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + PTR_ADDU "$8, %[pSrc], %[iSrcStride] \n\t" + MMI_LOAD_8P($f20, $f22, $f24, $8) + "j 1b \n\t" + "2: \n\t" + : [pSrc]"+&r"(pSrc), [pDst]"+&r"(pDst), [iHeight]"+&r"(iHeight) + : [iSrcStride]"r"(iSrcStride), [iDstStride]"r"(iDstStride) + : "memory", "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12", + "$f14", "$f16", "$f18", "$f20", "$f22", "$f24", "$f26", "$f28", "$f30" + ); + RECOVER_REG; +} + +static inline void McHorVer02WidthEq16_mmi(const uint8_t* pSrc, int32_t iSrcStride, + uint8_t* pDst, int32_t iDstStride, int32_t iHeight) { + McHorVer02WidthEq8_mmi (pSrc, iSrcStride, pDst, iDstStride, iHeight); + McHorVer02WidthEq8_mmi (&pSrc[8], iSrcStride, &pDst[8], iDstStride, iHeight); +} + +static inline void McHorVer02_mmi(const uint8_t* pSrc, int32_t iSrcStride, + uint8_t* pDst, int32_t iDstStride, int32_t iWidth, + int32_t iHeight) { + if (iWidth == 16) + McHorVer02WidthEq16_mmi (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 8) + McHorVer02WidthEq8_mmi (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else + McHorVer02_c (pSrc, iSrcStride, pDst, iDstStride, 4, iHeight); +} + +void McHorVer22Width8HorFirst_mmi(const uint8_t *pSrc, int16_t iSrcStride, + uint8_t *pDst, int32_t iDstStride, int32_t iHeight) { + BACKUP_REG; + __asm__ volatile ( + ".set arch=loongson3a \n\t" 
+ "xor $f28, $f28, $f28 \n\t" + PTR_SUBU "%[pSrc], %[pSrc], %[iSrcStride] \n\t" + PTR_SUBU "%[pSrc], %[pSrc], %[iSrcStride] \n\t" + "dli $8, 0x2 \n\t" + "dmtc1 $8, $f30 \n\t" + "1: \n\t" + "xor $f28, $f28, $f28 \n\t" + "gsldlc1 $f0, 0x7(%[pSrc]) \n\t" + "gsldlc1 $f4, 0xc(%[pSrc]) \n\t" + "gsldlc1 $f8, 0x8(%[pSrc]) \n\t" + "gsldlc1 $f12, 0xb(%[pSrc]) \n\t" + "gsldlc1 $f16, 0x9(%[pSrc]) \n\t" + "gsldlc1 $f20, 0xa(%[pSrc]) \n\t" + "gsldrc1 $f0, 0x0(%[pSrc]) \n\t" + "gsldrc1 $f4, 0x5(%[pSrc]) \n\t" + "gsldrc1 $f8, 0x1(%[pSrc]) \n\t" + "gsldrc1 $f12, 0x4(%[pSrc]) \n\t" + "gsldrc1 $f16, 0x2(%[pSrc]) \n\t" + "gsldrc1 $f20, 0x3(%[pSrc]) \n\t" + "punpckhbh $f2, $f0, $f28 \n\t" + "punpckhbh $f6, $f4, $f28 \n\t" + "punpckhbh $f10, $f8, $f28 \n\t" + "punpckhbh $f14, $f12, $f28 \n\t" + "punpckhbh $f18, $f16, $f28 \n\t" + "punpckhbh $f22, $f20, $f28 \n\t" + "punpcklbh $f0, $f0, $f28 \n\t" + "punpcklbh $f4, $f4, $f28 \n\t" + "punpcklbh $f8, $f8, $f28 \n\t" + "punpcklbh $f12, $f12, $f28 \n\t" + "punpcklbh $f16, $f16, $f28 \n\t" + "punpcklbh $f20, $f20, $f28 \n\t" + "paddh $f8, $f8, $f12 \n\t" + "paddh $f10, $f10, $f14 \n\t" + "paddh $f16, $f16, $f20 \n\t" + "paddh $f18, $f18, $f22 \n\t" + "psllh $f16, $f16, $f30 \n\t" + "psllh $f18, $f18, $f30 \n\t" + "psubh $f16, $f16, $f8 \n\t" + "psubh $f18, $f18, $f10 \n\t" + "paddh $f0, $f0, $f4 \n\t" + "paddh $f2, $f2, $f6 \n\t" + "paddh $f0, $f0, $f16 \n\t" + "paddh $f2, $f2, $f18 \n\t" + "psllh $f16, $f16, $f30 \n\t" + "psllh $f18, $f18, $f30 \n\t" + "paddh $f0, $f0, $f16 \n\t" + "paddh $f2, $f2, $f18 \n\t" + "gssdlc1 $f0, 0x7(%[pDst]) \n\t" + "gssdlc1 $f2, 0xF(%[pDst]) \n\t" + "gssdrc1 $f0, 0x0(%[pDst]) \n\t" + "gssdrc1 $f2, 0x8(%[pDst]) \n\t" + PTR_ADDU "%[pSrc], %[pSrc], %[iSrcStride] \n\t" + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + "bnez %[iHeight], 1b \n\t" + : [pSrc]"+&r"(pSrc), [pDst]"+&r"(pDst), [iHeight]"+&r"(iHeight) + : [iSrcStride]"r"(iSrcStride), 
[iDstStride]"r"(iDstStride) + : "memory", "$8", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12", + "$f14", "$f16", "$f18", "$f20", "$f22", "$f24", "$f26", "$f28", "$f30" + ); + RECOVER_REG; +} + +static inline void McHorVer22WidthEq8_mmi(const uint8_t* pSrc, int32_t iSrcStride, + uint8_t* pDst, int32_t iDstStride, int32_t iHeight) { + ENFORCE_STACK_ALIGN_2D (int16_t, iTap, 21, 8, 16) + McHorVer22Width8HorFirst_mmi (pSrc - 2, iSrcStride, (uint8_t*)iTap, 16, iHeight + 5); + McHorVer22Width8VerLastAlign_mmi ((uint8_t*)iTap, 16, pDst, iDstStride, 8, iHeight); +} + +static inline void McHorVer22WidthEq16_mmi(const uint8_t* pSrc, int32_t iSrcStride, + uint8_t* pDst, int32_t iDstStride, int32_t iHeight) { + McHorVer22WidthEq8_mmi (pSrc, iSrcStride, pDst, iDstStride, iHeight); + McHorVer22WidthEq8_mmi (&pSrc[8], iSrcStride, &pDst[8], iDstStride, iHeight); +} + +static inline void McHorVer22_mmi(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, + int32_t iDstStride, int32_t iWidth, int32_t iHeight) { + if (iWidth == 16) + McHorVer22WidthEq16_mmi (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else if (iWidth == 8) + McHorVer22WidthEq8_mmi (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else + McHorVer22_c (pSrc, iSrcStride, pDst, iDstStride, 4, iHeight); +} + +void PixelAvgWidthEq4_mmi(uint8_t *pDst, int iDstStride, const uint8_t *pSrcA, + int iSrcAStride, const uint8_t *pSrcB, int iSrcBStride, int iHeight ) { + __asm__ volatile ( + ".set arch=loongson3a \n\t" + "1: \n\t" + "gsldlc1 $f0, 0x7(%[pSrcB]) \n\t" + "gsldlc1 $f2, 0x7(%[pSrcA]) \n\t" + "gsldrc1 $f0, 0x0(%[pSrcB]) \n\t" + "gsldrc1 $f2, 0x0(%[pSrcA]) \n\t" + "pavgb $f0, $f0, $f2 \n\t" + "gsswlc1 $f0, 0x3(%[pDst]) \n\t" + "gsswrc1 $f0, 0x0(%[pDst]) \n\t" + PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t" + PTR_ADDU "%[pDst], %[pDst], %[iDstStride] \n\t" + PTR_ADDU "%[pSrcA], %[pSrcA], %[iSrcAStride] \n\t" + PTR_ADDU "%[pSrcB], %[pSrcB], %[iSrcBStride] \n\t" + "bnez %[iHeight], 1b \n\t" + : [pDst]"+&r"((unsigned 
char *)pDst), [pSrcA]"+&r"((unsigned char *)pSrcA), + [pSrcB]"+&r"((unsigned char *)pSrcB), [iHeight]"+&r"((int)iHeight) + : [iDstStride]"r"((int)iDstStride), [iSrcAStride]"r"((int)iSrcAStride), + [iSrcBStride]"r"((int)iSrcBStride) + : "memory", "$8", "$9", "$10", "$f0", "$f2" + ); +} + +void PixelAvgWidthEq8_mmi(uint8_t *pDst, int iDstStride, const uint8_t *pSrcA, + int iSrcAStride, const uint8_t *pSrcB, int iSrcBStride, int iHeight ) { + __asm__ volatile ( + ".set arch=loongson3a \n\t" + "1: \n\t" + "gsldlc1 $f0, 0x7(%[pSrcA]) \n\t" + "gsldlc1 $f2, 0x7(%[pSrcB]) \n\t" + "gsldrc1 $f0, 0x0(%[pSrcA]) \n\t" + "gsldrc1 $f2, 0x0(%[pSrcB]) \n\t" + "pavgb $f0, $f0, $f2 \n\t" + PTR_ADDU "$8, %[pSrcA], %[iSrcAStride] \n\t" + "gssdlc1 $f0, 0x7(%[pDst]) \n\t" + PTR_ADDU "$9, %[pSrcB], %[iSrcBStride] \n\t" + "gssdrc1 $f0, 0x0(%[pDst]) \n\t" + "gsldlc1 $f0, 0x7($8) \n\t" + "gsldlc1 $f2, 0x7($9) \n\t" + "gsldrc1 $f0, 0x0($8) \n\t" + "gsldrc1 $f2, 0x0($9) \n\t" + "pavgb $f0, $f0, $f2 \n\t" + PTR_ADDU "$10, %[pDst], %[iDstStride] \n\t" + "gssdlc1 $f0, 0x7($10) \n\t" + PTR_ADDU "%[pSrcA], $8, %[iSrcAStride] \n\t" + "gssdrc1 $f0, 0x0($10) \n\t" + PTR_ADDU "%[pSrcB], $9, %[iSrcBStride] \n\t" + PTR_ADDU "%[pDst], $10, %[iDstStride] \n\t" + PTR_ADDIU "%[iHeight], %[iHeight], -0x2 \n\t" + "bnez %[iHeight], 1b \n\t" + : [pDst]"+&r"((unsigned char *)pDst), [pSrcA]"+&r"((unsigned char *)pSrcA), + [pSrcB]"+&r"((unsigned char *)pSrcB), [iHeight]"+&r"((int)iHeight) + : [iDstStride]"r"((int)iDstStride), [iSrcAStride]"r"((int)iSrcAStride), + [iSrcBStride]"r"((int)iSrcBStride) + : "memory", "$8", "$9", "$10", "$f0", "$f2" + ); +} + +void PixelAvgWidthEq16_mmi(uint8_t *pDst, int iDstStride, const uint8_t *pSrcA, + int iSrcAStride, const uint8_t *pSrcB, int iSrcBStride, int iHeight ) { + __asm__ volatile ( + ".set arch=loongson3a \n\t" + "1: \n\t" + "gsldlc1 $f0, 0x7(%[pSrcA]) \n\t" + "gsldlc1 $f2, 0xF(%[pSrcA]) \n\t" + "gsldlc1 $f4, 0x7(%[pSrcB]) \n\t" + "gsldlc1 $f6, 0xF(%[pSrcB]) \n\t" + 
"gsldrc1 $f0, 0x0(%[pSrcA]) \n\t" + "gsldrc1 $f2, 0x8(%[pSrcA]) \n\t" + "gsldrc1 $f4, 0x0(%[pSrcB]) \n\t" + "gsldrc1 $f6, 0x8(%[pSrcB]) \n\t" + "pavgb $f0, $f0, $f4 \n\t" + "pavgb $f2, $f2, $f6 \n\t" + PTR_ADDU "$8, %[pSrcA], %[iSrcAStride] \n\t" + "gssdlc1 $f0, 0x7(%[pDst]) \n\t" + "gssdlc1 $f2, 0xF(%[pDst]) \n\t" + "gssdrc1 $f0, 0x0(%[pDst]) \n\t" + "gssdrc1 $f2, 0x8(%[pDst]) \n\t" + PTR_ADDU "$9, %[pSrcB], %[iSrcBStride] \n\t" + "gsldlc1 $f0, 0x7($8) \n\t" + "gsldlc1 $f2, 0xF($8) \n\t" + "gsldrc1 $f0, 0x0($8) \n\t" + "gsldrc1 $f2, 0x8($8) \n\t" + PTR_ADDU "$10, %[pDst], %[iDstStride] \n\t" + "gsldlc1 $f4, 0x7($9) \n\t" + "gsldlc1 $f6, 0xF($9) \n\t" + "gsldrc1 $f4, 0x0($9) \n\t" + "gsldrc1 $f6, 0x8($9) \n\t" + "pavgb $f0, $f0, $f4 \n\t" + "pavgb $f2, $f2, $f6 \n\t" + "gssdlc1 $f0, 0x7($10) \n\t" + "gssdlc1 $f2, 0xF($10) \n\t" + "gssdrc1 $f0, 0x0($10) \n\t" + "gssdrc1 $f2, 0x8($10) \n\t" + + PTR_ADDU "%[pSrcA], $8, %[iSrcAStride] \n\t" + PTR_ADDU "%[pSrcB], $9, %[iSrcBStride] \n\t" + PTR_ADDU "%[pDst], $10, %[iDstStride] \n\t" + "gsldlc1 $f0, 0x7(%[pSrcA]) \n\t" + "gsldlc1 $f2, 0xF(%[pSrcA]) \n\t" + "gsldlc1 $f4, 0x7(%[pSrcB]) \n\t" + "gsldlc1 $f6, 0xF(%[pSrcB]) \n\t" + "gsldrc1 $f0, 0x0(%[pSrcA]) \n\t" + "gsldrc1 $f2, 0x8(%[pSrcA]) \n\t" + "gsldrc1 $f4, 0x0(%[pSrcB]) \n\t" + "gsldrc1 $f6, 0x8(%[pSrcB]) \n\t" + "pavgb $f0, $f0, $f4 \n\t" + "pavgb $f2, $f2, $f6 \n\t" + PTR_ADDU "$8, %[pSrcA], %[iSrcAStride] \n\t" + PTR_ADDU "$9, %[pSrcB], %[iSrcBStride] \n\t" + "gssdlc1 $f0, 0x7(%[pDst]) \n\t" + "gssdlc1 $f2, 0xF(%[pDst]) \n\t" + "gssdrc1 $f0, 0x0(%[pDst]) \n\t" + "gssdrc1 $f2, 0x8(%[pDst]) \n\t" + "gsldlc1 $f0, 0x7($8) \n\t" + "gsldlc1 $f2, 0xF($8) \n\t" + "gsldlc1 $f4, 0x7($9) \n\t" + "gsldlc1 $f6, 0xF($9) \n\t" + "gsldrc1 $f0, 0x0($8) \n\t" + "gsldrc1 $f2, 0x8($8) \n\t" + "gsldrc1 $f4, 0x0($9) \n\t" + "gsldrc1 $f6, 0x8($9) \n\t" + PTR_ADDU "$10, %[pDst], %[iDstStride] \n\t" + "pavgb $f0, $f0, $f4 \n\t" + "pavgb $f2, $f2, $f6 \n\t" + "gssdlc1 $f0, 0x7($10) \n\t" 
+ "gssdlc1 $f2, 0xF($10) \n\t" + "gssdrc1 $f0, 0x0($10) \n\t" + "gssdrc1 $f2, 0x8($10) \n\t" + PTR_ADDU "%[pSrcA], $8, %[iSrcAStride] \n\t" + PTR_ADDU "%[pSrcB], $9, %[iSrcBStride] \n\t" + PTR_ADDU "%[pDst], $10, %[iDstStride] \n\t" + PTR_ADDIU "%[iHeight], %[iHeight], -0x4 \n\t" + "bnez %[iHeight], 1b \n\t" + : [pDst]"+&r"((unsigned char *)pDst), [pSrcA]"+&r"((unsigned char *)pSrcA), + [pSrcB]"+&r"((unsigned char *)pSrcB), [iHeight]"+&r"((int)iHeight) + : [iDstStride]"r"((int)iDstStride), [iSrcAStride]"r"((int)iSrcAStride), + [iSrcBStride]"r"((int)iSrcBStride) + : "memory", "$8", "$9", "$10", "$f0", "$f2", "$f4", "$f6" + ); +} + +static inline void McHorVer01_mmi(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, + int32_t iDstStride, int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16); + if (iWidth == 16) { + McHorVer02WidthEq16_mmi (pSrc, iSrcStride, pTmp, 16, iHeight); + PixelAvgWidthEq16_mmi (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight); + } else if (iWidth == 8) { + McHorVer02WidthEq8_mmi (pSrc, iSrcStride, pTmp, 16, iHeight); + PixelAvgWidthEq8_mmi (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight); + } else { + McHorVer02_c (pSrc, iSrcStride, pTmp, 16, 4, iHeight); + PixelAvgWidthEq4_mmi (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight); + } +} + +static inline void McHorVer03_mmi(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, + int32_t iDstStride, int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16); + if (iWidth == 16) { + McHorVer02WidthEq16_mmi (pSrc, iSrcStride, pTmp, 16, iHeight); + PixelAvgWidthEq16_mmi (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight); + } else if (iWidth == 8) { + McHorVer02WidthEq8_mmi (pSrc, iSrcStride, pTmp, 16, iHeight); + PixelAvgWidthEq8_mmi (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight); + } else { + McHorVer02_c (pSrc, iSrcStride, pTmp, 16, 4, iHeight); + PixelAvgWidthEq4_mmi (pDst, 
iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight); + } +} + +static inline void McHorVer10_mmi(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, + int32_t iDstStride, int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16); + if (iWidth == 16) { + McHorVer20WidthEq16_mmi (pSrc, iSrcStride, pTmp, 16, iHeight); + PixelAvgWidthEq16_mmi (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight); + } else if (iWidth == 8) { + McHorVer20WidthEq8_mmi (pSrc, iSrcStride, pTmp, 16, iHeight); + PixelAvgWidthEq8_mmi (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight); + } else { + McHorVer20WidthEq4_mmi (pSrc, iSrcStride, pTmp, 16, iHeight); + PixelAvgWidthEq4_mmi (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight); + } +} + +static inline void McHorVer11_mmi(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, + int32_t iDstStride, int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); + if (iWidth == 16) { + McHorVer20WidthEq16_mmi (pSrc, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq16_mmi (pSrc, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq16_mmi (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } else if (iWidth == 8) { + McHorVer20WidthEq8_mmi (pSrc, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq8_mmi (pSrc, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq8_mmi (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } else { + McHorVer20WidthEq4_mmi (pSrc, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02_c (pSrc, iSrcStride, pVerTmp, 16, 4, iHeight); + PixelAvgWidthEq4_mmi (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } +} + +static inline void McHorVer12_mmi(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, + int32_t iDstStride, int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16); + if 
(iWidth == 16) { + McHorVer02WidthEq16_mmi (pSrc, iSrcStride, pVerTmp, 16, iHeight); + McHorVer22WidthEq16_mmi (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq16_mmi (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight); + } else if (iWidth == 8) { + McHorVer02WidthEq8_mmi (pSrc, iSrcStride, pVerTmp, 16, iHeight); + McHorVer22WidthEq8_mmi (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq8_mmi (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight); + } else { + McHorVer02_c (pSrc, iSrcStride, pVerTmp, 16, 4, iHeight); + McHorVer22_c (pSrc, iSrcStride, pCtrTmp, 16, 4, iHeight); + PixelAvgWidthEq4_mmi (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight); + } +} +static inline void McHorVer13_mmi(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, + int32_t iDstStride, int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); + if (iWidth == 16) { + McHorVer20WidthEq16_mmi (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq16_mmi (pSrc, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq16_mmi (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } else if (iWidth == 8) { + McHorVer20WidthEq8_mmi (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq8_mmi (pSrc, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq8_mmi (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } else { + McHorVer20WidthEq4_mmi (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02_c (pSrc, iSrcStride, pVerTmp, 16, 4 , iHeight); + PixelAvgWidthEq4_mmi (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } +} +static inline void McHorVer21_mmi(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, + int32_t iDstStride, int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16); + if (iWidth == 16) { + McHorVer20WidthEq16_mmi (pSrc, 
iSrcStride, pHorTmp, 16, iHeight); + McHorVer22WidthEq16_mmi (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq16_mmi (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight); + } else if (iWidth == 8) { + McHorVer20WidthEq8_mmi (pSrc, iSrcStride, pHorTmp, 16, iHeight); + McHorVer22WidthEq8_mmi (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq8_mmi (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight); + } else { + McHorVer20WidthEq4_mmi (pSrc, iSrcStride, pHorTmp, 16, iHeight); + McHorVer22_c (pSrc, iSrcStride, pCtrTmp, 16, 4, iHeight); + PixelAvgWidthEq4_mmi (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight); + } +} + +static inline void McHorVer23_mmi(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, + int32_t iDstStride, int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16); + if (iWidth == 16) { + McHorVer20WidthEq16_mmi (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); + McHorVer22WidthEq16_mmi (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq16_mmi (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight); + } else if (iWidth == 8) { + McHorVer20WidthEq8_mmi (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); + McHorVer22WidthEq8_mmi (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq8_mmi (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight); + } else { + McHorVer20WidthEq4_mmi (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); + McHorVer22_c (pSrc, iSrcStride, pCtrTmp, 16, 4, iHeight); + PixelAvgWidthEq4_mmi (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight); + } +} +static inline void McHorVer30_mmi(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, + int32_t iDstStride, int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); + if (iWidth == 16) { + McHorVer20WidthEq16_mmi (pSrc, iSrcStride, pHorTmp, 16, iHeight); + PixelAvgWidthEq16_mmi (pDst, iDstStride, pSrc + 1, iSrcStride, 
pHorTmp, 16, iHeight); + } else if (iWidth == 8) { + McHorVer20WidthEq8_mmi (pSrc, iSrcStride, pHorTmp, 16, iHeight); + PixelAvgWidthEq8_mmi (pDst, iDstStride, pSrc + 1, iSrcStride, pHorTmp, 16, iHeight); + } else { + McHorVer20WidthEq4_mmi (pSrc, iSrcStride, pHorTmp, 16, iHeight); + PixelAvgWidthEq4_mmi (pDst, iDstStride, pSrc + 1, iSrcStride, pHorTmp, 16, iHeight); + } +} +static inline void McHorVer31_mmi(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, + int32_t iDstStride, int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); + if (iWidth == 16) { + McHorVer20WidthEq16_mmi (pSrc, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq16_mmi (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq16_mmi (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } else if (iWidth == 8) { + McHorVer20WidthEq8_mmi (pSrc, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq8_mmi (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq8_mmi (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } else { + McHorVer20WidthEq4_mmi (pSrc, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02_c (pSrc + 1, iSrcStride, pVerTmp, 16, 4, iHeight); + PixelAvgWidthEq4_mmi (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } +} +static inline void McHorVer32_mmi(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, + int32_t iDstStride, int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16); + if (iWidth == 16) { + McHorVer02WidthEq16_mmi (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight); + McHorVer22WidthEq16_mmi (pSrc, iSrcStride, pCtrTmp, 16, iHeight); + PixelAvgWidthEq16_mmi (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight); + } else if (iWidth == 8) { + McHorVer02WidthEq8_mmi (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight); + McHorVer22WidthEq8_mmi (pSrc, iSrcStride, pCtrTmp, 16, 
iHeight); + PixelAvgWidthEq8_mmi (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight); + } else { + McHorVer02_c (pSrc + 1, iSrcStride, pVerTmp, 16, 4, iHeight); + McHorVer22_c (pSrc, iSrcStride, pCtrTmp, 16, 4, iHeight); + PixelAvgWidthEq4_mmi (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight); + } +} +static inline void McHorVer33_mmi(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, + int32_t iDstStride, int32_t iWidth, int32_t iHeight) { + ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); + if (iWidth == 16) { + McHorVer20WidthEq16_mmi (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq16_mmi (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq16_mmi (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } else if (iWidth == 8) { + McHorVer20WidthEq8_mmi (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02WidthEq8_mmi (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight); + PixelAvgWidthEq8_mmi (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } else { + McHorVer20WidthEq4_mmi (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); + McHorVer02_c (pSrc + 1, iSrcStride, pVerTmp, 16, 4, iHeight); + PixelAvgWidthEq4_mmi (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); + } +} + +void McLuma_mmi(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) { + static const PWelsMcWidthHeightFunc pWelsMcFunc[4][4] = { //[x][y] + {McCopy_mmi, McHorVer01_mmi, McHorVer02_mmi, McHorVer03_mmi}, + {McHorVer10_mmi, McHorVer11_mmi, McHorVer12_mmi, McHorVer13_mmi}, + {McHorVer20_mmi, McHorVer21_mmi, McHorVer22_mmi, McHorVer23_mmi}, + {McHorVer30_mmi, McHorVer31_mmi, McHorVer32_mmi, McHorVer33_mmi}, + }; + + pWelsMcFunc[iMvX & 0x03][iMvY & 0x03] (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight); +} + +void PixelAvg_mmi(uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, 
int32_t iSrcAStride, + const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) { + static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = { + PixelAvgWidthEq8_mmi, + PixelAvgWidthEq16_mmi + }; + kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight); +} +#endif//HAVE_MMI +} // anon ns. + +void WelsCommon::InitMcFunc (SMcFunc* pMcFuncs, uint32_t uiCpuFlag) { + pMcFuncs->pfLumaHalfpelHor = McHorVer20_c; + pMcFuncs->pfLumaHalfpelVer = McHorVer02_c; + pMcFuncs->pfLumaHalfpelCen = McHorVer22_c; + pMcFuncs->pfSampleAveraging = PixelAvg_c; + pMcFuncs->pMcChromaFunc = McChroma_c; + pMcFuncs->pMcLumaFunc = McLuma_c; + +#if defined (X86_ASM) + if (uiCpuFlag & WELS_CPU_SSE2) { + pMcFuncs->pfLumaHalfpelHor = McHorVer20Width5Or9Or17_sse2; + pMcFuncs->pfLumaHalfpelVer = McHorVer02Height5Or9Or17_sse2; + pMcFuncs->pfLumaHalfpelCen = McHorVer22Width5Or9Or17Height5Or9Or17_sse2; + pMcFuncs->pfSampleAveraging = PixelAvg_sse2; + pMcFuncs->pMcChromaFunc = McChroma_sse2; + pMcFuncs->pMcLumaFunc = McLuma_sse2; + } + + if (uiCpuFlag & WELS_CPU_SSSE3) { + pMcFuncs->pfLumaHalfpelHor = McHorVer20Width5Or9Or17_ssse3; + pMcFuncs->pfLumaHalfpelVer = McHorVer02_ssse3; + pMcFuncs->pfLumaHalfpelCen = McHorVer22Width5Or9Or17_ssse3; + pMcFuncs->pMcChromaFunc = McChroma_ssse3; + pMcFuncs->pMcLumaFunc = McLuma_ssse3; + } +#ifdef HAVE_AVX2 + if (uiCpuFlag & WELS_CPU_AVX2) { + pMcFuncs->pfLumaHalfpelHor = McHorVer20Width5Or9Or17_avx2; + pMcFuncs->pfLumaHalfpelVer = McHorVer02_avx2; + pMcFuncs->pfLumaHalfpelCen = McHorVer22Width5Or9Or17_avx2; + pMcFuncs->pMcLumaFunc = McLuma_avx2; + } +#endif +#endif //(X86_ASM) + +#if defined(HAVE_NEON) + if (uiCpuFlag & WELS_CPU_NEON) { + pMcFuncs->pMcLumaFunc = McLuma_neon; + pMcFuncs->pMcChromaFunc = McChroma_neon; + pMcFuncs->pfSampleAveraging = PixelAvg_neon; + pMcFuncs->pfLumaHalfpelHor = McHorVer20Width5Or9Or17_neon;//iWidth+1:4/8/16 + pMcFuncs->pfLumaHalfpelVer = 
McHorVer02Height5Or9Or17_neon;//heigh+1:4/8/16 + pMcFuncs->pfLumaHalfpelCen = McHorVer22Width5Or9Or17Height5Or9Or17_neon;//iWidth+1/heigh+1 + } +#endif +#if defined(HAVE_NEON_AARCH64) + if (uiCpuFlag & WELS_CPU_NEON) { + pMcFuncs->pMcLumaFunc = McLuma_AArch64_neon; + pMcFuncs->pMcChromaFunc = McChroma_AArch64_neon; + pMcFuncs->pfSampleAveraging = PixelAvg_AArch64_neon; + pMcFuncs->pfLumaHalfpelHor = McHorVer20Width5Or9Or17_AArch64_neon;//iWidth+1:4/8/16 + pMcFuncs->pfLumaHalfpelVer = McHorVer02Height5Or9Or17_AArch64_neon;//heigh+1:4/8/16 + pMcFuncs->pfLumaHalfpelCen = McHorVer22Width5Or9Or17Height5Or9Or17_AArch64_neon;//iWidth+1/heigh+1 + } +#endif + +#if defined(HAVE_MMI) + if (uiCpuFlag & WELS_CPU_MMI) { + pMcFuncs->pfLumaHalfpelHor = McHorVer20Width5Or9Or17_mmi; + pMcFuncs->pfLumaHalfpelVer = McHorVer02Height5Or9Or17_mmi; + pMcFuncs->pfLumaHalfpelCen = McHorVer22Width5Or9Or17Height5Or9Or17_mmi; + pMcFuncs->pfSampleAveraging = PixelAvg_mmi; + pMcFuncs->pMcChromaFunc = McChroma_mmi; + pMcFuncs->pMcLumaFunc = McLuma_mmi; + } +#endif//HAVE_MMI +} diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/src/memory_align.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/common/src/memory_align.cpp new file mode 100644 index 000000000..90efb74ae --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/src/memory_align.cpp @@ -0,0 +1,174 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include +#include "memory_align.h" +#include "macros.h" + +namespace WelsCommon { + +#ifdef MEMORY_CHECK +static FILE* fpMemChkPoint; +static uint32_t nCountRequestNum; +static int32_t g_iMemoryLength; +#endif + + +CMemoryAlign::CMemoryAlign (const uint32_t kuiCacheLineSize) +#ifdef MEMORY_MONITOR + : m_nMemoryUsageInBytes (0) +#endif//MEMORY_MONITOR +{ + if ((kuiCacheLineSize == 0) || (kuiCacheLineSize & 0x0f)) + m_nCacheLineSize = 0x10; + else + m_nCacheLineSize = kuiCacheLineSize; +} + +CMemoryAlign::~CMemoryAlign() { +#ifdef MEMORY_MONITOR + assert (m_nMemoryUsageInBytes == 0); +#endif//MEMORY_MONITOR +} + +void* WelsMalloc (const uint32_t kuiSize, const char* kpTag, const uint32_t kiAlign) { + const int32_t kiSizeOfVoidPointer = sizeof (void**); + const int32_t kiSizeOfInt = sizeof (int32_t); + const int32_t kiAlignedBytes = kiAlign - 1; + const int32_t kiTrialRequestedSize = kuiSize + kiAlignedBytes + kiSizeOfVoidPointer + kiSizeOfInt; + const int32_t kiActualRequestedSize = kiTrialRequestedSize; + const uint32_t kiPayloadSize = kuiSize; + + uint8_t* pBuf = (uint8_t*) malloc (kiActualRequestedSize); + if (NULL == pBuf) + return NULL; + +#ifdef 
MEMORY_CHECK + if (fpMemChkPoint == NULL) { + fpMemChkPoint = fopen ("./enc_mem_check_point.txt", "at+"); + nCountRequestNum = 0; + } + + if (fpMemChkPoint != NULL) { + if (kpTag != NULL) + fprintf (fpMemChkPoint, "WelsMalloc(), 0x%x : actual uiSize:\t%d\tbytes, input uiSize: %d bytes, %d - %s\n", + (void*)pBuf, kiActualRequestedSize, kuiSize, nCountRequestNum++, kpTag); + else + fprintf (fpMemChkPoint, "WelsMalloc(), 0x%x : actual uiSize:\t%d\tbytes, input uiSize: %d bytes, %d \n", (void*)pBuf, + kiActualRequestedSize, kuiSize, nCountRequestNum++); + fflush (fpMemChkPoint); + } +#endif + uint8_t* pAlignedBuffer; + pAlignedBuffer = pBuf + kiAlignedBytes + kiSizeOfVoidPointer + kiSizeOfInt; + pAlignedBuffer -= ((uintptr_t) pAlignedBuffer & kiAlignedBytes); + * ((void**) (pAlignedBuffer - kiSizeOfVoidPointer)) = pBuf; + * ((int32_t*) (pAlignedBuffer - (kiSizeOfVoidPointer + kiSizeOfInt))) = kiPayloadSize; + + return pAlignedBuffer; +} + +void WelsFree (void* pPointer, const char* kpTag) { + if (pPointer) { +#ifdef MEMORY_CHECK + if (fpMemChkPoint != NULL) { + if (kpTag != NULL) + fprintf (fpMemChkPoint, "WelsFree(), 0x%x - %s: \t%d\t bytes \n", (void*) (* (((void**) pPointer) - 1)), kpTag, + g_iMemoryLength); + else + fprintf (fpMemChkPoint, "WelsFree(), 0x%x \n", (void*) (* (((void**) pPointer) - 1))); + fflush (fpMemChkPoint); + } +#endif + free (* (((void**) pPointer) - 1)); + } +} + +void* CMemoryAlign::WelsMallocz (const uint32_t kuiSize, const char* kpTag) { + void* pPointer = WelsMalloc (kuiSize, kpTag); + if (NULL == pPointer) { + return NULL; + } + // zero memory + memset (pPointer, 0, kuiSize); + + return pPointer; +} + +void* CMemoryAlign::WelsMalloc (const uint32_t kuiSize, const char* kpTag) { + void* pPointer = WelsCommon::WelsMalloc (kuiSize, kpTag, m_nCacheLineSize); +#ifdef MEMORY_MONITOR + if (pPointer != NULL) { + const int32_t kiMemoryLength = * ((int32_t*) ((uint8_t*)pPointer - sizeof (void**) - sizeof ( + int32_t))) + m_nCacheLineSize - 1 + 
sizeof (void**) + sizeof (int32_t); + m_nMemoryUsageInBytes += kiMemoryLength; +#ifdef MEMORY_CHECK + g_iMemoryLength = kiMemoryLength; +#endif + } +#endif//MEMORY_MONITOR + return pPointer; +} + +void CMemoryAlign::WelsFree (void* pPointer, const char* kpTag) { +#ifdef MEMORY_MONITOR + if (pPointer) { + const int32_t kiMemoryLength = * ((int32_t*) ((uint8_t*)pPointer - sizeof (void**) - sizeof ( + int32_t))) + m_nCacheLineSize - 1 + sizeof (void**) + sizeof (int32_t); + m_nMemoryUsageInBytes -= kiMemoryLength; +#ifdef MEMORY_CHECK + g_iMemoryLength = kiMemoryLength; +#endif + } +#endif//MEMORY_MONITOR + WelsCommon::WelsFree (pPointer, kpTag); +} + +void* WelsMallocz (const uint32_t kuiSize, const char* kpTag) { + void* pPointer = WelsMalloc (kuiSize, kpTag, 16); + if (NULL == pPointer) { + return NULL; + } + memset (pPointer, 0, kuiSize); + return pPointer; +} + +const uint32_t CMemoryAlign::WelsGetCacheLineSize() const { + return m_nCacheLineSize; +} + +const uint32_t CMemoryAlign::WelsGetMemoryUsage() const { + return m_nMemoryUsageInBytes; +} + +} // end of namespace WelsCommon diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/src/sad_common.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/common/src/sad_common.cpp new file mode 100644 index 000000000..953834218 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/src/sad_common.cpp @@ -0,0 +1,165 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file sample.c + * + * \brief compute SAD and SATD + * + * \date 2009.06.02 Created + * + ************************************************************************************* + */ + +#include "sad_common.h" +#include "macros.h" + +int32_t WelsSampleSad4x4_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSample2, int32_t iStride2) { + int32_t iSadSum = 0; + int32_t i = 0; + uint8_t* pSrc1 = pSample1; + uint8_t* pSrc2 = pSample2; + for (i = 0; i < 4; i++) { + iSadSum += WELS_ABS ((pSrc1[0] - pSrc2[0])); + iSadSum += WELS_ABS ((pSrc1[1] - pSrc2[1])); + iSadSum += WELS_ABS ((pSrc1[2] - pSrc2[2])); + iSadSum += WELS_ABS ((pSrc1[3] - pSrc2[3])); + + pSrc1 += iStride1; + pSrc2 += iStride2; + } + + return iSadSum; +} + +int32_t WelsSampleSad8x4_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSample2, int32_t iStride2) { + int32_t iSadSum = 0; + iSadSum += WelsSampleSad4x4_c (pSample1, iStride1, pSample2, iStride2); + iSadSum += WelsSampleSad4x4_c (pSample1 + 4, iStride1, pSample2 + 4, iStride2); + return iSadSum; +} + +int32_t WelsSampleSad4x8_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSample2, int32_t iStride2) { + int32_t iSadSum = 0; + iSadSum += WelsSampleSad4x4_c (pSample1, iStride1, pSample2, iStride2); + iSadSum += WelsSampleSad4x4_c (pSample1 + (iStride1 << 2), iStride1, pSample2 + (iStride2 << 2), iStride2); + return iSadSum; +} + +int32_t WelsSampleSad8x8_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSample2, int32_t iStride2) { + int32_t iSadSum = 0; + int32_t i = 0; + uint8_t* pSrc1 = pSample1; + uint8_t* pSrc2 = pSample2; + for (i = 0; i < 8; i++) { + iSadSum += WELS_ABS ((pSrc1[0] - pSrc2[0])); + iSadSum += WELS_ABS ((pSrc1[1] - pSrc2[1])); + iSadSum += WELS_ABS ((pSrc1[2] - pSrc2[2])); + iSadSum += WELS_ABS ((pSrc1[3] - pSrc2[3])); + iSadSum += WELS_ABS ((pSrc1[4] - pSrc2[4])); + iSadSum += WELS_ABS ((pSrc1[5] - pSrc2[5])); + iSadSum += WELS_ABS ((pSrc1[6] - pSrc2[6])); + iSadSum += WELS_ABS ((pSrc1[7] - pSrc2[7])); + + pSrc1 += 
iStride1; + pSrc2 += iStride2; + } + + return iSadSum; +} +int32_t WelsSampleSad16x8_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSample2, int32_t iStride2) { + int32_t iSadSum = 0; + + iSadSum += WelsSampleSad8x8_c (pSample1, iStride1, pSample2, iStride2); + iSadSum += WelsSampleSad8x8_c (pSample1 + 8, iStride1, pSample2 + 8, iStride2); + + return iSadSum; +} +int32_t WelsSampleSad8x16_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSample2, int32_t iStride2) { + int32_t iSadSum = 0; + iSadSum += WelsSampleSad8x8_c (pSample1, iStride1, pSample2, iStride2); + iSadSum += WelsSampleSad8x8_c (pSample1 + (iStride1 << 3), iStride1, pSample2 + (iStride2 << 3), iStride2); + + return iSadSum; +} +int32_t WelsSampleSad16x16_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSample2, int32_t iStride2) { + int32_t iSadSum = 0; + iSadSum += WelsSampleSad8x8_c (pSample1, iStride1, pSample2, iStride2); + iSadSum += WelsSampleSad8x8_c (pSample1 + 8, iStride1, pSample2 + 8, iStride2); + iSadSum += WelsSampleSad8x8_c (pSample1 + (iStride1 << 3), iStride1, pSample2 + (iStride2 << 3), iStride2); + iSadSum += WelsSampleSad8x8_c (pSample1 + (iStride1 << 3) + 8, iStride1, pSample2 + (iStride2 << 3) + 8, iStride2); + + return iSadSum; +} + +void WelsSampleSadFour16x16_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSample2, int32_t iStride2, + int32_t* pSad) { + * (pSad) = WelsSampleSad16x16_c (iSample1, iStride1, (iSample2 - iStride2), iStride2); + * (pSad + 1) = WelsSampleSad16x16_c (iSample1, iStride1, (iSample2 + iStride2), iStride2); + * (pSad + 2) = WelsSampleSad16x16_c (iSample1, iStride1, (iSample2 - 1), iStride2); + * (pSad + 3) = WelsSampleSad16x16_c (iSample1, iStride1, (iSample2 + 1), iStride2); +} +void WelsSampleSadFour16x8_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSample2, int32_t iStride2, int32_t* pSad) { + * (pSad) = WelsSampleSad16x8_c (iSample1, iStride1, (iSample2 - iStride2), iStride2); + * (pSad + 1) = WelsSampleSad16x8_c (iSample1, iStride1, 
(iSample2 + iStride2), iStride2); + * (pSad + 2) = WelsSampleSad16x8_c (iSample1, iStride1, (iSample2 - 1), iStride2); + * (pSad + 3) = WelsSampleSad16x8_c (iSample1, iStride1, (iSample2 + 1), iStride2); +} +void WelsSampleSadFour8x16_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSample2, int32_t iStride2, int32_t* pSad) { + * (pSad) = WelsSampleSad8x16_c (iSample1, iStride1, (iSample2 - iStride2), iStride2); + * (pSad + 1) = WelsSampleSad8x16_c (iSample1, iStride1, (iSample2 + iStride2), iStride2); + * (pSad + 2) = WelsSampleSad8x16_c (iSample1, iStride1, (iSample2 - 1), iStride2); + * (pSad + 3) = WelsSampleSad8x16_c (iSample1, iStride1, (iSample2 + 1), iStride2); + +} +void WelsSampleSadFour8x8_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSample2, int32_t iStride2, int32_t* pSad) { + * (pSad) = WelsSampleSad8x8_c (iSample1, iStride1, (iSample2 - iStride2), iStride2); + * (pSad + 1) = WelsSampleSad8x8_c (iSample1, iStride1, (iSample2 + iStride2), iStride2); + * (pSad + 2) = WelsSampleSad8x8_c (iSample1, iStride1, (iSample2 - 1), iStride2); + * (pSad + 3) = WelsSampleSad8x8_c (iSample1, iStride1, (iSample2 + 1), iStride2); +} +void WelsSampleSadFour4x4_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSample2, int32_t iStride2, int32_t* pSad) { + * (pSad) = WelsSampleSad4x4_c (iSample1, iStride1, (iSample2 - iStride2), iStride2); + * (pSad + 1) = WelsSampleSad4x4_c (iSample1, iStride1, (iSample2 + iStride2), iStride2); + * (pSad + 2) = WelsSampleSad4x4_c (iSample1, iStride1, (iSample2 - 1), iStride2); + * (pSad + 3) = WelsSampleSad4x4_c (iSample1, iStride1, (iSample2 + 1), iStride2); +} +void WelsSampleSadFour8x4_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSample2, int32_t iStride2, int32_t* pSad) { + * (pSad) = WelsSampleSad8x4_c (iSample1, iStride1, (iSample2 - iStride2), iStride2); + * (pSad + 1) = WelsSampleSad8x4_c (iSample1, iStride1, (iSample2 + iStride2), iStride2); + * (pSad + 2) = WelsSampleSad8x4_c (iSample1, iStride1, (iSample2 - 1), 
iStride2); + * (pSad + 3) = WelsSampleSad8x4_c (iSample1, iStride1, (iSample2 + 1), iStride2); +} +void WelsSampleSadFour4x8_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSample2, int32_t iStride2, int32_t* pSad) { + * (pSad) = WelsSampleSad4x8_c (iSample1, iStride1, (iSample2 - iStride2), iStride2); + * (pSad + 1) = WelsSampleSad4x8_c (iSample1, iStride1, (iSample2 + iStride2), iStride2); + * (pSad + 2) = WelsSampleSad4x8_c (iSample1, iStride1, (iSample2 - 1), iStride2); + * (pSad + 3) = WelsSampleSad4x8_c (iSample1, iStride1, (iSample2 + 1), iStride2); +} diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/src/utils.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/common/src/utils.cpp new file mode 100644 index 000000000..fc0fbf904 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/src/utils.cpp @@ -0,0 +1,126 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file utils.c + * + * \brief common tool/function utilization + * + * \date 03/10/2009 Created + * + ************************************************************************************* + */ +#include "utils.h" +#include "crt_util_safe_x.h" // Safe CRT routines like utils for cross platforms +#include "codec_app_def.h" +float WelsCalcPsnr (const void* kpTarPic, + const int32_t kiTarStride, + const void* kpRefPic, + const int32_t kiRefStride, + const int32_t kiWidth, + const int32_t kiHeight); + + +void WelsLog (SLogContext* logCtx, int32_t iLevel, const char* kpFmt, ...) 
{ + va_list vl; + char pTraceTag[MAX_LOG_SIZE] = {0}; + switch (iLevel) { + case WELS_LOG_ERROR: + WelsSnprintf (pTraceTag, MAX_LOG_SIZE, "[OpenH264] this = 0x%p, Error:", logCtx->pCodecInstance); + break; + case WELS_LOG_WARNING: + WelsSnprintf (pTraceTag, MAX_LOG_SIZE, "[OpenH264] this = 0x%p, Warning:", logCtx->pCodecInstance); + break; + case WELS_LOG_INFO: + WelsSnprintf (pTraceTag, MAX_LOG_SIZE, "[OpenH264] this = 0x%p, Info:", logCtx->pCodecInstance); + break; + case WELS_LOG_DEBUG: + WelsSnprintf (pTraceTag, MAX_LOG_SIZE, "[OpenH264] this = 0x%p, Debug:", logCtx->pCodecInstance); + break; + default: + WelsSnprintf (pTraceTag, MAX_LOG_SIZE, "[OpenH264] this = 0x%p, Detail:", logCtx->pCodecInstance); + break; + } + WelsStrcat (pTraceTag, MAX_LOG_SIZE, kpFmt); + va_start (vl, kpFmt); + logCtx->pfLog (logCtx->pLogCtx, iLevel, pTraceTag, vl); + va_end (vl); +} + +#ifndef CALC_PSNR +#define CONST_FACTOR_PSNR (10.0 / log(10.0)) // for good computation +#define CALC_PSNR(w, h, s) ((float)(CONST_FACTOR_PSNR * log( 65025.0 * w * h / s ))) +#endif//CALC_PSNR + +/* + * PSNR calculation routines + */ +/*! 
+ ************************************************************************************* + * \brief PSNR calculation utilization in Wels + * + * \param pTarPic target picture to be calculated in Picture pData format + * \param iTarStride stride of target picture pData pBuffer + * \param pRefPic base referencing picture samples + * \param iRefStride stride of reference picture pData pBuffer + * \param iWidth picture iWidth in pixel + * \param iHeight picture iHeight in pixel + * + * \return actual PSNR result; + * + * \note N/A + ************************************************************************************* + */ +float WelsCalcPsnr (const void* kpTarPic, + const int32_t kiTarStride, + const void* kpRefPic, + const int32_t kiRefStride, + const int32_t kiWidth, + const int32_t kiHeight) { + int64_t iSqe = 0; + int32_t x, y; + uint8_t* pTar = (uint8_t*)kpTarPic; + uint8_t* pRef = (uint8_t*)kpRefPic; + + if (NULL == pTar || NULL == pRef) + return (-1.0f); + + for (y = 0; y < kiHeight; ++ y) { // OPTable !! + for (x = 0; x < kiWidth; ++ x) { + const int32_t kiT = pTar[y * kiTarStride + x] - pRef[y * kiRefStride + x]; + iSqe += kiT * kiT; + } + } + if (0 == iSqe) { + return (99.99f); + } + return CALC_PSNR (kiWidth, kiHeight, iSqe); +} + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/src/welsCodecTrace.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/common/src/welsCodecTrace.cpp new file mode 100644 index 000000000..b19e0a53b --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/src/welsCodecTrace.cpp @@ -0,0 +1,103 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifdef _WIN32 +#include +#include +#endif + +#include +#include +#include + +#include "crt_util_safe_x.h" // Safe CRT routines like utils for cross platforms + +#include "welsCodecTrace.h" +#include "utils.h" + + + +static void welsStderrTrace (void* ctx, int level, const char* string) { + fprintf (stderr, "%s\n", string); +} + +welsCodecTrace::welsCodecTrace() { + + m_iTraceLevel = WELS_LOG_DEFAULT; + m_fpTrace = welsStderrTrace; + m_pTraceCtx = NULL; + + m_sLogCtx.pLogCtx = this; + m_sLogCtx.pfLog = StaticCodecTrace; + m_sLogCtx.pCodecInstance = NULL; +} + +welsCodecTrace::~welsCodecTrace() { + m_fpTrace = NULL; +} + + + +void welsCodecTrace::StaticCodecTrace (void* pCtx, const int32_t iLevel, const char* Str_Format, va_list vl) { + welsCodecTrace* self = (welsCodecTrace*) pCtx; + self->CodecTrace (iLevel, Str_Format, vl); +} + +void welsCodecTrace::CodecTrace (const int32_t iLevel, const char* Str_Format, va_list vl) { + if (m_iTraceLevel < iLevel) { + return; + } + + char pBuf[MAX_LOG_SIZE] = {0}; + WelsVsnprintf (pBuf, MAX_LOG_SIZE, Str_Format, vl); // confirmed_safe_unsafe_usage + if (m_fpTrace) { + m_fpTrace (m_pTraceCtx, iLevel, pBuf); + } +} + +void welsCodecTrace::SetCodecInstance (void* pCodecInstance) { + m_sLogCtx.pCodecInstance = pCodecInstance; +} + +void welsCodecTrace::SetTraceLevel (const int32_t iLevel) { + if (iLevel >= 0) + m_iTraceLevel = iLevel; +} + +void welsCodecTrace::SetTraceCallback (WelsTraceCallback func) { + m_fpTrace = func; +} + +void welsCodecTrace::SetTraceCallbackContext (void* ctx) { + m_pTraceCtx = ctx; +} + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/x86/asm_inc.asm b/TMessagesProj/jni/third_party/openh264/src/codec/common/x86/asm_inc.asm new file mode 100644 index 000000000..56366fb62 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/x86/asm_inc.asm @@ -0,0 +1,743 @@ +;*! +;* \copy +;* Copyright (c) 2009-2013, Cisco Systems +;* All rights reserved. 
+;* +;* Redistribution and use in source and binary forms, with or without +;* modification, are permitted provided that the following conditions +;* are met: +;* +;* * Redistributions of source code must retain the above copyright +;* notice, this list of conditions and the following disclaimer. +;* +;* * Redistributions in binary form must reproduce the above copyright +;* notice, this list of conditions and the following disclaimer in +;* the documentation and/or other materials provided with the +;* distribution. +;* +;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +;* POSSIBILITY OF SUCH DAMAGE. 
+;* +;* +;* sse2inc.asm +;* +;* Abstract +;* macro and constant +;* +;* History +;* 8/5/2009 Created +;* +;* +;*************************************************************************/ +;*********************************************************************** +; Options, for DEBUG +;*********************************************************************** + +%if 1 + %define MOVDQ movdqa +%else + %define MOVDQ movdqu +%endif + +%if 1 + %define WELSEMMS emms +%else + %define WELSEMMS +%endif + + +;*********************************************************************** +; Macros +;*********************************************************************** + +%ifdef WIN64 ; Windows x64 ;************************************ + +DEFAULT REL + +BITS 64 + +%define arg1 rcx +%define arg2 rdx +%define arg3 r8 +%define arg4 r9 +%define arg5 [rsp + push_num*8 + 40] +%define arg6 [rsp + push_num*8 + 48] +%define arg7 [rsp + push_num*8 + 56] +%define arg8 [rsp + push_num*8 + 64] +%define arg9 [rsp + push_num*8 + 72] +%define arg10 [rsp + push_num*8 + 80] +%define arg11 [rsp + push_num*8 + 88] +%define arg12 [rsp + push_num*8 + 96] + +%define arg1d ecx +%define arg2d edx +%define arg3d r8d +%define arg4d r9d +%define arg5d arg5 +%define arg6d arg6 +%define arg7d arg7 +%define arg8d arg8 +%define arg9d arg9 +%define arg10d arg10 +%define arg11d arg11 +%define arg12d arg12 + +%define r0 rcx +%define r1 rdx +%define r2 r8 +%define r3 r9 +%define r4 rax +%define r5 r10 +%define r6 r11 +%define r7 rsp + +%define r0d ecx +%define r1d edx +%define r2d r8d +%define r3d r9d +%define r4d eax +%define r5d r10d +%define r6d r11d + +%define r0w cx +%define r1w dx +%define r2w r8w +%define r3w r9w +%define r4w ax +%define r6w r11w + +%define r0b cl +%define r1b dl +%define r2b r8l +%define r3b r9l + +%define PUSHRFLAGS pushfq +%define POPRFLAGS popfq +%define retrq rax +%define retrd eax + +%elifdef UNIX64 ; Unix x64 ;************************************ + +DEFAULT REL + +BITS 64 + +%ifidn 
__OUTPUT_FORMAT__,elf64 +SECTION .note.GNU-stack noalloc noexec nowrite progbits ; Mark the stack as non-executable +%endif + +%define arg1 rdi +%define arg2 rsi +%define arg3 rdx +%define arg4 rcx +%define arg5 r8 +%define arg6 r9 +%define arg7 [rsp + push_num*8 + 8] +%define arg8 [rsp + push_num*8 + 16] +%define arg9 [rsp + push_num*8 + 24] +%define arg10 [rsp + push_num*8 + 32] +%define arg11 [rsp + push_num*8 + 40] +%define arg12 [rsp + push_num*8 + 48] + +%define arg1d edi +%define arg2d esi +%define arg3d edx +%define arg4d ecx +%define arg5d r8d +%define arg6d r9d +%define arg7d arg7 +%define arg8d arg8 +%define arg9d arg9 +%define arg10d arg10 +%define arg11d arg11 +%define arg12d arg12 + +%define r0 rdi +%define r1 rsi +%define r2 rdx +%define r3 rcx +%define r4 r8 +%define r5 r9 +%define r6 r10 +%define r7 rsp + +%define r0d edi +%define r1d esi +%define r2d edx +%define r3d ecx +%define r4d r8d +%define r5d r9d +%define r6d r10d + +%define r0w di +%define r1w si +%define r2w dx +%define r3w cx +%define r4w r8w +%define r6w r10w + +%define r0b dil +%define r1b sil +%define r2b dl +%define r3b cl + +%define PUSHRFLAGS pushfq +%define POPRFLAGS popfq +%define retrq rax +%define retrd eax + +%elifdef X86_32 ; X86_32 ;************************************ + +BITS 32 + +%ifidn __OUTPUT_FORMAT__,elf +SECTION .note.GNU-stack noalloc noexec nowrite progbits ; Mark the stack as non-executable +%endif + +%define arg1 [esp + push_num*4 + 4] +%define arg2 [esp + push_num*4 + 8] +%define arg3 [esp + push_num*4 + 12] +%define arg4 [esp + push_num*4 + 16] +%define arg5 [esp + push_num*4 + 20] +%define arg6 [esp + push_num*4 + 24] +%define arg7 [esp + push_num*4 + 28] +%define arg8 [esp + push_num*4 + 32] +%define arg9 [esp + push_num*4 + 36] +%define arg10 [esp + push_num*4 + 40] +%define arg11 [esp + push_num*4 + 44] +%define arg12 [esp + push_num*4 + 48] + +%define arg1d arg1 +%define arg2d arg2 +%define arg3d arg3 +%define arg4d arg4 +%define arg5d arg5 +%define arg6d 
arg6 +%define arg7d arg7 +%define arg8d arg8 +%define arg9d arg9 +%define arg10d arg10 +%define arg11d arg11 +%define arg12d arg12 + +%define r0 eax +%define r1 ecx +%define r2 edx +%define r3 ebx +%define r4 esi +%define r5 edi +%define r6 ebp +%define r7 esp + +%define r0d eax +%define r1d ecx +%define r2d edx +%define r3d ebx +%define r4d esi +%define r5d edi +%define r6d ebp + +%define r0w ax +%define r1w cx +%define r2w dx +%define r3w bx +%define r4w si +%define r6w bp + +%define r0b al +%define r1b cl +%define r2b dl +%define r3b bl + +%define PUSHRFLAGS pushfd +%define POPRFLAGS popfd +%define retrq eax ; 32 bit mode do not support 64 bits regesters +%define retrd eax + +%endif + +%macro LOAD_PARA 2 + mov %1, %2 +%endmacro + +%macro LOAD_1_PARA 0 + %ifdef X86_32 + mov r0, [esp + push_num*4 + 4] + %endif +%endmacro + +%macro LOAD_2_PARA 0 + %ifdef X86_32 + mov r0, [esp + push_num*4 + 4] + mov r1, [esp + push_num*4 + 8] + %endif +%endmacro + +%macro LOAD_3_PARA 0 + %ifdef X86_32 + mov r0, [esp + push_num*4 + 4] + mov r1, [esp + push_num*4 + 8] + mov r2, [esp + push_num*4 + 12] + %endif +%endmacro + +%macro LOAD_4_PARA 0 + %ifdef X86_32 + push r3 + %assign push_num push_num+1 + mov r0, [esp + push_num*4 + 4] + mov r1, [esp + push_num*4 + 8] + mov r2, [esp + push_num*4 + 12] + mov r3, [esp + push_num*4 + 16] + %endif +%endmacro + +%macro LOAD_5_PARA 0 + %ifdef X86_32 + push r3 + push r4 + %assign push_num push_num+2 + mov r0, [esp + push_num*4 + 4] + mov r1, [esp + push_num*4 + 8] + mov r2, [esp + push_num*4 + 12] + mov r3, [esp + push_num*4 + 16] + mov r4, [esp + push_num*4 + 20] + %elifdef WIN64 + mov r4, [rsp + push_num*8 + 40] + %endif +%endmacro + +%macro LOAD_6_PARA 0 + %ifdef X86_32 + push r3 + push r4 + push r5 + %assign push_num push_num+3 + mov r0, [esp + push_num*4 + 4] + mov r1, [esp + push_num*4 + 8] + mov r2, [esp + push_num*4 + 12] + mov r3, [esp + push_num*4 + 16] + mov r4, [esp + push_num*4 + 20] + mov r5, [esp + push_num*4 + 24] + %elifdef 
WIN64 + mov r4, [rsp + push_num*8 + 40] + mov r5, [rsp + push_num*8 + 48] + %endif +%endmacro + +%macro LOAD_7_PARA 0 + %ifdef X86_32 + push r3 + push r4 + push r5 + push r6 + %assign push_num push_num+4 + mov r0, [esp + push_num*4 + 4] + mov r1, [esp + push_num*4 + 8] + mov r2, [esp + push_num*4 + 12] + mov r3, [esp + push_num*4 + 16] + mov r4, [esp + push_num*4 + 20] + mov r5, [esp + push_num*4 + 24] + mov r6, [esp + push_num*4 + 28] + %elifdef WIN64 + mov r4, [rsp + push_num*8 + 40] + mov r5, [rsp + push_num*8 + 48] + mov r6, [rsp + push_num*8 + 56] + %elifdef UNIX64 + mov r6, [rsp + push_num*8 + 8] + %endif +%endmacro + + + +%macro LOAD_4_PARA_POP 0 + %ifdef X86_32 + pop r3 + %endif +%endmacro + +%macro LOAD_5_PARA_POP 0 + %ifdef X86_32 + pop r4 + pop r3 + %endif +%endmacro + +%macro LOAD_6_PARA_POP 0 + %ifdef X86_32 + pop r5 + pop r4 + pop r3 + %endif +%endmacro + +%macro LOAD_7_PARA_POP 0 + %ifdef X86_32 + pop r6 + pop r5 + pop r4 + pop r3 + %endif +%endmacro + +%macro PUSH_XMM 1 + %ifdef WIN64 + %assign xmm_num_regs %1 + %if xmm_num_regs > 6 + %ifdef push_num + %assign push_num push_num+2*(%1-6) + %endif + sub rsp, 16*(%1 - 6) + movdqu [rsp], xmm6 + %endif + %if xmm_num_regs > 7 + movdqu [rsp+16], xmm7 + %endif + %if xmm_num_regs > 8 + movdqu [rsp+32], xmm8 + %endif + %if xmm_num_regs > 9 + movdqu [rsp+48], xmm9 + %endif + %if xmm_num_regs > 10 + movdqu [rsp+64], xmm10 + %endif + %if xmm_num_regs > 11 + movdqu [rsp+80], xmm11 + %endif + %if xmm_num_regs > 12 + movdqu [rsp+96], xmm12 + %endif + %if xmm_num_regs > 13 + movdqu [rsp+112], xmm13 + %endif + %if xmm_num_regs > 14 + movdqu [rsp+128], xmm14 + %endif + %if xmm_num_regs > 15 + movdqu [rsp+144], xmm15 + %endif + %endif +%endmacro + +%macro POP_XMM 0 + %ifdef WIN64 + %if xmm_num_regs > 15 + movdqu xmm15, [rsp+144] + %endif + %if xmm_num_regs > 14 + movdqu xmm14, [rsp+128] + %endif + %if xmm_num_regs > 13 + movdqu xmm13, [rsp+112] + %endif + %if xmm_num_regs > 12 + movdqu xmm12, [rsp+96] + %endif + %if 
xmm_num_regs > 11 + movdqu xmm11, [rsp+80] + %endif + %if xmm_num_regs > 10 + movdqu xmm10, [rsp+64] + %endif + %if xmm_num_regs > 9 + movdqu xmm9, [rsp+48] + %endif + %if xmm_num_regs > 8 + movdqu xmm8, [rsp+32] + %endif + %if xmm_num_regs > 7 + movdqu xmm7, [rsp+16] + %endif + %if xmm_num_regs > 6 + movdqu xmm6, [rsp] + add rsp, 16*(xmm_num_regs - 6) + %endif + %endif +%endmacro + +%macro SIGN_EXTENSION 2 + %ifndef X86_32 + movsxd %1, %2 + %endif +%endmacro + +%macro SIGN_EXTENSIONW 2 + %ifndef X86_32 + movsx %1, %2 + %endif +%endmacro + +%macro ZERO_EXTENSION 1 + %ifndef X86_32 + mov dword %1, %1 + %endif +%endmacro + +%macro WELS_EXTERN 1 + ALIGN 16, nop + %ifdef PREFIX + %ifdef WELS_PRIVATE_EXTERN + global _%1: WELS_PRIVATE_EXTERN + %else + global _%1 + %endif + %define %1 _%1 + %else + %ifdef WELS_PRIVATE_EXTERN + global %1: WELS_PRIVATE_EXTERN + %else + global %1 + %endif + %endif + %1: +%endmacro + +%macro WELS_AbsW 2 + pxor %2, %2 + psubw %2, %1 + pmaxsw %1, %2 +%endmacro + +%macro MMX_XSwap 4 + movq %4, %2 + punpckh%1 %4, %3 + punpckl%1 %2, %3 +%endmacro + +; pOut mm1, mm4, mm5, mm3 +%macro MMX_Trans4x4W 5 + MMX_XSwap wd, %1, %2, %5 + MMX_XSwap wd, %3, %4, %2 + MMX_XSwap dq, %1, %3, %4 + MMX_XSwap dq, %5, %2, %3 +%endmacro + +;for TRANSPOSE +%macro SSE2_XSawp 4 + movdqa %4, %2 + punpckl%1 %2, %3 + punpckh%1 %4, %3 +%endmacro + +; in: xmm1, xmm2, xmm3, xmm4 pOut: xmm1, xmm4, xmm5, mm3 +%macro SSE2_Trans4x4D 5 + SSE2_XSawp dq, %1, %2, %5 + SSE2_XSawp dq, %3, %4, %2 + SSE2_XSawp qdq, %1, %3, %4 + SSE2_XSawp qdq, %5, %2, %3 +%endmacro + +;in: xmm0, xmm1, xmm2, xmm3 pOut: xmm0, xmm1, xmm3, xmm4 +%macro SSE2_TransTwo4x4W 5 + SSE2_XSawp wd, %1, %2, %5 + SSE2_XSawp wd, %3, %4, %2 + SSE2_XSawp dq, %1, %3, %4 + SSE2_XSawp dq, %5, %2, %3 + SSE2_XSawp qdq, %1, %5, %2 + SSE2_XSawp qdq, %4, %3, %5 +%endmacro + +;in: m1, m2, m3, m4, m5, m6, m7, m8 +;pOut: m5, m3, m4, m8, m6, m2, m7, m1 +%macro SSE2_TransTwo8x8B 9 + movdqa %9, %8 + SSE2_XSawp bw, %1, %2, %8 + SSE2_XSawp 
bw, %3, %4, %2 + SSE2_XSawp bw, %5, %6, %4 + movdqa %6, %9 + movdqa %9, %4 + SSE2_XSawp bw, %7, %6, %4 + + SSE2_XSawp wd, %1, %3, %6 + SSE2_XSawp wd, %8, %2, %3 + SSE2_XSawp wd, %5, %7, %2 + movdqa %7, %9 + movdqa %9, %3 + SSE2_XSawp wd, %7, %4, %3 + + SSE2_XSawp dq, %1, %5, %4 + SSE2_XSawp dq, %6, %2, %5 + SSE2_XSawp dq, %8, %7, %2 + movdqa %7, %9 + movdqa %9, %5 + SSE2_XSawp dq, %7, %3, %5 + + SSE2_XSawp qdq, %1, %8, %3 + SSE2_XSawp qdq, %4, %2, %8 + SSE2_XSawp qdq, %6, %7, %2 + movdqa %7, %9 + movdqa %9, %1 + SSE2_XSawp qdq, %7, %5, %1 + movdqa %5, %9 +%endmacro + +;xmm0, xmm6, xmm7, [eax], [ecx] +;xmm7 = 0, eax = pix1, ecx = pix2, xmm0 save the result +%macro SSE2_LoadDiff8P 5 + movq %1, %4 + punpcklbw %1, %3 + movq %2, %5 + punpcklbw %2, %3 + psubw %1, %2 +%endmacro + +; m2 = m1 + m2, m1 = m1 - m2 +%macro SSE2_SumSub 3 + movdqa %3, %2 + paddw %2, %1 + psubw %1, %3 +%endmacro + + +%macro butterfly_1to16_sse 3 ; xmm? for dst, xmm? for tmp, one byte for pSrc [generic register name: a/b/c/d] + mov %3h, %3l + movd %1, e%3x ; i.e, 1% = eax (=b0) + pshuflw %2, %1, 00h ; ..., b0 b0 b0 b0 b0 b0 b0 b0 + pshufd %1, %2, 00h ; b0 b0 b0 b0, b0 b0 b0 b0, b0 b0 b0 b0, b0 b0 b0 b0 +%endmacro + +;copy a dw into a xmm for 8 times +%macro SSE2_Copy8Times 2 + movd %1, %2 + punpcklwd %1, %1 + pshufd %1, %1, 0 +%endmacro + +;copy a db into a xmm for 16 times +%macro SSE2_Copy16Times 2 + movd %1, %2 + pshuflw %1, %1, 0 + punpcklqdq %1, %1 + packuswb %1, %1 +%endmacro + + + +;*********************************************************************** +;preprocessor constants +;*********************************************************************** +;dw 32,32,32,32,32,32,32,32 for xmm +;dw 32,32,32,32 for mm +%macro WELS_DW32 1 + pcmpeqw %1,%1 + psrlw %1,15 + psllw %1,5 +%endmacro + +;dw 1, 1, 1, 1, 1, 1, 1, 1 for xmm +;dw 1, 1, 1, 1 for mm +%macro WELS_DW1 1 + pcmpeqw %1,%1 + psrlw %1,15 +%endmacro + +;all 0 for xmm and mm +%macro WELS_Zero 1 + pxor %1, %1 +%endmacro + +;dd 1, 1, 1, 1 for 
xmm +;dd 1, 1 for mm +%macro WELS_DD1 1 + pcmpeqw %1,%1 + psrld %1,31 +%endmacro + +;dB 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 +%macro WELS_DB1 1 + pcmpeqw %1,%1 + psrlw %1,15 + packuswb %1,%1 +%endmacro + +%macro WELS_DW1_VEX 1 + vpcmpeqw %1, %1, %1 + vpsrlw %1, %1, 15 +%endmacro + +%macro WELS_DW32_VEX 1 + vpcmpeqw %1, %1, %1 + vpsrlw %1, %1, 15 + vpsllw %1, %1, 5 +%endmacro + +%macro WELS_DW32767_VEX 1 + vpcmpeqw %1, %1, %1 + vpsrlw %1, %1, 1 +%endmacro + + +;*********************************************************************** +; Utility macros for X86_32 PIC support +;*********************************************************************** + +; Used internally by other macros. +%macro INIT_X86_32_PIC_ 2 +%ifdef X86_32_PICASM + %xdefine pic_ptr %1 + %xdefine pic_ptr_preserve %2 + %if pic_ptr_preserve + %assign push_num push_num+1 + push pic_ptr + %endif + call %%get_pc +%%pic_refpoint: + jmp %%pic_init_done +%%get_pc: + mov pic_ptr, [esp] + ret +%%pic_init_done: + %define pic(data_addr) (pic_ptr+(data_addr)-%%pic_refpoint) +%else + %define pic(data_addr) (data_addr) +%endif +%endmacro + +; Get program counter and define a helper macro "pic(addr)" to convert absolute +; addresses to program counter-relative addresses if X86_32_PICASM is defined. +; Otherwise define "pic(addr)" as an identity function. +; %1=register to store PC/EIP in. +%macro INIT_X86_32_PIC 1 + INIT_X86_32_PIC_ %1, 1 +%endmacro + +; Equivalent as above, but without preserving the value of the register argument. +%macro INIT_X86_32_PIC_NOPRESERVE 1 + INIT_X86_32_PIC_ %1, 0 +%endmacro + +; Clean up after INIT_X86_32_PIC. +; Restore the register used to hold PC/EIP if applicable, and undefine defines. +%macro DEINIT_X86_32_PIC 0 +%ifdef X86_32_PICASM + %if pic_ptr_preserve + pop pic_ptr + %assign push_num push_num-1 + %endif + %undef pic_ptr + %undef pic_ptr_preserve +%endif + %undef pic +%endmacro + +; Equivalent as above, but without undefining. Useful for functions with +; multiple epilogues. 
+%macro DEINIT_X86_32_PIC_KEEPDEF 0 +%ifdef X86_32_PICASM + %if pic_ptr_preserve + pop pic_ptr + %assign push_num push_num-1 + %endif +%endif +%endmacro diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/x86/cpuid.asm b/TMessagesProj/jni/third_party/openh264/src/codec/common/x86/cpuid.asm new file mode 100644 index 000000000..5b0c77758 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/x86/cpuid.asm @@ -0,0 +1,212 @@ +;*! +;* \copy +;* Copyright (c) 2009-2013, Cisco Systems +;* All rights reserved. +;* +;* Redistribution and use in source and binary forms, with or without +;* modification, are permitted provided that the following conditions +;* are met: +;* +;* * Redistributions of source code must retain the above copyright +;* notice, this list of conditions and the following disclaimer. +;* +;* * Redistributions in binary form must reproduce the above copyright +;* notice, this list of conditions and the following disclaimer in +;* the documentation and/or other materials provided with the +;* distribution. +;* +;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +;* POSSIBILITY OF SUCH DAMAGE. 
+;* +;* +;* cpu_mmx.asm +;* +;* Abstract +;* verify cpuid feature support and cpuid detection +;* +;* History +;* 04/29/2009 Created +;* +;*************************************************************************/ + +%include "asm_inc.asm" + +;****************************************************************************************** +; Macros +;****************************************************************************************** + + +;****************************************************************************************** +; Code +;****************************************************************************************** + +SECTION .text + +; refer to "The IA-32 Intel(R) Architecture Software Developers Manual, Volume 2A A-M" +; section CPUID - CPU Identification + +;****************************************************************************************** +; int32_t WelsCPUIdVerify() +;****************************************************************************************** +WELS_EXTERN WelsCPUIdVerify + push r1 + PUSHRFLAGS + PUSHRFLAGS + + pop r1 + mov eax, r1d + xor eax, 00200000h + xor eax, r1d + POPRFLAGS + pop r1 + ret + +;**************************************************************************************************** +; void WelsCPUId( int32_t uiIndex, int32_t *pFeatureA, int32_t *pFeatureB, int32_t *pFeatureC, int32_t *pFeatureD ) +;**************************************************************************************************** +%ifdef WIN64 + +WELS_EXTERN WelsCPUId + push rbx + push rdx + + mov eax, ecx + mov ecx, [r9] + cpuid + mov [r9], ecx + mov [r8], ebx + mov rcx, [rsp + 2*8 + 40] + mov [rcx], edx + pop rdx + mov [rdx], eax + + pop rbx + ret + +%elifdef UNIX64 +WELS_EXTERN WelsCPUId + push rbx + push rcx + push rdx + + mov eax, edi + mov ecx, [rcx] + cpuid + mov [r8], edx + pop rdx + pop r8 + mov [r8], ecx + mov [rdx], ebx + mov [rsi], eax + + pop rbx + ret + +%elifdef X86_32 + +WELS_EXTERN WelsCPUId + push ebx + push edi + + mov eax, 
[esp+12] ; operating index + mov edi, [esp+24] + mov ecx, [edi] + cpuid ; cpuid + + ; processing various information return + mov edi, [esp+16] + mov [edi], eax + mov edi, [esp+20] + mov [edi], ebx + mov edi, [esp+24] + mov [edi], ecx + mov edi, [esp+28] + mov [edi], edx + + pop edi + pop ebx + ret + +%endif + +; need call after cpuid=1 and eax, ecx flag got then +;**************************************************************************************************** +; int32_t WelsCPUSupportAVX( uint32_t eax, uint32_t ecx ) +;**************************************************************************************************** +WELS_EXTERN WelsCPUSupportAVX +%ifdef WIN64 + mov eax, ecx + mov ecx, edx +%elifdef UNIX64 + mov eax, edi + mov ecx, esi +%else + mov eax, [esp+4] + mov ecx, [esp+8] +%endif + + ; refer to detection of AVX addressed in INTEL AVX manual document + and ecx, 018000000H + cmp ecx, 018000000H ; check both OSXSAVE and AVX feature flags + jne avx_not_supported + ; processor supports AVX instructions and XGETBV is enabled by OS + mov ecx, 0 ; specify 0 for XFEATURE_ENABLED_MASK register + XGETBV ; result in EDX:EAX + and eax, 06H + cmp eax, 06H ; check OS has enabled both XMM and YMM state support + jne avx_not_supported + mov eax, 1 + ret +avx_not_supported: + mov eax, 0 + ret + + +; need call after cpuid=1 and eax, ecx flag got then +;**************************************************************************************************** +; int32_t WelsCPUSupportFMA( uint32_t eax, uint32_t ecx ) +;**************************************************************************************************** +WELS_EXTERN WelsCPUSupportFMA +%ifdef WIN64 + mov eax, ecx + mov ecx, edx +%elifdef UNIX64 + mov eax, edi + mov ecx, esi +%else + mov eax, [esp+4] + mov ecx, [esp+8] +%endif + ; refer to detection of FMA addressed in INTEL AVX manual document + and ecx, 018001000H + cmp ecx, 018001000H ; check OSXSAVE, AVX, FMA feature flags + jne fma_not_supported + ; processor 
supports AVX,FMA instructions and XGETBV is enabled by OS + mov ecx, 0 ; specify 0 for XFEATURE_ENABLED_MASK register + XGETBV ; result in EDX:EAX + and eax, 06H + cmp eax, 06H ; check OS has enabled both XMM and YMM state support + jne fma_not_supported + mov eax, 1 + ret +fma_not_supported: + mov eax, 0 + ret + +;****************************************************************************************** +; void WelsEmms() +;****************************************************************************************** +WELS_EXTERN WelsEmms + emms ; empty mmx technology states + ret + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/x86/dct.asm b/TMessagesProj/jni/third_party/openh264/src/codec/common/x86/dct.asm new file mode 100644 index 000000000..3b203a587 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/x86/dct.asm @@ -0,0 +1,1036 @@ +;*! +;* \copy +;* Copyright (c) 2009-2013, Cisco Systems +;* All rights reserved. +;* +;* Redistribution and use in source and binary forms, with or without +;* modification, are permitted provided that the following conditions +;* are met: +;* +;* ?Redistributions of source code must retain the above copyright +;* notice, this list of conditions and the following disclaimer. +;* +;* ?Redistributions in binary form must reproduce the above copyright +;* notice, this list of conditions and the following disclaimer in +;* the documentation and/or other materials provided with the +;* distribution. +;* +;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +;* POSSIBILITY OF SUCH DAMAGE. +;* +;* +;* dct.asm +;* +;* History +;* 8/4/2009 Created +;* +;* +;*************************************************************************/ + +%include "asm_inc.asm" + +%macro LOAD_3_PARA_TO_5_PARA_IDCT 0 +%ifdef X86_32 + push r3 + push r4 + %assign push_num push_num+2 + mov r0, [esp + push_num*4 + 4] + mov r1, [esp + push_num*4 + 8] + mov r4, [esp + push_num*4 + 12] +%else + mov r4, r2 +%endif + mov r2, r0 + mov r3, r1 +%endmacro + +%ifdef PREFIX + %define prefixed(a) _ %+ a +%else + %define prefixed(a) a +%endif + +%ifdef X86_32_PICASM +SECTION .text align=32 +%else +SECTION .rodata align=32 +%endif + +;*********************************************************************** +; Constant +;*********************************************************************** + +align 32 +wels_shufb0312_movzxw_128: + db 0, 80h, 3, 80h, 1, 80h, 2, 80h, 4, 80h, 7, 80h, 5, 80h, 6, 80h +wels_shufb2301_128: + db 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13 +wels_shufb0231_128: + db 0, 2, 3, 1, 4, 6, 7, 5, 8, 10, 11, 9, 12, 14, 15, 13 +wels_dw32_128: + times 8 dw 32 +wels_p1m1p1m1w_256: + times 8 dw 1, -1 +wels_p1p2m1m2w_256: + times 4 dw 1, 2, -1, -2 +wels_p1p1m1m1w_256: + times 4 dw 1, 1, -1, -1 +wels_8xp1w_8xm1w: + times 8 dw 1 + times 8 dw -1 +wels_4xp1w_4xm1w_256: + times 4 dw 1 + times 4 dw -1 + times 4 dw 1 + times 4 dw -1 +wels_4xp1w_4xp2w_4xm1w_4xm2w: + times 4 dw 1 + times 4 dw 2 + times 4 dw -1 + times 4 dw -2 + +align 16 
+wels_p1m1p1m1w_128: + times 4 dw 1, -1 +wels_p1p2p1p2w_128: + times 4 dw 1, 2 +wels_p1m1m1p1w_128: + times 2 dw 1, -1, -1, 1 +wels_p0m8000p0m8000w_128: + times 4 dw 0, -8000h +wels_p1p1m1m1w_128: + times 2 dw 1, 1, -1, -1 +wels_4xp1w_4xp2w: + times 4 dw 1 + times 4 dw 2 +wels_4xp0w_4xm8000w: + times 4 dw 0 + times 4 dw -8000h + +SECTION .text + +;*********************************************************************** +; MMX functions +;*********************************************************************** + +%macro MMX_LoadDiff4P 5 + movd %1, [%3] + movd %2, [%4] + punpcklbw %1, %5 + punpcklbw %2, %5 + psubw %1, %2 +%endmacro + +%macro MMX_LoadDiff4x4P 10 ;d0, d1, d2, d3, pix1address, pix1stride, pix2address, pix2stride, tmp(mm), 0(mm) + MMX_LoadDiff4P %1, %9, %5, %7, %10 + MMX_LoadDiff4P %2, %9, %5+%6, %7+%8, %10 + lea %5, [%5+2*%6] + lea %7, [%7+2*%8] + MMX_LoadDiff4P %3, %9, %5, %7, %10 + MMX_LoadDiff4P %4, %9, %5+%6, %7+%8, %10 +%endmacro + +%macro MMX_SumSubMul2 3 + movq %3, %1 + psllw %1, $01 + paddw %1, %2 + psllw %2, $01 + psubw %3, %2 +%endmacro + +%macro MMX_SumSubDiv2 3 + movq %3, %2 + psraw %3, $01 + paddw %3, %1 + psraw %1, $01 + psubw %1, %2 +%endmacro + +%macro MMX_SumSub 3 + movq %3, %2 + psubw %2, %1 + paddw %1, %3 +%endmacro + +%macro MMX_DCT 6 + MMX_SumSub %4, %1, %6 + MMX_SumSub %3, %2, %6 + MMX_SumSub %3, %4, %6 + MMX_SumSubMul2 %1, %2, %5 +%endmacro + +%macro MMX_IDCT 6 + MMX_SumSub %4, %5, %6 + MMX_SumSubDiv2 %3, %2, %1 + MMX_SumSub %1, %4, %6 + MMX_SumSub %3, %5, %6 +%endmacro + +%macro MMX_StoreDiff4P 6 + movd %2, %6 + punpcklbw %2, %4 + paddw %1, %3 + psraw %1, $06 + paddsw %1, %2 + packuswb %1, %2 + movd %5, %1 +%endmacro + +;*********************************************************************** +; void WelsDctT4_mmx( int16_t *pDct[4], uint8_t *pix1, int32_t i_pix1, uint8_t *pix2, int32_t i_pix2 ) +;*********************************************************************** +WELS_EXTERN WelsDctT4_mmx + %assign push_num 0 + LOAD_5_PARA + 
SIGN_EXTENSION r2, r2d + SIGN_EXTENSION r4, r4d + WELS_Zero mm7 + + MMX_LoadDiff4x4P mm1, mm2, mm3, mm4, r1, r2, r3, r4, mm0, mm7 + + MMX_DCT mm1, mm2, mm3 ,mm4, mm5, mm6 + MMX_Trans4x4W mm3, mm1, mm4, mm5, mm2 + + MMX_DCT mm3, mm5, mm2 ,mm4, mm1, mm6 + MMX_Trans4x4W mm2, mm3, mm4, mm1, mm5 + + movq [r0+ 0], mm2 + movq [r0+ 8], mm1 + movq [r0+16], mm5 + movq [r0+24], mm4 + WELSEMMS + LOAD_5_PARA_POP + ret + +;*********************************************************************** +; void IdctResAddPred_mmx(uint8_t* pPred, int32_t iStride, int16_t* pDct); +;*********************************************************************** +WELS_EXTERN IdctResAddPred_mmx + %assign push_num 0 + LOAD_3_PARA_TO_5_PARA_IDCT + jmp prefixed(WelsIDctT4Rec_mmx.begin) + +;*********************************************************************** +; void WelsIDctT4Rec_mmx(uint8_t *rec, int32_t stride, uint8_t *pred, int32_t pred_stride, int16_t *rs) +;*********************************************************************** +WELS_EXTERN WelsIDctT4Rec_mmx + %assign push_num 0 + LOAD_5_PARA +.begin: + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + movq mm0, [r4+ 0] + movq mm1, [r4+ 8] + movq mm2, [r4+16] + movq mm3, [r4+24] + + MMX_Trans4x4W mm0, mm1, mm2, mm3, mm4 + MMX_IDCT mm1, mm2, mm3, mm4, mm0, mm6 + MMX_Trans4x4W mm1, mm3, mm0, mm4, mm2 + MMX_IDCT mm3, mm0, mm4, mm2, mm1, mm6 + + WELS_Zero mm7 + WELS_DW32 mm6 + + MMX_StoreDiff4P mm3, mm0, mm6, mm7, [r0], [r2] + MMX_StoreDiff4P mm4, mm0, mm6, mm7, [r0+r1], [r2+r3] + lea r0, [r0+2*r1] + lea r2, [r2+2*r3] + MMX_StoreDiff4P mm1, mm0, mm6, mm7, [r0], [r2] + MMX_StoreDiff4P mm2, mm0, mm6, mm7, [r0+r1], [r2+r3] + + WELSEMMS + LOAD_5_PARA_POP + ret + + +;*********************************************************************** +; SSE2 functions +;*********************************************************************** + +%macro SSE2_Store4x8p 6 + movlps [%1+0x00], %2 + movhps [%1+0x20], %2 + movlps [%1+0x08], %3 + movhps [%1+0x28], %3 + movlps 
[%1+0x10], %4 + movhps [%1+0x30], %4 + movlps [%1+0x18], %5 + movhps [%1+0x38], %5 +%endmacro + +%macro SSE2_Load4x8p 6 + MOVDQ %2, [%1+0x00] + MOVDQ %4, [%1+0x10] + MOVDQ %6, [%1+0x20] + MOVDQ %3, [%1+0x30] + SSE2_XSawp qdq, %4, %3, %5 + SSE2_XSawp qdq, %2, %6, %3 +%endmacro + +%macro SSE2_SumSubMul2 3 + movdqa %3, %1 + psllw %1, 1 + paddw %1, %2 + psllw %2, 1 + psubw %3, %2 +%endmacro + +%macro SSE2_SumSubDiv2 4 + movdqa %4, %1 + movdqa %3, %2 + psraw %2, $01 + psraw %4, $01 + paddw %1, %2 + psubw %4, %3 +%endmacro + +%macro SSE2_StoreDiff16p 9 + paddw %1, %4 + psraw %1, $06 + movq %3, %7 + punpcklbw %3, %5 + paddsw %1, %3 + paddw %2, %4 + psraw %2, $06 + movq %3, %9 + punpcklbw %3, %5 + paddsw %2, %3 + packuswb %1, %2 + movlps %6, %1 + movhps %8, %1 +%endmacro + +%macro SSE2_StoreDiff8p 5 + movq %2, %5 + punpcklbw %2, %3 + paddsw %2, %1 + packuswb %2, %2 + movq %4, %2 +%endmacro + +%macro SSE2_Load2x4P 2 + MOVDQ %1, [%2] +%endmacro + +%macro SSE2_Store2x4P 2 + MOVDQ [%1], %2 +%endmacro + +; out=%1 pPixel1Line1=%2 pPixel1Line2=%3 pPixel2Line1=%4 pPixel2Line2=%5 zero=%6 clobber=%7,%8 +%macro SSE2_LoadDiff2x4P 8 + movd %1, [%2] + movd %7, [%3] + punpckldq %1, %7 + punpcklbw %1, %6 + movd %7, [%4] + movd %8, [%5] + punpckldq %7, %8 + punpcklbw %7, %6 + psubw %1, %7 +%endmacro + +; pRec1=%1 pRec2=%2 data=%3 pPred1=%4 pPred2=%5 dw32=%6 zero=%7 clobber=%8,%9 +%macro SSE2_StoreDiff2x4P 9 + paddw %3, %6 + psraw %3, 6 + movd %8, [%4] + movd %9, [%5] + punpckldq %8, %9 + punpcklbw %8, %7 + paddsw %3, %8 + packuswb %3, %3 + movd [%1], %3 + psrlq %3, 32 + movd [%2], %3 +%endmacro + +%macro SSE2_Load8DC 6 + movdqa %1, %6 ; %1 = dc0 dc1 + paddw %1, %5 + psraw %1, $06 ; (dc + 32) >> 6 + + movdqa %2, %1 + psrldq %2, 4 + punpcklwd %2, %2 + punpckldq %2, %2 ; %2 = dc2 dc2 dc2 dc2 dc3 dc3 dc3 dc3 + + movdqa %3, %1 + psrldq %3, 8 + punpcklwd %3, %3 + punpckldq %3, %3 ; %3 = dc4 dc4 dc4 dc4 dc5 dc5 dc5 dc5 + + movdqa %4, %1 + psrldq %4, 12 + punpcklwd %4, %4 + punpckldq %4, %4 ; %4 = 
dc6 dc6 dc6 dc6 dc7 dc7 dc7 dc7 + + punpcklwd %1, %1 + punpckldq %1, %1 ; %1 = dc0 dc0 dc0 dc0 dc1 dc1 dc1 dc1 +%endmacro + +%macro SSE2_DCT 6 + SSE2_SumSub %6, %3, %5 + SSE2_SumSub %1, %2, %5 + SSE2_SumSub %3, %2, %5 + SSE2_SumSubMul2 %6, %1, %4 +%endmacro + +%macro SSE2_IDCT 7 + SSE2_SumSub %7, %2, %6 + SSE2_SumSubDiv2 %1, %3, %5, %4 + SSE2_SumSub %2, %1, %5 + SSE2_SumSub %7, %4, %5 +%endmacro + +; Do 2 horizontal 4-pt DCTs in parallel packed as 8 words in an xmm register. +; out=%1 in=%1 clobber=%2 +%macro SSE2_DCT_HORIZONTAL 2 + pshuflw %2, %1, 1bh ; [x[3],x[2],x[1],x[0]] low qw + pmullw %1, [pic(wels_p1m1p1m1w_128)] ; [x[0],-x[1],x[2],-x[3], ...] + pshufhw %2, %2, 1bh ; [x[3],x[2],x[1],x[0]] high qw + paddw %1, %2 ; s = [x[0]+x[3],-x[1]+x[2],x[2]+x[1],-x[3]+x[0], ...] + pshufd %2, %1, 0b1h ; [s[2],s[3],s[0],s[1], ...] + pmullw %1, [pic(wels_p1m1m1p1w_128)] ; [s[0],-s[1],-s[2],s[3], ...] + pmullw %2, [pic(wels_p1p2p1p2w_128)] ; [s[2],2*s[3],s[0],2*s[1], ...] + paddw %1, %2 ; y = [s[0]+s[2],-s[1]+2*s[3],-s[2]+s[0],s[3]+2*s[1], ...] +%endmacro + +; Do 2 horizontal 4-pt IDCTs in parallel packed as 8 words in an xmm register. +; +; Use a multiply by reciprocal to get -x>>1, and x+=-x>>1 to get x>>1, which +; avoids a cumbersome blend with SSE2 to get a vector with right-shifted odd +; elements. +; +; out=%1 in=%1 wels_p1m1m1p1w_128=%2 clobber=%3,%4 +%macro SSE2_IDCT_HORIZONTAL 4 + movdqa %3, [pic(wels_p0m8000p0m8000w_128)] + pmulhw %3, %1 ; x[0:7] * [0,-8000h,0,-8000h, ...] >> 16 + pshufd %4, %1, 0b1h ; [x[2],x[3],x[0],x[1], ...] + pmullw %4, %2 ; [x[2],-x[3],-x[0],x[1], ...] + paddw %1, %3 ; [x[0]+0,x[1]+(-x[1]>>1),x[2]+0,x[3]+(-x[3]>>1), ...] + paddw %1, %4 ; s = [x[0]+x[2],(x[1]>>1)-x[3],x[2]-x[0],(x[3]>>1)+x[1], ...] + pshuflw %3, %1, 1bh ; [s[3],s[2],s[1],s[0]] low qw + pmullw %1, [pic(wels_p1p1m1m1w_128)] ; [s[0],s[1],-s[2],-s[3], ...] + pshufhw %3, %3, 1bh ; [s[3],s[2],s[1],s[0]] high qw + pmullw %3, %2 ; [s[3],-s[2],-s[1],s[0], ...] 
+ paddw %1, %3 ; y = [s[0]+s[3],s[1]-s[2],-s[2]-s[1],-s[3]+s[0], ...] +%endmacro + +; Do 4 vertical 4-pt DCTs in parallel packed as 16 words in 2 xmm registers. +; Uses scrambled input to save a negation. +; [y0,y1]=%1 [y2,y3]=%2 [x1,x0]=%1 [x2,x3]=%2 clobber=%3 +%macro SSE2_DCT_4x4P 3 + movdqa %3, %1 + psubw %1, %2 ; [x1-x2,x0-x3] + paddw %2, %3 ; [x1+x2,x0+x3] + movdqa %3, %2 + punpckhqdq %2, %1 ; s03 = [x0+x3,x0-x3] + punpcklqdq %3, %1 ; s12 = [x1+x2,x1-x2] + movdqa %1, %2 + pmullw %1, [pic(wels_4xp1w_4xp2w)] ; [s03[0],2*s03[1]] + paddw %1, %3 ; [y0,y1] = [s03[0]+s12[0],2*s03[1]+s12[1]] + pmullw %3, [pic(wels_4xp1w_4xp2w)] ; [s12[0],2*s12[1]] + psubw %2, %3 ; [y2,y3] = [s03[0]-s12[0],s03[1]-2*s12[1]] +%endmacro + +; Do 4 vertical 4-pt IDCTs in parallel packed as 16 words in 2 xmm registers. +; Output is scrambled to save a negation. +; [y1,y0]=%1 [y2,y3]=%2 [x0,x1]=%1 [x2,x3]=%2 clobber=%3,%4 +%macro SSE2_IDCT_4x4P 4 + movdqa %4, [pic(wels_4xp0w_4xm8000w)] + movdqa %3, %1 + pmulhw %3, %4 ; x[0:1] * [0,-8000h] >> 16 + pmulhw %4, %2 ; x[2:3] * [0,-8000h] >> 16 + paddw %3, %1 ; [x[0],x[1]>>1] + paddw %4, %2 ; [x[2],x[3]>>1] + psubw %3, %2 ; [x[0]-x[2],(x[1]>>1)-x[3]] + paddw %1, %4 ; [x[2]+x[0],(x[3]>>1)+x[1]] + movdqa %2, %3 + punpckhqdq %3, %1 ; s13 = [(x[1]>>1)-x[3],(x[3]>>1)+x[1]] + punpcklqdq %2, %1 ; s02 = [x[0]-x[2], x[2]+x[0]] + movdqa %1, %2 + paddw %1, %3 ; [y1,y0] = [s02[0]+s13[0],s02[1]+s13[1]] + psubw %2, %3 ; [y2,y3] = [s02[0]-s13[0],s02[1]-s13[1]] +%endmacro + +;*********************************************************************** +; void WelsDctFourT4_sse2(int16_t *pDct, uint8_t *pix1, int32_t i_pix1, uint8_t *pix2, int32_t i_pix2 ) +;*********************************************************************** +WELS_EXTERN WelsDctFourT4_sse2 + %assign push_num 0 + INIT_X86_32_PIC r5 + LOAD_5_PARA + PUSH_XMM 8 + SIGN_EXTENSION r2, r2d + SIGN_EXTENSION r4, r4d + pxor xmm7, xmm7 + ;Load 4x8 + SSE2_LoadDiff8P xmm0, xmm6, xmm7, [r1], [r3] + SSE2_LoadDiff8P 
xmm1, xmm6, xmm7, [r1+r2], [r3+r4] + lea r1, [r1 + 2 * r2] + lea r3, [r3 + 2 * r4] + SSE2_LoadDiff8P xmm2, xmm6, xmm7, [r1], [r3] + SSE2_LoadDiff8P xmm3, xmm6, xmm7, [r1+r2], [r3+r4] + + SSE2_DCT xmm1, xmm2, xmm3, xmm4, xmm5, xmm0 + SSE2_DCT_HORIZONTAL xmm2, xmm5 + SSE2_DCT_HORIZONTAL xmm0, xmm5 + SSE2_DCT_HORIZONTAL xmm3, xmm5 + SSE2_DCT_HORIZONTAL xmm4, xmm5 + + SSE2_Store4x8p r0, xmm2, xmm0, xmm3, xmm4, xmm1 + + lea r1, [r1 + 2 * r2] + lea r3, [r3 + 2 * r4] + + ;Load 4x8 + SSE2_LoadDiff8P xmm0, xmm6, xmm7, [r1 ], [r3 ] + SSE2_LoadDiff8P xmm1, xmm6, xmm7, [r1+r2 ], [r3+r4] + lea r1, [r1 + 2 * r2] + lea r3, [r3 + 2 * r4] + SSE2_LoadDiff8P xmm2, xmm6, xmm7, [r1], [r3] + SSE2_LoadDiff8P xmm3, xmm6, xmm7, [r1+r2], [r3+r4] + + SSE2_DCT xmm1, xmm2, xmm3, xmm4, xmm5, xmm0 + SSE2_DCT_HORIZONTAL xmm2, xmm5 + SSE2_DCT_HORIZONTAL xmm0, xmm5 + SSE2_DCT_HORIZONTAL xmm3, xmm5 + SSE2_DCT_HORIZONTAL xmm4, xmm5 + + SSE2_Store4x8p r0+64, xmm2, xmm0, xmm3, xmm4, xmm1 + + POP_XMM + LOAD_5_PARA_POP + DEINIT_X86_32_PIC + ret + +;*********************************************************************** +; void WelsIDctFourT4Rec_sse2(uint8_t *rec, int32_t stride, uint8_t *pred, int32_t pred_stride, int16_t *rs); +;*********************************************************************** +WELS_EXTERN WelsIDctFourT4Rec_sse2 + %assign push_num 0 + INIT_X86_32_PIC r5 + LOAD_5_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + ;Load 4x8 + SSE2_Load4x8p r4, xmm0, xmm1, xmm4, xmm2, xmm5 + + movdqa xmm7, [pic(wels_p1m1m1p1w_128)] + SSE2_IDCT_HORIZONTAL xmm0, xmm7, xmm5, xmm6 + SSE2_IDCT_HORIZONTAL xmm1, xmm7, xmm5, xmm6 + SSE2_IDCT_HORIZONTAL xmm4, xmm7, xmm5, xmm6 + SSE2_IDCT_HORIZONTAL xmm2, xmm7, xmm5, xmm6 + SSE2_IDCT xmm1, xmm4, xmm2, xmm3, xmm5, xmm6, xmm0 + + WELS_Zero xmm7 + WELS_DW32 xmm6 + + SSE2_StoreDiff16p xmm1, xmm3, xmm5, xmm6, xmm7, [r0], [r2], [r0 + r1], [r2 + r3] + lea r0, [r0 + 2 * r1] + lea r2, [r2 + 2 * r3] + SSE2_StoreDiff16p xmm0, xmm4, xmm5, xmm6, xmm7, 
[r0], [r2], [r0 + r1], [r2 + r3] + + lea r0, [r0 + 2 * r1] + lea r2, [r2 + 2 * r3] + SSE2_Load4x8p r4+64, xmm0, xmm1, xmm4, xmm2, xmm5 + + movdqa xmm7, [pic(wels_p1m1m1p1w_128)] + SSE2_IDCT_HORIZONTAL xmm0, xmm7, xmm5, xmm6 + SSE2_IDCT_HORIZONTAL xmm1, xmm7, xmm5, xmm6 + SSE2_IDCT_HORIZONTAL xmm4, xmm7, xmm5, xmm6 + SSE2_IDCT_HORIZONTAL xmm2, xmm7, xmm5, xmm6 + SSE2_IDCT xmm1, xmm4, xmm2, xmm3, xmm5, xmm6, xmm0 + + WELS_Zero xmm7 + WELS_DW32 xmm6 + + SSE2_StoreDiff16p xmm1, xmm3, xmm5, xmm6, xmm7, [r0], [r2], [r0 + r1], [r2 + r3] + lea r0, [r0 + 2 * r1] + lea r2, [r2 + 2 * r3] + SSE2_StoreDiff16p xmm0, xmm4, xmm5, xmm6, xmm7, [r0], [r2], [r0 + r1], [r2 + r3] + POP_XMM + LOAD_5_PARA_POP + DEINIT_X86_32_PIC + ret + +;*********************************************************************** +; void WelsDctT4_sse2(int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, int32_t iStride2) +;*********************************************************************** +WELS_EXTERN WelsDctT4_sse2 + %assign push_num 0 + INIT_X86_32_PIC r5 + LOAD_5_PARA + PUSH_XMM 5 + SIGN_EXTENSION r2, r2d + SIGN_EXTENSION r4, r4d + + WELS_Zero xmm2 + SSE2_LoadDiff2x4P xmm0, r1+r2, r1, r3+r4, r3, xmm2, xmm3, xmm4 + add r1, r2 + add r3, r4 + SSE2_LoadDiff2x4P xmm1, r1+r2, r1+2*r2, r3+r4, r3+2*r4, xmm2, xmm3, xmm4 + SSE2_DCT_HORIZONTAL xmm0, xmm3 + SSE2_DCT_HORIZONTAL xmm1, xmm3 + SSE2_DCT_4x4P xmm0, xmm1, xmm3 + SSE2_Store2x4P r0, xmm0 + SSE2_Store2x4P r0+16, xmm1 + + POP_XMM + LOAD_5_PARA_POP + DEINIT_X86_32_PIC + ret + +;*********************************************************************** +; void IdctResAddPred_sse2(uint8_t* pPred, int32_t iStride, int16_t* pDct); +;*********************************************************************** +WELS_EXTERN IdctResAddPred_sse2 + %assign push_num 0 + LOAD_3_PARA_TO_5_PARA_IDCT + jmp prefixed(WelsIDctT4Rec_sse2.begin) + +;*********************************************************************** +; void WelsIDctT4Rec_sse2(uint8_t* pRec, int32_t 
iStride, uint8_t* pPred, int32_t iPredStride, int16_t* pDct); +;*********************************************************************** +WELS_EXTERN WelsIDctT4Rec_sse2 + %assign push_num 0 + LOAD_5_PARA +.begin: + INIT_X86_32_PIC r5 + PUSH_XMM 6 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + + SSE2_Load2x4P xmm0, r4 + SSE2_Load2x4P xmm1, r4+16 + movdqa xmm4, [pic(wels_p1m1m1p1w_128)] + SSE2_IDCT_HORIZONTAL xmm0, xmm4, xmm2, xmm3 + SSE2_IDCT_HORIZONTAL xmm1, xmm4, xmm2, xmm3 + SSE2_IDCT_4x4P xmm0, xmm1, xmm2, xmm3 + WELS_Zero xmm4 + WELS_DW32 xmm5 + SSE2_StoreDiff2x4P r0+r1, r0, xmm0, r2+r3, r2, xmm5, xmm4, xmm2, xmm3 + add r0, r1 + add r2, r3 + SSE2_StoreDiff2x4P r0+r1, r0+2*r1, xmm1, r2+r3, r2+2*r3, xmm5, xmm4, xmm2, xmm3 + + POP_XMM + DEINIT_X86_32_PIC + LOAD_5_PARA_POP + ret + +%macro SSE2_StoreDiff4x8p 8 + SSE2_StoreDiff8p %1, %3, %4, [%5], [%6] + SSE2_StoreDiff8p %1, %3, %4, [%5 + %7], [%6 + %8] + SSE2_StoreDiff8p %2, %3, %4, [%5 + 8], [%6 + 8] + SSE2_StoreDiff8p %2, %3, %4, [%5 + %7 + 8], [%6 + %8 + 8] +%endmacro + + ;*********************************************************************** +; void WelsIDctRecI16x16Dc_sse2(uint8_t *rec, int32_t stride, uint8_t *pred, int32_t pred_stride, int16_t *dct_dc) +;*********************************************************************** +WELS_EXTERN WelsIDctRecI16x16Dc_sse2 + %assign push_num 0 + LOAD_5_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + pxor xmm7, xmm7 + WELS_DW32 xmm6 + + SSE2_Load8DC xmm0, xmm1, xmm2, xmm3, xmm6, [r4] + SSE2_StoreDiff4x8p xmm0, xmm1, xmm5, xmm7, r0, r2, r1, r3 + + lea r0, [r0 + 2 * r1] + lea r2, [r2 + 2 * r3] + SSE2_StoreDiff4x8p xmm0, xmm1, xmm5, xmm7, r0, r2, r1, r3 + + lea r0, [r0 + 2 * r1] + lea r2, [r2 + 2 * r3] + SSE2_StoreDiff4x8p xmm2, xmm3, xmm5, xmm7, r0, r2, r1, r3 + + lea r0, [r0 + 2 * r1] + lea r2, [r2 + 2 * r3] + SSE2_StoreDiff4x8p xmm2, xmm3, xmm5, xmm7, r0, r2, r1, r3 + + SSE2_Load8DC xmm0, xmm1, xmm2, xmm3, xmm6, [r4 + 16] + lea r0, [r0 + 2 * 
r1] + lea r2, [r2 + 2 * r3] + SSE2_StoreDiff4x8p xmm0, xmm1, xmm5, xmm7, r0, r2, r1, r3 + + lea r0, [r0 + 2 * r1] + lea r2, [r2 + 2 * r3] + SSE2_StoreDiff4x8p xmm0, xmm1, xmm5, xmm7, r0, r2, r1, r3 + + lea r0, [r0 + 2 * r1] + lea r2, [r2 + 2 * r3] + SSE2_StoreDiff4x8p xmm2, xmm3, xmm5, xmm7, r0, r2, r1, r3 + + lea r0, [r0 + 2 * r1] + lea r2, [r2 + 2 * r3] + SSE2_StoreDiff4x8p xmm2, xmm3, xmm5, xmm7, r0, r2, r1, r3 + POP_XMM + LOAD_5_PARA_POP + ret + + +;*********************************************************************** +; AVX2 functions +;*********************************************************************** + +%ifdef HAVE_AVX2 +; out=%1 pPixel1=%2 iStride1=%3 pPixel2=%4 iStride2=%5 wels_shufb0312_movzxw=%6 clobber=%7,%8 +%macro AVX2_LoadDiff16P 8 + vmovq x%1, [%2 ] + vpbroadcastq y%7, [%2 + 4 * %3] + vpblendd y%1, y%1, y%7, 11110000b + vpshufb y%1, y%1, y%6 + vmovq x%7, [%4 ] + vpbroadcastq y%8, [%4 + 4 * %5] + vpblendd y%7, y%7, y%8, 11110000b + vpshufb y%7, y%7, y%6 + vpsubw y%1, y%1, y%7 +%endmacro + +; pRec=%1 iStride=%2 data=%3,%4 pPred=%5 iPredStride=%6 dw32=%7 wels_shufb0312_movzxw=%8 clobber=%9,%10 +%macro AVX2_StoreDiff32P 10 + vpaddw y%3, y%3, y%7 + vpsraw y%3, y%3, 6 + vmovq x%9, [%5 ] + vpbroadcastq y%10, [%5 + 4 * %6] + add %5, %6 + vpblendd y%9, y%9, y%10, 11110000b + vpshufb y%9, y%9, y%8 + vpaddsw y%3, y%3, y%9 + vpaddw y%4, y%4, y%7 + vpsraw y%4, y%4, 6 + vmovq x%9, [%5 ] + vpbroadcastq y%10, [%5 + 4 * %6] + vpblendd y%9, y%9, y%10, 11110000b + vpshufb y%9, y%9, y%8 + vpaddsw y%4, y%4, y%9 + vpackuswb y%3, y%3, y%4 + vbroadcasti128 y%4, [pic(wels_shufb0231_128)] + vpshufb y%3, y%3, y%4 + vextracti128 x%4, y%3, 1 + vmovlps [%1 ], x%3 + vmovlps [%1 + 4 * %2], x%4 + add %1, %2 + vmovhps [%1 ], x%3 + vmovhps [%1 + 4 * %2], x%4 +%endmacro + +; out=%1,%2,%3,%4 pDct=%5 clobber=%6 +%macro AVX2_Load4x16P 6 + vmovdqa x%2, [%5+0x00] + vinserti128 y%2, y%2, [%5+0x40], 1 + vmovdqa x%6, [%5+0x20] + vinserti128 y%6, y%6, [%5+0x60], 1 + vpunpcklqdq y%1, 
y%2, y%6 + vpunpckhqdq y%2, y%2, y%6 + vmovdqa x%4, [%5+0x10] + vinserti128 y%4, y%4, [%5+0x50], 1 + vmovdqa x%6, [%5+0x30] + vinserti128 y%6, y%6, [%5+0x70], 1 + vpunpcklqdq y%3, y%4, y%6 + vpunpckhqdq y%4, y%4, y%6 +%endmacro + +; pDct=%1 data=%2,%3,%4,%5 clobber=%6 +%macro AVX2_Store4x16P 6 + vpunpcklqdq y%6, y%2, y%3 + vmovdqa [%1+0x00], x%6 + vextracti128 [%1+0x40], y%6, 1 + vpunpckhqdq y%6, y%2, y%3 + vmovdqa [%1+0x20], x%6 + vextracti128 [%1+0x60], y%6, 1 + vpunpcklqdq y%6, y%4, y%5 + vmovdqa [%1+0x10], x%6 + vextracti128 [%1+0x50], y%6, 1 + vpunpckhqdq y%6, y%4, y%5 + vmovdqa [%1+0x30], x%6 + vextracti128 [%1+0x70], y%6, 1 +%endmacro + +%macro AVX2_Load4x4P 2 ; out=%1 src=%2 (unaligned 256-bit load) + vmovdqu y%1, [%2] +%endmacro + +%macro AVX2_Store4x4P 2 ; dst=%1 data=%2 (unaligned 256-bit store) + vmovdqu [%1], y%2 +%endmacro + +; Load 4 lines of 4 pixels, shuffle and zero extend to 16-bit. +; out=%1 pPixel=%2 iStride=%3 [wels_shufb0312_movzxw]=%4 clobber=%5,%6 (pPixel is advanced by iStride) +%macro AVX2_Loadzx4x4P 6 + vmovd x%1, [%2 ] + add %2, %3 + vpbroadcastd x%5, [%2 + 2 * %3] + vpblendd x%1, x%1, x%5, 1010b + vpbroadcastd y%5, [%2 ] + vpbroadcastd y%6, [%2 + %3] + vpblendd y%5, y%5, y%6, 10101010b + vpblendd y%1, y%1, y%5, 11110000b + vpshufb y%1, y%1, %4 +%endmacro + +; out=%1 pPixel1=%2 iStride1=%3 pPixel2=%4 iStride2=%5 wels_shufb0312_movzxw=%6 clobber=%7,%8,%9 (pPixel1 and pPixel2 are each advanced by their stride) +%macro AVX2_LoadDiff4x4P 9 + AVX2_Loadzx4x4P %1, %2, %3, y%6, %7, %8 + AVX2_Loadzx4x4P %7, %4, %5, y%6, %8, %9 + vpsubw y%1, y%1, y%7 +%endmacro + +; pRec=%1 iStride=%2 data=%3 pPred=%4 iPredStride=%5 dw32=%6 wels_shufb0312_movzxw=%7 clobber=%8,%9,%10 (pRec and pPred are each advanced by their stride) +%macro AVX2_StoreDiff4x4P 10 + vpaddw y%3, y%3, y%6 + vpsraw y%3, y%3, 6 + AVX2_Loadzx4x4P %8, %4, %5, y%7, %9, %10 + vpaddsw y%3, y%3, y%8 + vpackuswb y%3, y%3, y%3 + vbroadcasti128 y%8, [pic(wels_shufb0231_128)] + vpshufb y%3, y%3, y%8 + vextracti128 x%8, y%3, 1 + vmovd [%1 ], x%3 + add %1, %2 + vmovd [%1 ], x%8 + vpsrlq x%8, x%8, 32 + vmovd [%1 + %2], x%8 + vpsrlq x%3, x%3, 32 + vmovd [%1 + 2 * %2], x%3 +%endmacro + +; 4-pt DCT +; out=%1,%2,%3,%4 
in=%1,%2,%3,%4 clobber=%5 +%macro AVX2_DCT 5 + vpsubw %5, %1, %4 ; s3 = x0 - x3 + vpaddw %1, %1, %4 ; s0 = x0 + x3 + vpsubw %4, %2, %3 ; s2 = x1 - x2 + vpaddw %2, %2, %3 ; s1 = x1 + x2 + vpsubw %3, %1, %2 ; y2 = s0 - s1 + vpaddw %1, %1, %2 ; y0 = s0 + s1 + vpsllw %2, %5, 1 + vpaddw %2, %2, %4 ; y1 = 2 * s3 + s2 + vpsllw %4, %4, 1 + vpsubw %4, %5, %4 ; y3 = s3 - 2 * s2 +%endmacro + +; 4-pt IDCT +; out=%1,%2,%3,%4 in=%1,%2,%3,%4 clobber=%5 +%macro AVX2_IDCT 5 + vpsraw %5, %2, 1 + vpsubw %5, %5, %4 ; t3 = (x1 >> 1) - x3 + vpsraw %4, %4, 1 + vpaddw %4, %2, %4 ; t2 = x1 + (x3 >> 1) + vpaddw %2, %1, %3 ; t0 = x0 + x2 + vpsubw %3, %1, %3 ; t1 = x0 - x2 + vpaddw %1, %2, %4 ; y0 = t0 + t2 + vpsubw %4, %2, %4 ; y3 = t0 - t2 + vpaddw %2, %3, %5 ; y1 = t1 + t3 + vpsubw %3, %3, %5 ; y2 = t1 - t3 +%endmacro + +; Do 4 horizontal 4-pt DCTs in parallel packed as 16 words in a ymm register. +; Uses scrambled input to save a negation. +; [y0,y1,y2,y3]=%1 [x0,x3,x1,x2]=%1 wels_shufb2301=%2 clobber=%3 +%macro AVX2_DCT_HORIZONTAL 3 + vpsignw %3, %1, [pic(wels_p1m1p1m1w_256)] ; [x0,-x3,x1,-x2] + vpshufb %1, %1, %2 ; [x3,x0,x2,x1] + vpaddw %1, %1, %3 ; s = [x0+x3,-x3+x0,x1+x2,-x2+x1] + vpmullw %3, %1, [pic(wels_p1p2m1m2w_256)] ; [s[0],2*s[1],-s[2],-2*s[3], ...] + vpshufd %1, %1, 0b1h ; [s[2],s[3],s[0],s[1], ...] + vpaddw %1, %1, %3 ; [y0,y1,y2,y3] = [s[0]+s[2],2*s[1]+s[3],-s[2]+s[0],-2*s[3]+s[1], ...] +%endmacro + +; Do 4 horizontal 4-pt IDCTs in parallel packed as 16 words in a ymm register. +; Output is scrambled to save a negation. +; [y0,y3,y1,y2]=%1 [x0,x1,x2,x3]=%1 wels_shufb2301=%2 clobber=%3 +%macro AVX2_IDCT_HORIZONTAL 3 + vpsraw %3, %1, 1 ; [x0>>1,x1>>1,x2>>1,x3>>1] + vpblendw %3, %1, %3, 10101010b ; [x0,x1>>1,x2,x3>>1] + vpsignw %1, %1, [pic(wels_p1p1m1m1w_256)] ; [x0,x1,-x2,-x3] + vpshufd %3, %3, 0b1h ; [x2,x3>>1,x0,x1>>1] + vpaddw %1, %3, %1 ; s = [x2+x0,(x3>>1)+x1,x0-x2,(x1>>1)-x3] + vpshufb %3, %1, %2 ; [s[1],s[0],s[3],s[2], ...] 
+ vpsignw %1, %1, [pic(wels_p1m1p1m1w_256)] ; [s[0],-s[1],s[2],-s[3], ...] + vpaddw %1, %1, %3 ; [y0,y3,y1,y2] = [s[0]+s[1],-s[1]+s[0],s[2]+s[3],-s[3]+s[2], ...] +%endmacro + +; Do 4 vertical 4-pt DCTs in parallel packed as 16 words in a ymm register. +; Uses scrambled input to save a negation. +; [y0,y1,y2,y3]=%1 [x0,x3,x1,x2]=%1 clobber=%2 +%macro AVX2_DCT_4x4P 2 + vpsignw %2, %1, [pic(wels_4xp1w_4xm1w_256)] ; [x0,-x3,x1,-x2] + vpshufd %1, %1, 4eh ; [x3,x0,x2,x1] + vpaddw %1, %1, %2 ; s = [x0+x3,-x3+x0,x1+x2,-x2+x1] + vpmullw %2, %1, [pic(wels_4xp1w_4xp2w_4xm1w_4xm2w)] ; [s[0],2*s[1],-s[2],-2*s[3]] + vpermq %1, %1, 4eh ; [s[2],s[3],s[0],s[1]] + vpaddw %1, %1, %2 ; [y0,y1,y2,y3] = [s[0]+s[2],2*s[1]+s[3],-s[2]+s[0],-2*s[3]+s[1]] +%endmacro + +; Do 4 vertical 4-pt IDCTs in parallel packed as 16 words in a ymm register. +; Output is scrambled to save a negation. +; [y0,y3,y1,y2]=%1 [x0,x1,x2,x3]=%1 clobber=%2 +%macro AVX2_IDCT_4x4P 2 + vpsraw %2, %1, 1 ; [x0>>1,x1>>1,x2>>1,x3>>1] + vpblendw %2, %1, %2, 11110000b ; [x0,x1>>1,x2,x3>>1] + vpsignw %1, %1, [pic(wels_8xp1w_8xm1w)] ; [x0,x1,-x2,-x3] + vpermq %2, %2, 4eh ; [x2,x3>>1,x0,x1>>1] + vpaddw %1, %2, %1 ; s = [x2+x0,(x3>>1)+x1,x0-x2,(x1>>1)-x3] + vpshufd %2, %1, 4eh ; [s[1],s[0],s[3],s[2]] + vpmullw %1, %1, [pic(wels_4xp1w_4xm1w_256)] ; [s[0],-s[1],s[2],-s[3], ...] 
+ vpaddw %1, %1, %2 ; [y0,y3,y1,y2] = [s[0]+s[1],-s[1]+s[0],s[2]+s[3],-s[3]+s[2]] +%endmacro + +;*********************************************************************** +; void WelsDctFourT4_avx2(int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, int32_t iStride2) +;*********************************************************************** +WELS_EXTERN WelsDctFourT4_avx2 + %assign push_num 0 + INIT_X86_32_PIC r5 + LOAD_5_PARA + PUSH_XMM 7 + SIGN_EXTENSION r2, r2d + SIGN_EXTENSION r4, r4d + + vbroadcasti128 ymm6, [pic(wels_shufb0312_movzxw_128)] + + ;Load 4x16 + AVX2_LoadDiff16P mm0, r1, r2, r3, r4, mm6, mm4, mm5 + add r1, r2 + add r3, r4 + AVX2_LoadDiff16P mm1, r1, r2, r3, r4, mm6, mm4, mm5 + add r1, r2 + add r3, r4 + AVX2_LoadDiff16P mm2, r1, r2, r3, r4, mm6, mm4, mm5 + add r1, r2 + add r3, r4 + AVX2_LoadDiff16P mm3, r1, r2, r3, r4, mm6, mm4, mm5 + + AVX2_DCT ymm0, ymm1, ymm2, ymm3, ymm5 + vbroadcasti128 ymm6, [pic(wels_shufb2301_128)] + AVX2_DCT_HORIZONTAL ymm0, ymm6, ymm5 + AVX2_DCT_HORIZONTAL ymm1, ymm6, ymm5 + AVX2_DCT_HORIZONTAL ymm2, ymm6, ymm5 + AVX2_DCT_HORIZONTAL ymm3, ymm6, ymm5 + + AVX2_Store4x16P r0, mm0, mm1, mm2, mm3, mm5 + vzeroupper + + POP_XMM + LOAD_5_PARA_POP + DEINIT_X86_32_PIC + ret + +;*********************************************************************** +; void IdctFourResAddPred_avx2(uint8_t* pPred, int32_t iStride, const int16_t* pDct, const int8_t* pNzc); +;*********************************************************************** +WELS_EXTERN IdctFourResAddPred_avx2 + %assign push_num 0 + LOAD_3_PARA_TO_5_PARA_IDCT + jmp prefixed(WelsIDctFourT4Rec_avx2.begin) + +;*********************************************************************** +; void WelsIDctFourT4Rec_avx2(uint8_t* pRec, int32_t iStride, uint8_t* pPred, int32_t iPredStride, int16_t* pDct); +;*********************************************************************** +WELS_EXTERN WelsIDctFourT4Rec_avx2 + %assign push_num 0 + LOAD_5_PARA +.begin: + INIT_X86_32_PIC r5 + 
PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + + AVX2_Load4x16P mm0, mm1, mm2, mm3, r4, mm5 + vbroadcasti128 ymm6, [pic(wels_shufb2301_128)] + AVX2_IDCT_HORIZONTAL ymm0, ymm6, ymm5 + AVX2_IDCT_HORIZONTAL ymm1, ymm6, ymm5 + AVX2_IDCT_HORIZONTAL ymm2, ymm6, ymm5 + AVX2_IDCT_HORIZONTAL ymm3, ymm6, ymm5 + AVX2_IDCT ymm0, ymm1, ymm2, ymm3, ymm5 + + vbroadcasti128 ymm6, [pic(wels_shufb0312_movzxw_128)] + vbroadcasti128 ymm7, [pic(wels_dw32_128)] + AVX2_StoreDiff32P r0, r1, mm0, mm1, r2, r3, mm7, mm6, mm5, mm4 + add r2, r3 + add r0, r1 + AVX2_StoreDiff32P r0, r1, mm2, mm3, r2, r3, mm7, mm6, mm5, mm4 + vzeroupper + + POP_XMM + DEINIT_X86_32_PIC + LOAD_5_PARA_POP + ret + +;*********************************************************************** +; void WelsDctT4_avx2(int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, int32_t iStride2) +;*********************************************************************** +WELS_EXTERN WelsDctT4_avx2 + %assign push_num 0 + INIT_X86_32_PIC r5 + LOAD_5_PARA + PUSH_XMM 5 + SIGN_EXTENSION r2, r2d + SIGN_EXTENSION r4, r4d + + vbroadcasti128 ymm1, [pic(wels_shufb0312_movzxw_128)] + AVX2_LoadDiff4x4P mm0, r1, r2, r3, r4, mm1, mm2, mm3, mm4 + AVX2_DCT_4x4P ymm0, ymm2 + vbroadcasti128 ymm1, [pic(wels_shufb2301_128)] + AVX2_DCT_HORIZONTAL ymm0, ymm1, ymm2 + AVX2_Store4x4P r0, mm0 + vzeroupper + + POP_XMM + LOAD_5_PARA_POP + DEINIT_X86_32_PIC + ret + +;*********************************************************************** +; void IdctResAddPred_avx2(uint8_t* pPred, int32_t iStride, int16_t* pDct); +;*********************************************************************** +WELS_EXTERN IdctResAddPred_avx2 + %assign push_num 0 + LOAD_3_PARA_TO_5_PARA_IDCT + jmp prefixed(WelsIDctT4Rec_avx2.begin) + +;*********************************************************************** +; void WelsIDctT4Rec_avx2(uint8_t* pRec, int32_t iStride, uint8_t* pPred, int32_t iPredStride, int16_t* pDct); 
+;*********************************************************************** +WELS_EXTERN WelsIDctT4Rec_avx2 + %assign push_num 0 + LOAD_5_PARA +.begin: + INIT_X86_32_PIC r5 + PUSH_XMM 6 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + + AVX2_Load4x4P mm0, r4 + vbroadcasti128 ymm4, [pic(wels_shufb2301_128)] + AVX2_IDCT_HORIZONTAL ymm0, ymm4, ymm1 + AVX2_IDCT_4x4P ymm0, ymm1 + vbroadcasti128 ymm4, [pic(wels_shufb0312_movzxw_128)] + vbroadcasti128 ymm5, [pic(wels_dw32_128)] + AVX2_StoreDiff4x4P r0, r1, mm0, r2, r3, mm5, mm4, mm1, mm2, mm3 + vzeroupper + + POP_XMM + DEINIT_X86_32_PIC + LOAD_5_PARA_POP + ret +%endif + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/x86/deblock.asm b/TMessagesProj/jni/third_party/openh264/src/codec/common/x86/deblock.asm new file mode 100644 index 000000000..513b37eb1 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/x86/deblock.asm @@ -0,0 +1,848 @@ +;*! +;* \copy +;* Copyright (c) 2009-2013, Cisco Systems +;* All rights reserved. +;* +;* Redistribution and use in source and binary forms, with or without +;* modification, are permitted provided that the following conditions +;* are met: +;* +;* * Redistributions of source code must retain the above copyright +;* notice, this list of conditions and the following disclaimer. +;* +;* * Redistributions in binary form must reproduce the above copyright +;* notice, this list of conditions and the following disclaimer in +;* the documentation and/or other materials provided with the +;* distribution. +;* +;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +;* POSSIBILITY OF SUCH DAMAGE. +;* +;* +;* deblock.asm +;* +;* Abstract +;* edge loop +;* +;* History +;* 08/07/2009 Created +;* +;* +;*************************************************************************/ +%include "asm_inc.asm" + +;******************************************************************************* +; Macros and other preprocessor constants +;******************************************************************************* + +%ifdef X86_32_PICASM +SECTION .text align=16 +%else +SECTION .rodata align=16 +%endif + +ALIGN 16 +FOUR_16B_SSE2: dw 4, 4, 4, 4, 4, 4, 4, 4 + +ALIGN 16 +WELS_DB1_16: + times 16 db 1 +WELS_DB127_16: + times 16 db 127 +WELS_DB96_16: + times 16 db 96 +WELS_SHUFB0000111122223333: + times 4 db 0 + times 4 db 1 + times 4 db 2 + times 4 db 3 + + +SECTION .text + +; Unsigned byte absolute difference. +; a=%1 b=%2 clobber=%3 +; Subtract once in each direction with saturation and return the maximum. +%macro SSE2_AbsDiffUB 3 + movdqa %3, %2 + psubusb %3, %1 + psubusb %1, %2 + por %1, %3 +%endmacro + +; Unsigned byte compare less than. +; lhs=%1 rhs^0x7f=%2 0x7f=%3 +; No unsigned byte lt/gt compare instruction available; xor by 0x7f and use a +; signed compare. Some other options do exist. This one allows modifying the lhs +; without mov and uses a bitwise op which can be executed on most ports on +; common architectures. 
+%macro SSE2_CmpltUB 3 + pxor %1, %3 + pcmpgtb %1, %2 +%endmacro + +; Unsigned byte compare greater than or equal. +%macro SSE2_CmpgeUB 2 + pminub %1, %2 + pcmpeqb %1, %2 +%endmacro + +; Clip unsigned bytes to ref +/- diff. +; data=%1 ref=%2 maxdiff_from_ref=%3 clobber=%4 +%macro SSE2_ClipUB 4 + movdqa %4, %2 + psubusb %4, %3 + paddusb %3, %2 + pmaxub %1, %4 + pminub %1, %3 +%endmacro + +; (a + b + 1 - c) >> 1 +; a=%1 b=%2 c=%3 [out:a^b&c]=%4 +%macro SSE2_AvgbFloor1 4 + movdqa %4, %1 + pxor %4, %2 + pavgb %1, %2 + pand %4, %3 + psubb %1, %4 +%endmacro + +; (a + b + carry) >> 1 +; a=%1 b=%2 carry-1=%3 +%macro SSE2_AvgbFloor2 3 + pxor %1, %3 + pxor %2, %3 + pavgb %1, %2 + pxor %1, %3 +%endmacro + +; a = (a & m) | (b & ~m) +; a=%1 b=%2 m=%3 +%macro SSE2_Blend 3 + pand %1, %3 + pandn %3, %2 + por %1, %3 +%endmacro + +; Compute +; p0 = clip(p0 + clip((q0 - p0 + ((p1 - q1) >> 2) + 1) >> 1, -iTc, iTc), 0, 255) +; q0 = clip(q0 - clip((q0 - p0 + ((p1 - q1) >> 2) + 1) >> 1, -iTc, iTc), 0, 255) +; 16-wide parallel in packed byte representation in xmm registers. +; +; p1=%1 p0=%2 q0=%3 q1=%4 iTc=%5 FFh=%6 xmmclobber=%7,%8 +%macro SSE2_DeblockP0Q0_Lt4 8 + ; (q0 - p0 + ((p1 - q1) >> 2) + 1) >> 1 clipped to [-96, 159] and biased to [0, 255]. + ; A limited range is sufficient because the value is clipped to [-iTc, iTc] later. + ; Bias so that unsigned saturation can be used. + ; Get ((p1 - q1) >> 2) + 192 via a pxor and two pavgbs. + ; q0 - p0 is split into a non-negative and non-positive part. The latter is + ; subtracted from the biased value. + movdqa %7, %2 + psubusb %7, %3 ; clip(p0 - q0, 0, 255) + ; ((p1 - q1) >> 2) + 0xc0 + pxor %4, %6 ; q1 ^ 0xff aka -q1 - 1 & 0xff + pavgb %1, %4 ; (((p1 - q1 + 0x100) >> 1) + pavgb %1, %6 ; + 0x100) >> 1 + psubusb %1, %7 ; -= clip(p0 - q0, 0, 255) saturate. 
+ movdqa %8, %3 + psubusb %8, %2 ; (clip(q0 - p0, 0, 255) + pavgb %8, %1 ; + clip(((p1 - q1 + 0x300) >> 2) - clip(p0 - q0, 0, 255), 0, 255) + 1) >> 1 + + ; Unbias and split into a non-negative and a non-positive part. + ; Clip each part to iTc via minub. + ; Add/subtract each part to/from p0/q0 and clip. + movdqa %6, [pic(WELS_DB96_16)] + psubusb %6, %8 + psubusb %8, [pic(WELS_DB96_16)] + pminub %6, %5 + pminub %8, %5 + psubusb %2, %6 + paddusb %2, %8 ; p0 + paddusb %3, %6 + psubusb %3, %8 ; q0 +%endmacro + + +;******************************************************************************* +; void DeblockLumaLt4V_ssse3(uint8_t * pPix, int32_t iStride, int32_t iAlpha, +; int32_t iBeta, int8_t * pTC) +;******************************************************************************* + +WELS_EXTERN DeblockLumaLt4V_ssse3 + %assign push_num 0 + INIT_X86_32_PIC r5 + LOAD_5_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + movd xmm1, arg3d + movd xmm2, arg4d + pxor xmm3, xmm3 + pxor xmm1, [pic(WELS_DB127_16)] + pxor xmm2, [pic(WELS_DB127_16)] + pshufb xmm1, xmm3 ; iAlpha ^ 0x7f + pshufb xmm2, xmm3 ; iBeta ^ 0x7f + mov r2, r1 ; iStride + neg r1 ; -iStride + lea r3, [r0 + r1] ; pPix - iStride + + ; Compute masks to enable/disable deblocking. 
+ MOVDQ xmm6, [r3 + 0 * r1] ; p0 + MOVDQ xmm7, [r3 + 1 * r1] ; p1 + MOVDQ xmm0, [r0 + 0 * r2] ; q0 + movdqa xmm4, xmm6 + SSE2_AbsDiffUB xmm6, xmm0, xmm3 ; |p0 - q0| + SSE2_CmpltUB xmm6, xmm1, [pic(WELS_DB127_16)] ; bDeltaP0Q0 = |p0 - q0| < iAlpha + MOVDQ xmm1, [r0 + 1 * r2] ; q1 + SSE2_AbsDiffUB xmm7, xmm4, xmm3 ; |p1 - p0| + SSE2_AbsDiffUB xmm0, xmm1, xmm3 ; |q1 - q0| + pmaxub xmm7, xmm0 ; max(|p1 - p0|, |q1 - q0|) + SSE2_CmpltUB xmm7, xmm2, [pic(WELS_DB127_16)] ; bDeltaP1P0 & bDeltaQ1Q0 = max(|p1 - p0|, |q1 - q0|) < iBeta + pand xmm6, xmm7 ; bDeltaP0Q0P1P0Q1Q0 = bDeltaP0Q0 & bDeltaP1P0 & bDeltaQ1Q0 + MOVDQ xmm7, [r3 + 2 * r1] ; p2 + movdqa xmm0, xmm7 + SSE2_AbsDiffUB xmm7, xmm4, xmm3 ; |p2 - p0| + SSE2_CmpltUB xmm7, xmm2, [pic(WELS_DB127_16)] ; bDeltaP2P0 = |p2 - p0| < iBeta + MOVDQ xmm5, [r0 + 2 * r2] ; q2 + MOVDQ xmm3, [r0 + 0 * r2] ; q0 + movdqa xmm1, xmm5 + SSE2_AbsDiffUB xmm5, xmm3, xmm4 ; |q2 - q0| + SSE2_CmpltUB xmm5, xmm2, [pic(WELS_DB127_16)] ; bDeltaQ2Q0 = |q2 - q0| < iBeta + + pavgb xmm3, [r3 + 0 * r1] + pcmpeqw xmm2, xmm2 ; FFh + pxor xmm3, xmm2 + ; (p2 + ((p0 + q0 + 1) >> 1)) >> 1 + pxor xmm0, xmm2 + pavgb xmm0, xmm3 + pxor xmm0, xmm2 + ; (q2 + ((p0 + q0 + 1) >> 1)) >> 1 + pxor xmm1, xmm2 + pavgb xmm1, xmm3 + pxor xmm1, xmm2 + + movd xmm3, [r4] + pshufb xmm3, [pic(WELS_SHUFB0000111122223333)] ; iTc + movdqa xmm4, xmm3 ; iTc0 = iTc + pcmpgtb xmm3, xmm2 ; iTc > -1 ? 0xff : 0x00 + pand xmm6, xmm3 ; bDeltaP0Q0P1P0Q1Q0 &= iTc > -1 + movdqa xmm3, xmm4 + psubb xmm3, xmm7 ; iTc -= bDeltaP2P0 ? -1 : 0 + psubb xmm3, xmm5 ; iTc -= bDeltaQ2Q0 ? -1 : 0 + pand xmm3, xmm6 ; iTc &= bDeltaP0Q0P1P0Q1Q0 ? 0xff : 0 + pand xmm7, xmm6 ; bDeltaP2P0 &= bDeltaP0Q0P1P0Q1Q0 + pand xmm5, xmm6 ; bDeltaQ2Q0 &= bDeltaP0Q0P1P0Q1Q0 + pand xmm7, xmm4 ; iTc0 & (bDeltaP2P0 ? 0xff : 0) + pand xmm5, xmm4 ; iTc0 & (bDeltaQ2Q0 ? 0xff : 0) + + MOVDQ xmm4, [r3 + 1 * r1] + SSE2_ClipUB xmm0, xmm4, xmm7, xmm6 ; clip p1. + MOVDQ xmm6, [r0 + 1 * r2] + MOVDQ [r3 + 1 * r1], xmm0 ; store p1. 
+ SSE2_ClipUB xmm1, xmm6, xmm5, xmm7 ; clip q1. + MOVDQ [r0 + 1 * r2], xmm1 ; store q1. + + MOVDQ xmm1, [r3 + 0 * r1] ; p0 + MOVDQ xmm0, [r0 + 0 * r2] ; q0 + SSE2_DeblockP0Q0_Lt4 xmm4, xmm1, xmm0, xmm6, xmm3, xmm2, xmm5, xmm7 + MOVDQ [r3 + 0 * r1], xmm1 ; store p0. + MOVDQ [r0 + 0 * r2], xmm0 ; store q0. + + POP_XMM + LOAD_5_PARA_POP + DEINIT_X86_32_PIC + ret + + +; Deblock 3x16 luma pixels for the eq4 case. +; +; Compose 8-bit averages from pavgbs. Ie. (p1 + p0 + p2 + q0 + 2) >> 2 can be +; written as (((p1 + p0) >> 1) + ((p2 + q0 + (p1 ^ p0 & 1)) >> 1) + 1) >> 1, +; which maps to 3 pavgbs. +; +; pPix=%1 iStride=%2 [in:q0,out:p0]=%3 [in:q1,out:p1]=%4 bDeltaP0Q0P1P0Q1Q0=%5 bDeltaP2P0=%6 clobber=%7,%8,%9,%10 preserve_p0p1=%11 db1=%12 +%macro SSE2_DeblockLumaEq4_3x16P 12 + movdqa %7, %3 + movdqa %8, %6 + MOVDQ %10, [%1 + 1 * %2] ; p1 + SSE2_Blend %7, %10, %8 ; t0 = bDeltaP2P0 ? q0 : p1 + movdqa %8, %6 + MOVDQ %9, [%1 + 2 * %2] ; p2 + SSE2_Blend %9, %4, %8 ; t1 = bDeltaP2P0 ? p2 : q1 + SSE2_AvgbFloor1 %4, %9, %12, %8 ; t1 = (t1 + q1) >> 1 + SSE2_AvgbFloor1 %10, [%1], %12, %8 ; (p0 + p1) >> 1, p0 ^ p1 + pxor %8, %12 + SSE2_AvgbFloor1 %7, %4, %8, %9 ; (t0 + t1 + (p0 ^ p1 & 1)) >> 1 + MOVDQ %9, [%1 + 2 * %2] ; p2 + SSE2_AvgbFloor1 %3, %9, %8, %4 ; (p2 + q0 + (p0 ^ p1 & 1)) >> 1 + pavgb %7, %10 ; p0' = (p0 + p1 + t0 + t1 + 2) >> 2 + movdqa %8, %10 + pxor %8, %3 ; (p0 + p1) >> 1 ^ (p2 + q0 + (p0 ^ p1 & 1)) >> 1 + pand %8, %12 ; & 1 + pavgb %10, %3 ; p1' = (p0 + p1 + p2 + q0 + 2) >> 2 + pand %6, %5 ; bDeltaP2P0 &= bDeltaP0Q0P1P0Q1Q0 +%if %11 + MOVDQ %3, [%1 + 0 * %2] ; p0 + movdqa %4, %5 + SSE2_Blend %7, %3, %4 ; p0out = bDeltaP0Q0P1P0Q1Q0 ? p0' : p0 +%else + SSE2_Blend %7, [%1 + 0 * %2], %5 ; p0out = bDeltaP0Q0P1P0Q1Q0 ? 
p0' : p0 +%endif + MOVDQ [%1 + 0 * %2], %7 ; store p0 + add %1, %2 + movdqa %7, %10 + psubb %10, %8 ; (p0 + p1 + p2 + q0) >> 2 + psubb %8, %12 + MOVDQ %4, [%1 + (3 - 1) * %2] ; p3 + SSE2_AvgbFloor2 %4, %9, %8 ; (p2 + p3 + ((p0 + p1) >> 1 ^ (p2 + q0 + (p0 ^ p1 & 1)) >> 1 & 1)) >> 1 + pavgb %10, %4 ; p2' = (((p0 + p1 + p2 + q0) >> 1) + p2 + p3 + 2) >> 2 + movdqa %8, %6 + SSE2_Blend %10, [%1 + (2 - 1) * %2], %8 ; p2out = bDeltaP2P0 ? p2' : p2 + MOVDQ [%1 + (2 - 1) * %2], %10 ; store p2 +%if %11 + MOVDQ %4, [%1 + (1 - 1) * %2] ; p1 + SSE2_Blend %7, %4, %6 ; p1out = bDeltaP2P0 ? p1' : p1 +%else + SSE2_Blend %7, [%1 + (1 - 1) * %2], %6 ; p1out = bDeltaP2P0 ? p1' : p1 +%endif + MOVDQ [%1 + (1 - 1) * %2], %7 ; store p1 +%endmacro + + +;******************************************************************************* +; void DeblockLumaEq4V_ssse3(uint8_t * pPix, int32_t iStride, int32_t iAlpha, +; int32_t iBeta) +;******************************************************************************* + +WELS_EXTERN DeblockLumaEq4V_ssse3 + %assign push_num 0 + INIT_X86_32_PIC r4 + LOAD_4_PARA + PUSH_XMM 10 + SIGN_EXTENSION r1, r1d + movd xmm1, arg3d + movd xmm2, arg4d + shr r2, 2 + add r2, 1 + movd xmm3, r2d + pxor xmm4, xmm4 + pxor xmm1, [pic(WELS_DB127_16)] + pxor xmm2, [pic(WELS_DB127_16)] + pshufb xmm1, xmm4 ; iAlpha ^ 0x7f + pshufb xmm2, xmm4 ; iBeta ^ 0x7f + pshufb xmm3, xmm4 ; (iAlpha >> 2) + 1 + mov r2, r1 ; iStride + neg r1 ; -iStride + lea r3, [r0 + r1] ; pPix - iStride + + ; Compute masks to enable/disable filtering. 
+ MOVDQ xmm7, [r3 + 1 * r1] ; p1 + MOVDQ xmm6, [r3 + 0 * r1] ; p0 + MOVDQ xmm0, [r0 + 0 * r2] ; q0 + movdqa xmm4, xmm6 + SSE2_AbsDiffUB xmm6, xmm0, xmm5 ; |p0 - q0| + SSE2_CmpgeUB xmm3, xmm6 ; |p0 - q0| < (iAlpha >> 2) + 2 + SSE2_CmpltUB xmm6, xmm1, [pic(WELS_DB127_16)] ; bDeltaP0Q0 = |p0 - q0| < iAlpha + MOVDQ xmm1, [r0 + 1 * r2] ; q1 + SSE2_AbsDiffUB xmm7, xmm4, xmm5 ; |p1 - p0| + SSE2_AbsDiffUB xmm0, xmm1, xmm5 ; |q1 - q0| + pmaxub xmm7, xmm0 ; max(|p1 - p0|, |q1 - q0|) + SSE2_CmpltUB xmm7, xmm2, [pic(WELS_DB127_16)] ; bDeltaP1P0 & bDeltaQ1Q0 = max(|p1 - p0|, |q1 - q0|) < iBeta + pand xmm6, xmm7 ; & bDeltaP0Q0 + + MOVDQ xmm7, [r3 + 2 * r1] ; p2 + SSE2_AbsDiffUB xmm7, xmm4, xmm5 ; |p2 - p0| + SSE2_CmpltUB xmm7, xmm2, [pic(WELS_DB127_16)] ; bDeltaP2P0 = |p2 - p0| < iBeta + pand xmm7, xmm3 ; &= |p0 - q0| < (iAlpha >> 2) + 2 + + MOVDQ xmm0, [r0 + 0 * r2] ; q0 + MOVDQ xmm5, [r0 + 2 * r2] ; q2 + SSE2_AbsDiffUB xmm5, xmm0, xmm4 ; |q2 - q0| + SSE2_CmpltUB xmm5, xmm2, [pic(WELS_DB127_16)] ; bDeltaQ2Q0 = |q2 - q0| < iBeta + pand xmm5, xmm3 ; &= |p0 - q0| < (iAlpha >> 2) + 2 + +%ifdef X86_32 + ; Push xmm5 to free up one register. Align stack so as to ensure that failed + ; store forwarding penalty cannot occur (up to ~50 cycles for 128-bit on IVB). 
+ mov r2, esp + sub esp, 16 + and esp, -16 + movdqa [esp], xmm5 + SSE2_DeblockLumaEq4_3x16P r3, r1, xmm0, xmm1, xmm6, xmm7, xmm2, xmm3, xmm5, xmm4, 1, [pic(WELS_DB1_16)] + movdqa xmm5, [esp] + mov esp, r2 + neg r1 + SSE2_DeblockLumaEq4_3x16P r0, r1, xmm0, xmm1, xmm6, xmm5, xmm2, xmm3, xmm7, xmm4, 0, [pic(WELS_DB1_16)] +%else + movdqa xmm9, [WELS_DB1_16] + SSE2_DeblockLumaEq4_3x16P r3, r1, xmm0, xmm1, xmm6, xmm7, xmm2, xmm3, xmm8, xmm4, 1, xmm9 + SSE2_DeblockLumaEq4_3x16P r0, r2, xmm0, xmm1, xmm6, xmm5, xmm2, xmm3, xmm7, xmm4, 0, xmm9 +%endif + + POP_XMM + LOAD_4_PARA_POP + DEINIT_X86_32_PIC + ret + + +; [out:p1,p0,q0,q1]=%1,%2,%3,%4 pPixCb=%5 pPixCr=%6 iStride=%7 3*iStride-1=%8 xmmclobber=%9,%10,%11 +%macro SSE2_LoadCbCr_4x16H 11 + movd %1, [%5 + 0 * %7 - 2] ; [p1,p0,q0,q1] cb line 0 + movd %2, [%5 + 2 * %7 - 2] ; [p1,p0,q0,q1] cb line 2 + punpcklbw %1, %2 ; [p1,p1,p0,p0,q0,q0,q1,q1] cb line 0,2 + movd %2, [%5 + 4 * %7 - 2] ; [p1,p0,q0,q1] cb line 4 + movd %9, [%5 + 2 * %8] ; [p1,p0,q0,q1] cb line 6 + punpcklbw %2, %9 ; [p1,p1,p0,p0,q0,q0,q1,q1] cb line 4,6 + punpcklwd %1, %2 ; [p1,p1,p1,p1,p0,p0,p0,p0,q0,q0,q0,q0,q1,q1,q1,q1] cb line 0,2,4,6 + movd %2, [%6 + 0 * %7 - 2] ; [p1,p0,q0,q1] cr line 0 + movd %9, [%6 + 2 * %7 - 2] ; [p1,p0,q0,q1] cr line 2 + punpcklbw %2, %9 ; [p1,p1,p0,p0,q0,q0,q1,q1] cr line 0,2 + movd %9, [%6 + 4 * %7 - 2] ; [p1,p0,q0,q1] cr line 4 + movd %10, [%6 + 2 * %8] ; [p1,p0,q0,q1] cr line 6 + punpcklbw %9, %10 ; [p1,p1,p0,p0,q0,q0,q1,q1] cr line 4,6 + punpcklwd %2, %9 ; [p1,p1,p1,p1,p0,p0,p0,p0,q0,q0,q0,q0,q1,q1,q1,q1] cr line 0,2,4,6 + add %5, %7 ; pPixCb += iStride + add %6, %7 ; pPixCr += iStride + movd %9, [%5 + 0 * %7 - 2] ; [p1,p0,q0,q1] cb line 1 + movd %10, [%5 + 2 * %7 - 2] ; [p1,p0,q0,q1] cb line 3 + punpcklbw %9, %10 ; [p1,p1,p0,p0,q0,q0,q1,q1] cb line 1,3 + movd %10, [%5 + 4 * %7 - 2] ; [p1,p0,q0,q1] cb line 5 + movd %3, [%5 + 2 * %8] ; [p1,p0,q0,q1] cb line 7 + punpcklbw %10, %3 ; [p1,p1,p0,p0,q0,q0,q1,q1] cb line 5,7 + punpcklwd 
%9, %10 ; [p1,p1,p1,p1,p0,p0,p0,p0,q0,q0,q0,q0,q1,q1,q1,q1] cb line 1,3,5,7 + movd %10, [%6 + 0 * %7 - 2] ; [p1,p0,q0,q1] cr line 1 + movd %3, [%6 + 2 * %7 - 2] ; [p1,p0,q0,q1] cr line 3 + punpcklbw %10, %3 ; [p1,p1,p0,p0,q0,q0,q1,q1] cr line 1,3 + movd %3, [%6 + 4 * %7 - 2] ; [p1,p0,q0,q1] cr line 5 + movd %4, [%6 + 2 * %8] ; [p1,p0,q0,q1] cr line 7 + punpcklbw %3, %4 ; [p1,p1,p0,p0,q0,q0,q1,q1] cr line 5,7 + punpcklwd %10, %3 ; [p1,p1,p1,p1,p0,p0,p0,p0,q0,q0,q0,q0,q1,q1,q1,q1] cr line 1,3,5,7 + movdqa %3, %1 + punpckldq %1, %2 ; [p1,p1,p1,p1,p1,p1,p1,p1,p0,p0,p0,p0,p0,p0,p0,p0] cb/cr line 0,2,4,6 + punpckhdq %3, %2 ; [q0,q0,q0,q0,q0,q0,q0,q0,q1,q1,q1,q1,q1,q1,q1,q1] cb/cr line 0,2,4,6 + movdqa %11, %9 + punpckldq %9, %10 ; [p1,p1,p1,p1,p1,p1,p1,p1,p0,p0,p0,p0,p0,p0,p0,p0] cb/cr line 1,3,5,7 + punpckhdq %11, %10 ; [q0,q0,q0,q0,q0,q0,q0,q0,q1,q1,q1,q1,q1,q1,q1,q1] cb/cr line 1,3,5,7 + movdqa %2, %1 + punpcklqdq %1, %9 ; [p1,p1,p1,p1,p1,p1,p1,p1,p1,p1,p1,p1,p1,p1,p1,p1] cb/cr line 0,2,4,6,1,3,5,7 + punpckhqdq %2, %9 ; [p0,p0,p0,p0,p0,p0,p0,p0,p0,p0,p0,p0,p0,p0,p0,p0] cb/cr line 0,2,4,6,1,3,5,7 + movdqa %4, %3 + punpcklqdq %3, %11 ; [q0,q0,q0,q0,q0,q0,q0,q0,q0,q0,q0,q0,q0,q0,q0,q0] cb/cr line 0,2,4,6,1,3,5,7 + punpckhqdq %4, %11 ; [q1,q1,q1,q1,q1,q1,q1,q1,q1,q1,q1,q1,q1,q1,q1,q1] cb/cr line 0,2,4,6,1,3,5,7 +%endmacro + +; pPixCb+iStride=%1 pPixCr+iStride=%2 iStride=%3 3*iStride-1=%4 p0=%5 q0=%6 rclobber=%7 dwclobber={%8,%9} xmmclobber=%10 +%macro SSE2_StoreCbCr_4x16H 10 + movdqa %10, %5 + punpcklbw %10, %6 ; [p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0] cb/cr line 0,2,4,6 + punpckhbw %5, %6 ; [p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0] cb/cr line 1,3,5,7 + mov %7, r7 ; preserve stack pointer + and r7, -16 ; align stack pointer + sub r7, 32 ; allocate stack space + movdqa [r7 ], %10 ; store [p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0] cb/cr line 0,2,4,6 on the stack + movdqa [r7 + 16], %5 ; store [p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0] cb/cr 
line 1,3,5,7 on the stack + mov %8, [r7 + 16] ; [p0,q0,p0,q0] cb line 1,3 + mov [%1 + 0 * %3 - 1], %9 ; store [p0,q0] cb line 1 + shr %8, 16 ; [p0,q0] cb line 3 + mov [%1 + 2 * %3 - 1], %9 ; store [p0,q0] cb line 3 + mov %8, [r7 + 20] ; [p0,q0,p0,q0] cb line 5,7 + mov [%1 + 4 * %3 - 1], %9 ; store [p0,q0] cb line 5 + shr %8, 16 ; [p0,q0] cb line 7 + mov [%1 + 2 * %4 + 1], %9 ; store [p0,q0] cb line 7 + mov %8, [r7 + 24] ; [p0,q0,p0,q0] cr line 1,3 + mov [%2 + 0 * %3 - 1], %9 ; store [p0,q0] cr line 1 + shr %8, 16 ; [p0,q0] cr line 3 + mov [%2 + 2 * %3 - 1], %9 ; store [p0,q0] cr line 3 + mov %8, [r7 + 28] ; [p0,q0,p0,q0] cr line 5,7 + mov [%2 + 4 * %3 - 1], %9 ; store [p0,q0] cr line 5 + shr %8, 16 ; [p0,q0] cr line 7 + mov [%2 + 2 * %4 + 1], %9 ; store [p0,q0] cr line 7 + sub %1, %3 ; pPixCb -= iStride + sub %2, %3 ; pPixCr -= iStride + mov %8, [r7 ] ; [p0,q0,p0,q0] cb line 0,2 + mov [%1 + 0 * %3 - 1], %9 ; store [p0,q0] cb line 0 + shr %8, 16 ; [p0,q0] cb line 2 + mov [%1 + 2 * %3 - 1], %9 ; store [p0,q0] cb line 2 + mov %8, [r7 + 4] ; [p0,q0,p0,q0] cb line 4,6 + mov [%1 + 4 * %3 - 1], %9 ; store [p0,q0] cb line 4 + shr %8, 16 ; [p0,q0] cb line 6 + mov [%1 + 2 * %4 + 1], %9 ; store [p0,q0] cb line 6 + mov %8, [r7 + 8] ; [p0,q0,p0,q0] cr line 0,2 + mov [%2 + 0 * %3 - 1], %9 ; store [p0,q0] cr line 0 + shr %8, 16 ; [p0,q0] cr line 2 + mov [%2 + 2 * %3 - 1], %9 ; store [p0,q0] cr line 2 + mov %8, [r7 + 12] ; [p0,q0,p0,q0] cr line 4,6 + mov [%2 + 4 * %3 - 1], %9 ; store [p0,q0] cr line 4 + shr %8, 16 ; [p0,q0] cr line 6 + mov [%2 + 2 * %4 + 1], %9 ; store [p0,q0] cr line 6 + mov r7, %7 ; restore stack pointer +%endmacro + +; p1=%1 p0=%2 q0=%3 q1=%4 iAlpha=%5 iBeta=%6 pTC=%7 xmmclobber=%8,%9,%10 interleaveTC=%11 +%macro SSSE3_DeblockChromaLt4 11 + movdqa %8, %3 + SSE2_AbsDiffUB %8, %2, %9 ; |p0 - q0| + SSE2_CmpgeUB %8, %5 ; !bDeltaP0Q0 = |p0 - q0| >= iAlpha + movdqa %9, %4 + SSE2_AbsDiffUB %9, %3, %5 ; |q1 - q0| + movdqa %10, %1 + SSE2_AbsDiffUB %10, %2, %5 ; |p1 - 
p0| + pmaxub %9, %10 ; max(|q1 - q0|, |p1 - p0|) + pxor %10, %10 + movd %5, %6 + pshufb %5, %10 ; iBeta + SSE2_CmpgeUB %9, %5 ; !bDeltaQ1Q0 | !bDeltaP1P0 = max(|q1 - q0|, |p1 - p0|) >= iBeta + por %8, %9 ; | !bDeltaP0Q0 + movd %5, [%7] +%if %11 + punpckldq %5, %5 + punpcklbw %5, %5 ; iTc +%else + pshufd %5, %5, 0 ; iTc +%endif + pcmpeqw %10, %10 ; FFh + movdqa %9, %5 + pcmpgtb %9, %10 ; iTc > -1 ? FFh : 00h + pandn %8, %5 ; iTc & bDeltaP0Q0 & bDeltaP1P0 & bDeltaQ1Q0 + pand %8, %9 ; &= (iTc > -1 ? FFh : 00h) + SSE2_DeblockP0Q0_Lt4 %1, %2, %3, %4, %8, %10, %5, %9 +%endmacro + +; p1=%1 p0=%2 q0=%3 q1=%4 iAlpha=%5 iBeta=%6 xmmclobber=%7,%8,%9 +%macro SSSE3_DeblockChromaEq4 9 + movdqa %7, %3 + SSE2_AbsDiffUB %7, %2, %8 ; |p0 - q0| + SSE2_CmpgeUB %7, %5 ; !bDeltaP0Q0 = |p0 - q0| >= iAlpha + movdqa %8, %4 + SSE2_AbsDiffUB %8, %3, %5 ; |q1 - q0| + movdqa %9, %1 + SSE2_AbsDiffUB %9, %2, %5 ; |p1 - p0| + pmaxub %8, %9 ; max(|q1 - q0|, |p1 - p0|) + pxor %9, %9 + movd %5, %6 + pshufb %5, %9 ; iBeta + SSE2_CmpgeUB %8, %5 ; !bDeltaQ1Q0 | !bDeltaP1P0 = max(|q1 - q0|, |p1 - p0|) >= iBeta + por %7, %8 ; !bDeltaP0Q0P1P0Q1Q0 = !bDeltaP0Q0 | !bDeltaQ1Q0 | !bDeltaP1P0 + WELS_DB1 %5 + movdqa %8, %2 + SSE2_AvgbFloor1 %8, %4, %5, %9 ; (p0 + q1) >> 1 + pavgb %8, %1 ; p0' = (p1 + ((p0 + q1) >> 1) + 1) >> 1 + movdqa %9, %7 + SSE2_Blend %2, %8, %7 ; p0out = bDeltaP0Q0P1P0Q1Q0 ? p0' : p0 + SSE2_AvgbFloor1 %1, %3, %5, %7 ; (q0 + p1) >> 1 + pavgb %1, %4 ; q0' = (q1 + ((q0 + p1) >> 1) + 1) >> 1 + SSE2_Blend %3, %1, %9 ; q0out = bDeltaP0Q0P1P0Q1Q0 ? 
q0' : q0 +%endmacro + + +;****************************************************************************** +; void DeblockChromaLt4V_ssse3(uint8_t * pPixCb, uint8_t * pPixCr, int32_t iStride, +; int32_t iAlpha, int32_t iBeta, int8_t * pTC); +;******************************************************************************* + +WELS_EXTERN DeblockChromaLt4V_ssse3 + %assign push_num 0 + INIT_X86_32_PIC r4 + LOAD_4_PARA + PUSH_XMM 8 + SIGN_EXTENSION r2, r2d + movd xmm7, arg4d + pxor xmm0, xmm0 + pshufb xmm7, xmm0 ; iAlpha + mov r3, r2 + neg r3 ; -iStride + + movq xmm0, [r0 + 0 * r2] ; q0 cb + movhps xmm0, [r1 + 0 * r2] ; q0 cr + movq xmm2, [r0 + 1 * r3] ; p0 cb + movhps xmm2, [r1 + 1 * r3] ; p0 cr + movq xmm1, [r0 + 1 * r2] ; q1 cb + movhps xmm1, [r1 + 1 * r2] ; q1 cr + movq xmm3, [r0 + 2 * r3] ; p1 cb + movhps xmm3, [r1 + 2 * r3] ; p1 cr + +%ifidni arg6, r5 + SSSE3_DeblockChromaLt4 xmm3, xmm2, xmm0, xmm1, xmm7, arg5d, arg6, xmm4, xmm5, xmm6, 1 +%else + mov r2, arg6 + SSSE3_DeblockChromaLt4 xmm3, xmm2, xmm0, xmm1, xmm7, arg5d, r2, xmm4, xmm5, xmm6, 1 +%endif + + movlps [r0 + 1 * r3], xmm2 ; store p0 cb + movhps [r1 + 1 * r3], xmm2 ; store p0 cr + movlps [r0 ], xmm0 ; store q0 cb + movhps [r1 ], xmm0 ; store q0 cr + + POP_XMM + LOAD_4_PARA_POP + DEINIT_X86_32_PIC + ret + + +;******************************************************************************** +; void DeblockChromaEq4V_ssse3(uint8_t * pPixCb, uint8_t * pPixCr, int32_t iStride, +; int32_t iAlpha, int32_t iBeta) +;******************************************************************************** + +WELS_EXTERN DeblockChromaEq4V_ssse3 + %assign push_num 0 + LOAD_4_PARA + PUSH_XMM 8 + SIGN_EXTENSION r2, r2d + movd xmm7, arg4d + pxor xmm0, xmm0 + pshufb xmm7, xmm0 ; iAlpha + mov r3, r2 + neg r3 ; -iStride + + movq xmm0, [r0 + 0 * r2] ; q0 cb + movhps xmm0, [r1 + 0 * r2] ; q0 cr + movq xmm2, [r0 + 1 * r3] ; p0 cb + movhps xmm2, [r1 + 1 * r3] ; p0 cr + movq xmm1, [r0 + 1 * r2] ; q1 cb + movhps xmm1, [r1 + 1 * r2] ; q1 cr 
+ movq xmm3, [r0 + 2 * r3] ; p1 cb + movhps xmm3, [r1 + 2 * r3] ; p1 cr + + SSSE3_DeblockChromaEq4 xmm3, xmm2, xmm0, xmm1, xmm7, arg5d, xmm4, xmm5, xmm6 + + movlps [r0 + 1 * r3], xmm2 ; store p0 cb + movhps [r1 + 1 * r3], xmm2 ; store p0 cr + movlps [r0 + 0 * r2], xmm0 ; store q0 cb + movhps [r1 + 0 * r2], xmm0 ; store q0 cr + + POP_XMM + LOAD_4_PARA_POP + ret + + +;******************************************************************************* +; void DeblockChromaLt4H_ssse3(uint8_t * pPixCb, uint8_t * pPixCr, int32_t iStride, +; int32_t iAlpha, int32_t iBeta, int8_t * pTC); +;******************************************************************************* + +WELS_EXTERN DeblockChromaLt4H_ssse3 + %assign push_num 0 + LOAD_6_PARA + PUSH_XMM 8 + SIGN_EXTENSION r2, r2d + movd xmm7, arg4d + pxor xmm0, xmm0 + pshufb xmm7, xmm0 ; iAlpha + lea r3, [3 * r2 - 1] ; 3 * iStride - 1 + + SSE2_LoadCbCr_4x16H xmm0, xmm1, xmm4, xmm5, r0, r1, r2, r3, xmm2, xmm3, xmm6 + INIT_X86_32_PIC r1 + SSSE3_DeblockChromaLt4 xmm0, xmm1, xmm4, xmm5, xmm7, arg5d, r5, xmm2, xmm3, xmm6, 0 + DEINIT_X86_32_PIC + SSE2_StoreCbCr_4x16H r0, r1, r2, r3, xmm1, xmm4, r5, r4d, r4w, xmm0 + + POP_XMM + LOAD_6_PARA_POP + ret + + +;*************************************************************************** +; void DeblockChromaEq4H_ssse3(uint8_t * pPixCb, uint8_t * pPixCr, int32_t iStride, +; int32_t iAlpha, int32_t iBeta) +;*************************************************************************** + +WELS_EXTERN DeblockChromaEq4H_ssse3 + %assign push_num 0 + LOAD_4_PARA + PUSH_XMM 8 + SIGN_EXTENSION r2, r2d + movd xmm7, arg4d + pxor xmm0, xmm0 + pshufb xmm7, xmm0 ; iAlpha + lea r3, [3 * r2 - 1] ; 3 * iStride - 1 + + SSE2_LoadCbCr_4x16H xmm0, xmm1, xmm4, xmm5, r0, r1, r2, r3, xmm2, xmm3, xmm6 + SSSE3_DeblockChromaEq4 xmm0, xmm1, xmm4, xmm5, xmm7, arg5d, xmm2, xmm3, xmm6 +%ifdef X86_32 + push r4 + push r5 + SSE2_StoreCbCr_4x16H r0, r1, r2, r3, xmm1, xmm4, r5, r4d, r4w, xmm0 + pop r5 + pop r4 +%else + 
SSE2_StoreCbCr_4x16H r0, r1, r2, r3, xmm1, xmm4, r5, r4d, r4w, xmm0 +%endif + + POP_XMM + LOAD_4_PARA_POP + ret + + +;******************************************************************************** +; +; void DeblockLumaTransposeH2V_sse2(uint8_t * pPixY, int32_t iStride, uint8_t * pDst); +; +;******************************************************************************** + +WELS_EXTERN DeblockLumaTransposeH2V_sse2 + push r3 + push r4 + push r5 + +%assign push_num 3 + LOAD_3_PARA + PUSH_XMM 8 + + SIGN_EXTENSION r1, r1d + + mov r5, r7 + mov r3, r7 + and r3, 0Fh + sub r7, r3 + sub r7, 10h + + lea r3, [r0 + r1 * 8] + lea r4, [r1 * 3] + + movq xmm0, [r0] + movq xmm7, [r3] + punpcklqdq xmm0, xmm7 + movq xmm1, [r0 + r1] + movq xmm7, [r3 + r1] + punpcklqdq xmm1, xmm7 + movq xmm2, [r0 + r1*2] + movq xmm7, [r3 + r1*2] + punpcklqdq xmm2, xmm7 + movq xmm3, [r0 + r4] + movq xmm7, [r3 + r4] + punpcklqdq xmm3, xmm7 + + lea r0, [r0 + r1 * 4] + lea r3, [r3 + r1 * 4] + movq xmm4, [r0] + movq xmm7, [r3] + punpcklqdq xmm4, xmm7 + movq xmm5, [r0 + r1] + movq xmm7, [r3 + r1] + punpcklqdq xmm5, xmm7 + movq xmm6, [r0 + r1*2] + movq xmm7, [r3 + r1*2] + punpcklqdq xmm6, xmm7 + + movdqa [r7], xmm0 + movq xmm7, [r0 + r4] + movq xmm0, [r3 + r4] + punpcklqdq xmm7, xmm0 + movdqa xmm0, [r7] + + SSE2_TransTwo8x8B xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, [r7] + ;pOut: m5, m3, m4, m8, m6, m2, m7, m1 + + movdqa [r2], xmm4 + movdqa [r2 + 10h], xmm2 + movdqa [r2 + 20h], xmm3 + movdqa [r2 + 30h], xmm7 + movdqa [r2 + 40h], xmm5 + movdqa [r2 + 50h], xmm1 + movdqa [r2 + 60h], xmm6 + movdqa [r2 + 70h], xmm0 + + mov r7, r5 + POP_XMM + pop r5 + pop r4 + pop r3 + ret + + +;******************************************************************************************* +; +; void DeblockLumaTransposeV2H_sse2(uint8_t * pPixY, int32_t iStride, uint8_t * pSrc); +; +;******************************************************************************************* + +WELS_EXTERN DeblockLumaTransposeV2H_sse2 + push r3 
+ push r4 + +%assign push_num 2 + LOAD_3_PARA + PUSH_XMM 8 + + SIGN_EXTENSION r1, r1d + + mov r4, r7 + mov r3, r7 + and r3, 0Fh + sub r7, r3 + sub r7, 10h + + movdqa xmm0, [r2] + movdqa xmm1, [r2 + 10h] + movdqa xmm2, [r2 + 20h] + movdqa xmm3, [r2 + 30h] + movdqa xmm4, [r2 + 40h] + movdqa xmm5, [r2 + 50h] + movdqa xmm6, [r2 + 60h] + movdqa xmm7, [r2 + 70h] + + SSE2_TransTwo8x8B xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, [r7] + ;pOut: m5, m3, m4, m8, m6, m2, m7, m1 + + lea r2, [r1 * 3] + + movq [r0], xmm4 + movq [r0 + r1], xmm2 + movq [r0 + r1*2], xmm3 + movq [r0 + r2], xmm7 + + lea r0, [r0 + r1*4] + movq [r0], xmm5 + movq [r0 + r1], xmm1 + movq [r0 + r1*2], xmm6 + movq [r0 + r2], xmm0 + + psrldq xmm4, 8 + psrldq xmm2, 8 + psrldq xmm3, 8 + psrldq xmm7, 8 + psrldq xmm5, 8 + psrldq xmm1, 8 + psrldq xmm6, 8 + psrldq xmm0, 8 + + lea r0, [r0 + r1*4] + movq [r0], xmm4 + movq [r0 + r1], xmm2 + movq [r0 + r1*2], xmm3 + movq [r0 + r2], xmm7 + + lea r0, [r0 + r1*4] + movq [r0], xmm5 + movq [r0 + r1], xmm1 + movq [r0 + r1*2], xmm6 + movq [r0 + r2], xmm0 + + + mov r7, r4 + POP_XMM + pop r4 + pop r3 + ret + +WELS_EXTERN WelsNonZeroCount_sse2 + %assign push_num 0 + LOAD_1_PARA + movdqu xmm0, [r0] + movq xmm1, [r0+16] + WELS_DB1 xmm2 + pminub xmm0, xmm2 + pminub xmm1, xmm2 + movdqu [r0], xmm0 + movq [r0+16], xmm1 + ret diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/x86/expand_picture.asm b/TMessagesProj/jni/third_party/openh264/src/codec/common/x86/expand_picture.asm new file mode 100644 index 000000000..a3402bbe3 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/x86/expand_picture.asm @@ -0,0 +1,728 @@ +;*! +;* \copy +;* Copyright (c) 2009-2013, Cisco Systems +;* All rights reserved. 
+;* +;* Redistribution and use in source and binary forms, with or without +;* modification, are permitted provided that the following conditions +;* are met: +;* +;* * Redistributions of source code must retain the above copyright +;* notice, this list of conditions and the following disclaimer. +;* +;* * Redistributions in binary form must reproduce the above copyright +;* notice, this list of conditions and the following disclaimer in +;* the documentation and/or other materials provided with the +;* distribution. +;* +;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +;* POSSIBILITY OF SUCH DAMAGE. 
+;* +;* +;* expand_picture.asm +;* +;* Abstract +;* mmxext/sse for expand_frame +;* +;* History +;* 09/25/2009 Created +;* +;* +;*************************************************************************/ + +%include "asm_inc.asm" + + + +;*********************************************************************** +; Macros and other preprocessor constants +;*********************************************************************** + +;*********************************************************************** +; Code +;*********************************************************************** + + + +SECTION .text + + +;;;;;;;expanding result;;;;;;; + +;aaaa|attttttttttttttttb|bbbb +;aaaa|attttttttttttttttb|bbbb +;aaaa|attttttttttttttttb|bbbb +;aaaa|attttttttttttttttb|bbbb +;---------------------------- +;aaaa|attttttttttttttttb|bbbb +;llll|l r|rrrr +;llll|l r|rrrr +;llll|l r|rrrr +;llll|l r|rrrr +;llll|l r|rrrr +;cccc|ceeeeeeeeeeeeeeeed|dddd +;---------------------------- +;cccc|ceeeeeeeeeeeeeeeed|dddd +;cccc|ceeeeeeeeeeeeeeeed|dddd +;cccc|ceeeeeeeeeeeeeeeed|dddd +;cccc|ceeeeeeeeeeeeeeeed|dddd + +%macro mov_line_8x4_mmx 3 ; dst, stride, mm? + movq [%1], %3 + movq [%1+%2], %3 + lea %1, [%1+2*%2] + movq [%1], %3 + movq [%1+%2], %3 + lea %1, [%1+2*%2] +%endmacro + +%macro mov_line_end8x4_mmx 3 ; dst, stride, mm? + movq [%1], %3 + movq [%1+%2], %3 + lea %1, [%1+2*%2] + movq [%1], %3 + movq [%1+%2], %3 + lea %1, [%1+%2] +%endmacro + +%macro mov_line_16x4_sse2 4 ; dst, stride, xmm?, u/a + movdq%4 [%1], %3 ; top(bottom)_0 + movdq%4 [%1+%2], %3 ; top(bottom)_1 + lea %1, [%1+2*%2] + movdq%4 [%1], %3 ; top(bottom)_2 + movdq%4 [%1+%2], %3 ; top(bottom)_3 + lea %1, [%1+2*%2] +%endmacro + +%macro mov_line_end16x4_sse2 4 ; dst, stride, xmm?, u/a + movdq%4 [%1], %3 ; top(bottom)_0 + movdq%4 [%1+%2], %3 ; top(bottom)_1 + lea %1, [%1+2*%2] + movdq%4 [%1], %3 ; top(bottom)_2 + movdq%4 [%1+%2], %3 ; top(bottom)_3 + lea %1, [%1+%2] +%endmacro + +%macro mov_line_32x4_sse2 3 ; dst, stride, xmm? 
+ movdqa [%1], %3 ; top(bottom)_0 + movdqa [%1+16], %3 ; top(bottom)_0 + movdqa [%1+%2], %3 ; top(bottom)_1 + movdqa [%1+%2+16], %3 ; top(bottom)_1 + lea %1, [%1+2*%2] + movdqa [%1], %3 ; top(bottom)_2 + movdqa [%1+16], %3 ; top(bottom)_2 + movdqa [%1+%2], %3 ; top(bottom)_3 + movdqa [%1+%2+16], %3 ; top(bottom)_3 + lea %1, [%1+2*%2] +%endmacro + +%macro mov_line_end32x4_sse2 3 ; dst, stride, xmm? + movdqa [%1], %3 ; top(bottom)_0 + movdqa [%1+16], %3 ; top(bottom)_0 + movdqa [%1+%2], %3 ; top(bottom)_1 + movdqa [%1+%2+16], %3 ; top(bottom)_1 + lea %1, [%1+2*%2] + movdqa [%1], %3 ; top(bottom)_2 + movdqa [%1+16], %3 ; top(bottom)_2 + movdqa [%1+%2], %3 ; top(bottom)_3 + movdqa [%1+%2+16], %3 ; top(bottom)_3 + lea %1, [%1+%2] +%endmacro + +%macro exp_top_bottom_sse2 1 ; iPaddingSize [luma(32)/chroma(16)] + ;r2 [width/16(8)] + ;r0 [pSrc +0], r5 [pSrc -width] r1[-stride], 32(16) ;top + ;r3 [pSrc +(h-1)*stride], r4 [pSrc + (h+31)*stride],32(16); bottom + +%if %1 == 32 ; for luma + sar r2, 04h ; width / 16(8) pixels +.top_bottom_loops: + ; top + movdqa xmm0, [r0] ; first line of picture pData + mov_line_16x4_sse2 r5, r1, xmm0, a ; dst, stride, xmm? + mov_line_16x4_sse2 r5, r1, xmm0, a + mov_line_16x4_sse2 r5, r1, xmm0, a + mov_line_16x4_sse2 r5, r1, xmm0, a + mov_line_16x4_sse2 r5, r1, xmm0, a ; dst, stride, xmm? + mov_line_16x4_sse2 r5, r1, xmm0, a + mov_line_16x4_sse2 r5, r1, xmm0, a + mov_line_end16x4_sse2 r5, r1, xmm0, a + + ; bottom + movdqa xmm1, [r3] ; last line of picture pData + mov_line_16x4_sse2 r4, r1, xmm1, a ; dst, stride, xmm? + mov_line_16x4_sse2 r4, r1, xmm1, a + mov_line_16x4_sse2 r4, r1, xmm1, a + mov_line_16x4_sse2 r4, r1, xmm1, a + mov_line_16x4_sse2 r4, r1, xmm1, a ; dst, stride, xmm? 
+ mov_line_16x4_sse2 r4, r1, xmm1, a + mov_line_16x4_sse2 r4, r1, xmm1, a + mov_line_end16x4_sse2 r4, r1, xmm1, a + + lea r0, [r0+16] ; top pSrc + lea r5, [r5+16] ; top dst + lea r3, [r3+16] ; bottom pSrc + lea r4, [r4+16] ; bottom dst + neg r1 ; flip stride sign: dst pointers sit on the last row written, so the next column is filled in the opposite direction + + dec r2 + jnz near .top_bottom_loops +%elif %1 == 16 ; for chroma + mov r6, r2 + sar r2, 04h ; (width / 16) pixels +.top_bottom_loops: + ; top + movdqa xmm0, [r0] ; first line of picture pData + mov_line_16x4_sse2 r5, r1, xmm0, a ; dst, stride, xmm? + mov_line_16x4_sse2 r5, r1, xmm0, a + mov_line_16x4_sse2 r5, r1, xmm0, a + mov_line_end16x4_sse2 r5, r1, xmm0, a + + ; bottom + movdqa xmm1, [r3] ; last line of picture pData + mov_line_16x4_sse2 r4, r1, xmm1, a ; dst, stride, xmm? + mov_line_16x4_sse2 r4, r1, xmm1, a + mov_line_16x4_sse2 r4, r1, xmm1, a + mov_line_end16x4_sse2 r4, r1, xmm1, a + + lea r0, [r0+16] ; top pSrc + lea r5, [r5+16] ; top dst + lea r3, [r3+16] ; bottom pSrc + lea r4, [r4+16] ; bottom dst + neg r1 ; flip stride sign: dst pointers sit on the last row written, so the next column is filled in the opposite direction + + dec r2 + jnz near .top_bottom_loops + + ; for remaining 8 bytes + and r6, 0fh ; any 8 bytes left? + test r6, r6 + jz near .to_be_continued ; nothing left, skip the 8-byte tail + + ; top + movq mm0, [r0] ; remaining 8 bytes + mov_line_8x4_mmx r5, r1, mm0 ; dst, stride, mm? + mov_line_8x4_mmx r5, r1, mm0 ; dst, stride, mm? + mov_line_8x4_mmx r5, r1, mm0 ; dst, stride, mm? + mov_line_end8x4_mmx r5, r1, mm0 ; dst, stride, mm? + ; bottom + movq mm1, [r3] + mov_line_8x4_mmx r4, r1, mm1 ; dst, stride, mm? + mov_line_8x4_mmx r4, r1, mm1 ; dst, stride, mm? + mov_line_8x4_mmx r4, r1, mm1 ; dst, stride, mm? + mov_line_end8x4_mmx r4, r1, mm1 ; dst, stride, mm?
+ WELSEMMS + +.to_be_continued: +%endif +%endmacro + +%macro exp_left_right_sse2 2 ; iPaddingSize [luma(32)/chroma(16)], u/a + ;r6 [height] + ;r0 [pSrc+0] r5[pSrc-32] r1[stride] + ;r3 [pSrc+(w-1)] r4[pSrc+w] + +%if %1 == 32 ; for luma +.left_right_loops: + ; left + movzx r2d, byte [r0] ; pixel pData for left border + SSE2_Copy16Times xmm0, r2d ; dst, tmp, pSrc [generic register name: a/b/c/d] + movdqa [r5], xmm0 + movdqa [r5+16], xmm0 + + ; right + movzx r2d, byte [r3] + SSE2_Copy16Times xmm1, r2d ; dst, tmp, pSrc [generic register name: a/b/c/d] + movdqa [r4], xmm1 + movdqa [r4+16], xmm1 + + lea r0, [r0+r1] ; left pSrc + lea r5, [r5+r1] ; left dst + lea r3, [r3+r1] ; right pSrc + lea r4, [r4+r1] ; right dst + + dec r6 + jnz near .left_right_loops +%elif %1 == 16 ; for chroma +.left_right_loops: + ; left + movzx r2d, byte [r0] ; pixel pData for left border + SSE2_Copy16Times xmm0, r2d ; dst, tmp, pSrc [generic register name: a/b/c/d] + movdqa [r5], xmm0 + + ; right + movzx r2d, byte [r3] + SSE2_Copy16Times xmm1, r2d ; dst, tmp, pSrc [generic register name: a/b/c/d] + movdq%2 [r4], xmm1 ; dst might not be 16-byte aligned for chroma planes + + lea r0, [r0+r1] ; left pSrc + lea r5, [r5+r1] ; left dst + lea r3, [r3+r1] ; right pSrc + lea r4, [r4+r1] ; right dst + + dec r6 + jnz near .left_right_loops +%endif +%endmacro + +%macro exp_cross_sse2 2 ; iPaddingSize [luma(32)/chroma(16)], u/a + ; top-left: (x)mm3, top-right: (x)mm4, bottom-left: (x)mm5, bottom-right: (x)mm6 + ; edi: TL, ebp: TR, eax: BL, ebx: BR, ecx, -stride + ;r3:TL ,r4:TR,r5:BL,r6:BR r1:-stride +%if %1 == 32 ; luma + ; TL + mov_line_32x4_sse2 r3, r1, xmm3 ; dst, stride, xmm? + mov_line_32x4_sse2 r3, r1, xmm3 ; dst, stride, xmm? + mov_line_32x4_sse2 r3, r1, xmm3 ; dst, stride, xmm? + mov_line_32x4_sse2 r3, r1, xmm3 ; dst, stride, xmm? + mov_line_32x4_sse2 r3, r1, xmm3 ; dst, stride, xmm? + mov_line_32x4_sse2 r3, r1, xmm3 ; dst, stride, xmm? + mov_line_32x4_sse2 r3, r1, xmm3 ; dst, stride, xmm?
+ mov_line_end32x4_sse2 r3, r1, xmm3 ; dst, stride, xmm? + + ; TR + mov_line_32x4_sse2 r4, r1, xmm4 ; dst, stride, xmm? + mov_line_32x4_sse2 r4, r1, xmm4 ; dst, stride, xmm? + mov_line_32x4_sse2 r4, r1, xmm4 ; dst, stride, xmm? + mov_line_32x4_sse2 r4, r1, xmm4 ; dst, stride, xmm? + mov_line_32x4_sse2 r4, r1, xmm4 ; dst, stride, xmm? + mov_line_32x4_sse2 r4, r1, xmm4 ; dst, stride, xmm? + mov_line_32x4_sse2 r4, r1, xmm4 ; dst, stride, xmm? + mov_line_end32x4_sse2 r4, r1, xmm4 ; dst, stride, xmm? + + ; BL + mov_line_32x4_sse2 r5, r1, xmm5 ; dst, stride, xmm? + mov_line_32x4_sse2 r5, r1, xmm5 ; dst, stride, xmm? + mov_line_32x4_sse2 r5, r1, xmm5 ; dst, stride, xmm? + mov_line_32x4_sse2 r5, r1, xmm5 ; dst, stride, xmm? + mov_line_32x4_sse2 r5, r1, xmm5 ; dst, stride, xmm? + mov_line_32x4_sse2 r5, r1, xmm5 ; dst, stride, xmm? + mov_line_32x4_sse2 r5, r1, xmm5 ; dst, stride, xmm? + mov_line_end32x4_sse2 r5, r1, xmm5 ; dst, stride, xmm? + + ; BR + mov_line_32x4_sse2 r6, r1, xmm6 ; dst, stride, xmm? + mov_line_32x4_sse2 r6, r1, xmm6 ; dst, stride, xmm? + mov_line_32x4_sse2 r6, r1, xmm6 ; dst, stride, xmm? + mov_line_32x4_sse2 r6, r1, xmm6 ; dst, stride, xmm? + mov_line_32x4_sse2 r6, r1, xmm6 ; dst, stride, xmm? + mov_line_32x4_sse2 r6, r1, xmm6 ; dst, stride, xmm? + mov_line_32x4_sse2 r6, r1, xmm6 ; dst, stride, xmm? + mov_line_end32x4_sse2 r6, r1, xmm6 ; dst, stride, xmm? +%elif %1 == 16 ; chroma + ; TL + mov_line_16x4_sse2 r3, r1, xmm3, a ; dst, stride, xmm? + mov_line_16x4_sse2 r3, r1, xmm3, a ; dst, stride, xmm? + mov_line_16x4_sse2 r3, r1, xmm3, a ; dst, stride, xmm? + mov_line_end16x4_sse2 r3, r1, xmm3, a ; dst, stride, xmm? + + ; TR + mov_line_16x4_sse2 r4, r1, xmm4, %2 ; dst, stride, xmm? + mov_line_16x4_sse2 r4, r1, xmm4, %2 ; dst, stride, xmm? + mov_line_16x4_sse2 r4, r1, xmm4, %2 ; dst, stride, xmm? + mov_line_end16x4_sse2 r4, r1, xmm4, %2 ; dst, stride, xmm? + + ; BL + mov_line_16x4_sse2 r5, r1, xmm5, a ; dst, stride, xmm? 
+ mov_line_16x4_sse2 r5, r1, xmm5, a ; dst, stride, xmm? + mov_line_16x4_sse2 r5, r1, xmm5, a ; dst, stride, xmm? + mov_line_end16x4_sse2 r5, r1, xmm5, a ; dst, stride, xmm? + + ; BR + mov_line_16x4_sse2 r6, r1, xmm6, %2 ; dst, stride, xmm? + mov_line_16x4_sse2 r6, r1, xmm6, %2 ; dst, stride, xmm? + mov_line_16x4_sse2 r6, r1, xmm6, %2 ; dst, stride, xmm? + mov_line_end16x4_sse2 r6, r1, xmm6, %2 ; dst, stride, xmm? +%endif +%endmacro + +;***********************************************************************---------------- +; void ExpandPictureLuma_sse2( uint8_t *pDst, +; const int32_t iStride, +; const int32_t iWidth, +; const int32_t iHeight ); +;***********************************************************************---------------- +WELS_EXTERN ExpandPictureLuma_sse2 + + push r4 + push r5 + push r6 + + %assign push_num 3 + LOAD_4_PARA + PUSH_XMM 7 + + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r2, r2d + SIGN_EXTENSION r3, r3d + + ;also prepare for cross border pData top-left:xmm3 + + movzx r6d,byte[r0] + SSE2_Copy16Times xmm3,r6d ;xmm3: pSrc[0] + + neg r1 + lea r5,[r0+r1] ;last line of top border r5= dst top pSrc[-stride] + neg r1 + + push r3 + + + dec r3 ;h-1 + imul r3,r1 ;(h-1)*stride + lea r3,[r0+r3] ;pSrc[(h-1)*stride] r3 = src bottom + + mov r6,r1 ;r6 = stride + sal r6,05h ;r6 = 32*stride + lea r4,[r3+r6] ;r4 = dst bottom + + ;also prepare for cross border data: bottom-left with xmm5,bottom-right xmm6 + + movzx r6d,byte [r3] ;bottom-left + SSE2_Copy16Times xmm5,r6d + + lea r6,[r3+r2-1] + movzx r6d,byte [r6] + SSE2_Copy16Times xmm6,r6d ;bottom-right + + neg r1 ;r1 = -stride + + push r0 + push r1 + push r2 + + exp_top_bottom_sse2 32 + + ; for both left and right border + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + pop r2 + pop r1 + pop r0 + + lea r5,[r0-32] ;left border dst luma =32 chroma = -16 + + lea r3,[r0+r2-1] ;right border src + lea r4,[r3+1] ;right border dst + + ;prepare for cross border data: top-rigth with xmm4 + movzx r6d,byte [r3] ;top -rigth + 
SSE2_Copy16Times xmm4,r6d + + neg r1 ;r1 = stride + + + pop r6 ; r6 = height + + + + push r0 + push r1 + push r2 + push r6 + + exp_left_right_sse2 32,a + + pop r6 + pop r2 + pop r1 + pop r0 + + ; for cross border [top-left, top-right, bottom-left, bottom-right] + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; have done xmm3,..,xmm6 cross pData initialization above, perform pading as below, To be continued.. + + neg r1 ;r1 = -stride + lea r3,[r0-32] + lea r3,[r3+r1] ;last line of top-left border + + lea r4,[r0+r2] ;psrc +width + lea r4,[r4+r1] ;psrc +width -stride + + + neg r1 ;r1 = stride + add r6,32 ;height +32(16) ,luma = 32, chroma = 16 + imul r6,r1 + + lea r5,[r3+r6] ;last line of bottom-left border + lea r6,[r4+r6] ;last line of botoom-right border + + neg r1 ; r1 = -stride + + ; for left & right border expanding + exp_cross_sse2 32,a + + POP_XMM + LOAD_4_PARA_POP + + pop r6 + pop r5 + pop r4 + + %assign push_num 0 + + + ret + +;***********************************************************************---------------- +; void ExpandPictureChromaAlign_sse2( uint8_t *pDst, +; const int32_t iStride, +; const int32_t iWidth, +; const int32_t iHeight ); +;***********************************************************************---------------- +WELS_EXTERN ExpandPictureChromaAlign_sse2 + + push r4 + push r5 + push r6 + + %assign push_num 3 + LOAD_4_PARA + PUSH_XMM 7 + + SIGN_EXTENSION r1,r1d + SIGN_EXTENSION r2,r2d + SIGN_EXTENSION r3,r3d + + ;also prepare for cross border pData top-left:xmm3 + + movzx r6d,byte [r0] + SSE2_Copy16Times xmm3,r6d ;xmm3: pSrc[0] + + neg r1 + lea r5,[r0+r1] ;last line of top border r5= dst top pSrc[-stride] + neg r1 + + push r3 + + + dec r3 ;h-1 + imul r3,r1 ;(h-1)*stride + lea r3,[r0+r3] ;pSrc[(h-1)*stride] r3 = src bottom + + mov r6,r1 ;r6 = stride + sal r6,04h ;r6 = 32*stride + lea r4,[r3+r6] ;r4 = dst bottom + + ;also prepare for cross border data: bottom-left with xmm5,bottom-right xmm6 + + movzx r6d,byte [r3] ;bottom-left + SSE2_Copy16Times 
xmm5,r6d + + lea r6,[r3+r2-1] + movzx r6d,byte [r6] + SSE2_Copy16Times xmm6,r6d ;bottom-right + + neg r1 ;r1 = -stride + + push r0 + push r1 + push r2 + + exp_top_bottom_sse2 16 + + ; for both left and right border + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + pop r2 + pop r1 + pop r0 + + lea r5,[r0-16] ;left border dst luma =32 chroma = -16 + + lea r3,[r0+r2-1] ;right border src + lea r4,[r3+1] ;right border dst + + ;prepare for cross border data: top-rigth with xmm4 + movzx r6d,byte [r3] ;top -rigth + SSE2_Copy16Times xmm4,r6d + + neg r1 ;r1 = stride + + + pop r6 ; r6 = height + + + + push r0 + push r1 + push r2 + push r6 + exp_left_right_sse2 16,a + + pop r6 + pop r2 + pop r1 + pop r0 + + ; for cross border [top-left, top-right, bottom-left, bottom-right] + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; have done xmm3,..,xmm6 cross pData initialization above, perform pading as below, To be continued.. + + neg r1 ;r1 = -stride + lea r3,[r0-16] + lea r3,[r3+r1] ;last line of top-left border + + lea r4,[r0+r2] ;psrc +width + lea r4,[r4+r1] ;psrc +width -stride + + + neg r1 ;r1 = stride + add r6,16 ;height +32(16) ,luma = 32, chroma = 16 + imul r6,r1 + + lea r5,[r3+r6] ;last line of bottom-left border + lea r6,[r4+r6] ;last line of botoom-right border + + neg r1 ; r1 = -stride + + ; for left & right border expanding + exp_cross_sse2 16,a + + POP_XMM + LOAD_4_PARA_POP + + pop r6 + pop r5 + pop r4 + + %assign push_num 0 + + + ret + +;***********************************************************************---------------- +; void ExpandPictureChromaUnalign_sse2( uint8_t *pDst, +; const int32_t iStride, +; const int32_t iWidth, +; const int32_t iHeight ); +;***********************************************************************---------------- +WELS_EXTERN ExpandPictureChromaUnalign_sse2 + push r4 + push r5 + push r6 + + %assign push_num 3 + LOAD_4_PARA + PUSH_XMM 7 + + SIGN_EXTENSION r1,r1d + SIGN_EXTENSION r2,r2d + SIGN_EXTENSION r3,r3d + + ;also prepare for cross border pData 
top-left:xmm3 + + movzx r6d,byte [r0] + SSE2_Copy16Times xmm3,r6d ;xmm3: pSrc[0] + + neg r1 + lea r5,[r0+r1] ;last line of top border r5= dst top pSrc[-stride] + neg r1 + + push r3 + + + dec r3 ;h-1 + imul r3,r1 ;(h-1)*stride + lea r3,[r0+r3] ;pSrc[(h-1)*stride] r3 = src bottom + + mov r6,r1 ;r6 = stride + sal r6,04h ;r6 = 32*stride + lea r4,[r3+r6] ;r4 = dst bottom + + ;also prepare for cross border data: bottom-left with xmm5,bottom-right xmm6 + + movzx r6d,byte [r3] ;bottom-left + SSE2_Copy16Times xmm5,r6d + + lea r6,[r3+r2-1] + movzx r6d,byte [r6] + SSE2_Copy16Times xmm6,r6d ;bottom-right + + neg r1 ;r1 = -stride + + push r0 + push r1 + push r2 + + exp_top_bottom_sse2 16 + + ; for both left and right border + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + pop r2 + pop r1 + pop r0 + + lea r5,[r0-16] ;left border dst luma =32 chroma = -16 + + lea r3,[r0+r2-1] ;right border src + lea r4,[r3+1] ;right border dst + + ;prepare for cross border data: top-rigth with xmm4 + movzx r6d,byte [r3] ;top -rigth + SSE2_Copy16Times xmm4,r6d + + neg r1 ;r1 = stride + + + pop r6 ; r6 = height + + + + push r0 + push r1 + push r2 + push r6 + exp_left_right_sse2 16,u + + pop r6 + pop r2 + pop r1 + pop r0 + + ; for cross border [top-left, top-right, bottom-left, bottom-right] + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; have done xmm3,..,xmm6 cross pData initialization above, perform pading as below, To be continued.. 
+ + neg r1 ;r1 = -stride + lea r3,[r0-16] + lea r3,[r3+r1] ;last line of top-left border + + lea r4,[r0+r2] ;psrc +width + lea r4,[r4+r1] ;psrc +width -stride + + + neg r1 ;r1 = stride + add r6,16 ;height +32(16) ,luma = 32, chroma = 16 + imul r6,r1 + + lea r5,[r3+r6] ;last line of bottom-left border + lea r6,[r4+r6] ;last line of botoom-right border + + neg r1 ; r1 = -stride + + ; for left & right border expanding + exp_cross_sse2 16,u + + POP_XMM + LOAD_4_PARA_POP + + pop r6 + pop r5 + pop r4 + + %assign push_num 0 + + + ret diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/x86/intra_pred_com.asm b/TMessagesProj/jni/third_party/openh264/src/codec/common/x86/intra_pred_com.asm new file mode 100644 index 000000000..b0bd1318b --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/x86/intra_pred_com.asm @@ -0,0 +1,117 @@ +;*! +;* \copy +;* Copyright (c) 2009-2013, Cisco Systems +;* All rights reserved. +;* +;* Redistribution and use in source and binary forms, with or without +;* modification, are permitted provided that the following conditions +;* are met: +;* +;* * Redistributions of source code must retain the above copyright +;* notice, this list of conditions and the following disclaimer. +;* +;* * Redistributions in binary form must reproduce the above copyright +;* notice, this list of conditions and the following disclaimer in +;* the documentation and/or other materials provided with the +;* distribution. +;* +;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +;* POSSIBILITY OF SUCH DAMAGE. +;* +;* +;* intra_pred_common.asm +;* +;* Abstract +;* sse2 function for intra predict operations +;* +;* History +;* 18/09/2009 Created +;* +;* +;*************************************************************************/ +%include "asm_inc.asm" + +;*********************************************************************** +; Code +;*********************************************************************** + +SECTION .text + +;*********************************************************************** +; void WelsI16x16LumaPredH_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride); +;*********************************************************************** + +%macro SSE2_PRED_H_16X16_ONE_LINE 0 + add r0, 16 + add r1, r2 + movzx r3, byte [r1] + SSE2_Copy16Times xmm0, r3d + movdqa [r0], xmm0 +%endmacro + +WELS_EXTERN WelsI16x16LumaPredH_sse2 + push r3 + %assign push_num 1 + LOAD_3_PARA + SIGN_EXTENSION r2, r2d + dec r1 + movzx r3, byte [r1] + SSE2_Copy16Times xmm0, r3d + movdqa [r0], xmm0 + SSE2_PRED_H_16X16_ONE_LINE + SSE2_PRED_H_16X16_ONE_LINE + SSE2_PRED_H_16X16_ONE_LINE + SSE2_PRED_H_16X16_ONE_LINE + SSE2_PRED_H_16X16_ONE_LINE + SSE2_PRED_H_16X16_ONE_LINE + SSE2_PRED_H_16X16_ONE_LINE + SSE2_PRED_H_16X16_ONE_LINE + SSE2_PRED_H_16X16_ONE_LINE + SSE2_PRED_H_16X16_ONE_LINE + SSE2_PRED_H_16X16_ONE_LINE + SSE2_PRED_H_16X16_ONE_LINE + SSE2_PRED_H_16X16_ONE_LINE + SSE2_PRED_H_16X16_ONE_LINE + SSE2_PRED_H_16X16_ONE_LINE + pop r3 + ret + 
+;*********************************************************************** +; void WelsI16x16LumaPredV_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride); +;*********************************************************************** +WELS_EXTERN WelsI16x16LumaPredV_sse2 + %assign push_num 0 + LOAD_3_PARA + SIGN_EXTENSION r2, r2d + sub r1, r2 + movdqa xmm0, [r1] + + movdqa [r0], xmm0 + movdqa [r0+10h], xmm0 + movdqa [r0+20h], xmm0 + movdqa [r0+30h], xmm0 + movdqa [r0+40h], xmm0 + movdqa [r0+50h], xmm0 + movdqa [r0+60h], xmm0 + movdqa [r0+70h], xmm0 + movdqa [r0+80h], xmm0 + movdqa [r0+90h], xmm0 + movdqa [r0+160], xmm0 + movdqa [r0+176], xmm0 + movdqa [r0+192], xmm0 + movdqa [r0+208], xmm0 + movdqa [r0+224], xmm0 + movdqa [r0+240], xmm0 + + ret + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/x86/mb_copy.asm b/TMessagesProj/jni/third_party/openh264/src/codec/common/x86/mb_copy.asm new file mode 100644 index 000000000..a47529502 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/x86/mb_copy.asm @@ -0,0 +1,615 @@ +;*! +;* \copy +;* Copyright (c) 2009-2013, Cisco Systems +;* All rights reserved. +;* +;* Redistribution and use in source and binary forms, with or without +;* modification, are permitted provided that the following conditions +;* are met: +;* +;* * Redistributions of source code must retain the above copyright +;* notice, this list of conditions and the following disclaimer. +;* +;* * Redistributions in binary form must reproduce the above copyright +;* notice, this list of conditions and the following disclaimer in +;* the documentation and/or other materials provided with the +;* distribution. +;* +;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +;* POSSIBILITY OF SUCH DAMAGE. +;* +;* +;* mb_copy.asm +;* +;* Abstract +;* mb_copy and mb_copy1 +;* +;* History +;* 15/09/2009 Created +;* 12/28/2009 Modified with larger throughput +;* 12/29/2011 Tuned WelsCopy16x16NotAligned_sse2, added UpdateMbMv_sse2 WelsCopy16x8NotAligned_sse2, +;* WelsCopy16x8_mmx, WelsCopy8x16_mmx etc; +;* +;* +;*********************************************************************************************/ +%include "asm_inc.asm" + +%ifdef __NASM_VER__ + %use smartalign +%endif + +;*********************************************************************** +; Macros and other preprocessor constants +;*********************************************************************** + +;*********************************************************************** +; Code +;*********************************************************************** + +SECTION .text + + +;*********************************************************************** +; void WelsCopy16x16_sse2( uint8_t* Dst, +; int32_t iStrideD, +; uint8_t* Src, +; int32_t iStrideS ) +;*********************************************************************** +WELS_EXTERN WelsCopy16x16_sse2 + + push r4 + push r5 + %assign push_num 2 + LOAD_4_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + + lea r4, [r1+2*r1] ;ebx, [eax+2*eax] ; x3 + lea r5, [r3+2*r3] ;edx, [ecx+2*ecx] ; x3 + + movdqa xmm0, [r2] + movdqa xmm1, [r2+r3] + movdqa xmm2, [r2+2*r3] + movdqa xmm3, [r2+r5] + lea r2, [r2+4*r3] + 
movdqa xmm4, [r2] + movdqa xmm5, [r2+r3] + movdqa xmm6, [r2+2*r3] + movdqa xmm7, [r2+r5] + lea r2, [r2+4*r3] + + movdqa [r0], xmm0 + movdqa [r0+r1], xmm1 + movdqa [r0+2*r1], xmm2 + movdqa [r0+r4], xmm3 + lea r0, [r0+4*r1] + movdqa [r0], xmm4 + movdqa [r0+r1], xmm5 + movdqa [r0+2*r1], xmm6 + movdqa [r0+r4], xmm7 + lea r0, [r0+4*r1] + + movdqa xmm0, [r2] + movdqa xmm1, [r2+r3] + movdqa xmm2, [r2+2*r3] + movdqa xmm3, [r2+r5] + lea r2, [r2+4*r3] + movdqa xmm4, [r2] + movdqa xmm5, [r2+r3] + movdqa xmm6, [r2+2*r3] + movdqa xmm7, [r2+r5] + + movdqa [r0], xmm0 + movdqa [r0+r1], xmm1 + movdqa [r0+2*r1], xmm2 + movdqa [r0+r4], xmm3 + lea r0, [r0+4*r1] + movdqa [r0], xmm4 + movdqa [r0+r1], xmm5 + movdqa [r0+2*r1], xmm6 + movdqa [r0+r4], xmm7 + POP_XMM + LOAD_4_PARA_POP + pop r5 + pop r4 + ret + +;*********************************************************************** +; void WelsCopy16x16NotAligned_sse2( uint8_t* Dst, +; int32_t iStrideD, +; uint8_t* Src, +; int32_t iStrideS ) +;*********************************************************************** +; dst can be align with 16 bytes, but not sure about pSrc, 12/29/2011 +WELS_EXTERN WelsCopy16x16NotAligned_sse2 + push r4 + push r5 + %assign push_num 2 + LOAD_4_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + + lea r4, [r1+2*r1] ;ebx, [eax+2*eax] ; x3 + lea r5, [r3+2*r3] ;edx, [ecx+2*ecx] ; x3 + + movdqu xmm0, [r2] + movdqu xmm1, [r2+r3] + movdqu xmm2, [r2+2*r3] + movdqu xmm3, [r2+r5] + lea r2, [r2+4*r3] + movdqu xmm4, [r2] + movdqu xmm5, [r2+r3] + movdqu xmm6, [r2+2*r3] + movdqu xmm7, [r2+r5] + lea r2, [r2+4*r3] + + movdqa [r0], xmm0 + movdqa [r0+r1], xmm1 + movdqa [r0+2*r1], xmm2 + movdqa [r0+r4], xmm3 + lea r0, [r0+4*r1] + movdqa [r0], xmm4 + movdqa [r0+r1], xmm5 + movdqa [r0+2*r1], xmm6 + movdqa [r0+r4], xmm7 + lea r0, [r0+4*r1] + + movdqu xmm0, [r2] + movdqu xmm1, [r2+r3] + movdqu xmm2, [r2+2*r3] + movdqu xmm3, [r2+r5] + lea r2, [r2+4*r3] + movdqu xmm4, [r2] + movdqu xmm5, [r2+r3] + movdqu xmm6, 
[r2+2*r3] + movdqu xmm7, [r2+r5] + + movdqa [r0], xmm0 + movdqa [r0+r1], xmm1 + movdqa [r0+2*r1], xmm2 + movdqa [r0+r4], xmm3 + lea r0, [r0+4*r1] + movdqa [r0], xmm4 + movdqa [r0+r1], xmm5 + movdqa [r0+2*r1], xmm6 + movdqa [r0+r4], xmm7 + POP_XMM + LOAD_4_PARA_POP + pop r5 + pop r4 + ret + +; , 12/29/2011 +;*********************************************************************** +; void WelsCopy16x8NotAligned_sse2(uint8_t* Dst, +; int32_t iStrideD, +; uint8_t* Src, +; int32_t iStrideS ) +;*********************************************************************** +WELS_EXTERN WelsCopy16x8NotAligned_sse2 + push r4 + push r5 + %assign push_num 2 + LOAD_4_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + + lea r4, [r1+2*r1] ;ebx, [eax+2*eax] ; x3 + lea r5, [r3+2*r3] ;edx, [ecx+2*ecx] ; x3 + + movdqu xmm0, [r2] + movdqu xmm1, [r2+r3] + movdqu xmm2, [r2+2*r3] + movdqu xmm3, [r2+r5] + lea r2, [r2+4*r3] + movdqu xmm4, [r2] + movdqu xmm5, [r2+r3] + movdqu xmm6, [r2+2*r3] + movdqu xmm7, [r2+r5] + + movdqa [r0], xmm0 + movdqa [r0+r1], xmm1 + movdqa [r0+2*r1], xmm2 + movdqa [r0+r4], xmm3 + lea r0, [r0+4*r1] + movdqa [r0], xmm4 + movdqa [r0+r1], xmm5 + movdqa [r0+2*r1], xmm6 + movdqa [r0+r4], xmm7 + POP_XMM + LOAD_4_PARA_POP + pop r5 + pop r4 + ret + + +;*********************************************************************** +; void WelsCopy8x16_mmx(uint8_t* Dst, +; int32_t iStrideD, +; uint8_t* Src, +; int32_t iStrideS ) +;*********************************************************************** +WELS_EXTERN WelsCopy8x16_mmx + %assign push_num 0 + LOAD_4_PARA + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + + movq mm0, [r2] + movq mm1, [r2+r3] + lea r2, [r2+2*r3] + movq mm2, [r2] + movq mm3, [r2+r3] + lea r2, [r2+2*r3] + movq mm4, [r2] + movq mm5, [r2+r3] + lea r2, [r2+2*r3] + movq mm6, [r2] + movq mm7, [r2+r3] + lea r2, [r2+2*r3] + + movq [r0], mm0 + movq [r0+r1], mm1 + lea r0, [r0+2*r1] + movq [r0], mm2 + movq [r0+r1], mm3 + lea r0, [r0+2*r1] + movq [r0], 
mm4 + movq [r0+r1], mm5 + lea r0, [r0+2*r1] + movq [r0], mm6 + movq [r0+r1], mm7 + lea r0, [r0+2*r1] + + movq mm0, [r2] + movq mm1, [r2+r3] + lea r2, [r2+2*r3] + movq mm2, [r2] + movq mm3, [r2+r3] + lea r2, [r2+2*r3] + movq mm4, [r2] + movq mm5, [r2+r3] + lea r2, [r2+2*r3] + movq mm6, [r2] + movq mm7, [r2+r3] + + movq [r0], mm0 + movq [r0+r1], mm1 + lea r0, [r0+2*r1] + movq [r0], mm2 + movq [r0+r1], mm3 + lea r0, [r0+2*r1] + movq [r0], mm4 + movq [r0+r1], mm5 + lea r0, [r0+2*r1] + movq [r0], mm6 + movq [r0+r1], mm7 + + WELSEMMS + LOAD_4_PARA_POP + ret + +;*********************************************************************** +; void WelsCopy8x8_mmx( uint8_t* Dst, +; int32_t iStrideD, +; uint8_t* Src, +; int32_t iStrideS ) +;*********************************************************************** +WELS_EXTERN WelsCopy8x8_mmx + push r4 + %assign push_num 1 + LOAD_4_PARA + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + lea r4, [r3+2*r3] ;edx, [ebx+2*ebx] + + ; to prefetch next loop + prefetchnta [r2+2*r3] + prefetchnta [r2+r4] + movq mm0, [r2] + movq mm1, [r2+r3] + lea r2, [r2+2*r3] + ; to prefetch next loop + prefetchnta [r2+2*r3] + prefetchnta [r2+r4] + movq mm2, [r2] + movq mm3, [r2+r3] + lea r2, [r2+2*r3] + ; to prefetch next loop + prefetchnta [r2+2*r3] + prefetchnta [r2+r4] + movq mm4, [r2] + movq mm5, [r2+r3] + lea r2, [r2+2*r3] + movq mm6, [r2] + movq mm7, [r2+r3] + + movq [r0], mm0 + movq [r0+r1], mm1 + lea r0, [r0+2*r1] + movq [r0], mm2 + movq [r0+r1], mm3 + lea r0, [r0+2*r1] + movq [r0], mm4 + movq [r0+r1], mm5 + lea r0, [r0+2*r1] + movq [r0], mm6 + movq [r0+r1], mm7 + + WELSEMMS + LOAD_4_PARA_POP + pop r4 + ret + +; (dunhuang@cisco), 12/21/2011 +;*********************************************************************** +; void UpdateMbMv_sse2( SMVUnitXY *pMvBuffer, const SMVUnitXY sMv ) +;*********************************************************************** +WELS_EXTERN UpdateMbMv_sse2 + + %assign push_num 0 + LOAD_2_PARA + + movd xmm0, r1d ; _mv + 
pshufd xmm1, xmm0, $00 + movdqa [r0 ], xmm1 + movdqa [r0+0x10], xmm1 + movdqa [r0+0x20], xmm1 + movdqa [r0+0x30], xmm1 + ret + +;******************************************************************************* +; Macros and other preprocessor constants +;******************************************************************************* + +;******************************************************************************* +; Code +;******************************************************************************* + +SECTION .text + + + + +;******************************************************************************* +; void PixelAvgWidthEq4_mmx( uint8_t *pDst, int iDstStride, +; uint8_t *pSrcA, int iSrcAStride, +; uint8_t *pSrcB, int iSrcBStride, +; int iHeight ); +;******************************************************************************* +WELS_EXTERN PixelAvgWidthEq4_mmx + + %assign push_num 0 + LOAD_7_PARA + + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r5, r5d + SIGN_EXTENSION r6, r6d + +ALIGN 4 +.height_loop: + movd mm0, [r4] + pavgb mm0, [r2] + movd [r0], mm0 + + dec r6 + lea r0, [r0+r1] + lea r2, [r2+r3] + lea r4, [r4+r5] + jne .height_loop + + WELSEMMS + LOAD_7_PARA_POP + ret + + +;******************************************************************************* +; void PixelAvgWidthEq8_mmx( uint8_t *pDst, int iDstStride, +; uint8_t *pSrcA, int iSrcAStride, +; uint8_t *pSrcB, int iSrcBStride, +; int iHeight ); +;******************************************************************************* +WELS_EXTERN PixelAvgWidthEq8_mmx + %assign push_num 0 + LOAD_7_PARA + + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r5, r5d + SIGN_EXTENSION r6, r6d + +ALIGN 4 +.height_loop: + movq mm0, [r2] + pavgb mm0, [r4] + movq [r0], mm0 + movq mm0, [r2+r3] + pavgb mm0, [r4+r5] + movq [r0+r1], mm0 + + lea r2, [r2+2*r3] + lea r4, [r4+2*r5] + lea r0, [r0+2*r1] + + sub r6, 2 + jnz .height_loop + + WELSEMMS + LOAD_7_PARA_POP + ret + + + 
+;******************************************************************************* +; void PixelAvgWidthEq16_sse2( uint8_t *pDst, int iDstStride, +; uint8_t *pSrcA, int iSrcAStride, +; uint8_t *pSrcB, int iSrcBStride, +; int iHeight ); +;******************************************************************************* +WELS_EXTERN PixelAvgWidthEq16_sse2 + + %assign push_num 0 + LOAD_7_PARA + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r5, r5d + SIGN_EXTENSION r6, r6d +ALIGN 4 +.height_loop: + movdqu xmm0, [r2] + movdqu xmm1, [r4] + pavgb xmm0, xmm1 + ;pavgb xmm0, [r4] + movdqu [r0], xmm0 + + movdqu xmm0, [r2+r3] + movdqu xmm1, [r4+r5] + pavgb xmm0, xmm1 + movdqu [r0+r1], xmm0 + + movdqu xmm0, [r2+2*r3] + movdqu xmm1, [r4+2*r5] + pavgb xmm0, xmm1 + movdqu [r0+2*r1], xmm0 + + lea r2, [r2+2*r3] + lea r4, [r4+2*r5] + lea r0, [r0+2*r1] + + movdqu xmm0, [r2+r3] + movdqu xmm1, [r4+r5] + pavgb xmm0, xmm1 + movdqu [r0+r1], xmm0 + + lea r2, [r2+2*r3] + lea r4, [r4+2*r5] + lea r0, [r0+2*r1] + + sub r6, 4 + jne .height_loop + + WELSEMMS + LOAD_7_PARA_POP + ret + +; load_instr=%1 store_instr=%2 p_dst=%3 i_dststride=%4 p_src=%5 i_srcstride=%6 cnt=%7 r_tmp=%8,%9 mm_tmp=%10,%11 +%macro CopyStrided4N 11 + lea %8, [3 * %6] + lea %9, [3 * %4] +ALIGN 32 +%%loop: + %1 %10, [%5] + %1 %11, [%5 + %6] + %2 [%3], %10 + %2 [%3 + %4], %11 + %1 %10, [%5 + 2 * %6] + %1 %11, [%5 + %8] + %2 [%3 + 2 * %4], %10 + %2 [%3 + %9], %11 + lea %5, [%5 + 4 * %6] + lea %3, [%3 + 4 * %4] + sub %7, 4 + jg %%loop +%endmacro + +;******************************************************************************* +; void McCopyWidthEq8_mmx( uint8_t *pSrc, int iSrcStride, +; uint8_t *pDst, int iDstStride, int iHeight ) +;******************************************************************************* +WELS_EXTERN McCopyWidthEq8_mmx + %assign push_num 0 +%ifdef X86_32 + push r5 + push r6 + %assign push_num 2 +%endif + LOAD_5_PARA + + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r4, 
r4d + + CopyStrided4N movq, movq, r2, r3, r0, r1, r4, r5, r6, mm0, mm1 + + WELSEMMS + LOAD_5_PARA_POP +%ifdef X86_32 + pop r6 + pop r5 +%endif + ret + + +;******************************************************************************* +; void McCopyWidthEq16_sse2( uint8_t *pSrc, int iSrcStride, uint8_t *pDst, int iDstStride, int iHeight ) +;******************************************************************************* +;read unaligned memory +%macro SSE_READ_UNA 2 + movq %1, [%2] + movhps %1, [%2+8] +%endmacro + +;write unaligned memory +%macro SSE_WRITE_UNA 2 + movq [%1], %2 + movhps [%1+8], %2 +%endmacro +WELS_EXTERN McCopyWidthEq16_sse2 + %assign push_num 0 + LOAD_5_PARA + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r4, r4d +ALIGN 4 +.height_loop: + SSE_READ_UNA xmm0, r0 + SSE_READ_UNA xmm1, r0+r1 + SSE_WRITE_UNA r2, xmm0 + SSE_WRITE_UNA r2+r3, xmm1 + + sub r4, 2 + lea r0, [r0+r1*2] + lea r2, [r2+r3*2] + jnz .height_loop + + LOAD_5_PARA_POP + ret + + +;******************************************************************************* +; void McCopyWidthEq16_sse3( uint8_t *pSrc, int iSrcStride, uint8_t *pDst, int iDstStride, int iHeight ) +;******************************************************************************* +WELS_EXTERN McCopyWidthEq16_sse3 + %assign push_num 0 +%ifdef X86_32 + push r5 + push r6 + %assign push_num 2 +%endif + LOAD_5_PARA + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r4, r4d + + CopyStrided4N lddqu, MOVDQ, r2, r3, r0, r1, r4, r5, r6, xmm0, xmm1 + + LOAD_5_PARA_POP +%ifdef X86_32 + pop r6 + pop r5 +%endif + ret diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/x86/mc_chroma.asm b/TMessagesProj/jni/third_party/openh264/src/codec/common/x86/mc_chroma.asm new file mode 100644 index 000000000..02ab26a89 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/x86/mc_chroma.asm @@ -0,0 +1,313 @@ +;*! 
+;* \copy +;* Copyright (c) 2004-2013, Cisco Systems +;* All rights reserved. +;* +;* Redistribution and use in source and binary forms, with or without +;* modification, are permitted provided that the following conditions +;* are met: +;* +;* * Redistributions of source code must retain the above copyright +;* notice, this list of conditions and the following disclaimer. +;* +;* * Redistributions in binary form must reproduce the above copyright +;* notice, this list of conditions and the following disclaimer in +;* the documentation and/or other materials provided with the +;* distribution. +;* +;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +;* POSSIBILITY OF SUCH DAMAGE. 
+;* +;* +;* mc_chroma.asm +;* +;* Abstract +;* mmx motion compensation for chroma +;* +;* History +;* 10/13/2004 Created +;* +;* +;*************************************************************************/ +%include "asm_inc.asm" + +;*********************************************************************** +; Local Data (Read Only) +;*********************************************************************** + +SECTION .rodata align=16 + +;*********************************************************************** +; Various memory constants (trigonometric values or rounding values) +;*********************************************************************** + +ALIGN 16 +h264_d0x20_sse2: + dw 32,32,32,32,32,32,32,32 +ALIGN 16 +h264_d0x20_mmx: + dw 32,32,32,32 + + +;============================================================================= +; Code +;============================================================================= + +SECTION .text + +;******************************************************************************* +; void McChromaWidthEq4_mmx( const uint8_t *src, +; int32_t iSrcStride, +; uint8_t *pDst, +; int32_t iDstStride, +; const uint8_t *pABCD, +; int32_t iHeigh ); +;******************************************************************************* +WELS_EXTERN McChromaWidthEq4_mmx + %assign push_num 0 + LOAD_6_PARA + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r5, r5d + + movd mm3, [r4]; [eax] + WELS_Zero mm7 + punpcklbw mm3, mm3 + movq mm4, mm3 + punpcklwd mm3, mm3 + punpckhwd mm4, mm4 + + movq mm5, mm3 + punpcklbw mm3, mm7 + punpckhbw mm5, mm7 + + movq mm6, mm4 + punpcklbw mm4, mm7 + punpckhbw mm6, mm7 + + lea r4, [r0 + r1] ;lea ebx, [esi + eax] + movd mm0, [r0] + movd mm1, [r0+1] + punpcklbw mm0, mm7 + punpcklbw mm1, mm7 +.xloop: + + pmullw mm0, mm3 + pmullw mm1, mm5 + paddw mm0, mm1 + + movd mm1, [r4] + punpcklbw mm1, mm7 + movq mm2, mm1 + pmullw mm1, mm4 + paddw mm0, mm1 + + movd mm1, [r4+1] + punpcklbw mm1, mm7 + movq mm7, mm1 + 
pmullw mm1,mm6 + paddw mm0, mm1 + movq mm1,mm7 + +%ifdef X86_32_PICASM + pcmpeqw mm7, mm7 + psrlw mm7, 15 + psllw mm7, 5 + paddw mm0, mm7 +%else + paddw mm0, [h264_d0x20_mmx] +%endif + psrlw mm0, 6 + + WELS_Zero mm7 + packuswb mm0, mm7 + movd [r2], mm0 + + movq mm0, mm2 + + lea r2, [r2 + r3] + lea r4, [r4 + r1] + + dec r5 + jnz near .xloop + WELSEMMS + LOAD_6_PARA_POP + ret + + +;******************************************************************************* +; void McChromaWidthEq8_sse2( const uint8_t *pSrc, +; int32_t iSrcStride, +; uint8_t *pDst, +; int32_t iDstStride, +; const uint8_t *pABCD, +; int32_t iheigh ); +;******************************************************************************* +WELS_EXTERN McChromaWidthEq8_sse2 + %assign push_num 0 + LOAD_6_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r5, r5d + + movd xmm3, [r4] + WELS_Zero xmm7 + punpcklbw xmm3, xmm3 + punpcklwd xmm3, xmm3 + + movdqa xmm4, xmm3 + punpckldq xmm3, xmm3 + punpckhdq xmm4, xmm4 + movdqa xmm5, xmm3 + movdqa xmm6, xmm4 + + punpcklbw xmm3, xmm7 + punpckhbw xmm5, xmm7 + punpcklbw xmm4, xmm7 + punpckhbw xmm6, xmm7 + + lea r4, [r0 + r1] ;lea ebx, [esi + eax] + movq xmm0, [r0] + movq xmm1, [r0+1] + punpcklbw xmm0, xmm7 + punpcklbw xmm1, xmm7 +.xloop: + + pmullw xmm0, xmm3 + pmullw xmm1, xmm5 + paddw xmm0, xmm1 + + movq xmm1, [r4] + punpcklbw xmm1, xmm7 + movdqa xmm2, xmm1 + pmullw xmm1, xmm4 + paddw xmm0, xmm1 + + movq xmm1, [r4+1] + punpcklbw xmm1, xmm7 + movdqa xmm7, xmm1 + pmullw xmm1, xmm6 + paddw xmm0, xmm1 + movdqa xmm1,xmm7 + +%ifdef X86_32_PICASM + pcmpeqw xmm7, xmm7 + psrlw xmm7, 15 + psllw xmm7, 5 + paddw xmm0, xmm7 +%else + paddw xmm0, [h264_d0x20_sse2] +%endif + psrlw xmm0, 6 + + WELS_Zero xmm7 + packuswb xmm0, xmm7 + movq [r2], xmm0 + + movdqa xmm0, xmm2 + + lea r2, [r2 + r3] + lea r4, [r4 + r1] + + dec r5 + jnz near .xloop + + POP_XMM + LOAD_6_PARA_POP + + ret + + + + 
+;***********************************************************************
+; void McChromaWidthEq8_ssse3( const uint8_t *pSrc, int32_t iSrcStride,
+;                              uint8_t *pDst, int32_t iDstStride,
+;                              const uint8_t *pABCD, int32_t iHeight);
+; 8 pixels per row, two rows per loop pass, using pmaddubsw on interleaved
+; (s[x], s[x+1]) byte pairs:
+;   dst[x] = (A*s[x] + B*s[x+1] + C*n[x] + D*n[x+1] + 32) >> 6
+;***********************************************************************
+WELS_EXTERN McChromaWidthEq8_ssse3
+    %assign push_num 0
+    LOAD_6_PARA
+    PUSH_XMM 8
+    SIGN_EXTENSION r1, r1d
+    SIGN_EXTENSION r3, r3d
+    SIGN_EXTENSION r5, r5d
+; NOTE(review): assumes LOAD_6_PARA leaves arguments 1..6 in r0..r5 - confirm in asm_inc.asm
+    pxor xmm7, xmm7                     ; has no effect: xmm7 is reloaded with the rounding constant below before any use
+    movd xmm5, [r4]                     ; the four weight bytes A,B,C,D
+    punpcklwd xmm5, xmm5                ; words: AB AB CD CD
+    punpckldq xmm5, xmm5                ; AB x4, CD x4
+    movdqa xmm6, xmm5
+    punpcklqdq xmm5, xmm5               ; xmm5 = (A,B) byte pair x8
+    punpckhqdq xmm6, xmm6               ; xmm6 = (C,D) byte pair x8
+
+    sub r2, r3                          ; pDst is moved back two rows because the loop
+    sub r2, r3                          ; advances it by 2*stride before its first store
+%ifdef X86_32_PICASM
+    pcmpeqw xmm7, xmm7                  ; all bits set
+    psrlw xmm7, 15                      ; each word = 1
+    psllw xmm7, 5                       ; each word = 32
+%else
+    movdqa xmm7, [h264_d0x20_sse2]
+%endif
+
+    movdqu xmm0, [r0]
+    movdqa xmm1, xmm0
+    psrldq xmm1, 1                      ; same row shifted by one byte
+    punpcklbw xmm0, xmm1                ; xmm0 = (s[x], s[x+1]) pairs for x = 0..7
+
+.hloop_chroma:
+    lea r2, [r2+2*r3]
+
+    movdqu xmm2, [r0+r1]
+    movdqa xmm3, xmm2
+    psrldq xmm3, 1
+    punpcklbw xmm2, xmm3                ; pairs of the next row
+    movdqa xmm4, xmm2                   ; kept as the upper row of the second output row
+
+    pmaddubsw xmm0, xmm5                ; A*s[x] + B*s[x+1]
+    pmaddubsw xmm2, xmm6                ; C*n[x] + D*n[x+1]
+    paddw xmm0, xmm2
+    paddw xmm0, xmm7                    ; + 32
+    psrlw xmm0, 6
+    packuswb xmm0, xmm0
+    movq [r2],xmm0                      ; first output row of this pass
+
+    lea r0, [r0+2*r1]
+    movdqu xmm2, [r0]
+    movdqa xmm3, xmm2
+    psrldq xmm3, 1
+    punpcklbw xmm2, xmm3
+    movdqa xmm0, xmm2                   ; becomes the upper row of the next pass
+
+    pmaddubsw xmm4, xmm5
+    pmaddubsw xmm2, xmm6
+    paddw xmm4, xmm2
+    paddw xmm4, xmm7
+    psrlw xmm4, 6
+    packuswb xmm4, xmm4
+    movq [r2+r3],xmm4                   ; second output row of this pass
+
+    sub r5, 2                           ; two rows done; the loop only ends when the count reaches exactly 0, so iHeight must be even
+    jnz .hloop_chroma
+
+    POP_XMM
+    LOAD_6_PARA_POP
+
+    ret
+
+
diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/x86/mc_luma.asm b/TMessagesProj/jni/third_party/openh264/src/codec/common/x86/mc_luma.asm
new file mode 100644
index 000000000..624e1fef2
--- /dev/null
+++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/x86/mc_luma.asm
@@ -0,0 +1,4490 @@
+;*!
+;* \copy
+;*     Copyright (c)  2009-2013, Cisco Systems
+;*     All rights reserved.
+;* +;* Redistribution and use in source and binary forms, with or without +;* modification, are permitted provided that the following conditions +;* are met: +;* +;* * Redistributions of source code must retain the above copyright +;* notice, this list of conditions and the following disclaimer. +;* +;* * Redistributions in binary form must reproduce the above copyright +;* notice, this list of conditions and the following disclaimer in +;* the documentation and/or other materials provided with the +;* distribution. +;* +;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +;* POSSIBILITY OF SUCH DAMAGE. 
+;*
+;*
+;*  mc_luma.asm
+;*
+;*  Abstract
+;*      sse2 motion compensation
+;*
+;*  History
+;*      17/08/2009 Created
+;*
+;*
+;*************************************************************************/
+%include "asm_inc.asm"
+
+;*******************************************************************************
+; Local Data (Read Only)
+;*******************************************************************************
+%ifdef X86_32_PICASM
+SECTION .text align=32                  ; PIC build: the tables are emitted into .text instead of .rodata
+%else
+SECTION .rodata align=32
+%endif
+
+;*******************************************************************************
+; Constant tables: rounding offsets and, judging by the label names, pmaddubsw multipliers and pshufb index masks
+;*******************************************************************************
+; NOTE(review): no trigonometric values are defined here, despite what the original comment said
+%ifdef HAVE_AVX2
+ALIGN 32
+dwm32768_256:
+    times 16 dw -32768
+maddubsw_m2p10_m40m40_p10m2_p0p0_256:
+    times 4 db -2, 10, -40, -40, 10, -2, 0, 0
+dwm1024_256:
+    times 16 dw -1024
+dd32768_256:
+    times 8 dd 32768
+maddubsw_p1m5_256:
+    times 16 db 1, -5
+maddubsw_m5p1_256:
+    times 16 db -5, 1
+db20_256:
+    times 32 db 20
+maddubsw_m5p20_256:
+    times 16 db -5, 20
+maddubsw_p20m5_256:
+    times 16 db 20, -5
+h264_w0x10_256:
+    times 16 dw 16
+dw32_256:
+    times 16 dw 32
+%endif ; HAVE_AVX2
+
+ALIGN 16
+shufb_32435465768798A9:
+    db 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 9
+shufb_011267784556ABBC:
+    db 0, 1, 1, 2, 6, 7, 7, 8, 4, 5, 5, 6, 0Ah, 0Bh, 0Bh, 0Ch
+maddubsw_p1m5_p1m5_m5p1_m5p1_128:
+    times 2 db 1, -5, 1, -5, -5, 1, -5, 1
+maddubsw_m2p10_m40m40_p10m2_p0p0_128:
+    times 2 db -2, 10, -40, -40, 10, -2, 0, 0
+dwm1024_128:
+    times 8 dw -1024
+dd32768_128:
+    times 4 dd 32768
+maddubsw_p1m5_128:
+    times 8 db 1, -5
+maddubsw_m5p1_128:
+    times 8 db -5, 1
+db20_128:
+    times 16 db 20
+maddubsw_m5p20_128:
+    times 8 db -5, 20
+maddubsw_p20m5_128:
+    times 8 db 20, -5
+h264_w0x10_1:                           ; eight words of 16: rounding term added before the >> 5 of the 6-tap kernels
+    dw 16, 16, 16, 16, 16, 16, 16, 16
+ALIGN 16
+h264_mc_hc_32:                          ; eight words of 32: rounding term added before the >> 6 in FILTER_VER
+    dw 32, 32, 32, 32, 32, 32, 32, 32
+
+
+;*******************************************************************************
+; Code
+;*******************************************************************************
+
+SECTION .text
+
+%ifdef X86_32_PICASM
+; MOVEIMM_DW16 reg: set every 16-bit lane of reg to 16 without a memory operand
+%macro MOVEIMM_DW16 1
+    pcmpeqw %1, %1                      ; all bits set
+    psrlw %1, 15                        ; each word = 1
+    psllw %1, 4                         ; each word = 16
+%endmacro
+
+%endif
+
+;*******************************************************************************
+; void McHorVer20WidthEq4_mmx( const uint8_t *pSrc, int iSrcStride,
+;                              uint8_t *pDst, int iDstStride, int iHeight)
+; Horizontal 6-tap filter, 4 pixels per row:
+;   dst[x] = sat8((s[x-2] - 5*s[x-1] + 20*s[x] + 20*s[x+1] - 5*s[x+2] + s[x+3] + 16) >> 5)
+; where sat8 is the unsigned saturation done by packuswb.
+;*******************************************************************************
+WELS_EXTERN McHorVer20WidthEq4_mmx
+    %assign push_num 0
+    LOAD_5_PARA
+    SIGN_EXTENSION r1, r1d
+    SIGN_EXTENSION r3, r3d
+    SIGN_EXTENSION r4, r4d
+
+    sub r0, 2                           ; the leftmost tap sits two pixels left of the output
+    WELS_Zero mm7
+%ifdef X86_32_PICASM
+    MOVEIMM_DW16 mm6
+%else
+    movq mm6, [h264_w0x10_1]
+%endif
+.height_loop:
+    movd mm0, [r0]                      ; a = s[x-2]
+    punpcklbw mm0, mm7
+    movd mm1, [r0+5]                    ; f = s[x+3]
+    punpcklbw mm1, mm7
+    movd mm2, [r0+1]                    ; b = s[x-1]
+    punpcklbw mm2, mm7
+    movd mm3, [r0+4]                    ; e = s[x+2]
+    punpcklbw mm3, mm7
+    movd mm4, [r0+2]                    ; c = s[x]
+    punpcklbw mm4, mm7
+    movd mm5, [r0+3]                    ; d = s[x+1]
+    punpcklbw mm5, mm7
+
+    paddw mm2, mm3                      ; b+e
+    paddw mm4, mm5                      ; c+d
+    psllw mm4, 2                        ; 4(c+d)
+    psubw mm4, mm2                      ; t = 4(c+d) - (b+e)
+    paddw mm0, mm1                      ; a+f
+    paddw mm0, mm4                      ; a+f + t
+    psllw mm4, 2                        ; 4t
+    paddw mm0, mm4                      ; a+f + 5t = a - 5b + 20c + 20d - 5e + f
+    paddw mm0, mm6                      ; + 16
+    psraw mm0, 5
+    packuswb mm0, mm7
+    movd [r2], mm0
+
+    add r0, r1
+    add r2, r3
+    dec r4
+    jnz .height_loop
+
+    WELSEMMS
+    LOAD_5_PARA_POP
+    ret
+
+;*******************************************************************************
+; Macros and other preprocessor constants
+;*******************************************************************************
+
+; SSE_LOAD_8P dst, zero, mem: load 8 bytes from mem and interleave them with the bytes of zero (widening to words when zero is cleared)
+%macro SSE_LOAD_8P 3
+    movq %1, %3
+    punpcklbw %1, %2
+%endmacro
+; FILTER_HV_W8 a,b,c,d,e,f, tmp1,tmp2, dst: 6-tap filter on eight word lanes, stores 8 bytes to dst; a is destroyed, tmp2 ends cleared by WELS_Zero
+%macro FILTER_HV_W8 9
+    paddw %1, %6                        ; a+f
+    paddw %1, [pic(h264_w0x10_1)]       ; + 16
+    movdqa %8, %3
+    movdqa %7, %2
+    paddw %8, %4                        ; c+d
+    paddw %7, %5                        ; b+e
+    psllw %8, 2
+    psubw %8, %7                        ; t = 4(c+d) - (b+e)
+    paddw %1, %8
+    psllw %8, 2
+    paddw %1, %8                        ; a+f+16 + 5t
+    psraw %1, 5
+    WELS_Zero %8
+    packuswb %1, %8
+    movq %9, %1
+%endmacro
+
+; FILTER_HV_W4: identical arithmetic to FILTER_HV_W8, but only 4 result bytes are stored (movd)
+%macro FILTER_HV_W4 9
+paddw %1, %6
+paddw %1, [pic(h264_w0x10_1)]
+movdqa %8, %3
+movdqa %7, %2
+paddw %8, %4
+paddw %7, %5
+psllw %8, 2
+psubw %8, %7
+paddw %1, %8
+psllw %8, 2
+paddw %1, %8
+psraw %1, 5
+WELS_Zero %8
+packuswb %1, %8
+movd %9, %1
+%endmacro
+
+
+;*******************************************************************************
+; Code
+;*******************************************************************************
+
+SECTION .text
+
+;***********************************************************************
+; void McHorVer22Width8HorFirst_sse2(const uint8_t *pSrc, int32_t iSrcStride,
+;                       uint8_t *pDst, int32_t iDstStride, int32_t iHeight)
+; NOTE(review): the original comment typed pSrc and iSrcStride as int16_t, but
+; the code reads bytes (movq + punpcklbw) - confirm against the C declaration.
+; Stores the unrounded 16-bit 6-tap sums of 8 pixels per row with movdqa, so
+; pDst and iDstStride must keep every row 16-byte aligned.
+;***********************************************************************
+WELS_EXTERN McHorVer22Width8HorFirst_sse2
+    %assign push_num 0
+    LOAD_5_PARA
+    PUSH_XMM 8
+    SIGN_EXTENSION r1, r1d
+    SIGN_EXTENSION r3, r3d
+    SIGN_EXTENSION r4, r4d
+    pxor xmm7, xmm7
+; NOTE(review): there is no sub r0, 2 here, so the taps are [r0]..[r0+5]; presumably the caller passes pSrc already shifted left by 2 - confirm
+    sub r0, r1                          ; start two rows above pSrc (original note: 5 more rows are needed)
+    sub r0, r1
+
+.yloop_width_8:
+    movq xmm0, [r0]
+    punpcklbw xmm0, xmm7
+    movq xmm1, [r0+5]
+    punpcklbw xmm1, xmm7
+    movq xmm2, [r0+1]
+    punpcklbw xmm2, xmm7
+    movq xmm3, [r0+4]
+    punpcklbw xmm3, xmm7
+    movq xmm4, [r0+2]
+    punpcklbw xmm4, xmm7
+    movq xmm5, [r0+3]
+    punpcklbw xmm5, xmm7
+
+    paddw xmm2, xmm3
+    paddw xmm4, xmm5
+    psllw xmm4, 2
+    psubw xmm4, xmm2                    ; t = 4(c+d) - (b+e)
+    paddw xmm0, xmm1
+    paddw xmm0, xmm4
+    psllw xmm4, 2
+    paddw xmm0, xmm4                    ; a+f + 5t, no rounding and no shift at this stage
+    movdqa [r2], xmm0                   ; eight 16-bit intermediate values
+
+    add r0, r1
+    add r2, r3
+    dec r4
+    jnz .yloop_width_8
+    POP_XMM
+    LOAD_5_PARA_POP
+    ret
+
+;*******************************************************************************
+; void McHorVer20WidthEq8_sse2(  const uint8_t *pSrc, int iSrcStride,
+;                                uint8_t *pDst, int iDstStride, int iHeight );
+; Horizontal 6-tap filter, 8 pixels per row:
+;   dst[x] = sat8((s[x-2] - 5*s[x-1] + 20*s[x] + 20*s[x+1] - 5*s[x+2] + s[x+3] + 16) >> 5)
+; xmm6 holds the rounding term 16 and xmm7 stays zero for the whole loop.
+; sat8 is the unsigned saturation done by packuswb.
+;*******************************************************************************
+WELS_EXTERN McHorVer20WidthEq8_sse2
+    %assign push_num 0
+    LOAD_5_PARA
+    PUSH_XMM 8
+    SIGN_EXTENSION r1, r1d
+    SIGN_EXTENSION r3, r3d
+    SIGN_EXTENSION r4, r4d
+    lea r0, [r0-2] ;pSrc -= 2;
+
+    pxor xmm7, xmm7
+%ifdef X86_32_PICASM
+    MOVEIMM_DW16 xmm6
+%else
+    movdqa xmm6, [h264_w0x10_1]
+%endif
+.y_loop:
+    movq xmm0, [r0]                     ; a = s[x-2]
+    punpcklbw xmm0, xmm7
+    movq xmm1, [r0+5]                   ; f = s[x+3]
+    punpcklbw xmm1, xmm7
+    movq xmm2, [r0+1]                   ; b = s[x-1]
+    punpcklbw xmm2, xmm7
+    movq xmm3, [r0+4]                   ; e = s[x+2]
+    punpcklbw xmm3, xmm7
+    movq xmm4, [r0+2]                   ; c = s[x]
+    punpcklbw xmm4, xmm7
+    movq xmm5, [r0+3]                   ; d = s[x+1]
+    punpcklbw xmm5, xmm7
+
+    paddw xmm2, xmm3
+    paddw xmm4, xmm5
+    psllw xmm4, 2
+    psubw xmm4, xmm2                    ; t = 4(c+d) - (b+e)
+    paddw xmm0, xmm1
+    paddw xmm0, xmm4
+    psllw xmm4, 2
+    paddw xmm0, xmm4                    ; a+f + 5t
+    paddw xmm0, xmm6                    ; + 16
+    psraw xmm0, 5
+
+    packuswb xmm0, xmm7
+    movq [r2], xmm0
+
+    lea r2, [r2+r3]
+    lea r0, [r0+r1]
+    dec r4
+    jnz near .y_loop
+
+    POP_XMM
+    LOAD_5_PARA_POP
+    ret
+
+;*******************************************************************************
+; void McHorVer20WidthEq16_sse2(  const uint8_t *pSrc, int iSrcStride,
+;                                 uint8_t *pDst, int iDstStride, int iHeight );
+; Horizontal 6-tap filter, 16 pixels per row, computed as two independent
+; 8-pixel halves (source offsets +0 and +8) with the same arithmetic as
+; McHorVer20WidthEq8_sse2:
+;   dst[x] = sat8((s[x-2] - 5*s[x-1] + 20*s[x] + 20*s[x+1] - 5*s[x+2] + s[x+3] + 16) >> 5)
+;*******************************************************************************
+WELS_EXTERN McHorVer20WidthEq16_sse2
+    %assign push_num 0
+    LOAD_5_PARA
+    PUSH_XMM 8
+    SIGN_EXTENSION r1, r1d
+    SIGN_EXTENSION r3, r3d
+    SIGN_EXTENSION r4, r4d
+    lea r0, [r0-2] ;pSrc -= 2;
+
+    pxor xmm7, xmm7
+%ifdef X86_32_PICASM
+    MOVEIMM_DW16 xmm6
+%else
+    movdqa xmm6, [h264_w0x10_1]
+%endif
+.y_loop:
+; left half: output columns 0..7
+    movq xmm0, [r0]
+    punpcklbw xmm0, xmm7
+    movq xmm1, [r0+5]
+    punpcklbw xmm1, xmm7
+    movq xmm2, [r0+1]
+    punpcklbw xmm2, xmm7
+    movq xmm3, [r0+4]
+    punpcklbw xmm3, xmm7
+    movq xmm4, [r0+2]
+    punpcklbw xmm4, xmm7
+    movq xmm5, [r0+3]
+    punpcklbw xmm5, xmm7
+
+    paddw xmm2, xmm3
+    paddw xmm4, xmm5
+    psllw xmm4, 2
+    psubw xmm4, xmm2
+    paddw xmm0, xmm1
+    paddw xmm0, xmm4
+    psllw xmm4, 2
+    paddw xmm0, xmm4
+    paddw xmm0, xmm6
+    psraw xmm0, 5
+    packuswb xmm0, xmm7
+    movq [r2], xmm0
+; right half: output columns 8..15
+    movq xmm0, [r0+8]
+    punpcklbw xmm0, xmm7
+    movq xmm1, [r0+5+8]
+    punpcklbw xmm1, xmm7
+    movq xmm2, [r0+1+8]
+    punpcklbw xmm2, xmm7
+    movq xmm3, [r0+4+8]
+    punpcklbw xmm3, xmm7
+    movq xmm4, [r0+2+8]
+    punpcklbw xmm4, xmm7
+    movq xmm5, [r0+3+8]
+    punpcklbw xmm5, xmm7
+
+    paddw xmm2, xmm3
+    paddw xmm4, xmm5
+    psllw xmm4, 2
+    psubw xmm4, xmm2
+    paddw xmm0, xmm1
+    paddw xmm0, xmm4
+    psllw xmm4, 2
+    paddw xmm0, xmm4
+    paddw xmm0, xmm6
+    psraw xmm0, 5
+    packuswb xmm0, xmm7
+    movq [r2+8], xmm0
+
+    lea r2, [r2+r3]
+    lea r0, [r0+r1]
+    dec r4
+    jnz near .y_loop
+
+    POP_XMM
+    LOAD_5_PARA_POP
+    ret
+
+
+;*******************************************************************************
+; void McHorVer02WidthEq8_sse2( const uint8_t *pSrc, int iSrcStride,
+;                               uint8_t *pDst, int iDstStride, int iHeight )
+; Vertical 6-tap filter on 8 columns: rows pSrc-2*stride .. pSrc+3*stride feed
+; the first output row. The loop is unrolled 8 times and rotates the roles of
+; xmm0..xmm7 between steps instead of copying rows from register to register.
+;*******************************************************************************
+WELS_EXTERN McHorVer02WidthEq8_sse2
+    %assign push_num 0
+    INIT_X86_32_PIC r5
+    LOAD_5_PARA
+    PUSH_XMM 8
+    SIGN_EXTENSION r1, r1d
+    SIGN_EXTENSION r3, r3d
+    SIGN_EXTENSION r4, r4d
+    sub r0, r1                          ; start two rows above pSrc
+    sub r0, r1
+
+    WELS_Zero xmm7
+
+    SSE_LOAD_8P xmm0, xmm7, [r0]
+    SSE_LOAD_8P xmm1, xmm7, [r0+r1]
+    lea r0, [r0+2*r1]
+    SSE_LOAD_8P xmm2, xmm7, [r0]
+    SSE_LOAD_8P xmm3, xmm7, [r0+r1]
+    lea r0, [r0+2*r1]
+    SSE_LOAD_8P xmm4, xmm7, [r0]
+    SSE_LOAD_8P xmm5, xmm7, [r0+r1]
+
+.start:
+    FILTER_HV_W8 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, [r2]
+    dec r4
+    jz near .xx_exit
+; each following step loads one new row, using the register cleared by the previous FILTER_HV_W8 as the zero operand
+    lea r0, [r0+2*r1]
+    SSE_LOAD_8P xmm6, xmm7, [r0]
+    FILTER_HV_W8 xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [r2+r3]
+    dec r4
+    jz near .xx_exit
+
+    lea r2, [r2+2*r3]
+    SSE_LOAD_8P xmm7, xmm0, [r0+r1]
+    FILTER_HV_W8 xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, xmm1, [r2]
+    dec r4
+    jz near .xx_exit
+
+    lea r0, [r0+2*r1]
+    SSE_LOAD_8P xmm0, xmm1, [r0]
+    FILTER_HV_W8 xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, xmm1, xmm2, [r2+r3]
+    dec r4
+    jz near .xx_exit
+
+    lea r2, [r2+2*r3]
+    SSE_LOAD_8P xmm1, xmm2, [r0+r1]
+    FILTER_HV_W8 xmm4, xmm5, xmm6, xmm7, xmm0, xmm1, xmm2, xmm3, [r2]
+    dec r4
+    jz near .xx_exit
+
+    lea r0, [r0+2*r1]
+    SSE_LOAD_8P xmm2, xmm3, [r0]
+    FILTER_HV_W8 xmm5, xmm6, xmm7, xmm0, xmm1, xmm2, xmm3, xmm4, [r2+r3]
+    dec r4
+    jz near .xx_exit
+
+    lea r2, [r2+2*r3]
+    SSE_LOAD_8P xmm3, xmm4, [r0+r1]
+    FILTER_HV_W8 xmm6, xmm7, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, [r2]
+    dec r4
+    jz near .xx_exit
+
+    lea r0, [r0+2*r1]
+    SSE_LOAD_8P xmm4, xmm5, [r0]
+    FILTER_HV_W8 xmm7, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, [r2+r3]
+    dec r4
+    jz near .xx_exit
+
+    lea r2, [r2+2*r3]
+    SSE_LOAD_8P xmm5, xmm6, [r0+r1]
+    jmp near .start                     ; the register roles are back where they started
+
+.xx_exit:
+    POP_XMM
+    LOAD_5_PARA_POP
+    DEINIT_X86_32_PIC
+    ret
+
+;***********************************************************************
+; Code
+;***********************************************************************
+
+SECTION .text
+
+
+
+;***********************************************************************
+; void McHorVer02Height9Or17_sse2( const uint8_t *pSrc, int32_t iSrcStride,
+;                                  uint8_t *pDst, int32_t iDstStride,
+;                                  int32_t iWidth, int32_t iHeight )
+; Vertical 6-tap filter over iWidth >> 3 strips of 8 columns, iHeight rows each.
+; One row is emitted before the unrolled .start loop is entered; the original
+; pSrc, pDst and iHeight are reloaded when moving on to the next strip.
+;***********************************************************************
+WELS_EXTERN McHorVer02Height9Or17_sse2
+    %assign push_num 0
+    INIT_X86_32_PIC r6
+    LOAD_6_PARA
+    PUSH_XMM 8
+    SIGN_EXTENSION r1, r1d
+    SIGN_EXTENSION r3, r3d
+    SIGN_EXTENSION r4, r4d
+    SIGN_EXTENSION r5, r5d
+; 64-bit builds keep the original pSrc, pDst and iHeight in r12..r14; 32-bit builds re-read arg1, arg3 and arg6 instead
+%ifndef X86_32
+    push r12
+    push r13
+    push r14
+    mov r12, r0
+    mov r13, r2
+    mov r14, r5
+%endif
+
+    shr r4, 3                           ; number of 8-column strips
+    sub r0, r1                          ; start two rows above pSrc
+    sub r0, r1
+
+.xloop:
+    WELS_Zero xmm7
+    SSE_LOAD_8P xmm0, xmm7, [r0]
+    SSE_LOAD_8P xmm1, xmm7, [r0+r1]
+    lea r0, [r0+2*r1]
+    SSE_LOAD_8P xmm2, xmm7, [r0]
+    SSE_LOAD_8P xmm3, xmm7, [r0+r1]
+    lea r0, [r0+2*r1]
+    SSE_LOAD_8P xmm4, xmm7, [r0]
+    SSE_LOAD_8P xmm5, xmm7, [r0+r1]
+
+    FILTER_HV_W8 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, [r2]
+    dec r5
+    lea r0, [r0+2*r1]
+    SSE_LOAD_8P xmm6, xmm7, [r0]
+    movdqa xmm0,xmm1                    ; slide the six-row window down by one row
+    movdqa xmm1,xmm2
+    movdqa xmm2,xmm3
+    movdqa xmm3,xmm4
+    movdqa xmm4,xmm5
+    movdqa xmm5,xmm6
+    add r2, r3
+    sub r0, r1                          ; r0 points at the row now held in xmm5
+
+.start:
+    FILTER_HV_W8 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, [r2]
+    dec r5
+    jz near .x_loop_dec
+
+    lea r0, [r0+2*r1]
+    SSE_LOAD_8P xmm6, xmm7, [r0]
+    FILTER_HV_W8 xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [r2+r3]
+    dec r5
+    jz near .x_loop_dec
+
+    lea r2, [r2+2*r3]
+    SSE_LOAD_8P xmm7, xmm0, [r0+r1]
+    FILTER_HV_W8 xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, xmm1, [r2]
+    dec r5
+    jz near .x_loop_dec
+
+    lea r0, [r0+2*r1]
+    SSE_LOAD_8P xmm0, xmm1, [r0]
+    FILTER_HV_W8 xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, xmm1, xmm2, [r2+r3]
+    dec r5
+    jz near .x_loop_dec
+
+    lea r2, [r2+2*r3]
+    SSE_LOAD_8P xmm1, xmm2, [r0+r1]
+    FILTER_HV_W8 xmm4, xmm5, xmm6, xmm7, xmm0, xmm1, xmm2, xmm3, [r2]
+    dec r5
+    jz near .x_loop_dec
+
+    lea r0, [r0+2*r1]
+    SSE_LOAD_8P xmm2, xmm3, [r0]
+    FILTER_HV_W8 xmm5, xmm6, xmm7, xmm0, xmm1, xmm2, xmm3, xmm4, [r2+r3]
+    dec r5
+    jz near .x_loop_dec
+
+    lea r2, [r2+2*r3]
+    SSE_LOAD_8P xmm3, xmm4, [r0+r1]
+    FILTER_HV_W8 xmm6, xmm7, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, [r2]
+    dec r5
+    jz near .x_loop_dec
+
+    lea r0, [r0+2*r1]
+    SSE_LOAD_8P xmm4, xmm5, [r0]
+    FILTER_HV_W8 xmm7, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, [r2+r3]
+    dec r5
+    jz near .x_loop_dec
+
+    lea r2, [r2+2*r3]
+    SSE_LOAD_8P xmm5, xmm6, [r0+r1]
+    jmp near .start
+
+.x_loop_dec:
+    dec r4
+    jz near .xx_exit
+%ifdef X86_32
+    mov r0, arg1
+    mov r2, arg3
+    mov r5, arg6
+%else
+    mov r0, r12
+    mov r2, r13
+    mov r5, r14
+%endif
+    sub r0, r1
+    sub r0, r1
+    add r0, 8                           ; NOTE(review): always original pointer + 8, so only a second strip is addressed correctly
+    add r2, 8
+    jmp near .xloop
+
+.xx_exit:
+%ifndef X86_32
+    pop r14
+    pop r13
+    pop r12
+%endif
+    POP_XMM
+    LOAD_6_PARA_POP
+    DEINIT_X86_32_PIC
+    ret
+
+
+;***********************************************************************
+; void McHorVer02Height5_sse2( const uint8_t *pSrc, int32_t iSrcStride,
+;                              uint8_t *pDst, int32_t iDstStride,
+;                              int32_t iWidth, int32_t iHeight )
+; Same structure as McHorVer02Height9Or17_sse2 with strips of 4 columns:
+; iWidth >> 2 strips, FILTER_HV_W4 stores 4 bytes per row. The loads still
+; read 8 source bytes per row.
+;***********************************************************************
+WELS_EXTERN McHorVer02Height5_sse2
+%assign push_num 0
+INIT_X86_32_PIC r6
+LOAD_6_PARA
+PUSH_XMM 8
+SIGN_EXTENSION r1, r1d
+SIGN_EXTENSION r3, r3d
+SIGN_EXTENSION r4, r4d
+SIGN_EXTENSION r5, r5d
+
+%ifndef X86_32
+push r12
+push r13
+push r14
+mov r12, r0
+mov r13, r2
+mov r14, r5
+%endif
+
+shr r4, 2                               ; number of 4-column strips
+sub r0, r1                              ; start two rows above pSrc
+sub r0, r1
+
+.xloop:
+WELS_Zero xmm7
+SSE_LOAD_8P xmm0, xmm7, [r0]
+SSE_LOAD_8P xmm1, xmm7, [r0+r1]
+lea r0, [r0+2*r1]
+SSE_LOAD_8P xmm2, xmm7, [r0]
+SSE_LOAD_8P xmm3, xmm7, [r0+r1]
+lea r0, [r0+2*r1]
+SSE_LOAD_8P xmm4, xmm7, [r0]
+SSE_LOAD_8P xmm5, xmm7, [r0+r1]
+
+FILTER_HV_W4 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, [r2]
+dec r5
+lea r0, [r0+2*r1]
+SSE_LOAD_8P xmm6, xmm7, [r0]
+movdqa xmm0,xmm1                        ; slide the six-row window down by one row
+movdqa xmm1,xmm2
+movdqa xmm2,xmm3
+movdqa xmm3,xmm4
+movdqa xmm4,xmm5
+movdqa xmm5,xmm6
+add r2, r3
+sub r0, r1
+
+.start:
+FILTER_HV_W4 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, [r2]
+dec r5
+jz near .x_loop_dec
+
+lea r0, [r0+2*r1]
+SSE_LOAD_8P xmm6, xmm7, [r0]
+FILTER_HV_W4 xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [r2+r3]
+dec r5
+jz near .x_loop_dec
+
+lea r2, [r2+2*r3]
+SSE_LOAD_8P xmm7, xmm0, [r0+r1]
+FILTER_HV_W4 xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, xmm1, [r2]
+dec r5
+jz near .x_loop_dec
+
+lea r0, [r0+2*r1]
+SSE_LOAD_8P xmm0, xmm1, [r0]
+FILTER_HV_W4 xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, xmm1, xmm2, [r2+r3]
+dec r5
+jz near .x_loop_dec
+
+lea r2, [r2+2*r3]
+SSE_LOAD_8P xmm1, xmm2, [r0+r1]
+FILTER_HV_W4 xmm4, xmm5, xmm6, xmm7, xmm0, xmm1, xmm2, xmm3, [r2]
+dec r5
+jz near .x_loop_dec
+
+lea r0, [r0+2*r1]
+SSE_LOAD_8P xmm2, xmm3, [r0]
+FILTER_HV_W4 xmm5, xmm6, xmm7, xmm0, xmm1, xmm2, xmm3, xmm4, [r2+r3]
+dec r5
+jz near .x_loop_dec
+
+lea r2, [r2+2*r3]
+SSE_LOAD_8P xmm3, xmm4, [r0+r1]
+FILTER_HV_W4 xmm6, xmm7, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, [r2]
+dec r5
+jz near .x_loop_dec
+
+lea r0, [r0+2*r1]
+SSE_LOAD_8P xmm4, xmm5, [r0]
+FILTER_HV_W4 xmm7, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, [r2+r3]
+dec r5
+jz near .x_loop_dec
+
+lea r2, [r2+2*r3]
+SSE_LOAD_8P xmm5, xmm6, [r0+r1]
+jmp near .start
+
+.x_loop_dec:
+dec r4
+jz near .xx_exit
+%ifdef X86_32
+mov r0, arg1
+mov r2, arg3
+mov r5, arg6
+%else
+mov r0, r12
+mov r2, r13
+mov r5, r14
+%endif
+sub r0, r1
+sub r0, r1
+add r0, 4                               ; NOTE(review): always original pointer + 4, so only a second strip is addressed correctly
+add r2, 4
+jmp near .xloop
+
+.xx_exit:
+%ifndef X86_32
+pop r14
+pop r13
+pop r12
+%endif
+POP_XMM
+LOAD_6_PARA_POP
+DEINIT_X86_32_PIC
+ret
+
+
+;***********************************************************************
+; void McHorVer20Width9Or17_sse2( const uint8_t *pSrc, int32_t iSrcStride,
+;                                 uint8_t *pDst, int32_t iDstStride,
+;                                 int32_t iWidth, int32_t iHeight );
+; Horizontal 6-tap filter (+16, >> 5, unsigned saturation) for rows of 9 or 17
+; pixels. iWidth == 9 takes the first loop; every other value takes the
+; 17-pixel loop. The odd last column comes from a second filter pass shifted
+; right by one pixel, whose 8-byte store overlaps the bytes already written.
+;***********************************************************************
+WELS_EXTERN McHorVer20Width9Or17_sse2
+    %assign push_num 0
+    INIT_X86_32_PIC r6
+    LOAD_6_PARA
+    PUSH_XMM 8
+    SIGN_EXTENSION r1, r1d
+    SIGN_EXTENSION r3, r3d
+    SIGN_EXTENSION r4, r4d
+    SIGN_EXTENSION r5, r5d
+    sub r0, 2                           ; the leftmost tap sits two pixels left of the output
+    pxor xmm7, xmm7
+
+    cmp r4, 9
+    jne near .width_17
+
+.yloop_width_9:
+    movq xmm0, [r0]                     ; a = s[x-2]
+    punpcklbw xmm0, xmm7
+    movq xmm1, [r0+5]                   ; f = s[x+3]
+    punpcklbw xmm1, xmm7
+    movq xmm2, [r0+1]                   ; b = s[x-1]
+    punpcklbw xmm2, xmm7
+    movq xmm3, [r0+4]                   ; e = s[x+2]
+    punpcklbw xmm3, xmm7
+    movq xmm4, [r0+2]                   ; c = s[x]
+    punpcklbw xmm4, xmm7
+    movq xmm5, [r0+3]                   ; d = s[x+1]
+    punpcklbw xmm5, xmm7
+
+    movdqa xmm7, xmm2                   ; work in xmm6 and xmm7 so that xmm1..xmm5 survive for the shifted pass
+    paddw xmm7, xmm3
+    movdqa xmm6, xmm4
+    paddw xmm6, xmm5
+    psllw xmm6, 2
+    psubw xmm6, xmm7                    ; t = 4(c+d) - (b+e)
+    paddw xmm0, xmm1
+    paddw xmm0, xmm6
+    psllw xmm6, 2
+    paddw xmm0, xmm6                    ; a+f + 5t
+    paddw xmm0, [pic(h264_w0x10_1)]
+    psraw xmm0, 5
+    packuswb xmm0, xmm0
+    movd [r2], xmm0                     ; columns 0..3
+
+    pxor xmm7, xmm7                     ; restore the zero register
+    movq xmm0, [r0+6]                   ; the one extra tap needed by the filter shifted right by one pixel
+    punpcklbw xmm0, xmm7
+
+    paddw xmm4, xmm1                    ; shifted b+e
+    paddw xmm5, xmm3                    ; shifted c+d
+    psllw xmm5, 2
+    psubw xmm5, xmm4
+    paddw xmm2, xmm0                    ; shifted a+f
+    paddw xmm2, xmm5
+    psllw xmm5, 2
+    paddw xmm2, xmm5
+    paddw xmm2, [pic(h264_w0x10_1)]
+    psraw xmm2, 5
+    packuswb xmm2, xmm2
+    movq [r2+1], xmm2                   ; columns 1..8
+
+    add r0, r1
+    add r2, r3
+    dec r5
+    jnz .yloop_width_9
+    POP_XMM
+    LOAD_6_PARA_POP
+    DEINIT_X86_32_PIC_KEEPDEF
+    ret
+
+
+.width_17:
+.yloop_width_17:
+    movq xmm0, [r0]
+    punpcklbw xmm0, xmm7
+    movq xmm1, [r0+5]
+    punpcklbw xmm1, xmm7
+    movq xmm2, [r0+1]
+    punpcklbw xmm2, xmm7
+    movq xmm3, [r0+4]
+    punpcklbw xmm3, xmm7
+    movq xmm4, [r0+2]
+    punpcklbw xmm4, xmm7
+    movq xmm5, [r0+3]
+    punpcklbw xmm5, xmm7
+
+    paddw xmm2, xmm3
+    paddw xmm4, xmm5
+    psllw xmm4, 2
+    psubw xmm4, xmm2
+    paddw xmm0, xmm1
+    paddw xmm0, xmm4
+    psllw xmm4, 2
+    paddw xmm0, xmm4
+    paddw xmm0, [pic(h264_w0x10_1)]
+    psraw xmm0, 5
+    packuswb xmm0, xmm0
+    movq [r2], xmm0                     ; columns 0..7
+
+    movq xmm0, [r0+8]
+    punpcklbw xmm0, xmm7
+    movq xmm1, [r0+5+8]
+    punpcklbw xmm1, xmm7
+    movq xmm2, [r0+1+8]
+    punpcklbw xmm2, xmm7
+    movq xmm3, [r0+4+8]
+    punpcklbw xmm3, xmm7
+    movq xmm4, [r0+2+8]
+    punpcklbw xmm4, xmm7
+    movq xmm5, [r0+3+8]
+    punpcklbw xmm5, xmm7
+
+    movdqa xmm7, xmm2                   ; as in the 9-pixel loop: keep xmm1..xmm5 intact for the shifted pass
+    paddw xmm7, xmm3
+    movdqa xmm6, xmm4
+    paddw xmm6, xmm5
+    psllw xmm6, 2
+    psubw xmm6, xmm7
+    paddw xmm0, xmm1
+    paddw xmm0, xmm6
+    psllw xmm6, 2
+    paddw xmm0, xmm6
+    paddw xmm0, [pic(h264_w0x10_1)]
+    psraw xmm0, 5
+    packuswb xmm0, xmm0
+    movd [r2+8], xmm0                   ; columns 8..11
+
+
+    pxor xmm7, xmm7                     ; restore the zero register
+    movq xmm0, [r0+6+8]
+    punpcklbw xmm0, xmm7
+
+    paddw xmm4, xmm1
+    paddw xmm5, xmm3
+    psllw xmm5, 2
+    psubw xmm5, xmm4
+    paddw xmm2, xmm0
+    paddw xmm2, xmm5
+    psllw xmm5, 2
+    paddw xmm2, xmm5
+    paddw xmm2, [pic(h264_w0x10_1)]
+    psraw xmm2, 5
+    packuswb xmm2, xmm2
+    movq [r2+9], xmm2                   ; columns 9..16
+    add r0, r1
+    add r2, r3
+    dec r5
+    jnz .yloop_width_17
+    POP_XMM
+    LOAD_6_PARA_POP
+    DEINIT_X86_32_PIC
+    ret
+
+
+;***********************************************************************
+; void McHorVer20Width5_sse2( const uint8_t *pSrc, int32_t iSrcStride,
+;                             uint8_t *pDst, int32_t iDstStride,
+;                             int32_t iWidth, int32_t iHeight );
+; Horizontal 6-tap filter (+16, >> 5, unsigned saturation) for rows of 5
+; pixels: columns 0..3 come from the first pass and columns 1..4 from a
+; second pass shifted right by one pixel. iWidth is loaded but never tested
+; by this routine.
+;***********************************************************************
+WELS_EXTERN McHorVer20Width5_sse2
+%assign push_num 0
+INIT_X86_32_PIC r6
+LOAD_6_PARA
+PUSH_XMM 8
+SIGN_EXTENSION r1, r1d
+SIGN_EXTENSION r3, r3d
+SIGN_EXTENSION r4, r4d
+SIGN_EXTENSION r5, r5d
+sub r0, 2                               ; the leftmost tap sits two pixels left of the output
+pxor xmm7, xmm7
+
+.yloop_width_5:
+movq xmm0, [r0]
+punpcklbw xmm0, xmm7
+movq xmm1, [r0+5]
+punpcklbw xmm1, xmm7
+movq xmm2, [r0+1]
+punpcklbw xmm2, xmm7
+movq xmm3, [r0+4]
+punpcklbw xmm3, xmm7
+movq xmm4, [r0+2]
+punpcklbw xmm4, xmm7
+movq xmm5, [r0+3]
+punpcklbw xmm5, xmm7
+
+movdqa xmm7, xmm2                       ; work in xmm6 and xmm7 so that xmm1..xmm5 survive for the shifted pass
+paddw xmm7, xmm3
+movdqa xmm6, xmm4
+paddw xmm6, xmm5
+psllw xmm6, 2
+psubw xmm6, xmm7
+paddw xmm0, xmm1
+paddw xmm0, xmm6
+psllw xmm6, 2
+paddw xmm0, xmm6
+paddw xmm0, [pic(h264_w0x10_1)]
+psraw xmm0, 5
+packuswb xmm0, xmm0
+movd [r2], xmm0                         ; columns 0..3
+
+pxor xmm7, xmm7                         ; restore the zero register
+movq xmm0, [r0+6]
+punpcklbw xmm0, xmm7
+
+paddw xmm4, xmm1
+paddw xmm5, xmm3
+psllw xmm5, 2
+psubw xmm5, xmm4
+paddw xmm2, xmm0
+paddw xmm2, xmm5
+psllw xmm5, 2
+paddw xmm2, xmm5
+paddw xmm2, [pic(h264_w0x10_1)]
+psraw xmm2, 5
+packuswb xmm2, xmm2
+movd [r2+1], xmm2                       ; columns 1..4
+
+add r0, r1
+add r2, r3
+dec r5
+jnz .yloop_width_5
+POP_XMM
+LOAD_6_PARA_POP
+DEINIT_X86_32_PIC
+ret
+
+
+;***********************************************************************
+;void McHorVer22HorFirst_sse2 (const uint8_t *pSrc, int32_t iSrcStride,
+;                       uint8_t * pTap, int32_t iTapStride,
+;                       int32_t iWidth,int32_t iHeight);
+; Horizontal first pass: stores the unrounded 16-bit 6-tap sums, 9 words per
+; row when iWidth == 9 and 17 words per row otherwise. pTap is written as
+; 16-bit data although it is typed uint8_t*. Rows start two above pSrc.
+;***********************************************************************
+WELS_EXTERN McHorVer22HorFirst_sse2
+    %assign push_num 0
+    LOAD_6_PARA
+    PUSH_XMM 8
+    SIGN_EXTENSION r1, r1d
+    SIGN_EXTENSION r3, r3d
+    SIGN_EXTENSION r4, r4d
+    SIGN_EXTENSION r5, r5d
+    pxor xmm7, xmm7
+    sub r0, r1                          ; start two rows above pSrc (original note: 5 more rows are needed)
+    sub r0, r1
+; NOTE(review): no sub r0, 2 here; presumably the caller passes pSrc already shifted left by 2 - confirm
+    cmp r4, 9
+    jne near .width_17
+
+.yloop_width_9:
+    movq xmm0, [r0]
+    punpcklbw xmm0, xmm7
+    movq xmm1, [r0+5]
+    punpcklbw xmm1, xmm7
+    movq xmm2, [r0+1]
+    punpcklbw xmm2, xmm7
+    movq xmm3, [r0+4]
+    punpcklbw xmm3, xmm7
+    movq xmm4, [r0+2]
+    punpcklbw xmm4, xmm7
+    movq xmm5, [r0+3]
+    punpcklbw xmm5, xmm7
+
+    movdqa xmm7, xmm2
+    paddw xmm7, xmm3
+    movdqa xmm6, xmm4
+    paddw xmm6, xmm5
+    psllw xmm6, 2
+    psubw xmm6, xmm7
+    paddw xmm0, xmm1
+    paddw xmm0, xmm6
+    psllw xmm6, 2
+    paddw xmm0, xmm6
+    movd [r2], xmm0                     ; words 0..1
+
+    pxor xmm7, xmm7
+    movq xmm0, [r0+6]
+    punpcklbw xmm0, xmm7
+
+    paddw xmm4, xmm1
+    paddw xmm5, xmm3
+    psllw xmm5, 2
+    psubw xmm5, xmm4
+    paddw xmm2, xmm0
+    paddw xmm2, xmm5
+    psllw xmm5, 2
+    paddw xmm2, xmm5
+    movq [r2+2], xmm2                   ; words 1..4 of the row
+    movhps [r2+2+8], xmm2               ; words 5..8 of the row
+
+    add r0, r1
+    add r2, r3
+    dec r5
+    jnz .yloop_width_9
+    POP_XMM
+    LOAD_6_PARA_POP
+    ret
+
+
+.width_17:
+.yloop_width_17:
+    movq xmm0, [r0]
+    punpcklbw xmm0, xmm7
+    movq xmm1, [r0+5]
+    punpcklbw xmm1, xmm7
+    movq xmm2, [r0+1]
+    punpcklbw xmm2, xmm7
+    movq xmm3, [r0+4]
+    punpcklbw xmm3, xmm7
+    movq xmm4, [r0+2]
+    punpcklbw xmm4, xmm7
+    movq xmm5, [r0+3]
+    punpcklbw xmm5, xmm7
+
+    paddw xmm2, xmm3
+    paddw xmm4, xmm5
+    psllw xmm4, 2
+    psubw xmm4, xmm2
+    paddw xmm0, xmm1
+    paddw xmm0, xmm4
+    psllw xmm4, 2
+    paddw xmm0, xmm4
+    movdqa [r2], xmm0                   ; words 0..7, aligned store
+
+    movq xmm0, [r0+8]
+    punpcklbw xmm0, xmm7
+    movq xmm1, [r0+5+8]
+    punpcklbw xmm1, xmm7
+    movq xmm2, [r0+1+8]
+    punpcklbw xmm2, xmm7
+    movq xmm3, [r0+4+8]
+    punpcklbw xmm3, xmm7
+    movq xmm4, [r0+2+8]
+    punpcklbw xmm4, xmm7
+    movq xmm5, [r0+3+8]
+    punpcklbw xmm5, xmm7
+
+    movdqa xmm7, xmm2
+    paddw xmm7, xmm3
+    movdqa xmm6, xmm4
+    paddw xmm6, xmm5
+    psllw xmm6, 2
+    psubw xmm6, xmm7
+    paddw xmm0, xmm1
+    paddw xmm0, xmm6
+    psllw xmm6, 2
+    paddw xmm0, xmm6
+    movd [r2+16], xmm0                  ; words 8..9
+
+
+    pxor xmm7, xmm7
+    movq xmm0, [r0+6+8]
+    punpcklbw xmm0, xmm7
+
+    paddw xmm4, xmm1
+    paddw xmm5, xmm3
+    psllw xmm5, 2
+    psubw xmm5, xmm4
+    paddw xmm2, xmm0
+    paddw xmm2, xmm5
+    psllw xmm5, 2
+    paddw xmm2, xmm5
+    movq [r2+18], xmm2                  ; words 9..12 of the row
+    movhps [r2+18+8], xmm2              ; words 13..16 of the row
+
+    add r0, r1
+    add r2, r3
+    dec r5
+    jnz .yloop_width_17
+    POP_XMM
+    LOAD_6_PARA_POP
+    ret
+
+; FILTER_VER a,b,c,d,e,f, tmp1,tmp2, dst: vertical pass over 16-bit taps, stores 8 bytes to dst; a, tmp1 and tmp2 are overwritten
+
+%macro FILTER_VER 9
+    paddw %1, %6                        ; x = a+f
+    movdqa %7, %2
+    movdqa %8, %3
+; the two arithmetic shifts by 2 below stand in for a division by 16 while staying in 16-bit lanes
+
+    paddw %7, %5                        ; y = b+e
+    paddw %8, %4                        ; z = c+d
+
+    psubw %1, %7
+    psraw %1, 2                         ; (x - y) >> 2
+    paddw %1, %8
+    psubw %1, %7
+    psraw %1, 2                         ; (((x - y) >> 2) + z - y) >> 2
+    paddw %8, %1                        ; roughly (x - 5y + 20z) >> 4
+    paddw %8, [pic(h264_mc_hc_32)]      ; + 32
+    psraw %8, 6
+    packuswb %8, %8
+    movq %9, %8
+%endmacro
+;***********************************************************************
+;void McHorVer22Width8VerLastAlign_sse2( const uint8_t *pTap,
+;                                        int32_t iTapStride,
+;                                        uint8_t * pDst, int32_t iDstStride,
+;                                        int32_t iWidth, int32_t iHeight);
+; Vertical second pass over 16-bit taps read with movdqa (pTap rows must be
+; 16-byte aligned). Processes iWidth >> 3 strips of 8 columns, iHeight rows
+; each; every strip advances pTap by 16 bytes and pDst by 8 bytes.
+;***********************************************************************
+
+WELS_EXTERN McHorVer22Width8VerLastAlign_sse2
+    %assign push_num 0
+    INIT_X86_32_PIC r6
+    LOAD_6_PARA
+    PUSH_XMM 8
+    SIGN_EXTENSION r1, r1d
+    SIGN_EXTENSION r3, r3d
+    SIGN_EXTENSION r4, r4d
+    SIGN_EXTENSION r5, r5d
+%ifndef X86_32
+    push r12
+    push r13
+    push r14
+    mov r12, r0
+    mov r13, r2
+    mov r14, r5
+%endif
+
+    shr r4, 3                           ; number of 8-column strips
+
+.width_loop:
+    movdqa xmm0, [r0]
+    movdqa xmm1, [r0+r1]
+    lea r0, [r0+2*r1]
+    movdqa xmm2, [r0]
+    movdqa xmm3, [r0+r1]
+    lea r0, [r0+2*r1]
+    movdqa xmm4, [r0]
+    movdqa xmm5, [r0+r1]
+
+    FILTER_VER xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, [r2]
+    dec r5
+    lea r0, [r0+2*r1]
+    movdqa xmm6, [r0]
+
+    movdqa xmm0, xmm1                   ; slide the six-row window down by one row
+    movdqa xmm1, xmm2
+    movdqa xmm2, xmm3
+    movdqa xmm3, xmm4
+    movdqa xmm4, xmm5
+    movdqa xmm5, xmm6
+
+    add r2, r3
+    sub r0, r1
+
+.start:
+    FILTER_VER xmm0,xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, [r2]
+    dec r5
+    jz near .x_loop_dec
+
+    lea r0, [r0+2*r1]
+    movdqa xmm6, [r0]
+    FILTER_VER xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0,[r2+r3]
+    dec r5
+    jz near .x_loop_dec
+
+    lea r2, [r2+2*r3]
+    movdqa xmm7, [r0+r1]
+    FILTER_VER xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, xmm1, [r2]
+    dec r5
+    jz near .x_loop_dec
+
+    lea r0, [r0+2*r1]
+    movdqa xmm0, [r0]
+    FILTER_VER xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, xmm1, xmm2,[r2+r3]
+    dec r5
+    jz near .x_loop_dec
+
+    lea r2, [r2+2*r3]
+    movdqa xmm1, [r0+r1]
+    FILTER_VER xmm4, xmm5, xmm6, xmm7, xmm0, xmm1, xmm2, xmm3,[r2]
+    dec r5
+    jz near .x_loop_dec
+
+    lea r0, [r0+2*r1]
+    movdqa xmm2, [r0]
+    FILTER_VER xmm5, xmm6, xmm7, xmm0, xmm1, xmm2, xmm3,xmm4,[r2+r3]
+    dec r5
+    jz near .x_loop_dec
+
+    lea r2, [r2+2*r3]
+    movdqa xmm3, [r0+r1]
+    FILTER_VER xmm6, xmm7, xmm0, xmm1, xmm2, xmm3,xmm4,xmm5,[r2]
+    dec r5
+    jz near .x_loop_dec
+
+    lea r0, [r0+2*r1]
+    movdqa xmm4, [r0]
+    FILTER_VER xmm7, xmm0, xmm1, xmm2, xmm3,xmm4,xmm5,xmm6, [r2+r3]
+    dec r5
+    jz near .x_loop_dec
+
+    lea r2, [r2+2*r3]
+    movdqa xmm5, [r0+r1]
+    jmp near .start
+
+.x_loop_dec:
+    dec r4
+    jz near .exit
+%ifdef X86_32
+    mov r0, arg1
+    mov r2, arg3
+    mov r5, arg6
+%else
+    mov r0, r12
+    mov r2, r13
+    mov r5, r14
+%endif
+    add r0, 16                          ; next strip: 8 words of taps
+    add r2, 8                           ; next strip: 8 output bytes
+    jmp .width_loop
+
+.exit:
+%ifndef X86_32
+    pop r14
+    pop r13
+    pop r12
+%endif
+    POP_XMM
+    LOAD_6_PARA_POP
+    DEINIT_X86_32_PIC
+    ret
+
+;***********************************************************************
+;void McHorVer22Width8VerLastUnAlign_sse2( const uint8_t *pTap,
+;                                          int32_t iTapStride,
+;                                          uint8_t * pDst, int32_t iDstStride,
+;                                          int32_t iWidth, int32_t iHeight);
+; Same as McHorVer22Width8VerLastAlign_sse2 except that the tap rows are read
+; with movdqu, so pTap does not have to be 16-byte aligned. Processes
+; iWidth >> 3 strips of 8 columns.
+;***********************************************************************
+
+WELS_EXTERN McHorVer22Width8VerLastUnAlign_sse2
+    %assign push_num 0
+    INIT_X86_32_PIC r6
+    LOAD_6_PARA
+    PUSH_XMM 8
+    SIGN_EXTENSION r1, r1d
+    SIGN_EXTENSION r3, r3d
+    SIGN_EXTENSION r4, r4d
+    SIGN_EXTENSION r5, r5d
+%ifndef X86_32
+    push r12
+    push r13
+    push r14
+    mov r12, r0
+    mov r13, r2
+    mov r14, r5
+%endif
+    shr r4, 3                           ; number of 8-column strips
+
+.width_loop:
+    movdqu xmm0, [r0]
+    movdqu xmm1, [r0+r1]
+    lea r0, [r0+2*r1]
+    movdqu xmm2, [r0]
+    movdqu xmm3, [r0+r1]
+    lea r0, [r0+2*r1]
+    movdqu xmm4, [r0]
+    movdqu xmm5, [r0+r1]
+
+    FILTER_VER xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, [r2]
+    dec r5
+    lea r0, [r0+2*r1]
+    movdqu xmm6, [r0]
+
+    movdqa xmm0, xmm1                   ; slide the six-row window down by one row
+    movdqa xmm1, xmm2
+    movdqa xmm2, xmm3
+    movdqa xmm3, xmm4
+    movdqa xmm4, xmm5
+    movdqa xmm5, xmm6
+
+    add r2, r3
+    sub r0, r1
+
+.start:
+    FILTER_VER xmm0,xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, [r2]
+    dec r5
+    jz near .x_loop_dec
+
+    lea r0, [r0+2*r1]
+    movdqu xmm6, [r0]
+    FILTER_VER xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0,[r2+r3]
+    dec r5
+    jz near .x_loop_dec
+
+    lea r2, [r2+2*r3]
+    movdqu xmm7, [r0+r1]
+    FILTER_VER xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, xmm1, [r2]
+    dec r5
+    jz near .x_loop_dec
+
+    lea r0, [r0+2*r1]
+    movdqu xmm0, [r0]
+    FILTER_VER xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, xmm1, xmm2,[r2+r3]
+    dec r5
+    jz near .x_loop_dec
+
+    lea r2, [r2+2*r3]
+    movdqu xmm1, [r0+r1]
+    FILTER_VER xmm4, xmm5, xmm6, xmm7, xmm0, xmm1, xmm2, xmm3,[r2]
+    dec r5
+    jz near .x_loop_dec
+
+    lea r0, [r0+2*r1]
+    movdqu xmm2, [r0]
+    FILTER_VER xmm5, xmm6, xmm7, xmm0, xmm1, xmm2, xmm3,xmm4,[r2+r3]
+    dec r5
+    jz near .x_loop_dec
+
+    lea r2, [r2+2*r3]
+    movdqu xmm3, [r0+r1]
+    FILTER_VER xmm6, xmm7, xmm0, xmm1, xmm2, xmm3,xmm4,xmm5,[r2]
+    dec r5
+    jz near .x_loop_dec
+
+    lea r0, [r0+2*r1]
+    movdqu xmm4, [r0]
+    FILTER_VER xmm7, xmm0, xmm1, xmm2, xmm3,xmm4,xmm5,xmm6, [r2+r3]
+    dec r5
+    jz near .x_loop_dec
+
+    lea r2, [r2+2*r3]
+    movdqu xmm5, [r0+r1]
+    jmp near .start
+
+.x_loop_dec:
+    dec r4
+    jz near .exit
+%ifdef X86_32
+    mov r0, arg1
+    mov r2, arg3
+    mov r5, arg6
+%else
+    mov r0, r12
+    mov r2, r13
+    mov r5, r14
+%endif
+    add r0, 16                          ; next strip: 8 words of taps
+    add r2, 8                           ; next strip: 8 output bytes
+    jmp .width_loop
+
+.exit:
+%ifndef X86_32
+    pop r14
+    pop r13
+    pop r12
+%endif
+    POP_XMM
+    LOAD_6_PARA_POP
+    DEINIT_X86_32_PIC
+    ret
+
+
+;***********************************************************************
+;void McHorVer22Width5HorFirst_sse2 (const uint8_t *pSrc, int32_t iSrcStride,
+;                       uint8_t * pTap, int32_t iTapStride,
+;                       int32_t iWidth,int32_t iHeight);
+; Horizontal first pass storing unrounded 16-bit 6-tap sums. The stores are
+; the same as in the 9-wide path of McHorVer22HorFirst_sse2, so nine words
+; are written per row. iWidth is loaded but never tested by this routine.
+;***********************************************************************
+WELS_EXTERN McHorVer22Width5HorFirst_sse2
+%assign push_num 0
+LOAD_6_PARA
+PUSH_XMM 8
+SIGN_EXTENSION r1, r1d
+SIGN_EXTENSION r3, r3d
+SIGN_EXTENSION r4, r4d
+SIGN_EXTENSION r5, r5d
+pxor xmm7, xmm7
+sub r0, r1                              ; start two rows above pSrc (original note: 5 more rows are needed)
+sub r0, r1
+
+.yloop_width_5:
+movq xmm0, [r0]
+punpcklbw xmm0, xmm7
+movq xmm1, [r0+5]
+punpcklbw xmm1, xmm7
+movq xmm2, [r0+1]
+punpcklbw xmm2, xmm7
+movq xmm3, [r0+4]
+punpcklbw xmm3, xmm7
+movq xmm4, [r0+2]
+punpcklbw xmm4, xmm7
+movq xmm5, [r0+3]
+punpcklbw xmm5, xmm7
+
+movdqa xmm7, xmm2
+paddw xmm7, xmm3
+movdqa xmm6, xmm4
+paddw xmm6, xmm5
+psllw xmm6, 2
+psubw xmm6, xmm7
+paddw xmm0, xmm1
+paddw xmm0, xmm6
+psllw xmm6, 2
+paddw xmm0, xmm6
+movd [r2], xmm0                         ; words 0..1
+
+pxor xmm7, xmm7
+movq xmm0, [r0+6]
+punpcklbw xmm0, xmm7
+
+paddw xmm4, xmm1
+paddw xmm5, xmm3
+psllw xmm5, 2
+psubw xmm5, xmm4
+paddw xmm2, xmm0
+paddw xmm2, xmm5
+psllw xmm5, 2
+paddw xmm2, xmm5
+movq [r2+2], xmm2                       ; words 1..4 of the row
+movhps [r2+2+8], xmm2                   ; words 5..8 of the row
+
+add r0, r1
+add r2, r3
+dec r5
+jnz .yloop_width_5
+POP_XMM
+LOAD_6_PARA_POP
+ret
+
+; FILTER_VER_4: same arithmetic as FILTER_VER, but only 4 result bytes are stored (movd)
+
+%macro FILTER_VER_4 9
+paddw %1, %6
+movdqa %7, %2
+movdqa %8, %3
+
+
+paddw %7, %5
+paddw %8, %4
+
+psubw %1, %7
+psraw %1, 2
+paddw %1, %8
+psubw %1, %7
+psraw %1, 2
+paddw %8, %1
+paddw %8, [pic(h264_mc_hc_32)]
+psraw %8, 6
+packuswb %8, %8
+movd %9, %8
+%endmacro
+
+
+;***********************************************************************
+;void McHorVer22Width4VerLastAlign_sse2( const uint8_t *pTap,
+;                                        int32_t iTapStride,
+;                                        uint8_t * pDst, int32_t iDstStride,
+;                                        int32_t iWidth, int32_t iHeight);
+; Vertical second pass over 16-bit taps for strips of 4 columns (iWidth >> 2
+; strips); rows are read with movdqa and 4 bytes are stored per row.
+; NOTE(review): a second strip would read from pTap+8 with movdqa - see below.
+;***********************************************************************
+
+WELS_EXTERN McHorVer22Width4VerLastAlign_sse2
+%assign push_num 0
+INIT_X86_32_PIC r6
+LOAD_6_PARA
+PUSH_XMM 8
+SIGN_EXTENSION r1, r1d
+SIGN_EXTENSION r3, r3d
+SIGN_EXTENSION r4, r4d
+SIGN_EXTENSION r5, r5d
+%ifndef X86_32
+push r12
+push r13
+push r14
+mov r12, r0
+mov r13, r2
+mov r14, r5
+%endif
+
+shr r4, 2                               ; number of 4-column strips
+
+.width_loop:
+movdqa xmm0, [r0]
+movdqa xmm1, [r0+r1]
+lea r0, [r0+2*r1]
+movdqa xmm2, [r0]
+movdqa xmm3, [r0+r1]
+lea r0, [r0+2*r1]
+movdqa xmm4, [r0]
+movdqa xmm5, [r0+r1]
+
+FILTER_VER_4 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, [r2]
+dec r5
+lea r0, [r0+2*r1]
+movdqa xmm6, [r0]
+
+movdqa xmm0, xmm1                       ; slide the six-row window down by one row
+movdqa xmm1, xmm2
+movdqa xmm2, xmm3
+movdqa xmm3, xmm4
+movdqa xmm4, xmm5
+movdqa xmm5, xmm6
+
+add r2, r3
+sub r0, r1
+
+.start:
+FILTER_VER_4 xmm0,xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, [r2]
+dec r5
+jz near .x_loop_dec
+
+lea r0, [r0+2*r1]
+movdqa xmm6, [r0]
+FILTER_VER_4 xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0,[r2+r3]
+dec r5
+jz near .x_loop_dec
+
+lea r2, [r2+2*r3]
+movdqa xmm7, [r0+r1]
+FILTER_VER_4 xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, xmm1, [r2]
+dec r5
+jz near .x_loop_dec
+
+lea r0, [r0+2*r1]
+movdqa xmm0, [r0]
+FILTER_VER_4 xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, xmm1, xmm2,[r2+r3]
+dec r5
+jz near .x_loop_dec
+
+lea r2, [r2+2*r3]
+movdqa xmm1, [r0+r1]
+FILTER_VER_4 xmm4, xmm5, xmm6, xmm7, xmm0, xmm1, xmm2, xmm3,[r2]
+dec r5
+jz near .x_loop_dec
+
+lea r0, [r0+2*r1]
+movdqa xmm2, [r0]
+FILTER_VER_4 xmm5, xmm6, xmm7, xmm0, xmm1, xmm2, xmm3,xmm4,[r2+r3]
+dec r5
+jz near .x_loop_dec
+
+lea r2, [r2+2*r3]
+movdqa xmm3, [r0+r1]
+FILTER_VER_4 xmm6, xmm7, xmm0, xmm1, xmm2, xmm3,xmm4,xmm5,[r2]
+dec r5
+jz near .x_loop_dec
+
+lea r0, [r0+2*r1]
+movdqa xmm4, [r0]
+FILTER_VER_4 xmm7, xmm0, xmm1, xmm2, xmm3,xmm4,xmm5,xmm6, [r2+r3]
+dec r5
+jz near .x_loop_dec
+
+lea r2, [r2+2*r3]
+movdqa xmm5, [r0+r1]
+jmp near .start
+
+.x_loop_dec:
+dec r4
+jz near .exit
+%ifdef X86_32
+mov r0, arg1
+mov r2, arg3
+mov r5, arg6
+%else
+mov r0, r12
+mov r2, r13
+mov r5, r14
+%endif
+add r0, 8                               ; NOTE(review): pTap+8 is not 16-byte aligned when pTap is, so the movdqa loads of a second strip would fault; presumably only iWidth below 8 is ever passed - confirm
+add r2, 4
+jmp .width_loop
+
+.exit:
+%ifndef X86_32
+pop r14
+pop r13
+pop r12
+%endif
+POP_XMM
+LOAD_6_PARA_POP
+DEINIT_X86_32_PIC
+ret
+
+
+;***********************************************************************
+;void McHorVer22Width4VerLastUnAlign_sse2(
+; const uint8_t *pTap,
+; int32_t iTapStride,
+; uint8_t * pDst,
+; int32_t iDstStride,
+; int32_t iWidth,
+; int32_t
iHeight); +;*********************************************************************** + +WELS_EXTERN McHorVer22Width4VerLastUnAlign_sse2 +%assign push_num 0 +INIT_X86_32_PIC r6 +LOAD_6_PARA +PUSH_XMM 8 +SIGN_EXTENSION r1, r1d +SIGN_EXTENSION r3, r3d +SIGN_EXTENSION r4, r4d +SIGN_EXTENSION r5, r5d +%ifndef X86_32 +push r12 +push r13 +push r14 +mov r12, r0 +mov r13, r2 +mov r14, r5 +%endif +shr r4, 2 + +.width_loop: +movdqu xmm0, [r0] +movdqu xmm1, [r0+r1] +lea r0, [r0+2*r1] +movdqu xmm2, [r0] +movdqu xmm3, [r0+r1] +lea r0, [r0+2*r1] +movdqu xmm4, [r0] +movdqu xmm5, [r0+r1] + +FILTER_VER_4 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, [r2] +dec r5 +lea r0, [r0+2*r1] +movdqu xmm6, [r0] + +movdqa xmm0, xmm1 +movdqa xmm1, xmm2 +movdqa xmm2, xmm3 +movdqa xmm3, xmm4 +movdqa xmm4, xmm5 +movdqa xmm5, xmm6 + +add r2, r3 +sub r0, r1 + +.start: +FILTER_VER_4 xmm0,xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, [r2] +dec r5 +jz near .x_loop_dec + +lea r0, [r0+2*r1] +movdqu xmm6, [r0] +FILTER_VER_4 xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0,[r2+r3] +dec r5 +jz near .x_loop_dec + +lea r2, [r2+2*r3] +movdqu xmm7, [r0+r1] +FILTER_VER_4 xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, xmm1, [r2] +dec r5 +jz near .x_loop_dec + +lea r0, [r0+2*r1] +movdqu xmm0, [r0] +FILTER_VER_4 xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, xmm1, xmm2,[r2+r3] +dec r5 +jz near .x_loop_dec + +lea r2, [r2+2*r3] +movdqu xmm1, [r0+r1] +FILTER_VER_4 xmm4, xmm5, xmm6, xmm7, xmm0, xmm1, xmm2, xmm3,[r2] +dec r5 +jz near .x_loop_dec + +lea r0, [r0+2*r1] +movdqu xmm2, [r0] +FILTER_VER_4 xmm5, xmm6, xmm7, xmm0, xmm1, xmm2, xmm3,xmm4,[r2+r3] +dec r5 +jz near .x_loop_dec + +lea r2, [r2+2*r3] +movdqu xmm3, [r0+r1] +FILTER_VER_4 xmm6, xmm7, xmm0, xmm1, xmm2, xmm3,xmm4,xmm5,[r2] +dec r5 +jz near .x_loop_dec + +lea r0, [r0+2*r1] +movdqu xmm4, [r0] +FILTER_VER_4 xmm7, xmm0, xmm1, xmm2, xmm3,xmm4,xmm5,xmm6, [r2+r3] +dec r5 +jz near .x_loop_dec + +lea r2, [r2+2*r3] +movdqu xmm5, [r0+r1] +jmp near .start + +.x_loop_dec: +dec r4 +jz near 
.exit +%ifdef X86_32 +mov r0, arg1 +mov r2, arg3 +mov r5, arg6 +%else +mov r0, r12 +mov r2, r13 +mov r5, r14 +%endif +add r0, 8 +add r2, 4 +jmp .width_loop + +.exit: +%ifndef X86_32 +pop r14 +pop r13 +pop r12 +%endif +POP_XMM +LOAD_6_PARA_POP +DEINIT_X86_32_PIC +ret + + +; px_ab=%1 px_cd=%2 px_ef=%3 maddubsw_ab=%4 maddubsw_cd=%5 maddubsw_ef=%6 tmp=%7 +%macro SSSE3_FilterVertical_8px 7 + pmaddubsw %1, %4 + movdqa %7, %2 + pmaddubsw %7, %5 + paddw %1, %7 + movdqa %7, %3 + pmaddubsw %7, %6 + paddw %1, %7 + paddw %1, [pic(h264_w0x10_1)] + psraw %1, 5 +%endmacro + +; px_a=%1 px_f=%2 px_bc=%3 px_de=%4 maddubsw_bc=%5 maddubsw_de=%6 tmp=%7,%8 +%macro SSSE3_FilterVertical2_8px 8 + movdqa %8, %2 + pxor %7, %7 + punpcklbw %1, %7 + punpcklbw %8, %7 + paddw %1, %8 + movdqa %7, %3 + pmaddubsw %7, %5 + paddw %1, %7 + movdqa %7, %4 + pmaddubsw %7, %6 + paddw %1, %7 + paddw %1, [pic(h264_w0x10_1)] + psraw %1, 5 +%endmacro + +; pixels=%1 shufb_32435465768798A9=%2 shufb_011267784556ABBC=%3 maddubsw_p1m5_p1m5_m5p1_m5p1=%4 tmp=%5,%6 +%macro SSSE3_FilterHorizontalbw_8px 6 + movdqa %5, %1 + pshufb %1, %2 + pshufb %5, %3 + pshufd %6, %1, 10110001b + pmaddubsw %1, [pic(db20_128)] + pmaddubsw %5, %4 + pmaddubsw %6, %4 + paddw %1, %5 + paddw %1, %6 +%endmacro + +; pixels=%1 shufb_32435465768798A9=%2 shufb_011267784556ABBC=%3 maddubsw_p1m5_p1m5_m5p1_m5p1=%4 tmp=%5,%6 +%macro SSSE3_FilterHorizontal_8px 6 + SSSE3_FilterHorizontalbw_8px %1, %2, %3, %4, %5, %6 + paddw %1, [pic(h264_w0x10_1)] + psraw %1, 5 +%endmacro + +; px0=%1 px1=%2 shufb_32435465768798A9=%3 shufb_011267784556ABBC=%4 maddubsw_p1m5_p1m5_m5p1_m5p1=%5 tmp=%6,%7 +%macro SSSE3_FilterHorizontalbw_2x4px 7 + movdqa %6, %1 + movdqa %7, %2 + pshufb %1, %3 + pshufb %2, %3 + punpcklqdq %1, %2 + pshufb %6, %4 + pshufb %7, %4 + punpcklqdq %6, %7 + pshufd %7, %1, 10110001b + pmaddubsw %1, [pic(db20_128)] + pmaddubsw %6, %5 + pmaddubsw %7, %5 + paddw %1, %6 + paddw %1, %7 +%endmacro + +; px0=%1 px1=%2 shufb_32435465768798A9=%3 
shufb_011267784556ABBC=%4 maddubsw_p1m5_p1m5_m5p1_m5p1=%5 tmp=%6,%7 +%macro SSSE3_FilterHorizontal_2x4px 7 + SSSE3_FilterHorizontalbw_2x4px %1, %2, %3, %4, %5, %6, %7 + paddw %1, [pic(h264_w0x10_1)] + psraw %1, 5 +%endmacro + +; pixels=%1 -32768>>scale=%2 tmp=%3 +%macro SSSE3_FilterHorizontalbw_2px 3 + pmaddubsw %1, [pic(maddubsw_m2p10_m40m40_p10m2_p0p0_128)] + pmaddwd %1, %2 + pshufd %3, %1, 10110001b + paddd %1, %3 +%endmacro + +; pixels=%1 tmp=%2 +%macro SSSE3_FilterHorizontal_2px 2 + SSSE3_FilterHorizontalbw_2px %1, [pic(dwm1024_128)], %2 + paddd %1, [pic(dd32768_128)] +%endmacro + +; px0=%1 px1=%2 px2=%3 px3=%4 px4=%5 px5=%6 tmp=%7 +%macro SSE2_FilterVerticalw_8px 7 + paddw %1, %6 + movdqa %7, %2 + paddw %7, %5 + psubw %1, %7 + psraw %1, 2 + psubw %1, %7 + movdqa %7, %3 + paddw %7, %4 + paddw %1, %7 + psraw %1, 2 + paddw %7, [pic(h264_mc_hc_32)] + paddw %1, %7 + psraw %1, 6 +%endmacro + +;*********************************************************************** +; void McHorVer02_ssse3(const uint8_t *pSrc, +; int32_t iSrcStride, +; uint8_t *pDst, +; int32_t iDstStride, +; int32_t iWidth, +; int32_t iHeight) +;*********************************************************************** + +WELS_EXTERN McHorVer02_ssse3 +%define p_src r0 +%define i_srcstride r1 +%define p_dst r2 +%define i_dststride r3 +%ifdef X86_32_PICASM +%define i_width dword arg5 +%else +%define i_width r4 +%endif +%define i_height r5 +%define i_srcstride3 r6 + %assign push_num 0 +%ifdef X86_32 + push r6 + %assign push_num 1 +%endif + LOAD_6_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r4, r4d + SIGN_EXTENSION r5, r5d + INIT_X86_32_PIC_NOPRESERVE r4 + sub p_src, i_srcstride + sub p_src, i_srcstride + lea i_srcstride3, [3 * i_srcstride] + %assign push_num_begin push_num + cmp i_width, 4 + jg .width8or16 + + movd xmm0, [p_src] + movd xmm4, [p_src + i_srcstride] + punpcklbw xmm0, xmm4 + movd xmm1, [p_src + 2 * i_srcstride] + punpcklbw xmm4, xmm1 + punpcklqdq 
xmm0, xmm4 + movd xmm4, [p_src + i_srcstride3] + lea p_src, [p_src + 4 * i_srcstride] + punpcklbw xmm1, xmm4 + movd xmm2, [p_src] + punpcklbw xmm4, xmm2 + punpcklqdq xmm1, xmm4 + movd xmm4, [p_src + i_srcstride] + lea p_src, [p_src + 2 * i_srcstride] + punpcklbw xmm2, xmm4 + movd xmm3, [p_src] + punpcklbw xmm4, xmm3 + punpcklqdq xmm2, xmm4 + movdqa xmm5, [pic(db20_128)] + SSSE3_FilterVertical_8px xmm0, xmm1, xmm2, [pic(maddubsw_p1m5_128)], xmm5, [pic(maddubsw_m5p1_128)], xmm4 + packuswb xmm0, xmm0 + movd [p_dst], xmm0 + psrlq xmm0, 32 + movd [p_dst + i_dststride], xmm0 + lea p_dst, [p_dst + 2 * i_dststride] + movd xmm4, [p_src + i_srcstride] + punpcklbw xmm3, xmm4 + movd xmm0, [p_src + 2 * i_srcstride] + punpcklbw xmm4, xmm0 + punpcklqdq xmm3, xmm4 + SSSE3_FilterVertical_8px xmm1, xmm2, xmm3, [pic(maddubsw_p1m5_128)], xmm5, [pic(maddubsw_m5p1_128)], xmm4 + packuswb xmm1, xmm1 + movd [p_dst], xmm1 + psrlq xmm1, 32 + movd [p_dst + i_dststride], xmm1 + cmp i_height, 5 + jl .width4_height_le5_done + lea p_dst, [p_dst + 2 * i_dststride] + movd xmm4, [p_src + i_srcstride3] + punpcklbw xmm0, xmm4 + jg .width4_height_ge8 + SSSE3_FilterVertical_8px xmm2, xmm3, xmm0, [pic(maddubsw_p1m5_128)], xmm5, [pic(maddubsw_m5p1_128)], xmm4 + packuswb xmm2, xmm2 + movd [p_dst], xmm2 +.width4_height_le5_done: + DEINIT_X86_32_PIC_KEEPDEF + POP_XMM + LOAD_6_PARA_POP +%ifdef X86_32 + pop r6 +%endif + ret +.width4_height_ge8: + lea p_src, [p_src + 4 * i_srcstride] + movd xmm1, [p_src] + punpcklbw xmm4, xmm1 + punpcklqdq xmm0, xmm4 + SSSE3_FilterVertical_8px xmm2, xmm3, xmm0, [pic(maddubsw_p1m5_128)], xmm5, [pic(maddubsw_m5p1_128)], xmm4 + packuswb xmm2, xmm2 + movd [p_dst], xmm2 + psrlq xmm2, 32 + movd [p_dst + i_dststride], xmm2 + lea p_dst, [p_dst + 2 * i_dststride] + movd xmm4, [p_src + i_srcstride] + punpcklbw xmm1, xmm4 + movd xmm2, [p_src + 2 * i_srcstride] + punpcklbw xmm4, xmm2 + punpcklqdq xmm1, xmm4 + SSSE3_FilterVertical_8px xmm3, xmm0, xmm1, [pic(maddubsw_p1m5_128)], xmm5, 
[pic(maddubsw_m5p1_128)], xmm4 + packuswb xmm3, xmm3 + movd [p_dst], xmm3 + psrlq xmm3, 32 + movd [p_dst + i_dststride], xmm3 + cmp i_height, 9 + jl .width4_height_ge8_done + lea p_dst, [p_dst + 2 * i_dststride] + movd xmm4, [p_src + i_srcstride3] + punpcklbw xmm2, xmm4 + SSSE3_FilterVertical_8px xmm0, xmm1, xmm2, [pic(maddubsw_p1m5_128)], xmm5, [pic(maddubsw_m5p1_128)], xmm4 + packuswb xmm0, xmm0 + movd [p_dst], xmm0 +.width4_height_ge8_done: + DEINIT_X86_32_PIC_KEEPDEF + POP_XMM + LOAD_6_PARA_POP +%ifdef X86_32 + pop r6 +%endif + ret + +.width8or16: + %assign push_num push_num_begin + sub i_height, 1 + push i_height + %assign push_num push_num + 1 +%xdefine i_ycnt i_height +%define i_height [r7] +.xloop: + push p_src + push p_dst + %assign push_num push_num + 2 + test i_ycnt, 1 + jnz .yloop_begin_even + movq xmm0, [p_src] + movq xmm1, [p_src + i_srcstride] + punpcklbw xmm0, xmm1 + movq xmm2, [p_src + 2 * i_srcstride] + movq xmm3, [p_src + i_srcstride3] + lea p_src, [p_src + 4 * i_srcstride] + punpcklbw xmm2, xmm3 + movq xmm4, [p_src] + movq xmm5, [p_src + i_srcstride] + lea p_src, [p_src + 2 * i_srcstride] + punpcklbw xmm4, xmm5 + SSSE3_FilterVertical_8px xmm0, xmm2, xmm4, [pic(maddubsw_p1m5_128)], [pic(db20_128)], [pic(maddubsw_m5p1_128)], xmm7 + packuswb xmm0, xmm0 + movlps [p_dst], xmm0 + add p_dst, i_dststride + jmp .yloop +.yloop_begin_even: + movq xmm1, [p_src] + movq xmm2, [p_src + i_srcstride] + movq xmm3, [p_src + 2 * i_srcstride] + add p_src, i_srcstride3 + punpcklbw xmm2, xmm3 + movq xmm4, [p_src] + movq xmm5, [p_src + i_srcstride] + lea p_src, [p_src + 2 * i_srcstride] + punpcklbw xmm4, xmm5 +.yloop: + movq xmm6, [p_src] + SSSE3_FilterVertical2_8px xmm1, xmm6, xmm2, xmm4, [pic(maddubsw_m5p20_128)], [pic(maddubsw_p20m5_128)], xmm0, xmm7 + movq xmm7, [p_src + i_srcstride] + punpcklbw xmm6, xmm7 + SSSE3_FilterVertical_8px xmm2, xmm4, xmm6, [pic(maddubsw_p1m5_128)], [pic(db20_128)], [pic(maddubsw_m5p1_128)], xmm0 + packuswb xmm1, xmm2 + movlps [p_dst], 
xmm1 + movhps [p_dst + i_dststride], xmm1 + lea p_dst, [p_dst + 2 * i_dststride] + movq xmm0, [p_src + 2 * i_srcstride] + SSSE3_FilterVertical2_8px xmm3, xmm0, xmm4, xmm6, [pic(maddubsw_m5p20_128)], [pic(maddubsw_p20m5_128)], xmm2, xmm1 + movq xmm1, [p_src + i_srcstride3] + lea p_src, [p_src + 4 * i_srcstride] + punpcklbw xmm0, xmm1 + SSSE3_FilterVertical_8px xmm4, xmm6, xmm0, [pic(maddubsw_p1m5_128)], [pic(db20_128)], [pic(maddubsw_m5p1_128)], xmm2 + packuswb xmm3, xmm4 + movlps [p_dst], xmm3 + movhps [p_dst + i_dststride], xmm3 + cmp i_ycnt, 4 + jle .yloop_exit + lea p_dst, [p_dst + 2 * i_dststride] + movq xmm2, [p_src] + SSSE3_FilterVertical2_8px xmm5, xmm2, xmm6, xmm0, [pic(maddubsw_m5p20_128)], [pic(maddubsw_p20m5_128)], xmm4, xmm3 + movq xmm3, [p_src + i_srcstride] + punpcklbw xmm2, xmm3 + SSSE3_FilterVertical_8px xmm6, xmm0, xmm2, [pic(maddubsw_p1m5_128)], [pic(db20_128)], [pic(maddubsw_m5p1_128)], xmm4 + packuswb xmm5, xmm6 + movlps [p_dst], xmm5 + movhps [p_dst + i_dststride], xmm5 + lea p_dst, [p_dst + 2 * i_dststride] + movq xmm4, [p_src + 2 * i_srcstride] + SSSE3_FilterVertical2_8px xmm7, xmm4, xmm0, xmm2, [pic(maddubsw_m5p20_128)], [pic(maddubsw_p20m5_128)], xmm6, xmm5 + movq xmm5, [p_src + i_srcstride3] + lea p_src, [p_src + 4 * i_srcstride] + punpcklbw xmm4, xmm5 + SSSE3_FilterVertical_8px xmm0, xmm2, xmm4, [pic(maddubsw_p1m5_128)], [pic(db20_128)], [pic(maddubsw_m5p1_128)], xmm6 + packuswb xmm7, xmm0 + movlps [p_dst], xmm7 + movhps [p_dst + i_dststride], xmm7 + lea p_dst, [p_dst + 2 * i_dststride] + sub i_ycnt, 8 + jg .yloop +.yloop_exit: + pop p_dst + pop p_src + %assign push_num push_num - 2 + sub i_width, 8 + jle .width8or16_done + add p_src, 8 + add p_dst, 8 + mov i_ycnt, i_height + jmp .xloop +.width8or16_done: + pop i_ycnt + %assign push_num push_num - 1 + DEINIT_X86_32_PIC + POP_XMM + LOAD_6_PARA_POP +%ifdef X86_32 + pop r6 +%endif + ret +%undef p_src +%undef i_srcstride +%undef i_srcstride3 +%undef p_dst +%undef i_dststride +%undef i_width 
+%undef i_height +%undef i_ycnt + + +;******************************************************************************* +; void McHorVer20_ssse3(const uint8_t *pSrc, +; int iSrcStride, +; uint8_t *pDst, +; int iDstStride, +; int iWidth, +; int iHeight); +;******************************************************************************* + +WELS_EXTERN McHorVer20_ssse3 +%define p_src r0 +%define i_srcstride r1 +%define p_dst r2 +%define i_dststride r3 +%define i_width r4 +%define i_height r5 + %assign push_num 0 + INIT_X86_32_PIC r6 + LOAD_6_PARA + PUSH_XMM 7 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r4, r4d + SIGN_EXTENSION r5, r5d + movdqa xmm4, [pic(shufb_32435465768798A9)] + movdqa xmm5, [pic(shufb_011267784556ABBC)] + movdqa xmm6, [pic(maddubsw_p1m5_p1m5_m5p1_m5p1_128)] + cmp i_width, 8 + je .width8_yloop + jg .width16_yloop +.width4_yloop: + movdqu xmm0, [p_src - 2] + movdqu xmm1, [p_src + i_srcstride - 2] + lea p_src, [p_src + 2 * i_srcstride] + SSSE3_FilterHorizontal_2x4px xmm0, xmm1, xmm4, xmm5, xmm6, xmm2, xmm3 + packuswb xmm0, xmm0 + movd [p_dst], xmm0 + psrlq xmm0, 32 + movd [p_dst + i_dststride], xmm0 + lea p_dst, [p_dst + 2 * i_dststride] + sub i_height, 2 + jg .width4_yloop + POP_XMM + LOAD_6_PARA_POP + DEINIT_X86_32_PIC_KEEPDEF + ret +.width8_yloop: + movdqu xmm0, [p_src - 2] + movdqu xmm1, [p_src + i_srcstride - 2] + lea p_src, [p_src + 2 * i_srcstride] + SSSE3_FilterHorizontal_8px xmm0, xmm4, xmm5, xmm6, xmm2, xmm3 + SSSE3_FilterHorizontal_8px xmm1, xmm4, xmm5, xmm6, xmm2, xmm3 + packuswb xmm0, xmm1 + movlps [p_dst], xmm0 + movhps [p_dst + i_dststride], xmm0 + lea p_dst, [p_dst + 2 * i_dststride] + sub i_height, 2 + jg .width8_yloop + POP_XMM + LOAD_6_PARA_POP + DEINIT_X86_32_PIC_KEEPDEF + ret +.width16_yloop: + movdqu xmm0, [p_src - 2] + movdqu xmm1, [p_src + 6] + add p_src, i_srcstride + SSSE3_FilterHorizontal_8px xmm0, xmm4, xmm5, xmm6, xmm2, xmm3 + SSSE3_FilterHorizontal_8px xmm1, xmm4, xmm5, xmm6, xmm2, xmm3 + packuswb 
xmm0, xmm1 + MOVDQ [p_dst], xmm0 + add p_dst, i_dststride + sub i_height, 1 + jg .width16_yloop + POP_XMM + LOAD_6_PARA_POP + DEINIT_X86_32_PIC + ret +%undef p_src +%undef i_srcstride +%undef p_dst +%undef i_dststride +%undef i_width +%undef i_height + + +;*********************************************************************** +; void McHorVer20Width5Or9Or17_ssse3(const uint8_t *pSrc, +; int32_t iSrcStride, +; uint8_t *pDst, +; int32_t iDstStride, +; int32_t iWidth, +; int32_t iHeight); +;*********************************************************************** + +WELS_EXTERN McHorVer20Width5Or9Or17_ssse3 +%define p_src r0 +%define i_srcstride r1 +%define p_dst r2 +%define i_dststride r3 +%define i_width r4 +%define i_height r5 + %assign push_num 0 + INIT_X86_32_PIC r6 + LOAD_6_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r4, r4d + SIGN_EXTENSION r5, r5d + movdqa xmm5, [pic(shufb_32435465768798A9)] + movdqa xmm6, [pic(shufb_011267784556ABBC)] + movdqa xmm7, [pic(maddubsw_p1m5_p1m5_m5p1_m5p1_128)] + cmp i_width, 9 + je .width9_yloop + jg .width17_yloop +.width5_yloop: + movdqu xmm0, [p_src - 2] + add p_src, i_srcstride + SSSE3_FilterHorizontal_8px xmm0, xmm5, xmm6, xmm7, xmm1, xmm2 + packuswb xmm0, xmm0 + movdqa xmm1, xmm0 + psrlq xmm1, 8 + movd [p_dst], xmm0 + movd [p_dst + 1], xmm1 + add p_dst, i_dststride + sub i_height, 1 + jg .width5_yloop + POP_XMM + LOAD_6_PARA_POP + DEINIT_X86_32_PIC_KEEPDEF + ret +.width9_yloop: + movdqu xmm0, [p_src - 2] + movdqu xmm4, [p_src + i_srcstride - 2] + lea p_src, [p_src + 2 * i_srcstride] + movdqa xmm3, xmm0 + punpckhqdq xmm3, xmm4 + SSSE3_FilterHorizontal_2px xmm3, xmm2 + SSSE3_FilterHorizontal_8px xmm0, xmm5, xmm6, xmm7, xmm1, xmm2 + packuswb xmm3, xmm0 + movd [p_dst + 5], xmm3 + movhps [p_dst], xmm3 + add p_dst, i_dststride + SSSE3_FilterHorizontal_8px xmm4, xmm5, xmm6, xmm7, xmm1, xmm2 + packuswb xmm4, xmm4 + psrldq xmm3, 4 + movd [p_dst + 5], xmm3 + movlps [p_dst], xmm4 + add p_dst, 
i_dststride + sub i_height, 2 + jg .width9_yloop + POP_XMM + LOAD_6_PARA_POP + DEINIT_X86_32_PIC_KEEPDEF + ret +.width17_yloop: + movdqu xmm0, [p_src - 2] + movdqu xmm3, [p_src + 6] + add p_src, i_srcstride + movdqa xmm4, xmm3 + SSSE3_FilterHorizontal_8px xmm0, xmm5, xmm6, xmm7, xmm1, xmm2 + SSSE3_FilterHorizontal_8px xmm3, xmm5, xmm6, xmm7, xmm1, xmm2 + packuswb xmm0, xmm3 + movdqu xmm1, [p_src - 2] + movdqu xmm3, [p_src + 6] + add p_src, i_srcstride + punpckhqdq xmm4, xmm3 + SSSE3_FilterHorizontal_2px xmm4, xmm2 + packuswb xmm4, xmm4 + movd [p_dst + 13], xmm4 + MOVDQ [p_dst], xmm0 + add p_dst, i_dststride + psrldq xmm4, 4 + movd [p_dst + 13], xmm4 + SSSE3_FilterHorizontal_8px xmm1, xmm5, xmm6, xmm7, xmm0, xmm2 + SSSE3_FilterHorizontal_8px xmm3, xmm5, xmm6, xmm7, xmm0, xmm2 + packuswb xmm1, xmm3 + MOVDQ [p_dst], xmm1 + add p_dst, i_dststride + sub i_height, 2 + jg .width17_yloop + POP_XMM + LOAD_6_PARA_POP + DEINIT_X86_32_PIC + ret +%undef p_src +%undef i_srcstride +%undef p_dst +%undef i_dststride +%undef i_width +%undef i_height + + +;******************************************************************************* +; void McHorVer20Width4U8ToS16_ssse3(const uint8_t *pSrc, +; int iSrcStride, +; int16_t *pDst, +; int iHeight); +;******************************************************************************* + +WELS_EXTERN McHorVer20Width4U8ToS16_ssse3 +%define p_src r0 +%define i_srcstride r1 +%define p_dst r2 +%define i_height r3 + %assign push_num 0 + INIT_X86_32_PIC r4 + LOAD_4_PARA + PUSH_XMM 7 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + sub p_src, i_srcstride + sub p_src, i_srcstride + movdqa xmm4, [pic(shufb_32435465768798A9)] + movdqa xmm5, [pic(shufb_011267784556ABBC)] + movdqa xmm6, [pic(maddubsw_p1m5_p1m5_m5p1_m5p1_128)] + sub i_height, 1 +.yloop: + movdqu xmm0, [p_src - 2] + movdqu xmm1, [p_src + i_srcstride - 2] + lea p_src, [p_src + 2 * i_srcstride] + SSSE3_FilterHorizontalbw_2x4px xmm0, xmm1, xmm4, xmm5, xmm6, xmm2, xmm3 + movdqa [p_dst], xmm0 
+ add p_dst, 16 + sub i_height, 2 + jg .yloop + ; Height % 2 remainder. + movdqu xmm0, [p_src - 2] + SSSE3_FilterHorizontalbw_8px xmm0, xmm4, xmm5, xmm6, xmm2, xmm3 + movlps [p_dst], xmm0 + POP_XMM + LOAD_4_PARA_POP + DEINIT_X86_32_PIC + ret +%undef p_src +%undef i_srcstride +%undef p_dst +%undef i_height + + +;*********************************************************************** +; void McHorVer02Width4S16ToU8_ssse3(const int16_t *pSrc, +; uint8_t *pDst, +; int32_t iDstStride, +; int32_t iHeight); +;*********************************************************************** + +WELS_EXTERN McHorVer02Width4S16ToU8_ssse3 +%define p_src r0 +%define p_dst r1 +%define i_dststride r2 +%define i_height r3 +%define i_srcstride 8 + %assign push_num 0 + INIT_X86_32_PIC r4 + LOAD_4_PARA + PUSH_XMM 8 + SIGN_EXTENSION r2, r2d + SIGN_EXTENSION r3, r3d + movdqa xmm0, [p_src + 0 * i_srcstride] + movdqu xmm1, [p_src + 1 * i_srcstride] + movdqa xmm2, [p_src + 2 * i_srcstride] + movdqu xmm3, [p_src + 3 * i_srcstride] + movdqa xmm4, [p_src + 4 * i_srcstride] + movdqu xmm5, [p_src + 5 * i_srcstride] + movdqa xmm6, [p_src + 6 * i_srcstride] + SSE2_FilterVerticalw_8px xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm7 + packuswb xmm0, xmm0 + movd [p_dst], xmm0 + psrlq xmm0, 32 + movd [p_dst + i_dststride], xmm0 + lea p_dst, [p_dst + 2 * i_dststride] + movdqu xmm7, [p_src + 7 * i_srcstride] + movdqa xmm0, [p_src + 8 * i_srcstride] + SSE2_FilterVerticalw_8px xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm1 + packuswb xmm2, xmm2 + movd [p_dst], xmm2 + psrlq xmm2, 32 + movd [p_dst + i_dststride], xmm2 + cmp i_height, 4 + jle .done + lea p_dst, [p_dst + 2 * i_dststride] + movdqu xmm1, [p_src + 9 * i_srcstride] + movdqa xmm2, [p_src + 10 * i_srcstride] + SSE2_FilterVerticalw_8px xmm4, xmm5, xmm6, xmm7, xmm0, xmm1, xmm3 + packuswb xmm4, xmm4 + movd [p_dst], xmm4 + psrlq xmm4, 32 + movd [p_dst + i_dststride], xmm4 + lea p_dst, [p_dst + 2 * i_dststride] + movdqu xmm3, [p_src + 11 * i_srcstride] + 
SSE2_FilterVerticalw_8px xmm6, xmm7, xmm0, xmm1, xmm2, xmm3, xmm5 + packuswb xmm6, xmm6 + movd [p_dst], xmm6 + psrlq xmm6, 32 + movd [p_dst + i_dststride], xmm6 +.done: + POP_XMM + LOAD_4_PARA_POP + DEINIT_X86_32_PIC + ret +%undef p_src +%undef p_dst +%undef i_dststride +%undef i_height +%undef i_srcstride + + +;*********************************************************************** +; void McHorVer20Width8U8ToS16_ssse3(const uint8_t *pSrc, +; int16_t iSrcStride, +; int16_t *pDst, +; int32_t iDstStride, +; int32_t iHeight); +;*********************************************************************** + +WELS_EXTERN McHorVer20Width8U8ToS16_ssse3 +%define p_src r0 +%define i_srcstride r1 +%define p_dst r2 +%define i_dststride r3 +%define i_height r4 + %assign push_num 0 + INIT_X86_32_PIC r5 + LOAD_5_PARA + PUSH_XMM 7 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r4, r4d + sub p_src, i_srcstride + sub p_src, i_srcstride + movdqa xmm4, [pic(shufb_32435465768798A9)] + movdqa xmm5, [pic(shufb_011267784556ABBC)] + movdqa xmm6, [pic(maddubsw_p1m5_p1m5_m5p1_m5p1_128)] + sub i_height, 1 +.yloop: + movdqu xmm0, [p_src - 2] + movdqu xmm1, [p_src + i_srcstride - 2] + lea p_src, [p_src + 2 * i_srcstride] + SSSE3_FilterHorizontalbw_8px xmm0, xmm4, xmm5, xmm6, xmm2, xmm3 + MOVDQ [p_dst], xmm0 + add p_dst, i_dststride + SSSE3_FilterHorizontalbw_8px xmm1, xmm4, xmm5, xmm6, xmm2, xmm3 + MOVDQ [p_dst], xmm1 + add p_dst, i_dststride + sub i_height, 2 + jg .yloop + jl .done + movdqu xmm0, [p_src - 2] + SSSE3_FilterHorizontalbw_8px xmm0, xmm4, xmm5, xmm6, xmm2, xmm3 + MOVDQ [p_dst], xmm0 +.done: + POP_XMM + LOAD_5_PARA_POP + DEINIT_X86_32_PIC + ret +%undef p_src +%undef i_srcstride +%undef p_dst +%undef i_dststride +%undef i_height + + +;*********************************************************************** +; void McHorVer02Width5S16ToU8_ssse3(const int16_t *pSrc, +; int32_t iTapStride, +; uint8_t *pDst, +; int32_t iDstStride, +; int32_t iHeight); 
+;*********************************************************************** + +WELS_EXTERN McHorVer02Width5S16ToU8_ssse3 +%define p_src r0 +%define i_srcstride r1 +%define p_dst r2 +%define i_dststride r3 +%define i_height r4 +%define i_srcstride3 r5 + %assign push_num 0 +%ifdef X86_32 + push r5 + %assign push_num 1 +%endif + INIT_X86_32_PIC r6 + LOAD_5_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r4, r4d + lea i_srcstride3, [3 * i_srcstride] + movdqa xmm0, [p_src] + movdqa xmm1, [p_src + i_srcstride] + movdqa xmm2, [p_src + 2 * i_srcstride] + movdqa xmm3, [p_src + i_srcstride3] + lea p_src, [p_src + 4 * i_srcstride] + movdqa xmm4, [p_src] + movdqa xmm5, [p_src + i_srcstride] + SSE2_FilterVerticalw_8px xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6 + movdqa xmm6, [p_src + 2 * i_srcstride] + packuswb xmm0, xmm0 + movdqa xmm7, xmm0 + psrlq xmm7, 8 + movd [p_dst + 1], xmm7 + movd [p_dst], xmm0 + add p_dst, i_dststride + SSE2_FilterVerticalw_8px xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 + movdqa xmm7, [p_src + i_srcstride3] + lea p_src, [p_src + 4 * i_srcstride] + packuswb xmm1, xmm1 + movdqa xmm0, xmm1 + psrlq xmm0, 8 + movd [p_dst + 1], xmm0 + movd [p_dst], xmm1 + add p_dst, i_dststride + SSE2_FilterVerticalw_8px xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0 + movdqa xmm0, [p_src] + packuswb xmm2, xmm2 + movdqa xmm1, xmm2 + psrlq xmm1, 8 + movd [p_dst + 1], xmm1 + movd [p_dst], xmm2 + add p_dst, i_dststride + SSE2_FilterVerticalw_8px xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, xmm1 + packuswb xmm3, xmm3 + movdqa xmm2, xmm3 + psrlq xmm2, 8 + movd [p_dst + 1], xmm2 + movd [p_dst], xmm3 + add p_dst, i_dststride + movdqa xmm1, [p_src + i_srcstride] + SSE2_FilterVerticalw_8px xmm4, xmm5, xmm6, xmm7, xmm0, xmm1, xmm2 + packuswb xmm4, xmm4 + movdqa xmm3, xmm4 + psrlq xmm3, 8 + movd [p_dst + 1], xmm3 + movd [p_dst], xmm4 + cmp i_height, 5 + jle .done + add p_dst, i_dststride + movdqa xmm2, [p_src + 2 * i_srcstride] + SSE2_FilterVerticalw_8px xmm5, xmm6, 
xmm7, xmm0, xmm1, xmm2, xmm3 + movdqa xmm3, [p_src + i_srcstride3] + lea p_src, [p_src + 4 * i_srcstride] + packuswb xmm5, xmm5 + movdqa xmm4, xmm5 + psrlq xmm4, 8 + movd [p_dst + 1], xmm4 + movd [p_dst], xmm5 + add p_dst, i_dststride + SSE2_FilterVerticalw_8px xmm6, xmm7, xmm0, xmm1, xmm2, xmm3, xmm4 + movdqa xmm4, [p_src] + packuswb xmm6, xmm6 + movdqa xmm5, xmm6 + psrlq xmm5, 8 + movd [p_dst + 1], xmm5 + movd [p_dst], xmm6 + add p_dst, i_dststride + SSE2_FilterVerticalw_8px xmm7, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5 + packuswb xmm7, xmm7 + movdqa xmm6, xmm7 + psrlq xmm6, 8 + movd [p_dst + 1], xmm6 + movd [p_dst], xmm7 + add p_dst, i_dststride + movdqa xmm5, [p_src + i_srcstride] + SSE2_FilterVerticalw_8px xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6 + packuswb xmm0, xmm0 + movdqa xmm7, xmm0 + psrlq xmm7, 8 + movd [p_dst + 1], xmm7 + movd [p_dst], xmm0 +.done: + POP_XMM + LOAD_5_PARA_POP + DEINIT_X86_32_PIC +%ifdef X86_32 + pop r5 +%endif + ret +%undef p_src +%undef i_srcstride +%undef p_dst +%undef i_dststride +%undef i_height +%undef i_srcstride3 + + +;*********************************************************************** +; void McHorVer20Width9Or17U8ToS16_ssse3(const uint8_t *pSrc, +; int32_t iSrcStride, +; int16_t *pDst, +; int32_t iDstStride, +; int32_t iWidth, +; int32_t iHeight); +;*********************************************************************** + +WELS_EXTERN McHorVer20Width9Or17U8ToS16_ssse3 +%define p_src r0 +%define i_srcstride r1 +%define p_dst r2 +%define i_dststride r3 +%define i_width r4 +%define i_height r5 + %assign push_num 0 + INIT_X86_32_PIC r6 + LOAD_6_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r4, r4d + SIGN_EXTENSION r5, r5d + sub p_src, i_srcstride + sub p_src, i_srcstride + pcmpeqw xmm4, xmm4 + psllw xmm4, 15 ; dw -32768 + movdqa xmm5, [pic(shufb_32435465768798A9)] + movdqa xmm6, [pic(shufb_011267784556ABBC)] + movdqa xmm7, [pic(maddubsw_p1m5_p1m5_m5p1_m5p1_128)] + cmp i_width, 9 + jne 
.width17_yloop + +.width9_yloop: + movdqu xmm0, [p_src - 2] + movdqa xmm3, xmm0 + SSSE3_FilterHorizontalbw_8px xmm0, xmm5, xmm6, xmm7, xmm1, xmm2 + movdqu xmm2, [p_src + i_srcstride - 2] + lea p_src, [p_src + 2 * i_srcstride] + punpckhqdq xmm3, xmm2 + SSSE3_FilterHorizontalbw_2px xmm3, xmm4, xmm1 + movlps [p_dst + 10], xmm3 + MOVDQ [p_dst], xmm0 + add p_dst, i_dststride + movhps [p_dst + 10], xmm3 + SSSE3_FilterHorizontalbw_8px xmm2, xmm5, xmm6, xmm7, xmm1, xmm0 + MOVDQ [p_dst], xmm2 + add p_dst, i_dststride + sub i_height, 2 + jg .width9_yloop + POP_XMM + LOAD_6_PARA_POP + DEINIT_X86_32_PIC_KEEPDEF + ret + +.width17_yloop: + movdqu xmm0, [p_src - 2] + movdqu xmm3, [p_src + 6] + add p_src, i_srcstride + SSSE3_FilterHorizontalbw_8px xmm0, xmm5, xmm6, xmm7, xmm1, xmm2 + MOVDQ [p_dst], xmm0 + movdqa xmm0, xmm3 + SSSE3_FilterHorizontalbw_8px xmm3, xmm5, xmm6, xmm7, xmm1, xmm2 + movdqu xmm2, [p_src + 6] + punpckhqdq xmm0, xmm2 + SSSE3_FilterHorizontalbw_2px xmm0, xmm4, xmm1 + movdqu xmm1, [p_src - 2] + add p_src, i_srcstride + movlps [p_dst + 26], xmm0 + MOVDQ [p_dst + 16], xmm3 + add p_dst, i_dststride + movhps [p_dst + 26], xmm0 + SSSE3_FilterHorizontalbw_8px xmm1, xmm5, xmm6, xmm7, xmm0, xmm3 + MOVDQ [p_dst], xmm1 + SSSE3_FilterHorizontalbw_8px xmm2, xmm5, xmm6, xmm7, xmm0, xmm3 + MOVDQ [p_dst + 16], xmm2 + add p_dst, i_dststride + sub i_height, 2 + jg .width17_yloop + POP_XMM + LOAD_6_PARA_POP + DEINIT_X86_32_PIC + ret +%undef p_src +%undef i_srcstride +%undef p_dst +%undef i_dststride +%undef i_width +%undef i_height + + +;*********************************************************************** +; void McHorVer02WidthGe8S16ToU8_ssse3(const int16_t *pSrc, +; int32_t iSrcStride, +; uint8_t *pDst, +; int32_t iDstStride, +; int32_t iWidth, +; int32_t iHeight); +;*********************************************************************** + +WELS_EXTERN McHorVer02WidthGe8S16ToU8_ssse3 +%define p_src r0 +%define i_srcstride r1 +%define p_dst r2 +%define i_dststride r3 +%ifdef 
X86_32_PICASM +%define i_width dword arg5 +%else +%define i_width r4 +%endif +%define i_height r5 +%define i_srcstride3 r6 + %assign push_num 0 +%ifdef X86_32 + push r6 + %assign push_num 1 +%endif + LOAD_6_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r4, r4d + SIGN_EXTENSION r5, r5d + INIT_X86_32_PIC_NOPRESERVE r4 + sub i_height, 1 + push i_height + %assign push_num push_num + 1 + lea i_srcstride3, [3 * i_srcstride] + test i_width, 1 + jz .width_loop + push p_src + push p_dst + %assign push_num push_num + 2 +%ifdef X86_32_PICASM + add p_src, i_width + add p_src, i_width + sub p_src, 2 +%else + lea p_src, [p_src + 2 * i_width - 2] +%endif + add p_dst, i_width + movd xmm0, [p_src] + punpcklwd xmm0, [p_src + i_srcstride] + movd xmm1, [p_src + 2 * i_srcstride] + add p_src, i_srcstride3 + punpcklwd xmm1, [p_src] + punpckldq xmm0, xmm1 + movd xmm1, [p_src + i_srcstride] + cmp i_height, 4 + je .filter5_unalign + punpcklwd xmm1, [p_src + 2 * i_srcstride] + movd xmm2, [p_src + i_srcstride3] + lea p_src, [p_src + 4 * i_srcstride] + punpcklwd xmm2, [p_src] + punpckldq xmm1, xmm2 + punpcklqdq xmm0, xmm1 +.height_loop_unalign: + movd xmm1, [p_src + i_srcstride] + palignr xmm1, xmm0, 2 + movd xmm2, [p_src + 2 * i_srcstride] + palignr xmm2, xmm1, 2 + movd xmm3, [p_src + i_srcstride3] + palignr xmm3, xmm2, 2 + lea p_src, [p_src + 4 * i_srcstride] + movd xmm4, [p_src] + palignr xmm4, xmm3, 2 + movd xmm5, [p_src + i_srcstride] + palignr xmm5, xmm4, 2 + SSE2_FilterVerticalw_8px xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm7 + packuswb xmm0, xmm0 + movdqa xmm6, xmm0 + pslld xmm6, 24 + movd [p_dst - 4], xmm6 + movlps [p_dst + 4 * i_dststride - 8], xmm6 + add p_dst, i_dststride + movdqa xmm6, xmm0 + pslld xmm6, 16 + movd [p_dst - 4], xmm6 + movlps [p_dst + 4 * i_dststride - 8], xmm6 + add p_dst, i_dststride + movdqa xmm6, xmm0 + pslld xmm6, 8 + movd [p_dst - 4], xmm6 + movd [p_dst + i_dststride - 4], xmm0 + lea p_dst, [p_dst + 4 * i_dststride] + movlps 
[p_dst - 8], xmm6 + movlps [p_dst + i_dststride - 8], xmm0 + lea p_dst, [p_dst + 2 * i_dststride] + sub i_height, 8 + jle .height_loop_unalign_exit + movd xmm1, [p_src + 2 * i_srcstride] + palignr xmm1, xmm5, 2 + movd xmm0, [p_src + i_srcstride3] + lea p_src, [p_src + 4 * i_srcstride] + punpcklwd xmm0, [p_src] + palignr xmm0, xmm1, 4 + jmp .height_loop_unalign +.height_loop_unalign_exit: + movddup xmm6, [p_src + 2 * i_srcstride - 6] + SSE2_FilterVerticalw_8px xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 + packuswb xmm1, xmm1 + movlps [p_dst - 8], xmm1 + jmp .unalign_done +.filter5_unalign: + pslldq xmm0, 8 + palignr xmm1, xmm0, 2 + movd xmm2, [p_src + 2 * i_srcstride] + palignr xmm2, xmm1, 2 + movd xmm3, [p_src + i_srcstride3] + lea p_src, [p_src + 4 * i_srcstride] + palignr xmm3, xmm2, 2 + movd xmm4, [p_src] + palignr xmm4, xmm3, 2 + movd xmm5, [p_src + i_srcstride] + palignr xmm5, xmm4, 2 + movd xmm6, [p_src + 2 * i_srcstride] + palignr xmm6, xmm5, 2 + SSE2_FilterVerticalw_8px xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 + packuswb xmm1, xmm1 + movdqa xmm0, xmm1 + psrlq xmm1, 8 + movdqa xmm2, xmm0 + psrlq xmm2, 16 + movdqa xmm3, xmm0 + psrlq xmm3, 24 + movd [p_dst - 4], xmm0 + movd [p_dst + i_dststride - 4], xmm1 + lea p_dst, [p_dst + 2 * i_dststride] + movd [p_dst - 4], xmm2 + movd [p_dst + i_dststride - 4], xmm3 + movlps [p_dst + 2 * i_dststride - 8], xmm0 +.unalign_done: + pop p_dst + pop p_src + %assign push_num push_num - 2 + mov i_height, [r7] + sub i_width, 1 +.width_loop: + push p_src + push p_dst + %assign push_num push_num + 2 + movdqa xmm0, [p_src] + movdqa xmm1, [p_src + i_srcstride] + movdqa xmm2, [p_src + 2 * i_srcstride] + movdqa xmm3, [p_src + i_srcstride3] + lea p_src, [p_src + 4 * i_srcstride] + movdqa xmm4, [p_src] +.height_loop: + movdqa xmm5, [p_src + i_srcstride] + SSE2_FilterVerticalw_8px xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6 + movdqa xmm6, [p_src + 2 * i_srcstride] + SSE2_FilterVerticalw_8px xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 + movdqa 
xmm7, [p_src + i_srcstride3] + lea p_src, [p_src + 4 * i_srcstride] + packuswb xmm0, xmm1 + movlps [p_dst], xmm0 + movhps [p_dst + i_dststride], xmm0 + lea p_dst, [p_dst + 2 * i_dststride] + SSE2_FilterVerticalw_8px xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0 + movdqa xmm0, [p_src] + SSE2_FilterVerticalw_8px xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, xmm1 + packuswb xmm2, xmm3 + movlps [p_dst], xmm2 + movhps [p_dst + i_dststride], xmm2 + cmp i_height, 4 + jl .x_loop_dec + lea p_dst, [p_dst + 2 * i_dststride] + movdqa xmm1, [p_src + i_srcstride] + SSE2_FilterVerticalw_8px xmm4, xmm5, xmm6, xmm7, xmm0, xmm1, xmm2 + je .store_xmm4_exit + movdqa xmm2, [p_src + 2 * i_srcstride] + SSE2_FilterVerticalw_8px xmm5, xmm6, xmm7, xmm0, xmm1, xmm2, xmm3 + movdqa xmm3, [p_src + i_srcstride3] + lea p_src, [p_src + 4 * i_srcstride] + packuswb xmm4, xmm5 + movlps [p_dst], xmm4 + movhps [p_dst + i_dststride], xmm4 + lea p_dst, [p_dst + 2 * i_dststride] + SSE2_FilterVerticalw_8px xmm6, xmm7, xmm0, xmm1, xmm2, xmm3, xmm4 + movdqa xmm4, [p_src] + SSE2_FilterVerticalw_8px xmm7, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5 + packuswb xmm6, xmm7 + movlps [p_dst], xmm6 + movhps [p_dst + i_dststride], xmm6 + lea p_dst, [p_dst + 2 * i_dststride] + sub i_height, 8 + jg .height_loop + jl .x_loop_dec + movdqa xmm5, [p_src + i_srcstride] + SSE2_FilterVerticalw_8px xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6 + packuswb xmm0, xmm0 + movlps [p_dst], xmm0 +.x_loop_dec: + pop p_dst + pop p_src + %assign push_num push_num - 2 + sub i_width, 8 + jle .done + mov i_height, [r7] + add p_src, 16 + add p_dst, 8 + jmp .width_loop +.store_xmm4_exit: + packuswb xmm4, xmm4 + movlps [p_dst], xmm4 + pop p_dst + pop p_src +.done: + pop i_height + %assign push_num push_num - 1 + DEINIT_X86_32_PIC + POP_XMM + LOAD_6_PARA_POP +%ifdef X86_32 + pop r6 +%endif + ret +%undef p_src +%undef i_srcstride +%undef p_dst +%undef i_dststride +%undef i_width +%undef i_height +%undef i_srcstride3 + + +%ifdef HAVE_AVX2 + +; pixels=%1 
shufb_32435465768798A9=%2 shufb_011267784556ABBC=%3 maddubsw_p1m5_p1m5_m5p1_m5p1=%4 tmp=%5,%6 +%macro AVX2_FilterHorizontalbw_16px 6 + vpshufb %5, %1, %3 + vpshufb %1, %1, %2 + vpshufd %6, %1, 10110001b + vpmaddubsw %1, %1, [pic(db20_256)] + vpmaddubsw %5, %5, %4 + vpmaddubsw %6, %6, %4 + vpaddw %1, %1, %5 + vpaddw %1, %1, %6 +%endmacro + +; pixels=%1 shufb_32435465768798A9=%2 shufb_011267784556ABBC=%3 maddubsw_p1m5_p1m5_m5p1_m5p1=%4 tmp=%5,%6 +%macro AVX2_FilterHorizontal_16px 6 + AVX2_FilterHorizontalbw_16px %1, %2, %3, %4, %5, %6 + vpaddw %1, %1, [pic(h264_w0x10_256)] + vpsraw %1, %1, 5 +%endmacro + +; px0=%1 px1=%2 shufb_32435465768798A9=%3 shufb_011267784556ABBC=%4 maddubsw_p1m5_p1m5_m5p1_m5p1=%5 tmp=%6,%7 +%macro AVX2_FilterHorizontalbw_4x4px 7 + vpshufb %6, %1, %4 + vpshufb %7, %2, %4 + vpshufb %1, %1, %3 + vpshufb %2, %2, %3 + vpunpcklqdq %1, %1, %2 + vpunpcklqdq %6, %6, %7 + vpshufd %7, %1, 10110001b + vpmaddubsw %1, %1, [pic(db20_256)] + vpmaddubsw %6, %6, %5 + vpmaddubsw %7, %7, %5 + vpaddw %1, %1, %6 + vpaddw %1, %1, %7 +%endmacro + +; px0=%1 px1=%2 shufb_32435465768798A9=%3 shufb_011267784556ABBC=%4 maddubsw_p1m5_p1m5_m5p1_m5p1=%5 tmp=%6,%7 +%macro AVX2_FilterHorizontal_4x4px 7 + AVX2_FilterHorizontalbw_4x4px %1, %2, %3, %4, %5, %6, %7 + vpaddw %1, %1, [pic(h264_w0x10_256)] + vpsraw %1, %1, 5 +%endmacro + +; pixels=%1 -32768>>scale=%2 tmp=%3 +%macro AVX2_FilterHorizontalbw_4px 3 + vpmaddubsw %1, %1, [pic(maddubsw_m2p10_m40m40_p10m2_p0p0_256)] + vpmaddwd %1, %1, %2 + vpshufd %3, %1, 10110001b + vpaddd %1, %1, %3 +%endmacro + +; pixels=%1 tmp=%2 +%macro AVX2_FilterHorizontal_4px 2 + AVX2_FilterHorizontalbw_4px %1, [pic(dwm1024_256)], %2 + vpaddd %1, %1, [pic(dd32768_256)] +%endmacro + +; px_ab=%1 px_cd=%2 px_ef=%3 maddubsw_ab=%4 maddubsw_cd=%5 maddubsw_ef=%6 tmp=%7 +%macro AVX2_FilterVertical_16px 7 + vpmaddubsw %1, %1, %4 + vpmaddubsw %7, %2, %5 + vpaddw %1, %1, %7 + vpmaddubsw %7, %3, %6 + vpaddw %1, %1, %7 + vpaddw %1, %1, [pic(h264_w0x10_256)] + vpsraw %1, %1, 5 +%endmacro + +; px_a=%1 px_f=%2
px_bc=%3 px_de=%4 maddubsw_bc=%5 maddubsw_de=%6 tmp=%7,%8 +%macro AVX2_FilterVertical2_16px 8 + vpxor %7, %7, %7 + vpunpcklbw %1, %1, %7 + vpunpcklbw %8, %2, %7 + vpaddw %1, %1, %8 + vpmaddubsw %7, %3, %5 + vpaddw %1, %1, %7 + vpmaddubsw %7, %4, %6 + vpaddw %1, %1, %7 + vpaddw %1, %1, [pic(h264_w0x10_256)] + vpsraw %1, %1, 5 +%endmacro + +; px0=%1 px1=%2 px2=%3 px3=%4 px4=%5 px5=%6 tmp=%7 +%macro AVX2_FilterVerticalw_16px 7 + vpaddw %1, %1, %6 + vpaddw %7, %2, %5 + vpsubw %1, %1, %7 + vpsraw %1, %1, 2 + vpsubw %1, %1, %7 + vpaddw %7, %3, %4 + vpaddw %1, %1, %7 + vpsraw %1, %1, 2 + vpaddw %7, %7, [pic(dw32_256)] + vpaddw %1, %1, %7 + vpsraw %1, %1, 6 +%endmacro + +;*********************************************************************** +; void McHorVer02_avx2(const uint8_t *pSrc, +; int32_t iSrcStride, +; uint8_t *pDst, +; int32_t iDstStride, +; int32_t iWidth, +; int32_t iHeight) +;*********************************************************************** + +WELS_EXTERN McHorVer02_avx2 +%define p_src r0 +%define i_srcstride r1 +%define p_dst r2 +%define i_dststride r3 +%ifdef X86_32_PICASM +%define i_width dword arg5 +%else +%define i_width r4 +%endif +%define i_height r5 +%define i_srcstride3 r6 + %assign push_num 0 +%ifdef X86_32 + push r6 + %assign push_num 1 +%endif + LOAD_6_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r4, r4d + SIGN_EXTENSION r5, r5d + INIT_X86_32_PIC_NOPRESERVE r4 + sub p_src, i_srcstride + sub p_src, i_srcstride + lea i_srcstride3, [3 * i_srcstride] + cmp i_width, 8 + je .width8 + jg .width16 +; .width4: + vmovd xmm0, [p_src] + vpbroadcastd xmm5, [p_src + i_srcstride] + vpunpcklbw xmm0, xmm0, xmm5 + vpbroadcastd ymm1, [p_src + 2 * i_srcstride] + vpunpcklbw xmm5, xmm5, xmm1 + vpblendd xmm0, xmm0, xmm5, 1100b + vpbroadcastd ymm5, [p_src + i_srcstride3] + lea p_src, [p_src + 4 * i_srcstride] + vpunpcklbw ymm1, ymm1, ymm5 + vpbroadcastd ymm2, [p_src] + vpunpcklbw ymm5, ymm5, ymm2 + vpblendd ymm1, ymm1, ymm5, 
11001100b + vpblendd ymm0, ymm0, ymm1, 11110000b + vpbroadcastd ymm5, [p_src + i_srcstride] + lea p_src, [p_src + 2 * i_srcstride] + vpunpcklbw ymm2, ymm2, ymm5 + vpbroadcastd ymm3, [p_src] + vpunpcklbw ymm5, ymm5, ymm3 + vpblendd ymm2, ymm2, ymm5, 11001100b + vpblendd ymm1, ymm1, ymm2, 11110000b + vpbroadcastd ymm5, [p_src + i_srcstride] + vpunpcklbw ymm3, ymm3, ymm5 + vpbroadcastd ymm4, [p_src + 2 * i_srcstride] + vpunpcklbw ymm5, ymm5, ymm4 + vpblendd ymm3, ymm3, ymm5, 11001100b + vpblendd ymm2, ymm2, ymm3, 11110000b + vbroadcasti128 ymm6, [pic(db20_128)] + AVX2_FilterVertical_16px ymm0, ymm1, ymm2, [pic(maddubsw_p1m5_256)], ymm6, [pic(maddubsw_m5p1_256)], ymm5 + vpackuswb ymm0, ymm0, ymm0 + vmovd [p_dst], xmm0 + vpsrlq xmm5, xmm0, 32 + vmovd [p_dst + i_dststride], xmm5 + lea p_dst, [p_dst + 2 * i_dststride] + vextracti128 xmm0, ymm0, 1 + vmovd [p_dst], xmm0 + vpsrlq xmm5, xmm0, 32 + vmovd [p_dst + i_dststride], xmm5 + cmp i_height, 5 + jl .width4_done + lea p_dst, [p_dst + 2 * i_dststride] + vpbroadcastd ymm5, [p_src + i_srcstride3] + vpunpcklbw ymm4, ymm4, ymm5 + jg .width4_height_ge8 + AVX2_FilterVertical_16px xmm2, xmm3, xmm4, [pic(maddubsw_p1m5_256)], xmm6, [pic(maddubsw_m5p1_256)], xmm5 + vpackuswb xmm2, xmm2, xmm2 + vmovd [p_dst], xmm2 + jmp .width4_done +.width4_height_ge8: + lea p_src, [p_src + 4 * i_srcstride] + vpbroadcastd ymm1, [p_src] + vpunpcklbw ymm5, ymm5, ymm1 + vpblendd ymm4, ymm4, ymm5, 11001100b + vpblendd ymm3, ymm3, ymm4, 11110000b + vpbroadcastd ymm5, [p_src + i_srcstride] + vpunpcklbw ymm1, ymm5 + vpbroadcastd ymm0, [p_src + 2 * i_srcstride] + vpunpcklbw ymm5, ymm5, ymm0 + vpblendd ymm1, ymm1, ymm5, 11001100b + vpblendd ymm4, ymm4, ymm1, 11110000b + AVX2_FilterVertical_16px ymm2, ymm3, ymm4, [pic(maddubsw_p1m5_256)], ymm6, [pic(maddubsw_m5p1_256)], ymm5 + vpackuswb ymm2, ymm2, ymm2 + vmovd [p_dst], xmm2 + vpsrlq xmm5, xmm2, 32 + vmovd [p_dst + i_dststride], xmm5 + lea p_dst, [p_dst + 2 * i_dststride] + vextracti128 xmm2, ymm2, 1 + vmovd 
[p_dst], xmm2 + vpsrlq xmm5, xmm2, 32 + vmovd [p_dst + i_dststride], xmm5 + cmp i_height, 9 + jl .width4_done + lea p_dst, [p_dst + 2 * i_dststride] + vmovd xmm5, [p_src + i_srcstride3] + vpunpcklbw xmm0, xmm0, xmm5 + AVX2_FilterVertical_16px xmm4, xmm1, xmm0, [pic(maddubsw_p1m5_256)], xmm6, [pic(maddubsw_m5p1_256)], xmm5 + vpackuswb xmm4, xmm4, xmm4 + vmovd [p_dst], xmm4 +.width4_done: + vzeroupper + DEINIT_X86_32_PIC_KEEPDEF + POP_XMM + LOAD_6_PARA_POP +%ifdef X86_32 + pop r6 +%endif + ret + +.width8: + sub i_height, 1 + vmovq xmm0, [p_src] + vmovq xmm4, [p_src + i_srcstride] + vpunpcklbw xmm0, xmm0, xmm4 + vmovq xmm1, [p_src + 2 * i_srcstride] + vpunpcklbw xmm4, xmm4, xmm1 + vinserti128 ymm0, ymm0, xmm4, 1 + vmovq xmm4, [p_src + i_srcstride3] + lea p_src, [p_src + 4 * i_srcstride] + vpunpcklbw xmm1, xmm1, xmm4 + vmovq xmm6, [p_src] + vpunpcklbw xmm4, xmm4, xmm6 + vinserti128 ymm1, ymm1, xmm4, 1 +.width8_yloop: + vmovq xmm4, [p_src + i_srcstride] + vpunpcklbw xmm2, xmm6, xmm4 + vmovq xmm3, [p_src + 2 * i_srcstride] + vpunpcklbw xmm4, xmm4, xmm3 + vinserti128 ymm2, ymm2, xmm4, 1 + vbroadcasti128 ymm5, [pic(db20_128)] + AVX2_FilterVertical_16px ymm0, ymm1, ymm2, [pic(maddubsw_p1m5_256)], ymm5, [pic(maddubsw_m5p1_256)], ymm4 + vmovq xmm4, [p_src + i_srcstride3] + lea p_src, [p_src + 4 * i_srcstride] + vpunpcklbw xmm3, xmm3, xmm4 + vmovq xmm6, [p_src] + vpunpcklbw xmm4, xmm4, xmm6 + vinserti128 ymm3, ymm3, xmm4, 1 + AVX2_FilterVertical_16px ymm1, ymm2, ymm3, [pic(maddubsw_p1m5_256)], ymm5, [pic(maddubsw_m5p1_256)], ymm4 + vpackuswb ymm0, ymm0, ymm1 + vmovlps [p_dst], xmm0 + vextracti128 xmm1, ymm0, 1 + vmovlps [p_dst + i_dststride], xmm1 + lea p_dst, [p_dst + 2 * i_dststride] + vmovhps [p_dst], xmm0 + vmovhps [p_dst + i_dststride], xmm1 + cmp i_height, 4 + jl .width8_done + lea p_dst, [p_dst + 2 * i_dststride] + vmovq xmm4, [p_src + i_srcstride] + vpunpcklbw xmm0, xmm6, xmm4 + jg .width8_height_ge8 + AVX2_FilterVertical_16px xmm2, xmm3, xmm0, 
[pic(maddubsw_p1m5_256)], xmm5, [pic(maddubsw_m5p1_256)], xmm4 + vpackuswb xmm2, xmm2, xmm2 + vmovlps [p_dst], xmm2 + jmp .width8_done +.width8_height_ge8: + vmovq xmm1, [p_src + 2 * i_srcstride] + vpunpcklbw xmm4, xmm4, xmm1 + vinserti128 ymm0, ymm0, xmm4, 1 + AVX2_FilterVertical_16px ymm2, ymm3, ymm0, [pic(maddubsw_p1m5_256)], ymm5, [pic(maddubsw_m5p1_256)], ymm4 + vmovq xmm4, [p_src + i_srcstride3] + lea p_src, [p_src + 4 * i_srcstride] + vpunpcklbw xmm1, xmm1, xmm4 + vmovq xmm6, [p_src] + vpunpcklbw xmm4, xmm4, xmm6 + vinserti128 ymm1, ymm1, xmm4, 1 + AVX2_FilterVertical_16px ymm3, ymm0, ymm1, [pic(maddubsw_p1m5_256)], ymm5, [pic(maddubsw_m5p1_256)], ymm4 + vpackuswb ymm2, ymm2, ymm3 + vmovlps [p_dst], xmm2 + vextracti128 xmm3, ymm2, 1 + vmovlps [p_dst + i_dststride], xmm3 + lea p_dst, [p_dst + 2 * i_dststride] + vmovhps [p_dst], xmm2 + vmovhps [p_dst + i_dststride], xmm3 + lea p_dst, [p_dst + 2 * i_dststride] + sub i_height, 8 + jg .width8_yloop + jl .width8_done + vmovq xmm4, [p_src + i_srcstride] + vpunpcklbw xmm2, xmm6, xmm4 + AVX2_FilterVertical_16px xmm0, xmm1, xmm2, [pic(maddubsw_p1m5_256)], xmm5, [pic(maddubsw_m5p1_256)], xmm4 + vpackuswb xmm0, xmm0, xmm0 + vmovlps [p_dst], xmm0 +.width8_done: + vzeroupper + DEINIT_X86_32_PIC_KEEPDEF + POP_XMM + LOAD_6_PARA_POP +%ifdef X86_32 + pop r6 +%endif + ret + +.width16: + sub i_height, 1 + test i_height, 1 + jnz .width16_yloop_begin_even + vmovq xmm0, [p_src] + vpbroadcastq ymm1, [p_src + 8] + vpblendd ymm0, ymm0, ymm1, 11110000b + vmovq xmm1, [p_src + i_srcstride] + vpbroadcastq ymm2, [p_src + i_srcstride + 8] + vpblendd ymm1, ymm1, ymm2, 11110000b + vpunpcklbw ymm0, ymm0, ymm1 + vmovq xmm2, [p_src + 2 * i_srcstride] + vpbroadcastq ymm3, [p_src + 2 * i_srcstride + 8] + vpblendd ymm2, ymm2, ymm3, 11110000b + vmovq xmm3, [p_src + i_srcstride3] + vpbroadcastq ymm4, [p_src + i_srcstride3 + 8] + lea p_src, [p_src + 4 * i_srcstride] + vpblendd ymm3, ymm3, ymm4, 11110000b + vpunpcklbw ymm2, ymm2, ymm3 + vmovq xmm4, 
[p_src] + vpbroadcastq ymm5, [p_src + 8] + vpblendd ymm4, ymm4, ymm5, 11110000b + vmovq xmm5, [p_src + i_srcstride] + vpbroadcastq ymm6, [p_src + i_srcstride + 8] + lea p_src, [p_src + 2 * i_srcstride] + vpblendd ymm5, ymm5, ymm6, 11110000b + vpunpcklbw ymm4, ymm4, ymm5 + AVX2_FilterVertical_16px ymm0, ymm2, ymm4, [pic(maddubsw_p1m5_256)], [pic(db20_256)], [pic(maddubsw_m5p1_256)], ymm7 + vpackuswb ymm0, ymm0, ymm0 + vpermq ymm0, ymm0, 1000b + vmovdqa [p_dst], xmm0 + add p_dst, i_dststride + jmp .width16_yloop +.width16_yloop_begin_even: + vmovq xmm1, [p_src] + vpbroadcastq ymm2, [p_src + 8] + vpblendd ymm1, ymm1, ymm2, 11110000b + vmovq xmm2, [p_src + i_srcstride] + vpbroadcastq ymm3, [p_src + i_srcstride + 8] + vpblendd ymm2, ymm2, ymm3, 11110000b + vmovq xmm3, [p_src + 2 * i_srcstride] + vpbroadcastq ymm4, [p_src + 2 * i_srcstride + 8] + add p_src, i_srcstride3 + vpblendd ymm3, ymm3, ymm4, 11110000b + vpunpcklbw ymm2, ymm2, ymm3 + vmovq xmm4, [p_src] + vpbroadcastq ymm5, [p_src + 8] + vpblendd ymm4, ymm4, ymm5, 11110000b + vmovq xmm5, [p_src + i_srcstride] + vpbroadcastq ymm6, [p_src + i_srcstride + 8] + lea p_src, [p_src + 2 * i_srcstride] + vpblendd ymm5, ymm5, ymm6, 11110000b + vpunpcklbw ymm4, ymm4, ymm5 +.width16_yloop: + vmovq xmm6, [p_src] + vpbroadcastq ymm7, [p_src + 8] + vpblendd ymm6, ymm6, ymm7, 11110000b + AVX2_FilterVertical2_16px ymm1, ymm6, ymm2, ymm4, [pic(maddubsw_m5p20_256)], [pic(maddubsw_p20m5_256)], ymm0, ymm7 + vmovq xmm7, [p_src + i_srcstride] + vpbroadcastq ymm0, [p_src + i_srcstride + 8] + vpblendd ymm7, ymm7, ymm0, 11110000b + vpunpcklbw ymm6, ymm6, ymm7 + AVX2_FilterVertical_16px ymm2, ymm4, ymm6, [pic(maddubsw_p1m5_256)], [pic(db20_256)], [pic(maddubsw_m5p1_256)], ymm0 + vpackuswb ymm1, ymm1, ymm2 + vpermq ymm1, ymm1, 11011000b + vmovdqa [p_dst], xmm1 + vextracti128 [p_dst + i_dststride], ymm1, 1 + lea p_dst, [p_dst + 2 * i_dststride] + vmovq xmm0, [p_src + 2 * i_srcstride] + vpbroadcastq ymm1, [p_src + 2 * i_srcstride + 8] + 
vpblendd ymm0, ymm0, ymm1, 11110000b + AVX2_FilterVertical2_16px ymm3, ymm0, ymm4, ymm6, [pic(maddubsw_m5p20_256)], [pic(maddubsw_p20m5_256)], ymm2, ymm1 + vmovq xmm1, [p_src + i_srcstride3] + vpbroadcastq ymm2, [p_src + i_srcstride3 + 8] + lea p_src, [p_src + 4 * i_srcstride] + vpblendd ymm1, ymm1, ymm2, 11110000b + vpunpcklbw ymm0, ymm0, ymm1 + AVX2_FilterVertical_16px ymm4, ymm6, ymm0, [pic(maddubsw_p1m5_256)], [pic(db20_256)], [pic(maddubsw_m5p1_256)], ymm2 + vpackuswb ymm3, ymm3, ymm4 + vpermq ymm3, ymm3, 11011000b + vmovdqa [p_dst], xmm3 + vextracti128 [p_dst + i_dststride], ymm3, 1 + lea p_dst, [p_dst + 2 * i_dststride] + vmovq xmm2, [p_src] + vpbroadcastq ymm3, [p_src + 8] + vpblendd ymm2, ymm2, ymm3, 11110000b + AVX2_FilterVertical2_16px ymm5, ymm2, ymm6, ymm0, [pic(maddubsw_m5p20_256)], [pic(maddubsw_p20m5_256)], ymm4, ymm3 + vmovq xmm3, [p_src + i_srcstride] + vpbroadcastq ymm4, [p_src + i_srcstride + 8] + vpblendd ymm3, ymm3, ymm4, 11110000b + vpunpcklbw ymm2, ymm2, ymm3 + AVX2_FilterVertical_16px ymm6, ymm0, ymm2, [pic(maddubsw_p1m5_256)], [pic(db20_256)], [pic(maddubsw_m5p1_256)], ymm4 + vpackuswb ymm5, ymm5, ymm6 + vpermq ymm5, ymm5, 11011000b + vmovdqa [p_dst], xmm5 + vextracti128 [p_dst + i_dststride], ymm5, 1 + lea p_dst, [p_dst + 2 * i_dststride] + vmovq xmm4, [p_src + 2 * i_srcstride] + vpbroadcastq ymm5, [p_src + 2 * i_srcstride + 8] + vpblendd ymm4, ymm4, ymm5, 11110000b + AVX2_FilterVertical2_16px ymm7, ymm4, ymm0, ymm2, [pic(maddubsw_m5p20_256)], [pic(maddubsw_p20m5_256)], ymm6, ymm5 + vmovq xmm5, [p_src + i_srcstride3] + vpbroadcastq ymm6, [p_src + i_srcstride3 + 8] + lea p_src, [p_src + 4 * i_srcstride] + vpblendd ymm5, ymm5, ymm6, 11110000b + vpunpcklbw ymm4, ymm4, ymm5 + AVX2_FilterVertical_16px ymm0, ymm2, ymm4, [pic(maddubsw_p1m5_256)], [pic(db20_256)], [pic(maddubsw_m5p1_256)], ymm6 + vpackuswb ymm7, ymm7, ymm0 + vpermq ymm7, ymm7, 11011000b + vmovdqa [p_dst], xmm7 + vextracti128 [p_dst + i_dststride], ymm7, 1 + lea p_dst, [p_dst + 2 
* i_dststride] + sub i_height, 8 + jg .width16_yloop + vzeroupper + DEINIT_X86_32_PIC + POP_XMM + LOAD_6_PARA_POP +%ifdef X86_32 + pop r6 +%endif + ret +%undef p_src +%undef i_srcstride +%undef i_srcstride3 +%undef p_dst +%undef i_dststride +%undef i_width +%undef i_height +%undef i_ycnt + + +;******************************************************************************* +; void McHorVer20_avx2(const uint8_t *pSrc, +; int iSrcStride, +; uint8_t *pDst, +; int iDstStride, +; int iWidth, +; int iHeight); +;******************************************************************************* + +WELS_EXTERN McHorVer20_avx2 +%define p_src r0 +%define i_srcstride r1 +%define p_dst r2 +%define i_dststride r3 +%define i_width r4 +%define i_height r5 + %assign push_num 0 + INIT_X86_32_PIC r6 + LOAD_6_PARA + PUSH_XMM 7 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r4, r4d + SIGN_EXTENSION r5, r5d + vbroadcasti128 ymm4, [pic(shufb_32435465768798A9)] + vbroadcasti128 ymm5, [pic(shufb_011267784556ABBC)] + vbroadcasti128 ymm6, [pic(maddubsw_p1m5_p1m5_m5p1_m5p1_128)] + cmp i_width, 8 + je .width8 + jg .width16_yloop +%xdefine i_srcstride3 i_width +%undef i_width + lea i_srcstride3, [3 * i_srcstride] +.width4_yloop: + vmovdqu xmm0, [p_src - 2] + vmovdqu xmm1, [p_src + i_srcstride - 2] + vinserti128 ymm0, ymm0, [p_src + 2 * i_srcstride - 2], 1 + vinserti128 ymm1, ymm1, [p_src + i_srcstride3 - 2], 1 + lea p_src, [p_src + 4 * i_srcstride] + AVX2_FilterHorizontal_4x4px ymm0, ymm1, ymm4, ymm5, ymm6, ymm2, ymm3 + vpackuswb ymm0, ymm0, ymm0 + vmovd [p_dst], xmm0 + vpsrlq xmm1, xmm0, 32 + vmovd [p_dst + i_dststride], xmm1 + lea p_dst, [p_dst + 2 * i_dststride] + vextracti128 xmm0, ymm0, 1 + vmovd [p_dst], xmm0 + vpsrlq xmm1, xmm0, 32 + vmovd [p_dst + i_dststride], xmm1 + lea p_dst, [p_dst + 2 * i_dststride] + sub i_height, 4 + jg .width4_yloop + vzeroupper + POP_XMM + LOAD_6_PARA_POP + DEINIT_X86_32_PIC_KEEPDEF + ret +.width8: + lea i_srcstride3, [3 * i_srcstride] 
+.width8_yloop: + vmovdqu xmm0, [p_src - 2] + vmovdqu xmm1, [p_src + i_srcstride - 2] + vinserti128 ymm0, ymm0, [p_src + 2 * i_srcstride - 2], 1 + vinserti128 ymm1, ymm1, [p_src + i_srcstride3 - 2], 1 + lea p_src, [p_src + 4 * i_srcstride] + AVX2_FilterHorizontal_16px ymm0, ymm4, ymm5, ymm6, ymm2, ymm3 + AVX2_FilterHorizontal_16px ymm1, ymm4, ymm5, ymm6, ymm2, ymm3 + vpackuswb ymm0, ymm0, ymm1 + vmovlps [p_dst], xmm0 + vmovhps [p_dst + i_dststride], xmm0 + lea p_dst, [p_dst + 2 * i_dststride] + vextracti128 xmm0, ymm0, 1 + vmovlps [p_dst], xmm0 + vmovhps [p_dst + i_dststride], xmm0 + lea p_dst, [p_dst + 2 * i_dststride] + sub i_height, 4 + jg .width8_yloop + vzeroupper + POP_XMM + LOAD_6_PARA_POP + DEINIT_X86_32_PIC_KEEPDEF + ret +%undef i_srcstride3 +.width16_yloop: + vmovdqu xmm0, [p_src - 2] + vmovdqu xmm1, [p_src + 6] + vinserti128 ymm0, ymm0, [p_src + i_srcstride - 2], 1 + vinserti128 ymm1, ymm1, [p_src + i_srcstride + 6], 1 + lea p_src, [p_src + 2 * i_srcstride] + AVX2_FilterHorizontal_16px ymm0, ymm4, ymm5, ymm6, ymm2, ymm3 + AVX2_FilterHorizontal_16px ymm1, ymm4, ymm5, ymm6, ymm2, ymm3 + vpackuswb ymm0, ymm0, ymm1 + vmovdqa [p_dst], xmm0 + vextracti128 [p_dst + i_dststride], ymm0, 1 + lea p_dst, [p_dst + 2 * i_dststride] + sub i_height, 2 + jg .width16_yloop + vzeroupper + POP_XMM + LOAD_6_PARA_POP + DEINIT_X86_32_PIC + ret +%undef p_src +%undef i_srcstride +%undef p_dst +%undef i_dststride +%undef i_width +%undef i_height + + +;*********************************************************************** +; void McHorVer20Width5Or9Or17_avx2(const uint8_t *pSrc, +; int32_t iSrcStride, +; uint8_t *pDst, +; int32_t iDstStride, +; int32_t iWidth, +; int32_t iHeight); +;*********************************************************************** + +WELS_EXTERN McHorVer20Width5Or9Or17_avx2 +%define p_src r0 +%define i_srcstride r1 +%define p_dst r2 +%define i_dststride r3 +%define i_width r4 +%define i_height r5 + %assign push_num 0 + INIT_X86_32_PIC r6 + LOAD_6_PARA + 
PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r4, r4d + SIGN_EXTENSION r5, r5d + vbroadcasti128 ymm5, [pic(shufb_32435465768798A9)] + vbroadcasti128 ymm6, [pic(shufb_011267784556ABBC)] + vbroadcasti128 ymm7, [pic(maddubsw_p1m5_p1m5_m5p1_m5p1_128)] + cmp i_width, 9 + je .width9 + jg .width17 +.width5_yloop: + vmovdqu xmm0, [p_src - 2] + vinserti128 ymm0, ymm0, [p_src + i_srcstride - 2], 1 + lea p_src, [p_src + 2 * i_srcstride] + AVX2_FilterHorizontal_16px ymm0, ymm5, ymm6, ymm7, ymm1, ymm2 + vpackuswb ymm0, ymm0, ymm0 + vpsrlq xmm1, xmm0, 8 + vmovd [p_dst + 1], xmm1 + vmovd [p_dst], xmm0 + add p_dst, i_dststride + vextracti128 xmm0, ymm0, 1 + vpsrlq xmm1, xmm0, 8 + vmovd [p_dst + 1], xmm1 + vmovd [p_dst], xmm0 + add p_dst, i_dststride + sub i_height, 2 + jg .width5_yloop + vzeroupper + POP_XMM + LOAD_6_PARA_POP + DEINIT_X86_32_PIC_KEEPDEF + ret +.width9: +%xdefine i_srcstride3 i_width +%undef i_width + lea i_srcstride3, [3 * i_srcstride] +.width9_yloop: + vmovdqu xmm0, [p_src - 2] + vmovdqu xmm4, [p_src + i_srcstride - 2] + vinserti128 ymm0, ymm0, [p_src + 2 * i_srcstride - 2], 1 + vinserti128 ymm4, ymm4, [p_src + i_srcstride3 - 2], 1 + lea p_src, [p_src + 4 * i_srcstride] + vpunpckhqdq ymm3, ymm0, ymm4 + AVX2_FilterHorizontal_4px ymm3, ymm2 + AVX2_FilterHorizontal_16px ymm0, ymm5, ymm6, ymm7, ymm1, ymm2 + vpackuswb ymm3, ymm3, ymm0 + vmovd [p_dst + 5], xmm3 + vmovhps [p_dst], xmm3 + add p_dst, i_dststride + AVX2_FilterHorizontal_16px ymm4, ymm5, ymm6, ymm7, ymm1, ymm2 + vpackuswb ymm4, ymm4, ymm4 + vpsrlq xmm2, xmm3, 32 + vmovd [p_dst + 5], xmm2 + vmovlps [p_dst], xmm4 + add p_dst, i_dststride + vextracti128 xmm3, ymm3, 1 + vextracti128 xmm4, ymm4, 1 + vmovd [p_dst + 5], xmm3 + vmovhps [p_dst], xmm3 + add p_dst, i_dststride + vpsrlq xmm2, xmm3, 32 + vmovd [p_dst + 5], xmm2 + vmovlps [p_dst], xmm4 + add p_dst, i_dststride + sub i_height, 4 + jg .width9_yloop + vzeroupper + POP_XMM + LOAD_6_PARA_POP + DEINIT_X86_32_PIC_KEEPDEF + ret 
+.width17: + lea i_srcstride3, [3 * i_srcstride] +.width17_yloop: + vmovdqu xmm0, [p_src - 2] + vmovdqu xmm3, [p_src + 6] + vinserti128 ymm0, ymm0, [p_src + i_srcstride - 2], 1 + vinserti128 ymm3, ymm3, [p_src + i_srcstride + 6], 1 + vmovdqa ymm4, ymm3 + AVX2_FilterHorizontal_16px ymm0, ymm5, ymm6, ymm7, ymm1, ymm2 + AVX2_FilterHorizontal_16px ymm3, ymm5, ymm6, ymm7, ymm1, ymm2 + vpackuswb ymm0, ymm0, ymm3 + vmovdqu xmm1, [p_src + 2 * i_srcstride - 2] + vmovdqu xmm3, [p_src + 2 * i_srcstride + 6] + vinserti128 ymm1, ymm1, [p_src + i_srcstride3 - 2], 1 + vinserti128 ymm3, ymm3, [p_src + i_srcstride3 + 6], 1 + lea p_src, [p_src + 4 * i_srcstride] + vpunpckhqdq ymm4, ymm4, ymm3 + AVX2_FilterHorizontal_4px ymm4, ymm2 + vpackuswb ymm4, ymm4, ymm4 + vmovd [p_dst + 13], xmm4 + vmovdqa [p_dst], xmm0 + add p_dst, i_dststride + vextracti128 xmm2, ymm4, 1 + vmovd [p_dst + 13], xmm2 + vextracti128 [p_dst], ymm0, 1 + add p_dst, i_dststride + vpsrlq xmm4, xmm4, 32 + vmovd [p_dst + 13], xmm4 + AVX2_FilterHorizontal_16px ymm1, ymm5, ymm6, ymm7, ymm0, ymm4 + AVX2_FilterHorizontal_16px ymm3, ymm5, ymm6, ymm7, ymm0, ymm4 + vpackuswb ymm1, ymm1, ymm3 + vmovdqa [p_dst], xmm1 + add p_dst, i_dststride + vpsrlq xmm2, xmm2, 32 + vmovd [p_dst + 13], xmm2 + vextracti128 [p_dst], ymm1, 1 + add p_dst, i_dststride + sub i_height, 4 + jg .width17_yloop + vzeroupper + POP_XMM + LOAD_6_PARA_POP + DEINIT_X86_32_PIC + ret +%undef i_srcstride3 +%undef p_src +%undef i_srcstride +%undef p_dst +%undef i_dststride +%undef i_width +%undef i_height + + +;******************************************************************************* +; void McHorVer20Width4U8ToS16_avx2(const uint8_t *pSrc, +; int iSrcStride, +; int16_t *pDst, +; int iHeight); +;******************************************************************************* + +WELS_EXTERN McHorVer20Width4U8ToS16_avx2 +%define p_src r0 +%define i_srcstride r1 +%define p_dst r2 +%define i_height r3 +%define i_srcstride3 r4 +%define i_dststride 8 + %assign 
push_num 0 +%ifdef X86_32 + push r4 + %assign push_num 1 +%endif + INIT_X86_32_PIC r5 + LOAD_4_PARA + PUSH_XMM 7 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + sub p_src, i_srcstride + sub p_src, i_srcstride + lea i_srcstride3, [3 * i_srcstride] + vbroadcasti128 ymm4, [pic(shufb_32435465768798A9)] + vbroadcasti128 ymm5, [pic(shufb_011267784556ABBC)] + vbroadcasti128 ymm6, [pic(maddubsw_p1m5_p1m5_m5p1_m5p1_128)] + sub i_height, 3 +.yloop: + vmovdqu xmm0, [p_src - 2] + vmovdqu xmm1, [p_src + i_srcstride - 2] + vinserti128 ymm0, ymm0, [p_src + 2 * i_srcstride - 2], 1 + vinserti128 ymm1, ymm1, [p_src + i_srcstride3 - 2], 1 + lea p_src, [p_src + 4 * i_srcstride] + AVX2_FilterHorizontalbw_4x4px ymm0, ymm1, ymm4, ymm5, ymm6, ymm2, ymm3 + vmovdqa [p_dst], ymm0 + add p_dst, 4 * i_dststride + sub i_height, 4 + jg .yloop + ; Height % 4 remaining single. + vmovdqu xmm0, [p_src - 2] + AVX2_FilterHorizontalbw_16px xmm0, xmm4, xmm5, xmm6, xmm2, xmm3 + vmovlps [p_dst], xmm0 + vzeroupper + POP_XMM + LOAD_4_PARA_POP + DEINIT_X86_32_PIC +%ifdef X86_32 + pop r4 +%endif + ret +%undef p_src +%undef i_srcstride +%undef p_dst +%undef i_height +%undef i_srcstride3 +%undef i_dststride + + +;*********************************************************************** +; void McHorVer02Width4S16ToU8_avx2(const int16_t *pSrc, +; uint8_t *pDst, +; int32_t iDstStride, +; int32_t iHeight); +;*********************************************************************** + +WELS_EXTERN McHorVer02Width4S16ToU8_avx2 +%define p_src r0 +%define p_dst r1 +%define i_dststride r2 +%define i_height r3 +%define i_dststride3 r4 +%define i_srcstride 8 + %assign push_num 0 +%ifdef X86_32 + push r4 + %assign push_num 1 +%endif + INIT_X86_32_PIC r5 + LOAD_4_PARA + PUSH_XMM 8 + SIGN_EXTENSION r2, r2d + SIGN_EXTENSION r3, r3d + lea i_dststride3, [3 * i_dststride] + vmovdqu ymm0, [p_src + 0 * i_srcstride] + vmovdqu ymm1, [p_src + 1 * i_srcstride] + vmovdqu ymm2, [p_src + 2 * i_srcstride] + vmovdqu ymm3, [p_src + 3 * 
i_srcstride] + vmovdqu ymm4, [p_src + 4 * i_srcstride] + vmovdqu ymm5, [p_src + 5 * i_srcstride] + vmovdqu ymm6, [p_src + 6 * i_srcstride] + AVX2_FilterVerticalw_16px ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm7 + vpackuswb ymm0, ymm0, ymm0 + vmovd [p_dst], xmm0 + vpsrlq xmm7, xmm0, 32 + vmovd [p_dst + i_dststride], xmm7 + vextracti128 xmm0, ymm0, 1 + vmovd [p_dst + 2 * i_dststride], xmm0 + vpsrlq xmm7, xmm0, 32 + vmovd [p_dst + i_dststride3], xmm7 + cmp i_height, 4 + jle .done + lea p_dst, [p_dst + 4 * i_dststride] + vmovdqu ymm7, [p_src + 7 * i_srcstride] + vmovdqu ymm0, [p_src + 8 * i_srcstride] + vmovdqu ymm1, [p_src + 9 * i_srcstride] + AVX2_FilterVerticalw_16px ymm4, ymm5, ymm6, ymm7, ymm0, ymm1, ymm3 + vpackuswb ymm4, ymm4, ymm4 + vmovd [p_dst], xmm4 + vpsrlq xmm3, xmm4, 32 + vmovd [p_dst + i_dststride], xmm3 + vextracti128 xmm4, ymm4, 1 + vmovd [p_dst + 2 * i_dststride], xmm4 + vpsrlq xmm3, xmm4, 32 + vmovd [p_dst + i_dststride3], xmm3 +.done: + vzeroupper + POP_XMM + LOAD_4_PARA_POP + DEINIT_X86_32_PIC +%ifdef X86_32 + pop r4 +%endif + ret +%undef p_src +%undef p_dst +%undef i_dststride +%undef i_height +%undef i_srcstride +%undef i_dststride3 + + +;******************************************************************************* +; void McHorVer20Width8U8ToS16_avx2(const uint8_t *pSrc, +; int iSrcStride, +; int16_t *pDst, +; int iHeight); +;******************************************************************************* + +WELS_EXTERN McHorVer20Width8U8ToS16_avx2 +%define p_src r0 +%define i_srcstride r1 +%define p_dst r2 +%define i_height r3 +%define i_dststride 16 + %assign push_num 0 + INIT_X86_32_PIC r4 + LOAD_4_PARA + PUSH_XMM 6 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + sub p_src, i_srcstride + sub p_src, i_srcstride + vbroadcasti128 ymm3, [pic(shufb_32435465768798A9)] + vbroadcasti128 ymm4, [pic(shufb_011267784556ABBC)] + vbroadcasti128 ymm5, [pic(maddubsw_p1m5_p1m5_m5p1_m5p1_128)] + sub i_height, 1 +.yloop: + vmovdqu xmm0, [p_src - 2] + 
vinserti128 ymm0, ymm0, [p_src + i_srcstride - 2], 1 + lea p_src, [p_src + 2 * i_srcstride] + AVX2_FilterHorizontalbw_16px ymm0, ymm3, ymm4, ymm5, ymm1, ymm2 + vmovdqu [p_dst], ymm0 + add p_dst, 2 * i_dststride + sub i_height, 2 + jg .yloop + jl .done + vmovdqu xmm0, [p_src - 2] + AVX2_FilterHorizontalbw_16px xmm0, xmm3, xmm4, xmm5, xmm1, xmm2 + vmovdqa [p_dst], xmm0 +.done: + vzeroupper + POP_XMM + LOAD_4_PARA_POP + DEINIT_X86_32_PIC + ret +%undef p_src +%undef i_srcstride +%undef p_dst +%undef i_height +%undef i_dststride + + +;*********************************************************************** +; void McHorVer02Width5S16ToU8_avx2(const int16_t *pSrc, +; uint8_t *pDst, +; int32_t iDstStride, +; int32_t iHeight); +;*********************************************************************** + +WELS_EXTERN McHorVer02Width5S16ToU8_avx2 +%define p_src r0 +%define p_dst r1 +%define i_dststride r2 +%define i_height r3 +%define i_srcstride 16 + %assign push_num 0 + INIT_X86_32_PIC r4 + LOAD_4_PARA + PUSH_XMM 8 + SIGN_EXTENSION r2, r2d + SIGN_EXTENSION r3, r3d + vmovdqu ymm0, [p_src + 0 * i_srcstride] + vmovdqu ymm2, [p_src + 2 * i_srcstride] + vmovdqu ymm4, [p_src + 4 * i_srcstride] + vmovdqu ymm6, [p_src + 6 * i_srcstride] + vperm2i128 ymm1, ymm0, ymm2, 00100001b + vperm2i128 ymm3, ymm2, ymm4, 00100001b + vperm2i128 ymm5, ymm4, ymm6, 00100001b + AVX2_FilterVerticalw_16px ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm7 + vpackuswb ymm0, ymm0, ymm0 + vpsrlq xmm7, xmm0, 8 + vmovd [p_dst + 1], xmm7 + vmovd [p_dst], xmm0 + add p_dst, i_dststride + vextracti128 xmm0, ymm0, 1 + vpsrlq xmm7, xmm0, 8 + vmovd [p_dst + 1], xmm7 + vmovd [p_dst], xmm0 + add p_dst, i_dststride + vmovdqu ymm7, [p_src + 7 * i_srcstride] + vmovdqu ymm0, [p_src + 8 * i_srcstride] + AVX2_FilterVerticalw_16px ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, ymm1 + vpackuswb ymm2, ymm2, ymm2 + vpsrlq xmm1, xmm2, 8 + vmovd [p_dst + 1], xmm1 + vmovd [p_dst], xmm2 + add p_dst, i_dststride + vextracti128 xmm2, ymm2, 1 + vpsrlq 
xmm1, xmm2, 8 + vmovd [p_dst + 1], xmm1 + vmovd [p_dst], xmm2 + add p_dst, i_dststride + vmovdqu ymm1, [p_src + 9 * i_srcstride] + vmovdqu ymm2, [p_src + 10 * i_srcstride] + AVX2_FilterVerticalw_16px ymm4, ymm5, ymm6, ymm7, ymm0, ymm1, ymm3 + vpackuswb ymm4, ymm4, ymm4 + vpsrlq xmm3, xmm4, 8 + vmovd [p_dst + 1], xmm3 + vmovd [p_dst], xmm4 + cmp i_height, 5 + jle .done + add p_dst, i_dststride + vextracti128 xmm4, ymm4, 1 + vpsrlq xmm3, xmm4, 8 + vmovd [p_dst + 1], xmm3 + vmovd [p_dst], xmm4 + add p_dst, i_dststride + vmovdqu ymm3, [p_src + 11 * i_srcstride] + vmovdqu xmm4, [p_src + 12 * i_srcstride] + AVX2_FilterVerticalw_16px ymm6, ymm7, ymm0, ymm1, ymm2, ymm3, ymm5 + vpackuswb ymm6, ymm6, ymm6 + vpsrlq xmm5, xmm6, 8 + vmovd [p_dst + 1], xmm5 + vmovd [p_dst], xmm6 + add p_dst, i_dststride + vextracti128 xmm6, ymm6, 1 + vpsrlq xmm5, xmm6, 8 + vmovd [p_dst + 1], xmm5 + vmovd [p_dst], xmm6 + add p_dst, i_dststride + vmovdqu xmm5, [p_src + 13 * i_srcstride] + AVX2_FilterVerticalw_16px xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm7 + vpackuswb xmm0, xmm0, xmm0 + vpsrlq xmm7, xmm0, 8 + vmovd [p_dst + 1], xmm7 + vmovd [p_dst], xmm0 +.done: + vzeroupper + POP_XMM + LOAD_4_PARA_POP + DEINIT_X86_32_PIC + ret +%undef p_src +%undef p_dst +%undef i_dststride +%undef i_height +%undef i_srcstride + + +;*********************************************************************** +; void McHorVer02Width8S16ToU8_avx2(const int16_t *pSrc, +; uint8_t *pDst, +; int32_t iDstStride, +; int32_t iHeight); +;*********************************************************************** + +WELS_EXTERN McHorVer02Width8S16ToU8_avx2 +%define p_src r0 +%define p_dst r1 +%define i_dststride r2 +%define i_height r3 +%define i_dststride3 r4 +%define i_srcstride 16 + %assign push_num 0 +%ifdef X86_32 + push r4 + %assign push_num 1 +%endif + INIT_X86_32_PIC r5 + LOAD_4_PARA + PUSH_XMM 8 + SIGN_EXTENSION r2, r2d + SIGN_EXTENSION r3, r3d + lea i_dststride3, [3 * i_dststride] + vmovdqa ymm0, [p_src + 0 * i_srcstride] + 
vmovdqa ymm2, [p_src + 2 * i_srcstride] + vmovdqa ymm4, [p_src + 4 * i_srcstride] + vperm2i128 ymm1, ymm0, ymm2, 00100001b + vperm2i128 ymm3, ymm2, ymm4, 00100001b +.yloop: + vmovdqa ymm6, [p_src + 6 * i_srcstride] + vperm2i128 ymm5, ymm4, ymm6, 00100001b + AVX2_FilterVerticalw_16px ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm7 + vmovdqu ymm7, [p_src + 7 * i_srcstride] + AVX2_FilterVerticalw_16px ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, ymm1 + vpackuswb ymm1, ymm0, ymm2 + vmovdqa ymm0, [p_src + 8 * i_srcstride] + vextracti128 xmm2, ymm1, 1 + vmovlps [p_dst], xmm1 + vmovlps [p_dst + i_dststride], xmm2 + vmovhps [p_dst + 2 * i_dststride], xmm1 + vmovhps [p_dst + i_dststride3], xmm2 + cmp i_height, 4 + jle .done + lea p_dst, [p_dst + 4 * i_dststride] + vmovdqu ymm1, [p_src + 9 * i_srcstride] + vmovdqa ymm2, [p_src + 10 * i_srcstride] + AVX2_FilterVerticalw_16px ymm4, ymm5, ymm6, ymm7, ymm0, ymm1, ymm3 + vmovdqu ymm3, [p_src + 11 * i_srcstride] + AVX2_FilterVerticalw_16px ymm6, ymm7, ymm0, ymm1, ymm2, ymm3, ymm5 + vpackuswb ymm5, ymm4, ymm6 + vmovdqa ymm4, [p_src + 12 * i_srcstride] + add p_src, 8 * i_srcstride + vextracti128 xmm6, ymm5, 1 + vmovlps [p_dst], xmm5 + vmovlps [p_dst + i_dststride], xmm6 + vmovhps [p_dst + 2 * i_dststride], xmm5 + vmovhps [p_dst + i_dststride3], xmm6 + lea p_dst, [p_dst + 4 * i_dststride] + sub i_height, 8 + jg .yloop +.done: + vzeroupper + POP_XMM + LOAD_4_PARA_POP + DEINIT_X86_32_PIC +%ifdef X86_32 + pop r4 +%endif + ret +%undef p_src +%undef p_dst +%undef i_dststride +%undef i_height +%undef i_dststride3 +%undef i_srcstride + + +;******************************************************************************* +; void McHorVer20Width16U8ToS16_avx2(const uint8_t *pSrc, +; int32_t iSrcStride, +; int16_t *pDst, +; int32_t iHeight); +;******************************************************************************* + +WELS_EXTERN McHorVer20Width16U8ToS16_avx2 +%define p_src r0 +%define i_srcstride r1 +%define p_dst r2 +%define i_height r3 +%define 
i_dststride 32 + %assign push_num 0 + INIT_X86_32_PIC r4 + LOAD_4_PARA + PUSH_XMM 7 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + sub p_src, i_srcstride + sub p_src, i_srcstride + vbroadcasti128 ymm4, [pic(shufb_32435465768798A9)] + vbroadcasti128 ymm5, [pic(shufb_011267784556ABBC)] + vbroadcasti128 ymm6, [pic(maddubsw_p1m5_p1m5_m5p1_m5p1_128)] + sub i_height, 1 +.yloop: + vmovdqu xmm0, [p_src - 2] + vinserti128 ymm0, ymm0, [p_src + 6], 1 + vmovdqu xmm1, [p_src + i_srcstride - 2] + vinserti128 ymm1, ymm1, [p_src + i_srcstride + 6], 1 + lea p_src, [p_src + 2 * i_srcstride] + AVX2_FilterHorizontalbw_16px ymm0, ymm4, ymm5, ymm6, ymm2, ymm3 + vmovdqa [p_dst], ymm0 + AVX2_FilterHorizontalbw_16px ymm1, ymm4, ymm5, ymm6, ymm2, ymm3 + vmovdqa [p_dst + i_dststride], ymm1 + add p_dst, 2 * i_dststride + sub i_height, 2 + jg .yloop + jl .done + vmovdqu xmm0, [p_src - 2] + vinserti128 ymm0, ymm0, [p_src + 6], 1 + AVX2_FilterHorizontalbw_16px ymm0, ymm4, ymm5, ymm6, ymm1, ymm2 + vmovdqa [p_dst], ymm0 +.done: + vzeroupper + POP_XMM + LOAD_4_PARA_POP + DEINIT_X86_32_PIC + ret +%undef p_src +%undef i_srcstride +%undef p_dst +%undef i_height +%undef i_dststride + + +;*********************************************************************** +; void McHorVer02Width9S16ToU8_avx2(const int16_t *pSrc, +; uint8_t *pDst, +; int32_t iDstStride, +; int32_t iHeight); +;*********************************************************************** + +WELS_EXTERN McHorVer02Width9S16ToU8_avx2 +%define p_src r0 +%define p_dst r1 +%define i_dststride r2 +%define i_height r3 +%define i_srcstride 32 + %assign push_num 0 + INIT_X86_32_PIC r4 + LOAD_4_PARA + PUSH_XMM 8 + SIGN_EXTENSION r2, r2d + SIGN_EXTENSION r3, r3d + vmovdqa ymm0, [p_src + 0 * i_srcstride] + vmovdqa ymm1, [p_src + 1 * i_srcstride] + vmovdqa ymm2, [p_src + 2 * i_srcstride] + vmovdqa ymm3, [p_src + 3 * i_srcstride] + vmovdqa ymm4, [p_src + 4 * i_srcstride] + sub i_height, 1 +.height_loop: + vmovdqa ymm5, [p_src + 5 * i_srcstride] + 
AVX2_FilterVerticalw_16px ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6 + vmovdqa ymm6, [p_src + 6 * i_srcstride] + AVX2_FilterVerticalw_16px ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7 + vmovdqa ymm7, [p_src + 7 * i_srcstride] + vpackuswb ymm0, ymm0, ymm1 + vextracti128 xmm1, ymm0, 1 + vpsllq xmm1, xmm1, 56 + vmovlps [p_dst + 1], xmm1 + vmovlps [p_dst], xmm0 + add p_dst, i_dststride + vmovhps [p_dst + 1], xmm1 + vmovhps [p_dst], xmm0 + add p_dst, i_dststride + AVX2_FilterVerticalw_16px ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, ymm0 + vmovdqa ymm0, [p_src + 8 * i_srcstride] + AVX2_FilterVerticalw_16px ymm3, ymm4, ymm5, ymm6, ymm7, ymm0, ymm1 + vpackuswb ymm2, ymm2, ymm3 + vextracti128 xmm3, ymm2, 1 + vpsllq xmm3, xmm3, 56 + vmovlps [p_dst + 1], xmm3 + vmovlps [p_dst], xmm2 + add p_dst, i_dststride + vmovhps [p_dst + 1], xmm3 + vmovhps [p_dst], xmm2 + add p_dst, i_dststride + vmovdqa ymm1, [p_src + 9 * i_srcstride] + AVX2_FilterVerticalw_16px ymm4, ymm5, ymm6, ymm7, ymm0, ymm1, ymm2 + vmovdqa ymm2, [p_src + 10 * i_srcstride] + AVX2_FilterVerticalw_16px ymm5, ymm6, ymm7, ymm0, ymm1, ymm2, ymm3 + vmovdqa ymm3, [p_src + 11 * i_srcstride] + vpackuswb ymm4, ymm4, ymm5 + vextracti128 xmm5, ymm4, 1 + vpsllq xmm5, xmm5, 56 + vmovlps [p_dst + 1], xmm5 + vmovlps [p_dst], xmm4 + cmp i_height, 4 + jle .done + add p_dst, i_dststride + vmovhps [p_dst + 1], xmm5 + vmovhps [p_dst], xmm4 + add p_dst, i_dststride + AVX2_FilterVerticalw_16px ymm6, ymm7, ymm0, ymm1, ymm2, ymm3, ymm4 + vmovdqa ymm4, [p_src + 12 * i_srcstride] + add p_src, 8 * i_srcstride + AVX2_FilterVerticalw_16px ymm7, ymm0, ymm1, ymm2, ymm3, ymm4, ymm5 + vpackuswb ymm6, ymm6, ymm7 + vextracti128 xmm7, ymm6, 1 + vpsllq xmm7, xmm7, 56 + vmovlps [p_dst + 1], xmm7 + vmovlps [p_dst], xmm6 + add p_dst, i_dststride + vmovhps [p_dst + 1], xmm7 + vmovhps [p_dst], xmm6 + add p_dst, i_dststride + sub i_height, 8 + jg .height_loop + vmovdqa ymm5, [p_src + 5 * i_srcstride] + AVX2_FilterVerticalw_16px ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6 + 
vpackuswb ymm0, ymm0, ymm0 + vextracti128 xmm1, ymm0, 1 + vpsllq xmm1, xmm1, 56 + vmovlps [p_dst + 1], xmm1 + vmovlps [p_dst], xmm0 +.done: + vzeroupper + POP_XMM + LOAD_4_PARA_POP + DEINIT_X86_32_PIC + ret +%undef p_src +%undef i_srcstride +%undef p_dst +%undef i_dststride +%undef i_height + + +;******************************************************************************* +; void McHorVer20Width17U8ToS16_avx2(const uint8_t *pSrc, +; int32_t iSrcStride, +; int16_t *pDst, +; int32_t iHeight); +;******************************************************************************* + +WELS_EXTERN McHorVer20Width17U8ToS16_avx2 +%define p_src r0 +%define i_srcstride r1 +%define p_dst r2 +%define i_height r3 +%define i_srcstride3 r4 +%define i_dststride 64 + %assign push_num 0 +%ifdef X86_32 + push r4 + %assign push_num 1 +%endif + INIT_X86_32_PIC r5 + LOAD_4_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + sub p_src, i_srcstride + sub p_src, i_srcstride + lea i_srcstride3, [3 * i_srcstride] + vbroadcasti128 ymm5, [pic(shufb_32435465768798A9)] + vbroadcasti128 ymm6, [pic(shufb_011267784556ABBC)] + vbroadcasti128 ymm7, [pic(maddubsw_p1m5_p1m5_m5p1_m5p1_128)] + sub i_height, 3 +.yloop: + vmovdqu xmm0, [p_src - 2] + vmovdqu xmm3, [p_src + 6] + vinserti128 ymm0, ymm0, [p_src + i_srcstride - 2], 1 + vinserti128 ymm3, ymm3, [p_src + i_srcstride + 6], 1 + vmovdqa ymm4, ymm3 + AVX2_FilterHorizontalbw_16px ymm0, ymm5, ymm6, ymm7, ymm1, ymm2 + vmovdqa [p_dst], xmm0 + vextracti128 [p_dst + i_dststride], ymm0, 1 + AVX2_FilterHorizontalbw_16px ymm3, ymm5, ymm6, ymm7, ymm1, ymm2 + vmovdqu xmm1, [p_src + 2 * i_srcstride - 2] + vmovdqu xmm0, [p_src + 2 * i_srcstride + 6] + vinserti128 ymm1, ymm1, [p_src + i_srcstride3 - 2], 1 + vinserti128 ymm0, ymm0, [p_src + i_srcstride3 + 6], 1 + lea p_src, [p_src + 4 * i_srcstride] + vpunpckhqdq ymm4, ymm4, ymm0 + AVX2_FilterHorizontalbw_4px ymm4, [pic(dwm32768_256)], ymm2 + vmovlps [p_dst + 26], xmm4 + vmovdqa [p_dst + 16], xmm3 + 
vextracti128 xmm2, ymm4, 1 + vmovlps [p_dst + i_dststride + 26], xmm2 + vextracti128 [p_dst + i_dststride + 16], ymm3, 1 + vmovhps [p_dst + 2 * i_dststride + 26], xmm4 + AVX2_FilterHorizontalbw_16px ymm1, ymm5, ymm6, ymm7, ymm3, ymm4 + vmovdqa [p_dst + 2 * i_dststride], xmm1 + AVX2_FilterHorizontalbw_16px ymm0, ymm5, ymm6, ymm7, ymm3, ymm4 + vmovdqa [p_dst + 2 * i_dststride + 16], xmm0 + vextracti128 [p_dst + 3 * i_dststride], ymm1, 1 + vmovhps [p_dst + 3 * i_dststride + 26], xmm2 + vextracti128 [p_dst + 3 * i_dststride + 16], ymm0, 1 + add p_dst, 4 * i_dststride + sub i_height, 4 + jg .yloop + ; Handle remaining 2 lines after 4x unrolled loop. + vmovdqu xmm0, [p_src - 2] + vinserti128 ymm0, ymm0, [p_src + 6], 1 + vmovdqu xmm3, [p_src + i_srcstride - 2] + vinserti128 ymm3, ymm3, [p_src + i_srcstride + 6], 1 + vpunpckhqdq ymm4, ymm0, ymm3 + AVX2_FilterHorizontalbw_4px ymm4, [pic(dwm32768_256)], ymm2 + AVX2_FilterHorizontalbw_16px ymm0, ymm5, ymm6, ymm7, ymm1, ymm2 + AVX2_FilterHorizontalbw_16px ymm3, ymm5, ymm6, ymm7, ymm1, ymm2 + vextracti128 xmm4, ymm4, 1 + vmovlps [p_dst + 26], xmm4 + vmovdqa [p_dst], ymm0 + vmovhps [p_dst + i_dststride + 26], xmm4 + vmovdqa [p_dst + i_dststride], ymm3 + vzeroupper + POP_XMM + LOAD_4_PARA_POP + DEINIT_X86_32_PIC +%ifdef X86_32 + pop r4 +%endif + ret +%undef p_src +%undef i_srcstride +%undef p_dst +%undef i_dststride +%undef i_height +%undef i_srcstride3 + + +;*********************************************************************** +; void McHorVer02Width16Or17S16ToU8_avx2(const int16_t *pSrc, +; int32_t iSrcStride, +; uint8_t *pDst, +; int32_t iDstStride, +; int32_t iWidth, +; int32_t iHeight); +;*********************************************************************** + +WELS_EXTERN McHorVer02Width16Or17S16ToU8_avx2 +%define p_src r0 +%define i_srcstride r1 +%define p_dst r2 +%define i_dststride r3 +%ifdef X86_32_PICASM +%define i_width dword arg5 +%else +%define i_width r4 +%endif +%define i_height r5 +%define i_srcstride3 r6 + 
%assign push_num 0 +%ifdef X86_32 + push r6 + %assign push_num 1 +%endif + LOAD_6_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r4, r4d + SIGN_EXTENSION r5, r5d + INIT_X86_32_PIC_NOPRESERVE r4 + sub i_height, 1 + lea i_srcstride3, [3 * i_srcstride] + test i_width, 1 + jz .align_begin + push i_height + push p_src + push p_dst + %assign push_num push_num + 3 +%ifdef X86_32_PICASM + add p_src, i_width + add p_src, i_width + sub p_src, 2 +%else + lea p_src, [p_src + 2 * i_width - 2] +%endif + add p_dst, i_width + vmovd xmm0, [p_src] + vpunpcklwd xmm0, xmm0, [p_src + i_srcstride] + vmovd xmm1, [p_src + 2 * i_srcstride] + add p_src, i_srcstride3 + vpunpcklwd xmm1, xmm1, [p_src] + vpunpckldq xmm0, xmm0, xmm1 + vmovd xmm1, [p_src + i_srcstride] + vpunpcklwd xmm1, xmm1, [p_src + 2 * i_srcstride] + vmovd xmm2, [p_src + i_srcstride3] + lea p_src, [p_src + 4 * i_srcstride] + vpunpcklwd xmm2, xmm2, [p_src] + vpunpckldq xmm1, xmm1, xmm2 + vpunpcklqdq xmm0, xmm0, xmm1 +.height_loop_unalign: + vmovd xmm1, [p_src + i_srcstride] + vpalignr xmm1, xmm1, xmm0, 2 + vmovd xmm2, [p_src + 2 * i_srcstride] + vpalignr xmm2, xmm2, xmm1, 2 + vmovd xmm3, [p_src + i_srcstride3] + vpalignr xmm3, xmm3, xmm2, 2 + lea p_src, [p_src + 4 * i_srcstride] + vmovd xmm4, [p_src] + vpalignr xmm4, xmm4, xmm3, 2 + vmovd xmm5, [p_src + i_srcstride] + vpalignr xmm5, xmm5, xmm4, 2 + AVX2_FilterVerticalw_16px xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm7 + vpackuswb xmm0, xmm0, xmm0 + vpslld xmm6, xmm0, 24 + vmovd [p_dst - 4], xmm6 + vmovlps [p_dst + 4 * i_dststride - 8], xmm6 + add p_dst, i_dststride + vpslld xmm6, xmm0, 16 + vmovd [p_dst - 4], xmm6 + vmovlps [p_dst + 4 * i_dststride - 8], xmm6 + add p_dst, i_dststride + vpslld xmm6, xmm0, 8 + vmovd [p_dst - 4], xmm6 + vmovd [p_dst + i_dststride - 4], xmm0 + lea p_dst, [p_dst + 4 * i_dststride] + vmovlps [p_dst - 8], xmm6 + vmovlps [p_dst + i_dststride - 8], xmm0 + lea p_dst, [p_dst + 2 * i_dststride] + sub i_height, 8 + jle 
.height_loop_unalign_exit + vmovd xmm1, [p_src + 2 * i_srcstride] + vpalignr xmm1, xmm1, xmm5, 2 + vmovd xmm0, [p_src + i_srcstride3] + lea p_src, [p_src + 4 * i_srcstride] + vpunpcklwd xmm0, xmm0, [p_src] + vpalignr xmm0, xmm0, xmm1, 4 + jmp .height_loop_unalign +.height_loop_unalign_exit: + vpbroadcastq xmm6, [p_src + 2 * i_srcstride - 6] + AVX2_FilterVerticalw_16px xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 + vpackuswb xmm1, xmm1, xmm1 + vmovlps [p_dst - 8], xmm1 + pop p_dst + pop p_src + pop i_height + %assign push_num push_num - 3 +.align_begin: + vmovdqa ymm0, [p_src] + vmovdqa ymm1, [p_src + i_srcstride] + vmovdqa ymm2, [p_src + 2 * i_srcstride] + vmovdqa ymm3, [p_src + i_srcstride3] + lea p_src, [p_src + 4 * i_srcstride] + vmovdqa ymm4, [p_src] +.height_loop: + vmovdqa ymm5, [p_src + i_srcstride] + AVX2_FilterVerticalw_16px ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6 + vmovdqa ymm6, [p_src + 2 * i_srcstride] + AVX2_FilterVerticalw_16px ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7 + vmovdqa ymm7, [p_src + i_srcstride3] + lea p_src, [p_src + 4 * i_srcstride] + vpackuswb ymm0, ymm0, ymm1 + vpermq ymm0, ymm0, 11011000b + vmovdqa [p_dst], xmm0 + vextracti128 [p_dst + i_dststride], ymm0, 1 + lea p_dst, [p_dst + 2 * i_dststride] + AVX2_FilterVerticalw_16px ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, ymm0 + vmovdqa ymm0, [p_src] + AVX2_FilterVerticalw_16px ymm3, ymm4, ymm5, ymm6, ymm7, ymm0, ymm1 + vpackuswb ymm2, ymm2, ymm3 + vpermq ymm2, ymm2, 11011000b + vmovdqa [p_dst], xmm2 + vextracti128 [p_dst + i_dststride], ymm2, 1 + lea p_dst, [p_dst + 2 * i_dststride] + vmovdqa ymm1, [p_src + i_srcstride] + AVX2_FilterVerticalw_16px ymm4, ymm5, ymm6, ymm7, ymm0, ymm1, ymm2 + vmovdqa ymm2, [p_src + 2 * i_srcstride] + AVX2_FilterVerticalw_16px ymm5, ymm6, ymm7, ymm0, ymm1, ymm2, ymm3 + vmovdqa ymm3, [p_src + i_srcstride3] + lea p_src, [p_src + 4 * i_srcstride] + vpackuswb ymm4, ymm4, ymm5 + vpermq ymm4, ymm4, 11011000b + vmovdqa [p_dst], xmm4 + vextracti128 [p_dst + i_dststride], ymm4, 1 + 
lea p_dst, [p_dst + 2 * i_dststride] + AVX2_FilterVerticalw_16px ymm6, ymm7, ymm0, ymm1, ymm2, ymm3, ymm4 + vmovdqa ymm4, [p_src] + AVX2_FilterVerticalw_16px ymm7, ymm0, ymm1, ymm2, ymm3, ymm4, ymm5 + vpackuswb ymm6, ymm6, ymm7 + vpermq ymm6, ymm6, 11011000b + vmovdqa [p_dst], xmm6 + vextracti128 [p_dst + i_dststride], ymm6, 1 + lea p_dst, [p_dst + 2 * i_dststride] + sub i_height, 8 + jg .height_loop + jl .done + vmovdqa ymm5, [p_src + i_srcstride] + AVX2_FilterVerticalw_16px ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6 + vpackuswb ymm0, ymm0, ymm0 + vpermq ymm0, ymm0, 11011000b + vmovdqa [p_dst], xmm0 +.done: + vzeroupper + DEINIT_X86_32_PIC + POP_XMM + LOAD_6_PARA_POP +%ifdef X86_32 + pop r6 +%endif + ret +%undef p_src +%undef i_srcstride +%undef p_dst +%undef i_dststride +%undef i_width +%undef i_height +%undef i_srcstride3 + +%endif ; HAVE_AVX2 diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/x86/satd_sad.asm b/TMessagesProj/jni/third_party/openh264/src/codec/common/x86/satd_sad.asm new file mode 100644 index 000000000..3dfc532b4 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/x86/satd_sad.asm @@ -0,0 +1,2734 @@ +;*! +;* \copy +;* Copyright (c) 2009-2013, Cisco Systems +;* All rights reserved. +;* +;* Redistribution and use in source and binary forms, with or without +;* modification, are permitted provided that the following conditions +;* are met: +;* +;* * Redistributions of source code must retain the above copyright +;* notice, this list of conditions and the following disclaimer. +;* +;* * Redistributions in binary form must reproduce the above copyright +;* notice, this list of conditions and the following disclaimer in +;* the documentation and/or other materials provided with the +;* distribution. 
+;*
+;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+;* POSSIBILITY OF SUCH DAMAGE.
+;*
+;*
+;* satd_sad.asm
+;*
+;* Abstract
+;* WelsSampleSatd4x4_sse2
+;* WelsSampleSatd8x8_sse2
+;* WelsSampleSatd16x8_sse2
+;* WelsSampleSatd8x16_sse2
+;* WelsSampleSatd16x16_sse2
+;*
+;* WelsSampleSad16x8_sse2
+;* WelsSampleSad16x16_sse2
+;*
+;* History
+;* 8/5/2009 Created
+;* 24/9/2009 modified
+;*
+;*
+;*************************************************************************/
+
+%include "asm_inc.asm"
+
+;***********************************************************************
+; Data
+;
+; Constant multiplier tables, each aligned to 16 bytes so they can be
+; loaded with movdqa.
+; When X86_32_PICASM is defined the tables are assembled into .text
+; instead of .rodata.
+; NOTE(review): presumably so that 32-bit position-independent code can
+; address them through the pic() helper -- confirm in asm_inc.asm.
+;***********************************************************************
+%ifdef X86_32_PICASM
+SECTION .text align=16
+%else
+SECTION .rodata align=16
+%endif
+
+align 16
+; 16 signed bytes: eight +1 followed by four (+1,-1) pairs.
+; Used as the pmaddubsw multiplier by SSE41_I16x16Get8WSumSub and
+; SSE41_ChromaGet8WSumSub: the low half yields sums of adjacent bytes,
+; the high half yields differences of adjacent bytes.
+HSumSubDB1: db 1,1,1,1,1,1,1,1,1,-1,1,-1,1,-1,1,-1
+align 16
+; 8 signed words alternating +1,-1; as a pmaddwd multiplier it produces
+; the difference of each adjacent word pair.
+HSumSubDW1: dw 1,-1,1,-1,1,-1,1,-1
+align 16
+; 8 words of +1; as a pmaddwd multiplier it produces the sum of each
+; adjacent word pair.
+PDW1: dw 1,1,1,1,1,1,1,1
+align 16
+; Two 64-bit lanes each holding the value 2. SSE41_ChromaGetX38x8Satd adds
+; it as the rounding term of "(sum+2)>>2" and doubles it for
+; "(sum1+sum2+4)>>3".
+PDQ2: dw 2,0,0,0,2,0,0,0
+align 16
+; Two copies of the byte pattern 1,1,1,1,1,-1,1,-1.
+; NOTE(review): no user of this table is visible in the reviewed part of
+; the file.
+HSwapSumSubDB1: times 2 db 1, 1, 1, 1, 1, -1, 1, -1
+
+;***********************************************************************
+; Code
+;***********************************************************************
+SECTION .text
+
+;***********************************************************************
+;
+;Pixel_satd_wxh_sse2 BEGIN
+;
+;***********************************************************************
+; MMX_DW_1_2REG dst, tmp
+; Sets every 16-bit lane of dst (%1) to 1.
+; tmp (%2) is clobbered and left with all bits set.
+%macro MMX_DW_1_2REG 2
+    pxor %1, %1 ; %1 = 0
+    pcmpeqw %2, %2 ; %2 = 0xFFFF in every word (i.e. -1)
+    psubw %1, %2 ; %1 = 0 - (-1) = 1 in every word
+%endmacro
+
+; SSE2_SumWHorizon1 acc, tmp
+; Horizontal sum of the eight words of acc (%1) using unsigned saturating
+; adds. Only the lowest word of %1 holds the total on exit; the other
+; lanes hold partial sums, which is why callers in this file mask the
+; result with 0xffff. tmp (%2) is scratch.
+%macro SSE2_SumWHorizon1 2
+    movdqa %2, %1
+    psrldq %2, 8 ; bring words 4..7 down onto words 0..3
+    paddusw %1, %2
+    movdqa %2, %1
+    psrldq %2, 4 ; bring words 2..3 down onto words 0..1
+    paddusw %1, %2
+    movdqa %2, %1
+    psrldq %2, 2 ; bring word 1 down onto word 0
+    paddusw %1, %2
+%endmacro
+
+; SSE2_HDMTwo4x4 a, b, c, d, tmp
+; Applies SSE2_SumSub (defined outside this part of the file) to the
+; register pairs (a,b), (c,d), (b,d), (a,c) in that order, with tmp (%5)
+; as the scratch argument. The register order of the result is given by
+; the in/pOut note on the macro line below.
+%macro SSE2_HDMTwo4x4 5 ;in: xmm1,xmm2,xmm3,xmm4 pOut: xmm4,xmm2,xmm1,xmm3
+    SSE2_SumSub %1, %2, %5
+    SSE2_SumSub %3, %4, %5
+    SSE2_SumSub %2, %4, %5
+    SSE2_SumSub %1, %3, %5
+%endmacro
+
+; SSE2_SumAbs4 a, b, tmp1, c, d, tmp2, acc
+; Runs WELS_AbsW (defined elsewhere; presumably per-word absolute value)
+; on a, b, c and d, then adds all four into acc (%7) with unsigned
+; saturating word adds. a (%1) and c (%4) are overwritten with partial
+; sums; tmp1 (%3) and tmp2 (%6) are the scratch arguments of WELS_AbsW.
+%macro SSE2_SumAbs4 7
+    WELS_AbsW %1, %3
+    WELS_AbsW %2, %3
+    WELS_AbsW %4, %6
+    WELS_AbsW %5, %6
+    paddusw %1, %2
+    paddusw %4, %5
+    paddusw %7, %1
+    paddusw %7, %4
+%endmacro
+
+; SSE2_SumWHorizon acc, tmp, zero
+; Horizontal sum of the eight words of acc (%1), widened to 32 bits; the
+; total ends up in the lowest dword of %1. tmp (%2) is scratch.
+; The lane notes below assume %3 is all zero, so that punpcklwd
+; zero-extends words to dwords; the callers in this file pass an xmm7
+; they cleared with pxor.
+%macro SSE2_SumWHorizon 3
+    movhlps %2, %1 ; x2 = xx xx xx xx d7 d6 d5 d4
+    paddw %1, %2 ; x1 = xx xx xx xx d37 d26 d15 d04
+    punpcklwd %1, %3 ; x1 = d37 d26 d15 d04
+    movhlps %2, %1 ; x2 = xxxx xxxx d37 d26
+    paddd %1, %2 ; x1 = xxxx xxxx d1357 d0246
+    pshuflw %2, %1, 0x4e ; x2 = xxxx xxxx d0246 d1357
+    paddd %1, %2 ; x1 = xxxx xxxx xxxx d01234567
+%endmacro
+
+; SSE2_GetSatd8x8 (no arguments, works on fixed registers)
+; Processes eight rows of two pixel blocks as two groups of four rows.
+;   in:  r0 / r1 = address and stride of the first block
+;        r2 / r3 = address and stride of the second block
+;        xmm6    = running accumulator (per-word, saturating)
+;        xmm7    = third argument handed to SSE2_LoadDiff8P
+;                  (callers clear it first; presumably a zero register)
+;   out: xmm6 has the absolute transformed differences added in.
+;        r0 and r2 are advanced by SIX strides, not eight: a caller that
+;        wants the next 8 rows adds two more strides itself.
+;   clobbers: xmm0-xmm5
+; SSE2_LoadDiff8P and SSE2_TransTwo4x4W are defined outside this part of
+; the file.
+%macro SSE2_GetSatd8x8 0
+    ; rows 0..3
+    SSE2_LoadDiff8P xmm0,xmm4,xmm7,[r0],[r2]
+    SSE2_LoadDiff8P xmm1,xmm5,xmm7,[r0+r1],[r2+r3]
+    lea r0, [r0+2*r1]
+    lea r2, [r2+2*r3]
+    SSE2_LoadDiff8P xmm2,xmm4,xmm7,[r0],[r2]
+    SSE2_LoadDiff8P xmm3,xmm5,xmm7,[r0+r1],[r2+r3]
+
+    ; butterflies, transpose, butterflies again, then accumulate |x|
+    SSE2_HDMTwo4x4 xmm0,xmm1,xmm2,xmm3,xmm4
+    SSE2_TransTwo4x4W xmm3,xmm1,xmm0,xmm2,xmm4
+    SSE2_HDMTwo4x4 xmm3,xmm1,xmm2,xmm4,xmm5
+    SSE2_SumAbs4 xmm4,xmm1,xmm0,xmm2,xmm3,xmm5,xmm6
+
+    ; rows 4..7, same sequence
+    lea r0, [r0+2*r1]
+    lea r2, [r2+2*r3]
+    SSE2_LoadDiff8P xmm0,xmm4,xmm7,[r0],[r2]
+    SSE2_LoadDiff8P xmm1,xmm5,xmm7,[r0+r1],[r2+r3]
+    lea r0, [r0+2*r1]
+    lea r2, [r2+2*r3]
+    SSE2_LoadDiff8P xmm2,xmm4,xmm7,[r0],[r2]
+    SSE2_LoadDiff8P xmm3,xmm5,xmm7,[r0+r1],[r2+r3]
+
+    SSE2_HDMTwo4x4 xmm0,xmm1,xmm2,xmm3,xmm4
+    SSE2_TransTwo4x4W xmm3,xmm1,xmm0,xmm2,xmm4
+    SSE2_HDMTwo4x4 xmm3,xmm1,xmm2,xmm4,xmm5
+    SSE2_SumAbs4 xmm4,xmm1,xmm0,xmm2,xmm3,xmm5,xmm6
+%endmacro
+
+;***********************************************************************
+;
+;int32_t WelsSampleSatd4x4_sse2( uint8_t *, int32_t, uint8_t *, int32_t );
+;
+; Computes the 4x4 SATD between two pixel blocks: the per-pixel
+; differences go through sum/difference butterfly stages, the absolute
+; values of the results are summed, and half of that sum is returned.
+;   r0 / r1 : address and stride of the first block
+;   r2 / r3 : address and stride of the second block
+;   (assumes LOAD_4_PARA maps the four C arguments to r0..r3 in order;
+;    it is defined outside this part of the file -- confirm there)
+;   returns : retrd = (sum of absolute transformed differences) >> 1
+;
+;***********************************************************************
+WELS_EXTERN WelsSampleSatd4x4_sse2
+    %assign push_num 0
+    LOAD_4_PARA
+    PUSH_XMM 8
+    SIGN_EXTENSION r1, r1d
+    SIGN_EXTENSION r3, r3d
+    ; first block: four 4-byte rows -> xmm0 = rows 0,2  xmm1 = rows 1,3
+    movd xmm0, [r0]
+    movd xmm1, [r0+r1]
+    lea r0 , [r0+2*r1]
+    movd xmm2, [r0]
+    movd xmm3, [r0+r1]
+    punpckldq xmm0, xmm2
+    punpckldq xmm1, xmm3
+
+    ; second block: same layout -> xmm4 = rows 0,2  xmm5 = rows 1,3
+    movd xmm4, [r2]
+    movd xmm5, [r2+r3]
+    lea r2 , [r2+2*r3]
+    movd xmm6, [r2]
+    movd xmm7, [r2+r3]
+    punpckldq xmm4, xmm6
+    punpckldq xmm5, xmm7
+
+    ; widen bytes to words (xmm6 = 0 supplies the high bytes)
+    pxor xmm6, xmm6
+    punpcklbw xmm0, xmm6
+    punpcklbw xmm1, xmm6
+    punpcklbw xmm4, xmm6
+    punpcklbw xmm5, xmm6
+
+    ; per-pixel differences, first block minus second block
+    psubw xmm0, xmm4
+    psubw xmm1, xmm5
+
+    ; butterfly stages: each movdqa/paddw/psubw triple forms sum and
+    ; difference; SSE2_XSawp (defined elsewhere) regroups the lanes
+    ; between stages
+    movdqa xmm2, xmm0
+    paddw xmm0, xmm1
+    psubw xmm2, xmm1
+    SSE2_XSawp qdq, xmm0, xmm2, xmm3
+
+    movdqa xmm4, xmm0
+    paddw xmm0, xmm3
+    psubw xmm4, xmm3
+
+    movdqa xmm2, xmm0
+    punpcklwd xmm0, xmm4
+    punpckhwd xmm4, xmm2
+
+    SSE2_XSawp dq, xmm0, xmm4, xmm3
+    SSE2_XSawp qdq, xmm0, xmm3, xmm5
+
+    movdqa xmm7, xmm0
+    paddw xmm0, xmm5
+    psubw xmm7, xmm5
+
+    SSE2_XSawp qdq, xmm0, xmm7, xmm1
+
+    movdqa xmm2, xmm0
+    paddw xmm0, xmm1
+    psubw xmm2, xmm1
+
+    ; xmm6 is still zero from the pxor above, so it now serves as the
+    ; accumulator for the absolute values
+    WELS_AbsW xmm0, xmm3
+    paddusw xmm6, xmm0
+    WELS_AbsW xmm2, xmm4
+    paddusw xmm6, xmm2
+    SSE2_SumWHorizon1 xmm6, xmm4
+    movd retrd, xmm6
+    and retrd, 0xffff ; only the low word of the horizontal sum is the total
+    shr retrd, 1 ; return half of the sum
+    POP_XMM
+    LOAD_4_PARA_POP
+    ret
+
+    ;***********************************************************************
+    ;
+    ;int32_t WelsSampleSatd8x8_sse2( uint8_t *, int32_t, uint8_t *, int32_t );
+    ;
+    ; 8x8 SATD between two pixel blocks, built on SSE2_GetSatd8x8.
+    ;   r0 / r1 : address and stride of the first block
+    ;   r2 / r3 : address and stride of the second block
+    ;   returns : retrd = sum over the word lanes of (lane total >> 1)
+    ;
+    ;***********************************************************************
+WELS_EXTERN WelsSampleSatd8x8_sse2
+    %assign push_num 0
+    LOAD_4_PARA
+    PUSH_XMM 8
+    SIGN_EXTENSION r1, r1d
+    SIGN_EXTENSION r3, r3d
+    pxor xmm6, xmm6 ; accumulator used by SSE2_GetSatd8x8
+    pxor xmm7, xmm7 ; zero register for SSE2_GetSatd8x8 / SSE2_SumWHorizon
+    SSE2_GetSatd8x8
+    psrlw xmm6, 1 ; halve each word lane before the horizontal sum
+    SSE2_SumWHorizon xmm6,xmm4,xmm7
+    movd retrd, xmm6
+    POP_XMM
+    LOAD_4_PARA_POP
+    ret
+
+ ;*********************************************************************** + ; + ;int32_t WelsSampleSatd8x16_sse2( uint8_t *, int32_t, uint8_t *, int32_t, ); + ; + ;*********************************************************************** +WELS_EXTERN WelsSampleSatd8x16_sse2 + %assign push_num 0 + LOAD_4_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + pxor xmm6, xmm6 + pxor xmm7, xmm7 + + SSE2_GetSatd8x8 + lea r0, [r0+2*r1] + lea r2, [r2+2*r3] + SSE2_GetSatd8x8 + + psrlw xmm6, 1 + SSE2_SumWHorizon xmm6,xmm4,xmm7 + movd retrd, xmm6 + POP_XMM + LOAD_4_PARA_POP + ret + +;*********************************************************************** +; +;int32_t WelsSampleSatd16x8_sse2( uint8_t *, int32_t, uint8_t *, int32_t, ); +; +;*********************************************************************** +WELS_EXTERN WelsSampleSatd16x8_sse2 + %assign push_num 0 + LOAD_4_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + push r0 + push r2 + pxor xmm6, xmm6 + pxor xmm7, xmm7 + + SSE2_GetSatd8x8 + + pop r2 + pop r0 + add r0, 8 + add r2, 8 + SSE2_GetSatd8x8 + + psrlw xmm6, 1 + SSE2_SumWHorizon xmm6,xmm4,xmm7 + movd retrd, xmm6 + POP_XMM + LOAD_4_PARA_POP + ret + +;*********************************************************************** +; +;int32_t WelsSampleSatd16x16_sse2( uint8_t *, int32_t, uint8_t *, int32_t, ); +; +;*********************************************************************** +WELS_EXTERN WelsSampleSatd16x16_sse2 + %assign push_num 0 + LOAD_4_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + push r0 + push r2 + pxor xmm6, xmm6 + pxor xmm7, xmm7 + + SSE2_GetSatd8x8 + lea r0, [r0+2*r1] + lea r2, [r2+2*r3] + SSE2_GetSatd8x8 + + pop r2 + pop r0 + add r0, 8 + add r2, 8 + + SSE2_GetSatd8x8 + lea r0, [r0+2*r1] + lea r2, [r2+2*r3] + SSE2_GetSatd8x8 + + ; each column sum of SATD is necessarily even, so we don't lose any precision by shifting first. 
+ psrlw xmm6, 1 + SSE2_SumWHorizon xmm6,xmm4,xmm7 + movd retrd, xmm6 + POP_XMM + LOAD_4_PARA_POP + ret + +;*********************************************************************** +; +;Pixel_satd_wxh_sse2 END +; +;*********************************************************************** + +;*********************************************************************** +; +;Pixel_satd_intra_sse2 BEGIN +; +;*********************************************************************** + + +%macro SSE_DB_1_2REG 2 + pxor %1, %1 + pcmpeqw %2, %2 + psubb %1, %2 +%endmacro + +;*********************************************************************** +; +;int32_t WelsSampleSatdThree4x4_sse2( uint8_t *pDec, int32_t iLineSizeDec, uint8_t *pEnc, int32_t iLinesizeEnc, +; uint8_t* pRed, int32_t* pBestMode, int32_t, int32_t, int32_t); +; +;*********************************************************************** +WELS_EXTERN WelsSampleSatdThree4x4_sse2 + +%ifdef X86_32 + push r3 + push r4 + push r5 + push r6 + %assign push_num 4 +%else + %assign push_num 0 +%endif + PUSH_XMM 8 + + mov r2, arg3 + mov r3, arg4 + SIGN_EXTENSION r3, r3d + + ; load source 4x4 samples and Hadamard transform + movd xmm0, [r2] + movd xmm1, [r2+r3] + lea r2 , [r2+2*r3] + movd xmm2, [r2] + movd xmm3, [r2+r3] + punpckldq xmm0, xmm2 + punpckldq xmm1, xmm3 + + pxor xmm6, xmm6 + punpcklbw xmm0, xmm6 + punpcklbw xmm1, xmm6 + + movdqa xmm2, xmm0 + paddw xmm0, xmm1 + psubw xmm2, xmm1 + SSE2_XSawp qdq, xmm0, xmm2, xmm3 + + movdqa xmm4, xmm0 + paddw xmm0, xmm3 + psubw xmm4, xmm3 + + movdqa xmm2, xmm0 + punpcklwd xmm0, xmm4 + punpckhwd xmm4, xmm2 + + SSE2_XSawp dq, xmm0, xmm4, xmm3 + SSE2_XSawp qdq, xmm0, xmm3, xmm5 + + movdqa xmm7, xmm0 + paddw xmm0, xmm5 + psubw xmm7, xmm5 + + SSE2_XSawp qdq, xmm0, xmm7, xmm1 + + ; Hadamard transform results are saved in xmm0 and xmm2 + movdqa xmm2, xmm0 + paddw xmm0, xmm1 + psubw xmm2, xmm1 + + ;load top boundary samples: [a b c d] + mov r0, arg1 + mov r1, arg2 + SIGN_EXTENSION r1, r1d + sub r0, r1 
+%ifdef UNIX64 + push r4 + push r5 +%endif + + movzx r2d, byte [r0] + movzx r3d, byte [r0+1] + movzx r4d, byte [r0+2] + movzx r5d, byte [r0+3] + + ; get the transform results of top boundary samples: [a b c d] + add r3d, r2d ; r3d = a + b + add r5d, r4d ; r5d = c + d + add r2d, r2d ; r2d = a + a + add r4d, r4d ; r4d = c + c + sub r2d, r3d ; r2d = a + a - a - b = a - b + sub r4d, r5d ; r4d = c + c - c - d = c - d + add r5d, r3d ; r5d = (a + b) + (c + d) + add r3d, r3d + sub r3d, r5d ; r3d = (a + b) - (c + d) + add r4d, r2d ; r4d = (a - b) + (c - d) + add r2d, r2d + sub r2d, r4d ; r2d = (a - b) - (c - d) ; [r5d r3d r2d r4d] + + movdqa xmm6, xmm0 + movdqa xmm7, xmm2 + movd xmm5, r5d ; store the edi for DC mode + pxor xmm3, xmm3 + pxor xmm4, xmm4 + pinsrw xmm3, r5d, 0 + pinsrw xmm3, r4d, 4 + psllw xmm3, 2 + pinsrw xmm4, r3d, 0 + pinsrw xmm4, r2d, 4 + psllw xmm4, 2 + + ; get the satd of H + psubw xmm0, xmm3 + psubw xmm2, xmm4 + + WELS_AbsW xmm0, xmm1 + WELS_AbsW xmm2, xmm1 + paddusw xmm0, xmm2 + SSE2_SumWHorizon1 xmm0, xmm1 ; satd of V is stored in xmm0 + + ;load left boundary samples: [a b c d]' + add r0, r1 + + movzx r2d, byte [r0-1] + movzx r3d, byte [r0+r1-1] + lea r0 , [r0+2*r1] + movzx r4d, byte [r0-1] + movzx r5d, byte [r0+r1-1] + + ; get the transform results of left boundary samples: [a b c d]' + add r3d, r2d ; r3d = a + b + add r5d, r4d ; r5d = c + d + add r2d, r2d ; r2d = a + a + add r4d, r4d ; r4d = c + c + sub r2d, r3d ; r2d = a + a - a - b = a - b + sub r4d, r5d ; r4d = c + c - c - d = c - d + add r5d, r3d ; r5d = (a + b) + (c + d) + add r3d, r3d + sub r3d, r5d ; r3d = (a + b) - (c + d) + add r4d, r2d ; r4d = (a - b) + (c - d) + add r2d, r2d + sub r2d, r4d ; r2d = (a - b) - (c - d) ; [r5d r3d r2d r4d] + + ; store the transform results in xmm3 + movd xmm3, r5d + pinsrw xmm3, r3d, 1 + pinsrw xmm3, r2d, 2 + pinsrw xmm3, r4d, 3 + psllw xmm3, 2 + + ; get the satd of V + movdqa xmm2, xmm6 + movdqa xmm4, xmm7 + psubw xmm2, xmm3 + WELS_AbsW xmm2, xmm1 + WELS_AbsW 
xmm4, xmm1 + paddusw xmm2, xmm4 + SSE2_SumWHorizon1 xmm2, xmm1 ; satd of H is stored in xmm2 + + ; DC result is stored in xmm1 + add r5d, 4 + movd xmm1, r5d + paddw xmm1, xmm5 + psrlw xmm1, 3 + movdqa xmm5, xmm1 + psllw xmm1, 4 + + ; get the satd of DC + psubw xmm6, xmm1 + WELS_AbsW xmm6, xmm1 + WELS_AbsW xmm7, xmm1 + paddusw xmm6, xmm7 + SSE2_SumWHorizon1 xmm6, xmm1 ; satd of DC is stored in xmm6 +%ifdef UNIX64 + pop r5 + pop r4 +%endif + ; comparing order: DC H V + + mov r4, arg5 + movd r2d, xmm6 + movd r3d, xmm2 + movd r6d, xmm0 + + and r2d, 0xffff + shr r2d, 1 + and r3d, 0xffff + shr r3d, 1 + and r6d, 0xffff + shr r6d, 1 + add r2d, dword arg7 + add r3d, dword arg8 + add r6d, dword arg9 + cmp r2w, r3w + jg near not_dc + cmp r2w, r6w + jg near not_dc_h + + ; for DC mode + movd r3d, xmm5 + imul r3d, 0x01010101 + movd xmm5, r3d + pshufd xmm5, xmm5, 0 + movdqa [r4], xmm5 + mov r5, arg6 + mov dword [r5], 0x02 + mov retrd, r2d + POP_XMM +%ifdef X86_32 + pop r6 + pop r5 + pop r4 + pop r3 +%endif + ret + +not_dc: + cmp r3w, r6w + jg near not_dc_h + + ; for H mode + SSE_DB_1_2REG xmm6, xmm7 + sub r0, r1 + sub r0, r1 + movzx r6d, byte [r0-1] + movd xmm0, r6d + pmuludq xmm0, xmm6 + + movzx r6d, byte [r0+r1-1] + movd xmm1, r6d + pmuludq xmm1, xmm6 + punpckldq xmm0, xmm1 + + lea r0, [r0+r1*2] + movzx r6d, byte [r0-1] + movd xmm2, r6d + pmuludq xmm2, xmm6 + + movzx r6d, byte [r0+r1-1] + movd xmm3, r6d + pmuludq xmm3, xmm6 + punpckldq xmm2, xmm3 + punpcklqdq xmm0, xmm2 + + movdqa [r4],xmm0 + + mov retrd, r3d + mov r5, arg6 + mov dword [r5], 0x01 + POP_XMM +%ifdef X86_32 + pop r6 + pop r5 + pop r4 + pop r3 +%endif + ret +not_dc_h: + sub r0, r1 + sub r0, r1 + sub r0, r1 + movd xmm0, [r0] + pshufd xmm0, xmm0, 0 + movdqa [r4],xmm0 + mov retrd, r6d + mov r5, arg6 + mov dword [r5], 0x00 + POP_XMM +%ifdef X86_32 + pop r6 + pop r5 + pop r4 + pop r3 +%endif + ret + + +%macro SSE41_I16x16Get8WSumSub 3 ;xmm5 HSumSubDB1, xmm6 HSumSubDW1, xmm7 PDW1 : in %1, pOut %1, %3 + pmaddubsw %1, xmm5 
+ movdqa %2, %1 + pmaddwd %1, xmm7 + pmaddwd %2, xmm6 + movdqa %3, %1 + punpckldq %1, %2 + punpckhdq %2, %3 + movdqa %3, %1 + punpcklqdq %1, %2 + punpckhqdq %3, %2 + paddd xmm4, %1 ;for dc + paddd xmm4, %3 ;for dc + packssdw %1, %3 + psllw %1, 2 +%endmacro +%macro SSE41_ChromaGet8WSumSub 4 ;xmm5 HSumSubDB1, xmm6 HSumSubDW1, xmm7 PDW1 : in %1, pOut %1, %3 : %4 tempsse2 + pmaddubsw %1, xmm5 + movdqa %2, %1 + pmaddwd %1, xmm7 + pmaddwd %2, xmm6 + movdqa %3, %1 + punpckldq %1, %2 + punpckhdq %2, %3 + movdqa %3, %1 + punpcklqdq %1, %2 + punpckhqdq %3, %2 +; paddd xmm4, %1 ;for dc +; paddd xmm4, %3 ;for dc + movdqa %4, %1 + punpcklqdq %4, %3 + packssdw %1, %3 + psllw %1, 2 +%endmacro + +%macro SSE41_GetX38x4SatdDec 0 + pxor xmm7, xmm7 + movq xmm0, [r2] + movq xmm1, [r2+r3] + lea r2, [r2+2*r3] + movq xmm2, [r2] + movq xmm3, [r2+r3] + lea r2, [r2+2*r3] + punpcklbw xmm0, xmm7 + punpcklbw xmm1, xmm7 + punpcklbw xmm2, xmm7 + punpcklbw xmm3, xmm7 + SSE2_HDMTwo4x4 xmm0,xmm1,xmm2,xmm3,xmm7 + SSE2_TransTwo4x4W xmm3,xmm1,xmm0,xmm2,xmm7 + SSE2_HDMTwo4x4 xmm3,xmm1,xmm2,xmm7,xmm0 ;pOut xmm7,xmm1,xmm3,xmm2 + ;doesn't need another transpose +%endmacro + +%macro SSE41_GetX38x4SatdV 2 + pxor xmm0, xmm0 + pinsrw xmm0, word[r6+%2], 0 + pinsrw xmm0, word[r6+%2+8], 4 + psubsw xmm0, xmm7 + pabsw xmm0, xmm0 + paddw xmm4, xmm0 + pxor xmm0, xmm0 + pinsrw xmm0, word[r6+%2+2], 0 + pinsrw xmm0, word[r6+%2+10], 4 + psubsw xmm0, xmm1 + pabsw xmm0, xmm0 + paddw xmm4, xmm0 + pxor xmm0, xmm0 + pinsrw xmm0, word[r6+%2+4], 0 + pinsrw xmm0, word[r6+%2+12], 4 + psubsw xmm0, xmm3 + pabsw xmm0, xmm0 + paddw xmm4, xmm0 + pxor xmm0, xmm0 + pinsrw xmm0, word[r6+%2+6], 0 + pinsrw xmm0, word[r6+%2+14], 4 + psubsw xmm0, xmm2 + pabsw xmm0, xmm0 + paddw xmm4, xmm0 +%endmacro +%macro SSE41_GetX38x4SatdH 3 + movq xmm0, [r6+%3+8*%1] + punpcklqdq xmm0, xmm0 + psubsw xmm0, xmm7 + pabsw xmm0, xmm0 + paddw xmm5, xmm0 + pabsw xmm1, xmm1 + pabsw xmm2, xmm2 + pabsw xmm3, xmm3 + paddw xmm2, xmm1;for DC + paddw xmm2, xmm3;for DC 
+ paddw xmm5, xmm2 +%endmacro +%macro SSE41_I16X16GetX38x4SatdDC 0 + pxor xmm0, xmm0 + movq2dq xmm0, mm4 + punpcklqdq xmm0, xmm0 + psubsw xmm0, xmm7 + pabsw xmm0, xmm0 + paddw xmm6, xmm0 + paddw xmm6, xmm2 +%endmacro +%macro SSE41_ChromaGetX38x4SatdDC 1 + shl %1, 4 + movdqa xmm0, [r6+32+%1] + psubsw xmm0, xmm7 + pabsw xmm0, xmm0 + paddw xmm6, xmm0 + paddw xmm6, xmm2 +%endmacro +%macro SSE41_I16x16GetX38x4Satd 2 + SSE41_GetX38x4SatdDec + SSE41_GetX38x4SatdV %1, %2 + SSE41_GetX38x4SatdH %1, %2, 32 + SSE41_I16X16GetX38x4SatdDC +%endmacro +%macro SSE41_ChromaGetX38x4Satd 2 + SSE41_GetX38x4SatdDec + SSE41_GetX38x4SatdV %1, %2 + SSE41_GetX38x4SatdH %1, %2, 16 + SSE41_ChromaGetX38x4SatdDC %1 +%endmacro +%macro SSE41_HSum8W 3 + pmaddwd %1, %2 + movhlps %3, %1 + paddd %1, %3 + pshuflw %3, %1,0Eh + paddd %1, %3 +%endmacro + +WELS_EXTERN WelsIntra16x16Combined3Satd_sse41 + %assign push_num 0 + LOAD_7_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r5, r5d + +%ifndef X86_32 + push r12 + mov r12, r2 +%endif + + INIT_X86_32_PIC r2 + pxor xmm4, xmm4 + movdqa xmm5, [pic(HSumSubDB1)] + movdqa xmm6, [pic(HSumSubDW1)] + movdqa xmm7, [pic(PDW1)] + DEINIT_X86_32_PIC + sub r0, r1 + movdqu xmm0, [r0] + movhlps xmm1, xmm0 + punpcklqdq xmm0, xmm0 + punpcklqdq xmm1, xmm1 + SSE41_I16x16Get8WSumSub xmm0, xmm2, xmm3 + SSE41_I16x16Get8WSumSub xmm1, xmm2, xmm3 + movdqa [r6], xmm0 ;V + movdqa [r6+16], xmm1 + add r0, r1 + pinsrb xmm0, byte[r0-1], 0 + pinsrb xmm0, byte[r0+r1-1], 1 + lea r0, [r0+2*r1] + pinsrb xmm0, byte[r0-1], 2 + pinsrb xmm0, byte[r0+r1-1], 3 + lea r0, [r0+2*r1] + pinsrb xmm0, byte[r0-1], 4 + pinsrb xmm0, byte[r0+r1-1], 5 + lea r0, [r0+2*r1] + pinsrb xmm0, byte[r0-1], 6 + pinsrb xmm0, byte[r0+r1-1], 7 + lea r0, [r0+2*r1] + pinsrb xmm0, byte[r0-1], 8 + pinsrb xmm0, byte[r0+r1-1], 9 + lea r0, [r0+2*r1] + pinsrb xmm0, byte[r0-1], 10 + pinsrb xmm0, byte[r0+r1-1], 11 + lea r0, [r0+2*r1] + pinsrb xmm0, byte[r0-1], 12 + pinsrb xmm0, byte[r0+r1-1], 13 
+ lea r0, [r0+2*r1] + pinsrb xmm0, byte[r0-1], 14 + pinsrb xmm0, byte[r0+r1-1], 15 + movhlps xmm1, xmm0 + punpcklqdq xmm0, xmm0 + punpcklqdq xmm1, xmm1 + SSE41_I16x16Get8WSumSub xmm0, xmm2, xmm3 + SSE41_I16x16Get8WSumSub xmm1, xmm2, xmm3 + movdqa [r6+32], xmm0 ;H + movdqa [r6+48], xmm1 + movd r0d, xmm4 ;dc + add r0d, 16 ;(sum+16) + shr r0d, 5 ;((sum+16)>>5) + shl r0d, 4 ; + movd mm4, r0d ; mm4 copy DC + pxor xmm4, xmm4 ;V + pxor xmm5, xmm5 ;H + pxor xmm6, xmm6 ;DC +%ifdef UNIX64 + push r4 +%endif + mov r0, 0 + mov r4, 0 + +.loop16x16_get_satd: +.loopStart1: + SSE41_I16x16GetX38x4Satd r0, r4 + inc r0 + cmp r0, 4 + jl .loopStart1 + cmp r4, 16 + je .loop16x16_get_satd_end +%ifdef X86_32 + mov r2, arg3 +%else + mov r2, r12 +%endif + add r2, 8 + mov r0, 0 + add r4, 16 + jmp .loop16x16_get_satd + .loop16x16_get_satd_end: + MMX_DW_1_2REG xmm0, xmm1 + psrlw xmm4, 1 ;/2 + psrlw xmm5, 1 ;/2 + psrlw xmm6, 1 ;/2 + SSE41_HSum8W xmm4, xmm0, xmm1 + SSE41_HSum8W xmm5, xmm0, xmm1 + SSE41_HSum8W xmm6, xmm0, xmm1 + +%ifdef UNIX64 + pop r4 +%endif + ; comparing order: DC H V + movd r3d, xmm6 ;DC + movd r1d, xmm5 ;H + movd r0d, xmm4 ;V +%ifndef X86_32 + pop r12 +%endif + shl r5d, 1 + add r1d, r5d + add r3d, r5d + mov r4, arg5 + cmp r3d, r1d + jge near not_dc_16x16 + cmp r3d, r0d + jge near not_dc_h_16x16 + + ; for DC mode + mov dword[r4], 2;I16_PRED_DC + mov retrd, r3d + jmp near return_satd_intra_16x16_x3 +not_dc_16x16: + ; for H mode + cmp r1d, r0d + jge near not_dc_h_16x16 + mov dword[r4], 1;I16_PRED_H + mov retrd, r1d + jmp near return_satd_intra_16x16_x3 +not_dc_h_16x16: + ; for V mode + mov dword[r4], 0;I16_PRED_V + mov retrd, r0d +return_satd_intra_16x16_x3: + WELSEMMS + POP_XMM + LOAD_7_PARA_POP +ret + +%macro SSE41_ChromaGetX38x8Satd 0 + movdqa xmm5, [pic(HSumSubDB1)] + movdqa xmm6, [pic(HSumSubDW1)] + movdqa xmm7, [pic(PDW1)] + sub r0, r1 + movq xmm0, [r0] + punpcklqdq xmm0, xmm0 + SSE41_ChromaGet8WSumSub xmm0, xmm2, xmm3, xmm4 + movdqa [r6], xmm0 ;V + add r0, r1 + pinsrb 
xmm0, byte[r0-1], 0 + pinsrb xmm0, byte[r0+r1-1], 1 + lea r0, [r0+2*r1] + pinsrb xmm0, byte[r0-1], 2 + pinsrb xmm0, byte[r0+r1-1], 3 + lea r0, [r0+2*r1] + pinsrb xmm0, byte[r0-1], 4 + pinsrb xmm0, byte[r0+r1-1], 5 + lea r0, [r0+2*r1] + pinsrb xmm0, byte[r0-1], 6 + pinsrb xmm0, byte[r0+r1-1], 7 + punpcklqdq xmm0, xmm0 + SSE41_ChromaGet8WSumSub xmm0, xmm2, xmm3, xmm1 + movdqa [r6+16], xmm0 ;H +;(sum+2)>>2 + movdqa xmm6, [pic(PDQ2)] + movdqa xmm5, xmm4 + punpckhqdq xmm5, xmm1 + paddd xmm5, xmm6 + psrld xmm5, 2 +;(sum1+sum2+4)>>3 + paddd xmm6, xmm6 + paddd xmm4, xmm1 + paddd xmm4, xmm6 + psrld xmm4, 3 +;satd *16 + pslld xmm5, 4 + pslld xmm4, 4 +;temp satd + movdqa xmm6, xmm4 + punpcklqdq xmm4, xmm5 + psllq xmm4, 32 + psrlq xmm4, 32 + movdqa [r6+32], xmm4 + punpckhqdq xmm5, xmm6 + psllq xmm5, 32 + psrlq xmm5, 32 + movdqa [r6+48], xmm5 + + pxor xmm4, xmm4 ;V + pxor xmm5, xmm5 ;H + pxor xmm6, xmm6 ;DC + mov r0, 0 + SSE41_ChromaGetX38x4Satd r0, 0 + inc r0 + SSE41_ChromaGetX38x4Satd r0, 0 +%endmacro + +%macro SSEReg2MMX 3 + movdq2q %2, %1 + movhlps %1, %1 + movdq2q %3, %1 +%endmacro +%macro MMXReg2SSE 4 + movq2dq %1, %3 + movq2dq %2, %4 + punpcklqdq %1, %2 +%endmacro +;for reduce the code size of WelsIntraChroma8x8Combined3Satd_sse41 + +WELS_EXTERN WelsIntraChroma8x8Combined3Satd_sse41 + %assign push_num 0 + LOAD_7_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r5, r5d +loop_chroma_satdx3: + INIT_X86_32_PIC r4 + SSE41_ChromaGetX38x8Satd + SSEReg2MMX xmm4, mm0,mm1 + SSEReg2MMX xmm5, mm2,mm3 + SSEReg2MMX xmm6, mm5,mm6 + mov r0, arg8 + mov r2, arg9 + + SSE41_ChromaGetX38x8Satd + DEINIT_X86_32_PIC + + MMXReg2SSE xmm0, xmm3, mm0, mm1 + MMXReg2SSE xmm1, xmm3, mm2, mm3 + MMXReg2SSE xmm2, xmm3, mm5, mm6 + + paddw xmm4, xmm0 + paddw xmm5, xmm1 + paddw xmm6, xmm2 + + MMX_DW_1_2REG xmm0, xmm1 + psrlw xmm4, 1 ;/2 + psrlw xmm5, 1 ;/2 + psrlw xmm6, 1 ;/2 + SSE41_HSum8W xmm4, xmm0, xmm1 + SSE41_HSum8W xmm5, xmm0, xmm1 + SSE41_HSum8W xmm6, xmm0, xmm1 + 
; comparing order: DC H V + movd r3d, xmm6 ;DC + movd r1d, xmm5 ;H + movd r0d, xmm4 ;V + + + shl r5d, 1 + add r1d, r5d + add r0d, r5d + cmp r3d, r1d + jge near not_dc_8x8 + cmp r3d, r0d + jge near not_dc_h_8x8 + + ; for DC mode + mov dword[r4], 0;I8_PRED_DC + mov retrd, r3d + jmp near return_satd_intra_8x8_x3 +not_dc_8x8: + ; for H mode + cmp r1d, r0d + jge near not_dc_h_8x8 + mov dword[r4], 1;I8_PRED_H + mov retrd, r1d + jmp near return_satd_intra_8x8_x3 +not_dc_h_8x8: + ; for V mode + mov dword[r4], 2;I8_PRED_V + mov retrd, r0d +return_satd_intra_8x8_x3: + WELSEMMS + POP_XMM + LOAD_7_PARA_POP +ret + + +;*********************************************************************** +; +;Pixel_satd_intra_sse2 END +; +;*********************************************************************** +%macro SSSE3_Get16BSadHVDC 2 + movd xmm6,%1 + pshufb xmm6,xmm1 + movdqa %1, xmm6 + movdqa xmm0,%2 + psadbw xmm0,xmm7 + paddw xmm4,xmm0 + movdqa xmm0,%2 + psadbw xmm0,xmm5 + paddw xmm2,xmm0 + psadbw xmm6,%2 + paddw xmm3,xmm6 +%endmacro +%macro WelsAddDCValue 4 + movzx %2, byte %1 + mov %3, %2 + add %4, %2 +%endmacro + +;*********************************************************************** +; +;Pixel_sad_intra_ssse3 BEGIN +; +;*********************************************************************** +WELS_EXTERN WelsIntra16x16Combined3Sad_ssse3 + %assign push_num 0 + LOAD_7_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r5, r5d + + push r5 + push r4 + push r3 + + sub r0, r1 + movdqa xmm5,[r0] + pxor xmm0,xmm0 + psadbw xmm0,xmm5 + movhlps xmm1,xmm0 + paddw xmm0,xmm1 + movd r5d, xmm0 + + add r0,r1 + lea r3,[r1+2*r1] ;ebx r3 + WelsAddDCValue [r0-1 ], r4d, [r6 ], r5d ; esi r4d, eax r5d + WelsAddDCValue [r0-1+r1 ], r4d, [r6+16], r5d + WelsAddDCValue [r0-1+r1*2], r4d, [r6+32], r5d + WelsAddDCValue [r0-1+r3 ], r4d, [r6+48], r5d + lea r0, [r0+4*r1] + add r6, 64 + WelsAddDCValue [r0-1 ], r4d, [r6 ], r5d + WelsAddDCValue [r0-1+r1 ], r4d, [r6+16], r5d + 
WelsAddDCValue [r0-1+r1*2], r4d, [r6+32], r5d + WelsAddDCValue [r0-1+r3 ], r4d, [r6+48], r5d + lea r0, [r0+4*r1] + add r6, 64 + WelsAddDCValue [r0-1 ], r4d, [r6 ], r5d + WelsAddDCValue [r0-1+r1 ], r4d, [r6+16], r5d + WelsAddDCValue [r0-1+r1*2], r4d, [r6+32], r5d + WelsAddDCValue [r0-1+r3 ], r4d, [r6+48], r5d + lea r0, [r0+4*r1] + add r6, 64 + WelsAddDCValue [r0-1 ], r4d, [r6 ], r5d + WelsAddDCValue [r0-1+r1 ], r4d, [r6+16], r5d + WelsAddDCValue [r0-1+r1*2], r4d, [r6+32], r5d + WelsAddDCValue [r0-1+r3 ], r4d, [r6+48], r5d + sub r6, 192 + add r5d,10h + shr r5d,5 + movd xmm7,r5d + pxor xmm1,xmm1 + pshufb xmm7,xmm1 + pxor xmm4,xmm4 + pxor xmm3,xmm3 + pxor xmm2,xmm2 + ;sad begin + pop r3 + lea r4, [r3+2*r3] ;esi r4 + SSSE3_Get16BSadHVDC [r6], [r2] + SSSE3_Get16BSadHVDC [r6+16], [r2+r3] + SSSE3_Get16BSadHVDC [r6+32], [r2+2*r3] + SSSE3_Get16BSadHVDC [r6+48], [r2+r4] + add r6, 64 + lea r2, [r2+4*r3] + SSSE3_Get16BSadHVDC [r6], [r2] + SSSE3_Get16BSadHVDC [r6+16], [r2+r3] + SSSE3_Get16BSadHVDC [r6+32], [r2+2*r3] + SSSE3_Get16BSadHVDC [r6+48], [r2+r4] + add r6, 64 + lea r2, [r2+4*r3] + SSSE3_Get16BSadHVDC [r6], [r2] + SSSE3_Get16BSadHVDC [r6+16], [r2+r3] + SSSE3_Get16BSadHVDC [r6+32], [r2+2*r3] + SSSE3_Get16BSadHVDC [r6+48], [r2+r4] + add r6, 64 + lea r2, [r2+4*r3] + SSSE3_Get16BSadHVDC [r6], [r2] + SSSE3_Get16BSadHVDC [r6+16], [r2+r3] + SSSE3_Get16BSadHVDC [r6+32], [r2+2*r3] + SSSE3_Get16BSadHVDC [r6+48], [r2+r4] + + pop r4 + pop r5 + pslldq xmm3,4 + por xmm3,xmm2 + movhlps xmm1,xmm3 + paddw xmm3,xmm1 + movhlps xmm0,xmm4 + paddw xmm4,xmm0 + ; comparing order: DC H V + movd r1d, xmm4 ;DC ;ebx r1d + movd r0d, xmm3 ;V ;ecx r0d + psrldq xmm3, 4 + movd r2d, xmm3 ;H ;esi r2d + + ;mov eax, [esp+36] ;lamda ;eax r5 + shl r5d, 1 + add r2d, r5d + add r1d, r5d + ;mov edx, [esp+32] ;edx r4 + cmp r1d, r2d + jge near not_dc_16x16_sad + cmp r1d, r0d + jge near not_dc_h_16x16_sad + ; for DC mode + mov dword[r4], 2;I16_PRED_DC + mov retrd, r1d + sub r6, 192 +%assign x 0 +%rep 16 + movdqa 
[r6+16*x], xmm7 +%assign x x+1 +%endrep + jmp near return_sad_intra_16x16_x3 +not_dc_16x16_sad: + ; for H mode + cmp r2d, r0d + jge near not_dc_h_16x16_sad + mov dword[r4], 1;I16_PRED_H + mov retrd, r2d + jmp near return_sad_intra_16x16_x3 +not_dc_h_16x16_sad: + ; for V mode + mov dword[r4], 0;I16_PRED_V + mov retrd, r0d + sub r6, 192 +%assign x 0 +%rep 16 + movdqa [r6+16*x], xmm5 +%assign x x+1 +%endrep +return_sad_intra_16x16_x3: + POP_XMM + LOAD_7_PARA_POP + ret + +;*********************************************************************** +; +;Pixel_sad_intra_ssse3 END +; +;*********************************************************************** +;*********************************************************************** +; +;Pixel_satd_wxh_sse41 BEGIN +; +;*********************************************************************** + +;SSE4.1 +%macro SSE41_GetSatd8x4 0 + movq xmm0, [r0] + punpcklqdq xmm0, xmm0 + pmaddubsw xmm0, xmm7 + movq xmm1, [r0+r1] + punpcklqdq xmm1, xmm1 + pmaddubsw xmm1, xmm7 + movq xmm2, [r2] + punpcklqdq xmm2, xmm2 + pmaddubsw xmm2, xmm7 + movq xmm3, [r2+r3] + punpcklqdq xmm3, xmm3 + pmaddubsw xmm3, xmm7 + psubsw xmm0, xmm2 + psubsw xmm1, xmm3 + movq xmm2, [r0+2*r1] + punpcklqdq xmm2, xmm2 + pmaddubsw xmm2, xmm7 + movq xmm3, [r0+r4] + punpcklqdq xmm3, xmm3 + pmaddubsw xmm3, xmm7 + movq xmm4, [r2+2*r3] + punpcklqdq xmm4, xmm4 + pmaddubsw xmm4, xmm7 + movq xmm5, [r2+r5] + punpcklqdq xmm5, xmm5 + pmaddubsw xmm5, xmm7 + psubsw xmm2, xmm4 + psubsw xmm3, xmm5 + SSE2_HDMTwo4x4 xmm0, xmm1, xmm2, xmm3, xmm4 + pabsw xmm0, xmm0 + pabsw xmm2, xmm2 + pabsw xmm1, xmm1 + pabsw xmm3, xmm3 + movdqa xmm4, xmm3 + pblendw xmm3, xmm1, 0xAA + pslld xmm1, 16 + psrld xmm4, 16 + por xmm1, xmm4 + pmaxuw xmm1, xmm3 + paddw xmm6, xmm1 + movdqa xmm4, xmm0 + pblendw xmm0, xmm2, 0xAA + pslld xmm2, 16 + psrld xmm4, 16 + por xmm2, xmm4 + pmaxuw xmm0, xmm2 + paddw xmm6, xmm0 +%endmacro + +%macro SSSE3_SumWHorizon 4 ;eax, srcSSE, tempSSE, tempSSE + MMX_DW_1_2REG %3, %4 + pmaddwd 
%2, %3 + movhlps %4, %2 + paddd %2, %4 + pshuflw %4, %2,0Eh + paddd %2, %4 + movd %1, %2 +%endmacro +;*********************************************************************** +; +;int32_t WelsSampleSatd4x4_sse41( uint8_t *, int32_t, uint8_t *, int32_t ); +; +;*********************************************************************** +WELS_EXTERN WelsSampleSatd4x4_sse41 + %assign push_num 0 + INIT_X86_32_PIC r5 + LOAD_4_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + movdqa xmm4,[pic(HSwapSumSubDB1)] + movd xmm2,[r2] + movd xmm5,[r2+r3] + shufps xmm2,xmm5,0 + movd xmm3,[r2+r3*2] + lea r2, [r3*2+r2] + movd xmm5,[r2+r3] + shufps xmm3,xmm5,0 + movd xmm0,[r0] + movd xmm5,[r0+r1] + shufps xmm0,xmm5,0 + movd xmm1,[r0+r1*2] + lea r0, [r1*2+r0] + movd xmm5,[r0+r1] + shufps xmm1,xmm5,0 + pmaddubsw xmm0,xmm4 + pmaddubsw xmm1,xmm4 + pmaddubsw xmm2,xmm4 + pmaddubsw xmm3,xmm4 + psubw xmm0,xmm2 + psubw xmm1,xmm3 + movdqa xmm2,xmm0 + paddw xmm0,xmm1 + psubw xmm1,xmm2 + movdqa xmm2,xmm0 + punpcklqdq xmm0,xmm1 + punpckhqdq xmm2,xmm1 + movdqa xmm1,xmm0 + paddw xmm0,xmm2 + psubw xmm2,xmm1 + movdqa xmm1,xmm0 + pblendw xmm0,xmm2,0AAh + pslld xmm2,16 + psrld xmm1,16 + por xmm2,xmm1 + pabsw xmm0,xmm0 + pabsw xmm2,xmm2 + pmaxsw xmm0,xmm2 + SSSE3_SumWHorizon retrd, xmm0, xmm5, xmm7 + POP_XMM + LOAD_4_PARA_POP + DEINIT_X86_32_PIC + ret + +;*********************************************************************** +; +;int32_t WelsSampleSatd8x8_sse41( uint8_t *, int32_t, uint8_t *, int32_t, ); +; +;*********************************************************************** +WELS_EXTERN WelsSampleSatd8x8_sse41 +%ifdef X86_32 + push r4 + push r5 +%endif + %assign push_num 2 + INIT_X86_32_PIC r6 + LOAD_4_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + + movdqa xmm7, [pic(HSumSubDB1)] + lea r4, [r1+r1*2] + lea r5, [r3+r3*2] + pxor xmm6, xmm6 + SSE41_GetSatd8x4 + lea r0, [r0+4*r1] + lea r2, [r2+4*r3] + SSE41_GetSatd8x4 + SSSE3_SumWHorizon retrd, xmm6, xmm5, xmm7 + 
POP_XMM + LOAD_4_PARA_POP + DEINIT_X86_32_PIC +%ifdef X86_32 + pop r5 + pop r4 +%endif + ret + +;*********************************************************************** +; +;int32_t WelsSampleSatd8x16_sse41( uint8_t *, int32_t, uint8_t *, int32_t, ); +; +;*********************************************************************** +WELS_EXTERN WelsSampleSatd8x16_sse41 +%ifdef X86_32 + push r4 + push r5 + push r6 +%endif + %assign push_num 3 + LOAD_4_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + + INIT_X86_32_PIC_NOPRESERVE r4 + movdqa xmm7, [pic(HSumSubDB1)] + DEINIT_X86_32_PIC + lea r4, [r1+r1*2] + lea r5, [r3+r3*2] + pxor xmm6, xmm6 + mov r6, 0 +loop_get_satd_8x16: + SSE41_GetSatd8x4 + lea r0, [r0+4*r1] + lea r2, [r2+4*r3] + inc r6 + cmp r6, 4 + jl loop_get_satd_8x16 + SSSE3_SumWHorizon retrd, xmm6, xmm5, xmm7 + POP_XMM + LOAD_4_PARA_POP +%ifdef X86_32 + pop r6 + pop r5 + pop r4 +%endif + ret + +;*********************************************************************** +; +;int32_t WelsSampleSatd16x8_sse41( uint8_t *, int32_t, uint8_t *, int32_t, ); +; +;*********************************************************************** +WELS_EXTERN WelsSampleSatd16x8_sse41 +%ifdef X86_32 + push r4 + push r5 +%endif + %assign push_num 2 + INIT_X86_32_PIC r6 + LOAD_4_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + push r0 + push r2 + + movdqa xmm7, [pic(HSumSubDB1)] + lea r4, [r1+r1*2] + lea r5, [r3+r3*2] + pxor xmm6, xmm6 + SSE41_GetSatd8x4 + lea r0, [r0+4*r1] + lea r2, [r2+4*r3] + SSE41_GetSatd8x4 + + pop r2 + pop r0 + add r0, 8 + add r2, 8 + SSE41_GetSatd8x4 + lea r0, [r0+4*r1] + lea r2, [r2+4*r3] + SSE41_GetSatd8x4 + SSSE3_SumWHorizon retrd, xmm6, xmm5, xmm7 + POP_XMM + LOAD_4_PARA_POP + DEINIT_X86_32_PIC +%ifdef X86_32 + pop r5 + pop r4 +%endif + ret + +;*********************************************************************** +; +;int32_t WelsSampleSatd16x16_sse41( uint8_t *, int32_t, uint8_t *, int32_t, ); +; 
+;*********************************************************************** + +WELS_EXTERN WelsSampleSatd16x16_sse41 +%ifdef X86_32 + push r4 + push r5 + push r6 +%endif + %assign push_num 3 + LOAD_4_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + + push r0 + push r2 + + INIT_X86_32_PIC_NOPRESERVE r4 + movdqa xmm7, [pic(HSumSubDB1)] + DEINIT_X86_32_PIC + lea r4, [r1+r1*2] + lea r5, [r3+r3*2] + pxor xmm6, xmm6 + mov r6, 0 +loop_get_satd_16x16_left: + SSE41_GetSatd8x4 + lea r0, [r0+4*r1] + lea r2, [r2+4*r3] + inc r6 + cmp r6, 4 + jl loop_get_satd_16x16_left + + pop r2 + pop r0 + add r0, 8 + add r2, 8 + mov r6, 0 +loop_get_satd_16x16_right: + SSE41_GetSatd8x4 + lea r0, [r0+4*r1] + lea r2, [r2+4*r3] + inc r6 + cmp r6, 4 + jl loop_get_satd_16x16_right + SSSE3_SumWHorizon retrd, xmm6, xmm5, xmm7 + POP_XMM + LOAD_4_PARA_POP +%ifdef X86_32 + pop r6 + pop r5 + pop r4 +%endif + ret + +;*********************************************************************** +; +;Pixel_satd_wxh_sse41 END +; +;*********************************************************************** + +;*********************************************************************** +; +;Pixel_satd_wxh_avx2 BEGIN +; +;*********************************************************************** + +%ifdef HAVE_AVX2 +; out=%1 pSrcA=%2 pSrcB=%3 HSumSubDB1_256=%4 ymm_clobber=%5 +%macro AVX2_LoadDiffSatd16x1 5 + vbroadcasti128 %1, [%2] + vpmaddubsw %1, %1, %4 ; hadamard neighboring horizontal sums and differences + vbroadcasti128 %5, [%3] + vpmaddubsw %5, %5, %4 ; hadamard neighboring horizontal sums and differences + vpsubw %1, %1, %5 ; diff srcA srcB +%endmacro + +; out=%1 pSrcA=%2 pSrcA+4*iStride=%3 pSrcB=%4 pSrcB+4*iStride=%5 HSumSubDB1_128x2=%6 ymm_clobber=%7,%8 +%macro AVX2_LoadDiffSatd8x2 8 + vpbroadcastq %1, [%2] + vpbroadcastq %7, [%3] + vpblendd %1, %1, %7, 11110000b + vpmaddubsw %1, %1, %6 ; hadamard neighboring horizontal sums and differences + vpbroadcastq %7, [%4] + vpbroadcastq %8, [%5] + vpblendd 
%7, %7, %8, 11110000b + vpmaddubsw %7, %7, %6 ; hadamard neighboring horizontal sums and differences + vpsubw %1, %1, %7 ; diff srcA srcB +%endmacro + +; in/out=%1,%2,%3,%4 clobber=%5 +%macro AVX2_HDMFour4x4 5 + vpsubw %5, %1, %4 ; s3 = x0 - x3 + vpaddw %1, %1, %4 ; s0 = x0 + x3 + vpsubw %4, %2, %3 ; s2 = x1 - x2 + vpaddw %2, %2, %3 ; s1 = x1 + x2 + vpsubw %3, %1, %2 ; y2 = s0 - s1 + vpaddw %1, %1, %2 ; y0 = s0 + s1 + vpaddw %2, %5, %4 ; y1 = s3 + s2 + vpsubw %4, %5, %4 ; y3 = s3 - s2 +%endmacro + +; out=%1 in=%1,%2,%3,%4 clobber=%5 +%macro AVX2_SatdFour4x4 5 + AVX2_HDMFour4x4 %1, %2, %3, %4, %5 + vpabsw %1, %1 + vpabsw %2, %2 + vpabsw %3, %3 + vpabsw %4, %4 + ; second stage of horizontal hadamard. + ; utilizes that |a + b| + |a - b| = 2 * max(|a|, |b|) + vpblendw %5, %1, %2, 10101010b + vpslld %2, %2, 16 + vpsrld %1, %1, 16 + vpor %2, %2, %1 + vpmaxuw %2, %2, %5 + vpblendw %5, %3, %4, 10101010b + vpslld %4, %4, 16 + vpsrld %3, %3, 16 + vpor %4, %4, %3 + vpmaxuw %3, %5, %4 + vpaddw %1, %2, %3 +%endmacro + +; out=%1 pSrcA=%2 iStrideA=%3 3*iStrideA=%4 pSrcB=%5 iStrideB=%6 3*iStrideB=%7 HSumSubDB1_256=%8 ymm_clobber=%9,%10,%11,%12 +%macro AVX2_GetSatd16x4 12 + AVX2_LoadDiffSatd16x1 %1, %2 + 0 * %3, %5 + 0 * %6, %8, %12 + AVX2_LoadDiffSatd16x1 %9, %2 + 1 * %3, %5 + 1 * %6, %8, %12 + AVX2_LoadDiffSatd16x1 %10, %2 + 2 * %3, %5 + 2 * %6, %8, %12 + AVX2_LoadDiffSatd16x1 %11, %2 + 1 * %4, %5 + 1 * %7, %8, %12 + AVX2_SatdFour4x4 %1, %9, %10, %11, %12 +%endmacro + +; out=%1 pSrcA=%2 iStrideA=%3 3*iStrideA=%4 pSrcB=%5 iStrideB=%6 3*iStrideB=%7 HSumSubDB1_128x2=%8 ymm_clobber=%9,%10,%11,%12,%13 +%macro AVX2_GetSatd8x8 13 + AVX2_LoadDiffSatd8x2 %1, %2 + 0 * %3, %2 + 4 * %3, %5 + 0 * %6, %5 + 4 * %6, %8, %12, %13 + AVX2_LoadDiffSatd8x2 %10, %2 + 2 * %3, %2 + 2 * %4, %5 + 2 * %6, %5 + 2 * %7, %8, %12, %13 + add %2, %3 + add %5, %6 + AVX2_LoadDiffSatd8x2 %9, %2 + 0 * %3, %2 + 4 * %3, %5 + 0 * %6, %5 + 4 * %6, %8, %12, %13 + AVX2_LoadDiffSatd8x2 %11, %2 + 2 * %3, %2 + 2 * %4, %5 + 2 
* %6, %5 + 2 * %7, %8, %12, %13 + AVX2_SatdFour4x4 %1, %9, %10, %11, %12 +%endmacro + +; d_out=%1 mm_in=%2 mm_clobber=%3 +%macro AVX2_SumWHorizon 3 + WELS_DW1_VEX y%3 + vpmaddwd y%2, y%2, y%3 + vextracti128 x%3, y%2, 1 + vpaddd x%2, x%2, x%3 + vpunpckhqdq x%3, x%2, x%2 + vpaddd x%2, x%2, x%3 + vpsrldq x%3, x%2, 4 + vpaddd x%2, x%2, x%3 + vmovd %1, x%2 +%endmacro + +;*********************************************************************** +; +;int32_t WelsSampleSatd8x16_avx2( uint8_t *, int32_t, uint8_t *, int32_t, ); +; +;*********************************************************************** + +WELS_EXTERN WelsSampleSatd8x16_avx2 + %assign push_num 0 +%ifdef X86_32 + push r4 + %assign push_num 1 +%endif + mov r4, 2 ; loop cnt + jmp WelsSampleSatd8x8N_avx2 + +;*********************************************************************** +; +;int32_t WelsSampleSatd8x8_avx2( uint8_t *, int32_t, uint8_t *, int32_t, ); +; +;*********************************************************************** + +WELS_EXTERN WelsSampleSatd8x8_avx2 + %assign push_num 0 +%ifdef X86_32 + push r4 + %assign push_num 1 +%endif + mov r4, 1 ; loop cnt + ; fall through +WelsSampleSatd8x8N_avx2: +%ifdef X86_32 + push r5 + push r6 + %assign push_num push_num+2 +%endif + LOAD_4_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + + INIT_X86_32_PIC_NOPRESERVE r5 + vbroadcasti128 ymm7, [pic(HSumSubDB1)] + DEINIT_X86_32_PIC + lea r5, [3 * r1] + lea r6, [3 * r3] + vpxor ymm6, ymm6, ymm6 +.loop: + AVX2_GetSatd8x8 ymm0, r0, r1, r5, r2, r3, r6, ymm7, ymm1, ymm2, ymm3, ymm4, ymm5 + vpaddw ymm6, ymm6, ymm0 + sub r4, 1 + jbe .loop_end + add r0, r5 + add r2, r6 + lea r0, [r0 + 4 * r1] + lea r2, [r2 + 4 * r3] + jmp .loop +.loop_end: + AVX2_SumWHorizon retrd, mm6, mm5 + vzeroupper + POP_XMM + LOAD_4_PARA_POP +%ifdef X86_32 + pop r6 + pop r5 + pop r4 +%endif + ret + +;*********************************************************************** +; +;int32_t WelsSampleSatd16x16_avx2( uint8_t *, int32_t, 
uint8_t *, int32_t, ); +; +;*********************************************************************** + +WELS_EXTERN WelsSampleSatd16x16_avx2 + %assign push_num 0 +%ifdef X86_32 + push r4 + %assign push_num 1 +%endif + mov r4, 4 ; loop cnt + jmp WelsSampleSatd16x4N_avx2 + +;*********************************************************************** +; +;int32_t WelsSampleSatd16x8_avx2( uint8_t *, int32_t, uint8_t *, int32_t, ); +; +;*********************************************************************** + +WELS_EXTERN WelsSampleSatd16x8_avx2 + %assign push_num 0 +%ifdef X86_32 + push r4 + %assign push_num 1 +%endif + mov r4, 2 ; loop cnt + ; fall through +WelsSampleSatd16x4N_avx2: +%ifdef X86_32 + push r5 + push r6 + %assign push_num push_num+2 +%endif + LOAD_4_PARA + PUSH_XMM 7 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + + INIT_X86_32_PIC_NOPRESERVE r5 + vpbroadcastq xmm0, [pic(HSumSubDB1)] + vpbroadcastq ymm6, [pic(HSumSubDB1 + 8)] + vpblendd ymm6, ymm0, ymm6, 11110000b + DEINIT_X86_32_PIC + lea r5, [3 * r1] + lea r6, [3 * r3] + vpxor ymm5, ymm5, ymm5 +.loop: + AVX2_GetSatd16x4 ymm0, r0, r1, r5, r2, r3, r6, ymm6, ymm1, ymm2, ymm3, ymm4 + vpaddw ymm5, ymm5, ymm0 + lea r0, [r0 + 4 * r1] + lea r2, [r2 + 4 * r3] + sub r4, 1 + ja .loop + AVX2_SumWHorizon retrd, mm5, mm0 + vzeroupper + POP_XMM + LOAD_4_PARA_POP +%ifdef X86_32 + pop r6 + pop r5 + pop r4 +%endif + ret + +%endif + +;*********************************************************************** +; +;Pixel_satd_wxh_avx2 END +; +;*********************************************************************** + +;*********************************************************************** +; +;Pixel_sad_wxh_sse2 BEGIN +; +;*********************************************************************** + +%macro SSE2_GetSad2x16 0 + lea r0, [r0+2*r1] + lea r2, [r2+2*r3] + movdqu xmm1, [r2] + MOVDQ xmm2, [r0];[eax] must aligned 16 + psadbw xmm1, xmm2 + paddw xmm0, xmm1 + movdqu xmm1, [r2+r3] + MOVDQ xmm2, [r0+r1] + psadbw xmm1, xmm2 + 
paddw xmm0, xmm1 +%endmacro + + +%macro SSE2_GetSad4x16 0 + movdqu xmm0, [r2] + MOVDQ xmm2, [r0] + psadbw xmm0, xmm2 + paddw xmm7, xmm0 + movdqu xmm1, [r2+r3] + MOVDQ xmm2, [r0+r1] + psadbw xmm1, xmm2 + paddw xmm7, xmm1 + movdqu xmm1, [r2+2*r3] + MOVDQ xmm2, [r0+2*r1];[eax] must aligned 16 + psadbw xmm1, xmm2 + paddw xmm7, xmm1 + movdqu xmm1, [r2+r5] + MOVDQ xmm2, [r0+r4] + psadbw xmm1, xmm2 + paddw xmm7, xmm1 +%endmacro + + +%macro SSE2_GetSad8x4 0 + movq xmm0, [r0] + movq xmm1, [r0+r1] + lea r0, [r0+2*r1] + movhps xmm0, [r0] + movhps xmm1, [r0+r1] + + movq xmm2, [r2] + movq xmm3, [r2+r3] + lea r2, [r2+2*r3] + movhps xmm2, [r2] + movhps xmm3, [r2+r3] + psadbw xmm0, xmm2 + psadbw xmm1, xmm3 + paddw xmm6, xmm0 + paddw xmm6, xmm1 +%endmacro + +;*********************************************************************** +; +;int32_t WelsSampleSad16x16_sse2( uint8_t *, int32_t, uint8_t *, int32_t, ) +;First parameter can align to 16 bytes, +;In wels, the third parameter can't align to 16 bytes. +; +;*********************************************************************** +WELS_EXTERN WelsSampleSad16x16_sse2 +%ifdef X86_32 + push r4 + push r5 +%endif + + %assign push_num 2 + LOAD_4_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + lea r4, [3*r1] + lea r5, [3*r3] + + pxor xmm7, xmm7 + SSE2_GetSad4x16 + lea r0, [r0+4*r1] + lea r2, [r2+4*r3] + SSE2_GetSad4x16 + lea r0, [r0+4*r1] + lea r2, [r2+4*r3] + SSE2_GetSad4x16 + lea r0, [r0+4*r1] + lea r2, [r2+4*r3] + SSE2_GetSad4x16 + movhlps xmm0, xmm7 + paddw xmm0, xmm7 + movd retrd, xmm0 + POP_XMM + LOAD_4_PARA_POP +%ifdef X86_32 + pop r5 + pop r4 +%endif + ret + +;*********************************************************************** +; +;int32_t WelsSampleSad16x8_sse2( uint8_t *, int32_t, uint8_t *, int32_t, ) +;First parameter can align to 16 bytes, +;In wels, the third parameter can't align to 16 bytes. 
+; +;*********************************************************************** +WELS_EXTERN WelsSampleSad16x8_sse2 + %assign push_num 0 + LOAD_4_PARA + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + movdqu xmm0, [r2] + MOVDQ xmm2, [r0] + psadbw xmm0, xmm2 + movdqu xmm1, [r2+r3] + MOVDQ xmm2, [r0+r1] + psadbw xmm1, xmm2 + paddw xmm0, xmm1 + + SSE2_GetSad2x16 + SSE2_GetSad2x16 + SSE2_GetSad2x16 + + movhlps xmm1, xmm0 + paddw xmm0, xmm1 + movd retrd, xmm0 + LOAD_4_PARA_POP + ret + + + +WELS_EXTERN WelsSampleSad8x16_sse2 + %assign push_num 0 + LOAD_4_PARA + PUSH_XMM 7 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + pxor xmm6, xmm6 + + SSE2_GetSad8x4 + lea r0, [r0+2*r1] + lea r2, [r2+2*r3] + SSE2_GetSad8x4 + lea r0, [r0+2*r1] + lea r2, [r2+2*r3] + SSE2_GetSad8x4 + lea r0, [r0+2*r1] + lea r2, [r2+2*r3] + SSE2_GetSad8x4 + + movhlps xmm0, xmm6 + paddw xmm0, xmm6 + movd retrd, xmm0 + POP_XMM + LOAD_4_PARA_POP + ret + + +%macro CACHE_SPLIT_CHECK 3 ; address, width, cacheline +and %1, 0x1f|(%3>>1) +cmp %1, (32-%2)|(%3>>1) +%endmacro + +WELS_EXTERN WelsSampleSad8x8_sse21 + %assign push_num 0 + mov r2, arg3 + push r2 + CACHE_SPLIT_CHECK r2, 8, 64 + jle near .pixel_sad_8x8_nsplit + pop r2 +%ifdef X86_32 + push r3 + push r4 + push r5 +%endif + %assign push_num 3 + PUSH_XMM 8 + mov r0, arg1 + mov r1, arg2 + SIGN_EXTENSION r1, r1d + pxor xmm7, xmm7 + + ;ecx r2, edx r4, edi r5 + + mov r5, r2 + and r5, 0x07 + sub r2, r5 + mov r4, 8 + sub r4, r5 + + shl r5, 3 + shl r4, 3 + movd xmm5, r5d + movd xmm6, r4d + mov r5, 8 + add r5, r2 + mov r3, arg4 + SIGN_EXTENSION r3, r3d + movq xmm0, [r0] + movhps xmm0, [r0+r1] + + movq xmm1, [r2] + movq xmm2, [r5] + movhps xmm1, [r2+r3] + movhps xmm2, [r5+r3] + psrlq xmm1, xmm5 + psllq xmm2, xmm6 + por xmm1, xmm2 + + psadbw xmm0, xmm1 + paddw xmm7, xmm0 + + lea r0, [r0+2*r1] + lea r2, [r2+2*r3] + lea r5, [r5+2*r3] + + movq xmm0, [r0] + movhps xmm0, [r0+r1] + + movq xmm1, [r2] + movq xmm2, [r5] + movhps xmm1, [r2+r3] + movhps xmm2, [r5+r3] + psrlq 
xmm1, xmm5 + psllq xmm2, xmm6 + por xmm1, xmm2 + + psadbw xmm0, xmm1 + paddw xmm7, xmm0 + + lea r0, [r0+2*r1] + lea r2, [r2+2*r3] + lea r5, [r5+2*r3] + + movq xmm0, [r0] + movhps xmm0, [r0+r1] + + movq xmm1, [r2] + movq xmm2, [r5] + movhps xmm1, [r2+r3] + movhps xmm2, [r5+r3] + psrlq xmm1, xmm5 + psllq xmm2, xmm6 + por xmm1, xmm2 + + psadbw xmm0, xmm1 + paddw xmm7, xmm0 + + lea r0, [r0+2*r1] + lea r2, [r2+2*r3] + lea r5, [r5+2*r3] + + movq xmm0, [r0] + movhps xmm0, [r0+r1] + + movq xmm1, [r2] + movq xmm2, [r5] + movhps xmm1, [r2+r3] + movhps xmm2, [r5+r3] + psrlq xmm1, xmm5 + psllq xmm2, xmm6 + por xmm1, xmm2 + + psadbw xmm0, xmm1 + paddw xmm7, xmm0 + + movhlps xmm0, xmm7 + paddw xmm0, xmm7 + movd retrd, xmm0 + POP_XMM +%ifdef X86_32 + pop r5 + pop r4 + pop r3 +%endif + jmp .return + +.pixel_sad_8x8_nsplit: + + pop r2 + %assign push_num 0 + LOAD_4_PARA + PUSH_XMM 7 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + pxor xmm6, xmm6 + SSE2_GetSad8x4 + lea r0, [r0+2*r1] + lea r2, [r2+2*r3] + SSE2_GetSad8x4 + movhlps xmm0, xmm6 + paddw xmm0, xmm6 + movd retrd, xmm0 + POP_XMM + LOAD_4_PARA_POP +.return: + ret + + +;*********************************************************************** +; +;Pixel_sad_wxh_sse2 END +; +;*********************************************************************** + + +;*********************************************************************** +; +;Pixel_sad_4_wxh_sse2 BEGIN +; +;*********************************************************************** + + +%macro SSE2_Get4LW16Sad 5 ;s-1l, s, s+1l, d, address + psadbw %1, %4 + paddw xmm5, %1 + psadbw %4, %3 + paddw xmm4, %4 + movdqu %4, [%5-1] + psadbw %4, %2 + paddw xmm6, %4 + movdqu %4, [%5+1] + psadbw %4, %2 + paddw xmm7, %4 +%endmacro +WELS_EXTERN WelsSampleSadFour16x16_sse2 + %assign push_num 0 + LOAD_5_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + pxor xmm4, xmm4 ;sad pRefMb-i_stride_ref + pxor xmm5, xmm5 ;sad pRefMb+i_stride_ref + pxor xmm6, xmm6 ;sad pRefMb-1 + pxor 
xmm7, xmm7 ;sad pRefMb+1 + movdqa xmm0, [r0] + sub r2, r3 + movdqu xmm3, [r2] + psadbw xmm3, xmm0 + paddw xmm4, xmm3 + + movdqa xmm1, [r0+r1] + movdqu xmm3, [r2+r3] + psadbw xmm3, xmm1 + paddw xmm4, xmm3 + + movdqu xmm2, [r2+r3-1] + psadbw xmm2, xmm0 + paddw xmm6, xmm2 + + movdqu xmm3, [r2+r3+1] + psadbw xmm3, xmm0 + paddw xmm7, xmm3 + + lea r0, [r0+2*r1] + lea r2, [r2+2*r3] + movdqa xmm2, [r0] + movdqu xmm3, [r2] + SSE2_Get4LW16Sad xmm0, xmm1, xmm2, xmm3, r2 + movdqa xmm0, [r0+r1] + movdqu xmm3, [r2+r3] + SSE2_Get4LW16Sad xmm1, xmm2, xmm0, xmm3, r2+r3 + lea r0, [r0+2*r1] + lea r2, [r2+2*r3] + movdqa xmm1, [r0] + movdqu xmm3, [r2] + SSE2_Get4LW16Sad xmm2, xmm0, xmm1, xmm3, r2 + movdqa xmm2, [r0+r1] + movdqu xmm3, [r2+r3] + SSE2_Get4LW16Sad xmm0, xmm1, xmm2, xmm3, r2+r3 + lea r0, [r0+2*r1] + lea r2, [r2+2*r3] + movdqa xmm0, [r0] + movdqu xmm3, [r2] + SSE2_Get4LW16Sad xmm1, xmm2, xmm0, xmm3, r2 + movdqa xmm1, [r0+r1] + movdqu xmm3, [r2+r3] + SSE2_Get4LW16Sad xmm2, xmm0, xmm1, xmm3, r2+r3 + lea r0, [r0+2*r1] + lea r2, [r2+2*r3] + movdqa xmm2, [r0] + movdqu xmm3, [r2] + SSE2_Get4LW16Sad xmm0, xmm1, xmm2, xmm3, r2 + movdqa xmm0, [r0+r1] + movdqu xmm3, [r2+r3] + SSE2_Get4LW16Sad xmm1, xmm2, xmm0, xmm3, r2+r3 + lea r0, [r0+2*r1] + lea r2, [r2+2*r3] + movdqa xmm1, [r0] + movdqu xmm3, [r2] + SSE2_Get4LW16Sad xmm2, xmm0, xmm1, xmm3, r2 + movdqa xmm2, [r0+r1] + movdqu xmm3, [r2+r3] + SSE2_Get4LW16Sad xmm0, xmm1, xmm2, xmm3, r2+r3 + lea r0, [r0+2*r1] + lea r2, [r2+2*r3] + movdqa xmm0, [r0] + movdqu xmm3, [r2] + SSE2_Get4LW16Sad xmm1, xmm2, xmm0, xmm3, r2 + movdqa xmm1, [r0+r1] + movdqu xmm3, [r2+r3] + SSE2_Get4LW16Sad xmm2, xmm0, xmm1, xmm3, r2+r3 + lea r0, [r0+2*r1] + lea r2, [r2+2*r3] + movdqa xmm2, [r0] + movdqu xmm3, [r2] + SSE2_Get4LW16Sad xmm0, xmm1, xmm2, xmm3, r2 + movdqa xmm0, [r0+r1] + movdqu xmm3, [r2+r3] + SSE2_Get4LW16Sad xmm1, xmm2, xmm0, xmm3, r2+r3 + lea r2, [r2+2*r3] + movdqu xmm3, [r2] + psadbw xmm2, xmm3 + paddw xmm5, xmm2 + + movdqu xmm2, [r2-1] + psadbw 
xmm2, xmm0 + paddw xmm6, xmm2 + + movdqu xmm3, [r2+1] + psadbw xmm3, xmm0 + paddw xmm7, xmm3 + + movdqu xmm3, [r2+r3] + psadbw xmm0, xmm3 + paddw xmm5, xmm0 + + movhlps xmm0, xmm4 + paddw xmm4, xmm0 + movhlps xmm0, xmm5 + paddw xmm5, xmm0 + movhlps xmm0, xmm6 + paddw xmm6, xmm0 + movhlps xmm0, xmm7 + paddw xmm7, xmm0 + punpckldq xmm4, xmm5 + punpckldq xmm6, xmm7 + punpcklqdq xmm4, xmm6 + movdqa [r4],xmm4 + POP_XMM + LOAD_5_PARA_POP + ret + + +WELS_EXTERN WelsSampleSadFour16x8_sse2 + %assign push_num 0 + LOAD_5_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + pxor xmm4, xmm4 ;sad pRefMb-i_stride_ref + pxor xmm5, xmm5 ;sad pRefMb+i_stride_ref + pxor xmm6, xmm6 ;sad pRefMb-1 + pxor xmm7, xmm7 ;sad pRefMb+1 + movdqa xmm0, [r0] + sub r2, r3 + movdqu xmm3, [r2] + psadbw xmm3, xmm0 + paddw xmm4, xmm3 + + movdqa xmm1, [r0+r1] + movdqu xmm3, [r2+r3] + psadbw xmm3, xmm1 + paddw xmm4, xmm3 + + movdqu xmm2, [r2+r3-1] + psadbw xmm2, xmm0 + paddw xmm6, xmm2 + + movdqu xmm3, [r2+r3+1] + psadbw xmm3, xmm0 + paddw xmm7, xmm3 + + lea r0, [r0+2*r1] + lea r2, [r2+2*r3] + movdqa xmm2, [r0] + movdqu xmm3, [r2] + SSE2_Get4LW16Sad xmm0, xmm1, xmm2, xmm3, r2 + movdqa xmm0, [r0+r1] + movdqu xmm3, [r2+r3] + SSE2_Get4LW16Sad xmm1, xmm2, xmm0, xmm3, r2+r3 + lea r0, [r0+2*r1] + lea r2, [r2+2*r3] + movdqa xmm1, [r0] + movdqu xmm3, [r2] + SSE2_Get4LW16Sad xmm2, xmm0, xmm1, xmm3, r2 + movdqa xmm2, [r0+r1] + movdqu xmm3, [r2+r3] + SSE2_Get4LW16Sad xmm0, xmm1, xmm2, xmm3, r2+r3 + lea r0, [r0+2*r1] + lea r2, [r2+2*r3] + movdqa xmm0, [r0] + movdqu xmm3, [r2] + SSE2_Get4LW16Sad xmm1, xmm2, xmm0, xmm3, r2 + movdqa xmm1, [r0+r1] + movdqu xmm3, [r2+r3] + SSE2_Get4LW16Sad xmm2, xmm0, xmm1, xmm3, r2+r3 + lea r2, [r2+2*r3] + movdqu xmm3, [r2] + psadbw xmm0, xmm3 + paddw xmm5, xmm0 + + movdqu xmm0, [r2-1] + psadbw xmm0, xmm1 + paddw xmm6, xmm0 + + movdqu xmm3, [r2+1] + psadbw xmm3, xmm1 + paddw xmm7, xmm3 + + movdqu xmm3, [r2+r3] + psadbw xmm1, xmm3 + paddw xmm5, xmm1 + + movhlps xmm0, 
xmm4 + paddw xmm4, xmm0 + movhlps xmm0, xmm5 + paddw xmm5, xmm0 + movhlps xmm0, xmm6 + paddw xmm6, xmm0 + movhlps xmm0, xmm7 + paddw xmm7, xmm0 + punpckldq xmm4, xmm5 + punpckldq xmm6, xmm7 + punpcklqdq xmm4, xmm6 + movdqa [r4],xmm4 + POP_XMM + LOAD_5_PARA_POP + ret + +WELS_EXTERN WelsSampleSadFour8x16_sse2 + %assign push_num 0 + LOAD_5_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + pxor xmm4, xmm4 ;sad pRefMb-i_stride_ref + pxor xmm5, xmm5 ;sad pRefMb+i_stride_ref + pxor xmm6, xmm6 ;sad pRefMb-1 + pxor xmm7, xmm7 ;sad pRefMb+1 + movq xmm0, [r0] + movhps xmm0, [r0+r1] + sub r2, r3 + movq xmm3, [r2] + movhps xmm3, [r2+r3] + psadbw xmm3, xmm0 + paddw xmm4, xmm3 + + movq xmm1, [r2+r3-1] + movq xmm3, [r2+r3+1] + + lea r0, [r0+2*r1] + lea r2, [r2+2*r3] + movhps xmm1, [r2-1] + movhps xmm3, [r2+1] + psadbw xmm1, xmm0 + paddw xmm6, xmm1 + psadbw xmm3, xmm0 + paddw xmm7, xmm3 + + movq xmm3, [r2] + movhps xmm3, [r2+r3] + psadbw xmm0, xmm3 + paddw xmm5, xmm0 + + movq xmm0, [r0] + movhps xmm0, [r0+r1] + psadbw xmm3, xmm0 + paddw xmm4, xmm3 + + movq xmm1, [r2+r3-1] + movq xmm3, [r2+r3+1] + + lea r0, [r0+2*r1] + lea r2, [r2+2*r3] + movhps xmm1, [r2-1] + movhps xmm3, [r2+1] + + psadbw xmm1, xmm0 + paddw xmm6, xmm1 + psadbw xmm3, xmm0 + paddw xmm7, xmm3 + + movq xmm3, [r2] + movhps xmm3, [r2+r3] + psadbw xmm0, xmm3 + paddw xmm5, xmm0 + + movq xmm0, [r0] + movhps xmm0, [r0+r1] + psadbw xmm3, xmm0 + paddw xmm4, xmm3 + + movq xmm1, [r2+r3-1] + movq xmm3, [r2+r3+1] + + lea r0, [r0+2*r1] + lea r2, [r2+2*r3] + movhps xmm1, [r2-1] + movhps xmm3, [r2+1] + + psadbw xmm1, xmm0 + paddw xmm6, xmm1 + psadbw xmm3, xmm0 + paddw xmm7, xmm3 + + movq xmm3, [r2] + movhps xmm3, [r2+r3] + psadbw xmm0, xmm3 + paddw xmm5, xmm0 + + movq xmm0, [r0] + movhps xmm0, [r0+r1] + psadbw xmm3, xmm0 + paddw xmm4, xmm3 + + movq xmm1, [r2+r3-1] + movq xmm3, [r2+r3+1] + + lea r0, [r0+2*r1] + lea r2, [r2+2*r3] + movhps xmm1, [r2-1] + movhps xmm3, [r2+1] + + psadbw xmm1, xmm0 + paddw xmm6, xmm1 + 
psadbw xmm3, xmm0 + paddw xmm7, xmm3 + + movq xmm3, [r2] + movhps xmm3, [r2+r3] + psadbw xmm0, xmm3 + paddw xmm5, xmm0 + + movq xmm0, [r0] + movhps xmm0, [r0+r1] + psadbw xmm3, xmm0 + paddw xmm4, xmm3 + + movq xmm1, [r2+r3-1] + movq xmm3, [r2+r3+1] + + lea r0, [r0+2*r1] + lea r2, [r2+2*r3] + movhps xmm1, [r2-1] + movhps xmm3, [r2+1] + + psadbw xmm1, xmm0 + paddw xmm6, xmm1 + psadbw xmm3, xmm0 + paddw xmm7, xmm3 + + movq xmm3, [r2] + movhps xmm3, [r2+r3] + psadbw xmm0, xmm3 + paddw xmm5, xmm0 + + movq xmm0, [r0] + movhps xmm0, [r0+r1] + psadbw xmm3, xmm0 + paddw xmm4, xmm3 + + movq xmm1, [r2+r3-1] + movq xmm3, [r2+r3+1] + + lea r0, [r0+2*r1] + lea r2, [r2+2*r3] + movhps xmm1, [r2-1] + movhps xmm3, [r2+1] + + psadbw xmm1, xmm0 + paddw xmm6, xmm1 + psadbw xmm3, xmm0 + paddw xmm7, xmm3 + + movq xmm3, [r2] + movhps xmm3, [r2+r3] + psadbw xmm0, xmm3 + paddw xmm5, xmm0 + + movq xmm0, [r0] + movhps xmm0, [r0+r1] + psadbw xmm3, xmm0 + paddw xmm4, xmm3 + + movq xmm1, [r2+r3-1] + movq xmm3, [r2+r3+1] + + lea r0, [r0+2*r1] + lea r2, [r2+2*r3] + movhps xmm1, [r2-1] + movhps xmm3, [r2+1] + + psadbw xmm1, xmm0 + paddw xmm6, xmm1 + psadbw xmm3, xmm0 + paddw xmm7, xmm3 + + movq xmm3, [r2] + movhps xmm3, [r2+r3] + psadbw xmm0, xmm3 + paddw xmm5, xmm0 + + movq xmm0, [r0] + movhps xmm0, [r0+r1] + psadbw xmm3, xmm0 + paddw xmm4, xmm3 + + movq xmm1, [r2+r3-1] + movq xmm3, [r2+r3+1] + + lea r0, [r0+2*r1] + lea r2, [r2+2*r3] + movhps xmm1, [r2-1] + movhps xmm3, [r2+1] + + psadbw xmm1, xmm0 + paddw xmm6, xmm1 + psadbw xmm3, xmm0 + paddw xmm7, xmm3 + + movq xmm3, [r2] + movhps xmm3, [r2+r3] + psadbw xmm0, xmm3 + paddw xmm5, xmm0 + + movhlps xmm0, xmm4 + paddw xmm4, xmm0 + movhlps xmm0, xmm5 + paddw xmm5, xmm0 + movhlps xmm0, xmm6 + paddw xmm6, xmm0 + movhlps xmm0, xmm7 + paddw xmm7, xmm0 + punpckldq xmm4, xmm5 + punpckldq xmm6, xmm7 + punpcklqdq xmm4, xmm6 + movdqa [r4],xmm4 + POP_XMM + LOAD_5_PARA_POP + ret + + +WELS_EXTERN WelsSampleSadFour8x8_sse2 + %assign push_num 0 + LOAD_5_PARA + 
PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + pxor xmm4, xmm4 ;sad pRefMb-i_stride_ref + pxor xmm5, xmm5 ;sad pRefMb+i_stride_ref + pxor xmm6, xmm6 ;sad pRefMb-1 + pxor xmm7, xmm7 ;sad pRefMb+1 + movq xmm0, [r0] + movhps xmm0, [r0+r1] + sub r2, r3 + movq xmm3, [r2] + movhps xmm3, [r2+r3] + psadbw xmm3, xmm0 + paddw xmm4, xmm3 + + movq xmm1, [r2+r3-1] + movq xmm3, [r2+r3+1] + + lea r0, [r0+2*r1] + lea r2, [r2+2*r3] + movhps xmm1, [r2-1] + movhps xmm3, [r2+1] + psadbw xmm1, xmm0 + paddw xmm6, xmm1 + psadbw xmm3, xmm0 + paddw xmm7, xmm3 + + movq xmm3, [r2] + movhps xmm3, [r2+r3] + psadbw xmm0, xmm3 + paddw xmm5, xmm0 + + movq xmm0, [r0] + movhps xmm0, [r0+r1] + psadbw xmm3, xmm0 + paddw xmm4, xmm3 + + movq xmm1, [r2+r3-1] + movq xmm3, [r2+r3+1] + + lea r0, [r0+2*r1] + lea r2, [r2+2*r3] + movhps xmm1, [r2-1] + movhps xmm3, [r2+1] + + psadbw xmm1, xmm0 + paddw xmm6, xmm1 + psadbw xmm3, xmm0 + paddw xmm7, xmm3 + + movq xmm3, [r2] + movhps xmm3, [r2+r3] + psadbw xmm0, xmm3 + paddw xmm5, xmm0 + + movq xmm0, [r0] + movhps xmm0, [r0+r1] + psadbw xmm3, xmm0 + paddw xmm4, xmm3 + + movq xmm1, [r2+r3-1] + movq xmm3, [r2+r3+1] + + lea r0, [r0+2*r1] + lea r2, [r2+2*r3] + movhps xmm1, [r2-1] + movhps xmm3, [r2+1] + + psadbw xmm1, xmm0 + paddw xmm6, xmm1 + psadbw xmm3, xmm0 + paddw xmm7, xmm3 + + movq xmm3, [r2] + movhps xmm3, [r2+r3] + psadbw xmm0, xmm3 + paddw xmm5, xmm0 + + movq xmm0, [r0] + movhps xmm0, [r0+r1] + psadbw xmm3, xmm0 + paddw xmm4, xmm3 + + + movq xmm1, [r2+r3-1] + movq xmm3, [r2+r3+1] + + lea r0, [r0+2*r1] + lea r2, [r2+2*r3] + movhps xmm1, [r2-1] + movhps xmm3, [r2+1] + + psadbw xmm1, xmm0 + paddw xmm6, xmm1 + psadbw xmm3, xmm0 + paddw xmm7, xmm3 + + movq xmm3, [r2] + movhps xmm3, [r2+r3] + psadbw xmm0, xmm3 + paddw xmm5, xmm0 + + movhlps xmm0, xmm4 + paddw xmm4, xmm0 + movhlps xmm0, xmm5 + paddw xmm5, xmm0 + movhlps xmm0, xmm6 + paddw xmm6, xmm0 + movhlps xmm0, xmm7 + paddw xmm7, xmm0 + punpckldq xmm4, xmm5 + punpckldq xmm6, xmm7 + punpcklqdq 
xmm4, xmm6 + movdqa [r4],xmm4 + POP_XMM + LOAD_5_PARA_POP + ret + +WELS_EXTERN WelsSampleSadFour4x4_sse2 + %assign push_num 0 + LOAD_5_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + movd xmm0, [r0] + movd xmm1, [r0+r1] + lea r0, [r0+2*r1] + movd xmm2, [r0] + movd xmm3, [r0+r1] + punpckldq xmm0, xmm1 + punpckldq xmm2, xmm3 + punpcklqdq xmm0, xmm2 + sub r2, r3 + movd xmm1, [r2] + movd xmm2, [r2+r3] + punpckldq xmm1, xmm2 + movd xmm2, [r2+r3-1] + movd xmm3, [r2+r3+1] + + lea r2, [r2+2*r3] + + movd xmm4, [r2] + movd xmm5, [r2-1] + punpckldq xmm2, xmm5 + movd xmm5, [r2+1] + punpckldq xmm3, xmm5 + + movd xmm5, [r2+r3] + punpckldq xmm4, xmm5 + + punpcklqdq xmm1, xmm4 ;-L + + movd xmm5, [r2+r3-1] + movd xmm6, [r2+r3+1] + + lea r2, [r2+2*r3] + movd xmm7, [r2-1] + punpckldq xmm5, xmm7 + punpcklqdq xmm2, xmm5 ;-1 + movd xmm7, [r2+1] + punpckldq xmm6, xmm7 + punpcklqdq xmm3, xmm6 ;+1 + movd xmm6, [r2] + movd xmm7, [r2+r3] + punpckldq xmm6, xmm7 + punpcklqdq xmm4, xmm6 ;+L + psadbw xmm1, xmm0 + psadbw xmm2, xmm0 + psadbw xmm3, xmm0 + psadbw xmm4, xmm0 + + movhlps xmm0, xmm1 + paddw xmm1, xmm0 + movhlps xmm0, xmm2 + paddw xmm2, xmm0 + movhlps xmm0, xmm3 + paddw xmm3, xmm0 + movhlps xmm0, xmm4 + paddw xmm4, xmm0 + punpckldq xmm1, xmm4 + punpckldq xmm2, xmm3 + punpcklqdq xmm1, xmm2 + movdqa [r4],xmm1 + POP_XMM + LOAD_5_PARA_POP + ret + +;*********************************************************************** +; +;Pixel_sad_4_wxh_sse2 END +; +;*********************************************************************** + +;*********************************************************************** +; int32_t WelsSampleSad4x4_mmx (uint8_t *, int32_t, uint8_t *, int32_t ) +;*********************************************************************** +WELS_EXTERN WelsSampleSad4x4_mmx + %assign push_num 0 + LOAD_4_PARA + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + movd mm0, [r0] + movd mm1, [r0+r1] + punpckldq mm0, mm1 + + movd mm3, [r2] + movd mm4, [r2+r3] + punpckldq 
mm3, mm4 + psadbw mm0, mm3 + + lea r0, [r0+2*r1] + lea r2, [r2+2*r3] + + movd mm1, [r0] + movd mm2, [r0+r1] + punpckldq mm1, mm2 + + movd mm3, [r2] + movd mm4, [r2+r3] + punpckldq mm3, mm4 + psadbw mm1, mm3 + paddw mm0, mm1 + + movd retrd, mm0 + + WELSEMMS + LOAD_4_PARA_POP + ret diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/common/x86/vaa.asm b/TMessagesProj/jni/third_party/openh264/src/codec/common/x86/vaa.asm new file mode 100644 index 000000000..1edb9f6dc --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/common/x86/vaa.asm @@ -0,0 +1,411 @@ +;*! +;* \copy +;* Copyright (c) 2010-2013, Cisco Systems +;* All rights reserved. +;* +;* Redistribution and use in source and binary forms, with or without +;* modification, are permitted provided that the following conditions +;* are met: +;* +;* * Redistributions of source code must retain the above copyright +;* notice, this list of conditions and the following disclaimer. +;* +;* * Redistributions in binary form must reproduce the above copyright +;* notice, this list of conditions and the following disclaimer in +;* the documentation and/or other materials provided with the +;* distribution. +;* +;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +;* POSSIBILITY OF SUCH DAMAGE. 
+;* +;* +;* vaa.asm +;* +;* Abstract +;* sse2 for pVaa routines +;* +;* History +;* 04/14/2010 Created +;* 06/07/2010 Added AnalysisVaaInfoIntra_sse2(ssse3) +;* 06/10/2010 Tune rc_sad_frame_sse2 and got about 40% improvement +;* 08/11/2010 Added abs_difference_mbrow_sse2 & sum_sqrsum_mbrow_sse2 +;* +;*************************************************************************/ +%include "asm_inc.asm" + + +;*********************************************************************** +; Macros and other preprocessor constants +;*********************************************************************** + +; by comparing it outperforms than phaddw(SSSE3) sets +%macro SUM_WORD_8x2_SSE2 2 ; dst(pSrc), tmp + ; @sum_8x2 begin + pshufd %2, %1, 04Eh ; 01001110 B + paddw %1, %2 + pshuflw %2, %1, 04Eh ; 01001110 B + paddw %1, %2 + pshuflw %2, %1, 0B1h ; 10110001 B + paddw %1, %2 + ; end of @sum_8x2 +%endmacro ; END of SUM_WORD_8x2_SSE2 + + +%macro VAA_AVG_BLOCK_SSE2 6 ; dst, t0, t1, t2, t3, t4 + movdqa %1, [r0 ] ; line 0 + movdqa %2, [r0+r1] ; line 1 + movdqa %3, %1 + punpcklbw %1, xmm7 + punpckhbw %3, xmm7 + movdqa %4, %2 + punpcklbw %4, xmm7 + punpckhbw %2, xmm7 + paddw %1, %4 + paddw %2, %3 + movdqa %3, [r0+r2] ; line 2 + movdqa %4, [r0+r3] ; line 3 + movdqa %5, %3 + punpcklbw %3, xmm7 + punpckhbw %5, xmm7 + movdqa %6, %4 + punpcklbw %6, xmm7 + punpckhbw %4, xmm7 + paddw %3, %6 + paddw %4, %5 + paddw %1, %3 ; block 0, 1 + paddw %2, %4 ; block 2, 3 + pshufd %3, %1, 0B1h + pshufd %4, %2, 0B1h + paddw %1, %3 + paddw %2, %4 + movdqa %3, %1 + movdqa %4, %2 + pshuflw %5, %1, 0B1h + pshufhw %6, %3, 0B1h + paddw %1, %5 + paddw %3, %6 + pshuflw %5, %2, 0B1h + pshufhw %6, %4, 0B1h + paddw %2, %5 + paddw %4, %6 + punpcklwd %1, %2 + punpckhwd %3, %4 + punpcklwd %1, %3 + psraw %1, $04 +%endmacro + +%macro VAA_AVG_BLOCK_SSSE3 6 ; dst, t0, t1, t2, t3, t4 + movdqa %1, [r0 ] ; line 0 + movdqa %2, [r0+r1] ; line 1 + movdqa %3, %1 + punpcklbw %1, xmm7 + punpckhbw %3, xmm7 + movdqa %4, %2 + punpcklbw %4, 
xmm7 + punpckhbw %2, xmm7 + paddw %1, %4 + paddw %2, %3 + movdqa %3, [r0+r2] ; line 2 + movdqa %4, [r0+r3] ; line 3 + movdqa %5, %3 + punpcklbw %3, xmm7 + punpckhbw %5, xmm7 + movdqa %6, %4 + punpcklbw %6, xmm7 + punpckhbw %4, xmm7 + paddw %3, %6 + paddw %4, %5 + paddw %1, %3 ; block 0, 1 + paddw %2, %4 ; block 2, 3 + phaddw %1, %2 ; block[0]: 0-15, 16-31; block[1]: 32-47, 48-63; .. + phaddw %1, xmm7 ; block[0]: 0-15; block[1]: 16-31; block[2]: 32-47; block[3]: 48-63; .... + psraw %1, $04 +%endmacro + + + +;*********************************************************************** +; Code +;*********************************************************************** + +SECTION .text + +; , 6/7/2010 + +;*********************************************************************** +; int32_t AnalysisVaaInfoIntra_sse2( uint8_t *pDataY, const int32_t iLineSize ); +;*********************************************************************** +WELS_EXTERN AnalysisVaaInfoIntra_sse2 + + %assign push_num 0 + LOAD_2_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1,r1d + +%ifdef X86_32 + push r3 + push r4 + push r5 + push r6 + %assign push_num push_num+4 +%endif + + mov r5,r7 + and r5,0fh + sub r7,r5 + sub r7,32 + + + mov r2,r1 + sal r2,$01 ;r2 = 2*iLineSize + mov r3,r2 + add r3,r1 ;r3 = 3*iLineSize + + mov r4,r2 + sal r4,$01 ;r4 = 4*iLineSize + + pxor xmm7, xmm7 + + ; loops + VAA_AVG_BLOCK_SSE2 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5 + movq [r7], xmm0 + + lea r0, [r0+r4] + VAA_AVG_BLOCK_SSE2 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5 + movq [r7+8], xmm0 + + lea r0, [r0+r4] + VAA_AVG_BLOCK_SSE2 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5 + movq [r7+16], xmm0 + + lea r0, [r0+r4] + VAA_AVG_BLOCK_SSE2 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5 + movq [r7+24], xmm0 + + movdqa xmm0, [r7] ; block 0~7 + movdqa xmm1, [r7+16] ; block 8~15 + movdqa xmm2, xmm0 + paddw xmm0, xmm1 + SUM_WORD_8x2_SSE2 xmm0, xmm3 + + pmullw xmm1, xmm1 + pmullw xmm2, xmm2 + movdqa xmm3, xmm1 + movdqa xmm4, xmm2 + punpcklwd xmm1, xmm7 + punpckhwd xmm3, xmm7 + 
punpcklwd xmm2, xmm7 + punpckhwd xmm4, xmm7 + paddd xmm1, xmm2 + paddd xmm3, xmm4 + paddd xmm1, xmm3 + pshufd xmm2, xmm1, 01Bh + paddd xmm1, xmm2 + pshufd xmm2, xmm1, 0B1h + paddd xmm1, xmm2 + + + + movd r2d, xmm0 + and r2, 0ffffh ; effective low work truncated + mov r3, r2 + imul r2, r3 + sar r2, $04 + movd retrd, xmm1 + sub retrd, r2d + + add r7,32 + add r7,r5 + +%ifdef X86_32 + pop r6 + pop r5 + pop r4 + pop r3 +%endif + POP_XMM + + ret + +;*********************************************************************** +; int32_t AnalysisVaaInfoIntra_ssse3( uint8_t *pDataY, const int32_t iLineSize ); +;*********************************************************************** +WELS_EXTERN AnalysisVaaInfoIntra_ssse3 + + %assign push_num 0 + LOAD_2_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1,r1d + +%ifdef X86_32 + push r3 + push r4 + push r5 + push r6 + %assign push_num push_num+4 +%endif + + mov r5,r7 + and r5,0fh + sub r7,r5 + sub r7,32 + + + mov r2,r1 + sal r2,$01 ;r2 = 2*iLineSize + mov r3,r2 + add r3,r1 ;r3 = 3*iLineSize + + mov r4,r2 + sal r4,$01 ;r4 = 4*iLineSize + + pxor xmm7, xmm7 + + ; loops + VAA_AVG_BLOCK_SSSE3 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5 + movq [r7],xmm0 + + lea r0,[r0+r4] + VAA_AVG_BLOCK_SSSE3 xmm1, xmm2, xmm3, xmm4, xmm5, xmm6 + movq [r7+8],xmm1 + + + lea r0,[r0+r4] + VAA_AVG_BLOCK_SSSE3 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5 + movq [r7+16],xmm0 + + lea r0,[r0+r4] + VAA_AVG_BLOCK_SSSE3 xmm1, xmm2, xmm3, xmm4, xmm5, xmm6 + movq [r7+24],xmm1 + + + movdqa xmm0,[r7] + movdqa xmm1,[r7+16] + movdqa xmm2, xmm0 + paddw xmm0, xmm1 + SUM_WORD_8x2_SSE2 xmm0, xmm3 ; better performance than that of phaddw sets + + pmullw xmm1, xmm1 + pmullw xmm2, xmm2 + movdqa xmm3, xmm1 + movdqa xmm4, xmm2 + punpcklwd xmm1, xmm7 + punpckhwd xmm3, xmm7 + punpcklwd xmm2, xmm7 + punpckhwd xmm4, xmm7 + paddd xmm1, xmm2 + paddd xmm3, xmm4 + paddd xmm1, xmm3 + pshufd xmm2, xmm1, 01Bh + paddd xmm1, xmm2 + pshufd xmm2, xmm1, 0B1h + paddd xmm1, xmm2 + + + movd r2d, xmm0 + and r2, 0ffffh ; effective 
low work truncated + mov r3, r2 + imul r2, r3 + sar r2, $04 + movd retrd, xmm1 + sub retrd, r2d + + add r7,32 + add r7,r5 +%ifdef X86_32 + pop r6 + pop r5 + pop r4 + pop r3 +%endif + POP_XMM + + ret + +;*********************************************************************** +; uint8_t MdInterAnalysisVaaInfo_sse41( int32_t *pSad8x8 ) +;*********************************************************************** +WELS_EXTERN MdInterAnalysisVaaInfo_sse41 + %assign push_num 0 + LOAD_1_PARA + movdqa xmm0,[r0] + pshufd xmm1, xmm0, 01Bh + paddd xmm1, xmm0 + pshufd xmm2, xmm1, 0B1h + paddd xmm1, xmm2 + psrad xmm1, 02h ; iAverageSad + movdqa xmm2, xmm1 + psrad xmm2, 06h + movdqa xmm3, xmm0 ; iSadBlock + psrad xmm3, 06h + psubd xmm3, xmm2 + pmulld xmm3, xmm3 ; [comment]: pmulld from SSE4.1 instruction sets + pshufd xmm4, xmm3, 01Bh + paddd xmm4, xmm3 + pshufd xmm3, xmm4, 0B1h + paddd xmm3, xmm4 + movd r0d, xmm3 + cmp r0d, 20 ; INTER_VARIANCE_SAD_THRESHOLD + + jb near .threshold_exit + pshufd xmm0, xmm0, 01Bh + pcmpgtd xmm0, xmm1 ; iSadBlock > iAverageSad + movmskps retrd, xmm0 + ret +.threshold_exit: + mov retrd, 15 + ret + +;*********************************************************************** +; uint8_t MdInterAnalysisVaaInfo_sse2( int32_t *pSad8x8 ) +;*********************************************************************** +WELS_EXTERN MdInterAnalysisVaaInfo_sse2 + %assign push_num 0 + LOAD_1_PARA + movdqa xmm0, [r0] + pshufd xmm1, xmm0, 01Bh + paddd xmm1, xmm0 + pshufd xmm2, xmm1, 0B1h + paddd xmm1, xmm2 + psrad xmm1, 02h ; iAverageSad + movdqa xmm2, xmm1 + psrad xmm2, 06h + movdqa xmm3, xmm0 ; iSadBlock + psrad xmm3, 06h + psubd xmm3, xmm2 + + ; to replace pmulld functionality as below + movdqa xmm2, xmm3 + pmuludq xmm2, xmm3 + pshufd xmm4, xmm3, 0B1h + pmuludq xmm4, xmm4 + movdqa xmm5, xmm2 + punpckldq xmm5, xmm4 + punpckhdq xmm2, xmm4 + punpcklqdq xmm5, xmm2 + + pshufd xmm4, xmm5, 01Bh + paddd xmm4, xmm5 + pshufd xmm5, xmm4, 0B1h + paddd xmm5, xmm4 + + movd r0d, xmm5 + 
cmp r0d, 20 ; INTER_VARIANCE_SAD_THRESHOLD + jb near .threshold_exit + pshufd xmm0, xmm0, 01Bh + pcmpgtd xmm0, xmm1 ; iSadBlock > iAverageSad + movmskps retrd, xmm0 + ret +.threshold_exit: + mov retrd, 15 + ret diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/arm/block_add_neon.S b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/arm/block_add_neon.S new file mode 100644 index 000000000..fe29e4db9 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/arm/block_add_neon.S @@ -0,0 +1,129 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifdef HAVE_NEON +#include "arm_arch_common_macro.S" + +.macro ROW_TRANSFORM_1_STEP arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9 +// { // input: src_d[0]~[3], output: e_q[0]~[3]; working: $8 $9 + vaddl.s16 \arg4, \arg0, \arg2 //int32 e[i][0] = src[0] + src[2]; + vsubl.s16 \arg5, \arg0, \arg2 //int32 e[i][1] = src[0] - src[2]; + vshr.s16 \arg8, \arg1, #1 + vshr.s16 \arg9, \arg3, #1 + vsubl.s16 \arg6, \arg8, \arg3 //int32 e[i][2] = (src[1]>>1)-src[3]; + vaddl.s16 \arg7, \arg1, \arg9 //int32 e[i][3] = src[1] + (src[3]>>1); +// } +.endm + +.macro TRANSFORM_4BYTES arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 // both row & col transform used +// { // output: f_q[0]~[3], input: e_q[0]~[3]; + vadd.s32 \arg0, \arg4, \arg7 //int16 f[i][0] = e[i][0] + e[i][3]; + vadd.s32 \arg1, \arg5, \arg6 //int16 f[i][1] = e[i][1] + e[i][2]; + vsub.s32 \arg2, \arg5, \arg6 //int16 f[i][2] = e[i][1] - e[i][2]; + vsub.s32 \arg3, \arg4, \arg7 //int16 f[i][3] = e[i][0] - e[i][3]; +// } +.endm + +.macro COL_TRANSFORM_1_STEP arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 +// { // input: src_q[0]~[3], output: e_q[0]~[3]; + vadd.s32 \arg4, \arg0, \arg2 //int32 e[0][j] = f[0][j] + f[2][j]; + vsub.s32 \arg5, \arg0, \arg2 //int32 e[1][j] = f[0][j] - f[2][j]; + vshr.s32 \arg6, \arg1, #1 + vshr.s32 \arg7, \arg3, #1 + vsub.s32 \arg6, \arg6, \arg3 //int32 e[2][j] = (f[1][j]>>1) - f[3][j]; + vadd.s32 \arg7, \arg1, \arg7 //int32 e[3][j] = f[1][j] + (f[3][j]>>1); +// } +.endm + +// uint8_t *pred, const int32_t stride, int16_t *rs +WELS_ASM_FUNC_BEGIN IdctResAddPred_neon + + vld4.s16 {d0, d1, d2, d3}, [r2] // cost 3 cycles! 
+ + ROW_TRANSFORM_1_STEP d0, d1, d2, d3, q8, q9, q10, q11, d4, d5 + + TRANSFORM_4BYTES q0, q1, q2, q3, q8, q9, q10, q11 + + // transform element 32bits + vtrn.s32 q0, q1 //[0 1 2 3]+[4 5 6 7]-->[0 4 2 6]+[1 5 3 7] + vtrn.s32 q2, q3 //[8 9 10 11]+[12 13 14 15]-->[8 12 10 14]+[9 13 11 15] + vswp d1, d4 //[0 4 2 6]+[8 12 10 14]-->[0 4 8 12]+[2 6 10 14] + vswp d3, d6 //[1 5 3 7]+[9 13 11 15]-->[1 5 9 13]+[3 7 11 15] + + COL_TRANSFORM_1_STEP q0, q1, q2, q3, q8, q9, q10, q11 + + TRANSFORM_4BYTES q0, q1, q2, q3, q8, q9, q10, q11 + + //after clip_table[MAX_NEG_CROP] into [0, 255] + mov r2, r0 + vld1.32 {d20[0]},[r0],r1 + vld1.32 {d20[1]},[r0],r1 + vld1.32 {d22[0]},[r0],r1 + vld1.32 {d22[1]},[r0] + + vrshrn.s32 d16, q0, #6 + vrshrn.s32 d17, q1, #6 + vrshrn.s32 d18, q2, #6 + vrshrn.s32 d19, q3, #6 + + vmovl.u8 q0,d20 + vmovl.u8 q1,d22 + vadd.s16 q0,q8 + vadd.s16 q1,q9 + + vqmovun.s16 d20,q0 + vqmovun.s16 d22,q1 + + vst1.32 {d20[0]},[r2],r1 + vst1.32 {d20[1]},[r2],r1 + vst1.32 {d22[0]},[r2],r1 + vst1.32 {d22[1]},[r2] +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsBlockZero16x16_neon + veor q0, q0 + veor q1, q1 + lsl r1, r1, 1 +.rept 16 + vst1.64 {q0, q1}, [r0], r1 +.endr +WELS_ASM_FUNC_END + +WELS_ASM_FUNC_BEGIN WelsBlockZero8x8_neon + veor q0, q0 + lsl r1, r1, 1 +.rept 8 + vst1.64 {q0}, [r0], r1 +.endr +WELS_ASM_FUNC_END +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/arm/intra_pred_neon.S b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/arm/intra_pred_neon.S new file mode 100644 index 000000000..2e356ee00 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/arm/intra_pred_neon.S @@ -0,0 +1,635 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifdef HAVE_NEON +//Global macro +#include "arm_arch_common_macro.S" + +//Global macro +.macro GET_8BYTE_DATA arg0, arg1, arg2 + vld1.8 {\arg0[0]}, [\arg1], \arg2 + vld1.8 {\arg0[1]}, [\arg1], \arg2 + vld1.8 {\arg0[2]}, [\arg1], \arg2 + vld1.8 {\arg0[3]}, [\arg1], \arg2 + vld1.8 {\arg0[4]}, [\arg1], \arg2 + vld1.8 {\arg0[5]}, [\arg1], \arg2 + vld1.8 {\arg0[6]}, [\arg1], \arg2 + vld1.8 {\arg0[7]}, [\arg1], \arg2 +.endm + + +WELS_ASM_FUNC_BEGIN WelsDecoderI16x16LumaPredV_neon + //Get the top line data to 'q0' + sub r2, r0, r1 + vldm r2, {d0, d1} + + mov r2, r0 + mov r3, #4 + //Set the top line to the each line of MB(16*16) +loop_0_get_i16x16_luma_pred_v: + vst1.8 {d0,d1}, [r2], r1 + vst1.8 {d0,d1}, [r2], r1 + vst1.8 {d0,d1}, [r2], r1 + vst1.8 {d0,d1}, [r2], r1 + subs r3, #1 + bne loop_0_get_i16x16_luma_pred_v + +WELS_ASM_FUNC_END + + + +WELS_ASM_FUNC_BEGIN WelsDecoderI16x16LumaPredH_neon + sub r2, r0, #1 + mov r3, #4 +loop_0_get_i16x16_luma_pred_h: + //Get one byte data from left side + vld1.8 {d0[],d1[]}, [r2], r1 + vld1.8 {d2[],d3[]}, [r2], r1 + vld1.8 {d4[],d5[]}, [r2], r1 + vld1.8 {d6[],d7[]}, [r2], r1 + + //Set the line of MB using the left side byte data + vst1.8 {d0,d1}, [r0], r1 + vst1.8 {d2,d3}, [r0], r1 + vst1.8 {d4,d5}, [r0], r1 + vst1.8 {d6,d7}, [r0], r1 + + subs r3, #1 + bne loop_0_get_i16x16_luma_pred_h + +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsDecoderI16x16LumaPredDc_neon + //stmdb sp!, { r2-r5, lr} + //Get the left vertical line data + sub r2, r0, #1 + GET_8BYTE_DATA d0, r2, r1 + GET_8BYTE_DATA d1, r2, r1 + + //Get the top horizontal line data + sub r2, r0, r1 + vldm r2, {d2, d3} + + //Calculate the sum of top horizontal line data and vertical line data + vpaddl.u8 q0, q0 + vpaddl.u8 q1, q1 + vadd.u16 q0, q0, q1 + vadd.u16 d0, d0, d1 + vpaddl.u16 d0, d0 + vpaddl.u32 d0, d0 + + //Calculate the mean value + vrshr.u16 d0, d0, #5 + vdup.8 q0, d0[0] + + //Set the mean value to the all of member of MB + mov r2, #4 
+loop_0_get_i16x16_luma_pred_dc_both: + vst1.8 {d0,d1}, [r0], r1 + vst1.8 {d0,d1}, [r0], r1 + vst1.8 {d0,d1}, [r0], r1 + vst1.8 {d0,d1}, [r0], r1 + subs r2, #1 + bne loop_0_get_i16x16_luma_pred_dc_both + +WELS_ASM_FUNC_END + + + +.align 3 +//The table for SIMD instruction {(8,7,6,5,4,3,2,1) * 5} +CONST0_GET_I16X16_LUMA_PRED_PLANE: .long 0x191e2328, 0x050a0f14 + +//The table for SIMD instruction {-7,-6,-5,-4,-3,-2,-1,0} +CONST1_GET_I16X16_LUMA_PRED_PLANE: .long 0xfcfbfaf9, 0x00fffefd + + +WELS_ASM_FUNC_BEGIN WelsDecoderI16x16LumaPredPlane_neon + //stmdb sp!, { r2-r5, lr} + + //Load the table {(8,7,6,5,4,3,2,1) * 5} + adr r2, CONST0_GET_I16X16_LUMA_PRED_PLANE + vldr d0, [r2] + + //Pack the top[-1] ~ top[6] to d1 + sub r2, r0, r1 + sub r3, r2, #1 + vld1.8 d1, [r3] + + //Pack the top[8] ~ top[15] to d2 + add r3, #9 + vld1.8 d2, [r3] + + //Save the top[15] to d6 for next step + vdup.u8 d6, d2[7] + + //Get and pack left[-1] ~ left[6] to d4 + sub r3, r2, #1 + GET_8BYTE_DATA d4, r3, r1 + + //Get and pack left[8] ~ left[15] to d3 + add r3, r1 + GET_8BYTE_DATA d3, r3, r1 + + //Save the left[15] to d7 for next step + vdup.u8 d7, d3[7] + + //revert the sequence of d2,d3 + vrev64.8 q1, q1 + + vsubl.u8 q2, d3, d4 //q2={left[8]-left[6],left[9]-left[5],left[10]-left[4], ...} + vsubl.u8 q1, d2, d1 //q1={top[8]-top[6],top[9]-top[5],top[10]-top[4], ...} + + + vmovl.u8 q0, d0 + vmul.s16 q1, q0, q1 //q1 = q1*{(8,7,6,5,4,3,2,1) * 5} + vmul.s16 q2, q0, q2 //q2 = q2*{(8,7,6,5,4,3,2,1) * 5} + + //Calculate the sum of items of q1, q2 + vpadd.s16 d0, d2, d3 + vpadd.s16 d1, d4, d5 + vpaddl.s16 q0, q0 + vpaddl.s32 q0, q0 + + //Get the value of 'b', 'c' and extend to q1, q2. 
+ vrshr.s64 q0, #6 + vdup.s16 q1, d0[0] + vdup.s16 q2, d1[0] + + //Load the table {-7,-6,-5,-4,-3,-2,-1,0} to d0 + adr r2, CONST1_GET_I16X16_LUMA_PRED_PLANE + vld1.32 {d0}, [r2] + + //Get the value of 'a' and save to q3 + vaddl.u8 q3, d6, d7 + vshl.u16 q3, #4 + + //calculate a+'b'*{-7,-6,-5,-4,-3,-2,-1,0} + c*{-7} + vmovl.s8 q0, d0 + vmla.s16 q3, q0, q1 + vmla.s16 q3, q2, d0[0] + + //Calculate a+'b'*{1,2,3,4,5,6,7,8} + c*{-7} + vshl.s16 q8, q1, #3 + vadd.s16 q8, q3 + + //right shift 5 bits and rounding + vqrshrun.s16 d0, q3, #5 + vqrshrun.s16 d1, q8, #5 + + //Set the line of MB + vst1.u32 {d0,d1}, [r0], r1 + + + //Do the same processing for setting other lines + mov r2, #15 +loop_0_get_i16x16_luma_pred_plane: + vadd.s16 q3, q2 + vadd.s16 q8, q2 + vqrshrun.s16 d0, q3, #5 + vqrshrun.s16 d1, q8, #5 + vst1.u32 {d0,d1}, [r0], r1 + subs r2, #1 + bne loop_0_get_i16x16_luma_pred_plane + +WELS_ASM_FUNC_END + +WELS_ASM_FUNC_BEGIN WelsDecoderI4x4LumaPredV_neon + //stmdb sp!, { r2-r5, lr} + //Load the top row (4 bytes) + sub r2, r0, r1 + ldr r2, [r2] + + //Set the luma MB using top line + str r2, [r0], r1 + str r2, [r0], r1 + str r2, [r0], r1 + str r2, [r0] + +WELS_ASM_FUNC_END + + + +WELS_ASM_FUNC_BEGIN WelsDecoderI4x4LumaPredH_neon + //stmdb sp!, { r2-r5, lr} + //Load the left column (4 bytes) + sub r2, r0, #1 + vld1.8 {d0[]}, [r2], r1 + vld1.8 {d1[]}, [r2], r1 + vld1.8 {d2[]}, [r2], r1 + vld1.8 {d3[]}, [r2] + + //Set the luma MB using the left side byte + vst1.32 {d0[0]}, [r0], r1 + vst1.32 {d1[0]}, [r0], r1 + vst1.32 {d2[0]}, [r0], r1 + vst1.32 {d3[0]}, [r0] + +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsDecoderI4x4LumaPredDDL_neon + //stmdb sp!, { r2-r5, lr} + //Load the top row data(8 bytes) + sub r2, r0, r1 + vld1.32 {d0}, [r2] + + //For "t7 + (t7<<1)" + vdup.8 d1, d0[7] + + //calculate "t0+t1,t1+t2,t2+t3...t6+t7,t7+t7" + vext.8 d1, d0, d1, #1 + vaddl.u8 q1, d1, d0 + + //calculate "x,t0+t1+t1+t2,t1+t2+t2+t3,...t5+t6+t6+t7,t6+t7+t7+t7" + vext.8 q2, q1, q1, #14 + 
vadd.u16 q0, q1, q2 + + //right shift 2 bits and rounding + vqrshrn.u16 d0, q0, #2 + + //Save "ddl0, ddl1, ddl2, ddl3" + vext.8 d1, d0, d0, #1 + vst1.32 d1[0], [r0], r1 + + //Save "ddl1, ddl2, ddl3, ddl4" + vext.8 d1, d0, d0, #2 + vst1.32 d1[0], [r0], r1 + + //Save "ddl2, ddl3, ddl4, ddl5" + vext.8 d1, d0, d0, #3 + vst1.32 d1[0], [r0], r1 + + //Save "ddl3, ddl4, ddl5, ddl6" + vst1.32 d0[1], [r0] + +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsDecoderI4x4LumaPredDDR_neon + //stmdb sp!, { r2-r5, lr} + //Load the top row (4 bytes) + sub r2, r0, r1 + vld1.32 {d0[1]}, [r2] + + //Load the left column (5 bytes) + sub r2, #1 + vld1.8 {d0[3]}, [r2], r1 + vld1.8 {d0[2]}, [r2], r1 + vld1.8 {d0[1]}, [r2], r1 + vld1.8 {d0[0]}, [r2], r1 + vld1.8 {d1[7]}, [r2] //For packing the right sequence to do SIMD processing + + + vext.8 d2, d1, d0, #7 //d0:{L2,L1,L0,LT,T0,T1,T2,T3} + //d2:{L3,L2,L1,L0,LT,T0,T1,T2} + + //q2:{L2+L3,L1+L2,L0+L1...T1+T2,T2+T3} + vaddl.u8 q2, d2, d0 + + //q1:{TL0+LT0,LT0+T01,...L12+L23} + vext.8 q3, q3, q2, #14 + vadd.u16 q1, q2, q3 + + //right shift 2 bits and rounding + vqrshrn.u16 d0, q1, #2 + + //Adjust the data sequence for setting luma MB of 'pred' + vst1.32 d0[1], [r0], r1 + vext.8 d0, d0, d0, #7 + vst1.32 d0[1], [r0], r1 + vext.8 d0, d0, d0, #7 + vst1.32 d0[1], [r0], r1 + vext.8 d0, d0, d0, #7 + vst1.32 d0[1], [r0] + +WELS_ASM_FUNC_END + + + +WELS_ASM_FUNC_BEGIN WelsDecoderI4x4LumaPredVL_neon + //stmdb sp!, { r2-r5, lr} + //Load the top row (8 bytes) + sub r2, r0, r1 + vld1.32 {d0}, [r2] + + + vext.8 d1, d0, d0, #1 + vaddl.u8 q1, d1, d0 //q1:{t0+t1,t1+t2,t2+t3...t5+t6,x,x} + + vext.8 q2, q1, q1, #2 + vadd.u16 q2, q1, q2 //q2:{t0+t1+t1+t2,t1+t2+t2+t3,...t4+t5+t5+t6,x,x} + + //calculate the "vl0,vl1,vl2,vl3,vl4" + vqrshrn.u16 d0, q1, #1 + + //calculate the "vl5,vl6,vl7,vl8,vl9" + vqrshrn.u16 d1, q2, #2 + + //Adjust the data sequence for setting the luma MB + vst1.32 d0[0], [r0], r1 + vst1.32 d1[0], [r0], r1 + vext.8 d0, d0, d0, #1 + vext.8 d1, d1, d1, 
#1 + vst1.32 d0[0], [r0], r1 + vst1.32 d1[0], [r0] + +WELS_ASM_FUNC_END + + + +WELS_ASM_FUNC_BEGIN WelsDecoderI4x4LumaPredVR_neon + //stmdb sp!, { r2-r5, lr} + //Load the top row (4 bytes) + sub r2, r0, r1 + vld1.32 {d0[1]}, [r2] + + //Load the left column (4 bytes) + sub r2, #1 + vld1.8 {d0[3]}, [r2], r1 + vld1.8 {d0[2]}, [r2], r1 + vld1.8 {d0[1]}, [r2], r1 + vld1.8 {d0[0]}, [r2] + + + vext.8 d1, d0, d0, #7 + vaddl.u8 q1, d0, d1 //q1:{X,L2+L1,L1+L0,L0+LT,LT+T0,T0+T1,T1+T2,T2+T3} + + vext.u8 q2, q1, q1, #14 + vadd.u16 q2, q2, q1 //q2:{X,L2+L1+L1+L0,L1+L0+L0+LT,...T1+T2+T2+T3} + + //Calculate the vr0 ~ vr9 + vqrshrn.u16 d1, q2, #2 + vqrshrn.u16 d0, q1, #1 + + //Adjust the data sequence for setting the luma MB + vst1.32 d0[1], [r0], r1 + vst1.32 d1[1], [r0], r1 + add r2, r0, r1 + vst1.8 d1[3], [r0]! + vst1.16 d0[2], [r0]! + vst1.8 d0[6], [r0]! + vst1.8 d1[2], [r2]! + vst1.16 d1[2], [r2]! + vst1.8 d1[6], [r2] +WELS_ASM_FUNC_END + + + +WELS_ASM_FUNC_BEGIN WelsDecoderI4x4LumaPredHU_neon + //stmdb sp!, { r2-r5, lr} + //Load the left column data + sub r2, r0, #1 + mov r3, #3 + mul r3, r1 + add r3, r2 + vld1.8 {d0[]}, [r3] + vld1.8 {d0[4]}, [r2], r1 + vld1.8 {d0[5]}, [r2], r1 + vld1.8 {d0[6]}, [r2], r1 //d0:{L3,L3,L3,L3,L0,L1,L2,L3} + + vext.8 d1, d0, d0, #1 + vaddl.u8 q2, d0, d1 //q2:{L3+L3,L3+L3,L3+L3,L3+L0,L0+L1,L1+L2,L2+L3,L3+L3} + + vext.u8 d2, d5, d4, #2 + vadd.u16 d3, d2, d5 //d3:{L0+L1+L1+L2,L1+L2+L2+L3,L2+L3+L3+L3,L3+L3+L3+L3} + + //Calculate the hu0 ~ hu5 + vqrshrn.u16 d2, q2, #1 + vqrshrn.u16 d1, q1, #2 + + //Adjust the data sequence for setting the luma MB + vzip.8 d2, d1 + vst1.32 d1[0], [r0], r1 + vext.8 d2, d1, d1, #2 + vst1.32 d2[0], [r0], r1 + vst1.32 d1[1], [r0], r1 + vst1.32 d0[0], [r0] + +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsDecoderI4x4LumaPredHD_neon + //stmdb sp!, { r2-r5, lr} + //Load the data + sub r2, r0, r1 + sub r2, #1 + vld1.32 {d0[1]}, [r2], r1 + vld1.8 {d0[3]}, [r2], r1 + vld1.8 {d0[2]}, [r2], r1 + vld1.8 {d0[1]}, [r2], r1 + vld1.8 
{d0[0]}, [r2] //d0:{L3,L2,L1,L0,LT,T0,T1,T2} + + + vext.8 d1, d0, d0, #7 + vaddl.u8 q1, d0, d1 //q1:{x,L3+L2,L2+L1,L1+L0,L0+LT,LT+T0,T0+T1,T1+T2} + + vext.u8 q2, q1, q1, #14 //q2:{x,x, L3+L2,L2+L1,L1+L0,L0+LT,LT+T0,T0+T1} + vadd.u16 q3, q2, q1 //q3:{x,x,L3+L2+L2+L1,L2+L1+L1+L0,L1+L0+L0+LT,L0+LT+LT+T0,LT+T0+T0+T1,T0+T1+T1+T2} + + //Calculate the hd0~hd9 + vqrshrn.u16 d1, q3, #2 + vqrshrn.u16 d0, q2, #1 + + //Adjust the data sequence for setting the luma MB + vmov d3, d1 + vtrn.8 d0, d1 + vext.u8 d2, d1, d1, #6 + vst2.16 {d2[3], d3[3]}, [r0], r1 + vst2.16 {d0[2], d1[2]}, [r0], r1 + vmov d3, d0 + vst2.16 {d2[2], d3[2]}, [r0], r1 + vst2.16 {d0[1], d1[1]}, [r0] + +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsDecoderIChromaPredV_neon + //stmdb sp!, { r2-r5, lr} + //Get the top row (8 byte) + sub r2, r0, r1 + vldr d0, [r2] + + //Set the chroma MB using top row data + vst1.8 {d0}, [r0], r1 + vst1.8 {d0}, [r0], r1 + vst1.8 {d0}, [r0], r1 + vst1.8 {d0}, [r0], r1 + vst1.8 {d0}, [r0], r1 + vst1.8 {d0}, [r0], r1 + vst1.8 {d0}, [r0], r1 + vst1.8 {d0}, [r0] + +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsDecoderIChromaPredH_neon + //stmdb sp!, { r2-r5, lr} + ////Get the left column (8 byte) + sub r2, r0, #1 + vld1.8 {d0[]}, [r2], r1 + vld1.8 {d1[]}, [r2], r1 + vld1.8 {d2[]}, [r2], r1 + vld1.8 {d3[]}, [r2], r1 + vld1.8 {d4[]}, [r2], r1 + vld1.8 {d5[]}, [r2], r1 + vld1.8 {d6[]}, [r2], r1 + vld1.8 {d7[]}, [r2] + + //Set the chroma MB using left column data + vst1.8 {d0}, [r0], r1 + vst1.8 {d1}, [r0], r1 + vst1.8 {d2}, [r0], r1 + vst1.8 {d3}, [r0], r1 + vst1.8 {d4}, [r0], r1 + vst1.8 {d5}, [r0], r1 + vst1.8 {d6}, [r0], r1 + vst1.8 {d7}, [r0] + +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsDecoderIChromaPredDc_neon + //stmdb sp!, { r2-r5, lr} + //Load the left column data (8 bytes) + sub r2, r0, #1 + GET_8BYTE_DATA d0, r2, r1 + + //Load the top row data (8 bytes) + sub r2, r0, r1 + vldr d1, [r2] + + //Calculate the sum of left column and top row + vpaddl.u8 q0, q0 + vpaddl.u16 
q0, q0 + vadd.u32 d2, d0, d1 //'m1' save to d2 + + vrshr.u32 q0, q0, #2 //calculate 'm2','m3' + vrshr.u32 d2, d2, #3 //calculate 'm4' + + //duplicate the 'mx' to a vector line + vdup.8 d4, d2[0] + vdup.8 d5, d1[4] + vdup.8 d6, d0[4] + vdup.8 d7, d2[4] + + //Set the chroma MB + vst2.32 {d4[0],d5[0]}, [r0], r1 + vst2.32 {d4[0],d5[0]}, [r0], r1 + vst2.32 {d4[0],d5[0]}, [r0], r1 + vst2.32 {d4[0],d5[0]}, [r0], r1 + vst2.32 {d6[0],d7[0]}, [r0], r1 + vst2.32 {d6[0],d7[0]}, [r0], r1 + vst2.32 {d6[0],d7[0]}, [r0], r1 + vst2.32 {d6[0],d7[0]}, [r0] + +WELS_ASM_FUNC_END + + +//Table {{1,2,3,4,1,2,3,4}*17} +CONST0_GET_I_CHROMA_PRED_PLANE: .long 0x44332211, 0x44332211//0x140f0a05, 0x28231e19 +//Table {-3,-2,-1,0,1,2,3,4} +CONST1_GET_I_CHROMA_PRED_PLANE: .long 0xfffefffd, 0x0000ffff,0x00020001,0x00040003 + +WELS_ASM_FUNC_BEGIN WelsDecoderIChromaPredPlane_neon + //stmdb sp!, { r2-r5, lr} + //Load the top row data + sub r2, r0, #1 + sub r2, r1 + vld1.32 {d1[0]}, [r2] + add r2, #5 + vld1.32 {d0[0]}, [r2] + + //Load the left column data + sub r2, #5 + vld1.8 {d1[4]}, [r2], r1 + vld1.8 {d1[5]}, [r2], r1 + vld1.8 {d1[6]}, [r2], r1 + vld1.8 {d1[7]}, [r2], r1 //d1:{LT,T0,T1,T2,LT,L0,L1,L2} + add r2, r1 + vld1.8 {d0[4]}, [r2], r1 + vld1.8 {d0[5]}, [r2], r1 + vld1.8 {d0[6]}, [r2], r1 + vld1.8 {d0[7]}, [r2] //d0:{T4,T5,T6,T7,L4,L5,L6,L7} + + + //Save T7 to d3 for next step + vdup.u8 d3, d0[3] + //Save L7 to d4 for next step + vdup.u8 d4, d0[7] + + //Calculate the value of 'a' and save to q2 + vaddl.u8 q2, d3, d4 + vshl.u16 q2, #4 + + //Load the table {{1,2,3,4,1,2,3,4}*17} + adr r2, CONST0_GET_I_CHROMA_PRED_PLANE + vld1.32 {d2}, [r2] + + //Calculate the 'b','c', and save to q0 + vrev32.8 d1, d1 + vsubl.u8 q0, d0, d1 + vmovl.u8 q1, d2 + vmul.s16 q0, q1 + vpaddl.s16 q0, q0 + vpaddl.s32 q0, q0 + vrshr.s64 q0, #5 + + //Load the table {-3,-2,-1,0,1,2,3,4} to q3 + adr r2, CONST1_GET_I_CHROMA_PRED_PLANE + vld1.32 {d6, d7}, [r2] + + //Duplicate the 'b','c' to q0, q1 for SIMD instruction + vdup.s16 
q1, d1[0] + vdup.s16 q0, d0[0] + + //Calculate the "(a + b * (j - 3) + c * (- 3) + 16) >> 5;" + vmla.s16 q2, q0, q3 + vmla.s16 q2, q1, d6[0] + vqrshrun.s16 d0, q2, #5 + + //Set a line of chroma MB + vst1.u32 {d0}, [r0], r1 + + //Do the same processing for each line. + mov r2, #7 +loop_0_get_i_chroma_pred_plane: + vadd.s16 q2, q1 + vqrshrun.s16 d0, q2, #5 + vst1.u32 {d0}, [r0], r1 + subs r2, #1 + bne loop_0_get_i_chroma_pred_plane + +WELS_ASM_FUNC_END + +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/arm64/block_add_aarch64_neon.S b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/arm64/block_add_aarch64_neon.S new file mode 100644 index 000000000..417961ee9 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/arm64/block_add_aarch64_neon.S @@ -0,0 +1,131 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifdef HAVE_NEON_AARCH64 +#include "arm_arch64_common_macro.S" + +.macro ROW_TRANSFORM_1_STEP arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9 +// { // input: src_d[0]~[3], output: e_q[0]~[3]; working: \arg8\() \arg9\() + + saddl \arg4\().4s, \arg0\().4h, \arg2\().4h //int32 e[i][0] = src[0] + src[2]; + ssubl \arg5\().4s, \arg0\().4h, \arg2\().4h //int32 e[i][1] = src[0] - src[2]; + sshr \arg8\().4h, \arg1\().4h, #1 + sshr \arg9\().4h, \arg3\().4h, #1 + ssubl \arg6\().4s, \arg8\().4h, \arg3\().4h //int32 e[i][2] = (src[1]>>1)-src[3]; + saddl \arg7\().4s, \arg1\().4h, \arg9\().4h //int32 e[i][3] = src[1] + (src[3]>>1); +// } +.endm + +.macro TRANSFORM_4BYTES arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 +// both row & col transform used +// { // output: f_q[0]~[3], input: e_q[0]~[3]; + add \arg0\().4s, \arg4\().4s, \arg7\().4s //int16 f[i][0] = e[i][0] + e[i][3]; + add \arg1\().4s, \arg5\().4s, \arg6\().4s //int16 f[i][1] = e[i][1] + e[i][2]; + sub \arg2\().4s, \arg5\().4s, \arg6\().4s //int16 f[i][2] = e[i][1] - e[i][2]; + sub \arg3\().4s, \arg4\().4s, \arg7\().4s //int16 f[i][3] = e[i][0] - e[i][3]; +// } +.endm + +.macro COL_TRANSFORM_1_STEP arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 +// { // input: src_q[0]~[3], output: e_q[0]~[3]; + add \arg4\().4s, \arg0\().4s, \arg2\().4s //int32 e[0][j] = f[0][j] + f[2][j]; + sub \arg5\().4s, \arg0\().4s, \arg2\().4s //int32 e[1][j] = f[0][j] - f[2][j]; + sshr 
\arg6\().4s, \arg1\().4s, #1 + sshr \arg7\().4s, \arg3\().4s, #1 + sub \arg6\().4s, \arg6\().4s, \arg3\().4s //int32 e[2][j] = (f[1][j]>>1) - f[3][j]; + add \arg7\().4s, \arg1\().4s, \arg7\().4s //int32 e[3][j] = f[1][j] + (f[3][j]>>1); +// } +.endm + +// uint8_t *pred, const int32_t stride, int16_t *rs +WELS_ASM_AARCH64_FUNC_BEGIN IdctResAddPred_AArch64_neon + SIGN_EXTENSION x1,w1 + ld4 {v0.4h, v1.4h, v2.4h, v3.4h}, [x2] // cost 3 cycles! + ROW_TRANSFORM_1_STEP v0, v1, v2, v3, v16, v17, v18, v19, v4, v5 + TRANSFORM_4BYTES v0, v1, v2, v3, v16, v17, v18, v19 + // transform element 32bits + trn1 v16.4s, v0.4s, v1.4s //[0 1 2 3]+[4 5 6 7]-->[0 4 2 6] + trn2 v17.4s, v0.4s, v1.4s //[0 1 2 3]+[4 5 6 7]-->[1 5 3 7] + trn1 v18.4s, v2.4s, v3.4s //[8 9 10 11]+[12 13 14 15]-->[8 12 10 14] + trn2 v19.4s, v2.4s, v3.4s //[8 9 10 11]+[12 13 14 15]-->[9 13 11 15] + trn1 v0.2d, v16.2d, v18.2d //[0 4 2 6]+[8 12 10 14]-->[0 4 8 12] + trn2 v2.2d, v16.2d, v18.2d //[0 4 2 6]+[8 12 10 14]-->[2 6 10 14] + trn1 v1.2d, v17.2d, v19.2d //[1 5 3 7]+[9 13 11 15]-->[1 5 9 13] + trn2 v3.2d, v17.2d, v19.2d //[1 5 3 7]+[9 13 11 15]-->[3 7 11 15] + COL_TRANSFORM_1_STEP v0, v1, v2, v3, v16, v17, v18, v19 + + TRANSFORM_4BYTES v0, v1, v2, v3, v16, v17, v18, v19 + //after clip_table[MAX_NEG_CROP] into [0, 255] + mov x2, x0 + ld1 {v16.s}[0],[x0],x1 + ld1 {v16.s}[1],[x0],x1 + ld1 {v17.s}[0],[x0],x1 + ld1 {v17.s}[1],[x0] + + rshrn v0.4h, v0.4s, #6 + rshrn2 v0.8h, v1.4s, #6 + rshrn v1.4h, v2.4s, #6 + rshrn2 v1.8h, v3.4s, #6 + + uxtl v2.8h,v16.8b + uxtl v3.8h,v17.8b + add v2.8h, v2.8h, v0.8h + add v3.8h, v3.8h, v1.8h + + sqxtun v0.8b,v2.8h + sqxtun v1.8b,v3.8h + + st1 {v0.s}[0],[x2],x1 + st1 {v0.s}[1],[x2],x1 + st1 {v1.s}[0],[x2],x1 + st1 {v1.s}[1],[x2] +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsBlockZero16x16_AArch64_neon + eor v0.16b, v0.16b, v0.16b + eor v1.16b, v1.16b, v1.16b + SIGN_EXTENSION x1,w1 + lsl x1, x1, 1 +.rept 16 + st1 {v0.16b, v1.16b}, [x0], x1 +.endr 
+WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsBlockZero8x8_AArch64_neon + eor v0.16b, v0.16b, v0.16b + SIGN_EXTENSION x1, w1 + lsl x1, x1, 1 +.rept 8 + st1 {v0.16b}, [x0], x1 +.endr +WELS_ASM_AARCH64_FUNC_END +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/arm64/intra_pred_aarch64_neon.S b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/arm64/intra_pred_aarch64_neon.S new file mode 100644 index 000000000..f8f9e03c7 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/arm64/intra_pred_aarch64_neon.S @@ -0,0 +1,524 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifdef HAVE_NEON_AARCH64 +#include "arm_arch64_common_macro.S" + +// for Luma 4x4 +WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderI4x4LumaPredH_AArch64_neon + sxtw x1, w1 + sub x2, x0, #1 +.rept 4 + ld1r {v0.8b}, [x2], x1 + st1 {v0.S}[0], [x0], x1 +.endr +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderI4x4LumaPredDc_AArch64_neon + sxtw x1, w1 + sub x2, x0, x1 + sub x3, x0, #1 + ldr s0, [x2] + ld1 {v0.b}[4], [x3], x1 + ld1 {v0.b}[5], [x3], x1 + ld1 {v0.b}[6], [x3], x1 + ld1 {v0.b}[7], [x3] + uaddlv h0, v0.8b + uqrshrn b0, h0, #3 + dup v0.8b, v0.b[0] +.rept 4 + st1 {v0.S}[0], [x0], x1 +.endr +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderI4x4LumaPredDcTop_AArch64_neon + sxtw x1, w1 + sub x2, x0, x1 + sub v0.8b, v0.8b, v0.8b + ldr s0, [x2] + uaddlv h0, v0.8b + uqrshrn v0.8b, v0.8h, #2 + dup v0.8b, v0.b[0] +.rept 4 + st1 {v0.S}[0], [x0], x1 +.endr +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderI4x4LumaPredDDL_AArch64_neon + sxtw x1, w1 + sub x2, x0, x1 + ld1 {v0.8b}, [x2] + dup v1.8b, v0.b[7] + ext v2.8b, v0.8b, v1.8b, #1 + ext v3.8b, v0.8b, v1.8b, #2 + ushll v2.8h, v2.8b, #1 + uaddl v1.8h, v3.8b, v0.8b + add v1.8h, v1.8h, v2.8h + uqrshrn v1.8b, v1.8h, #2 + st1 {v1.S}[0], [x0], x1 + ext v0.8b, v1.8b, v2.8b, #1 + st1 {v0.S}[0], [x0], x1 + ext v0.8b, v1.8b, v2.8b, #2 + st1 {v0.S}[0], [x0], x1 + ext v0.8b, v1.8b, v2.8b, #3 + st1 {v0.S}[0], [x0] 
+WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderI4x4LumaPredDDLTop_AArch64_neon + sxtw x1, w1 + sub x2, x0, x1 + ld1 {v0.8b}, [x2] + dup v1.8b, v0.b[3] + mov v0.S[1], v1.S[0] + ext v2.8b, v0.8b, v1.8b, #1 + ext v3.8b, v0.8b, v1.8b, #2 + ushll v2.8h, v2.8b, #1 + uaddl v1.8h, v3.8b, v0.8b + add v1.8h, v1.8h, v2.8h + uqrshrn v1.8b, v1.8h, #2 + st1 {v1.S}[0], [x0], x1 + ext v0.8b, v1.8b, v2.8b, #1 + st1 {v0.S}[0], [x0], x1 + ext v0.8b, v1.8b, v2.8b, #2 + st1 {v0.S}[0], [x0], x1 + ext v0.8b, v1.8b, v2.8b, #3 + st1 {v0.S}[0], [x0] +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderI4x4LumaPredVL_AArch64_neon + sxtw x1, w1 + sub x2, x0, x1 + ld1 {v0.8b}, [x2] + ext v1.8b, v0.8b, v0.8b, #1 + uaddl v1.8h, v1.8b, v0.8b + uqrshrn v0.8b, v1.8h, #1 // v0.8b is VL0, VL1, VL2, VL3, VL4, ... + ext v2.16b, v1.16b, v1.16b, #2 + add v1.8h, v2.8h, v1.8h + uqrshrn v1.8b, v1.8h, #2 // v1.8b is VL5, VL6, VL7, VL8, VL9 + st1 {v0.s}[0], [x0], x1 // write the first row + st1 {v1.s}[0], [x0], x1 // write the second row + ext v3.8b, v0.8b, v0.8b, #1 + ext v2.8b, v1.8b, v1.8b, #1 + st1 {v3.s}[0], [x0], x1 // write the third row + st1 {v2.s}[0], [x0] // write the fourth row +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderI4x4LumaPredVLTop_AArch64_neon + sxtw x1, w1 + sub x2, x0, x1 + ld1 {v0.8b}, [x2] + dup v1.8b, v0.b[3] + mov v0.s[1], v1.s[0] + ext v1.8b, v0.8b, v0.8b, #1 + uaddl v1.8h, v1.8b, v0.8b + uqrshrn v0.8b, v1.8h, #1 // v0.8b is VL0, VL1, VL2, VL3, VL4, ... 
+ ext v2.16b, v1.16b, v1.16b, #2 + add v1.8h, v2.8h, v1.8h + uqrshrn v1.8b, v1.8h, #2 // v1.8b is VL5, VL6, VL7, VL8, VL9 + st1 {v0.s}[0], [x0], x1 // write the first row + st1 {v1.s}[0], [x0], x1 // write the second row + ext v3.8b, v0.8b, v0.8b, #1 + ext v2.8b, v1.8b, v1.8b, #1 + st1 {v3.s}[0], [x0], x1 // write the third row + st1 {v2.s}[0], [x0] // write the fourth row +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderI4x4LumaPredVR_AArch64_neon + sxtw x1, w1 + sub x2, x0, x1 + ld1 {v0.s}[1], [x2] + sub x2, x2, #1 + ld1 {v0.b}[3], [x2], x1 + ld1 {v0.b}[2], [x2], x1 + ld1 {v0.b}[1], [x2], x1 + ld1 {v0.b}[0], [x2] // v0.8b l2, l1, l0, lt, t0, t1, t2, t3 + + ext v1.8b, v0.8b, v0.8b, #7 + uaddl v2.8h, v1.8b, v0.8b //v2:{X,L2+L1,L1+L0,L0+LT,LT+T0,T0+T1,T1+T2,T2+T3} + ext v1.16b, v2.16b, v2.16b, #14 + add v3.8h, v2.8h, v1.8h //v3:{X,L2+L1+L1+L0,L1+L0+L0+LT,...T1+T2+T2+T3} + + uqrshrn v3.8b, v3.8h, #2 + uqrshrn v2.8b, v2.8h, #1 + + st1 {v2.s}[1], [x0], x1 + st1 {v3.s}[1], [x0], x1 + + ext v2.8b, v2.8b, v2.8b, #7 + ins v2.b[4], v3.b[3] + st1 {v2.s}[1], [x0], x1 + + ext v3.8b, v3.8b, v3.8b, #7 + ins v3.b[4], v3.b[3] + st1 {v3.s}[1], [x0], x1 + +WELS_ASM_AARCH64_FUNC_END + + +WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderI4x4LumaPredHU_AArch64_neon + sxtw x1, w1 + sub x2, x0, #1 + mov x3, #3 + mul x3, x3, x1 + add x3, x3, x2 + ld1r {v0.8b}, [x3] + ld1 {v0.b}[4], [x2], x1 + ld1 {v0.b}[5], [x2], x1 + ld1 {v0.b}[6], [x2], x1 //v0:{L3,L3,L3,L3,L0,L1,L2,L3} + + ext v1.8b, v0.8b, v0.8b, #1 + uaddl v2.8h, v0.8b, v1.8b //v2:{L3+L3,L3+L3,L3+L3,L3+L0,L0+L1,L1+L2,L2+L3,L3+L3} + ext v3.16b, v2.16b, v2.16b, #2 + add v3.8h, v3.8h, v2.8h //v3:{x, HU1, HU3, HU5, x} + + uqrshrn v2.8b, v2.8h, #1 // HU0, HU2, HU4 + uqrshrn v3.8b, v3.8h, #2 // HU1, HU3, HU5 + zip2 v3.8b, v2.8b, v3.8b // HU0, HU1, HU2, HU3, HU4, HU5 + mov v3.h[3], v0.h[0] // v3.8b is hu0, hu1, hu2, hu3, hu4, hu5, l3, l3 + ext v2.8b, v3.8b, v0.8b, #2 + st1 {v3.s}[0], [x0], x1 + st1 {v2.s}[0], [x0], x1 + st1 
{v3.s}[1], [x0], x1 + st1 {v0.s}[0], [x0] +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderI4x4LumaPredHD_AArch64_neon + sxtw x1, w1 + sub x2, x0, #1 + sub x2, x2, x1 // x2 points to top left + ld1 {v0.s}[1], [x2], x1 + ld1 {v0.b}[3], [x2], x1 + ld1 {v0.b}[2], [x2], x1 + ld1 {v0.b}[1], [x2], x1 + ld1 {v0.b}[0], [x2] // v0.8b: l3, l2, l1, l0, lt, t0, t1, t2 + ext v1.8b, v0.8b, v0.8b, #1 // v1.8b: l2, l1, l0, lt, t0, t1, t2, l3 + uaddl v2.8h, v0.8b, v1.8b + ext v3.16b, v2.16b, v2.16b, #2 + add v3.8h, v3.8h, v2.8h + uqrshrn v2.8b, v2.8h, #1 // hd8, hd6, hd4, hd0, xxx + uqrshrn v3.8b, v3.8h, #2 // hd9, hd7, hd5, hd1, hd2, hd3 + zip1 v2.8b, v2.8b, v3.8b // hd8, hd9, hd6, hd7, hd4, hd5, hd0, hd1 + mov v1.h[0], v3.h[2] + ext v3.8b, v2.8b, v1.8b, #6 + st1 {v3.s}[0], [x0], x1 + st1 {v2.s}[1], [x0], x1 + ext v3.8b, v2.8b, v1.8b, #2 + st1 {v3.s}[0], [x0], x1 + st1 {v2.s}[0], [x0] +WELS_ASM_AARCH64_FUNC_END + +// for Chroma 8x8 +WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderIChromaPredV_AArch64_neon + sxtw x1, w1 + sub x2, x0, x1 + ld1 {v0.8b}, [x2] +.rept 8 + st1 {v0.8b}, [x0], x1 +.endr +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderIChromaPredH_AArch64_neon + sxtw x1, w1 + sub x2, x0, #1 +.rept 8 + ld1r {v0.8b}, [x2], x1 + st1 {v0.8b}, [x0], x1 +.endr +WELS_ASM_AARCH64_FUNC_END + + +WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderIChromaPredDc_AArch64_neon + sxtw x1, w1 + sub x2, x0, x1 + sub x3, x0, #1 + ld1 {v0.8b}, [x2] + ld1 {v0.b}[8], [x3], x1 + ld1 {v0.b}[9], [x3], x1 + ld1 {v0.b}[10], [x3], x1 + ld1 {v0.b}[11], [x3], x1 + ld1 {v0.b}[12], [x3], x1 + ld1 {v0.b}[13], [x3], x1 + ld1 {v0.b}[14], [x3], x1 + ld1 {v0.b}[15], [x3] + + uaddlp v1.8h, v0.16b + uaddlp v2.4s, v1.8h + ins v3.d[0], v2.d[1] + add v3.2s, v2.2s, v3.2s + urshr v2.4s, v2.4s, #2 + urshr v3.2s, v3.2s, #3 + + dup v0.8b, v3.b[0] + dup v1.8b, v2.b[4] + dup v2.8b, v2.b[12] + dup v3.8b, v3.b[4] + ins v0.s[1], v1.s[0] + ins v2.s[1], v3.s[0] +.rept 4 + st1 {v0.8b}, [x0], x1 +.endr +.rept 
4 + st1 {v2.8b}, [x0], x1 +.endr + +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderIChromaPredDcTop_AArch64_neon + sxtw x1, w1 + sub x2, x0, x1 + ld1 {v0.8b}, [x2] + uaddlp v0.4h, v0.8b + addp v0.8h, v0.8h, v0.8h + dup v1.8h, v0.h[0] + dup v2.8h, v0.h[1] + mov v1.D[1], v2.D[0] + uqrshrn v1.8b, v1.8h, #2 +.rept 8 + st1 {v1.8b}, [x0], x1 +.endr +WELS_ASM_AARCH64_FUNC_END + +.align 4 +intra_1_to_4: .short 17*1, 17*2, 17*3, 17*4, 17*1, 17*2, 17*3, 17*4 +intra_m3_to_p4: .short -3, -2, -1, 0, 1, 2, 3, 4 + +WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderIChromaPredPlane_AArch64_neon + sxtw x1, w1 + sub x2, x0, x1 + sub x2, x2, #1 + mov x3, x2 + // load pTop[2-i] and pLeft[(2-i)*kiStride] + ld1 {v1.b}[3], [x2], #1 + ld1 {v1.b}[2], [x2], #1 + ld1 {v1.b}[1], [x2], #1 + ld1 {v1.b}[0], [x2], #1 + ld1 {v1.b}[7], [x3], x1 + ld1 {v1.b}[6], [x3], x1 + ld1 {v1.b}[5], [x3], x1 + ld1 {v1.b}[4], [x3], x1 + add x2, x2, #1 + add x3, x3, x1 + // load pTop[4+i] and pLeft[(4+i)*kiStride] + ld1 {v0.b}[0], [x2], #1 + ld1 {v0.b}[1], [x2], #1 + ld1 {v0.b}[2], [x2], #1 + ld1 {v0.b}[3], [x2], #1 + ld1 {v0.b}[4], [x3], x1 + ld1 {v0.b}[5], [x3], x1 + ld1 {v0.b}[6], [x3], x1 + ld1 {v0.b}[7], [x3], x1 + + uxtl v1.8h, v1.8b + uxtl v0.8h, v0.8b + ldr q2, intra_1_to_4 + ldr q3, intra_m3_to_p4 + dup v4.8h, v0.h[3] + dup v5.8h, v0.h[7] + add v4.8h, v4.8h, v5.8h + sub v0.8h, v0.8h, v1.8h + shl v4.8h, v4.8h, #4 // v4.8h is a + mul v0.8h, v0.8h, v2.8h // v0.h[0-3] is H, v0.h[4-7] is V + saddlp v0.4s, v0.8h + addp v0.4s, v0.4s, v0.4s // v0.s[0] is H, v0.s[1] is V + sqrshrn v0.4h, v0.4s, #5 + dup v1.8h, v0.h[0] // v1.8h is b + dup v0.8h, v0.h[1] // v0.8h is c + mla v4.8h, v1.8h, v3.8h + mla v4.8h, v0.8h, v3.h[0] + sqrshrun v1.8b, v4.8h, #5 + st1 {v1.8b}, [x0], x1 +.rept 7 + add v4.8h, v4.8h, v0.8h + sqrshrun v1.8b, v4.8h, #5 + st1 {v1.8b}, [x0], x1 +.endr +WELS_ASM_AARCH64_FUNC_END + +//for Luma 16x16 +WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderI16x16LumaPredV_AArch64_neon + sxtw x1, w1 + sub x2, x0, 
x1 + ld1 {v0.16b}, [x2] +.rept 16 + st1 {v0.16b}, [x0], x1 +.endr +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderI16x16LumaPredH_AArch64_neon + sxtw x1, w1 + sub x2, x0, #1 +.rept 16 + ld1r {v0.16b}, [x2], x1 + st1 {v0.16b}, [x0], x1 +.endr +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderI16x16LumaPredDc_AArch64_neon + sxtw x1, w1 + sub x2, x0, x1 + sub x3, x0, #1 + ld1 {v0.16b}, [x2] + ld1 {v1.b}[0], [x3], x1 + ld1 {v1.b}[1], [x3], x1 + ld1 {v1.b}[2], [x3], x1 + ld1 {v1.b}[3], [x3], x1 + ld1 {v1.b}[4], [x3], x1 + ld1 {v1.b}[5], [x3], x1 + ld1 {v1.b}[6], [x3], x1 + ld1 {v1.b}[7], [x3], x1 + ld1 {v1.b}[8], [x3], x1 + ld1 {v1.b}[9], [x3], x1 + ld1 {v1.b}[10], [x3], x1 + ld1 {v1.b}[11], [x3], x1 + ld1 {v1.b}[12], [x3], x1 + ld1 {v1.b}[13], [x3], x1 + ld1 {v1.b}[14], [x3], x1 + ld1 {v1.b}[15], [x3] + // reduce instruction + uaddlv h0, v0.16b + uaddlv h1, v1.16b + add v0.8h, v0.8h, v1.8h + uqrshrn b0, h0, #5 + dup v0.16b, v0.b[0] +.rept 16 + st1 {v0.16b}, [x0], x1 +.endr +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderI16x16LumaPredDcTop_AArch64_neon + sxtw x1, w1 + sub x2, x0, x1 + ld1 {v0.16b}, [x2] + // reduce instruction + uaddlv h0, v0.16b + uqrshrn v0.8b, v0.8h, 4 + dup v0.16b, v0.b[0] +.rept 16 + st1 {v0.16b}, [x0], x1 +.endr +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderI16x16LumaPredDcLeft_AArch64_neon + sxtw x1, w1 + sub x3, x0, #1 + ld1 {v1.b}[0], [x3], x1 + ld1 {v1.b}[1], [x3], x1 + ld1 {v1.b}[2], [x3], x1 + ld1 {v1.b}[3], [x3], x1 + ld1 {v1.b}[4], [x3], x1 + ld1 {v1.b}[5], [x3], x1 + ld1 {v1.b}[6], [x3], x1 + ld1 {v1.b}[7], [x3], x1 + ld1 {v1.b}[8], [x3], x1 + ld1 {v1.b}[9], [x3], x1 + ld1 {v1.b}[10], [x3], x1 + ld1 {v1.b}[11], [x3], x1 + ld1 {v1.b}[12], [x3], x1 + ld1 {v1.b}[13], [x3], x1 + ld1 {v1.b}[14], [x3], x1 + ld1 {v1.b}[15], [x3] + // reduce instruction + uaddlv h1, v1.16b + uqrshrn v0.8b, v1.8h, #4 + dup v0.16b, v0.b[0] +.rept 16 + st1 {v0.16b}, [x0], x1 +.endr 
+WELS_ASM_AARCH64_FUNC_END + + +.align 4 +intra_1_to_8: .short 5, 10, 15, 20, 25, 30, 35, 40 +intra_m7_to_p8: .short -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8 + +WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderI16x16LumaPredPlane_AArch64_neon + sxtw x1, w1 + sub x2, x0, x1 + sub x2, x2, #1 + mov x3, x2 + ld1 {v0.8b}, [x3] // v0 low 8 bit in top(reverse order) + add x3, x3, #9 + rev64 v0.8b, v0.8b // reverse v0 + ld1 {v1.8b}, [x3] // v1 high 8 bit in top + uxtl v0.8h, v0.8b // extend to 16 bit integer + uxtl v1.8h, v1.8b // extend to 16 bit integer + ld1 {v2.b}[7], [x2], x1 + ld1 {v2.b}[6], [x2], x1 + ld1 {v2.b}[5], [x2], x1 + ld1 {v2.b}[4], [x2], x1 + ld1 {v2.b}[3], [x2], x1 + ld1 {v2.b}[2], [x2], x1 + ld1 {v2.b}[1], [x2], x1 + ld1 {v2.b}[0], [x2], x1 // v2.8b low 8 bit in left + add x2, x2, x1 + ld1 {v3.b}[0], [x2], x1 + ld1 {v3.b}[1], [x2], x1 + ld1 {v3.b}[2], [x2], x1 + ld1 {v3.b}[3], [x2], x1 + ld1 {v3.b}[4], [x2], x1 + ld1 {v3.b}[5], [x2], x1 + ld1 {v3.b}[6], [x2], x1 + ld1 {v3.b}[7], [x2] // v3.8b high 8bit in left + uxtl v2.8h, v2.8b + uxtl v3.8h, v3.8b + sub v0.8h, v1.8h, v0.8h + sub v2.8h, v3.8h, v2.8h + ldr q4, intra_1_to_8 + mul v0.8h, v0.8h, v4.8h + mul v2.8h, v2.8h, v4.8h + saddlv s0, v0.8h + saddlv s2, v2.8h + add v1.8h, v1.8h, v3.8h + sqrshrn v0.4h, v0.4S, #6 // b is in v0.h[0] + sqrshrn v2.4h, v2.4S, #6 // c is in v2.h[0] + shl v1.8h, v1.8h, #4 // a is in v1.h[7] + ldr q4, intra_m7_to_p8 + ldr q5, intra_m7_to_p8 + 16 + dup v1.8h, v1.h[7] + dup v3.8h, v1.h[7] + mla v1.8h, v4.8h, v0.h[0] + mla v3.8h, v5.8h, v0.h[0] + dup v2.8h, v2.h[0] // v2.8h is [cccccccc] + mla v1.8h, v2.8h, v4.h[0] + mla v3.8h, v2.8h, v4.h[0] + sqrshrun v4.8b, v1.8h, #5 + sqrshrun2 v4.16b, v3.8h, #5 + st1 {v4.16b}, [x0], x1 +.rept 15 + add v1.8h, v1.8h, v2.8h + add v3.8h, v3.8h, v2.8h + sqrshrun v4.8b, v1.8h, #5 + sqrshrun2 v4.16b, v3.8h, #5 + st1 {v4.16b}, [x0], x1 +.endr +WELS_ASM_AARCH64_FUNC_END +#endif diff --git 
a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/au_parser.h b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/au_parser.h new file mode 100644 index 000000000..8a233af90 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/au_parser.h @@ -0,0 +1,199 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * \file au_parser.h + * + * \brief Interfaces introduced in Access Unit level based parser + * + * \date 03/10/2009 Created + * + ************************************************************************************* + */ +#ifndef WELS_ACCESS_UNIT_PARSER_H__ +#define WELS_ACCESS_UNIT_PARSER_H__ +#include "typedefs.h" +#include "wels_common_basis.h" +#include "nal_prefix.h" +#include "dec_frame.h" +#include "bit_stream.h" +#include "parameter_sets.h" +#include "decoder_context.h" + +namespace WelsDec { + +/*! + ************************************************************************************* + * \brief Start Code Prefix (0x 00 00 00 01) detection + * + * \param pBuf bitstream payload buffer + * \param pOffset offset between NAL rbsp and original bitsteam that + * start code prefix is seperated from. + * \param iBufSize count size of buffer + * + * \return RBSP buffer of start code prefix exclusive + * + * \note N/A + ************************************************************************************* + */ +uint8_t* DetectStartCodePrefix (const uint8_t* kpBuf, int32_t* pOffset, int32_t iBufSize); + +/*! 
+ ************************************************************************************* + * \brief to parse network abstraction layer unit, + * escape emulation_prevention_three_byte within it + former name is parse_nal + * + * \param pCtx decoder context + * \param pNalUnitHeader parsed result of NAL Unit Header to output + * \param pSrcRbsp bitstream buffer to input + * \param iSrcRbspLen length size of bitstream buffer payload + * \param pSrcNal + * \param iSrcNalLen + * \param pConsumedBytes consumed bytes during parsing + * + * \return decoded bytes payload, might be (pSrcRbsp+1) if no escapes + * + * \note N/A + ************************************************************************************* + */ +uint8_t* ParseNalHeader (PWelsDecoderContext pCtx, SNalUnitHeader* pNalUnitHeader, uint8_t* pSrcRbsp, + int32_t iSrcRbspLen, uint8_t* pSrcNal, int32_t iSrcNalLen, int32_t* pConsumedBytes); + +int32_t ParseNonVclNal (PWelsDecoderContext pCtx, uint8_t* pRbsp, const int32_t kiSrcLen, uint8_t* pSrcNal, + const int32_t kSrcNalLen); + +int32_t ParseRefBasePicMarking (PBitStringAux pBs, PRefBasePicMarking pRefBasePicMarking); + +int32_t ParsePrefixNalUnit (PWelsDecoderContext pCtx, PBitStringAux pBs); + +bool CheckAccessUnitBoundary (PWelsDecoderContext pCtx, const PNalUnit kpCurNal, const PNalUnit kpLastNal, + const PSps kpSps); +bool CheckAccessUnitBoundaryExt (PNalUnitHeaderExt pLastNalHdrExt, PNalUnitHeaderExt pCurNalHeaderExt, + PSliceHeader pLastSliceHeader, PSliceHeader pCurSliceHeader); +bool CheckNextAuNewSeq (PWelsDecoderContext pCtx, const PNalUnit kpCurNal, const PSps kpSps); + +/*! 
+ ************************************************************************************* + * \brief to parse Sequence Parameter Set (SPS) + * + * \param pCtx Decoder context + * \param pBsAux bitstream reader auxiliary + * \param pPicWidth picture width represented by the current SPS (non-const pointer; presumably written by the parser - TODO confirm) + * \param pPicHeight picture height represented by the current SPS (non-const pointer; presumably written by the parser - TODO confirm) + * + * \return 0 - succeeded + * 1 - failed + * + * \note Call it in case eNalUnitType is SPS. pSrcNal/kSrcNalLen are not described here; presumably the source NAL buffer and its length - TODO confirm. + ************************************************************************************* + */ +int32_t ParseSps (PWelsDecoderContext pCtx, PBitStringAux pBsAux, int32_t* pPicWidth, int32_t* pPicHeight, + uint8_t* pSrcNal, const int32_t kSrcNalLen); + +/*! + ************************************************************************************* + * \brief to parse Picture Parameter Set (PPS) + * + * \param pCtx Decoder context + * \param pPpsList pps list + * \param pBsAux bitstream reader auxiliary + * + * \return 0 - succeeded + * 1 - failed + * + * \note Call it in case eNalUnitType is PPS. pSrcNal/kSrcNalLen are not described here; presumably the source NAL buffer and its length - TODO confirm. + ************************************************************************************* + */ +int32_t ParsePps (PWelsDecoderContext pCtx, PPps pPpsList, PBitStringAux pBsAux, uint8_t* pSrcNal, + const int32_t kSrcNalLen); + +/*! +************************************************************************************* +* \brief to parse Video Usability Information (VUI) parameter of the SPS +* +* \param pCtx Decoder context +* \param pSps the sps which current Vui parameter belongs to +* \param pBsAux bitstream reader auxiliary +* +* \return 0 - succeeded +* 1 - failed +* +* \note Call it in case the flag "vui_parameters_present_flag" in sps is true. +************************************************************************************* +*/ +int32_t ParseVui (PWelsDecoderContext pCtx, PSps pSps, PBitStringAux pBsAux); + +/*! 
+ ************************************************************************************* + * \brief to parse scaling list message payload + * + * \param PPS SPS scaling list matrix message to be parsed output + * \param pBsAux bitstream reader auxiliary + * + * \return 0 - succeeded + * 1 - failed + * + * \note Call it in case scaling matrix present at sps or pps. NOTE(review): this comment precedes both SetScalingListValue and ParseScalingList, and its first \param entry is not a parameter name of either signature. + ************************************************************************************* +*/ +int32_t SetScalingListValue (uint8_t* pScalingList, int iScalingListNum, bool* bUseDefaultScalingMatrixFlag, + PBitStringAux pBsAux); +int32_t ParseScalingList (PSps pSps, PBitStringAux pBs, bool bPPS, const bool kbTrans8x8ModeFlag, + bool* bScalingListPresentFlag, uint8_t (*iScalingList4x4)[16], uint8_t (*iScalingList8x8)[64]); +/*! + ************************************************************************************* + * \brief to parse SEI message payload + * + * \param pSei sei message to be parsed output + * \param pBsAux bitstream reader auxiliary + * + * \return 0 - succeeded + * 1 - failed + * + * \note Call it in case eNalUnitType is NAL_UNIT_SEI. + ************************************************************************************* + */ +int32_t ParseSei (void* pSei, PBitStringAux pBsAux); // reserved Sei_Msg type + +/*! 
+ ************************************************************************************* + * \brief reset fmo list due to got Sps now + * + * \param pCtx decoder context + * + * \return count number of fmo context units are reset + ************************************************************************************* + */ +int32_t ResetFmoList (PWelsDecoderContext pCtx); + +} // namespace WelsDec + +#endif//WELS_ACCESS_UNIT_PARSER_H__ + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/bit_stream.h b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/bit_stream.h new file mode 100644 index 000000000..8490a645b --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/bit_stream.h @@ -0,0 +1,62 @@ +/*! + * \copy + * Copyright (c) 2004-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +//bit_stream.h - bit-stream reading and / writing auxiliary data +#ifndef WELS_BIT_STREAM_H__ +#define WELS_BIT_STREAM_H__ + +#include "typedefs.h" +#include "wels_common_defs.h" +#include "golomb_common.h" + +using namespace WelsCommon; + +namespace WelsDec { + +/*! + * \brief input bits for decoder or initialize bitstream writing in encoder + * + * \param pBitString Bit string auxiliary pointer + * \param kpBuf bit-stream buffer + * \param kiSize size in bits for decoder; size in bytes for encoder + * + * \return size of buffer data in byte; failed in -1 return + */ +int32_t DecInitBits (PBitStringAux pBitString, const uint8_t* kpBuf, const int32_t kiSize); + +int32_t InitReadBits (PBitStringAux pBitString, intX_t iEndOffset); + +void RBSP2EBSP (uint8_t* pDstBuf, uint8_t* pSrcBuf, const int32_t kiSize); + +} // namespace WelsDec + +#endif//WELS_BIT_STREAM_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/cabac_decoder.h b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/cabac_decoder.h new file mode 100644 index 000000000..79bb50073 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/cabac_decoder.h @@ -0,0 +1,111 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * \file cabac_decoder.h + * + * \brief Interfaces introduced for cabac decoder + * + * \date 10/10/2014 Created + * + ************************************************************************************* + */ +#ifndef WELS_CABAC_DECODER_H__ +#define WELS_CABAC_DECODER_H__ + +#include "decoder_context.h" +#include "error_code.h" +#include "wels_common_defs.h" +namespace WelsDec { +static const uint8_t g_kRenormTable256[256] = { + 6, 6, 6, 6, 6, 6, 6, 6, + 5, 5, 5, 5, 5, 5, 5, 5, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1 +}; + + +//1. CABAC context initialization +void WelsCabacGlobalInit (PWelsDecoderContext pCabacCtx); +void WelsCabacContextInit (PWelsDecoderContext pCtx, uint8_t eSliceType, int32_t iCabacInitIdc, int32_t iQp); + +//2. decoding Engine initialization +int32_t InitCabacDecEngineFromBS (PWelsCabacDecEngine pDecEngine, SBitStringAux* pBsAux); +void RestoreCabacDecEngineToBS (PWelsCabacDecEngine pDecEngine, SBitStringAux* pBsAux); +//3. 
actual decoding +int32_t Read32BitsCabac (PWelsCabacDecEngine pDecEngine, uint32_t& uiValue, int32_t& iNumBitsRead); +int32_t DecodeBinCabac (PWelsCabacDecEngine pDecEngine, PWelsCabacCtx pBinCtx, uint32_t& uiBit); +int32_t DecodeBypassCabac (PWelsCabacDecEngine pDecEngine, uint32_t& uiBinVal); +int32_t DecodeTerminateCabac (PWelsCabacDecEngine pDecEngine, uint32_t& uiBinVal); + +//4. unary parsing +int32_t DecodeUnaryBinCabac (PWelsCabacDecEngine pDecEngine, PWelsCabacCtx pBinCtx, int32_t iCtxOffset, + uint32_t& uiSymVal); + +//5. EXGk parsing +int32_t DecodeExpBypassCabac (PWelsCabacDecEngine pDecEngine, int32_t iCount, uint32_t& uiSymVal); +uint32_t DecodeUEGLevelCabac (PWelsCabacDecEngine pDecEngine, PWelsCabacCtx pBinCtx, uint32_t& uiBinVal); +int32_t DecodeUEGMvCabac (PWelsCabacDecEngine pDecEngine, PWelsCabacCtx pBinCtx, uint32_t iMaxC, uint32_t& uiCode); + +#define WELS_CABAC_HALF 0x01FE +#define WELS_CABAC_QUARTER 0x0100 +#define WELS_CABAC_FALSE_RETURN(iErrorInfo) /* return iErrorInfo from the enclosing function when it is non-zero; the argument is evaluated exactly once so a call passed in is not repeated on the error path */ \ +{ const int32_t iCabacErrTmp = (int32_t) (iErrorInfo); \ + if (iCabacErrTmp) { return iCabacErrTmp; } \ +} +} +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/deblocking.h b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/deblocking.h new file mode 100644 index 000000000..e84114a83 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/deblocking.h @@ -0,0 +1,118 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file deblocking.h + * + * \brief Interfaces introduced in frame deblocking filtering + * + * \date 05/14/2009 Created + * + ************************************************************************************* + */ + +#ifndef WELS_DEBLOCKING_H__ +#define WELS_DEBLOCKING_H__ + +#include "decoder_context.h" +#include "deblocking_common.h" +namespace WelsDec { + +/*! + * \brief deblocking module initialize + * + * \param pf + * cpu + * + * \return NONE + */ + +void DeblockingInit (PDeblockingFunc pDeblockingFunc, int32_t iCpu); + + +/*! + * \brief deblocking filtering target slice + * + * \param dec Wels decoder context + * + * \return NONE + */ +void WelsDeblockingFilterSlice (PWelsDecoderContext pCtx, PDeblockingFilterMbFunc pDeblockMb); + +/*! +* \brief AVC slice init deblocking filtering target layer +* +* \in and out param SDeblockingFilter +* \in and out param iFilterIdc +* +* \return NONE +*/ +void WelsDeblockingInitFilter (PWelsDecoderContext pCtx, SDeblockingFilter& pFilter, int32_t& iFilterIdc); + +/*! +* \brief AVC MB deblocking filtering target layer +* +* \param DqLayer which has the current location of MB to be deblocked. 
+* +* \return NONE +*/ +void WelsDeblockingFilterMB (PDqLayer pCurDqLayer, SDeblockingFilter& pFilter, int32_t& iFilterIdc, + PDeblockingFilterMbFunc pDeblockMb); + +/*! + * \brief pixel deblocking filtering + * + * \param filter deblocking filter + * \param pix pixel value + * \param stride frame stride + * \param bs boundary strength + * + * \return NONE (NOTE(review): the \param names and the NONE return do not match the declarations that follow, which return uint32_t or int32_t and take layer, edge and MB-index arguments - this comment looks stale, confirm) + */ + +uint32_t DeblockingBsMarginalMBAvcbase (PDeblockingFilter pFilter, PDqLayer pCurDqLayer, int32_t iEdge, + int32_t iNeighMb, int32_t iMbXy); +uint32_t DeblockingBSliceBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdge, int32_t iNeighMb, int32_t iMbXy); + +int32_t DeblockingAvailableNoInterlayer (PDqLayer pCurDqLayer, int32_t iFilterIdc); + +void WelsDeblockingMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int32_t iBoundryFlag); + +inline int8_t* GetPNzc (PDqLayer pCurDqLayer, int32_t iMbXy) { /* returns the pNzc row for macroblock index iMbXy */ + if (pCurDqLayer->pDec != NULL && pCurDqLayer->pDec->pNzc != NULL) { /* use the table reached through pDec when both pointers are set */ + return pCurDqLayer->pDec->pNzc[iMbXy]; + } + return pCurDqLayer->pNzc[iMbXy]; /* otherwise fall back to the layer's own pNzc table */ +} + +} // namespace WelsDec + +#endif //WELS_DEBLOCKING_H__ + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/dec_frame.h b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/dec_frame.h new file mode 100644 index 000000000..eb7337266 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/dec_frame.h @@ -0,0 +1,156 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +//dec_frame.h +#ifndef WELS_DEC_FRAME_H__ +#define WELS_DEC_FRAME_H__ + +#include "typedefs.h" +#include "wels_const.h" +#include "wels_common_basis.h" +#include "parameter_sets.h" +#include "nal_prefix.h" +#include "slice.h" +#include "picture.h" +#include "bit_stream.h" +#include "fmo.h" + +namespace WelsDec { + +///////////////////////////////////DQ Layer level/////////////////////////////////// +typedef struct TagDqLayer SDqLayer; +typedef SDqLayer* PDqLayer; +typedef struct TagLayerInfo { + SNalUnitHeaderExt sNalHeaderExt; + SSlice sSliceInLayer; // Here Slice identify to Frame on concept + PSubsetSps pSubsetSps; // current pSubsetSps used, memory alloc in external + PSps pSps; // current sps based avc used, memory alloc in external + PPps pPps; // current pps used +} SLayerInfo, *PLayerInfo; +/* Layer Representation */ + +struct TagDqLayer { + SLayerInfo sLayerInfo; + + PBitStringAux pBitStringAux; // pointer to SBitStringAux + PFmo pFmo; // Current fmo context pointer used + uint32_t* pMbType; + int32_t* pSliceIdc; // using int32_t for slice_idc + int16_t (*pMv[LIST_A])[MB_BLOCK4x4_NUM][MV_A]; + int16_t (*pMvd[LIST_A])[MB_BLOCK4x4_NUM][MV_A]; + int8_t (*pRefIndex[LIST_A])[MB_BLOCK4x4_NUM]; + int8_t (*pDirect)[MB_BLOCK4x4_NUM]; + bool* pNoSubMbPartSizeLessThan8x8Flag; + bool* pTransformSize8x8Flag; + int8_t* pLumaQp; + int8_t (*pChromaQp)[2]; + int8_t* pCbp; + uint16_t *pCbfDc; + int8_t (*pNzc)[24]; + int8_t (*pNzcRs)[24]; + int8_t* pResidualPredFlag; + int8_t* pInterPredictionDoneFlag; + bool* pMbCorrectlyDecodedFlag; + bool* pMbRefConcealedFlag; + int16_t (*pScaledTCoeff)[MB_COEFF_LIST_SIZE]; + int8_t (*pIntraPredMode)[8]; //0~3 top4x4 ; 4~6 left 4x4; 7 intra16x16 + int8_t (*pIntra4x4FinalMode)[MB_BLOCK4x4_NUM]; + uint8_t *pIntraNxNAvailFlag; + int8_t* pChromaPredMode; + //uint8_t (*motion_pred_flag[LIST_A])[MB_PARTITION_SIZE]; // 8x8 + uint32_t (*pSubMbType)[MB_SUB_PARTITION_SIZE]; + int32_t iLumaStride; + int32_t iChromaStride; + uint8_t* 
pPred[3]; + int32_t iMbX; + int32_t iMbY; + int32_t iMbXyIndex; + int32_t iMbWidth; // MB width of this picture, equal to sSps.iMbWidth + int32_t iMbHeight; // MB height of this picture, equal to sSps.iMbHeight; + + /* Common syntax elements across all slices of a DQLayer */ + int32_t iSliceIdcBackup; + uint32_t uiSpsId; + uint32_t uiPpsId; + uint32_t uiDisableInterLayerDeblockingFilterIdc; + int32_t iInterLayerSliceAlphaC0Offset; + int32_t iInterLayerSliceBetaOffset; + //SPosOffset sScaledRefLayer; + int32_t iSliceGroupChangeCycle; + + PRefPicListReorderSyn pRefPicListReordering; + PPredWeightTabSyn pPredWeightTable; + PRefPicMarking pRefPicMarking; // Decoded reference picture marking syntaxs + PRefBasePicMarking pRefPicBaseMarking; + + PPicture pRef; // reference picture pointer + PPicture pDec; // reconstruction picture pointer for layer + + int16_t iColocMv[2][16][2]; //Colocated MV cache + int8_t iColocRefIndex[2][16]; //Colocated RefIndex cache + int8_t iColocIntra[16]; //Colocated Intra cache + + bool bUseWeightPredictionFlag; + bool bUseWeightedBiPredIdc; + bool bStoreRefBasePicFlag; // iCurTid == 0 && iCurQid = 0 && bEncodeKeyPic = 1 + bool bTCoeffLevelPredFlag; + bool bConstrainedIntraResamplingFlag; + uint8_t uiRefLayerDqId; + uint8_t uiRefLayerChromaPhaseXPlus1Flag; + uint8_t uiRefLayerChromaPhaseYPlus1; + uint8_t uiLayerDqId; // dq_id of current layer + bool bUseRefBasePicFlag; // whether reference pic or reference base pic is referred? 
+}; + +typedef struct TagGpuAvcLayer { + SLayerInfo sLayerInfo; + PBitStringAux pBitStringAux; // pointer to SBitStringAux + + uint32_t* pMbType; + int32_t* pSliceIdc; // using int32_t for slice_idc + int8_t* pLumaQp; + int8_t* pCbp; + int8_t (*pNzc)[24]; + int8_t (*pIntraPredMode)[8]; //0~3 top4x4 ; 4~6 left 4x4; 7 intra16x16 + int32_t iMbX; + int32_t iMbY; + int32_t iMbXyIndex; + int32_t iMbWidth; // MB width of this picture, equal to sSps.iMbWidth + int32_t iMbHeight; // MB height of this picture, equal to sSps.iMbHeight; + +} SGpuAvcDqLayer, *PGpuAvcDqLayer; + +/////////////////////////////////////////////////////////////////////// + +} // namespace WelsDec + +#endif//WELS_DEC_FRAME_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/dec_golomb.h b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/dec_golomb.h new file mode 100644 index 000000000..fef8df460 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/dec_golomb.h @@ -0,0 +1,344 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file dec_golomb.h + * + * \brief Exponential Golomb entropy coding/decoding routine + * + * \date 03/13/2009 Created + * + ************************************************************************************* + */ +#ifndef WELS_EXPONENTIAL_GOLOMB_ENTROPY_CODING_H__ +#define WELS_EXPONENTIAL_GOLOMB_ENTROPY_CODING_H__ + +#include "typedefs.h" +#include "bit_stream.h" +#include "macros.h" +//#include +#include "ls_defines.h" +#include "error_code.h" + +namespace WelsDec { + +#define WELS_READ_VERIFY(uiRet) do{ \ + uint32_t uiRetTmp = (uint32_t)uiRet; \ + if( uiRetTmp != ERR_NONE ) \ + return uiRetTmp; \ +}while(0) +#define GET_WORD(iCurBits, pBufPtr, iLeftBits, iAllowedBytes, iReadBytes) { \ + if (iReadBytes > iAllowedBytes+1) { \ + return ERR_INFO_READ_OVERFLOW; \ + } \ + iCurBits |= ((uint32_t)((pBufPtr[0] << 8) | pBufPtr[1])) << (iLeftBits); \ + iLeftBits -= 16; \ + pBufPtr +=2; \ +} +#define NEED_BITS(iCurBits, pBufPtr, iLeftBits, iAllowedBytes, iReadBytes) { \ + if (iLeftBits > 0) { \ + GET_WORD(iCurBits, pBufPtr, iLeftBits, iAllowedBytes, iReadBytes); \ + } \ +} +#define UBITS(iCurBits, iNumBits) (iCurBits>>(32-(iNumBits))) +#define DUMP_BITS(iCurBits, pBufPtr, iLeftBits, iNumBits, iAllowedBytes, iReadBytes) { \ + iCurBits <<= (iNumBits); \ + iLeftBits += (iNumBits); \ + NEED_BITS(iCurBits, pBufPtr, iLeftBits, iAllowedBytes, iReadBytes); \ +} + +static inline int32_t BsGetBits (PBitStringAux pBs, int32_t iNumBits, 
uint32_t* pCode) { /* stores the top iNumBits bits of the bit cache in *pCode, then advances through DUMP_BITS; returns ERR_NONE, or ERR_INFO_READ_OVERFLOW from GET_WORD */ + intX_t iRc = (iNumBits > 0) ? UBITS (pBs->uiCurBits, iNumBits) : 0; /* a zero-width read yields 0; UBITS would otherwise shift by the full 32 bits, which is undefined for a 32-bit value */ + intX_t iAllowedBytes = pBs->pEndBuf - pBs->pStartBuf; //actual stream bytes + intX_t iReadBytes = pBs->pCurBuf - pBs->pStartBuf; + DUMP_BITS (pBs->uiCurBits, pBs->pCurBuf, pBs->iLeftBits, iNumBits, iAllowedBytes, iReadBytes); + *pCode = (uint32_t)iRc; + return ERR_NONE; +} + +/* + * Exponential Golomb codes decoding routines + */ + +// for data sharing cross modules and try to reduce size of binary generated, 12/10/2009 +extern const uint8_t g_kuiIntra4x4CbpTable[48]; +extern const uint8_t g_kuiIntra4x4CbpTable400[16]; +extern const uint8_t g_kuiInterCbpTable[48]; +extern const uint8_t g_kuiInterCbpTable400[16]; + +extern const uint8_t g_kuiLeadingZeroTable[256]; + +static const uint32_t g_kuiPrefix8BitsTable[16] = { + 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3 +}; + + +static inline uint32_t GetPrefixBits (uint32_t uiValue) { + uint32_t iNumBit = 0; + + if (uiValue & 0xffff0000) { + uiValue >>= 16; + iNumBit += 16; + } + if (uiValue & 0xff00) { + uiValue >>= 8; + iNumBit += 8; + } + + if (uiValue & 0xf0) { + uiValue >>= 4; + iNumBit += 4; + } + iNumBit += g_kuiPrefix8BitsTable[uiValue]; + + return (32 - iNumBit); +} + +/* + * Read one bit from the bit stream (forwards to BsGetBits with a width of 1) + */ +static inline uint32_t BsGetOneBit (PBitStringAux pBs, uint32_t* pCode) { + return (BsGetBits (pBs, 1, pCode)); +} + +static inline int32_t GetLeadingZeroBits (uint32_t iCurBits) { //<=32 bits + uint32_t uiValue; + + uiValue = UBITS (iCurBits, 8); //ShowBits( bs, 8 ); + if (uiValue) { + return g_kuiLeadingZeroTable[uiValue]; + } + + uiValue = UBITS (iCurBits, 16); //ShowBits( bs, 16 ); + if (uiValue) { + return (g_kuiLeadingZeroTable[uiValue] + 8); + } + + uiValue = UBITS (iCurBits, 24); //ShowBits( bs, 24 ); + if (uiValue) { + return (g_kuiLeadingZeroTable[uiValue] + 16); + } + + uiValue = iCurBits; //ShowBits( bs, 32 ); + if (uiValue) { + return (g_kuiLeadingZeroTable[uiValue] + 24); + } +//ASSERT(false); // should not go 
here + return -1; +} + +static inline uint32_t BsGetUe (PBitStringAux pBs, uint32_t* pCode) { + uint32_t iValue = 0; + int32_t iLeadingZeroBits = GetLeadingZeroBits (pBs->uiCurBits); + intX_t iAllowedBytes, iReadBytes; + iAllowedBytes = pBs->pEndBuf - pBs->pStartBuf; //actual stream bytes + + if (iLeadingZeroBits == -1) { //bitstream error + return ERR_INFO_READ_LEADING_ZERO;//-1 + } else if (iLeadingZeroBits > + 16) { //rarely into this condition (even may be bitstream error), prevent from 16-bit reading overflow + //using two-step reading instead of one time reading of >16 bits. + iReadBytes = pBs->pCurBuf - pBs->pStartBuf; + DUMP_BITS (pBs->uiCurBits, pBs->pCurBuf, pBs->iLeftBits, 16, iAllowedBytes, iReadBytes); + iReadBytes = pBs->pCurBuf - pBs->pStartBuf; + DUMP_BITS (pBs->uiCurBits, pBs->pCurBuf, pBs->iLeftBits, iLeadingZeroBits + 1 - 16, iAllowedBytes, iReadBytes); + } else { + iReadBytes = pBs->pCurBuf - pBs->pStartBuf; + DUMP_BITS (pBs->uiCurBits, pBs->pCurBuf, pBs->iLeftBits, iLeadingZeroBits + 1, iAllowedBytes, iReadBytes); + } + if (iLeadingZeroBits) { /* NOTE(review): unlike the prefix above, the suffix is still consumed by a single DUMP_BITS even when iLeadingZeroBits > 16 - confirm that is safe */ + iValue = UBITS (pBs->uiCurBits, iLeadingZeroBits); + iReadBytes = pBs->pCurBuf - pBs->pStartBuf; + DUMP_BITS (pBs->uiCurBits, pBs->pCurBuf, pBs->iLeftBits, iLeadingZeroBits, iAllowedBytes, iReadBytes); + } + + *pCode = ((1u << iLeadingZeroBits) - 1 + iValue); + return ERR_NONE; +} + + +/* + * Read signed exp golomb codes + */ +static inline int32_t BsGetSe (PBitStringAux pBs, int32_t* pCode) { + uint32_t uiCodeNum; + + WELS_READ_VERIFY (BsGetUe (pBs, &uiCodeNum)); + + if (uiCodeNum & 0x01) { + *pCode = (int32_t) ((uiCodeNum + 1) >> 1); + } else { + *pCode = NEG_NUM ((int32_t) (uiCodeNum >> 1)); + } + return ERR_NONE; +} + +/* + * Get unsigned truncated exp golomb code. 
+ */ +static inline int32_t BsGetTe0 (PBitStringAux pBs, int32_t iRange, uint32_t* pCode) { /* iRange 1: *pCode is 0 and nothing is read; iRange 2: one bit is read and inverted; otherwise an unsigned Exp-Golomb code is read */ + if (iRange == 1) { + *pCode = 0; + } else if (iRange == 2) { + WELS_READ_VERIFY (BsGetOneBit (pBs, pCode)); + *pCode ^= 1; + } else { + WELS_READ_VERIFY (BsGetUe (pBs, pCode)); + } + return ERR_NONE; +} + +/* + * Get number of trailing bits: the count of zero bits below the lowest set bit of *pBuf (0..7); also 0 when *pBuf is 0 + */ +static inline int32_t BsGetTrailingBits (uint8_t* pBuf) { +// TODO + uint32_t uiValue = *pBuf; + int32_t iRetNum = 0; + + do { + if (uiValue & 1) + return iRetNum; + uiValue >>= 1; + ++ iRetNum; + } while (iRetNum < 9); + + return 0; +} + +/* + * Check whether there is more rbsp data for processing: true when iBits - 8 * (pCurBuf - pStartBuf - 2) - iLeftBits is greater than 1 + */ +static inline bool CheckMoreRBSPData (PBitStringAux pBsAux) { + if ((pBsAux->iBits - ((pBsAux->pCurBuf - pBsAux->pStartBuf - 2) << 3) - pBsAux->iLeftBits) > 1) { + return true; + } else { + return false; + } +} + +//define macros to check syntax elements; the variants that call WelsLog expand to pCtx->sLogCtx, so a pCtx must be in scope at the call site +#define WELS_CHECK_SE_BOTH_ERROR(val, lower_bound, upper_bound, syntax_name, ret_code) do {\ +if ((val < lower_bound) || (val > upper_bound)) {\ + WelsLog(&(pCtx->sLogCtx), WELS_LOG_ERROR, "invalid syntax " syntax_name " %d", val);\ + return ret_code;\ +}\ +}while(0) + +#define WELS_CHECK_SE_LOWER_ERROR(val, lower_bound, syntax_name, ret_code) do {\ +if (val < lower_bound) {\ + WelsLog(&(pCtx->sLogCtx), WELS_LOG_ERROR, "invalid syntax " syntax_name " %d", val);\ + return ret_code;\ +}\ +}while(0) + +#define WELS_CHECK_SE_UPPER_ERROR(val, upper_bound, syntax_name, ret_code) do {\ +if (val > upper_bound) {\ + WelsLog(&(pCtx->sLogCtx), WELS_LOG_ERROR, "invalid syntax " syntax_name " %d", val);\ + return ret_code;\ +}\ +}while(0) + +#define WELS_CHECK_SE_BOTH_ERROR_NOLOG(val, lower_bound, upper_bound, syntax_name, ret_code) do {\ +if ((val < lower_bound) || (val > upper_bound)) {\ + return ret_code;\ +}\ +}while(0) + +#define WELS_CHECK_SE_LOWER_ERROR_NOLOG(val, lower_bound, syntax_name, ret_code) do {\ +if (val < lower_bound) {\ + return ret_code;\ +}\ +}while(0) + +#define 
WELS_CHECK_SE_UPPER_ERROR_NOLOG(val, upper_bound, syntax_name, ret_code) do {\ +if (val > upper_bound) {\ + return ret_code;\ +}\ +}while(0) + + +#define WELS_CHECK_SE_BOTH_WARNING(val, lower_bound, upper_bound, syntax_name) do {\ +if ((val < lower_bound) || (val > upper_bound)) {\ + WelsLog(&(pCtx->sLogCtx), WELS_LOG_WARNING, "invalid syntax " syntax_name " %d", val);\ +}\ +}while(0) + +#define WELS_CHECK_SE_LOWER_WARNING(val, lower_bound, syntax_name) do {\ +if (val < lower_bound) {\ + WelsLog(&(pCtx->sLogCtx), WELS_LOG_WARNING, "invalid syntax " syntax_name " %d", val);\ +}\ +}while(0) + +#define WELS_CHECK_SE_UPPER_WARNING(val, upper_bound, syntax_name) do {\ +if (val > upper_bound) {\ + WelsLog(&(pCtx->sLogCtx), WELS_LOG_WARNING, "invalid syntax " syntax_name " %d", val);\ +}\ +}while(0) +// below define syntax element offset +// for bit_depth_luma_minus8 and bit_depth_chroma_minus8 +#define BIT_DEPTH_LUMA_OFFSET 8 +#define BIT_DEPTH_CHROMA_OFFSET 8 +// for log2_max_frame_num_minus4 +#define LOG2_MAX_FRAME_NUM_OFFSET 4 +// for log2_max_pic_order_cnt_lsb_minus4 +#define LOG2_MAX_PIC_ORDER_CNT_LSB_OFFSET 4 +// for pic_width_in_mbs_minus1 +#define PIC_WIDTH_IN_MBS_OFFSET 1 +// for pic_height_in_map_units_minus1 +#define PIC_HEIGHT_IN_MAP_UNITS_OFFSET 1 +// for bit_depth_aux_minus8 +#define BIT_DEPTH_AUX_OFFSET 8 +// for num_slice_groups_minus1 +#define NUM_SLICE_GROUPS_OFFSET 1 +// for run_length_minus1 +#define RUN_LENGTH_OFFSET 1 +// for slice_group_change_rate_minus1 +#define SLICE_GROUP_CHANGE_RATE_OFFSET 1 +// for pic_size_in_map_units_minus1 +#define PIC_SIZE_IN_MAP_UNITS_OFFSET 1 +// for num_ref_idx_l0_default_active_minus1 and num_ref_idx_l1_default_active_minus1 +#define NUM_REF_IDX_L0_DEFAULT_ACTIVE_OFFSET 1 +#define NUM_REF_IDX_L1_DEFAULT_ACTIVE_OFFSET 1 +// for pic_init_qp_minus26 and pic_init_qs_minus26 +#define PIC_INIT_QP_OFFSET 26 +#define PIC_INIT_QS_OFFSET 26 +// for num_ref_idx_l0_active_minus1 and num_ref_idx_l1_active_minus1 +#define 
NUM_REF_IDX_L0_ACTIVE_OFFSET 1 +#define NUM_REF_IDX_L1_ACTIVE_OFFSET 1 + +// From Level 5.2 +#define MAX_MB_SIZE 36864 +// for aspect_ratio_idc +#define EXTENDED_SAR 255 + +} // namespace WelsDec + +#endif//WELS_EXPONENTIAL_GOLOMB_ENTROPY_CODING_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/decode_mb_aux.h b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/decode_mb_aux.h new file mode 100644 index 000000000..ee8858794 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/decode_mb_aux.h @@ -0,0 +1,78 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef WELS_DECODE_MB_AUX_H__ +#define WELS_DECODE_MB_AUX_H__ + +#include "typedefs.h" +#include "macros.h" + +namespace WelsDec { + +void IdctResAddPred_c (uint8_t* pPred, const int32_t kiStride, int16_t* pRs); +void IdctResAddPred8x8_c (uint8_t* pPred, const int32_t kiStride, int16_t* pRs); + +#if defined(__cplusplus) +extern "C" { +#endif//__cplusplus + +#if defined(X86_ASM) +void IdctResAddPred_mmx (uint8_t* pPred, const int32_t kiStride, int16_t* pRs); +void IdctResAddPred_sse2 (uint8_t* pPred, const int32_t kiStride, int16_t* pRs); +#if defined(HAVE_AVX2) +void IdctResAddPred_avx2 (uint8_t* pPred, const int32_t kiStride, int16_t* pRs); +void IdctFourResAddPred_avx2 (uint8_t* pPred, int32_t iStride, int16_t* pRs, const int8_t* pNzc); +#endif +#endif//X86_ASM + +#if defined(HAVE_NEON) +void IdctResAddPred_neon (uint8_t* pred, const int32_t stride, int16_t* rs); +#endif + +#if defined(HAVE_NEON_AARCH64) +void IdctResAddPred_AArch64_neon (uint8_t* pred, const int32_t stride, int16_t* rs); +#endif + + +#if defined(HAVE_MMI) +void IdctResAddPred_mmi (uint8_t* pPred, const int32_t kiStride, int16_t* pRs); +#endif//HAVE_MMI + +#if defined(__cplusplus) +} +#endif//__cplusplus + +void GetI4LumaIChromaAddrTable (int32_t* pBlockOffset, const int32_t kiYStride, const int32_t kiUVStride); + +} // namespace WelsDec + +#endif//WELS_DECODE_MB_AUX_H__ diff --git 
a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/decode_slice.h b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/decode_slice.h new file mode 100644 index 000000000..f7197abbf --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/decode_slice.h @@ -0,0 +1,104 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifndef WELS_DECODE_SLICE_H__ +#define WELS_DECODE_SLICE_H__ + +#include "decoder_context.h" + +namespace WelsDec { + +int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx); +int32_t WelsDecodeMbCavlcISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag); + +int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx); +int32_t WelsDecodeMbCavlcPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag); + +int32_t WelsActualDecodeMbCavlcBSlice (PWelsDecoderContext pCtx); +int32_t WelsDecodeMbCavlcBSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag); + +typedef int32_t (*PWelsDecMbFunc) (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag); + +int32_t WelsDecodeMbCabacISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag); +int32_t WelsDecodeMbCabacPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag); +int32_t WelsDecodeMbCabacBSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag); +int32_t WelsDecodeMbCabacISliceBaseMode0 (PWelsDecoderContext pCtx, uint32_t& uiEosFlag); +int32_t WelsDecodeMbCabacPSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiEosFlag); +int32_t WelsDecodeMbCabacBSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiEosFlag); + +int32_t WelsTargetSliceConstruction (PWelsDecoderContext pCtx); //construction based on slice + +int32_t WelsDecodeSlice (PWelsDecoderContext pCtx, bool bFirstSliceInLayer, PNalUnit pNalCur); +int32_t WelsDecodeAndConstructSlice (PWelsDecoderContext pCtx); + +int32_t WelsTargetMbConstruction (PWelsDecoderContext pCtx); + +int32_t WelsMbIntraPredictionConstruction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer, bool bOutput); +int32_t WelsMbInterSampleConstruction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer, + uint8_t* pDstY, uint8_t* pDstU, uint8_t* pDstV, int32_t iStrideL, int32_t iStrideC); +int32_t 
WelsMbInterConstruction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer); +void WelsLumaDcDequantIdct (int16_t* pBlock, int32_t iQp, PWelsDecoderContext pCtx); +int32_t WelsMbInterPrediction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer); +void WelsChromaDcIdct (int16_t* pBlock); +bool ComputeColocatedTemporalScaling (PWelsDecoderContext pCtx); + +#ifdef __cplusplus +extern "C" { +#endif//__cplusplus + +#if defined(X86_ASM) +void WelsBlockZero16x16_sse2 (int16_t* block, int32_t stride); +void WelsBlockZero8x8_sse2 (int16_t* block, int32_t stride); +#endif + +#if defined(HAVE_NEON) +void WelsBlockZero16x16_neon (int16_t* block, int32_t stride); +void WelsBlockZero8x8_neon (int16_t* block, int32_t stride); +#endif + +#if defined(HAVE_NEON_AARCH64) +void WelsBlockZero16x16_AArch64_neon (int16_t* block, int32_t stride); +void WelsBlockZero8x8_AArch64_neon (int16_t* block, int32_t stride); +#endif +#ifdef __cplusplus +} +#endif//__cplusplus + +void WelsBlockFuncInit (SBlockFunc* pFunc, int32_t iCpu); +void WelsBlockZero16x16_c (int16_t* block, int32_t stride); +void WelsBlockZero8x8_c (int16_t* block, int32_t stride); + +} // namespace WelsDec + +#endif //WELS_DECODE_SLICE_H__ + + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/decoder.h b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/decoder.h new file mode 100644 index 000000000..0c84739d5 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/decoder.h @@ -0,0 +1,186 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file decoder.h + * + * \brief Interfaces introduced in decoder system architecture + * + * \date 03/10/2009 Created + * + ************************************************************************************* + */ +#ifndef WELS_DECODER_SYSTEM_ARCHITECTURE_H__ +#define WELS_DECODER_SYSTEM_ARCHITECTURE_H__ + +#include "typedefs.h" +#include "decoder_context.h" + +namespace WelsDec { + +#ifdef __cplusplus +extern "C" { +#endif//__cplusplus + +/*! + * \brief configure decoder parameters + */ +int32_t DecoderConfigParam (PWelsDecoderContext pCtx, const SDecodingParam* kpParam); + +/*! + * \brief fill in default values of decoder context + */ +void WelsDecoderDefaults (PWelsDecoderContext pCtx, SLogContext* pLogCtx); + +/* +* fill last decoded picture info +*/ +void WelsDecoderLastDecPicInfoDefaults (SWelsLastDecPicInfo& sLastDecPicInfo); + +/*! 
+* \brief fill the SPS and PPS data fields of the decoder context with their default values +*/ +void WelsDecoderSpsPpsDefaults (SWelsDecoderSpsPpsCTX& sSpsPpsCtx); + +/*! +* \brief copy SPS/PPS data from one context to another context (used by the threaded code) +*/ +void CopySpsPps (PWelsDecoderContext pFromCtx, PWelsDecoderContext pToCtx); + +/*! + ************************************************************************************* + * \brief Initialize Wels decoder parameters and memory + * + * \param pCtx input context to be initialized at first stage + * \param pTraceHandle handle for trace (NOTE: not present in the current signature) + * \param pLogCtx log info pointer + * + * \return 0 - succeeded + * \return 1 - failed + * + * \note N/A + ************************************************************************************* + */ +int32_t WelsInitDecoder (PWelsDecoderContext pCtx, SLogContext* pLogCtx); + +/*! + ************************************************************************************* + * \brief Uninitialize Wels decoder parameters and memory + * + * \param pCtx input context to be uninitialized at release stage + * + * \return NONE + * + * \note N/A + ************************************************************************************* + */ +void WelsEndDecoder (PWelsDecoderContext pCtx); + +/*! + ************************************************************************************* + * \brief First entrance to decoding core interface.
+ * + * \param pCtx decoder context + * \param kpBsBuf bit streaming buffer + * \param kiBsLen length in bytes of the input bit streaming buffer + * \param ppDst picture payload data to be output + * \param pDstBufInfo buffer information of the output data + * + * \return 0 - succeeded + * \return 1 - failed + * + * \note N/A + ************************************************************************************* + */ + +int32_t WelsDecodeBs (PWelsDecoderContext pCtx, const uint8_t* kpBsBuf, const int32_t kiBsLen, + uint8_t** ppDst, SBufferInfo* pDstBufInfo, SParserBsInfo* pDstBsInfo); + +/* + * request memory blocks for decoder avc part + */ +int32_t WelsRequestMem (PWelsDecoderContext pCtx, const int32_t kiMbWidth, const int32_t kiMbHeight, + bool& bReallocFlag); + + +/* + * free memory dynamically allocated during decoding + */ +void WelsFreeDynamicMemory (PWelsDecoderContext pCtx); + +/*! + * \brief keep the picture resolution (taken from the slice header) synchronized among different parts (i.e., memory related and so on) + * inside the decoder + * ( MB coordinate and parts of data within decoder context structure ) + * \param pCtx Wels decoder context + * \param kiMbWidth MB width + * \param kiMbHeight MB height + * \return 0 - successful; non-zero - something wrong + */ +int32_t SyncPictureResolutionExt (PWelsDecoderContext pCtx, const int32_t kiMbWidth, const int32_t kiMbHeight); + +/*! + * \brief init decoder predictive function pointers including ASM functions during MB reconstruction + * \param pCtx Wels decoder context + * \param uiCpuFlag cpu assembly indication + */ +void InitPredFunc (PWelsDecoderContext pCtx, uint32_t uiCpuFlag); + +/*!
+ * \brief init decoder internal function pointers including ASM functions + * \param pCtx Wels decoder context + * \param uiCpuFlag cpu assembly indication + */ +void InitDecFuncs (PWelsDecoderContext pCtx, uint32_t uiCpuFlag); + +void GetVclNalTemporalId (PWelsDecoderContext pCtx); //get the info on whether or not the current AU has a VCL NAL, +//and if YES, get the temporal ID + +//reset the number-related decoder statistics info +void ResetDecStatNums (SDecoderStatistics* pDecStat); +//update information when freezing occurs, including IDR/non-IDR number +void UpdateDecStatFreezingInfo (const bool kbIdrFlag, SDecoderStatistics* pDecStat); +//update information when no freezing occurs, including QP, correct IDR number, ECed IDR number +void UpdateDecStatNoFreezingInfo (PWelsDecoderContext pCtx); +//update decoder statistics information +void UpdateDecStat (PWelsDecoderContext pCtx, const bool kbOutput); +//Destroy picture buffer +void DestroyPicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, CMemoryAlign* pMa); +//reset picture reordering buffer list +void ResetReorderingPictureBuffers (PPictReoderingStatus pPictReoderingStatus, PPictInfo pPictInfo, + const bool& bFullReset); + +#ifdef __cplusplus +} +#endif//__cplusplus + +} // namespace WelsDec + +#endif//WELS_DECODER_SYSTEM_ARCHITECTURE_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/decoder_context.h b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/decoder_context.h new file mode 100644 index 000000000..7e8e9d798 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/decoder_context.h @@ -0,0 +1,587 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file decoder_context.h + * + * \brief mainly interface introduced in Wels decoder side + * + * \date 3/4/2009 Created + * + ************************************************************************************* + */ +#ifndef WELS_DECODER_FRAMEWORK_H__ +#define WELS_DECODER_FRAMEWORK_H__ +#include "typedefs.h" +#include "utils.h" +#include "wels_const.h" +#include "wels_common_basis.h" +#include "wels_common_defs.h" +#include "codec_app_def.h" +#include "parameter_sets.h" +#include "nalu.h" +#include "dec_frame.h" +#include "pic_queue.h" +#include "vlc_decoder.h" +#include "fmo.h" +#include "crt_util_safe_x.h" +#include "mb_cache.h" +#include "expand_pic.h" +#include "mc.h" +#include "memory_align.h" +#include "wels_decoder_thread.h" + +namespace WelsDec { +#define MAX_PRED_MODE_ID_I16x16 3 +#define MAX_PRED_MODE_ID_CHROMA 3 +#define MAX_PRED_MODE_ID_I4x4 8 +#define WELS_QP_MAX 51 + +#define LONG_TERM_REF +#define IMinInt32 (-0x7FFFFFFF) // parenthesized so the unary minus cannot combine with an adjacent operator at the expansion site; value is -(2^31 - 1), i.e. one above the int32_t minimum +typedef struct SWels_Cabac_Element { + uint8_t uiState; + uint8_t uiMPS; +} SWelsCabacCtx, *PWelsCabacCtx; + +typedef struct { + uint64_t uiRange; + uint64_t uiOffset; + int32_t iBitsLeft; + uint8_t* pBuffStart; + uint8_t* pBuffCurr; + uint8_t* pBuffEnd; +} SWelsCabacDecEngine, *PWelsCabacDecEngine; + +#define NEW_CTX_OFFSET_MB_TYPE_I 3 +#define NEW_CTX_OFFSET_SKIP 11 +#define NEW_CTX_OFFSET_SUBMB_TYPE 21 +#define NEW_CTX_OFFSET_B_SUBMB_TYPE 36 +#define NEW_CTX_OFFSET_MVD 40 +#define NEW_CTX_OFFSET_REF_NO 54 +#define NEW_CTX_OFFSET_DELTA_QP 60 +#define NEW_CTX_OFFSET_IPR 68 +#define NEW_CTX_OFFSET_CIPR 64 +#define NEW_CTX_OFFSET_CBP 73 +#define NEW_CTX_OFFSET_CBF 85 +#define NEW_CTX_OFFSET_MAP 105 +#define NEW_CTX_OFFSET_LAST 166 +#define NEW_CTX_OFFSET_ONE 227 +#define NEW_CTX_OFFSET_ABS 232 +#define NEW_CTX_OFFSET_TS_8x8_FLAG 399 +#define CTX_NUM_MVD 7 +#define CTX_NUM_CBP 4 +// Table 9-34 in Page 270 +#define NEW_CTX_OFFSET_TRANSFORM_SIZE_8X8_FLAG 399 +#define NEW_CTX_OFFSET_MAP_8x8 402 +#define NEW_CTX_OFFSET_LAST_8x8
417 +#define NEW_CTX_OFFSET_ONE_8x8 426 +#define NEW_CTX_OFFSET_ABS_8x8 431 // Puzzle, where is the definition? + +typedef struct TagDataBuffer { + uint8_t* pHead; + uint8_t* pEnd; + + uint8_t* pStartPos; + uint8_t* pCurPos; +} SDataBuffer; + +//limit size for SPS PPS total permitted size for parse_only +#define SPS_PPS_BS_SIZE 128 +typedef struct TagSpsBsInfo { + uint8_t pSpsBsBuf [SPS_PPS_BS_SIZE]; + int32_t iSpsId; + uint16_t uiSpsBsLen; +} SSpsBsInfo; + +typedef struct TagPpsBsInfo { + uint8_t pPpsBsBuf [SPS_PPS_BS_SIZE]; + int32_t iPpsId; + uint16_t uiPpsBsLen; +} SPpsBsInfo; +//#ifdef __cplusplus +//extern "C" { +//#endif//__cplusplus + +/* + * Need move below structures to function pointer to seperate module/file later + */ + +//typedef int32_t (*rec_mb) (Mb *cur_mb, PWelsDecoderContext pCtx); + +/*typedef for get intra predictor func pointer*/ +typedef void (*PGetIntraPredFunc) (uint8_t* pPred, const int32_t kiLumaStride); +typedef void (*PIdctResAddPredFunc) (uint8_t* pPred, const int32_t kiStride, int16_t* pRs); +typedef void (*PIdctFourResAddPredFunc) (uint8_t* pPred, int32_t iStride, int16_t* pRs, const int8_t* pNzc); +typedef void (*PExpandPictureFunc) (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicWidth, + const int32_t kiPicHeight); + +typedef void (*PGetIntraPred8x8Func) (uint8_t* pPred, const int32_t kiLumaStride, bool bTLAvail, bool bTRAvail); + +/**/ +typedef struct TagRefPic { + PPicture pRefList[LIST_A][MAX_DPB_COUNT]; // reference picture marking plus FIFO scheme + PPicture pShortRefList[LIST_A][MAX_DPB_COUNT]; + PPicture pLongRefList[LIST_A][MAX_DPB_COUNT]; + uint8_t uiRefCount[LIST_A]; + uint8_t uiShortRefCount[LIST_A]; + uint8_t uiLongRefCount[LIST_A]; // dependend on ref pic module + int32_t iMaxLongTermFrameIdx; +} SRefPic, *PRefPic; + +typedef void (*PCopyFunc) (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); +typedef struct TagCopyFunc { + PCopyFunc pCopyLumaFunc; + PCopyFunc pCopyChromaFunc; +} SCopyFunc; 
+ +//deblock module defination +struct TagDeblockingFunc; + +typedef struct tagDeblockingFilter { + uint8_t* pCsData[3]; // pointer to reconstructed picture data + int32_t iCsStride[2]; // Cs stride + EWelsSliceType eSliceType; + int8_t iSliceAlphaC0Offset; + int8_t iSliceBetaOffset; + int8_t iChromaQP[2]; + int8_t iLumaQP; + struct TagDeblockingFunc* pLoopf; + PPicture* pRefPics[LIST_A]; +} SDeblockingFilter, *PDeblockingFilter; + +typedef void (*PDeblockingFilterMbFunc) (PDqLayer pCurDqLayer, PDeblockingFilter filter, int32_t boundry_flag); +typedef void (*PLumaDeblockingLT4Func) (uint8_t* iSampleY, int32_t iStride, int32_t iAlpha, int32_t iBeta, + int8_t* iTc); +typedef void (*PLumaDeblockingEQ4Func) (uint8_t* iSampleY, int32_t iStride, int32_t iAlpha, int32_t iBeta); +typedef void (*PChromaDeblockingLT4Func) (uint8_t* iSampleCb, uint8_t* iSampleCr, int32_t iStride, int32_t iAlpha, + int32_t iBeta, int8_t* iTc); +typedef void (*PChromaDeblockingEQ4Func) (uint8_t* iSampleCb, uint8_t* iSampleCr, int32_t iStride, int32_t iAlpha, + int32_t iBeta); +typedef void (*PChromaDeblockingLT4Func2) (uint8_t* iSampleCbr, int32_t iStride, int32_t iAlpha, + int32_t iBeta, int8_t* iTc); +typedef void (*PChromaDeblockingEQ4Func2) (uint8_t* iSampleCbr, int32_t iStride, int32_t iAlpha, + int32_t iBeta); + +typedef struct TagDeblockingFunc { + PLumaDeblockingLT4Func pfLumaDeblockingLT4Ver; + PLumaDeblockingEQ4Func pfLumaDeblockingEQ4Ver; + PLumaDeblockingLT4Func pfLumaDeblockingLT4Hor; + PLumaDeblockingEQ4Func pfLumaDeblockingEQ4Hor; + + PChromaDeblockingLT4Func pfChromaDeblockingLT4Ver; + PChromaDeblockingEQ4Func pfChromaDeblockingEQ4Ver; + PChromaDeblockingLT4Func pfChromaDeblockingLT4Hor; + PChromaDeblockingEQ4Func pfChromaDeblockingEQ4Hor; + + PChromaDeblockingLT4Func2 pfChromaDeblockingLT4Ver2; + PChromaDeblockingEQ4Func2 pfChromaDeblockingEQ4Ver2; + PChromaDeblockingLT4Func2 pfChromaDeblockingLT4Hor2; + PChromaDeblockingEQ4Func2 pfChromaDeblockingEQ4Hor2; + +} SDeblockingFunc, 
*PDeblockingFunc; + +typedef void (*PWelsNonZeroCountFunc) (int8_t* pNonZeroCount); +typedef void (*PWelsBlockZeroFunc) (int16_t* block, int32_t stride); +typedef struct TagBlockFunc { + PWelsNonZeroCountFunc pWelsSetNonZeroCountFunc; + PWelsBlockZeroFunc pWelsBlockZero16x16Func; + PWelsBlockZeroFunc pWelsBlockZero8x8Func; +} SBlockFunc; + +typedef void (*PWelsFillNeighborMbInfoIntra4x4Func) (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, + int8_t* pIntraPredMode, PDqLayer pCurDqLayer); +typedef void (*PWelsMapNeighToSample) (PWelsNeighAvail pNeighAvail, int32_t* pSampleAvail); +typedef void (*PWelsMap16NeighToSample) (PWelsNeighAvail pNeighAvail, uint8_t* pSampleAvail); +typedef int32_t (*PWelsParseIntra4x4ModeFunc) (PWelsNeighAvail pNeighAvail, int8_t* pIntraPredMode, PBitStringAux pBs, + PDqLayer pCurDqLayer); +typedef int32_t (*PWelsParseIntra16x16ModeFunc) (PWelsNeighAvail pNeighAvail, PBitStringAux pBs, PDqLayer pCurDqLayer); + +enum { + OVERWRITE_NONE = 0, + OVERWRITE_PPS = 1, + OVERWRITE_SPS = 1 << 1, + OVERWRITE_SUBSETSPS = 1 << 2 +}; + + +//Decoder SPS and PPS global CTX +typedef struct tagWelsWelsDecoderSpsPpsCTX { + SPosOffset sFrameCrop; + + SSps sSpsBuffer[MAX_SPS_COUNT + 1]; + SPps sPpsBuffer[MAX_PPS_COUNT + 1]; + + SSubsetSps sSubsetSpsBuffer[MAX_SPS_COUNT + 1]; + SNalUnit sPrefixNal; + + PSps pActiveLayerSps[MAX_LAYER_NUM]; + bool bAvcBasedFlag; // For decoding bitstream: + + // for EC parameter sets + bool bSpsExistAheadFlag; // whether does SPS NAL exist ahead of sequence? + bool bSubspsExistAheadFlag;// whether does Subset SPS NAL exist ahead of sequence? + bool bPpsExistAheadFlag; // whether does PPS NAL exist ahead of sequence? 
+ + int32_t iSpsErrorIgnored; + int32_t iSubSpsErrorIgnored; + int32_t iPpsErrorIgnored; + + bool bSpsAvailFlags[MAX_SPS_COUNT]; + bool bSubspsAvailFlags[MAX_SPS_COUNT]; + bool bPpsAvailFlags[MAX_PPS_COUNT]; + int32_t iPPSLastInvalidId; + int32_t iPPSInvalidNum; + int32_t iSPSLastInvalidId; + int32_t iSPSInvalidNum; + int32_t iSubSPSLastInvalidId; + int32_t iSubSPSInvalidNum; + int32_t iSeqId; //sequence id + int iOverwriteFlags; +} SWelsDecoderSpsPpsCTX, *PWelsDecoderSpsPpsCTX; + +//Last Decoded Picture Info +typedef struct tagSWelsLastDecPicInfo { + // Save the last nal header info + SNalUnitHeaderExt sLastNalHdrExt; + SSliceHeader sLastSliceHeader; + int32_t iPrevPicOrderCntMsb; + int32_t iPrevPicOrderCntLsb; + PPicture pPreviousDecodedPictureInDpb; //pointer to previously decoded picture in DPB for error concealment + int32_t iPrevFrameNum;// frame number of previous frame well decoded for non-truncated mode yet + bool bLastHasMmco5; + uint32_t uiDecodingTimeStamp; //represent relative decoding time stamps +} SWelsLastDecPicInfo, *PWelsLastDecPicInfo; + +typedef struct tagPictInfo { + SBufferInfo sBufferInfo; + int32_t iPOC; + int32_t iPicBuffIdx; + uint32_t uiDecodingTimeStamp; + bool bLastGOP; +} SPictInfo, *PPictInfo; + +typedef struct tagPictReoderingStatus { + int32_t iPictInfoIndex; + int32_t iMinPOC; + int32_t iNumOfPicts; + int32_t iLastGOPRemainPicts; + int32_t iLastWrittenPOC; + int32_t iLargestBufferedPicIndex; +} SPictReoderingStatus, *PPictReoderingStatus; + +/* + * SWelsDecoderContext: to maintail all modules data over decoder@framework + */ + +typedef struct TagWelsDecoderContext { + SLogContext sLogCtx; +// Input + void* + pArgDec; // structured arguments for decoder, reserved here for extension in the future + + SDataBuffer sRawData; + SDataBuffer sSavedData; //for parse only purpose + +// Configuration + SDecodingParam* pParam; + uint32_t uiCpuFlag; // CPU compatibility detected + + VIDEO_BITSTREAM_TYPE eVideoType; //indicate the type of video 
to decide whether or not to do qp_delta error detection. + bool bHaveGotMemory; // global memory for decoder context related ever requested? + + int32_t iImgWidthInPixel; // width of image in pixel reconstruction picture to be output + int32_t iImgHeightInPixel;// height of image in pixel reconstruction picture to be output + int32_t + iLastImgWidthInPixel; // width of image in last successful pixel reconstruction picture to be output + int32_t + iLastImgHeightInPixel;// height of image in last successful pixel reconstruction picture to be output + bool bFreezeOutput; // indicating current frame freezing. Default: true + + +// Derived common elements + SNalUnitHeader sCurNalHead; + EWelsSliceType eSliceType; // Slice type + bool bUsedAsRef; //flag as ref + int32_t iFrameNum; + int32_t iErrorCode; // error code return while decoding in case packets lost + SFmo sFmoList[MAX_PPS_COUNT]; // list for FMO storage + PFmo pFmo; // current fmo context after parsed slice_header + int32_t iActiveFmoNum; // active count number of fmo context in list + + /*needed info by decode slice level and mb level*/ + int32_t + iDecBlockOffsetArray[24]; // address talbe for sub 4x4 block in intra4x4_mb, so no need to caculta the address every time. 
+ + struct { + uint32_t* pMbType[LAYER_NUM_EXCHANGEABLE]; /* mb type */ + int16_t (*pMv[LAYER_NUM_EXCHANGEABLE][LIST_A])[MB_BLOCK4x4_NUM][MV_A]; //[LAYER_NUM_EXCHANGEABLE MB_BLOCK4x4_NUM*] + int8_t (*pRefIndex[LAYER_NUM_EXCHANGEABLE][LIST_A])[MB_BLOCK4x4_NUM]; + int8_t (*pDirect[LAYER_NUM_EXCHANGEABLE])[MB_BLOCK4x4_NUM]; + bool* pNoSubMbPartSizeLessThan8x8Flag[LAYER_NUM_EXCHANGEABLE]; + bool* pTransformSize8x8Flag[LAYER_NUM_EXCHANGEABLE]; + int8_t* pLumaQp[LAYER_NUM_EXCHANGEABLE]; /*mb luma_qp*/ + int8_t (*pChromaQp[LAYER_NUM_EXCHANGEABLE])[2]; /*mb chroma_qp*/ + int16_t (*pMvd[LAYER_NUM_EXCHANGEABLE][LIST_A])[MB_BLOCK4x4_NUM][MV_A]; //[LAYER_NUM_EXCHANGEABLE MB_BLOCK4x4_NUM*] + uint16_t* pCbfDc[LAYER_NUM_EXCHANGEABLE]; + int8_t (*pNzc[LAYER_NUM_EXCHANGEABLE])[24]; + int8_t (*pNzcRs[LAYER_NUM_EXCHANGEABLE])[24]; + int16_t (*pScaledTCoeff[LAYER_NUM_EXCHANGEABLE])[MB_COEFF_LIST_SIZE]; /*need be aligned*/ + int8_t (*pIntraPredMode[LAYER_NUM_EXCHANGEABLE])[8]; //0~3 top4x4 ; 4~6 left 4x4; 7 intra16x16 + int8_t (*pIntra4x4FinalMode[LAYER_NUM_EXCHANGEABLE])[MB_BLOCK4x4_NUM]; + uint8_t* pIntraNxNAvailFlag[LAYER_NUM_EXCHANGEABLE]; + int8_t* pChromaPredMode[LAYER_NUM_EXCHANGEABLE]; + int8_t* pCbp[LAYER_NUM_EXCHANGEABLE]; + uint8_t (*pMotionPredFlag[LAYER_NUM_EXCHANGEABLE][LIST_A])[MB_PARTITION_SIZE]; // 8x8 + uint32_t (*pSubMbType[LAYER_NUM_EXCHANGEABLE])[MB_SUB_PARTITION_SIZE]; + int32_t* pSliceIdc[LAYER_NUM_EXCHANGEABLE]; // using int32_t for slice_idc + int8_t* pResidualPredFlag[LAYER_NUM_EXCHANGEABLE]; + int8_t* pInterPredictionDoneFlag[LAYER_NUM_EXCHANGEABLE]; + bool* pMbCorrectlyDecodedFlag[LAYER_NUM_EXCHANGEABLE]; + bool* pMbRefConcealedFlag[LAYER_NUM_EXCHANGEABLE]; + uint32_t iMbWidth; + uint32_t iMbHeight; + } sMb; + + +// reconstruction picture + PPicture pDec; //pointer to current picture being reconstructed + + PPicture + pTempDec; //pointer to temp decoder picture to be used only for Bi Prediction. 
+ +// reference pictures + SRefPic sRefPic; + SRefPic sTmpRefPic; //used to temporarily save RefPic for next active thread + SVlcTable* pVlcTable; // vlc table + + SBitStringAux sBs; + int32_t iMaxBsBufferSizeInByte; //actual memory size for BS buffer + + /* Global memory external */ + SWelsDecoderSpsPpsCTX sSpsPpsCtx; + bool bHasNewSps; + + SPosOffset sFrameCrop; + + PSliceHeader pSliceHeader; + + PPicBuff pPicBuff; // Initially allocated memory for pictures which are used in decoding. + int32_t iPicQueueNumber; + + PAccessUnit pAccessUnitList; // current access unit list to be performed + //PSps pActiveLayerSps[MAX_LAYER_NUM]; + PSps pSps; // used by current AU + PPps pPps; // used by current AU +// Memory for pAccessUnitList is dynamically held till decoder destruction. + PDqLayer + pCurDqLayer; // current DQ layer representation, also carry reference base layer if applicable + PDqLayer pDqLayersList[LAYER_NUM_EXCHANGEABLE]; // DQ layers list with memory allocated + PNalUnit pNalCur; // point to current NAL Nnit + uint8_t uiNalRefIdc; // NalRefIdc for easy access; + int32_t iPicWidthReq; // picture width have requested the memory + int32_t iPicHeightReq; // picture height have requested the memory + + uint8_t uiTargetDqId; // maximal DQ ID in current access unit, meaning target layer ID + //bool bAvcBasedFlag; // For decoding bitstream: + bool bEndOfStreamFlag; // Flag on end of stream requested by external application layer + bool bInstantDecFlag; // Flag for no-delay decoding + bool bInitialDqLayersMem; // dq layers related memory is available? 
+ + bool bOnlyOneLayerInCurAuFlag; //only one layer in current AU: 1 + + bool bReferenceLostAtT0Flag; + int32_t iTotalNumMbRec; //record current number of decoded MB +#ifdef LONG_TERM_REF + bool bParamSetsLostFlag; //sps or pps do not exist or not correct + + bool + bCurAuContainLtrMarkSeFlag; //current AU has the LTR marking syntax element, mark the previous frame or self + int32_t iFrameNumOfAuMarkedLtr; //if bCurAuContainLtrMarkSeFlag==true, SHOULD set this variable + + uint16_t uiCurIdrPicId; +#endif + bool bNewSeqBegin; + bool bNextNewSeqBegin; + +//for Parse only + bool bFramePending; + bool bFrameFinish; + int32_t iNalNum; + int32_t iMaxNalNum; //permitted max NAL num stored in parser + SSpsBsInfo sSpsBsInfo [MAX_SPS_COUNT]; + SSpsBsInfo sSubsetSpsBsInfo [MAX_PPS_COUNT]; + SPpsBsInfo sPpsBsInfo [MAX_PPS_COUNT]; + SParserBsInfo* pParserBsInfo; + + //PPicture pPreviousDecodedPictureInDpb; //pointer to previously decoded picture in DPB for error concealment + PGetIntraPredFunc pGetI16x16LumaPredFunc[7]; //h264_predict_copy_16x16; + PGetIntraPredFunc pGetI4x4LumaPredFunc[14]; // h264_predict_4x4_t + PGetIntraPredFunc pGetIChromaPredFunc[7]; // h264_predict_8x8_t + PIdctResAddPredFunc pIdctResAddPredFunc; + PIdctFourResAddPredFunc pIdctFourResAddPredFunc; + SMcFunc sMcFunc; + //Transform8x8 + PGetIntraPred8x8Func pGetI8x8LumaPredFunc[14]; + PIdctResAddPredFunc pIdctResAddPredFunc8x8; + +//For error concealment + SCopyFunc sCopyFunc; + /* For Deblocking */ + SDeblockingFunc sDeblockingFunc; + SExpandPicFunc sExpandPicFunc; + + /* For Block */ + SBlockFunc sBlockFunc; + + int32_t iCurSeqIntervalTargetDependId; + int32_t iCurSeqIntervalMaxPicWidth; + int32_t iCurSeqIntervalMaxPicHeight; + + PWelsFillNeighborMbInfoIntra4x4Func pFillInfoCacheIntraNxNFunc; + PWelsMapNeighToSample pMapNxNNeighToSampleFunc; + PWelsMap16NeighToSample pMap16x16NeighToSampleFunc; + +//feedback whether or not have VCL in current AU, and the temporal ID + int32_t iFeedbackVclNalInAu; + 
int32_t iFeedbackTidInAu; + int32_t iFeedbackNalRefIdc; + + bool bAuReadyFlag; // true: one au is ready for decoding; false: default value + + bool bPrintFrameErrorTraceFlag; //true: can print info for upper layer + int32_t iIgnoredErrorInfoPacketCount; //store the packet number with error decoding info +//trace handle + void* pTraceHandle; + + PWelsLastDecPicInfo pLastDecPicInfo; + + SWelsCabacCtx sWelsCabacContexts[4][WELS_QP_MAX + 1][WELS_CONTEXT_COUNT]; + bool bCabacInited; + SWelsCabacCtx pCabacCtx[WELS_CONTEXT_COUNT]; + PWelsCabacDecEngine pCabacDecEngine; + double dDecTime; + SDecoderStatistics* pDecoderStatistics; // For real time debugging + int32_t iMbEcedNum; + int32_t iMbEcedPropNum; + int32_t iMbNum; + bool bMbRefConcealed; + bool bRPLRError; + int32_t iECMVs[16][2]; + PPicture pECRefPic[16]; + unsigned long long uiTimeStamp; + uint32_t uiDecodingTimeStamp; //represent relative decoding time stamps +// To support scaling list HP + uint16_t pDequant_coeff_buffer4x4[6][52][16]; + uint16_t pDequant_coeff_buffer8x8[6][52][64]; + uint16_t (*pDequant_coeff4x4[6])[16];// 4x4 sclaing list value pointer + uint16_t (*pDequant_coeff8x8[6])[64];//64 residual coeff ,with 6 kinds of residual type, 52 qp level + int iDequantCoeffPpsid;//When a new pps actived, reinitialised the scaling list value + bool bDequantCoeff4x4Init; + bool bUseScalingList; + CMemoryAlign* pMemAlign; + void* pThreadCtx; + void* pLastThreadCtx; + WELS_MUTEX* pCsDecoder; + int16_t lastReadyHeightOffset[LIST_A][MAX_REF_PIC_COUNT]; //last ready reference MB offset + PPictInfo pPictInfoList; + PPictReoderingStatus pPictReoderingStatus; + SBufferInfo* pDstInfo; +} SWelsDecoderContext, *PWelsDecoderContext; + +typedef struct tagSWelsDecThread { + SWelsDecSemphore* sIsBusy; + SWelsDecSemphore sIsActivated; + SWelsDecSemphore sIsIdle; + SWelsDecThread sThrHandle; + uint32_t uiCommand; + uint32_t uiThrNum; + uint32_t uiThrMaxNum; + uint32_t uiThrStackSize; + DECLARE_PROCTHREAD_PTR (pThrProcMain); +} 
SWelsDecThreadInfo, *PWelsDecThreadInfo; + +typedef struct tagSWelsDecThreadCtx { + SWelsDecThreadInfo sThreadInfo; + PWelsDecoderContext pCtx; + void* threadCtxOwner; + uint8_t* kpSrc; + int32_t kiSrcLen; + uint8_t** ppDst; + SBufferInfo sDstInfo; + PPicture pDec; + SWelsDecEvent sImageReady; + SWelsDecEvent sSliceDecodeStart; + SWelsDecEvent sSliceDecodeFinish; + int32_t iPicBuffIdx; //picBuff Index +} SWelsDecoderThreadCTX, *PWelsDecoderThreadCTX; + +static inline void ResetActiveSPSForEachLayer (PWelsDecoderContext pCtx) { + if (pCtx->iTotalNumMbRec == 0) { + for (int i = 0; i < MAX_LAYER_NUM; i++) { + pCtx->sSpsPpsCtx.pActiveLayerSps[i] = NULL; + } + } +} +static inline int32_t GetThreadCount (PWelsDecoderContext pCtx) { + int32_t iThreadCount = 0; + if (pCtx->pThreadCtx != NULL) { + PWelsDecoderThreadCTX pThreadCtx = (PWelsDecoderThreadCTX)pCtx->pThreadCtx; + iThreadCount = pThreadCtx->sThreadInfo.uiThrMaxNum; + } + return iThreadCount; +} +//GetPrevFrameNum only applies when thread count >= 2; with no thread context attached it falls back to pLastDecPicInfo->iPrevFrameNum +static inline int32_t GetPrevFrameNum (PWelsDecoderContext pCtx) { + if (pCtx->pThreadCtx != NULL && pCtx->uiDecodingTimeStamp > 0) { + PWelsDecoderThreadCTX pThreadCtx = (PWelsDecoderThreadCTX)pCtx->pThreadCtx; + int32_t iThreadCount = int32_t (pThreadCtx->sThreadInfo.uiThrMaxNum); + int32_t uiThrNum = int32_t (pThreadCtx->sThreadInfo.uiThrNum); + for (int32_t i = 0; i < iThreadCount; ++i) { + int32_t id = i - uiThrNum; + if (id != 0 && pThreadCtx[id].pCtx->uiDecodingTimeStamp == pCtx->uiDecodingTimeStamp - 1) { + if (pThreadCtx[id].pCtx->pDec != NULL) { + int32_t iFrameNum = pThreadCtx[id].pCtx->pDec->iFrameNum; + if (iFrameNum >= 0) return iFrameNum; + } + return pThreadCtx[id].pCtx->iFrameNum; + } + } + } + return pCtx->pLastDecPicInfo->iPrevFrameNum; +} +//#ifdef __cplusplus +//} +//#endif//__cplusplus + + +} // namespace WelsDec + +#endif//WELS_DECODER_FRAMEWORK_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/decoder_core.h 
b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/decoder_core.h new file mode 100644 index 000000000..df2533996 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/decoder_core.h @@ -0,0 +1,206 @@ +/*! + * \copy + * Copyright (c) 2008-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * decoder_core.h + * + * Abstract + * Encapsulative core interfaces + * + * History + * 07/10/2008 Created + * + *****************************************************************************/ +#ifndef WELS_DECODER_CORE_H__ +#define WELS_DECODER_CORE_H__ + +#include "typedefs.h" +#include "wels_common_basis.h" +#include "decoder_context.h" + +#include "codec_def.h" + +namespace WelsDec { +/* + * InitBsBuffer + * Memory allocation for Bitstream Buffer + * return: + * 0 - success; otherwise returned error_no defined in error_no.h. + */ +int32_t InitBsBuffer (PWelsDecoderContext pCtx); + +/* + * ExpandBsBuffer + * Expand current BS buffer and copy its content + * new buffer size will consider input size as a reference + * return: + * 0 - success; otherwise returned error_no defined in error_no.h. + */ +int32_t ExpandBsBuffer (PWelsDecoderContext pCtx, const int32_t kiSrcLen); + +/* + * ExpandBsLenBuffer + * Expand current BS length buffer to double size or maximum, due to max slice number exceeding + * Parameter: + * kiCurrLen: current value of total nal number (including non-VCL nal) + * return: + * 0 - success; otherwise returned error_no defined in error_no.h. + */ +int32_t ExpandBsLenBuffer (PWelsDecoderContext pCtx, const int32_t kiCurrLen); + +/* + * CheckBsBuffer + * Check if current buffer size is enough + */ +int32_t CheckBsBuffer (PWelsDecoderContext pCtx, const int32_t kiSrcLen); + +/* + * WelsInitStaticMemory + * Memory request for introduced data at decoder start + * Especially for: + * rbsp_au_buffer, cur_dq_layer_ptr and ref_dq_layer_ptr in MB info cache. + * return: + * 0 - success; otherwise returned error_no defined in error_no.h. + */ +int32_t WelsInitStaticMemory (PWelsDecoderContext pCtx); + +/* + * WelsFreeStaticMemory + * Free memory introduced in WelsInitStaticMemory at destruction of decoder. + * + */ +void WelsFreeStaticMemory (PWelsDecoderContext pCtx); + +/*! 
+ * \brief request memory when maximal picture width and height are available + */ +int32_t InitialDqLayersContext (PWelsDecoderContext pCtx, const int32_t kiMaxWidth, const int32_t kiMaxHeight); + +/*! + * \brief free dq layer context memory related + */ +void UninitialDqLayersContext (PWelsDecoderContext pCtx); + +/* + * DecodeNalHeaderExt + * Trigger condition: NAL_UNIT_TYPE = NAL_UNIT_PREFIX or NAL_UNIT_CODED_SLICE_EXT + * Parameter: + * pNal: target NALUnit ptr + * pSrc: NAL Unit bitstream + */ +void DecodeNalHeaderExt (PNalUnit pNal, uint8_t* pSrc); + +/* + * ParseSliceHeaderSyntaxs + * Parse slice header of bitstream + */ +int32_t ParseSliceHeaderSyntaxs (PWelsDecoderContext pCtx, PBitStringAux pBs, const bool kbExtensionFlag); +/* + * Copy relative syntax elements of NALUnitHeaderExt, sRefPicBaseMarking and bStoreRefBasePicFlag in prefix nal unit. + * pSrc: mark as decoded prefix NAL + * pDst: succeeded VCL NAL based AVC (I/P Slice) + */ +bool PrefetchNalHeaderExtSyntax (PWelsDecoderContext pCtx, PNalUnit const kpDst, PNalUnit const kpSrc); + +/* +* WelsDecodeInitAccessUnitStart +* check and (re)allocate picture buffers on new sequence begin +* bit_len: size in bit length of data +* buf_len: size in byte length of data +* coded_au: mark an Access Unit decoding finished +* return: +* 0 - success; otherwise returned error_no defined in error_no.h +*/ +int32_t WelsDecodeInitAccessUnitStart (PWelsDecoderContext pCtx, SBufferInfo* pDstInfo); +/* +* AllocPicBuffOnNewSeqBegin +* check and (re)allocate picture buffers on new sequence begin +* return: +* 0 - success; otherwise returned error_no defined in error_no.h +*/ +int32_t AllocPicBuffOnNewSeqBegin (PWelsDecoderContext pCtx); + +/* +* InitConstructAccessUnit +* Init before constructing an access unit for given input bitstream, maybe partial NAL Unit, one or more Units are involved to +* joint a collective access unit. 
+* parameter\ +* SBufferInfo: Buffer info +* return: +* 0 - success; otherwise returned error_no defined in error_no.h +*/ +int32_t InitConstructAccessUnit (PWelsDecoderContext pCtx, SBufferInfo* pDstInfo); + +/* + * ConstructAccessUnit + * construct an access unit for given input bitstream, maybe partial NAL Unit, one or more Units are involved to + * joint a collective access unit. + * parameter\ + * buf: bitstream data buffer + * bit_len: size in bit length of data + * buf_len: size in byte length of data + * coded_au: mark an Access Unit decoding finished + * return: + * 0 - success; otherwise returned error_no defined in error_no.h + */ +int32_t ConstructAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferInfo* pDstInfo); + + +/* + * DecodeCurrentAccessUnit + * Decode current access unit when current AU is completed. + */ +int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferInfo* pDstInfo); + +/* + * Check if frame is completed and EC is required + */ +bool CheckAndFinishLastPic (PWelsDecoderContext pCtx, uint8_t** pDst, SBufferInfo* pDstInfo); +/* + * Prepare current dq layer context initialization. 
+ */ +void WelsDqLayerDecodeStart (PWelsDecoderContext pCtx, PNalUnit pCurNal, PSps pSps, PPps pPps); + + +int32_t WelsDecodeAccessUnitStart (PWelsDecoderContext pCtx); +void WelsDecodeAccessUnitEnd (PWelsDecoderContext pCtx); +void DecodeFinishUpdate (PWelsDecoderContext pCtx); + +void ForceResetCurrentAccessUnit (PAccessUnit pAu); +void ForceClearCurrentNal (PAccessUnit pAu); + +bool CheckRefPicturesComplete (PWelsDecoderContext pCtx); // Check whether all ref pictures are complete + +void ForceResetParaSetStatusAndAUList (PWelsDecoderContext pCtx); +} // namespace WelsDec + +#endif//WELS_DECODER_CORE_H__ + + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/error_code.h b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/error_code.h new file mode 100644 index 000000000..8e9c37842 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/error_code.h @@ -0,0 +1,222 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file error_code.h + * + * \brief Error codes used in Wels decoder side + * + * \date 3/4/2009 Created + * + ************************************************************************************* + */ +#ifndef WELS_ERROR_CODE_H__ +#define WELS_ERROR_CODE_H__ + +namespace WelsDec { + +typedef enum TagWelsErr { + ERR_NONE = 0, + ERR_INVALID_PARAMETERS = 1, + ERR_MALLOC_FAILED = 2, + ERR_API_FAILED = 3, + + ERR_BOUND = 31 +} EWelsErr; + +/* + * Specified error format: + * ERR_NO = (ERR_LEVEL_FROM (HIGH WORD) << 16) | (ERR_INFO_FROM (LOW WORD)) + * Both macro arguments are parenthesized so compound expressions keep their intended precedence. + */ +#define GENERATE_ERROR_NO(iErrLevel, iErrInfo) (((iErrLevel) << 16) | ((iErrInfo) & 0xFFFF)) +#define ERR_INVALID_INTRA4X4_MODE (-1) + +/* ERR_LEVEL */ +//----------------------------------------------------------------------------------------------------------- +enum { + ERR_LEVEL_ACCESS_UNIT = 1, + ERR_LEVEL_NAL_UNIT_HEADER, + ERR_LEVEL_PREFIX_NAL, + ERR_LEVEL_PARAM_SETS, + ERR_LEVEL_SLICE_HEADER, + ERR_LEVEL_SLICE_DATA, + ERR_LEVEL_MB_DATA +}; + +//----------------------------------------------------------------------------------------------------------- + +/* More detailed error information, maximal value is 65535 */ +//----------------------------------------------------------------------------------------------------------- +#define ERR_INFO_COMMON_BASE 1 +#define ERR_INFO_SYNTAX_BASE 1001 +#define ERR_INFO_LOGIC_BASE 10001 +enum { + /* Error from common system level: 
1-1000 */ + ERR_INFO_OUT_OF_MEMORY = ERR_INFO_COMMON_BASE, + ERR_INFO_INVALID_ACCESS, + ERR_INFO_INVALID_PTR, + ERR_INFO_INVALID_PARAM, + ERR_INFO_FILE_NO_FOUND, + ERR_INFO_PATH_NO_FOUND, + ERR_INFO_ACCESS_DENIED, + ERR_INFO_NOT_READY, + ERR_INFO_WRITE_FAULT, + ERR_INFO_READ_FAULT, + ERR_INFO_READ_OVERFLOW, + ERR_INFO_READ_LEADING_ZERO, + ERR_INFO_UNINIT, + /* Error from H.264 syntax elements parser: 1001-10000 */ + ERR_INFO_NO_PREFIX_CODE = ERR_INFO_SYNTAX_BASE, // No start prefix code indication + ERR_INFO_NO_PARAM_SETS, // No SPS and/ PPS before sequence header + ERR_INFO_PARAM_SETS_NOT_INTEGRATED, // Parameters sets (sps/pps) are not integrated at all before to decode VCL nal + ERR_INFO_SPS_ID_OVERFLOW, + ERR_INFO_PPS_ID_OVERFLOW, + ERR_INFO_INVALID_PROFILE_IDC, + ERR_INFO_UNMATCHED_LEVEL_IDC, + ERR_INFO_INVALID_POC_TYPE, + ERR_INFO_INVALID_MB_SIZE_INFO, + ERR_INFO_REF_COUNT_OVERFLOW, + ERR_INFO_CROPPING_NO_SUPPORTED, + ERR_INFO_INVALID_CROPPING_DATA, + ERR_INFO_UNSUPPORTED_VUI_HRD, + ERR_INFO_INVALID_SLICEGROUP, + ERR_INFO_INVALID_SLICEGROUP_MAP_TYPE, + ERR_INFO_INVALID_FRAME_NUM, + ERR_INFO_INVALID_IDR_PIC_ID, + ERR_INFO_INVALID_REDUNDANT_PIC_CNT, + ERR_INFO_INVALID_MAX_NUM_REF_FRAMES, + ERR_INFO_INVALID_MAX_MB_SIZE, + ERR_INFO_INVALID_FIRST_MB_IN_SLICE, + ERR_INFO_INVALID_NUM_REF_IDX_L0_ACTIVE_MINUS1, + ERR_INFO_INVALID_NUM_REF_IDX_L1_ACTIVE_MINUS1, + ERR_INFO_INVALID_SLICE_ALPHA_C0_OFFSET_DIV2, + ERR_INFO_INVALID_SLICE_BETA_OFFSET_DIV2, + ERR_INFO_FMO_INIT_FAIL, + ERR_INFO_SLICE_TYPE_OVERFLOW, + ERR_INFO_INVALID_CABAC_INIT_IDC, + ERR_INFO_INVALID_QP, + ERR_INFO_INVALID_PIC_INIT_QS, + ERR_INFO_INVALID_CHROMA_QP_INDEX_OFFSET, + ERR_INFO_INVALID_PIC_INIT_QP, + ERR_INFO_INVALID_LOG2_MAX_FRAME_NUM_MINUS4, + ERR_INFO_INVALID_LOG2_MAX_PIC_ORDER_CNT_LSB_MINUS4, + ERR_INFO_INVALID_NUM_REF_FRAME_IN_PIC_ORDER_CNT_CYCLE, + ERR_INFO_INVALID_DBLOCKING_IDC, + ERR_INFO_INVALID_MB_TYPE, + ERR_INFO_INVALID_MB_SKIP_RUN, + ERR_INFO_INVALID_SPS_ID, + ERR_INFO_INVALID_PPS_ID, + 
ERR_INFO_INVALID_SUB_MB_TYPE, + ERR_INFO_UNAVAILABLE_TOP_BLOCK_FOR_INTRA, + ERR_INFO_UNAVAILABLE_LEFT_BLOCK_FOR_INTRA, + ERR_INFO_INVALID_REF_INDEX, + ERR_INFO_INVALID_CBP, + ERR_INFO_DQUANT_OUT_OF_RANGE, + ERR_INFO_CAVLC_INVALID_PREFIX, + ERR_INFO_CAVLC_INVALID_LEVEL, + ERR_INFO_CAVLC_INVALID_TOTAL_COEFF_OR_TRAILING_ONES, + ERR_INFO_CAVLC_INVALID_ZERO_LEFT, + ERR_INFO_CAVLC_INVALID_RUN_BEFORE, + ERR_INFO_MV_OUT_OF_RANGE, + + ERR_INFO_INVALID_I4x4_PRED_MODE, + ERR_INFO_INVALID_I16x16_PRED_MODE, + ERR_INFO_INVALID_I_CHROMA_PRED_MODE, + + ERR_INFO_INVALID_LUMA_LOG2_WEIGHT_DENOM, + ERR_INFO_INVALID_CHROMA_LOG2_WEIGHT_DENOM, + ERR_INFO_INVALID_LUMA_WEIGHT, + ERR_INFO_INVALID_CHROMA_WEIGHT, + ERR_INFO_INVALID_LUMA_OFFSET, + ERR_INFO_INVALID_CHROMA_OFFSET, + + ERR_INFO_UNSUPPORTED_NON_BASELINE, + ERR_INFO_UNSUPPORTED_FMOTYPE, + ERR_INFO_UNSUPPORTED_MBAFF, + ERR_INFO_UNSUPPORTED_ILP, + ERR_INFO_UNSUPPORTED_CABAC_EL, + ERR_INFO_UNSUPPORTED_SPSI, + ERR_INFO_UNSUPPORTED_MGS, + ERR_INFO_UNSUPPORTED_BIPRED, + ERR_INFO_UNSUPPORTED_WP, + ERR_INFO_UNSUPPORTED_SLICESKIP, + + ERR_INFO_FRAMES_LOST, + ERR_INFO_DEPENDENCY_SPATIAL_LAYER_LOST, + ERR_INFO_DEPENDENCY_QUALIT_LAYER_LOST, + ERR_INFO_REFERENCE_PIC_LOST, + ERR_INFO_INVALID_REORDERING, + ERR_INFO_INVALID_MARKING, + + ERR_INFO_FMO_NOT_SUPPORTED_IN_BASE_LAYER, + ERR_INFO_INVALID_ESS, + ERR_INFO_INVALID_SLICE_TYPE, + ERR_INFO_INVALID_REF_MARKING, + ERR_INFO_INVALID_REF_REORDERING, + + /* Error from corresponding logic, 10001-65535 */ + ERR_INFO_NO_IDR_PIC = ERR_INFO_LOGIC_BASE, // NO IDR picture available before sequence header + ERR_INFO_EC_NO_NEIGHBOUR_MBS, + ERR_INFO_EC_UNEXPECTED_MB_TYPE, + ERR_INFO_EC_NO_ENOUGH_NEIGHBOUR_MBS, + ERR_INFO_DUPLICATE_FRAME_NUM, +//for LTR + ERR_INFO_INVALID_MMCO_NUM, + ERR_INFO_INVALID_MMCO_OPCODE_BASE, + ERR_INFO_INVALID_MMCO_SHORT2UNUSED, + EER_INFO_INVALID_MMCO_LONG2UNUSED, + ERR_INFO_INVALID_MMCO_SHOART2LONG, + ERR_INFO_INVALID_MMCO_REF_NUM_OVERFLOW, + 
ERR_INFO_INVALID_MMCO_REF_NUM_NOT_ENOUGH, + ERR_INFO_INVALID_MMCO_LONG_TERM_IDX_EXCEED_MAX, +//for CABAC + ERR_CABAC_NO_BS_TO_READ, + ERR_CABAC_UNEXPECTED_VALUE, +//for scaling list + ERR_SCALING_LIST_DELTA_SCALE, +//logic error related to multi-layer + ERR_INFO_WIDTH_MISMATCH, +//reconstruction error + ERR_INFO_MB_RECON_FAIL, + ERR_INFO_MB_NUM_EXCEED_FAIL, + ERR_INFO_BS_INCOMPLETE, + ERR_INFO_MB_NUM_INADEQUATE, +//parse only error + ERR_INFO_PARSEONLY_PENDING, + ERR_INFO_PARSEONLY_ERROR, +}; +//----------------------------------------------------------------------------------------------------------- + +} // namespace WelsDec + +#endif//WELS_ERROR_CODE_H__ + + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/error_concealment.h b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/error_concealment.h new file mode 100644 index 000000000..60a8e068f --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/error_concealment.h @@ -0,0 +1,68 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * \file error_concealment.h + * + * \brief Interfaces introduced for error concealment + * + * \date 04/14/2014 Created + * + ************************************************************************************* + */ +#ifndef WELS_ERROR_CONCEALMENT_H__ +#define WELS_ERROR_CONCEALMENT_H__ +#include "typedefs.h" +#include "rec_mb.h" +#include "decoder_context.h" + +namespace WelsDec { +//Initialize error concealment +void InitErrorCon (PWelsDecoderContext pCtx); +//Do error concealment using frame copy method +void DoErrorConFrameCopy (PWelsDecoderContext pCtx); +//Do error concealment using slice copy method +void DoErrorConSliceCopy (PWelsDecoderContext pCtx); +//Do error concealment using slice MV copy method +void DoMbECMvCopy (PWelsDecoderContext pCtx, PPicture pDec, PPicture pRef, int32_t iMbXy, int32_t iMbX, int32_t iMbY, + sMCRefMember* pMCRefMem, int32_t iCurrPoc); +void GetAvilInfoFromCorrectMb (PWelsDecoderContext pCtx); +void DoErrorConSliceMVCopy (PWelsDecoderContext pCtx); +//Mark erroneous frame as Ref Pic into DPB +int32_t MarkECFrameAsRef (PWelsDecoderContext pCtx); +//Judge if EC is needed to current frame +bool NeedErrorCon (PWelsDecoderContext pCtx); +// ImplementErrorConceal +// Do actual error concealment +void ImplementErrorCon (PWelsDecoderContext pCtx); + +} // namespace WelsDec + +#endif//WELS_ERROR_CONCEALMENT_H__ + diff --git 
a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/fmo.h b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/fmo.h new file mode 100644 index 000000000..7d17b9862 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/fmo.h @@ -0,0 +1,114 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file fmo.h + * + * \brief Flexible Macroblock Ordering implementation + * + * \date 2/4/2009 Created + * + ************************************************************************************* + */ +#ifndef WELS_FLEXIBLE_MACROBLOCK_ORDERING_H__ +#define WELS_FLEXIBLE_MACROBLOCK_ORDERING_H__ + +#include "typedefs.h" +#include "wels_const.h" +#include "parameter_sets.h" +#include "memory_align.h" + +namespace WelsDec { + +#ifndef MB_XY_T +#define MB_XY_T int32_t +#endif//MB_XY_T + +/*! + * \brief Wels Flexible Macroblock Ordering (FMO) + */ +typedef struct TagFmo { + uint8_t* pMbAllocMap; + int32_t iCountMbNum; + int32_t iSliceGroupCount; + int32_t iSliceGroupType; + bool bActiveFlag; + uint8_t uiReserved[3]; // reserved padding bytes +} SFmo, *PFmo; + + +/*! + * \brief Initialize Wels Flexible Macroblock Ordering (FMO) + * + * \param pFmo Wels fmo to be initialized + * \param pPps PPps + * \param kiMbWidth mb width + * \param kiMbHeight mb height + * \param pMa CMemoryAlign* (presumably the allocator used for the FMO buffers - TODO confirm) + * \return 0 - successful; non-zero - failed; + */ +int32_t InitFmo (PFmo pFmo, PPps pPps, const int32_t kiMbWidth, const int32_t kiMbHeight, CMemoryAlign* pMa); + +/*! + * \brief Uninitialize Wels Flexible Macroblock Ordering (FMO) list + * + * \param pFmo Wels base fmo ptr to be uninitialized + * \param kiCnt count number of PPS per list + * \param kiAvail count available number of PPS in list + * \param pMa CMemoryAlign* (presumably the allocator that owns the FMO buffers - TODO confirm) + * \return NONE + */ +void UninitFmoList (PFmo pFmo, const int32_t kiCnt, const int32_t kiAvail, CMemoryAlign* pMa); + +/*! + * \brief update/insert FMO parameter unit + * + * \param pFmo FMO context + * \param pSps PSps + * \param pPps PPps + * \param pActiveFmoNum int32_t* [in/out] + * \param pMa CMemoryAlign* (presumably the allocator used for the FMO buffers - TODO confirm) + * \return int32_t status code; NOTE(review): earlier text said true/false, which does not match the int32_t return type - presumably 0 on success as for InitFmo, confirm against fmo.cpp + */ +int32_t FmoParamUpdate (PFmo pFmo, PSps pSps, PPps pPps, int32_t* pActiveFmoNum, CMemoryAlign* pMa); + +/*! 
+ * \brief Get successive mb to be processed with given current mb_xy + * + * \param pFmo Wels fmo context + * \param iMbXy current mb_xy + * + * \return iNextMb - successful; -1 - failed; + */ +MB_XY_T FmoNextMb (PFmo pFmo, const MB_XY_T kiMbXy); + +} // namespace WelsDec + +#endif//WELS_FLEXIBLE_MACROBLOCK_ORDERING_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/get_intra_predictor.h b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/get_intra_predictor.h new file mode 100644 index 000000000..473997080 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/get_intra_predictor.h @@ -0,0 +1,191 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file get_intra_predictor.h + * + * \brief interfaces for get intra predictor about 16x16, 4x4, chroma. + * + * \date 4/2/2009 Created + * + ************************************************************************************* + */ + +#ifndef WELS_GET_INTRA_PREDICTOR_H__ +#define WELS_GET_INTRA_PREDICTOR_H__ + +#include "typedefs.h" + +namespace WelsDec { + +void WelsI4x4LumaPredV_c (uint8_t* pPred, const int32_t kiStride); +void WelsI4x4LumaPredH_c (uint8_t* pPred, const int32_t kiStride); +void WelsI4x4LumaPredDc_c (uint8_t* pPred, const int32_t kiStride); +void WelsI4x4LumaPredDcLeft_c (uint8_t* pPred, const int32_t kiStride); +void WelsI4x4LumaPredDcTop_c (uint8_t* pPred, const int32_t kiStride); +void WelsI4x4LumaPredDcNA_c (uint8_t* pPred, const int32_t kiStride); +void WelsI4x4LumaPredDDL_c (uint8_t* pPred, const int32_t kiStride); +void WelsI4x4LumaPredDDLTop_c (uint8_t* pPred, const int32_t kiStride); +void WelsI4x4LumaPredDDR_c (uint8_t* pPred, const int32_t kiStride); +void WelsI4x4LumaPredVL_c (uint8_t* pPred, const int32_t kiStride); +void WelsI4x4LumaPredVLTop_c (uint8_t* pPred, const int32_t kiStride); +void WelsI4x4LumaPredVR_c (uint8_t* pPred, const int32_t kiStride); +void WelsI4x4LumaPredHU_c (uint8_t* pPred, const int32_t kiStride); +void WelsI4x4LumaPredHD_c (uint8_t* pPred, const int32_t kiStride); + +void WelsI8x8LumaPredV_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool 
bTRAvail); +void WelsI8x8LumaPredH_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail); +void WelsI8x8LumaPredDc_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail); +void WelsI8x8LumaPredDcLeft_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail); +void WelsI8x8LumaPredDcTop_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail); +void WelsI8x8LumaPredDcNA_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail); +void WelsI8x8LumaPredDDL_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail); +void WelsI8x8LumaPredDDLTop_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail); +void WelsI8x8LumaPredDDR_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail); +void WelsI8x8LumaPredVL_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail); +void WelsI8x8LumaPredVLTop_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail); +void WelsI8x8LumaPredVR_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail); +void WelsI8x8LumaPredHU_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail); +void WelsI8x8LumaPredHD_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail); + +void WelsIChromaPredV_c (uint8_t* pPred, const int32_t kiStride); +void WelsIChromaPredH_c (uint8_t* pPred, const int32_t kiStride); +void WelsIChromaPredPlane_c (uint8_t* pPred, const int32_t kiStride); +void WelsIChromaPredDc_c (uint8_t* pPred, const int32_t kiStride); +void WelsIChromaPredDcLeft_c (uint8_t* pPred, const int32_t kiStride); +void WelsIChromaPredDcTop_c (uint8_t* pPred, const int32_t kiStride); +void WelsIChromaPredDcNA_c (uint8_t* pPred, const int32_t kiStride); + +void WelsI16x16LumaPredV_c (uint8_t* pPred, const int32_t kiStride); +void WelsI16x16LumaPredH_c (uint8_t* pPred, const int32_t kiStride); +void WelsI16x16LumaPredPlane_c (uint8_t* pPred, const 
int32_t kiStride); +void WelsI16x16LumaPredDc_c (uint8_t* pPred, const int32_t kiStride); +void WelsI16x16LumaPredDcTop_c (uint8_t* pPred, const int32_t kiStride); +void WelsI16x16LumaPredDcLeft_c (uint8_t* pPred, const int32_t kiStride); +void WelsI16x16LumaPredDcNA_c (uint8_t* pPred, const int32_t kiStride); + +#if defined(__cplusplus) +extern "C" { +#endif//__cplusplus + +#if defined(X86_ASM) +void WelsDecoderI16x16LumaPredPlane_sse2 (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI16x16LumaPredH_sse2 (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI16x16LumaPredV_sse2 (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI16x16LumaPredDc_sse2 (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI16x16LumaPredDcTop_sse2 (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI16x16LumaPredDcNA_sse2 (uint8_t* pPred, const int32_t kiStride); + +void WelsDecoderIChromaPredDcTop_sse2 (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderIChromaPredPlane_sse2 (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderIChromaPredDc_sse2 (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderIChromaPredH_mmx (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderIChromaPredV_mmx (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderIChromaPredDcLeft_mmx (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderIChromaPredDcNA_mmx (uint8_t* pPred, const int32_t kiStride); + +void WelsDecoderI4x4LumaPredH_sse2 (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI4x4LumaPredDDR_mmx (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI4x4LumaPredHD_mmx (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI4x4LumaPredHU_mmx (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI4x4LumaPredVR_mmx (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI4x4LumaPredDDL_mmx (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI4x4LumaPredVL_mmx (uint8_t* pPred, const int32_t kiStride); 
+#endif//X86_ASM + +#if defined(HAVE_NEON) +void WelsDecoderI16x16LumaPredV_neon (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI16x16LumaPredH_neon (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI16x16LumaPredDc_neon (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI16x16LumaPredPlane_neon (uint8_t* pPred, const int32_t kiStride); + +void WelsDecoderI4x4LumaPredV_neon (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI4x4LumaPredH_neon (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI4x4LumaPredDDL_neon (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI4x4LumaPredDDR_neon (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI4x4LumaPredVL_neon (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI4x4LumaPredVR_neon (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI4x4LumaPredHU_neon (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI4x4LumaPredHD_neon (uint8_t* pPred, const int32_t kiStride); + +void WelsDecoderIChromaPredV_neon (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderIChromaPredH_neon (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderIChromaPredDc_neon (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderIChromaPredPlane_neon (uint8_t* pPred, const int32_t kiStride); +#endif//HAVE_NEON + +#if defined(HAVE_NEON_AARCH64) +void WelsDecoderI16x16LumaPredV_AArch64_neon (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI16x16LumaPredH_AArch64_neon (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI16x16LumaPredDc_AArch64_neon (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI16x16LumaPredPlane_AArch64_neon (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI16x16LumaPredDcTop_AArch64_neon (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI16x16LumaPredDcLeft_AArch64_neon (uint8_t* pPred, const int32_t kiStride); + +void WelsDecoderI4x4LumaPredH_AArch64_neon (uint8_t* pPred, const int32_t kiStride); 
+void WelsDecoderI4x4LumaPredDDL_AArch64_neon (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI4x4LumaPredDDLTop_AArch64_neon (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI4x4LumaPredVL_AArch64_neon (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI4x4LumaPredVLTop_AArch64_neon (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI4x4LumaPredVR_AArch64_neon (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI4x4LumaPredHU_AArch64_neon (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI4x4LumaPredHD_AArch64_neon (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI4x4LumaPredDc_AArch64_neon (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI4x4LumaPredDcTop_AArch64_neon (uint8_t* pPred, const int32_t kiStride); + +void WelsDecoderIChromaPredV_AArch64_neon (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderIChromaPredH_AArch64_neon (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderIChromaPredDc_AArch64_neon (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderIChromaPredPlane_AArch64_neon (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderIChromaPredDcTop_AArch64_neon (uint8_t* pPred, const int32_t kiStride); +#endif//HAVE_NEON_AARCH64 + +#if defined(HAVE_MMI) +void WelsDecoderI16x16LumaPredDc_mmi (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI16x16LumaPredPlane_mmi (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI16x16LumaPredH_mmi (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI16x16LumaPredV_mmi (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI16x16LumaPredDcTop_mmi (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderI16x16LumaPredDcNA_mmi (uint8_t* pPred, const int32_t kiStride); + +void WelsDecoderIChromaPredDcTop_mmi (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderIChromaPredPlane_mmi (uint8_t* pPred, const int32_t kiStride); +void WelsDecoderIChromaPredDc_mmi (uint8_t* pPred, const int32_t 
kiStride); +void WelsDecoderI4x4LumaPredH_mmi (uint8_t* pPred, const int32_t kiStride); +#endif//HAVE_MMI +#if defined(__cplusplus) +} +#endif//__cplusplus + +} // namespace WelsDec + +#endif //WELS_GET_INTRA_PREDICTOR_H__ + + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/manage_dec_ref.h b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/manage_dec_ref.h new file mode 100644 index 000000000..164ae15d6 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/manage_dec_ref.h @@ -0,0 +1,62 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file manage_dec_ref.h + * + * Abstract + * Interface for managing reference picture + * + * History + * 08/14/2009 Created + * + *****************************************************************************/ +#ifndef WELS_MANAGE_DEC_REF_H__ +#define WELS_MANAGE_DEC_REF_H__ + + +#include "typedefs.h" +#include "decoder_context.h" + +namespace WelsDec { + +void WelsResetRefPic (PWelsDecoderContext pCtx); +void WelsResetRefPicWithoutUnRef (PWelsDecoderContext pCtx); +int32_t WelsInitRefList (PWelsDecoderContext pCtx, int32_t iPoc); +int32_t WelsInitBSliceRefList (PWelsDecoderContext pCtx, int32_t iPoc); +int32_t WelsReorderRefList (PWelsDecoderContext pCtx); +int32_t WelsReorderRefList2 (PWelsDecoderContext pCtx); +int32_t WelsMarkAsRef (PWelsDecoderContext pCtx, PPicture pLastDec = NULL); + +} // namespace WelsDec + +#endif//WELS_MANAGE_DEC_REF_H__ + + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/mb_cache.h b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/mb_cache.h new file mode 100644 index 000000000..45adc4dd1 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/mb_cache.h @@ -0,0 +1,80 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +//mb_cache.h +#ifndef WELS_MACROBLOCK_CACHE_H__ +#define WELS_MACROBLOCK_CACHE_H__ + +#include "typedefs.h" + +namespace WelsDec { + +#define REF_NOT_AVAIL -2 +#define REF_NOT_IN_LIST -1 //intra + +/* + * MB Cache information, such one cache should be defined within a slice + */ +/* + * Cache for Luma Cache for Chroma(Cb, Cr) + * + * TL T T T T TL T T + * L - - - - L - - + * L - - - - L - - TR + * L - - - - + * L - - - - TR + * + */ + +////////////////////////mapping scan index//////////////////////// + +extern const uint8_t g_kuiScan4[16]; + +typedef struct TagNeighborAvail { +int32_t iTopAvail; +int32_t iLeftAvail; +int32_t iRightTopAvail; +int32_t iLeftTopAvail; //used for check intra_pred_mode avail or not //1: avail; 0: unavail + +int32_t iLeftType; +int32_t iTopType; +int32_t iLeftTopType; +int32_t iRightTopType; + +int8_t iTopCbp; +int8_t iLeftCbp; +int8_t iDummy[2]; //for align +} SWelsNeighAvail, *PWelsNeighAvail; + +} // namespace WelsDec + +#endif//WELS_MACROBLOCK_CACHE_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/memmgr_nal_unit.h 
b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/memmgr_nal_unit.h new file mode 100644 index 000000000..380e32ca9 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/memmgr_nal_unit.h @@ -0,0 +1,66 @@ +/*! + * \copy + * Copyright (c) 2008-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * memmgr_nal_unit.h + * + * Abstract + * memory manager utils for NAL Unit list available + * + * History + * 07/10/2008 Created + * + *****************************************************************************/ +#ifndef WELS_MEMORY_MANAGER_NAL_UNIT_H__ +#define WELS_MEMORY_MANAGER_NAL_UNIT_H__ + +#include "typedefs.h" +#include "wels_common_basis.h" +#include "nalu.h" +#include "memory_align.h" + +namespace WelsDec { + +int32_t MemInitNalList (PAccessUnit* ppAu, const uint32_t kuiSize, CMemoryAlign* pMa); + +int32_t MemFreeNalList (PAccessUnit* ppAu, CMemoryAlign* pMa); + +/* + * MemGetNextNal + * Get next NAL Unit for using. + * Need expand NAL Unit list if exceeding count number of available NAL Units within an Access Unit + */ +PNalUnit MemGetNextNal (PAccessUnit* ppAu, CMemoryAlign* pMa); + +} // namespace WelsDec + +#endif//WELS_MEMORY_MANAGER_NAL_UNIT_H__ + + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/mv_pred.h b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/mv_pred.h new file mode 100644 index 000000000..943c38d53 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/mv_pred.h @@ -0,0 +1,195 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file mv_pred.h + * + * \brief Get MV predictor and update motion vector of mb cache + * + * \date 05/22/2009 Created + * + ************************************************************************************* + */ + +#ifndef WELS_MV_PRED_H__ +#define WELS_MV_PRED_H__ + +#include "dec_frame.h" +#include "decoder_context.h" + +#define RETURN_ERR_IF_NULL(pRefPic0) \ +if ( pRefPic0 == NULL) \ + return GENERATE_ERROR_NO(ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX) + +namespace WelsDec { + +/*! +* \brief update mv and ref_index cache for current MB, only for P_16x16 (SKIP inclusive) +* \param +* \param +*/ +void UpdateP16x16MotionInfo (PDqLayer pCurDqLayer, int32_t listIdx, int8_t iRef, int16_t iMVs[2]); + +/*! +* \brief update ref_index cache for current MB, only for P_16x16 (SKIP inclusive) +* \param +* \param +*/ +void UpdateP16x16RefIdx (PDqLayer pCurDqLayer, int32_t listIdx, int8_t iRef); + +/*! +* \brief update mv only cache for current MB, only for P_16x16 (SKIP inclusive) +* \param +* \param +*/ +void UpdateP16x16MotionOnly (PDqLayer pCurDqLayer, int32_t listIdx, int16_t iMVs[2]); + +/*! 
+* \brief update mv and ref_index cache for current MB, only for P_16x8 +* \param +* \param +*/ +void UpdateP16x8MotionInfo (PDqLayer pCurDqLayer, int16_t iMotionVector[LIST_A][30][MV_A], + int8_t iRefIndex[LIST_A][30], + int32_t listIdx, int32_t iPartIdx, int8_t iRef, int16_t iMVs[2]); + + +/*! + * \brief update mv and ref_index cache for current MB, only for P_8x16 + * \param + * \param + */ +void UpdateP8x16MotionInfo (PDqLayer pCurDqLayer, int16_t iMotionVector[LIST_A][30][MV_A], + int8_t iRefIndex[LIST_A][30], + int32_t listIdx, int32_t iPartIdx, int8_t iRef, int16_t iMVs[2]); + +/*! + * \brief get the motion predictor for skip mode + * \param + * \param output iMvp[] + */ +void PredPSkipMvFromNeighbor (PDqLayer pCurDqLayer, int16_t iMvp[2]); + +/*! +* \brief get the motion predictor and reference for B-slice direct mode version 2 +* \param +* \param output iMvp[] and ref +*/ +int32_t PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2], int8_t ref[LIST_A], + SubMbType& subMbType); + +/*! +* \brief get Colocated MB for both Spatial and Temporal Direct Mode +* \param +* \param output MbType and SubMbType +*/ +int32_t GetColocatedMb (PWelsDecoderContext pCtx, MbType& mbType, SubMbType& subMbType); + +/*! +* \brief get the motion predictor for B-slice temporal direct mode 16x16 +*/ +int32_t PredBDirectTemporal (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2], int8_t ref[LIST_A], + SubMbType& subMbType); + +/*! +* \brief get the motion params for B-slice spatial direct mode +* \param +* \param output iMvp[] +*/ + +/*! + * \brief get the motion predictor for 4*4 or 8*8 or 16*16 block + * \param + * \param output iMvp[] + */ +void PredMv (int16_t iMotionVector[LIST_A][30][MV_A], int8_t iRefIndex[LIST_A][30], + int32_t listIdx, int32_t iPartIdx, int32_t iPartWidth, int8_t iRef, int16_t iMVP[2]); + +/*! 
+ * \brief get the motion predictor for inter16x8 MB + * \param + * \param output mvp_x and mvp_y + */ +void PredInter16x8Mv (int16_t iMotionVector[LIST_A][30][MV_A], int8_t iRefIndex[LIST_A][30], + int32_t listIdx, int32_t iPartIdx, int8_t iRef, int16_t iMVP[2]); + +/*! + * \brief get the motion predictor for inter8x16 MB + * \param + * \param output mvp_x and mvp_y + */ +void PredInter8x16Mv (int16_t iMotionVector[LIST_A][30][MV_A], int8_t iRefIndex[LIST_A][30], + int32_t listIdx, int32_t iPartIdx, int8_t iRef, int16_t iMVP[2]); + +/*! +* \brief Fill the spatial direct motion vectors for 8x8 direct MB +* \param +* \param output motion vector cache and motion vector deviation cache +*/ +void FillSpatialDirect8x8Mv (PDqLayer pCurDqLayer, const int16_t& iIdx8, const int8_t& iPartCount, const int8_t& iPartW, + const SubMbType& subMbType, const bool& bIsLongRef, int16_t pMvDirect[LIST_A][2], int8_t iRef[LIST_A], + int16_t pMotionVector[LIST_A][30][MV_A], int16_t pMvdCache[LIST_A][30][MV_A]); + +/*! +* \brief Fill the temporal direct motion vectors for 8x8 direct MB +* \param +* \param output motion vector cache and motion vector deviation cache +*/ +void FillTemporalDirect8x8Mv (PDqLayer pCurDqLayer, const int16_t& iIdx8, const int8_t& iPartCount, + const int8_t& iPartW, + const SubMbType& subMbType, int8_t iRef[LIST_A], int16_t (*mvColoc)[2], + int16_t pMotionVector[LIST_A][30][MV_A], int16_t pMvdCache[LIST_A][30][MV_A]); + +/*! +* \brief returns ref_index in List_0 from the colocated ref_index in LIST_0. +* \param +* returns ref_index in List_0 of ref picture LIST_0 +*/ +int8_t MapColToList0 (PWelsDecoderContext& pCtx, const int8_t& colocRefIndexL0, + const int32_t& ref0Count); //ISO/IEC 14496-10:2009(E) (8-193) + +/*! 
+* \brief update ref_index cache for current MB, for 8x8 +* \param +* \param +*/ +void Update8x8RefIdx (PDqLayer& pCurDqLayer, const int16_t& iPartIdx, const int32_t& listIdx, const int8_t& iRef); + +inline uint32_t* GetMbType (PDqLayer& pCurDqLayer) { + if (pCurDqLayer->pDec != NULL) { + return pCurDqLayer->pDec->pMbType; + } else { + return pCurDqLayer->pMbType; + } +} + +} // namespace WelsDec + +#endif//WELS_MV_PRED_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/nal_prefix.h b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/nal_prefix.h new file mode 100644 index 000000000..2a2c41918 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/nal_prefix.h @@ -0,0 +1,56 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +//nal_prefix.h - definitions for NAL Unit Header(/Ext) and PrefixNALUnit +#ifndef WELS_NAL_UNIT_PREFIX_H__ +#define WELS_NAL_UNIT_PREFIX_H__ + +#include "typedefs.h" +#include "wels_common_basis.h" +#include "slice.h" + +namespace WelsDec { + +///////////////////////////////////NAL Unit prefix/headers/////////////////////////////////// + +/* Prefix NAL Unit syntax, refer to Page 392 in JVT X201wcm */ +typedef struct TagPrefixNalUnit { + SRefBasePicMarking sRefPicBaseMarking; + bool bStoreRefBasePicFlag; + bool bPrefixNalUnitAdditionalExtFlag; + bool bPrefixNalUnitExtFlag; + bool bPrefixNalCorrectFlag; +} SPrefixNalUnit, *PPrefixNalUnit; + +} // namespace WelsDec + +#endif//WELS_NAL_UNIT_PREFIX_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/nalu.h b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/nalu.h new file mode 100644 index 000000000..d39e135e5 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/nalu.h @@ -0,0 +1,79 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +//nalu.h: NAL Unit definition +#ifndef WELS_NAL_UNIT_H__ +#define WELS_NAL_UNIT_H__ + +#include "typedefs.h" +#include "wels_common_basis.h" +#include "nal_prefix.h" +#include "bit_stream.h" + +namespace WelsDec { + +///////////////////////////////////NAL UNIT level/////////////////////////////////// + +/* NAL Unit Structure */ +typedef struct TagNalUnit { + SNalUnitHeaderExt sNalHeaderExt; + + union { + struct SVclNal { + SSliceHeaderExt sSliceHeaderExt; + SBitStringAux sSliceBitsRead; + uint8_t* pNalPos; // save the address of slice nal for GPU function + int32_t iNalLength; // save the nal length for GPU function + bool bSliceHeaderExtFlag; + } sVclNal; + SPrefixNalUnit sPrefixNal; + } sNalData; + unsigned long long uiTimeStamp; +} SNalUnit, *PNalUnit; + +///////////////////////////////////ACCESS Unit level/////////////////////////////////// + +/* Access Unit structure */ +typedef struct TagAccessUnits { + PNalUnit* pNalUnitsList; // list of NAL Units pointer in this AU + uint32_t uiAvailUnitsNum; // Number of NAL Units available in each AU list based current bitstream, + uint32_t uiActualUnitsNum; // actual number of NAL units belong to current au +// While available number exceeds count size below, need realloc extra NAL Units for list space. + uint32_t uiCountUnitsNum; // Count size number of malloced NAL Units in each AU list + uint32_t uiStartPos; + uint32_t uiEndPos; + bool bCompletedAuFlag; // Indicate whether it is a completed AU +} SAccessUnit, *PAccessUnit; + +} // namespace WelsDec + +#endif//WELS_NAL_UNIT_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/parameter_sets.h b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/parameter_sets.h new file mode 100644 index 000000000..cbdaecdb2 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/parameter_sets.h @@ -0,0 +1,217 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifndef WELS_PARAMETER_SETS_H__ +#define WELS_PARAMETER_SETS_H__ + +#include "typedefs.h" +#include "wels_const.h" +#include "wels_common_basis.h" + +namespace WelsDec { +/* VUI syntax in Sequence Parameter Set, refer to E.1 in Rec */ +typedef struct TagVui { + bool bAspectRatioInfoPresentFlag; + uint32_t uiAspectRatioIdc; + uint32_t uiSarWidth; + uint32_t uiSarHeight; + bool bOverscanInfoPresentFlag; + bool bOverscanAppropriateFlag; + bool bVideoSignalTypePresentFlag; + uint8_t uiVideoFormat; + bool bVideoFullRangeFlag; + bool bColourDescripPresentFlag; + uint8_t uiColourPrimaries; + uint8_t uiTransferCharacteristics; + uint8_t uiMatrixCoeffs; + bool bChromaLocInfoPresentFlag; + uint32_t uiChromaSampleLocTypeTopField; + uint32_t uiChromaSampleLocTypeBottomField; + bool bTimingInfoPresentFlag; + uint32_t uiNumUnitsInTick; + uint32_t uiTimeScale; + bool bFixedFrameRateFlag; + bool bNalHrdParamPresentFlag; + bool bVclHrdParamPresentFlag; + bool bPicStructPresentFlag; + bool bBitstreamRestrictionFlag; + bool bMotionVectorsOverPicBoundariesFlag; + uint32_t uiMaxBytesPerPicDenom; + uint32_t uiMaxBitsPerMbDenom; + uint32_t uiLog2MaxMvLengthHorizontal; + uint32_t uiLog2MaxMvLengthVertical; + uint32_t uiMaxNumReorderFrames; + uint32_t uiMaxDecFrameBuffering; +} SVui, *PVui; + +/* Sequence Parameter Set, refer to Page 57 in JVT X201wcm */ +typedef struct TagSps { + int32_t iSpsId; + uint32_t iMbWidth; + uint32_t iMbHeight; + uint32_t uiTotalMbCount; //used in decode_slice_data() + + uint32_t uiLog2MaxFrameNum; + uint32_t uiPocType; + /* POC type 0 */ + int32_t iLog2MaxPocLsb; + /* POC type 1 */ + int32_t iOffsetForNonRefPic; + + int32_t iOffsetForTopToBottomField; + int32_t iNumRefFramesInPocCycle; + int8_t iOffsetForRefFrame[256]; + int32_t iNumRefFrames; + + SPosOffset sFrameCrop; + + ProfileIdc uiProfileIdc; + uint8_t uiLevelIdc; + uint8_t uiChromaFormatIdc; + uint8_t uiChromaArrayType; + + uint8_t uiBitDepthLuma; + uint8_t uiBitDepthChroma; + /* TO BE 
CONTINUED: POC type 1 */ + bool bDeltaPicOrderAlwaysZeroFlag; + bool bGapsInFrameNumValueAllowedFlag; + + bool bFrameMbsOnlyFlag; + bool bMbaffFlag; // MB Adaptive Frame Field + bool bDirect8x8InferenceFlag; + bool bFrameCroppingFlag; + + bool bVuiParamPresentFlag; +// bool bTimingInfoPresentFlag; +// bool bFixedFrameRateFlag; + bool bConstraintSet0Flag; + bool bConstraintSet1Flag; + bool bConstraintSet2Flag; + bool bConstraintSet3Flag; + bool bSeparateColorPlaneFlag; + bool bQpPrimeYZeroTransfBypassFlag; + bool bSeqScalingMatrixPresentFlag; + bool bSeqScalingListPresentFlag[12]; + //Add scaling list supporting + uint8_t iScalingList4x4[6][16]; + uint8_t iScalingList8x8[6][64]; + SVui sVui; + const SLevelLimits* pSLevelLimits; +} SSps, *PSps; + + +/* Sequence Parameter Set extension syntax, refer to Page 58 in JVT X201wcm */ +//typedef struct TagSpsExt{ +// uint32_t iSpsId; +// uint32_t uiAuxFormatIdc; +// int32_t iAlphaOpaqueValue; +// int32_t iAlphaTransparentValue; + +// uint8_t uiBitDepthAux; +// bool bAlphaIncrFlag; +// bool bAdditionalExtFlag; +//}SSpsExt, *PSpsExt; + +/* Sequence Parameter Set extension syntax, refer to Page 391 in JVT X201wcm */ +typedef struct TagSpsSvcExt { + SPosOffset sSeqScaledRefLayer; + + uint8_t uiExtendedSpatialScalability; // ESS + uint8_t uiChromaPhaseXPlus1Flag; + uint8_t uiChromaPhaseYPlus1; + uint8_t uiSeqRefLayerChromaPhaseXPlus1Flag; + uint8_t uiSeqRefLayerChromaPhaseYPlus1; + bool bInterLayerDeblockingFilterCtrlPresentFlag; + bool bSeqTCoeffLevelPredFlag; + bool bAdaptiveTCoeffLevelPredFlag; + bool bSliceHeaderRestrictionFlag; +} SSpsSvcExt, *PSpsSvcExt; + +/* Subset sequence parameter set syntax, refer to Page 391 in JVT X201wcm */ +typedef struct TagSubsetSps { + SSps sSps; + SSpsSvcExt sSpsSvcExt; + bool bSvcVuiParamPresentFlag; + bool bAdditionalExtension2Flag; + bool bAdditionalExtension2DataFlag; +} SSubsetSps, *PSubsetSps; + +/* Picture parameter set syntax, refer to Page 59 in JVT X201wcm */ +typedef struct TagPps { 
+ int32_t iSpsId; + int32_t iPpsId; + + uint32_t uiNumSliceGroups; + uint32_t uiSliceGroupMapType; + /* slice_group_map_type = 0 */ + uint32_t uiRunLength[MAX_SLICEGROUP_IDS]; + /* slice_group_map_type = 2 */ + uint32_t uiTopLeft[MAX_SLICEGROUP_IDS]; + uint32_t uiBottomRight[MAX_SLICEGROUP_IDS]; + /* slice_group_map_type = 3, 4 or 5 */ + uint32_t uiSliceGroupChangeRate; + /* slice_group_map_type = 6 */ + uint32_t uiPicSizeInMapUnits; + uint32_t uiSliceGroupId[MAX_SLICEGROUP_IDS]; + + uint32_t uiNumRefIdxL0Active; + uint32_t uiNumRefIdxL1Active; + + int32_t iPicInitQp; + int32_t iPicInitQs; + int32_t iChromaQpIndexOffset[2];//cb,cr + + bool bEntropyCodingModeFlag; + bool bPicOrderPresentFlag; + /* slice_group_map_type = 3, 4 or 5 */ + bool bSliceGroupChangeDirectionFlag; + bool bDeblockingFilterControlPresentFlag; + + bool bConstainedIntraPredFlag; + bool bRedundantPicCntPresentFlag; + bool bWeightedPredFlag; + uint8_t uiWeightedBipredIdc; + + bool bTransform8x8ModeFlag; + //Add for scalinglist support + bool bPicScalingMatrixPresentFlag; + bool bPicScalingListPresentFlag[12]; + uint8_t iScalingList4x4[6][16]; + uint8_t iScalingList8x8[6][64]; + + int32_t iSecondChromaQPIndexOffset; //second_chroma_qp_index_offset + +} SPps, *PPps; + +} // namespace WelsDec + +#endif //WELS_PARAMETER_SETS_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/parse_mb_syn_cabac.h b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/parse_mb_syn_cabac.h new file mode 100644 index 000000000..8f31543ae --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/parse_mb_syn_cabac.h @@ -0,0 +1,89 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * \file parse_mb_syn_cabac.h + * + * \brief CABAC parsing of macroblock syntax elements + * + * \date 10/10/2014 Created + * + ************************************************************************************* + */ +#ifndef WELS_PARSE_MB_SYN_CABAC_H__ +#define WELS_PARSE_MB_SYN_CABAC_H__ +/* Declarations only. Every Parse*Cabac function below takes the decoder context (PWelsDecoderContext) and returns int32_t; most also take a non-const reference parameter. NOTE(review): the int32_t result is presumably an error code and the reference parameter the parsed value - confirm against the implementation. */ +#include "decoder_context.h" +#include "cabac_decoder.h" +namespace WelsDec { +int32_t ParseEndOfSliceCabac (PWelsDecoderContext pCtx, uint32_t& uiBinVal); +int32_t ParseSkipFlagCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiSkip); +int32_t ParseMBTypeISliceCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiBinVal); +int32_t ParseMBTypePSliceCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiBinVal); +int32_t ParseMBTypeBSliceCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiBinVal); +int32_t ParseTransformSize8x8FlagCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, + bool& bTransformSize8x8Flag); +int32_t ParseSubMBTypeCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiSubMbType); +int32_t ParseBSubMBTypeCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiSubMbType); +int32_t ParseIntraPredModeLumaCabac (PWelsDecoderContext pCtx, int32_t& iBinVal); +int32_t ParseIntraPredModeChromaCabac (PWelsDecoderContext pCtx, uint8_t uiNeighAvail, int32_t& iBinVal); +int32_t ParseInterPMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, + int16_t pMotionVector[LIST_A][30][MV_A], int16_t pMvdCache[LIST_A][30][MV_A], int8_t pRefIndex[LIST_A][30]); +int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, + int16_t pMotionVector[LIST_A][30][MV_A], int16_t pMvdCache[LIST_A][30][MV_A], int8_t pRefIndex[LIST_A][30], + int8_t pDirect[30]); +int32_t ParseRefIdxCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint8_t* nzc, + int8_t 
ref_idx[LIST_A][30], int8_t direct[30], + int32_t iListIdx, int32_t index, int32_t iActiveRefNum, int32_t b8mode, int8_t& iRefIdxVal); +int32_t ParseMvdInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, int8_t pRefIndex[LIST_A][30], + int16_t pMvdCache[LIST_A][30][2], int32_t index, int8_t iListIdx, int8_t iMvComp, int16_t& iMvdVal); +int32_t ParseCbpInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiBinVal); +int32_t ParseDeltaQpCabac (PWelsDecoderContext pCtx, int32_t& iQpDelta); +int32_t ParseCbfInfoCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNzcCache, int32_t index, int32_t iResProperty, + PWelsDecoderContext pCtx, uint32_t& uiCbpBit); +int32_t ParseSignificantMapCabac (int32_t* pSignificantMap, int32_t iResProperty, PWelsDecoderContext pCtx, + uint32_t& uiBinVal); +int32_t ParseSignificantCoeffCabac (int32_t* significant, int32_t iResProperty, PWelsDecoderContext pCtx); +int32_t ParseResidualBlockCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCountCache, SBitStringAux* pBsAux, + int32_t index, int32_t iMaxNumCoeff, const uint8_t* pScanTable, int32_t iResProperty, int16_t* sTCoeff, uint8_t uiQp, + PWelsDecoderContext pCtx); +int32_t ParseResidualBlockCabac8x8 (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCountCache, SBitStringAux* pBsAux, + int32_t index, int32_t iMaxNumCoeff, const uint8_t* pScanTable, int32_t iResProperty, int16_t* sTCoeff, uint8_t uiQp, + PWelsDecoderContext pCtx); +int32_t ParseIPCMInfoCabac (PWelsDecoderContext pCtx); +void UpdateP16x16MvdCabac (SDqLayer* pCurDqLayer, int16_t pMvd[2], const int8_t iListIdx); +void UpdateP8x8RefIdxCabac (PDqLayer pCurDqLayer, int8_t pRefIndex[LIST_A][30], int32_t iPartIdx, const int8_t iRef, + const int8_t iListIdx); +void UpdateP8x8DirectCabac (PDqLayer pCurDqLayer, int32_t iPartIdx); +void UpdateP16x16DirectCabac (PDqLayer pCurDqLayer); +void UpdateP8x8RefCacheIdxCabac (int8_t pRefIndex[LIST_A][30], const int16_t& iPartIdx, const int32_t& listIdx, + const 
int8_t& iRef); +} /* namespace WelsDec */ +//#pragma pack() +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/parse_mb_syn_cavlc.h b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/parse_mb_syn_cavlc.h new file mode 100644 index 000000000..d04855164 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/parse_mb_syn_cavlc.h @@ -0,0 +1,137 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file parse_mb_syn_cavlc.h + * + * \brief Parsing all syntax elements of mb and decoding residual with cavlc + * + * \date 03/17/2009 Created + * + ************************************************************************************* + */ + + +#ifndef WELS_PARSE_MB_SYN_CAVLC_H__ +#define WELS_PARSE_MB_SYN_CAVLC_H__ + +#include "wels_common_basis.h" +#include "decoder_context.h" +#include "dec_frame.h" +#include "slice.h" + +namespace WelsDec { + +/* The void helpers that follow each take the neighbour-availability descriptor (PWelsNeighAvail) and the current DQ layer. NOTE(review): they presumably fill the pointer/array arguments in place - confirm against the implementation. */ + +void GetNeighborAvailMbType (PWelsNeighAvail pNeighAvail, PDqLayer pCurDqLayer); +void WelsFillCacheNonZeroCount (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, PDqLayer pCurDqLayer); +void WelsFillCacheConstrain0IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode, + PDqLayer pCurDqLayer); +void WelsFillCacheConstrain1IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode, + PDqLayer pCurDqLayer); +void WelsFillCacheInterCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, + int16_t iMvArray[LIST_A][30][MV_A], int16_t iMvdCache[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], + PDqLayer pCurDqLayer); +void WelsFillDirectCacheCabac (PWelsNeighAvail pNeighAvail, int8_t iDirect[30], PDqLayer pCurDqLayer); +void WelsFillCacheInter (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, + int16_t iMvArray[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], PDqLayer pCurDqLayer); + +/*! + * \brief check whether iPredMode is eligible for intra16x16 + * \param input : current iPredMode + * \param output: 0 indicating decoding correctly; -1 means error occurrence + */ +int32_t CheckIntra16x16PredMode (uint8_t uiSampleAvail, int8_t* pMode); + +/*! + * \brief check whether iPredMode is eligible for intraNxN + * \param input : current iPredMode + * \param output: 0 indicating decoding correctly; -1 means error occurrence + */ +int32_t CheckIntraNxNPredMode (int32_t* pSampleAvail, int8_t* pMode, int32_t iIndex, bool b8x8); + +/*! 
+ * \brief check whether iPredMode is eligible for chroma + * \param input : current iPredMode + * \param output: 0 indicating decoding correctly; -1 means error occurrence + */ +int32_t CheckIntraChromaPredMode (uint8_t uiSampleAvail, int8_t* pMode); + +/*! + * \brief predict the mode of intra4x4 + * \param input : current intra4x4 block index + * \param output: mode index + */ +int32_t PredIntra4x4Mode (int8_t* pIntraPredMode, int32_t iIdx4); + + +void BsStartCavlc (PBitStringAux pBs); +void BsEndCavlc (PBitStringAux pBs); + +int32_t WelsResidualBlockCavlc (SVlcTable* pVlcTable, + uint8_t* pNonZeroCountCache, + PBitStringAux pBs, + /*int16_t* coeff_level,*/ + int32_t iIndex, + int32_t iMaxNumCoeff, + const uint8_t* kpZigzagTable, + int32_t iResidualProperty, + /*short *tCoeffLevel,*/ + int16_t* pTCoeff, + uint8_t uiQp, + PWelsDecoderContext pCtx); + +// Transform8x8 +int32_t WelsResidualBlockCavlc8x8 (SVlcTable* pVlcTable, + uint8_t* pNonZeroCountCache, + PBitStringAux pBs, + /*int16_t* coeff_level,*/ + int32_t iIndex, + int32_t iMaxNumCoeff, + const uint8_t* kpZigzagTable, + int32_t iResidualProperty, + /*short *tCoeffLevel,*/ + int16_t* pTCoeff, + int32_t iIdx4x4, + uint8_t uiQp, + PWelsDecoderContext pCtx); + +/*! 
+ * \brief parsing inter info (including ref_index and pMvd) + * \param input : decoding context, current mb, bit-stream + * \param output: 0 indicating decoding correctly; -1 means error + */ +int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], + PBitStringAux pBs); +int32_t ParseInterBInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][MV_A], + int8_t iRefIdxArray[LIST_A][30], PBitStringAux pBs); +} // namespace WelsDec +#endif//WELS_PARSE_MB_SYN_CAVLC_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/pic_queue.h b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/pic_queue.h new file mode 100644 index 000000000..473f80ff1 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/pic_queue.h @@ -0,0 +1,62 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +//pic_queue.h +#ifndef WELS_PICTURE_QUEUE_H__ +#define WELS_PICTURE_QUEUE_H__ + + +#include "picture.h" + +namespace WelsDec { + +#define PICTURE_RESOLUTION_ALIGNMENT 32 + +/* SPicBuff: picture queue made of an array of PPicture (ppPic), its capacity and a current index. */ +typedef struct TagPicBuff { + PPicture* ppPic; /* array of picture pointers; NOTE(review): presumably iCapacity entries - confirm */ + int32_t iCapacity; // capacity of the queue + int32_t iCurrentIdx; /* NOTE(review): presumably the index of the current node in ppPic - confirm */ +} SPicBuff, *PPicBuff; + +/* + * Interfaces + */ + +PPicture PrefetchPic (PPicBuff pPicBuff); // Get the current applicable node +PPicture PrefetchPicForThread (PPicBuff pPicBuff); // Get the current applicable node in threaded mode +PPicture PrefetchLastPicForThread (PPicBuff pPicBuff, + const int32_t& iLast); // Get the last applicable node in threaded mode + +} // namespace WelsDec + +#endif//WELS_PICTURE_QUEUE_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/picture.h b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/picture.h new file mode 100644 index 000000000..bdacc364c --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/picture.h @@ -0,0 +1,111 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +//picture.h - reconstruction picture/ reference picture/ residual picture are declared here +#ifndef WELS_PICTURE_H__ +#define WELS_PICTURE_H__ + +#include "typedefs.h" +#include "wels_common_defs.h" +#include "wels_const_common.h" +#include "wels_decoder_thread.h" + +using namespace WelsCommon; /* NOTE(review): using-directive at header scope - WelsCommon names become visible in every file that includes this header */ + +namespace WelsDec { + +/* +* Reconstructed Picture definition +* Used for reference pictures and for the resulting reconstructed pictures for output +*/ + +struct SPicture { + /************************************payload data*********************************/ + uint8_t* pBuffer[4]; // pointer to the first allocated byte, base offset of buffer, dimension: + uint8_t* pData[4]; // pointer to picture planes respectively + int32_t iLinesize[4];// linesize of picture planes respectively used currently + int32_t iPlanes; // How many planes are introduced due to color space format? 
+// picture information + + /*******************************from EC mv copy****************************/ + bool bIdrFlag; + + /*******************************from other standard syntax****************************/ + /*from sps*/ + int32_t iWidthInPixel; // picture width in pixels + int32_t iHeightInPixel;// picture height in pixels + /*from slice header*/ + int32_t iFramePoc; // frame POC + + /*******************************self_definition for misc use****************************/ + bool bUsedAsRef; //for ref pic management + bool bIsLongRef; // long term reference frame flag //for ref pic management + int8_t iRefCount; + + bool bIsComplete; // indicate whether current picture is complete, not from EC + /*******************************for future use****************************/ + uint8_t uiTemporalId; + uint8_t uiSpatialId; + uint8_t uiQualityId; + + int32_t iFrameNum; // frame number //for ref pic management + int32_t iFrameWrapNum; // frame wrap number //for ref pic management + int32_t iLongTermFrameIdx; //id for long term ref pic + uint32_t uiLongTermPicNum; //long_term_pic_num + + int32_t iSpsId; //against mosaic caused by cross-IDR interval reference. + int32_t iPpsId; + unsigned long long uiTimeStamp; + uint32_t uiDecodingTimeStamp; //represent relative decoding time stamps + int32_t iPicBuffIdx; + EWelsSliceType eSliceType; + bool bIsUngroupedMultiSlice; //multi-slice picture where each slice group contains one slice. 
+ bool bNewSeqBegin; + int32_t iMbEcedNum; + int32_t iMbEcedPropNum; + int32_t iMbNum; + + bool* pMbCorrectlyDecodedFlag; + int8_t (*pNzc)[24]; + uint32_t* pMbType; // mb type used for direct mode + int16_t (*pMv[LIST_A])[MB_BLOCK4x4_NUM][MV_A]; // used for direct mode + int8_t (*pRefIndex[LIST_A])[MB_BLOCK4x4_NUM]; //used for direct mode + struct SPicture* pRefPic[LIST_A][17]; //ref pictures used for direct mode + SWelsDecEvent* pReadyEvent; //MB line ready event + +};// "Picture" declaration conflicts with the Mac system + +typedef struct SPicture* PPicture; + +} // namespace WelsDec + +#endif//WELS_PICTURE_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/rec_mb.h b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/rec_mb.h new file mode 100644 index 000000000..be0c4a740 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/rec_mb.h @@ -0,0 +1,104 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file rec_mb.h + * + * \brief interfaces for all macroblock decoding process after mb syntax parsing and residual decoding with cavlc. + * + * \date 3/4/2009 Created + * + ************************************************************************************* + */ + +#ifndef WELS_REC_MB_H__ +#define WELS_REC_MB_H__ + +#include "typedefs.h" +#include "wels_common_basis.h" +#include "error_code.h" + +#include "decoder_context.h" + +namespace WelsDec { +/* WELS_B_MB_REC_VERIFY(uiRet): evaluates uiRet once, converts it to uint32_t and returns that value from the enclosing function when it differs from ERR_NONE. The argument is parenthesized inside the cast so that a compound expression is converted as a whole. */ +#define WELS_B_MB_REC_VERIFY(uiRet) do{ \ + uint32_t uiRetTmp = (uint32_t)(uiRet); \ + if( uiRetTmp != ERR_NONE ) \ + return uiRetTmp; \ +}while(0) +/* sMCRefMember: argument bundle passed to BaseMC - destination and source Y/U/V pointers, luma/chroma line sizes for source and destination, and picture width/height. NOTE(review): line sizes are presumably strides in bytes - confirm. */ +typedef struct TagMCRefMember { + uint8_t* pDstY; + uint8_t* pDstU; + uint8_t* pDstV; + + uint8_t* pSrcY; + uint8_t* pSrcU; + uint8_t* pSrcV; + + int32_t iSrcLineLuma; + int32_t iSrcLineChroma; + + int32_t iDstLineLuma; + int32_t iDstLineChroma; + + int32_t iPicWidth; + int32_t iPicHeight; +} sMCRefMember; + +void BaseMC (PWelsDecoderContext pCtx, sMCRefMember* pMCRefMem, const int32_t& listIdx, const int8_t& iRefIdx, + int32_t iXOffset, int32_t iYOffset, SMcFunc* pMCFunc, + int32_t iBlkWidth, int32_t iBlkHeight, int16_t iMVs[2]); + +void WelsFillRecNeededMbInfo (PWelsDecoderContext pCtx, bool bOutput, PDqLayer pCurDqLayer); +/* Rec* entry points: each takes the macroblock index, the decoder context, a coefficient buffer and the DQ layer and returns int32_t. NOTE(review): presumably ERR_NONE on success - confirm against the implementation. */ +int32_t RecI4x4Mb (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer); + +int32_t RecI4x4Luma (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer 
pDqLayer); + +int32_t RecI4x4Chroma (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer); + +int32_t RecI8x8Mb (int32_t iMbXy, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer); + +int32_t RecI8x8Luma (int32_t iMbXy, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer); + +int32_t RecI16x16Mb (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer); + +int32_t RecChroma (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer); + +int32_t GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWelsDecoderContext pCtx); + +int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWelsDecoderContext pCtx); + +} // namespace WelsDec + +#endif //WELS_REC_MB_H__ + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/slice.h b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/slice.h new file mode 100644 index 000000000..9295eb71b --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/slice.h @@ -0,0 +1,208 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +//slice.h +#ifndef WELS_SLICE_H__ +#define WELS_SLICE_H__ + +#include "typedefs.h" +#include "wels_const.h" +#include "wels_common_basis.h" +#include "picture.h" +#include "parameter_sets.h" + +namespace WelsDec { + +/* + * Reference picture list reordering syntax, refer to page 64 in JVT X201wcm + */ +typedef struct TagRefPicListReorderSyntax { + struct { + uint32_t uiAbsDiffPicNumMinus1; + uint16_t uiLongTermPicNum; + uint16_t uiReorderingOfPicNumsIdc; + } sReorderingSyn[LIST_A][MAX_REF_PIC_COUNT]; + bool bRefPicListReorderingFlag[LIST_A]; +} SRefPicListReorderSyn, *PRefPicListReorderSyn; + +/* + * Prediction weight table syntax, refer to page 65 in JVT X201wcm + */ +typedef struct TagPredWeightTabSyntax { + uint32_t uiLumaLog2WeightDenom; + uint32_t uiChromaLog2WeightDenom; + struct { + int32_t iLumaWeight[MAX_REF_PIC_COUNT]; + int32_t iLumaOffset[MAX_REF_PIC_COUNT]; + int32_t iChromaWeight[MAX_REF_PIC_COUNT][2]; + int32_t iChromaOffset[MAX_REF_PIC_COUNT][2]; + bool bLumaWeightFlag; + bool bChromaWeightFlag; + } sPredList[LIST_A]; + int32_t iImplicitWeight[MAX_REF_PIC_COUNT][MAX_REF_PIC_COUNT]; +} SPredWeightTabSyn, *PPredWeightTabSyn; + +/* Decoded reference picture marking syntax, refer to Page 66 in JVT X201wcm */ +typedef struct TagRefPicMarking { + struct { + uint32_t uiMmcoType; + int32_t iShortFrameNum; + int32_t iDiffOfPicNum; + uint32_t uiLongTermPicNum; + int32_t iLongTermFrameIdx; + int32_t 
iMaxLongTermFrameIdx; + } sMmcoRef[MAX_MMCO_COUNT]; + + bool bNoOutputOfPriorPicsFlag; + bool bLongTermRefFlag; + bool bAdaptiveRefPicMarkingModeFlag; +} SRefPicMarking, *PRefPicMarking; + +/* Decode reference base picture marking syntax in Page 396 of JVT X201wcm */ +typedef struct TagRefBasePicMarkingSyn { + struct { + uint32_t uiMmcoType; + int32_t iShortFrameNum; + uint32_t uiDiffOfPicNums; + uint32_t uiLongTermPicNum; //should be uint32_t to cover a larger range of iFrameNum. + } mmco_base[MAX_MMCO_COUNT]; // MAX_REF_PIC for reference picture based on frame + + bool bAdaptiveRefBasePicMarkingModeFlag; +} SRefBasePicMarking, *PRefBasePicMarking; + +/* Header of slice syntax elements, refer to Page 63 in JVT X201wcm */ +typedef struct TagSliceHeaders { + /*****************************slice header syntax and generated****************************/ + int32_t iFirstMbInSlice; + int32_t iFrameNum; + int32_t iPicOrderCntLsb; + int32_t iDeltaPicOrderCntBottom; + int32_t iDeltaPicOrderCnt[2]; + int32_t iRedundantPicCnt; + int32_t iDirectSpatialMvPredFlag; //!< Direct Mode type to be used (0: Temporal, 1: Spatial) + int32_t uiRefCount[LIST_A]; + int32_t iSliceQpDelta; //unused, because iSliceQp is used directly + int32_t iSliceQp; + int32_t iSliceQsDelta; // For SP/SI slices + uint32_t uiDisableDeblockingFilterIdc; + int32_t iSliceAlphaC0Offset; + int32_t iSliceBetaOffset; + int32_t iSliceGroupChangeCycle; + + PSps pSps; + PPps pPps; + int32_t iSpsId; + int32_t iPpsId; + bool bIdrFlag; + + /*********************got from other layer for efficiency if possible*********************/ + SRefPicListReorderSyn pRefPicListReordering; // Reference picture list reordering syntax + SPredWeightTabSyn sPredWeightTable; + int32_t iCabacInitIdc; + int32_t iMbWidth; //from? + int32_t iMbHeight; //from? 
+ SRefPicMarking sRefMarking; // Decoded reference picture marking syntax + + uint16_t uiIdrPicId; + EWelsSliceType eSliceType; + bool bNumRefIdxActiveOverrideFlag; + bool bFieldPicFlag; //not supported in base profile + bool bBottomFiledFlag; //not supported in base profile + uint8_t uiPadding1Byte; + bool bSpForSwitchFlag; // For SP/SI slices + int16_t iPadding2Bytes; +} SSliceHeader, *PSliceHeader; + + +/* Slice header in scalable extension syntax, refer to Page 394 in JVT X201wcm */ +typedef struct TagSliceHeaderExt { + SSliceHeader sSliceHeader; + PSubsetSps pSubsetSps; + + uint32_t uiDisableInterLayerDeblockingFilterIdc; + int32_t iInterLayerSliceAlphaC0Offset; + int32_t iInterLayerSliceBetaOffset; + +//SPosOffset sScaledRefLayer; + int32_t iScaledRefLayerPicWidthInSampleLuma; + int32_t iScaledRefLayerPicHeightInSampleLuma; + + SRefBasePicMarking sRefBasePicMarking; + bool bBasePredWeightTableFlag; + bool bStoreRefBasePicFlag; + bool bConstrainedIntraResamplingFlag; + bool bSliceSkipFlag; + + bool bAdaptiveBaseModeFlag; + bool bDefaultBaseModeFlag; + bool bAdaptiveMotionPredFlag; + bool bDefaultMotionPredFlag; + bool bAdaptiveResidualPredFlag; + bool bDefaultResidualPredFlag; + bool bTCoeffLevelPredFlag; + uint8_t uiRefLayerChromaPhaseXPlus1Flag; + + uint8_t uiRefLayerChromaPhaseYPlus1; + uint8_t uiRefLayerDqId; + uint8_t uiScanIdxStart; + uint8_t uiScanIdxEnd; +} SSliceHeaderExt, *PSliceHeaderExt; + + +typedef struct TagSlice { + /*******************************slice_header****************************/ + SSliceHeaderExt sSliceHeaderExt; + + /*******************************use for future****************************/ +// for Macroblock coding within slice + int32_t iLastMbQp; // stored qp for last mb coded, maybe more efficient for mb skip detection etc. + + /*******************************slice_data****************************/ + /*slice_data_ext()*/ + int32_t iMbSkipRun; + int32_t iTotalMbInCurSlice; //record the total number of MB in current slice. 
+ + /*slice_data_ext() generate*/ + + /*******************************misc use****************************/ + bool bSliceHeaderExtFlag; // Indicate which slice header is used, avc or ext? + /*************got from other layer for efficiency if possible***************/ + /*from lower layer: slice header*/ + uint8_t eSliceType; + uint8_t uiPadding[2]; + int32_t iLastDeltaQp; + int16_t iMvScale[LIST_A][MAX_DPB_COUNT]; //Motion vector scale For Temporal Direct Mode Type +} SSlice, *PSlice; + +} // namespace WelsDec + +#endif//WELS_SLICE_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/vlc_decoder.h b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/vlc_decoder.h new file mode 100644 index 000000000..9f19700d2 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/vlc_decoder.h @@ -0,0 +1,177 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef WELS_VLC_DECODER_H__ +#define WELS_VLC_DECODER_H__ + +#include "bit_stream.h" +#include "dec_golomb.h" + +namespace WelsDec { +/* SVlcTable: groups pointers to tables of two-byte VLC entries (each member points at rows of uint8_t[2]); populated by InitVlcTable() below. */ +typedef struct TagVlcTable { +const uint8_t (*kpCoeffTokenVlcTable[4][8])[2]; +const uint8_t (*kpChromaCoeffTokenVlcTable)[2]; +const uint8_t (*kpZeroTable[7])[2]; +const uint8_t (*kpTotalZerosTable[2][15])[2]; +} SVlcTable; + +// shared across modules to reduce the size of the generated binary +extern const uint8_t g_kuiVlcChromaTable[256][2]; +extern const uint8_t g_kuiVlcTable_0[256][2]; +extern const uint8_t g_kuiVlcTable_0_0[256][2]; +extern const uint8_t g_kuiVlcTable_0_1[4][2]; +extern const uint8_t g_kuiVlcTable_0_2[2][2]; +extern const uint8_t g_kuiVlcTable_0_3[2][2]; +extern const uint8_t g_kuiVlcTable_1[256][2]; +extern const uint8_t g_kuiVlcTable_1_0[64][2]; +extern const uint8_t g_kuiVlcTable_1_1[8][2]; +extern const uint8_t g_kuiVlcTable_1_2[2][2]; +extern const uint8_t g_kuiVlcTable_1_3[2][2]; +extern const uint8_t g_kuiVlcTable_2[256][2]; +extern const uint8_t g_kuiVlcTable_2_0[4][2]; +extern const uint8_t g_kuiVlcTable_2_1[4][2]; +extern const uint8_t g_kuiVlcTable_2_2[4][2]; +extern const uint8_t g_kuiVlcTable_2_3[4][2]; +extern const uint8_t g_kuiVlcTable_2_4[2][2]; +extern const uint8_t g_kuiVlcTable_2_5[2][2]; +extern const uint8_t g_kuiVlcTable_2_6[2][2]; +extern const uint8_t g_kuiVlcTable_2_7[2][2]; +extern const uint8_t g_kuiVlcTable_3[64][2]; 
+extern const uint8_t g_kuiVlcTableNeedMoreBitsThread[3]; +extern const uint8_t g_kuiVlcTableMoreBitsCount0[4]; +extern const uint8_t g_kuiVlcTableMoreBitsCount1[4]; +extern const uint8_t g_kuiVlcTableMoreBitsCount2[8]; +extern const uint8_t g_kuiNcMapTable[17]; +extern const uint8_t g_kuiVlcTrailingOneTotalCoeffTable[62][2]; +extern const uint8_t g_kuiTotalZerosTable0[512][2]; +extern const uint8_t g_kuiTotalZerosTable1[64][2]; +extern const uint8_t g_kuiTotalZerosTable2[64][2]; +extern const uint8_t g_kuiTotalZerosTable3[32][2]; +extern const uint8_t g_kuiTotalZerosTable4[32][2]; +extern const uint8_t g_kuiTotalZerosTable5[64][2]; +extern const uint8_t g_kuiTotalZerosTable6[64][2]; +extern const uint8_t g_kuiTotalZerosTable7[64][2]; +extern const uint8_t g_kuiTotalZerosTable8[64][2]; +extern const uint8_t g_kuiTotalZerosTable9[32][2]; +extern const uint8_t g_kuiTotalZerosTable10[16][2]; +extern const uint8_t g_kuiTotalZerosTable11[16][2]; +extern const uint8_t g_kuiTotalZerosTable12[8][2]; +extern const uint8_t g_kuiTotalZerosTable13[4][2]; +extern const uint8_t g_kuiTotalZerosTable14[2][2]; +extern const uint8_t g_kuiTotalZerosBitNumMap[15]; +extern const uint8_t g_kuiTotalZerosChromaTable0[8][2]; +extern const uint8_t g_kuiTotalZerosChromaTable1[4][2]; +extern const uint8_t g_kuiTotalZerosChromaTable2[2][2]; +extern const uint8_t g_kuiTotalZerosBitNumChromaMap[3]; +extern const uint8_t g_kuiZeroLeftTable0[2][2]; +extern const uint8_t g_kuiZeroLeftTable1[4][2]; +extern const uint8_t g_kuiZeroLeftTable2[4][2]; +extern const uint8_t g_kuiZeroLeftTable3[8][2]; +extern const uint8_t g_kuiZeroLeftTable4[8][2]; +extern const uint8_t g_kuiZeroLeftTable5[8][2]; +extern const uint8_t g_kuiZeroLeftTable6[8][2]; +extern const uint8_t g_kuiZeroLeftBitNumMap[16]; +/* WELS_GET_PREFIX_BITS(inval, outval): inline x86 asm when built with 32-bit MSVC, otherwise assigns GetPrefixBits(inval) to outval. NOTE(review): neither expansion is wrapped in do{}while(0) and the parameters are not parenthesized, so pass simple lvalues/values only. */ +#if defined(_MSC_VER) && defined(_M_IX86) +//TODO need linux version +#define WELS_GET_PREFIX_BITS(inval,outval){\ + uint32_t local = inval;\ + __asm xor eax, eax\ + __asm bsr eax, local\ + __asm sub eax, 32\ + 
__asm neg eax\ + __asm mov outval, eax\ +} +#else +#define WELS_GET_PREFIX_BITS(inval, outval) outval = GetPrefixBits(inval) +#endif +/* InitVlcTable: points the slots of *pVlcTable at the global VLC tables. Written here: kpChromaCoeffTokenVlcTable; kpCoeffTokenVlcTable[0..2][0..3] and [3][0..7]; kpZeroTable[0..6]; kpTotalZerosTable[0][0..14] and [1][0..2]. The remaining slots of kpCoeffTokenVlcTable and kpTotalZerosTable are not assigned by this function. pVlcTable is dereferenced without a null check. */ +static inline void InitVlcTable (SVlcTable* pVlcTable) { +pVlcTable->kpChromaCoeffTokenVlcTable = g_kuiVlcChromaTable; + +pVlcTable->kpCoeffTokenVlcTable[0][0] = g_kuiVlcTable_0; +pVlcTable->kpCoeffTokenVlcTable[0][1] = g_kuiVlcTable_1; +pVlcTable->kpCoeffTokenVlcTable[0][2] = g_kuiVlcTable_2; +pVlcTable->kpCoeffTokenVlcTable[0][3] = g_kuiVlcTable_3; + +pVlcTable->kpCoeffTokenVlcTable[1][0] = g_kuiVlcTable_0_0; +pVlcTable->kpCoeffTokenVlcTable[1][1] = g_kuiVlcTable_0_1; +pVlcTable->kpCoeffTokenVlcTable[1][2] = g_kuiVlcTable_0_2; +pVlcTable->kpCoeffTokenVlcTable[1][3] = g_kuiVlcTable_0_3; + +pVlcTable->kpCoeffTokenVlcTable[2][0] = g_kuiVlcTable_1_0; +pVlcTable->kpCoeffTokenVlcTable[2][1] = g_kuiVlcTable_1_1; +pVlcTable->kpCoeffTokenVlcTable[2][2] = g_kuiVlcTable_1_2; +pVlcTable->kpCoeffTokenVlcTable[2][3] = g_kuiVlcTable_1_3; + +pVlcTable->kpCoeffTokenVlcTable[3][0] = g_kuiVlcTable_2_0; +pVlcTable->kpCoeffTokenVlcTable[3][1] = g_kuiVlcTable_2_1; +pVlcTable->kpCoeffTokenVlcTable[3][2] = g_kuiVlcTable_2_2; +pVlcTable->kpCoeffTokenVlcTable[3][3] = g_kuiVlcTable_2_3; +pVlcTable->kpCoeffTokenVlcTable[3][4] = g_kuiVlcTable_2_4; +pVlcTable->kpCoeffTokenVlcTable[3][5] = g_kuiVlcTable_2_5; +pVlcTable->kpCoeffTokenVlcTable[3][6] = g_kuiVlcTable_2_6; +pVlcTable->kpCoeffTokenVlcTable[3][7] = g_kuiVlcTable_2_7; + +pVlcTable->kpZeroTable[0] = g_kuiZeroLeftTable0; +pVlcTable->kpZeroTable[1] = g_kuiZeroLeftTable1; +pVlcTable->kpZeroTable[2] = g_kuiZeroLeftTable2; +pVlcTable->kpZeroTable[3] = g_kuiZeroLeftTable3; +pVlcTable->kpZeroTable[4] = g_kuiZeroLeftTable4; +pVlcTable->kpZeroTable[5] = g_kuiZeroLeftTable5; +pVlcTable->kpZeroTable[6] = g_kuiZeroLeftTable6; + +pVlcTable->kpTotalZerosTable[0][0] = g_kuiTotalZerosTable0; +pVlcTable->kpTotalZerosTable[0][1] = g_kuiTotalZerosTable1; +pVlcTable->kpTotalZerosTable[0][2] = g_kuiTotalZerosTable2; 
+pVlcTable->kpTotalZerosTable[0][3] = g_kuiTotalZerosTable3; +pVlcTable->kpTotalZerosTable[0][4] = g_kuiTotalZerosTable4; +pVlcTable->kpTotalZerosTable[0][5] = g_kuiTotalZerosTable5; +pVlcTable->kpTotalZerosTable[0][6] = g_kuiTotalZerosTable6; +pVlcTable->kpTotalZerosTable[0][7] = g_kuiTotalZerosTable7; +pVlcTable->kpTotalZerosTable[0][8] = g_kuiTotalZerosTable8; +pVlcTable->kpTotalZerosTable[0][9] = g_kuiTotalZerosTable9; +pVlcTable->kpTotalZerosTable[0][10] = g_kuiTotalZerosTable10; +pVlcTable->kpTotalZerosTable[0][11] = g_kuiTotalZerosTable11; +pVlcTable->kpTotalZerosTable[0][12] = g_kuiTotalZerosTable12; +pVlcTable->kpTotalZerosTable[0][13] = g_kuiTotalZerosTable13; +pVlcTable->kpTotalZerosTable[0][14] = g_kuiTotalZerosTable14; +pVlcTable->kpTotalZerosTable[1][0] = g_kuiTotalZerosChromaTable0; +pVlcTable->kpTotalZerosTable[1][1] = g_kuiTotalZerosChromaTable1; +pVlcTable->kpTotalZerosTable[1][2] = g_kuiTotalZerosChromaTable2; + +} + +} // namespace WelsDec + +#endif//WELS_VLC_DECODER_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/wels_common_basis.h b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/wels_common_basis.h new file mode 100644 index 000000000..19aae110e --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/wels_common_basis.h @@ -0,0 +1,317 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +//wels_common_basis.h +#ifndef WELS_COMMON_BASIS_H__ +#define WELS_COMMON_BASIS_H__ + +#include "typedefs.h" +#include "macros.h" + +#include "wels_common_defs.h" + +using namespace WelsCommon; + +namespace WelsDec { + +/*common use table*/ +extern const uint8_t g_kuiScan8[24]; +extern const uint8_t g_kuiLumaDcZigzagScan[16]; +extern const uint8_t g_kuiChromaDcScan[4]; +extern const uint8_t g_kMbNonZeroCountIdx[24]; +extern const uint8_t g_kCacheNzcScanIdx[4 * 4 + 4 + 4 + 3]; +extern const uint8_t g_kCache26ScanIdx[16]; +extern const uint8_t g_kCache30ScanIdx[16]; +extern const uint8_t g_kNonZeroScanIdxC[4]; +/* Profile IDC */ +typedef uint8_t ProfileIdc; + +/* Position Offset structure */ +typedef struct TagPosOffset { + int32_t iLeftOffset; + int32_t iTopOffset; + int32_t iRightOffset; + int32_t iBottomOffset; +} SPosOffset; + +/* MB Type & Sub-MB Type */ +typedef uint32_t MbType; +typedef uint32_t SubMbType; + +#define I16_LUMA_DC 1 +#define I16_LUMA_AC 2 +#define LUMA_DC_AC 3 +#define CHROMA_DC 4 +#define CHROMA_AC 5 +#define LUMA_DC_AC_8 6 +#define CHROMA_DC_U 7 +#define CHROMA_DC_V 8 +#define CHROMA_AC_U 9 +#define CHROMA_AC_V 10 +#define LUMA_DC_AC_INTRA 
11 +#define LUMA_DC_AC_INTER 12 +#define CHROMA_DC_U_INTER 13 +#define CHROMA_DC_V_INTER 14 +#define CHROMA_AC_U_INTER 15 +#define CHROMA_AC_V_INTER 16 +#define LUMA_DC_AC_INTRA_8 17 +#define LUMA_DC_AC_INTER_8 18 + +#define SHIFT_BUFFER(pBitsCache) { pBitsCache->pBuf+=2; pBitsCache->uiRemainBits += 16; pBitsCache->uiCache32Bit |= (((pBitsCache->pBuf[2] << 8) | pBitsCache->pBuf[3]) << (32 - pBitsCache->uiRemainBits)); } +#define POP_BUFFER(pBitsCache, iCount) { pBitsCache->uiCache32Bit <<= iCount; pBitsCache->uiRemainBits -= iCount; } + +static const uint8_t g_kuiZigzagScan[16] = { //4*4block residual zig-zag scan order + 0, 1, 4, 8, + 5, 2, 3, 6, + 9, 12, 13, 10, + 7, 11, 14, 15, +}; + +static const uint8_t g_kuiZigzagScan8x8[64] = { //8x8 block residual zig-zag scan order + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63, +}; + +static const uint8_t g_kuiIdx2CtxSignificantCoeffFlag8x8[64] = { // Table 9-43, Page 289 + 0, 1, 2, 3, 4, 5, 5, 4, + 4, 3, 3, 4, 4, 4, 5, 5, + 4, 4, 4, 4, 3, 3, 6, 7, + 7, 7, 8, 9, 10, 9, 8, 7, + 7, 6, 11, 12, 13, 11, 6, 7, + 8, 9, 14, 10, 9, 8, 6, 11, + 12, 13, 11, 6, 9, 14, 10, 9, + 11, 12, 13, 11, 14, 10, 12, 14, +}; + +static const uint8_t g_kuiIdx2CtxLastSignificantCoeffFlag8x8[64] = { // Table 9-43, Page 289 + 0, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 6, 6, 6, 6, + 7, 7, 7, 7, 8, 8, 8, 8, +}; +/* GetMbResProperty: reads the residual category in *pResidualProperty, stores an index in the range 0..7 in *pMBproperty and, for most inputs, rewrites *pResidualProperty to its base category: the INTER and INTRA variants map to the plain name (LUMA_DC_AC, LUMA_DC_AC_8, CHROMA_DC_U, ...), and when bCavlc is true the U and V chroma categories additionally collapse to CHROMA_DC or CHROMA_AC. Values without a case label leave both outputs untouched (the switch has no default). */ +static inline void GetMbResProperty (int32_t* pMBproperty, int32_t* pResidualProperty, bool bCavlc) { + switch (*pResidualProperty) { + case CHROMA_AC_U: + *pMBproperty = 1; + *pResidualProperty = bCavlc ? CHROMA_AC : CHROMA_AC_U; + break; + case CHROMA_AC_V: + *pMBproperty = 2; + *pResidualProperty = bCavlc ? 
CHROMA_AC : CHROMA_AC_V; + break; + case LUMA_DC_AC_INTRA: + *pMBproperty = 0; + *pResidualProperty = LUMA_DC_AC; + break; + case CHROMA_DC_U: + *pMBproperty = 1; + *pResidualProperty = bCavlc ? CHROMA_DC : CHROMA_DC_U; + break; + case CHROMA_DC_V: + *pMBproperty = 2; + *pResidualProperty = bCavlc ? CHROMA_DC : CHROMA_DC_V; + break; + case I16_LUMA_AC: + *pMBproperty = 0; /* residual category is left unchanged */ + break; + case I16_LUMA_DC: + *pMBproperty = 0; /* residual category is left unchanged */ + break; + case LUMA_DC_AC_INTER: + *pMBproperty = 3; + *pResidualProperty = LUMA_DC_AC; + break; + case CHROMA_DC_U_INTER: + *pMBproperty = 4; + *pResidualProperty = bCavlc ? CHROMA_DC : CHROMA_DC_U; + break; + case CHROMA_DC_V_INTER: + *pMBproperty = 5; + *pResidualProperty = bCavlc ? CHROMA_DC : CHROMA_DC_V; + break; + case CHROMA_AC_U_INTER: + *pMBproperty = 4; + *pResidualProperty = bCavlc ? CHROMA_AC : CHROMA_AC_U; + break; + case CHROMA_AC_V_INTER: + *pMBproperty = 5; + *pResidualProperty = bCavlc ? CHROMA_AC : CHROMA_AC_V; + break; + // Reference to Table 7-2 + case LUMA_DC_AC_INTRA_8: + *pMBproperty = 6; + *pResidualProperty = LUMA_DC_AC_8; + break; + case LUMA_DC_AC_INTER_8: + *pMBproperty = 7; + *pResidualProperty = LUMA_DC_AC_8; + break; + } +} + +typedef struct TagI16PredInfo { + int8_t iPredMode; + int8_t iLeftAvail; + int8_t iTopAvail; + int8_t iLeftTopAvail; +} SI16PredInfo; +static const SI16PredInfo g_ksI16PredInfo[4] = { + {I16_PRED_V, 0, 1, 0}, + {I16_PRED_H, 1, 0, 0}, + { 0, 0, 0, 0}, + {I16_PRED_P, 1, 1, 1}, +}; + +static const SI16PredInfo g_ksChromaPredInfo[4] = { + { 0, 0, 0, 0}, + {C_PRED_H, 1, 0, 0}, + {C_PRED_V, 0, 1, 0}, + {C_PRED_P, 1, 1, 1}, +}; + + +typedef struct TagI4PredInfo { + int8_t iPredMode; + int8_t iLeftAvail; + int8_t iTopAvail; + int8_t iLeftTopAvail; + // int8_t right_top_avail; //when right_top unavailable but top avail, we can pad the right-top with the rightmost pixel of top +} SI4PredInfo; +static const SI4PredInfo g_ksI4PredInfo[9] = { + { I4_PRED_V, 0, 1, 0}, + { I4_PRED_H, 1, 0, 0}, + { 0, 0, 0, 
0}, + {I4_PRED_DDL, 0, 1, 0}, + {I4_PRED_DDR, 1, 1, 1}, + { I4_PRED_VR, 1, 1, 1}, + { I4_PRED_HD, 1, 1, 1}, + { I4_PRED_VL, 0, 1, 0}, + { I4_PRED_HU, 1, 0, 0}, +}; + +static const uint8_t g_kuiI16CbpTable[6] = {0, 16, 32, 15, 31, 47}; + + +typedef struct TagPartMbInfo { + MbType iType; + int8_t iPartCount; //P_16*16, P_16*8, P_8*16, P_8*8 based on 8*8 block; P_8*4, P_4*8, P_4*4 based on 4*4 block + int8_t iPartWidth; //based on 4*4 block +} SPartMbInfo; + +//Table 7.13. Macroblock type values 0 to 4 for P slices. +static const SPartMbInfo g_ksInterPMbTypeInfo[5] = { + {MB_TYPE_16x16, 1, 4}, + {MB_TYPE_16x8, 2, 4}, + {MB_TYPE_8x16, 2, 2}, + {MB_TYPE_8x8, 4, 4}, + {MB_TYPE_8x8_REF0, 4, 4}, //ref0--ref_idx not present in bit-stream and default as 0 +}; + +//Table 7.14. Macroblock type values 0 to 22 for B slices. +static const SPartMbInfo g_ksInterBMbTypeInfo[] = { + // Part 0 Part 1 + { MB_TYPE_DIRECT, 1, 4 }, //B_Direct_16x16 + { MB_TYPE_16x16 | MB_TYPE_P0L0, 1, 4 }, //B_L0_16x16 + { MB_TYPE_16x16 | MB_TYPE_P0L1, 1, 4 }, //B_L1_16x16 + { MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1, 1, 4 }, //B_Bi_16x16 + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P1L0, 2, 4 }, //B_L0_L0_16x8 + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P1L0, 2, 2 }, //B_L0_L0_8x16 + { MB_TYPE_16x8 | MB_TYPE_P0L1 | MB_TYPE_P1L1, 2, 4 }, //B_L1_L1_16x8 + { MB_TYPE_8x16 | MB_TYPE_P0L1 | MB_TYPE_P1L1, 2, 2 }, //B_L1_L1_8x16 + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P1L1, 2, 4 }, //B_L0_L1_16x8 + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P1L1, 2, 2 }, //B_L0_L1_8x16 + { MB_TYPE_16x8 | MB_TYPE_P0L1 | MB_TYPE_P1L0, 2, 4 }, //B_L1_L0_16x8 + { MB_TYPE_8x16 | MB_TYPE_P0L1 | MB_TYPE_P1L0, 2, 2 }, //B_L1_L0_8x16 + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, 4 }, //B_L0_Bi_16x8 + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, 2 }, //B_L0_Bi_8x16 + { MB_TYPE_16x8 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, 4 }, //B_L1_Bi_16x8 + { MB_TYPE_8x16 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | 
MB_TYPE_P1L1, 2, 2 }, //B_L1_Bi_8x16 + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0, 2, 4 }, //B_Bi_L0_16x8 + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0, 2, 2 }, //B_Bi_L0_8x16 + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L1, 2, 4 }, //B_Bi_L1_16x8 + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L1, 2, 2 }, //B_Bi_L1_8x16 + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, 4 }, //B_Bi_Bi_16x8 + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, 2 }, //B_Bi_Bi_8x16 + { MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 4, 4 } //B_8x8 +}; + +//Table 7.17 Sub-macroblock types in P macroblocks. +static const SPartMbInfo g_ksInterPSubMbTypeInfo[4] = { + {SUB_MB_TYPE_8x8, 1, 2}, + {SUB_MB_TYPE_8x4, 2, 2}, + {SUB_MB_TYPE_4x8, 2, 1}, + {SUB_MB_TYPE_4x4, 4, 1}, +}; + +//Table 7.18 Sub-macroblock types in B macroblocks. +static const SPartMbInfo g_ksInterBSubMbTypeInfo[] = { + { MB_TYPE_DIRECT, 1, 2 }, //B_Direct_8x8 + { SUB_MB_TYPE_8x8 | MB_TYPE_P0L0, 1, 2 }, //B_L0_8x8 + { SUB_MB_TYPE_8x8 | MB_TYPE_P0L1, 1, 2 }, //B_L1_8x8 + { SUB_MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1, 1, 2 }, //B_Bi_8x8 + { SUB_MB_TYPE_8x4 | MB_TYPE_P0L0, 2, 2 }, //B_L0_8x4 + { SUB_MB_TYPE_4x8 | MB_TYPE_P0L0, 2, 1 }, //B_L0_4x8 + { SUB_MB_TYPE_8x4 | MB_TYPE_P0L1, 2, 2 }, //B_L1_8x4 + { SUB_MB_TYPE_4x8 | MB_TYPE_P0L1, 2, 1 }, //B_L1_4x8 + { SUB_MB_TYPE_8x4 | MB_TYPE_P0L0 | MB_TYPE_P0L1, 2, 2 }, //B_Bi_8x4 + { SUB_MB_TYPE_4x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1, 2, 1 }, //B_Bi_4x8 + { SUB_MB_TYPE_4x4 | MB_TYPE_P0L0, 4, 1 }, //B_L0_4x4 + { SUB_MB_TYPE_4x4 | MB_TYPE_P0L1, 4, 1 }, //B_L1_4x4 + { SUB_MB_TYPE_4x4 | MB_TYPE_P0L0 | MB_TYPE_P0L1, 4, 1 } //B_Bi_4x4 +}; + +typedef struct TagSar { + uint32_t uiWidth; + uint32_t uiHeight; +} sSar; +static const sSar g_ksVuiSampleAspectRatio[17] = { //Table E-1 + { 0, 0}, { 1, 1}, {12, 11}, { 10, 11}, {16, 11}, //0~4 + {40, 33}, {24, 11}, 
{20, 11}, { 32, 11}, {80, 33}, //5~9 + {18, 11}, {15, 11}, {64, 33}, {160, 99}, { 4, 3}, //10~14 + { 3, 2}, { 2, 1} //15~16 +}; + + +} // namespace WelsDec + +#endif//WELS_COMMON_BASIS_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/wels_const.h b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/wels_const.h new file mode 100644 index 000000000..3dc949e37 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/wels_const.h @@ -0,0 +1,65 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +//wels_const.h +#ifndef WELS_CONST_H__ +#define WELS_CONST_H__ + +#include "wels_const_common.h" + +/* Some list size */ + +#define MB_SUB_PARTITION_SIZE 4 // Sub partition size in a 8x8 sub-block +#define NAL_UNIT_HEADER_EXT_SIZE 3 // Size of NAL unit header for extension in byte +#define MAX_PPS_COUNT 256 // Count number of PPS + +#define MAX_REF_PIC_COUNT 16 // MAX Short + Long reference pictures +#define MIN_REF_PIC_COUNT 1 // minimal count number of reference pictures, 1 short + 2 key reference based? +#define MAX_SHORT_REF_COUNT 16 // maximal count number of short reference pictures +#define MAX_LONG_REF_COUNT 16 // maximal count number of long reference pictures +#define MAX_DPB_COUNT (MAX_REF_PIC_COUNT + 1) // 1 additional position for re-order and other process + +#define MAX_MMCO_COUNT 66 + +#define MAX_SLICEGROUP_IDS 8 // Count number of Slice Groups + +#define MAX_LAYER_NUM 8 + +#define LAYER_NUM_EXCHANGEABLE 1 + +#define MAX_NAL_UNIT_NUM_IN_AU 32 // predefined maximal number of NAL Units in an access unit +#define MIN_ACCESS_UNIT_CAPACITY 1048576 // Min AU capacity in bytes: (1<<20) = 1024 KB predefined +#define MAX_BUFFERED_NUM 3 //maximum stored number of AU|packet to prevent overwrite +#define MAX_ACCESS_UNIT_CAPACITY 7077888 //Maximum AU size in bytes for level 5.2 for single frame +#define MAX_MACROBLOCK_CAPACITY 5000 //Maximal legal MB capacity, 15000 bits is enough + +#endif//WELS_CONST_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/wels_decoder_thread.h b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/wels_decoder_thread.h new file mode 100644 index 000000000..ebb8015c9 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/inc/wels_decoder_thread.h @@ -0,0 +1,170 @@ +/*! + * \copy + * Copyright (c) 2009-2019, Cisco Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file wels_decoder_thread.h + * + * \brief Interfaces introduced in thread programming + * + * \date 08/06/2018 Created + * + ************************************************************************************* + */ + +#ifndef _WELS_DECODER_THREAD_H_ +#define _WELS_DECODER_THREAD_H_ + +#include "WelsThreadLib.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define WELS_DEC_MAX_NUM_CPU 16 +#define WELS_DEC_MAX_THREAD_STACK_SIZE 4096 +#define WELS_DEC_THREAD_COMMAND_RUN 0 +#define WELS_DEC_THREAD_COMMAND_ABORT 1 + +#if defined(_WIN32) || defined(__CYGWIN__) +typedef struct tagWelsDecSemphore { + WELS_THREAD_HANDLE h; +} SWelsDecSemphore; + +typedef struct tagWelsDecEvent { + WELS_THREAD_HANDLE h; + int isSignaled; +} SWelsDecEvent; + +typedef struct tagWelsDecThread { + WELS_THREAD_HANDLE h; +} SWelsDecThread; + +#define WelsDecThreadFunc(fn,a) DWORD WINAPI fn(LPVOID a) +#define WelsDecThreadFuncArg(a) LPWELS_THREAD_ROUTINE a +#define WELS_DEC_THREAD_WAIT_TIMEDOUT WAIT_TIMEOUT +#define WELS_DEC_THREAD_WAIT_SIGNALED WAIT_OBJECT_0 +#define WELS_DEC_THREAD_WAIT_INFINITE INFINITE + +#else // NON-WINDOWS + +typedef pthread_mutexattr_t WELS_MUTEX_ATTR; + +typedef struct tagWelsDecSemphore { + long max; + long v; + WELS_EVENT e; + WELS_MUTEX m; +} SWelsDecSemphore; + +typedef struct tagWelsDecEvent { + int manualReset; + int isSignaled; + pthread_cond_t c; + WELS_MUTEX m; +} SWelsDecEvent; + +typedef struct tagWelsDecThread { + WELS_THREAD_HANDLE h; +} SWelsDecThread; + +#define WelsDecThreadFunc(fn,a) void* fn(void* a) +#define WelsDecThreadFuncArg(a) void* (*a)(void*) + +#define WELS_DEC_THREAD_WAIT_TIMEDOUT ETIMEDOUT +#define WELS_DEC_THREAD_WAIT_SIGNALED EINTR +#define WELS_DEC_THREAD_WAIT_INFINITE -1 + +#endif//_WIN32 + +#define WelsDecThreadReturn WELS_THREAD_ROUTINE_RETURN(0); + +int32_t GetCPUCount(); + +// Event +int EventCreate (SWelsDecEvent* e, int manualReset, int initialState); +void EventPost (SWelsDecEvent* e); +int EventWait (SWelsDecEvent* 
e, int32_t timeout); +void EventReset (SWelsDecEvent* e); +void EventDestroy (SWelsDecEvent* e); + +// Semaphore +int SemCreate (SWelsDecSemphore* s, long value, long max); +int SemWait (SWelsDecSemphore* s, int32_t timeout); +void SemRelease (SWelsDecSemphore* s, long* prev_count); +void SemDestroy (SWelsDecSemphore* s); + +// Thread +int ThreadCreate (SWelsDecThread* t, LPWELS_THREAD_ROUTINE tf, void* ta); +int ThreadWait (SWelsDecThread* t); + +#define DECLARE_PROCTHREAD(name, argument) \ + WelsDecThreadFunc(name,argument) + +#define DECLARE_PROCTHREAD_PTR(name) \ + LPWELS_THREAD_ROUTINE name + +#define CREATE_THREAD(ph, threadproc,argument) \ + ThreadCreate(ph, threadproc, (void*)argument) + +#define CREATE_EVENT(ph, manualreset,initial_state,name) \ + EventCreate(ph,(int)(manualreset),(int)(initial_state)) + +#define CREATE_SEMAPHORE(ph, initial_count,max_count, name) \ + SemCreate(ph, (long)initial_count,(long)(max_count)) + +#define CLOSE_EVENT(ph) \ + EventDestroy(ph) + +#define CLOSE_SEMAPHORE(ph) \ + SemDestroy(ph) + +#define SET_EVENT(ph) \ + EventPost(ph) + +#define RESET_EVENT(ph) \ + EventReset(ph) + +#define RELEASE_SEMAPHORE(ph) \ + SemRelease(ph,NULL) + +#define WAIT_EVENT(ph,timeout) \ + EventWait(ph, (int32_t)timeout) + +#define WAIT_THREAD(ph) \ + ThreadWait(ph) + +#define WAIT_SEMAPHORE(ph,timeout) \ + SemWait(ph,(int32_t)timeout) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/au_parser.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/au_parser.cpp new file mode 100644 index 000000000..91f89b437 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/au_parser.cpp @@ -0,0 +1,1787 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file au_parser.c + * + * \brief Interfaces introduced in Access Unit level based parser + * + * \date 03/10/2009 Created + * + ************************************************************************************* + */ +#include "codec_def.h" +#include "au_parser.h" +#include "decoder.h" +#include "error_code.h" +#include "memmgr_nal_unit.h" +#include "decoder_core.h" +#include "bit_stream.h" +#include "memory_align.h" + +#define _PARSE_NALHRD_VCLHRD_PARAMS_ 1 + +namespace WelsDec { +/*! 
+ ************************************************************************************* + * \brief Start Code Prefix detection: a run of two or more 0x00 bytes followed by 0x01 (both 00 00 01 and 00 00 00 01 match) + * + * \param kpBuf bitstream payload buffer to scan + * \param pOffset output: byte offset from kpBuf to the first byte after the start code prefix; + * written only when a prefix is found. + * \param iBufSize size of the buffer in bytes + * + * \return pointer to the first byte after the start code prefix, or NULL if the buffer holds none + * + * \note A non-zero byte that does not complete a prefix is skipped and scanning restarts after it. + ************************************************************************************* + */ +uint8_t* DetectStartCodePrefix (const uint8_t* kpBuf, int32_t* pOffset, int32_t iBufSize) { + uint8_t* pBits = (uint8_t*)kpBuf; + + do { + int32_t iIdx = 0; + while ((iIdx < iBufSize) && (! (*pBits))) { /* skip and count the run of zero bytes */ + ++ pBits; + ++ iIdx; + } + if (iIdx >= iBufSize) break; /* buffer exhausted without a non-zero byte: no prefix */ + + ++ iIdx; + ++ pBits; + + if ((iIdx >= 3) && ((* (pBits - 1)) == 0x1)) { /* at least two zeros, then 0x01 */ + *pOffset = (int32_t) (((uintptr_t)pBits) - ((uintptr_t)kpBuf)); + return pBits; + } + + iBufSize -= iIdx; /* drop the bytes consumed in this pass and keep scanning */ + } while (1); + + return NULL; +} + +/*! + ************************************************************************************* + * \brief to parse nal unit + * + * \param pCtx decoder context + * \param pNalUnitHeader parsed result of NAL Unit Header to output + * \param pSrcRbsp bitstream buffer to input + * \param iSrcRbspLen length size of bitstream buffer payload + * \param pSrcNal + * \param iSrcNalLen + * \param pConsumedBytes consumed bytes during parsing + * + * \return decoded bytes payload, might be (pSrcRbsp+1) if no escapes + * + * \note N/A + ************************************************************************************* + */ +uint8_t* ParseNalHeader (PWelsDecoderContext pCtx, SNalUnitHeader* pNalUnitHeader, uint8_t* pSrcRbsp, + int32_t iSrcRbspLen, uint8_t* pSrcNal, int32_t iSrcNalLen, int32_t* pConsumedBytes) { + PNalUnit pCurNal = NULL; + uint8_t* pNal = pSrcRbsp; + int32_t iNalSize = iSrcRbspLen; + PBitStringAux pBs = NULL; + bool bExtensionFlag = false; + int32_t iErr = ERR_NONE; + int32_t iBitSize = 
0; + SDataBuffer* pSavedData = &pCtx->sSavedData; + SLogContext* pLogCtx = & (pCtx->sLogCtx); + pNalUnitHeader->eNalUnitType = NAL_UNIT_UNSPEC_0;//SHOULD init it. because pCtx->sCurNalHead is common variable. + + //remove the consecutive ZERO at the end of current NAL in the reverse order.--2011.6.1 + { + int32_t iIndex = iSrcRbspLen - 1; + uint8_t uiBsZero = 0; + while (iIndex >= 0) { + uiBsZero = pSrcRbsp[iIndex]; + if (0 == uiBsZero) { + --iNalSize; + ++ (*pConsumedBytes); + --iIndex; + } else { + break; + } + } + } + + pNalUnitHeader->uiForbiddenZeroBit = (uint8_t) (pNal[0] >> 7); // uiForbiddenZeroBit + if (pNalUnitHeader->uiForbiddenZeroBit) { //2010.4.14 + pCtx->iErrorCode |= dsBitstreamError; + return NULL; //uiForbiddenZeroBit should always equal to 0 + } + + pNalUnitHeader->uiNalRefIdc = (uint8_t) (pNal[0] >> 5); // uiNalRefIdc + pNalUnitHeader->eNalUnitType = (EWelsNalUnitType) (pNal[0] & 0x1f); // eNalUnitType + + ++pNal; + --iNalSize; + ++ (*pConsumedBytes); + + if (! (IS_SEI_NAL (pNalUnitHeader->eNalUnitType) || IS_SPS_NAL (pNalUnitHeader->eNalUnitType) + || IS_AU_DELIMITER_NAL (pNalUnitHeader->eNalUnitType) || pCtx->sSpsPpsCtx.bSpsExistAheadFlag)) { + if (pCtx->bPrintFrameErrorTraceFlag && pCtx->sSpsPpsCtx.iSpsErrorIgnored == 0) { + WelsLog (pLogCtx, WELS_LOG_WARNING, + "parse_nal(), no exist Sequence Parameter Sets ahead of sequence when try to decode NAL(type:%d).", + pNalUnitHeader->eNalUnitType); + } else { + pCtx->sSpsPpsCtx.iSpsErrorIgnored++; + } + pCtx->pDecoderStatistics->iSpsNoExistNalNum++; + pCtx->iErrorCode = dsNoParamSets; + return NULL; + } + pCtx->sSpsPpsCtx.iSpsErrorIgnored = 0; + if (! 
(IS_SEI_NAL (pNalUnitHeader->eNalUnitType) || IS_PARAM_SETS_NALS (pNalUnitHeader->eNalUnitType) + || IS_AU_DELIMITER_NAL (pNalUnitHeader->eNalUnitType) || pCtx->sSpsPpsCtx.bPpsExistAheadFlag)) { + if (pCtx->bPrintFrameErrorTraceFlag && pCtx->sSpsPpsCtx.iPpsErrorIgnored == 0) { + WelsLog (pLogCtx, WELS_LOG_WARNING, + "parse_nal(), no exist Picture Parameter Sets ahead of sequence when try to decode NAL(type:%d).", + pNalUnitHeader->eNalUnitType); + } else { + pCtx->sSpsPpsCtx.iPpsErrorIgnored++; + } + pCtx->pDecoderStatistics->iPpsNoExistNalNum++; + pCtx->iErrorCode = dsNoParamSets; + return NULL; + } + pCtx->sSpsPpsCtx.iPpsErrorIgnored = 0; + if ((IS_VCL_NAL_AVC_BASE (pNalUnitHeader->eNalUnitType) && ! (pCtx->sSpsPpsCtx.bSpsExistAheadFlag + || pCtx->sSpsPpsCtx.bPpsExistAheadFlag)) || + (IS_NEW_INTRODUCED_SVC_NAL (pNalUnitHeader->eNalUnitType) && ! (pCtx->sSpsPpsCtx.bSpsExistAheadFlag + || pCtx->sSpsPpsCtx.bSubspsExistAheadFlag + || pCtx->sSpsPpsCtx.bPpsExistAheadFlag))) { + if (pCtx->bPrintFrameErrorTraceFlag && pCtx->sSpsPpsCtx.iSubSpsErrorIgnored == 0) { + WelsLog (pLogCtx, WELS_LOG_WARNING, + "ParseNalHeader(), no exist Parameter Sets ahead of sequence when try to decode slice(type:%d).", + pNalUnitHeader->eNalUnitType); + } else { + pCtx->sSpsPpsCtx.iSubSpsErrorIgnored++; + } + pCtx->pDecoderStatistics->iSubSpsNoExistNalNum++; + pCtx->iErrorCode |= dsNoParamSets; + return NULL; + } + pCtx->sSpsPpsCtx.iSubSpsErrorIgnored = 0; + + switch (pNalUnitHeader->eNalUnitType) { + case NAL_UNIT_AU_DELIMITER: + case NAL_UNIT_SEI: + if (pCtx->pAccessUnitList->uiAvailUnitsNum > 0) { + pCtx->pAccessUnitList->uiEndPos = pCtx->pAccessUnitList->uiAvailUnitsNum - 1; + pCtx->bAuReadyFlag = true; + } + break; + + case NAL_UNIT_PREFIX: + pCurNal = &pCtx->sSpsPpsCtx.sPrefixNal; + pCurNal->uiTimeStamp = pCtx->uiTimeStamp; + + if (iNalSize < NAL_UNIT_HEADER_EXT_SIZE) { + PAccessUnit pCurAu = pCtx->pAccessUnitList; + uint32_t uiAvailNalNum = pCurAu->uiAvailUnitsNum; + + if 
(uiAvailNalNum > 0) { + pCurAu->uiEndPos = uiAvailNalNum - 1; + if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) { + pCtx->bAuReadyFlag = true; + } + } + pCurNal->sNalData.sPrefixNal.bPrefixNalCorrectFlag = false; + pCtx->iErrorCode |= dsBitstreamError; + return NULL; + } + + DecodeNalHeaderExt (pCurNal, pNal); + if ((pCurNal->sNalHeaderExt.uiQualityId != 0) || (pCurNal->sNalHeaderExt.bUseRefBasePicFlag != 0)) { + WelsLog (pLogCtx, WELS_LOG_WARNING, + "ParseNalHeader() in Prefix Nal Unit:uiQualityId (%d) != 0, bUseRefBasePicFlag (%d) != 0, not supported!", + pCurNal->sNalHeaderExt.uiQualityId, pCurNal->sNalHeaderExt.bUseRefBasePicFlag); + PAccessUnit pCurAu = pCtx->pAccessUnitList; + uint32_t uiAvailNalNum = pCurAu->uiAvailUnitsNum; + + if (uiAvailNalNum > 0) { + pCurAu->uiEndPos = uiAvailNalNum - 1; + if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) { + pCtx->bAuReadyFlag = true; + } + } + pCurNal->sNalData.sPrefixNal.bPrefixNalCorrectFlag = false; + pCtx->iErrorCode |= dsBitstreamError; + return NULL; + } + + pNal += NAL_UNIT_HEADER_EXT_SIZE; + iNalSize -= NAL_UNIT_HEADER_EXT_SIZE; + *pConsumedBytes += NAL_UNIT_HEADER_EXT_SIZE; + + pCurNal->sNalHeaderExt.sNalUnitHeader.uiForbiddenZeroBit = pNalUnitHeader->uiForbiddenZeroBit; + pCurNal->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc = pNalUnitHeader->uiNalRefIdc; + pCurNal->sNalHeaderExt.sNalUnitHeader.eNalUnitType = pNalUnitHeader->eNalUnitType; + if (pNalUnitHeader->uiNalRefIdc != 0) { + pBs = &pCtx->sBs; + iBitSize = (iNalSize << 3) - BsGetTrailingBits (pNal + iNalSize - 1); // convert into bit + + iErr = DecInitBits (pBs, pNal, iBitSize); + if (iErr) { + WelsLog (pLogCtx, WELS_LOG_ERROR, "NAL_UNIT_PREFIX: DecInitBits() fail due invalid access."); + pCtx->iErrorCode |= dsBitstreamError; + return NULL; + } + ParsePrefixNalUnit (pCtx, pBs); + } + pCurNal->sNalData.sPrefixNal.bPrefixNalCorrectFlag = true; + + break; + case NAL_UNIT_CODED_SLICE_EXT: + bExtensionFlag = true; + case NAL_UNIT_CODED_SLICE: + case 
NAL_UNIT_CODED_SLICE_IDR: { + PAccessUnit pCurAu = NULL; + uint32_t uiAvailNalNum; + pCurNal = MemGetNextNal (&pCtx->pAccessUnitList, pCtx->pMemAlign); + if (NULL == pCurNal) { + WelsLog (pLogCtx, WELS_LOG_ERROR, "MemGetNextNal() fail due out of memory."); + pCtx->iErrorCode |= dsOutOfMemory; + return NULL; + } + pCurNal->uiTimeStamp = pCtx->uiTimeStamp; + pCurNal->sNalHeaderExt.sNalUnitHeader.uiForbiddenZeroBit = pNalUnitHeader->uiForbiddenZeroBit; + pCurNal->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc = pNalUnitHeader->uiNalRefIdc; + pCurNal->sNalHeaderExt.sNalUnitHeader.eNalUnitType = pNalUnitHeader->eNalUnitType; + pCurAu = pCtx->pAccessUnitList; + uiAvailNalNum = pCurAu->uiAvailUnitsNum; + + + if (pNalUnitHeader->eNalUnitType == NAL_UNIT_CODED_SLICE_EXT) { + if (iNalSize < NAL_UNIT_HEADER_EXT_SIZE) { + ForceClearCurrentNal (pCurAu); + + if (uiAvailNalNum > 1) { + pCurAu->uiEndPos = uiAvailNalNum - 2; + if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) { + pCtx->bAuReadyFlag = true; + } + } + pCtx->iErrorCode |= dsBitstreamError; + return NULL; + } + + DecodeNalHeaderExt (pCurNal, pNal); + if (pCurNal->sNalHeaderExt.uiQualityId != 0 || + pCurNal->sNalHeaderExt.bUseRefBasePicFlag) { + if (pCurNal->sNalHeaderExt.uiQualityId != 0) + WelsLog (pLogCtx, WELS_LOG_WARNING, "ParseNalHeader():uiQualityId (%d) != 0, MGS not supported!", + pCurNal->sNalHeaderExt.uiQualityId); + if (pCurNal->sNalHeaderExt.bUseRefBasePicFlag != 0) + WelsLog (pLogCtx, WELS_LOG_WARNING, "ParseNalHeader():bUseRefBasePicFlag (%d) != 0, MGS not supported!", + pCurNal->sNalHeaderExt.bUseRefBasePicFlag); + + ForceClearCurrentNal (pCurAu); + + if (uiAvailNalNum > 1) { + pCurAu->uiEndPos = uiAvailNalNum - 2; + if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) { + pCtx->bAuReadyFlag = true; + } + } + pCtx->iErrorCode |= dsBitstreamError; + return NULL; + } + pNal += NAL_UNIT_HEADER_EXT_SIZE; + iNalSize -= NAL_UNIT_HEADER_EXT_SIZE; + *pConsumedBytes += NAL_UNIT_HEADER_EXT_SIZE; + + if 
(pCtx->pParam->bParseOnly) { + pCurNal->sNalData.sVclNal.pNalPos = pSavedData->pCurPos; + int32_t iTrailingZeroByte = 0; + while (pSrcNal[iSrcNalLen - iTrailingZeroByte - 1] == 0x0) //remove final trailing 0 bytes + iTrailingZeroByte++; + int32_t iActualLen = iSrcNalLen - iTrailingZeroByte; + pCurNal->sNalData.sVclNal.iNalLength = iActualLen - NAL_UNIT_HEADER_EXT_SIZE; + //unify start code as 0x0001 + int32_t iCurrStartByte = 4; //4 for 0x0001, 3 for 0x001 + if (pSrcNal[0] == 0x0 && pSrcNal[1] == 0x0 && pSrcNal[2] == 0x1) { //if 0x001 + iCurrStartByte = 3; + pCurNal->sNalData.sVclNal.iNalLength++; + } + if (pCurNal->sNalHeaderExt.bIdrFlag) { + * (pSrcNal + iCurrStartByte) &= 0xE0; + * (pSrcNal + iCurrStartByte) |= 0x05; + } else { + * (pSrcNal + iCurrStartByte) &= 0xE0; + * (pSrcNal + iCurrStartByte) |= 0x01; + } + pSavedData->pCurPos[0] = pSavedData->pCurPos[1] = pSavedData->pCurPos[2] = 0x0; + pSavedData->pCurPos[3] = 0x1; + pSavedData->pCurPos[4] = * (pSrcNal + iCurrStartByte); + pSavedData->pCurPos += 5; + int32_t iOffset = iCurrStartByte + 1 + NAL_UNIT_HEADER_EXT_SIZE; + memcpy (pSavedData->pCurPos, pSrcNal + iOffset, iActualLen - iOffset); + pSavedData->pCurPos += iActualLen - iOffset; + } + } else { + if (pCtx->pParam->bParseOnly) { + pCurNal->sNalData.sVclNal.pNalPos = pSavedData->pCurPos; + int32_t iTrailingZeroByte = 0; + while (pSrcNal[iSrcNalLen - iTrailingZeroByte - 1] == 0x0) //remove final trailing 0 bytes + iTrailingZeroByte++; + int32_t iActualLen = iSrcNalLen - iTrailingZeroByte; + pCurNal->sNalData.sVclNal.iNalLength = iActualLen; + //unify start code as 0x0001 + int32_t iStartDeltaByte = 0; //0 for 0x0001, 1 for 0x001 + if (pSrcNal[0] == 0x0 && pSrcNal[1] == 0x0 && pSrcNal[2] == 0x1) { //if 0x001 + pSavedData->pCurPos[0] = 0x0; + iStartDeltaByte = 1; + pCurNal->sNalData.sVclNal.iNalLength++; + } + memcpy (pSavedData->pCurPos + iStartDeltaByte, pSrcNal, iActualLen); + pSavedData->pCurPos += iStartDeltaByte + iActualLen; + } + if (NAL_UNIT_PREFIX 
== pCtx->sSpsPpsCtx.sPrefixNal.sNalHeaderExt.sNalUnitHeader.eNalUnitType) { + if (pCtx->sSpsPpsCtx.sPrefixNal.sNalData.sPrefixNal.bPrefixNalCorrectFlag) { + PrefetchNalHeaderExtSyntax (pCtx, pCurNal, &pCtx->sSpsPpsCtx.sPrefixNal); + } + } + + pCurNal->sNalHeaderExt.bIdrFlag = (NAL_UNIT_CODED_SLICE_IDR == pNalUnitHeader->eNalUnitType) ? true : + false; //SHOULD update this flag for AVC if no prefix NAL + pCurNal->sNalHeaderExt.iNoInterLayerPredFlag = 1; + } + + pBs = &pCurAu->pNalUnitsList[uiAvailNalNum - 1]->sNalData.sVclNal.sSliceBitsRead; + iBitSize = (iNalSize << 3) - BsGetTrailingBits (pNal + iNalSize - 1); // convert into bit + iErr = DecInitBits (pBs, pNal, iBitSize); + if (iErr) { + ForceClearCurrentNal (pCurAu); + if (uiAvailNalNum > 1) { + pCurAu->uiEndPos = uiAvailNalNum - 2; + if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) { + pCtx->bAuReadyFlag = true; + } + } + WelsLog (pLogCtx, WELS_LOG_ERROR, "NAL_UNIT_CODED_SLICE: DecInitBits() fail due invalid access."); + pCtx->iErrorCode |= dsBitstreamError; + return NULL; + } + iErr = ParseSliceHeaderSyntaxs (pCtx, pBs, bExtensionFlag); + if (iErr != ERR_NONE) { + if ((uiAvailNalNum == 1) && (pCurNal->sNalHeaderExt.bIdrFlag)) { //IDR parse error + ResetActiveSPSForEachLayer (pCtx); + } + //if current NAL occur error when parsing, should clean it from pNalUnitsList + //otherwise, when Next good NAL decoding, this corrupt NAL is considered as normal NAL and lead to decoder crash + ForceClearCurrentNal (pCurAu); + + if (uiAvailNalNum > 1) { + pCurAu->uiEndPos = uiAvailNalNum - 2; + if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) { + pCtx->bAuReadyFlag = true; + } + } + pCtx->iErrorCode |= dsBitstreamError; + return NULL; + } + + if ((uiAvailNalNum == 1) + && CheckNextAuNewSeq (pCtx, pCurNal, pCurNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.pSps)) { + ResetActiveSPSForEachLayer (pCtx); + } + if ((uiAvailNalNum > 1) && + CheckAccessUnitBoundary (pCtx, pCurAu->pNalUnitsList[uiAvailNalNum - 1], 
pCurAu->pNalUnitsList[uiAvailNalNum - 2], + pCurAu->pNalUnitsList[uiAvailNalNum - 1]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.pSps)) { + pCurAu->uiEndPos = uiAvailNalNum - 2; + pCtx->bAuReadyFlag = true; + pCtx->bNextNewSeqBegin = CheckNextAuNewSeq (pCtx, pCurNal, pCurNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.pSps); + + } + } + break; + default: + break; + } + + return pNal; +} + + +bool CheckAccessUnitBoundaryExt (PNalUnitHeaderExt pLastNalHdrExt, PNalUnitHeaderExt pCurNalHeaderExt, + PSliceHeader pLastSliceHeader, PSliceHeader pCurSliceHeader) { + const PSps kpSps = pCurSliceHeader->pSps; + + //Sub-clause 7.1.4.1.1 temporal_id + if (pLastNalHdrExt->uiTemporalId != pCurNalHeaderExt->uiTemporalId) { + return true; + } + + // Subclause 7.4.1.2.5 + if (pLastSliceHeader->iRedundantPicCnt > pCurSliceHeader->iRedundantPicCnt) + return true; + + // Subclause G7.4.1.2.4 + if (pLastNalHdrExt->uiDependencyId > pCurNalHeaderExt->uiDependencyId) + return true; + if (pLastNalHdrExt->uiQualityId > pCurNalHeaderExt->uiQualityId) + return true; + + // Subclause 7.4.1.2.4 + if (pLastSliceHeader->iFrameNum != pCurSliceHeader->iFrameNum) + return true; + if (pLastSliceHeader->iPpsId != pCurSliceHeader->iPpsId) + return true; + if (pLastSliceHeader->pSps->iSpsId != pCurSliceHeader->pSps->iSpsId) + return true; + if (pLastSliceHeader->bFieldPicFlag != pCurSliceHeader->bFieldPicFlag) + return true; + if (pLastSliceHeader->bBottomFiledFlag != pCurSliceHeader->bBottomFiledFlag) + return true; + if ((pLastNalHdrExt->sNalUnitHeader.uiNalRefIdc != NRI_PRI_LOWEST) != (pCurNalHeaderExt->sNalUnitHeader.uiNalRefIdc != + NRI_PRI_LOWEST)) + return true; + if (pLastNalHdrExt->bIdrFlag != pCurNalHeaderExt->bIdrFlag) + return true; + if (pCurNalHeaderExt->bIdrFlag) { + if (pLastSliceHeader->uiIdrPicId != pCurSliceHeader->uiIdrPicId) + return true; + } + if (kpSps->uiPocType == 0) { + if (pLastSliceHeader->iPicOrderCntLsb != pCurSliceHeader->iPicOrderCntLsb) + return true; + if 
(pLastSliceHeader->iDeltaPicOrderCntBottom != pCurSliceHeader->iDeltaPicOrderCntBottom) + return true; + } else if (kpSps->uiPocType == 1) { + if (pLastSliceHeader->iDeltaPicOrderCnt[0] != pCurSliceHeader->iDeltaPicOrderCnt[0]) + return true; + if (pLastSliceHeader->iDeltaPicOrderCnt[1] != pCurSliceHeader->iDeltaPicOrderCnt[1]) + return true; + } + if (memcmp (pLastSliceHeader->pPps, pCurSliceHeader->pPps, sizeof (SPps)) != 0 + || memcmp (pLastSliceHeader->pSps, pCurSliceHeader->pSps, sizeof (SSps)) != 0) { + return true; + } + return false; +} + + +bool CheckAccessUnitBoundary (PWelsDecoderContext pCtx, const PNalUnit kpCurNal, const PNalUnit kpLastNal, + const PSps kpSps) { + const PNalUnitHeaderExt kpLastNalHeaderExt = &kpLastNal->sNalHeaderExt; + const PNalUnitHeaderExt kpCurNalHeaderExt = &kpCurNal->sNalHeaderExt; + const SSliceHeader* kpLastSliceHeader = &kpLastNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader; + const SSliceHeader* kpCurSliceHeader = &kpCurNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader; + if (pCtx->sSpsPpsCtx.pActiveLayerSps[kpCurNalHeaderExt->uiDependencyId] != NULL + && pCtx->sSpsPpsCtx.pActiveLayerSps[kpCurNalHeaderExt->uiDependencyId] != kpSps) { + return true; // the active sps changed, new sequence begins, so the current au is ready + } + + //Sub-clause 7.1.4.1.1 temporal_id + if (kpLastNalHeaderExt->uiTemporalId != kpCurNalHeaderExt->uiTemporalId) { + return true; + } + if (kpLastSliceHeader->iFrameNum != kpCurSliceHeader->iFrameNum) + return true; + // Subclause 7.4.1.2.5 + if (kpLastSliceHeader->iRedundantPicCnt > kpCurSliceHeader->iRedundantPicCnt) + return true; + + // Subclause G7.4.1.2.4 + if (kpLastNalHeaderExt->uiDependencyId > kpCurNalHeaderExt->uiDependencyId) + return true; + // Subclause 7.4.1.2.4 + if (kpLastNalHeaderExt->uiDependencyId == kpCurNalHeaderExt->uiDependencyId + && kpLastSliceHeader->iPpsId != kpCurSliceHeader->iPpsId) + return true; + if (kpLastSliceHeader->bFieldPicFlag != 
kpCurSliceHeader->bFieldPicFlag) + return true; + if (kpLastSliceHeader->bBottomFiledFlag != kpCurSliceHeader->bBottomFiledFlag) + return true; + if ((kpLastNalHeaderExt->sNalUnitHeader.uiNalRefIdc != NRI_PRI_LOWEST) != (kpCurNalHeaderExt->sNalUnitHeader.uiNalRefIdc + != NRI_PRI_LOWEST)) + return true; + if (kpLastNalHeaderExt->bIdrFlag != kpCurNalHeaderExt->bIdrFlag) + return true; + if (kpCurNalHeaderExt->bIdrFlag) { + if (kpLastSliceHeader->uiIdrPicId != kpCurSliceHeader->uiIdrPicId) + return true; + } + if (kpSps->uiPocType == 0) { + if (kpLastSliceHeader->iPicOrderCntLsb != kpCurSliceHeader->iPicOrderCntLsb) + return true; + if (kpLastSliceHeader->iDeltaPicOrderCntBottom != kpCurSliceHeader->iDeltaPicOrderCntBottom) + return true; + } else if (kpSps->uiPocType == 1) { + if (kpLastSliceHeader->iDeltaPicOrderCnt[0] != kpCurSliceHeader->iDeltaPicOrderCnt[0]) + return true; + if (kpLastSliceHeader->iDeltaPicOrderCnt[1] != kpCurSliceHeader->iDeltaPicOrderCnt[1]) + return true; + } + + return false; +} + +bool CheckNextAuNewSeq (PWelsDecoderContext pCtx, const PNalUnit kpCurNal, const PSps kpSps) { + const PNalUnitHeaderExt kpCurNalHeaderExt = &kpCurNal->sNalHeaderExt; + if (pCtx->sSpsPpsCtx.pActiveLayerSps[kpCurNalHeaderExt->uiDependencyId] != NULL + && pCtx->sSpsPpsCtx.pActiveLayerSps[kpCurNalHeaderExt->uiDependencyId] != kpSps) + return true; + if (kpCurNalHeaderExt->bIdrFlag) + return true; + + return false; +} + +/*! 
+ ************************************************************************************* + * \brief to parse NON VCL NAL Units + * + * \param pCtx decoder context + * \param rbsp rbsp buffer of NAL Unit + * \param src_len length of rbsp buffer + * + * \return 0 - successed + * 1 - failed + * + ************************************************************************************* + */ +int32_t ParseNonVclNal (PWelsDecoderContext pCtx, uint8_t* pRbsp, const int32_t kiSrcLen, uint8_t* pSrcNal, + const int32_t kSrcNalLen) { + PBitStringAux pBs = NULL; + EWelsNalUnitType eNalType = NAL_UNIT_UNSPEC_0; // make initial value as unspecified + int32_t iPicWidth = 0; + int32_t iPicHeight = 0; + int32_t iBitSize = 0; + int32_t iErr = ERR_NONE; + if (kiSrcLen <= 0) + return iErr; + + pBs = &pCtx->sBs; // SBitStringAux instance for non VCL NALs decoding + iBitSize = (kiSrcLen << 3) - BsGetTrailingBits (pRbsp + kiSrcLen - 1); // convert into bit + eNalType = pCtx->sCurNalHead.eNalUnitType; + + switch (eNalType) { + case NAL_UNIT_SPS: + case NAL_UNIT_SUBSET_SPS: + if (iBitSize > 0) { + iErr = DecInitBits (pBs, pRbsp, iBitSize); + if (ERR_NONE != iErr) { + if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) + pCtx->iErrorCode |= dsNoParamSets; + else + pCtx->iErrorCode |= dsBitstreamError; + return iErr; + } + } + iErr = ParseSps (pCtx, pBs, &iPicWidth, &iPicHeight, pSrcNal, kSrcNalLen); + if (ERR_NONE != iErr) { // modified for pSps/pSubsetSps invalid, 12/1/2009 + if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) + pCtx->iErrorCode |= dsNoParamSets; + else + pCtx->iErrorCode |= dsBitstreamError; + return iErr; + } + pCtx->bHasNewSps = true; + break; + + case NAL_UNIT_PPS: + if (iBitSize > 0) { + iErr = DecInitBits (pBs, pRbsp, iBitSize); + if (ERR_NONE != iErr) { + if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) + pCtx->iErrorCode |= dsNoParamSets; + else + pCtx->iErrorCode |= dsBitstreamError; + return iErr; + } + } + iErr = ParsePps (pCtx, &pCtx->sSpsPpsCtx.sPpsBuffer[0], 
pBs, pSrcNal, kSrcNalLen); + if (ERR_NONE != iErr) { // modified for pps invalid, 12/1/2009 + if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) + pCtx->iErrorCode |= dsNoParamSets; + else + pCtx->iErrorCode |= dsBitstreamError; + pCtx->bHasNewSps = false; + return iErr; + } + + pCtx->sSpsPpsCtx.bPpsExistAheadFlag = true; + ++ (pCtx->sSpsPpsCtx.iSeqId); + break; + + case NAL_UNIT_SEI: + + break; + + case NAL_UNIT_PREFIX: + break; + case NAL_UNIT_CODED_SLICE_DPA: + case NAL_UNIT_CODED_SLICE_DPB: + case NAL_UNIT_CODED_SLICE_DPC: + + break; + + default: + break; + } + + return iErr; +} + +int32_t ParseRefBasePicMarking (PBitStringAux pBs, PRefBasePicMarking pRefBasePicMarking) { + uint32_t uiCode; + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //adaptive_ref_base_pic_marking_mode_flag + const bool kbAdaptiveMarkingModeFlag = !!uiCode; + pRefBasePicMarking->bAdaptiveRefBasePicMarkingModeFlag = kbAdaptiveMarkingModeFlag; + if (kbAdaptiveMarkingModeFlag) { + int32_t iIdx = 0; + do { + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //MMCO_base + const uint32_t kuiMmco = uiCode; + + pRefBasePicMarking->mmco_base[iIdx].uiMmcoType = kuiMmco; + + if (kuiMmco == MMCO_END) + break; + + if (kuiMmco == MMCO_SHORT2UNUSED) { + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //difference_of_base_pic_nums_minus1 + pRefBasePicMarking->mmco_base[iIdx].uiDiffOfPicNums = 1 + uiCode; + pRefBasePicMarking->mmco_base[iIdx].iShortFrameNum = 0; + } else if (kuiMmco == MMCO_LONG2UNUSED) { + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //long_term_base_pic_num + pRefBasePicMarking->mmco_base[iIdx].uiLongTermPicNum = uiCode; + } + ++ iIdx; + } while (iIdx < MAX_MMCO_COUNT); + } + return ERR_NONE; +} + +int32_t ParsePrefixNalUnit (PWelsDecoderContext pCtx, PBitStringAux pBs) { + PNalUnit pCurNal = &pCtx->sSpsPpsCtx.sPrefixNal; + uint32_t uiCode; + + if (pCurNal->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc != 0) { + PNalUnitHeaderExt head_ext = &pCurNal->sNalHeaderExt; + PPrefixNalUnit sPrefixNal = 
&pCurNal->sNalData.sPrefixNal; + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //store_ref_base_pic_flag + sPrefixNal->bStoreRefBasePicFlag = !!uiCode; + if ((head_ext->bUseRefBasePicFlag || sPrefixNal->bStoreRefBasePicFlag) && !head_ext->bIdrFlag) { + WELS_READ_VERIFY (ParseRefBasePicMarking (pBs, &sPrefixNal->sRefPicBaseMarking)); + } + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //additional_prefix_nal_unit_extension_flag + sPrefixNal->bPrefixNalUnitAdditionalExtFlag = !!uiCode; + if (sPrefixNal->bPrefixNalUnitAdditionalExtFlag) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //additional_prefix_nal_unit_extension_data_flag + sPrefixNal->bPrefixNalUnitExtFlag = !!uiCode; + } + } + return ERR_NONE; +} + +#define SUBSET_SPS_SEQ_SCALED_REF_LAYER_LEFT_OFFSET_MIN -32768 +#define SUBSET_SPS_SEQ_SCALED_REF_LAYER_LEFT_OFFSET_MAX 32767 +#define SUBSET_SPS_SEQ_SCALED_REF_LAYER_TOP_OFFSET_MIN -32768 +#define SUBSET_SPS_SEQ_SCALED_REF_LAYER_TOP_OFFSET_MAX 32767 +#define SUBSET_SPS_SEQ_SCALED_REF_LAYER_RIGHT_OFFSET_MIN -32768 +#define SUBSET_SPS_SEQ_SCALED_REF_LAYER_RIGHT_OFFSET_MAX 32767 +#define SUBSET_SPS_SEQ_SCALED_REF_LAYER_BOTTOM_OFFSET_MIN -32768 +#define SUBSET_SPS_SEQ_SCALED_REF_LAYER_BOTTOM_OFFSET_MAX 32767 + + + + +int32_t DecodeSpsSvcExt (PWelsDecoderContext pCtx, PSubsetSps pSpsExt, PBitStringAux pBs) { + PSpsSvcExt pExt = NULL; + uint32_t uiCode; + int32_t iCode; + + pExt = &pSpsExt->sSpsSvcExt; + + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //inter_layer_deblocking_filter_control_present_flag + pExt->bInterLayerDeblockingFilterCtrlPresentFlag = !!uiCode; + WELS_READ_VERIFY (BsGetBits (pBs, 2, &uiCode)); //extended_spatial_scalability_idc + pExt->uiExtendedSpatialScalability = uiCode; + if (pExt->uiExtendedSpatialScalability > 2) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, + "DecodeSpsSvcExt():extended_spatial_scalability (%d) != 0, ESS not supported!", + pExt->uiExtendedSpatialScalability); + return GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, 
ERR_INFO_INVALID_ESS); + } + + pExt->uiChromaPhaseXPlus1Flag = + 0; // FIXME: Incoherent with JVT X201 standard (= 1), but conformance to JSVM (= 0) implementation. + pExt->uiChromaPhaseYPlus1 = 1; + + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //chroma_phase_x_plus1_flag + pExt->uiChromaPhaseXPlus1Flag = uiCode; + WELS_READ_VERIFY (BsGetBits (pBs, 2, &uiCode)); //chroma_phase_y_plus1 + pExt->uiChromaPhaseYPlus1 = uiCode; + + pExt->uiSeqRefLayerChromaPhaseXPlus1Flag = pExt->uiChromaPhaseXPlus1Flag; + pExt->uiSeqRefLayerChromaPhaseYPlus1 = pExt->uiChromaPhaseYPlus1; + memset (&pExt->sSeqScaledRefLayer, 0, sizeof (SPosOffset)); + + if (pExt->uiExtendedSpatialScalability == 1) { + SPosOffset* const kpPos = &pExt->sSeqScaledRefLayer; + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //seq_ref_layer_chroma_phase_x_plus1_flag + pExt->uiSeqRefLayerChromaPhaseXPlus1Flag = uiCode; + WELS_READ_VERIFY (BsGetBits (pBs, 2, &uiCode)); //seq_ref_layer_chroma_phase_y_plus1 + pExt->uiSeqRefLayerChromaPhaseYPlus1 = uiCode; + + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //seq_scaled_ref_layer_left_offset + kpPos->iLeftOffset = iCode; + WELS_CHECK_SE_BOTH_WARNING (kpPos->iLeftOffset, SUBSET_SPS_SEQ_SCALED_REF_LAYER_LEFT_OFFSET_MIN, + SUBSET_SPS_SEQ_SCALED_REF_LAYER_LEFT_OFFSET_MAX, "seq_scaled_ref_layer_left_offset"); + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //seq_scaled_ref_layer_top_offset + kpPos->iTopOffset = iCode; + WELS_CHECK_SE_BOTH_WARNING (kpPos->iTopOffset, SUBSET_SPS_SEQ_SCALED_REF_LAYER_TOP_OFFSET_MIN, + SUBSET_SPS_SEQ_SCALED_REF_LAYER_TOP_OFFSET_MAX, "seq_scaled_ref_layer_top_offset"); + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //seq_scaled_ref_layer_right_offset + kpPos->iRightOffset = iCode; + WELS_CHECK_SE_BOTH_WARNING (kpPos->iRightOffset, SUBSET_SPS_SEQ_SCALED_REF_LAYER_RIGHT_OFFSET_MIN, + SUBSET_SPS_SEQ_SCALED_REF_LAYER_RIGHT_OFFSET_MAX, "seq_scaled_ref_layer_right_offset"); + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //seq_scaled_ref_layer_bottom_offset + 
kpPos->iBottomOffset = iCode; + WELS_CHECK_SE_BOTH_WARNING (kpPos->iBottomOffset, SUBSET_SPS_SEQ_SCALED_REF_LAYER_BOTTOM_OFFSET_MIN, + SUBSET_SPS_SEQ_SCALED_REF_LAYER_BOTTOM_OFFSET_MAX, "seq_scaled_ref_layer_bottom_offset"); + } + + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //seq_tcoeff_level_prediction_flag + pExt->bSeqTCoeffLevelPredFlag = !!uiCode; + pExt->bAdaptiveTCoeffLevelPredFlag = false; + if (pExt->bSeqTCoeffLevelPredFlag) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //adaptive_tcoeff_level_prediction_flag + pExt->bAdaptiveTCoeffLevelPredFlag = !!uiCode; + } + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //slice_header_restriction_flag + pExt->bSliceHeaderRestrictionFlag = !!uiCode; + + + + return ERR_NONE; +} + +const SLevelLimits* GetLevelLimits (int32_t iLevelIdx, bool bConstraint3) { + switch (iLevelIdx) { + case 9: + return &g_ksLevelLimits[1]; + case 10: + return &g_ksLevelLimits[0]; + case 11: + if (bConstraint3) + return &g_ksLevelLimits[1]; + else + return &g_ksLevelLimits[2]; + case 12: + return &g_ksLevelLimits[3]; + case 13: + return &g_ksLevelLimits[4]; + case 20: + return &g_ksLevelLimits[5]; + case 21: + return &g_ksLevelLimits[6]; + case 22: + return &g_ksLevelLimits[7]; + case 30: + return &g_ksLevelLimits[8]; + case 31: + return &g_ksLevelLimits[9]; + case 32: + return &g_ksLevelLimits[10]; + case 40: + return &g_ksLevelLimits[11]; + case 41: + return &g_ksLevelLimits[12]; + case 42: + return &g_ksLevelLimits[13]; + case 50: + return &g_ksLevelLimits[14]; + case 51: + return &g_ksLevelLimits[15]; + case 52: + return &g_ksLevelLimits[16]; + default: + return NULL; + } + return NULL; +} + +bool CheckSpsActive (PWelsDecoderContext pCtx, PSps pSps, bool bUseSubsetFlag) { + for (int i = 0; i < MAX_LAYER_NUM; i++) { + if (pCtx->sSpsPpsCtx.pActiveLayerSps[i] == pSps) + return true; + } + // Pre-active, will be used soon + if (bUseSubsetFlag) { + if (pSps->iMbWidth > 0 && pSps->iMbHeight > 0 && 
pCtx->sSpsPpsCtx.bSubspsAvailFlags[pSps->iSpsId]) { + if (pCtx->iTotalNumMbRec > 0) { + return true; + } + if (pCtx->pAccessUnitList->uiAvailUnitsNum > 0) { + int i = 0, iNum = (int32_t) pCtx->pAccessUnitList->uiAvailUnitsNum; + while (i < iNum) { + PNalUnit pNalUnit = pCtx->pAccessUnitList->pNalUnitsList[i]; + if (pNalUnit->sNalData.sVclNal.bSliceHeaderExtFlag) { //ext data + PSps pNextUsedSps = pNalUnit->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.pSps; + if (pNextUsedSps->iSpsId == pSps->iSpsId) + return true; + } + ++i; + } + } + } + } else { + if (pSps->iMbWidth > 0 && pSps->iMbHeight > 0 && pCtx->sSpsPpsCtx.bSpsAvailFlags[pSps->iSpsId]) { + if (pCtx->iTotalNumMbRec > 0) { + return true; + } + if (pCtx->pAccessUnitList->uiAvailUnitsNum > 0) { + int i = 0, iNum = (int32_t) pCtx->pAccessUnitList->uiAvailUnitsNum; + while (i < iNum) { + PNalUnit pNalUnit = pCtx->pAccessUnitList->pNalUnitsList[i]; + if (!pNalUnit->sNalData.sVclNal.bSliceHeaderExtFlag) { //non-ext data + PSps pNextUsedSps = pNalUnit->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.pSps; + if (pNextUsedSps->iSpsId == pSps->iSpsId) + return true; + } + ++i; + } + } + } + } + return false; +} + +#define SPS_LOG2_MAX_FRAME_NUM_MINUS4_MAX 12 +#define SPS_LOG2_MAX_PIC_ORDER_CNT_LSB_MINUS4_MAX 12 +#define SPS_NUM_REF_FRAMES_IN_PIC_ORDER_CNT_CYCLE_MAX 255 +#define SPS_MAX_NUM_REF_FRAMES_MAX 16 +#define PPS_PIC_INIT_QP_QS_MIN 0 +#define PPS_PIC_INIT_QP_QS_MAX 51 +#define PPS_CHROMA_QP_INDEX_OFFSET_MIN -12 +#define PPS_CHROMA_QP_INDEX_OFFSET_MAX 12 +#define SCALING_LIST_DELTA_SCALE_MAX 127 +#define SCALING_LIST_DELTA_SCALE_MIN -128 + +/*! 
+ ************************************************************************************* + * \brief to parse Sequence Parameter Set (SPS) + * + * \param pCtx Decoder context + * \param pBsAux bitstream reader auxiliary + * \param pPicWidth picture width current Sps represented + * \param pPicHeight picture height current Sps represented + * + * \return 0 - successed + * 1 - failed + * + * \note Call it in case eNalUnitType is SPS. + ************************************************************************************* + */ + +int32_t ParseSps (PWelsDecoderContext pCtx, PBitStringAux pBsAux, int32_t* pPicWidth, int32_t* pPicHeight, + uint8_t* pSrcNal, const int32_t kSrcNalLen) { + PBitStringAux pBs = pBsAux; + SSubsetSps sTempSubsetSps; + PSps pSps = NULL; + PSubsetSps pSubsetSps = NULL; + SNalUnitHeader* pNalHead = &pCtx->sCurNalHead; + ProfileIdc uiProfileIdc; + uint8_t uiLevelIdc; + int32_t iSpsId; + uint32_t uiCode; + int32_t iCode; + int32_t iRet = ERR_NONE; + bool bConstraintSetFlags[6] = { false }; + const bool kbUseSubsetFlag = IS_SUBSET_SPS_NAL (pNalHead->eNalUnitType); + + WELS_READ_VERIFY (BsGetBits (pBs, 8, &uiCode)); //profile_idc + uiProfileIdc = uiCode; + if (uiProfileIdc != PRO_BASELINE && uiProfileIdc != PRO_MAIN && uiProfileIdc != PRO_SCALABLE_BASELINE + && uiProfileIdc != PRO_SCALABLE_HIGH + && uiProfileIdc != PRO_EXTENDED && uiProfileIdc != PRO_HIGH) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "SPS ID can not be supported!\n"); + return false; + } + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //constraint_set0_flag + bConstraintSetFlags[0] = !!uiCode; + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //constraint_set1_flag + bConstraintSetFlags[1] = !!uiCode; + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //constraint_set2_flag + bConstraintSetFlags[2] = !!uiCode; + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //constraint_set3_flag + bConstraintSetFlags[3] = !!uiCode; + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); 
//constraint_set4_flag + bConstraintSetFlags[4] = !!uiCode; + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //constraint_set5_flag + bConstraintSetFlags[5] = !!uiCode; + WELS_READ_VERIFY (BsGetBits (pBs, 2, &uiCode)); // reserved_zero_2bits, equal to 0 + WELS_READ_VERIFY (BsGetBits (pBs, 8, &uiCode)); // level_idc + uiLevelIdc = uiCode; + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //seq_parameter_set_id + if (uiCode >= MAX_SPS_COUNT) { // Modified to check invalid negative iSpsId, 12/1/2009 + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, " iSpsId is out of range! \n"); + return GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_SPS_ID_OVERFLOW); + } + iSpsId = uiCode; + pSubsetSps = &sTempSubsetSps; + pSps = &sTempSubsetSps.sSps; + memset (pSubsetSps, 0, sizeof (SSubsetSps)); + // Use the level 5.2 for compatibility + const SLevelLimits* pSMaxLevelLimits = GetLevelLimits (52, false); + const SLevelLimits* pSLevelLimits = GetLevelLimits (uiLevelIdc, bConstraintSetFlags[3]); + if (NULL == pSLevelLimits) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "ParseSps(): level_idx (%d).\n", uiLevelIdc); + return GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_UNSUPPORTED_NON_BASELINE); + } else pSps->pSLevelLimits = pSLevelLimits; + // syntax elements in default + pSps->uiChromaFormatIdc = 1; + pSps->uiChromaArrayType = 1; + + pSps->uiProfileIdc = uiProfileIdc; + pSps->uiLevelIdc = uiLevelIdc; + pSps->iSpsId = iSpsId; + + if (PRO_SCALABLE_BASELINE == uiProfileIdc || PRO_SCALABLE_HIGH == uiProfileIdc || + PRO_HIGH == uiProfileIdc || PRO_HIGH10 == uiProfileIdc || + PRO_HIGH422 == uiProfileIdc || PRO_HIGH444 == uiProfileIdc || + PRO_CAVLC444 == uiProfileIdc || 44 == uiProfileIdc) { + + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //chroma_format_idc + pSps->uiChromaFormatIdc = uiCode; +// if (pSps->uiChromaFormatIdc != 1) { +// WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "ParseSps(): chroma_format_idc (%d) = 1 supported.", +// pSps->uiChromaFormatIdc); +// return 
GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_UNSUPPORTED_NON_BASELINE); +// } + if (pSps->uiChromaFormatIdc > 1) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "ParseSps(): chroma_format_idc (%d) <=1 supported.", + pSps->uiChromaFormatIdc); + return GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_UNSUPPORTED_NON_BASELINE); + + }// To support 4:0:0; 4:2:0 + pSps->uiChromaArrayType = pSps->uiChromaFormatIdc; + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //bit_depth_luma_minus8 + if (uiCode != 0) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "ParseSps(): bit_depth_luma (%d) Only 8 bit supported.", 8 + uiCode); + return GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_UNSUPPORTED_NON_BASELINE); + } + pSps->uiBitDepthLuma = 8; + + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //bit_depth_chroma_minus8 + if (uiCode != 0) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "ParseSps(): bit_depth_chroma (%d). Only 8 bit supported.", 8 + uiCode); + return GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_UNSUPPORTED_NON_BASELINE); + } + pSps->uiBitDepthChroma = 8; + + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //qpprime_y_zero_transform_bypass_flag + pSps->bQpPrimeYZeroTransfBypassFlag = !!uiCode; + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //seq_scaling_matrix_present_flag + pSps->bSeqScalingMatrixPresentFlag = !!uiCode; + + if (pSps->bSeqScalingMatrixPresentFlag) { + WELS_READ_VERIFY (ParseScalingList (pSps, pBs, 0, 0, pSps->bSeqScalingListPresentFlag, pSps->iScalingList4x4, + pSps->iScalingList8x8)); + } + } + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //log2_max_frame_num_minus4 + WELS_CHECK_SE_UPPER_ERROR (uiCode, SPS_LOG2_MAX_FRAME_NUM_MINUS4_MAX, "log2_max_frame_num_minus4", + GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_INVALID_LOG2_MAX_FRAME_NUM_MINUS4)); + pSps->uiLog2MaxFrameNum = LOG2_MAX_FRAME_NUM_OFFSET + uiCode; + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //pic_order_cnt_type + pSps->uiPocType = uiCode; + + if (0 == pSps->uiPocType) { 
+ WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //log2_max_pic_order_cnt_lsb_minus4 + // log2_max_pic_order_cnt_lsb_minus4 should be in range 0 to 12, inclusive. (sec. 7.4.3) + WELS_CHECK_SE_UPPER_ERROR (uiCode, SPS_LOG2_MAX_PIC_ORDER_CNT_LSB_MINUS4_MAX, "log2_max_pic_order_cnt_lsb_minus4", + GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_INVALID_LOG2_MAX_PIC_ORDER_CNT_LSB_MINUS4)); + pSps->iLog2MaxPocLsb = LOG2_MAX_PIC_ORDER_CNT_LSB_OFFSET + uiCode; // log2_max_pic_order_cnt_lsb_minus4 + + } else if (1 == pSps->uiPocType) { + int32_t i; + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //delta_pic_order_always_zero_flag + pSps->bDeltaPicOrderAlwaysZeroFlag = !!uiCode; + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //offset_for_non_ref_pic + pSps->iOffsetForNonRefPic = iCode; + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //offset_for_top_to_bottom_field + pSps->iOffsetForTopToBottomField = iCode; + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //num_ref_frames_in_pic_order_cnt_cycle + WELS_CHECK_SE_UPPER_ERROR (uiCode, SPS_NUM_REF_FRAMES_IN_PIC_ORDER_CNT_CYCLE_MAX, + "num_ref_frames_in_pic_order_cnt_cycle", GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, + ERR_INFO_INVALID_NUM_REF_FRAME_IN_PIC_ORDER_CNT_CYCLE)); + pSps->iNumRefFramesInPocCycle = uiCode; + for (i = 0; i < pSps->iNumRefFramesInPocCycle; i++) { + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //offset_for_ref_frame[ i ] + pSps->iOffsetForRefFrame[ i ] = iCode; + } + } + if (pSps->uiPocType > 2) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, " illegal pic_order_cnt_type: %d ! 
", pSps->uiPocType); + return GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_INVALID_POC_TYPE); + } + + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //max_num_ref_frames + pSps->iNumRefFrames = uiCode; + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //gaps_in_frame_num_value_allowed_flag + pSps->bGapsInFrameNumValueAllowedFlag = !!uiCode; + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //pic_width_in_mbs_minus1 + pSps->iMbWidth = PIC_WIDTH_IN_MBS_OFFSET + uiCode; + if (pSps->iMbWidth > MAX_MB_SIZE || pSps->iMbWidth == 0) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "pic_width_in_mbs(%d) invalid!", pSps->iMbWidth); + return GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_INVALID_MAX_MB_SIZE); + } + if (((uint64_t)pSps->iMbWidth * (uint64_t)pSps->iMbWidth) > (uint64_t) (8 * pSLevelLimits->uiMaxFS)) { + if (((uint64_t)pSps->iMbWidth * (uint64_t)pSps->iMbWidth) > (uint64_t) (8 * pSMaxLevelLimits->uiMaxFS)) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "the pic_width_in_mbs exceeds the level limits!"); + return GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_INVALID_MAX_MB_SIZE); + } else { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "the pic_width_in_mbs exceeds the level limits!"); + } + } + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //pic_height_in_map_units_minus1 + pSps->iMbHeight = PIC_HEIGHT_IN_MAP_UNITS_OFFSET + uiCode; + if (pSps->iMbHeight > MAX_MB_SIZE || pSps->iMbHeight == 0) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "pic_height_in_mbs(%d) invalid!", pSps->iMbHeight); + return GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_INVALID_MAX_MB_SIZE); + } + if (((uint64_t)pSps->iMbHeight * (uint64_t)pSps->iMbHeight) > (uint64_t) (8 * pSLevelLimits->uiMaxFS)) { + if (((uint64_t)pSps->iMbHeight * (uint64_t)pSps->iMbHeight) > (uint64_t) (8 * pSMaxLevelLimits->uiMaxFS)) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "the pic_height_in_mbs exceeds the level limits!"); + return GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_INVALID_MAX_MB_SIZE); 
+ } else { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "the pic_height_in_mbs exceeds the level limits!"); + } + } + uint64_t uiTmp64 = (uint64_t)pSps->iMbWidth * (uint64_t)pSps->iMbHeight; + if (uiTmp64 > (uint64_t)pSLevelLimits->uiMaxFS) { + if (uiTmp64 > (uint64_t)pSMaxLevelLimits->uiMaxFS) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "the total count of mb exceeds the level limits!"); + return GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_INVALID_MAX_MB_SIZE); + } else { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "the total count of mb exceeds the level limits!"); + } + } + pSps->uiTotalMbCount = (uint32_t)uiTmp64; + WELS_CHECK_SE_UPPER_ERROR (pSps->iNumRefFrames, SPS_MAX_NUM_REF_FRAMES_MAX, "max_num_ref_frames", + GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_INVALID_MAX_NUM_REF_FRAMES)); + // here we check max_num_ref_frames + uint32_t uiMaxDpbMbs = pSLevelLimits->uiMaxDPBMbs; + uint32_t uiMaxDpbFrames = uiMaxDpbMbs / pSps->uiTotalMbCount; + if (uiMaxDpbFrames > SPS_MAX_NUM_REF_FRAMES_MAX) + uiMaxDpbFrames = SPS_MAX_NUM_REF_FRAMES_MAX; + if ((uint32_t)pSps->iNumRefFrames > uiMaxDpbFrames) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, " max_num_ref_frames exceeds level limits!"); + } + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //frame_mbs_only_flag + pSps->bFrameMbsOnlyFlag = !!uiCode; + if (!pSps->bFrameMbsOnlyFlag) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "ParseSps(): frame_mbs_only_flag (%d) not supported.", + pSps->bFrameMbsOnlyFlag); + return GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_UNSUPPORTED_MBAFF); + } + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //direct_8x8_inference_flag + pSps->bDirect8x8InferenceFlag = !!uiCode; + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //frame_cropping_flag + pSps->bFrameCroppingFlag = !!uiCode; + if (pSps->bFrameCroppingFlag) { + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //frame_crop_left_offset + pSps->sFrameCrop.iLeftOffset = uiCode; + WELS_READ_VERIFY (BsGetUe (pBs, 
&uiCode)); //frame_crop_right_offset + pSps->sFrameCrop.iRightOffset = uiCode; + if ((pSps->sFrameCrop.iLeftOffset + pSps->sFrameCrop.iRightOffset) > ((int32_t)pSps->iMbWidth * 16 / 2)) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "frame_crop_left_offset + frame_crop_right_offset exceeds limits!"); + return GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_INVALID_CROPPING_DATA); + } + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //frame_crop_top_offset + pSps->sFrameCrop.iTopOffset = uiCode; + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //frame_crop_bottom_offset + pSps->sFrameCrop.iBottomOffset = uiCode; + if ((pSps->sFrameCrop.iTopOffset + pSps->sFrameCrop.iBottomOffset) > ((int32_t)pSps->iMbHeight * 16 / 2)) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "frame_crop_top_offset + frame_crop_right_offset exceeds limits!"); + return GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_INVALID_CROPPING_DATA); + } + } else { + pSps->sFrameCrop.iLeftOffset = 0; // frame_crop_left_offset + pSps->sFrameCrop.iRightOffset = 0; // frame_crop_right_offset + pSps->sFrameCrop.iTopOffset = 0; // frame_crop_top_offset + pSps->sFrameCrop.iBottomOffset = 0; // frame_crop_bottom_offset + } + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //vui_parameters_present_flag + pSps->bVuiParamPresentFlag = !!uiCode; + if (pSps->bVuiParamPresentFlag) { + int iRetVui = ParseVui (pCtx, pSps, pBsAux); + if (iRetVui == GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_UNSUPPORTED_VUI_HRD)) { + if (kbUseSubsetFlag) { //Currently do no support VUI with HRD enable in subsetSPS + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "hrd parse in vui of subsetSPS is not supported!"); + return iRetVui; + } + } else { + WELS_READ_VERIFY (iRetVui); + } + } + + if (pCtx->pParam->bParseOnly) { + if (kSrcNalLen >= SPS_PPS_BS_SIZE - 4) { //sps bs exceeds! 
+ WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "sps payload size (%d) too large for parse only (%d), not supported!", + kSrcNalLen, SPS_PPS_BS_SIZE - 4); + pCtx->iErrorCode |= dsBitstreamError; + return GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_OUT_OF_MEMORY); + } + if (!kbUseSubsetFlag) { //SPS + SSpsBsInfo* pSpsBs = &pCtx->sSpsBsInfo [iSpsId]; + pSpsBs->iSpsId = iSpsId; + int32_t iTrailingZeroByte = 0; + while (pSrcNal[kSrcNalLen - iTrailingZeroByte - 1] == 0x0) //remove final trailing 0 bytes + iTrailingZeroByte++; + int32_t iActualLen = kSrcNalLen - iTrailingZeroByte; + pSpsBs->uiSpsBsLen = (uint16_t) iActualLen; + //unify start code as 0x0001 + int32_t iStartDeltaByte = 0; //0 for 0x0001, 1 for 0x001 + if (pSrcNal[0] == 0x0 && pSrcNal[1] == 0x0 && pSrcNal[2] == 0x1) { //if 0x001 + pSpsBs->pSpsBsBuf[0] = 0x0; //add 0 to form 0x0001 + iStartDeltaByte++; + pSpsBs->uiSpsBsLen++; + } + memcpy (pSpsBs->pSpsBsBuf + iStartDeltaByte, pSrcNal, iActualLen); + } else { //subset SPS + SSpsBsInfo* pSpsBs = &pCtx->sSubsetSpsBsInfo [iSpsId]; + pSpsBs->iSpsId = iSpsId; + pSpsBs->pSpsBsBuf [0] = pSpsBs->pSpsBsBuf [1] = pSpsBs->pSpsBsBuf [2] = 0x00; + pSpsBs->pSpsBsBuf [3] = 0x01; + pSpsBs->pSpsBsBuf [4] = 0x67; + + //re-write subset SPS to SPS + SBitStringAux sSubsetSpsBs; + CMemoryAlign* pMa = pCtx->pMemAlign; + + uint8_t* pBsBuf = static_cast (pMa->WelsMallocz (SPS_PPS_BS_SIZE + 4, + "Temp buffer for parse only usage.")); //to reserve 4 bytes for UVLC writing buffer + if (NULL == pBsBuf) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "sps buffer alloc failed for parse only!"); + pCtx->iErrorCode |= dsOutOfMemory; + return pCtx->iErrorCode; + } + InitBits (&sSubsetSpsBs, pBsBuf, (int32_t) (pBs->pEndBuf - pBs->pStartBuf)); + BsWriteBits (&sSubsetSpsBs, 8, 77); //profile_idc, forced to Main profile + BsWriteOneBit (&sSubsetSpsBs, pSps->bConstraintSet0Flag); // constraint_set0_flag + BsWriteOneBit (&sSubsetSpsBs, pSps->bConstraintSet1Flag); // constraint_set1_flag + 
BsWriteOneBit (&sSubsetSpsBs, pSps->bConstraintSet2Flag); // constraint_set2_flag + BsWriteOneBit (&sSubsetSpsBs, pSps->bConstraintSet3Flag); // constraint_set3_flag + BsWriteBits (&sSubsetSpsBs, 4, 0); //constraint_set4_flag, constraint_set5_flag, reserved_zero_2bits + BsWriteBits (&sSubsetSpsBs, 8, pSps->uiLevelIdc); //level_idc + BsWriteUE (&sSubsetSpsBs, pSps->iSpsId); //sps_id + BsWriteUE (&sSubsetSpsBs, pSps->uiLog2MaxFrameNum - 4); //log2_max_frame_num_minus4 + BsWriteUE (&sSubsetSpsBs, pSps->uiPocType); //pic_order_cnt_type + if (pSps->uiPocType == 0) { + BsWriteUE (&sSubsetSpsBs, pSps->iLog2MaxPocLsb - 4); //log2_max_pic_order_cnt_lsb_minus4 + } else if (pSps->uiPocType == 1) { + BsWriteOneBit (&sSubsetSpsBs, pSps->bDeltaPicOrderAlwaysZeroFlag); //delta_pic_order_always_zero_flag + BsWriteSE (&sSubsetSpsBs, pSps->iOffsetForNonRefPic); //offset_for_no_ref_pic + BsWriteSE (&sSubsetSpsBs, pSps->iOffsetForTopToBottomField); //offset_for_top_to_bottom_field + BsWriteUE (&sSubsetSpsBs, pSps->iNumRefFramesInPocCycle); //num_ref_frames_in_pic_order_cnt_cycle + for (int32_t i = 0; i < pSps->iNumRefFramesInPocCycle; ++i) { + BsWriteSE (&sSubsetSpsBs, pSps->iOffsetForRefFrame[i]); //offset_for_ref_frame[i] + } + } + BsWriteUE (&sSubsetSpsBs, pSps->iNumRefFrames); //max_num_ref_frames + BsWriteOneBit (&sSubsetSpsBs, pSps->bGapsInFrameNumValueAllowedFlag); //gaps_in_frame_num_value_allowed_flag + BsWriteUE (&sSubsetSpsBs, pSps->iMbWidth - 1); //pic_width_in_mbs_minus1 + BsWriteUE (&sSubsetSpsBs, pSps->iMbHeight - 1); //pic_height_in_map_units_minus1 + BsWriteOneBit (&sSubsetSpsBs, pSps->bFrameMbsOnlyFlag); //frame_mbs_only_flag + if (!pSps->bFrameMbsOnlyFlag) { + BsWriteOneBit (&sSubsetSpsBs, pSps->bMbaffFlag); //mb_adaptive_frame_field_flag + } + BsWriteOneBit (&sSubsetSpsBs, pSps->bDirect8x8InferenceFlag); //direct_8x8_inference_flag + BsWriteOneBit (&sSubsetSpsBs, pSps->bFrameCroppingFlag); //frame_cropping_flag + if (pSps->bFrameCroppingFlag) { + BsWriteUE 
(&sSubsetSpsBs, pSps->sFrameCrop.iLeftOffset); //frame_crop_left_offset + BsWriteUE (&sSubsetSpsBs, pSps->sFrameCrop.iRightOffset); //frame_crop_right_offset + BsWriteUE (&sSubsetSpsBs, pSps->sFrameCrop.iTopOffset); //frame_crop_top_offset + BsWriteUE (&sSubsetSpsBs, pSps->sFrameCrop.iBottomOffset); //frame_crop_bottom_offset + } + BsWriteOneBit (&sSubsetSpsBs, 0); //vui_parameters_present_flag + BsRbspTrailingBits (&sSubsetSpsBs); //finished, rbsp trailing bit + int32_t iRbspSize = (int32_t) (sSubsetSpsBs.pCurBuf - sSubsetSpsBs.pStartBuf); + RBSP2EBSP (pSpsBs->pSpsBsBuf + 5, sSubsetSpsBs.pStartBuf, iRbspSize); + pSpsBs->uiSpsBsLen = (uint16_t) (sSubsetSpsBs.pCurBuf - sSubsetSpsBs.pStartBuf + 5); + if (pBsBuf) { + pMa->WelsFree (pBsBuf, "pBsBuf for parse only usage"); + } + } + } + // Check if SPS SVC extension applicated + if (kbUseSubsetFlag && (PRO_SCALABLE_BASELINE == uiProfileIdc || PRO_SCALABLE_HIGH == uiProfileIdc)) { + if ((iRet = DecodeSpsSvcExt (pCtx, pSubsetSps, pBs)) != ERR_NONE) { + return iRet; + } + + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //svc_vui_parameters_present_flag + pSubsetSps->bSvcVuiParamPresentFlag = !!uiCode; + if (pSubsetSps->bSvcVuiParamPresentFlag) { + } + } + + + if (PRO_SCALABLE_BASELINE == uiProfileIdc || PRO_SCALABLE_HIGH == uiProfileIdc) + pCtx->sSpsPpsCtx.bAvcBasedFlag = false; + + *pPicWidth = pSps->iMbWidth << 4; + *pPicHeight = pSps->iMbHeight << 4; + PSps pTmpSps = NULL; + if (kbUseSubsetFlag) { + pTmpSps = &pCtx->sSpsPpsCtx.sSubsetSpsBuffer[iSpsId].sSps; + } else { + pTmpSps = &pCtx->sSpsPpsCtx.sSpsBuffer[iSpsId]; + } + if (CheckSpsActive (pCtx, pTmpSps, kbUseSubsetFlag)) { + // we are overwriting the active sps, copy a temp buffer + if (kbUseSubsetFlag) { + if (memcmp (&pCtx->sSpsPpsCtx.sSubsetSpsBuffer[iSpsId], pSubsetSps, sizeof (SSubsetSps)) != 0) { + if (pCtx->pAccessUnitList->uiAvailUnitsNum > 0) { + memcpy (&pCtx->sSpsPpsCtx.sSubsetSpsBuffer[MAX_SPS_COUNT], pSubsetSps, sizeof (SSubsetSps)); + 
pCtx->bAuReadyFlag = true; + pCtx->pAccessUnitList->uiEndPos = pCtx->pAccessUnitList->uiAvailUnitsNum - 1; + pCtx->sSpsPpsCtx.iOverwriteFlags |= OVERWRITE_SUBSETSPS; + } else if ((pCtx->pSps != NULL) && (pCtx->pSps->iSpsId == pSubsetSps->sSps.iSpsId)) { + memcpy (&pCtx->sSpsPpsCtx.sSubsetSpsBuffer[MAX_SPS_COUNT], pSubsetSps, sizeof (SSubsetSps)); + pCtx->sSpsPpsCtx.iOverwriteFlags |= OVERWRITE_SUBSETSPS; + } else { + memcpy (&pCtx->sSpsPpsCtx.sSubsetSpsBuffer[iSpsId], pSubsetSps, sizeof (SSubsetSps)); + } + } + } else { + if (memcmp (&pCtx->sSpsPpsCtx.sSpsBuffer[iSpsId], pSps, sizeof (SSps)) != 0) { + if (pCtx->pAccessUnitList->uiAvailUnitsNum > 0) { + memcpy (&pCtx->sSpsPpsCtx.sSpsBuffer[MAX_SPS_COUNT], pSps, sizeof (SSps)); + pCtx->sSpsPpsCtx.iOverwriteFlags |= OVERWRITE_SPS; + pCtx->bAuReadyFlag = true; + pCtx->pAccessUnitList->uiEndPos = pCtx->pAccessUnitList->uiAvailUnitsNum - 1; + } else if ((pCtx->pSps != NULL) && (pCtx->pSps->iSpsId == pSps->iSpsId)) { + memcpy (&pCtx->sSpsPpsCtx.sSpsBuffer[MAX_SPS_COUNT], pSps, sizeof (SSps)); + pCtx->sSpsPpsCtx.iOverwriteFlags |= OVERWRITE_SPS; + } else { + memcpy (&pCtx->sSpsPpsCtx.sSpsBuffer[iSpsId], pSps, sizeof (SSps)); + } + } + } + } + // Not overwrite active sps, just copy to final place + else if (kbUseSubsetFlag) { + memcpy (&pCtx->sSpsPpsCtx.sSubsetSpsBuffer[iSpsId], pSubsetSps, sizeof (SSubsetSps)); + pCtx->sSpsPpsCtx.bSubspsAvailFlags[iSpsId] = true; + pCtx->sSpsPpsCtx.bSubspsExistAheadFlag = true; + } else { + memcpy (&pCtx->sSpsPpsCtx.sSpsBuffer[iSpsId], pSps, sizeof (SSps)); + pCtx->sSpsPpsCtx.bSpsAvailFlags[iSpsId] = true; + pCtx->sSpsPpsCtx.bSpsExistAheadFlag = true; + } + return ERR_NONE; +} + +/*! 
+ ************************************************************************************* + * \brief to parse Picture Parameter Set (PPS) + * + * \param pCtx Decoder context + * \param pPpsList pps list + * \param pBsAux bitstream reader auxiliary + * + * \return 0 - successed + * 1 - failed + * + * \note Call it in case eNalUnitType is PPS. + ************************************************************************************* + */ +int32_t ParsePps (PWelsDecoderContext pCtx, PPps pPpsList, PBitStringAux pBsAux, uint8_t* pSrcNal, + const int32_t kSrcNalLen) { + + PPps pPps = NULL; + SPps sTempPps; + uint32_t uiPpsId = 0; + uint32_t iTmp; + uint32_t uiCode; + int32_t iCode; + + WELS_READ_VERIFY (BsGetUe (pBsAux, &uiCode)); //pic_parameter_set_id + uiPpsId = uiCode; + if (uiPpsId >= MAX_PPS_COUNT) { + return GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_PPS_ID_OVERFLOW); + } + pPps = &sTempPps; + memset (pPps, 0, sizeof (SPps)); + + pPps->iPpsId = uiPpsId; + WELS_READ_VERIFY (BsGetUe (pBsAux, &uiCode)); //seq_parameter_set_id + pPps->iSpsId = uiCode; + + if (pPps->iSpsId >= MAX_SPS_COUNT) { + return GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_SPS_ID_OVERFLOW); + } + + WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //entropy_coding_mode_flag + pPps->bEntropyCodingModeFlag = !!uiCode; + WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //bottom_field_pic_order_in_frame_present_flag + pPps->bPicOrderPresentFlag = !!uiCode; + + WELS_READ_VERIFY (BsGetUe (pBsAux, &uiCode)); //num_slice_groups_minus1 + pPps->uiNumSliceGroups = NUM_SLICE_GROUPS_OFFSET + uiCode; + + if (pPps->uiNumSliceGroups > MAX_SLICEGROUP_IDS) { + return GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_INVALID_SLICEGROUP); + } + + if (pPps->uiNumSliceGroups > 1) { + WELS_READ_VERIFY (BsGetUe (pBsAux, &uiCode)); //slice_group_map_type + pPps->uiSliceGroupMapType = uiCode; + if (pPps->uiSliceGroupMapType > 1) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "ParsePps(): slice_group_map_type 
(%d): support only 0,1.", + pPps->uiSliceGroupMapType); + return GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_UNSUPPORTED_FMOTYPE); + } + + switch (pPps->uiSliceGroupMapType) { + case 0: + for (iTmp = 0; iTmp < pPps->uiNumSliceGroups; iTmp++) { + WELS_READ_VERIFY (BsGetUe (pBsAux, &uiCode)); //run_length_minus1[ iGroup ] + pPps->uiRunLength[iTmp] = RUN_LENGTH_OFFSET + uiCode; + } + break; + default: + break; + } + } + + WELS_READ_VERIFY (BsGetUe (pBsAux, &uiCode)); //num_ref_idx_l0_default_active_minus1 + pPps->uiNumRefIdxL0Active = NUM_REF_IDX_L0_DEFAULT_ACTIVE_OFFSET + uiCode; + WELS_READ_VERIFY (BsGetUe (pBsAux, &uiCode)); //num_ref_idx_l1_default_active_minus1 + pPps->uiNumRefIdxL1Active = NUM_REF_IDX_L1_DEFAULT_ACTIVE_OFFSET + uiCode; + + if (pPps->uiNumRefIdxL0Active > MAX_REF_PIC_COUNT || + pPps->uiNumRefIdxL1Active > MAX_REF_PIC_COUNT) { + return GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_REF_COUNT_OVERFLOW); + } + + WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //weighted_pred_flag + pPps->bWeightedPredFlag = !!uiCode; + WELS_READ_VERIFY (BsGetBits (pBsAux, 2, &uiCode)); //weighted_bipred_idc + pPps->uiWeightedBipredIdc = uiCode; + // weighted_bipred_idc > 0 NOT supported now, but no impact when we ignore it + + WELS_READ_VERIFY (BsGetSe (pBsAux, &iCode)); //pic_init_qp_minus26 + pPps->iPicInitQp = PIC_INIT_QP_OFFSET + iCode; + WELS_CHECK_SE_BOTH_ERROR (pPps->iPicInitQp, PPS_PIC_INIT_QP_QS_MIN, PPS_PIC_INIT_QP_QS_MAX, "pic_init_qp_minus26 + 26", + GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_INVALID_PIC_INIT_QP)); + WELS_READ_VERIFY (BsGetSe (pBsAux, &iCode)); //pic_init_qs_minus26 + pPps->iPicInitQs = PIC_INIT_QS_OFFSET + iCode; + WELS_CHECK_SE_BOTH_ERROR (pPps->iPicInitQs, PPS_PIC_INIT_QP_QS_MIN, PPS_PIC_INIT_QP_QS_MAX, "pic_init_qs_minus26 + 26", + GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_INVALID_PIC_INIT_QS)); + WELS_READ_VERIFY (BsGetSe (pBsAux, &iCode)); //chroma_qp_index_offset,cb + pPps->iChromaQpIndexOffset[0] = 
iCode; + WELS_CHECK_SE_BOTH_ERROR (pPps->iChromaQpIndexOffset[0], PPS_CHROMA_QP_INDEX_OFFSET_MIN, PPS_CHROMA_QP_INDEX_OFFSET_MAX, + "chroma_qp_index_offset", GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_INVALID_CHROMA_QP_INDEX_OFFSET)); + pPps->iChromaQpIndexOffset[1] = pPps->iChromaQpIndexOffset[0];//init cr qp offset + WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //deblocking_filter_control_present_flag + pPps->bDeblockingFilterControlPresentFlag = !!uiCode; + WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //constrained_intra_pred_flag + pPps->bConstainedIntraPredFlag = !!uiCode; + WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //redundant_pic_cnt_present_flag + pPps->bRedundantPicCntPresentFlag = !!uiCode; + + if (CheckMoreRBSPData (pBsAux)) { + WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //transform_8x8_mode_flag + pPps->bTransform8x8ModeFlag = !!uiCode; + WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //pic_scaling_matrix_present_flag + pPps->bPicScalingMatrixPresentFlag = !!uiCode; + if (pPps->bPicScalingMatrixPresentFlag) { + if (pCtx->sSpsPpsCtx.bSpsAvailFlags[pPps->iSpsId]) { + WELS_READ_VERIFY (ParseScalingList (&pCtx->sSpsPpsCtx.sSpsBuffer[pPps->iSpsId], pBsAux, 1, pPps->bTransform8x8ModeFlag, + pPps->bPicScalingListPresentFlag, pPps->iScalingList4x4, pPps->iScalingList8x8)); + } else { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, + "ParsePps(): sps_id (%d) does not exist for scaling_list. 
This PPS (%d) is marked as invalid.", pPps->iSpsId, + pPps->iPpsId); + return GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_INVALID_SPS_ID); + } + } + WELS_READ_VERIFY (BsGetSe (pBsAux, &iCode)); //second_chroma_qp_index_offset + pPps->iChromaQpIndexOffset[1] = iCode; + WELS_CHECK_SE_BOTH_ERROR (pPps->iChromaQpIndexOffset[1], PPS_CHROMA_QP_INDEX_OFFSET_MIN, + PPS_CHROMA_QP_INDEX_OFFSET_MAX, "chroma_qp_index_offset", GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, + ERR_INFO_INVALID_CHROMA_QP_INDEX_OFFSET)); + } + + if (pCtx->pPps != NULL && pCtx->pPps->iPpsId == pPps->iPpsId) { + if (memcmp (pCtx->pPps, pPps, sizeof (*pPps)) != 0) { + memcpy (&pCtx->sSpsPpsCtx.sPpsBuffer[MAX_PPS_COUNT], pPps, sizeof (SPps)); + pCtx->sSpsPpsCtx.iOverwriteFlags |= OVERWRITE_PPS; + if (pCtx->pAccessUnitList->uiAvailUnitsNum > 0) { + pCtx->bAuReadyFlag = true; + pCtx->pAccessUnitList->uiEndPos = pCtx->pAccessUnitList->uiAvailUnitsNum - 1; + } + } + } else { + memcpy (&pCtx->sSpsPpsCtx.sPpsBuffer[uiPpsId], pPps, sizeof (SPps)); + pCtx->sSpsPpsCtx.bPpsAvailFlags[uiPpsId] = true; + } + if (pCtx->pParam->bParseOnly) { + if (kSrcNalLen >= SPS_PPS_BS_SIZE - 4) { //pps bs exceeds + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "pps payload size (%d) too large for parse only (%d), not supported!", + kSrcNalLen, SPS_PPS_BS_SIZE - 4); + pCtx->iErrorCode |= dsBitstreamError; + return GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_OUT_OF_MEMORY); + } + SPpsBsInfo* pPpsBs = &pCtx->sPpsBsInfo [uiPpsId]; + pPpsBs->iPpsId = (int32_t) uiPpsId; + int32_t iTrailingZeroByte = 0; + while (pSrcNal[kSrcNalLen - iTrailingZeroByte - 1] == 0x0) //remove final trailing 0 bytes + iTrailingZeroByte++; + int32_t iActualLen = kSrcNalLen - iTrailingZeroByte; + pPpsBs->uiPpsBsLen = (uint16_t) iActualLen; + //unify start code as 0x0001 + int32_t iStartDeltaByte = 0; //0 for 0x0001, 1 for 0x001 + if (pSrcNal[0] == 0x0 && pSrcNal[1] == 0x0 && pSrcNal[2] == 0x1) { //if 0x001 + pPpsBs->pPpsBsBuf[0] = 0x0; //add 0 to form 
0x0001 + iStartDeltaByte++; + pPpsBs->uiPpsBsLen++; + } + memcpy (pPpsBs->pPpsBsBuf + iStartDeltaByte, pSrcNal, iActualLen); + } + return ERR_NONE; +} + +#define VUI_MAX_CHROMA_LOG_TYPE_TOP_BOTTOM_FIELD_MAX 5 +#define VUI_NUM_UNITS_IN_TICK_MIN 1 +#define VUI_TIME_SCALE_MIN 1 +#define VUI_MAX_BYTES_PER_PIC_DENOM_MAX 16 +#define VUI_MAX_BITS_PER_MB_DENOM_MAX 16 +#define VUI_LOG2_MAX_MV_LENGTH_HOR_MAX 16 +#define VUI_LOG2_MAX_MV_LENGTH_VER_MAX 16 +#define VUI_MAX_DEC_FRAME_BUFFERING_MAX 16 +int32_t ParseVui (PWelsDecoderContext pCtx, PSps pSps, PBitStringAux pBsAux) { + uint32_t uiCode; + PVui pVui = &pSps->sVui; + WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //aspect_ratio_info_present_flag + pVui->bAspectRatioInfoPresentFlag = !!uiCode; + if (pSps->sVui.bAspectRatioInfoPresentFlag) { + WELS_READ_VERIFY (BsGetBits (pBsAux, 8, &uiCode)); //aspect_ratio_idc + pVui->uiAspectRatioIdc = uiCode; + if (pVui->uiAspectRatioIdc < 17) { + pVui->uiSarWidth = g_ksVuiSampleAspectRatio[pVui->uiAspectRatioIdc].uiWidth; + pVui->uiSarHeight = g_ksVuiSampleAspectRatio[pVui->uiAspectRatioIdc].uiHeight; + } else if (pVui->uiAspectRatioIdc == EXTENDED_SAR) { + WELS_READ_VERIFY (BsGetBits (pBsAux, 16, &uiCode)); //sar_width + pVui->uiSarWidth = uiCode; + WELS_READ_VERIFY (BsGetBits (pBsAux, 16, &uiCode)); //sar_height + pVui->uiSarHeight = uiCode; + } + } + WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //overscan_info_present_flag + pVui->bOverscanInfoPresentFlag = !!uiCode; + if (pVui->bOverscanInfoPresentFlag) { + WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //overscan_appropriate_flag + pVui->bOverscanAppropriateFlag = !!uiCode; + } + WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //video_signal_type_present_flag + pVui->bVideoSignalTypePresentFlag = !!uiCode; + if (pVui->bVideoSignalTypePresentFlag) { + WELS_READ_VERIFY (BsGetBits (pBsAux, 3, &uiCode)); //video_format + pVui->uiVideoFormat = uiCode; + WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); 
//video_full_range_flag + pVui->bVideoFullRangeFlag = !!uiCode; + WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //colour_description_present_flag + pVui->bColourDescripPresentFlag = !!uiCode; + if (pVui->bColourDescripPresentFlag) { + WELS_READ_VERIFY (BsGetBits (pBsAux, 8, &uiCode)); //colour_primaries + pVui->uiColourPrimaries = uiCode; + WELS_READ_VERIFY (BsGetBits (pBsAux, 8, &uiCode)); //transfer_characteristics + pVui->uiTransferCharacteristics = uiCode; + WELS_READ_VERIFY (BsGetBits (pBsAux, 8, &uiCode)); //matrix_coefficients + pVui->uiMatrixCoeffs = uiCode; + } + } + WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //chroma_loc_info_present_flag + pVui->bChromaLocInfoPresentFlag = !!uiCode; + if (pVui->bChromaLocInfoPresentFlag) { + WELS_READ_VERIFY (BsGetUe (pBsAux, &uiCode)); //chroma_sample_loc_type_top_field + pVui->uiChromaSampleLocTypeTopField = uiCode; + WELS_CHECK_SE_UPPER_WARNING (pVui->uiChromaSampleLocTypeTopField, VUI_MAX_CHROMA_LOG_TYPE_TOP_BOTTOM_FIELD_MAX, + "chroma_sample_loc_type_top_field"); + WELS_READ_VERIFY (BsGetUe (pBsAux, &uiCode)); //chroma_sample_loc_type_bottom_field + pVui->uiChromaSampleLocTypeBottomField = uiCode; + WELS_CHECK_SE_UPPER_WARNING (pVui->uiChromaSampleLocTypeBottomField, VUI_MAX_CHROMA_LOG_TYPE_TOP_BOTTOM_FIELD_MAX, + "chroma_sample_loc_type_bottom_field"); + } + WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //timing_info_present_flag + pVui->bTimingInfoPresentFlag = !!uiCode; + if (pVui->bTimingInfoPresentFlag) { + uint32_t uiTmp = 0; + WELS_READ_VERIFY (BsGetBits (pBsAux, 16, &uiCode)); //num_units_in_tick + uiTmp = (uiCode << 16); + WELS_READ_VERIFY (BsGetBits (pBsAux, 16, &uiCode)); //num_units_in_tick + uiTmp |= uiCode; + pVui->uiNumUnitsInTick = uiTmp; + WELS_CHECK_SE_LOWER_WARNING (pVui->uiNumUnitsInTick, VUI_NUM_UNITS_IN_TICK_MIN, "num_units_in_tick"); + WELS_READ_VERIFY (BsGetBits (pBsAux, 16, &uiCode)); //time_scale + uiTmp = (uiCode << 16); + WELS_READ_VERIFY (BsGetBits (pBsAux, 16, &uiCode)); 
//time_scale + uiTmp |= uiCode; + pVui->uiTimeScale = uiTmp; + WELS_CHECK_SE_LOWER_WARNING (pVui->uiNumUnitsInTick, VUI_TIME_SCALE_MIN, "time_scale"); + WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //fixed_frame_rate_flag + pVui->bFixedFrameRateFlag = !!uiCode; + } + WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //nal_hrd_parameters_present_flag + pVui->bNalHrdParamPresentFlag = !!uiCode; + if (pVui->bNalHrdParamPresentFlag) { //Add HRD parse. the values are not being used though. +#ifdef _PARSE_NALHRD_VCLHRD_PARAMS_ + int32_t cpb_cnt_minus1 = BsGetUe (pBsAux, &uiCode); + /*bit_rate_scale = */BsGetBits (pBsAux, 4, &uiCode); + /*cpb_size_scale = */BsGetBits (pBsAux, 4, &uiCode); + for (int32_t i = 0; i <= cpb_cnt_minus1; i++) { + /*bit_rate_value_minus1[i] = */BsGetUe (pBsAux, &uiCode); + /*cpb_size_value_minus1[i] = */BsGetUe (pBsAux, &uiCode); + /*cbr_flag[i] = */BsGetOneBit (pBsAux, &uiCode); + } + /*initial_cpb_removal_delay_length_minus1 = */BsGetBits (pBsAux, 5, &uiCode); + /*cpb_removal_delay_length_minus1 = */BsGetBits (pBsAux, 5, &uiCode); + /*dpb_output_delay_length_minus1 = */BsGetBits (pBsAux, 5, &uiCode); + /*time_offset_length = */BsGetBits (pBsAux, 5, &uiCode); +#else + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "nal_hrd_parameters_present_flag = 1 not supported."); + return GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_UNSUPPORTED_VUI_HRD); +#endif + } + WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //vcl_hrd_parameters_present_flag + pVui->bVclHrdParamPresentFlag = !!uiCode; + if (pVui->bVclHrdParamPresentFlag) {//Add HRD parse. the values are not being used though. 
+#ifdef _PARSE_NALHRD_VCLHRD_PARAMS_ + int32_t cpb_cnt_minus1 = BsGetUe (pBsAux, &uiCode); + /*bit_rate_scale = */BsGetBits (pBsAux, 4, &uiCode); + /*cpb_size_scale = */BsGetBits (pBsAux, 4, &uiCode); + for (int32_t i = 0; i <= cpb_cnt_minus1; i++) { + /*bit_rate_value_minus1[i] = */BsGetUe (pBsAux, &uiCode); + /*cpb_size_value_minus1[i] = */BsGetUe (pBsAux, &uiCode); + /*cbr_flag[i] = */BsGetOneBit (pBsAux, &uiCode); + } + /*initial_cpb_removal_delay_length_minus1 = */BsGetBits (pBsAux, 5, &uiCode); + /*cpb_removal_delay_length_minus1 = */BsGetBits (pBsAux, 5, &uiCode); + /*dpb_output_delay_length_minus1 = */BsGetBits (pBsAux, 5, &uiCode); + /*time_offset_length = */BsGetBits (pBsAux, 5, &uiCode); +#else + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "vcl_hrd_parameters_present_flag = 1 not supported."); + return GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_UNSUPPORTED_VUI_HRD); +#endif + } +#ifdef _PARSE_NALHRD_VCLHRD_PARAMS_ + if (pVui->bNalHrdParamPresentFlag | pVui->bVclHrdParamPresentFlag) { + /*low_delay_hrd_flag = */BsGetOneBit (pBsAux, &uiCode); + } +#endif + WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //pic_struct_present_flag + pVui->bPicStructPresentFlag = !!uiCode; + WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //bitstream_restriction_flag + pVui->bBitstreamRestrictionFlag = !!uiCode; + if (pVui->bBitstreamRestrictionFlag) { + WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //motion_vectors_over_pic_boundaries_flag + pVui->bMotionVectorsOverPicBoundariesFlag = !!uiCode; + WELS_READ_VERIFY (BsGetUe (pBsAux, &uiCode)); //max_bytes_per_pic_denom + pVui->uiMaxBytesPerPicDenom = uiCode; + WELS_CHECK_SE_UPPER_WARNING (pVui->uiMaxBytesPerPicDenom, VUI_MAX_BYTES_PER_PIC_DENOM_MAX, + "max_bytes_per_pic_denom"); + WELS_READ_VERIFY (BsGetUe (pBsAux, &uiCode)); //max_bits_per_mb_denom + pVui->uiMaxBitsPerMbDenom = uiCode; + WELS_CHECK_SE_UPPER_WARNING (pVui->uiMaxBitsPerMbDenom, VUI_MAX_BITS_PER_MB_DENOM_MAX, + "max_bits_per_mb_denom"); + 
WELS_READ_VERIFY (BsGetUe (pBsAux, &uiCode)); //log2_max_mv_length_horizontal + pVui->uiLog2MaxMvLengthHorizontal = uiCode; + WELS_CHECK_SE_UPPER_WARNING (pVui->uiLog2MaxMvLengthHorizontal, VUI_LOG2_MAX_MV_LENGTH_HOR_MAX, + "log2_max_mv_length_horizontal"); + WELS_READ_VERIFY (BsGetUe (pBsAux, &uiCode)); //log2_max_mv_length_vertical + pVui->uiLog2MaxMvLengthVertical = uiCode; + WELS_CHECK_SE_UPPER_WARNING (pVui->uiLog2MaxMvLengthVertical, VUI_LOG2_MAX_MV_LENGTH_VER_MAX, + "log2_max_mv_length_vertical"); + WELS_READ_VERIFY (BsGetUe (pBsAux, &uiCode)); //max_num_reorder_frames + pVui->uiMaxNumReorderFrames = uiCode; + WELS_CHECK_SE_UPPER_WARNING (pVui->uiMaxNumReorderFrames, VUI_MAX_DEC_FRAME_BUFFERING_MAX, + "max_num_reorder_frames"); + WELS_READ_VERIFY (BsGetUe (pBsAux, &uiCode)); //max_dec_frame_buffering + pVui->uiMaxDecFrameBuffering = uiCode; + WELS_CHECK_SE_UPPER_WARNING (pVui->uiMaxDecFrameBuffering, VUI_MAX_DEC_FRAME_BUFFERING_MAX, + "max_num_reorder_frames"); + } + return ERR_NONE; +} +/*! + ************************************************************************************* + * \brief to parse SEI message payload + * + * \param pSei sei message to be parsed output + * \param pBsAux bitstream reader auxiliary + * + * \return 0 - successed + * 1 - failed + * + * \note Call it in case eNalUnitType is NAL_UNIT_SEI. 
+ ************************************************************************************* + */ +int32_t ParseSei (void* pSei, PBitStringAux pBsAux) { // reserved Sei_Msg type + + + return ERR_NONE; +} +/* + ************************************************************************************* + * \brief to parse scalinglist message payload + * + * \param pps sps scaling list matrix message to be parsed output + * \param pBsAux bitstream reader auxiliary + * + * \return 0 - successed + * 1 - failed + * + * \note Call it in case scaling list matrix present at sps or pps level + ************************************************************************************* + */ +int32_t SetScalingListValue (uint8_t* pScalingList, int iScalingListNum, bool* bUseDefaultScalingMatrixFlag, + PBitStringAux pBsAux) { // reserved Sei_Msg type + int iLastScale = 8; + int iNextScale = 8; + int iDeltaScale; + int32_t iCode; + int32_t iIdx; + for (int j = 0; j < iScalingListNum; j++) { + if (iNextScale != 0) { + WELS_READ_VERIFY (BsGetSe (pBsAux, &iCode)); + WELS_CHECK_SE_BOTH_ERROR_NOLOG (iCode, SCALING_LIST_DELTA_SCALE_MIN, SCALING_LIST_DELTA_SCALE_MAX, "DeltaScale", + ERR_SCALING_LIST_DELTA_SCALE); + iDeltaScale = iCode; + iNextScale = (iLastScale + iDeltaScale + 256) % 256; + *bUseDefaultScalingMatrixFlag = (j == 0 && iNextScale == 0); + if (*bUseDefaultScalingMatrixFlag) + break; + } + iIdx = iScalingListNum == 16 ? g_kuiZigzagScan[j] : g_kuiZigzagScan8x8[j]; + pScalingList[iIdx] = (iNextScale == 0) ? 
iLastScale : iNextScale; + iLastScale = pScalingList[iIdx]; + } + + + return ERR_NONE; +} + +int32_t ParseScalingList (PSps pSps, PBitStringAux pBs, bool bPPS, const bool kbTrans8x8ModeFlag, + bool* pScalingListPresentFlag, uint8_t (*iScalingList4x4)[16], uint8_t (*iScalingList8x8)[64]) { + uint32_t uiScalingListNum; + uint32_t uiCode; + + bool bUseDefaultScalingMatrixFlag4x4 = false; + bool bUseDefaultScalingMatrixFlag8x8 = false; + bool bInit = false; + const uint8_t* defaultScaling[4]; + + if (!bPPS) { //sps scaling_list + uiScalingListNum = (pSps->uiChromaFormatIdc != 3) ? 8 : 12; + } else { //pps scaling_list + uiScalingListNum = 6 + (int32_t) kbTrans8x8ModeFlag * ((pSps->uiChromaFormatIdc != 3) ? 2 : 6); + bInit = pSps->bSeqScalingMatrixPresentFlag; + } + +//Init default_scaling_list value for sps or pps + defaultScaling[0] = bInit ? pSps->iScalingList4x4[0] : g_kuiDequantScaling4x4Default[0]; + defaultScaling[1] = bInit ? pSps->iScalingList4x4[3] : g_kuiDequantScaling4x4Default[1]; + defaultScaling[2] = bInit ? pSps->iScalingList8x8[0] : g_kuiDequantScaling8x8Default[0]; + defaultScaling[3] = bInit ? 
pSps->iScalingList8x8[1] : g_kuiDequantScaling8x8Default[1]; + + for (unsigned int i = 0; i < uiScalingListNum; i++) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); + pScalingListPresentFlag[i] = !!uiCode; + if (!!uiCode) { + if (i < 6) {//4x4 scaling list + WELS_READ_VERIFY (SetScalingListValue (iScalingList4x4[i], 16, &bUseDefaultScalingMatrixFlag4x4, pBs)); + if (bUseDefaultScalingMatrixFlag4x4) { + bUseDefaultScalingMatrixFlag4x4 = false; + memcpy (iScalingList4x4[i], g_kuiDequantScaling4x4Default[i / 3], sizeof (uint8_t) * 16); + } + + + } else { + WELS_READ_VERIFY (SetScalingListValue (iScalingList8x8[i - 6], 64, &bUseDefaultScalingMatrixFlag8x8, pBs)); + + if (bUseDefaultScalingMatrixFlag8x8) { + bUseDefaultScalingMatrixFlag8x8 = false; + memcpy (iScalingList8x8[i - 6], g_kuiDequantScaling8x8Default[ (i - 6) & 1], sizeof (uint8_t) * 64); + } + } + + } else { + if (i < 6) { + if ((i != 0) && (i != 3)) + memcpy (iScalingList4x4[i], iScalingList4x4[i - 1], sizeof (uint8_t) * 16); + else + memcpy (iScalingList4x4[i], defaultScaling[i / 3], sizeof (uint8_t) * 16); + + } else { + if ((i == 6) || (i == 7)) + memcpy (iScalingList8x8[i - 6], defaultScaling[ (i & 1) + 2], sizeof (uint8_t) * 64); + else + memcpy (iScalingList8x8[i - 6], iScalingList8x8[i - 8], sizeof (uint8_t) * 64); + + } + } + } + return ERR_NONE; + +} + +/*! 
+ ************************************************************************************* + * \brief reset fmo list due to got Sps now + * + * \param pCtx decoder context + * + * \return count number of fmo context units are reset + ************************************************************************************* + */ +int32_t ResetFmoList (PWelsDecoderContext pCtx) { + int32_t iCountNum = 0; + if (NULL != pCtx) { + // Fixed memory leak due to PPS_ID might not be continuous sometimes, 1/5/2010 + UninitFmoList (&pCtx->sFmoList[0], MAX_PPS_COUNT, pCtx->iActiveFmoNum, pCtx->pMemAlign); + iCountNum = pCtx->iActiveFmoNum; + pCtx->iActiveFmoNum = 0; + } + return iCountNum; +} + +} // namespace WelsDec diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/bit_stream.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/bit_stream.cpp new file mode 100644 index 000000000..c75260426 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/bit_stream.cpp @@ -0,0 +1,110 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file bit_stream.cpp + * + * \brief Reading / writing bit-stream + * + * \date 03/10/2009 Created + * + ************************************************************************************* + */ +#include "bit_stream.h" +#include "error_code.h" + +namespace WelsDec { + +inline uint32_t GetValue4Bytes (uint8_t* pDstNal) { + uint32_t uiValue = 0; + uiValue = (pDstNal[0] << 24) | (pDstNal[1] << 16) | (pDstNal[2] << 8) | (pDstNal[3]); + return uiValue; +} + +int32_t InitReadBits (PBitStringAux pBitString, intX_t iEndOffset) { + if (pBitString->pCurBuf >= (pBitString->pEndBuf - iEndOffset)) { + return ERR_INFO_INVALID_ACCESS; + } + pBitString->uiCurBits = GetValue4Bytes (pBitString->pCurBuf); + pBitString->pCurBuf += 4; + pBitString->iLeftBits = -16; + return ERR_NONE; +} + +/*! 
+ * \brief input bits for decoder or initialize bitstream writing in encoder + * + * \param pBitString Bit string auxiliary pointer + * \param kpBuf bit-stream buffer + * \param kiSize size in bits for decoder; size in bytes for encoder + * + * \return 0: success, other: fail + */ +int32_t DecInitBits (PBitStringAux pBitString, const uint8_t* kpBuf, const int32_t kiSize) { + const int32_t kiSizeBuf = (kiSize + 7) >> 3; + uint8_t* pTmp = (uint8_t*)kpBuf; + + if (NULL == pTmp) + return ERR_INFO_INVALID_ACCESS; + + pBitString->pStartBuf = pTmp; // buffer to start position + pBitString->pEndBuf = pTmp + kiSizeBuf; // buffer + length + pBitString->iBits = kiSize; // count bits of overall bitstreaming inputindex; + pBitString->pCurBuf = pBitString->pStartBuf; + int32_t iErr = InitReadBits (pBitString, 0); + if (iErr) { + return iErr; + } + return ERR_NONE; +} + +void RBSP2EBSP (uint8_t* pDstBuf, uint8_t* pSrcBuf, const int32_t kiSize) { + uint8_t* pSrcPointer = pSrcBuf; + uint8_t* pDstPointer = pDstBuf; + uint8_t* pSrcEnd = pSrcBuf + kiSize; + int32_t iZeroCount = 0; + + while (pSrcPointer < pSrcEnd) { + if (iZeroCount == 2 && *pSrcPointer <= 3) { + //add the code 0x03 + *pDstPointer++ = 3; + iZeroCount = 0; + } + if (*pSrcPointer == 0) { + ++ iZeroCount; + } else { + iZeroCount = 0; + } + *pDstPointer++ = *pSrcPointer++; + } +} + +} // namespace WelsDec + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/cabac_decoder.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/cabac_decoder.cpp new file mode 100644 index 000000000..32be5513a --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/cabac_decoder.cpp @@ -0,0 +1,333 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ *
+ * cabac_decoder.cpp: deals with cabac state transition and related functions
+ */
+#include "cabac_decoder.h"
+namespace WelsDec {
+static const int16_t g_kMvdBinPos2Ctx [8] = {0, 1, 2, 3, 3, 3, 3, 3};
+
+/*!
+ * \brief   precompute the initial state / MPS of every CABAC context for each of the four
+ *          init models and every QP in [0, WELS_QP_MAX], then mark the table as initialised
+ */
+void WelsCabacGlobalInit (PWelsDecoderContext pCtx) {
+  for (int32_t iModel = 0; iModel < 4; ++iModel) {
+    for (int32_t iQp = 0; iQp <= WELS_QP_MAX; ++iQp) {
+      for (int32_t iIdx = 0; iIdx < WELS_CONTEXT_COUNT; ++iIdx) {
+        const int32_t kiM = g_kiCabacGlobalContextIdx[iIdx][iModel][0];
+        const int32_t kiN = g_kiCabacGlobalContextIdx[iIdx][iModel][1];
+        const int32_t kiPreCtxState = WELS_CLIP3 ((((kiM * iQp) >> 4) + kiN), 1, 126);
+        // pre-states 1..63 give MPS 0 (state counts down from 63), 64..126 give MPS 1 (state counts up from 64)
+        const bool kbMpsIsOne = kiPreCtxState > 63;
+        const uint8_t kuiStateIdx = kbMpsIsOne ? (kiPreCtxState - 64) : (63 - kiPreCtxState);
+        const uint8_t kuiValMps = kbMpsIsOne ? 1 : 0;
+        pCtx->sWelsCabacContexts[iModel][iQp][iIdx].uiState = kuiStateIdx;
+        pCtx->sWelsCabacContexts[iModel][iQp][iIdx].uiMPS = kuiValMps;
+      }
+    }
+  }
+  pCtx->bCabacInited = true;
+}
+
+// ------------------- 1. context initialization
+// Copies the precomputed context set for the slice's init model and QP into pCtx->pCabacCtx,
+// building the global table first if that has not happened yet.
+void WelsCabacContextInit (PWelsDecoderContext pCtx, uint8_t eSliceType, int32_t iCabacInitIdc, int32_t iQp) {
+  if (!pCtx->bCabacInited) {
+    WelsCabacGlobalInit (pCtx);
+  }
+  // NOTE(review): the model is picked from pCtx->eSliceType; the eSliceType argument is not read here
+  int32_t iModelIdx;
+  if (pCtx->eSliceType == WelsCommon::I_SLICE) {
+    iModelIdx = 0;
+  } else {
+    iModelIdx = iCabacInitIdc + 1;
+  }
+  memcpy (pCtx->pCabacCtx, pCtx->sWelsCabacContexts[iModelIdx][iQp],
+          WELS_CONTEXT_COUNT * sizeof (SWelsCabacCtx));
+}
+
+// ------------------- 2. decoding Engine initialization
+// Rewinds to the first byte the bit reader has not consumed yet, loads five bytes (40 bits)
+// into the engine offset and hands the buffer bounds over to the CABAC engine.
+int32_t InitCabacDecEngineFromBS (PWelsCabacDecEngine pDecEngine, PBitStringAux pBsAux) {
+  const int32_t kiRemainingBits = - pBsAux->iLeftBits; //pBsAux->iLeftBits < 0
+  const int32_t kiRemainingBytes = (kiRemainingBits >> 3) + 2; //+2: indicating the pre-read 2 bytes
+  uint8_t* pCurr = pBsAux->pCurBuf - kiRemainingBytes;
+
+  if (pCurr >= (pBsAux->pEndBuf - 1)) {
+    return ERR_INFO_INVALID_ACCESS;
+  }
+
+  pDecEngine->pBuffStart = pBsAux->pStartBuf;
+  pDecEngine->pBuffEnd = pBsAux->pEndBuf;
+  pDecEngine->pBuffCurr = pCurr + 5;
+  pDecEngine->uiRange = WELS_CABAC_HALF;
+  pDecEngine->iBitsLeft = 31;
+
+  // first three bytes form the upper 24 bits, the next two bytes the lower 16 bits
+  pDecEngine->uiOffset = ((pCurr[0] << 16) | (pCurr[1] << 8) | pCurr[2]);
+  pDecEngine->uiOffset <<= 16;
+  pDecEngine->uiOffset |= (pCurr[3] << 8) | pCurr[4];
+
+  pBsAux->iLeftBits = 0;
+  return ERR_NONE;
+}
+
+// Hands the read position back from the CABAC engine to the plain bit reader.
+void RestoreCabacDecEngineToBS (PWelsCabacDecEngine pDecEngine, PBitStringAux pBsAux) {
+  //CABAC decoding finished, changing to SBitStringAux
+  const int32_t kiWholeBytesLeft = pDecEngine->iBitsLeft >> 3;
+  pDecEngine->pBuffCurr -= kiWholeBytesLeft;
+  pDecEngine->iBitsLeft = 0; //pcm_alignment_zero_bit in CABAC
+
+  pBsAux->pStartBuf = pDecEngine->pBuffStart;
+  pBsAux->pCurBuf = pDecEngine->pBuffCurr;
+  pBsAux->uiCurBits = 0;
+  pBsAux->iLeftBits = 0;
+  pBsAux->iIndex = 0;
+}
+
+// ------------------- 3.
actual decoding
+// Reads up to four bytes (fewer when less than four remain before pBuffEnd) from the engine
+// buffer into uiValue, most significant byte first, and reports the bit count in iNumBitsRead.
+// Both outputs are zeroed first; an error code is returned when no byte is left.
+int32_t Read32BitsCabac (PWelsCabacDecEngine pDecEngine, uint32_t& uiValue, int32_t& iNumBitsRead) {
+  intX_t iLeftBytes = pDecEngine->pBuffEnd - pDecEngine->pBuffCurr;
+  iNumBitsRead = 0;
+  uiValue = 0;
+  if (iLeftBytes <= 0) {
+    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_CABAC_NO_BS_TO_READ);
+  }
+  switch (iLeftBytes) {
+  case 3:
+    uiValue = ((pDecEngine->pBuffCurr[0]) << 16 | (pDecEngine->pBuffCurr[1]) << 8 | (pDecEngine->pBuffCurr[2]));
+    pDecEngine->pBuffCurr += 3;
+    iNumBitsRead = 24;
+    break;
+  case 2:
+    uiValue = ((pDecEngine->pBuffCurr[0]) << 8 | (pDecEngine->pBuffCurr[1]));
+    pDecEngine->pBuffCurr += 2;
+    iNumBitsRead = 16;
+    break;
+  case 1:
+    uiValue = pDecEngine->pBuffCurr[0];
+    pDecEngine->pBuffCurr += 1;
+    iNumBitsRead = 8;
+    break;
+  default:
+    // four or more bytes available: take a full 32-bit word
+    uiValue = ((pDecEngine->pBuffCurr[0] << 24) | (pDecEngine->pBuffCurr[1]) << 16 | (pDecEngine->pBuffCurr[2]) << 8 |
+               (pDecEngine->pBuffCurr[3]));
+    pDecEngine->pBuffCurr += 4;
+    iNumBitsRead = 32;
+    break;
+  }
+  return ERR_NONE;
+}
+
+// Decodes one context-coded bin into uiBinVal and updates the context (state / MPS) in pBinCtx.
+// The engine keeps uiOffset scaled by iBitsLeft bits, so comparisons use (uiRange << iBitsLeft).
+// Returns the refill error only when the refill failed and iBitsLeft is still negative.
+int32_t DecodeBinCabac (PWelsCabacDecEngine pDecEngine, PWelsCabacCtx pBinCtx, uint32_t& uiBinVal) {
+  int32_t iErrorInfo = ERR_NONE;
+  uint32_t uiState = pBinCtx->uiState;
+  uiBinVal = pBinCtx->uiMPS; // start from the most probable symbol; flipped below on the LPS path
+  uint64_t uiOffset = pDecEngine->uiOffset;
+  uint64_t uiRange = pDecEngine->uiRange;
+
+  int32_t iRenorm = 1; // MPS path renormalises by at most one bit
+  uint32_t uiRangeLPS = g_kuiCabacRangeLps[uiState][ (uiRange >> 6) & 0x03];
+  uiRange -= uiRangeLPS;
+  if (uiOffset >= (uiRange << pDecEngine->iBitsLeft)) { //LPS
+    uiOffset -= (uiRange << pDecEngine->iBitsLeft);
+    uiBinVal ^= 0x0001;
+    if (!uiState)
+      pBinCtx->uiMPS ^= 0x01; // at state 0 an LPS swaps the meaning of MPS
+    pBinCtx->uiState = g_kuiStateTransTable[uiState][0];
+    iRenorm = g_kRenormTable256[uiRangeLPS];
+    uiRange = (uiRangeLPS << iRenorm);
+  } else { //MPS
+    pBinCtx->uiState = g_kuiStateTransTable[uiState][1];
+    if (uiRange >= WELS_CABAC_QUARTER) {
+      // no renormalisation needed: only the range changes
+      pDecEngine->uiRange = uiRange;
+      return ERR_NONE;
+    } else {
+      uiRange <<= 1;
+    }
+  }
+  //Renorm
+  pDecEngine->uiRange = uiRange;
+  pDecEngine->iBitsLeft -= iRenorm;
+  if (pDecEngine->iBitsLeft > 0) {
+    pDecEngine->uiOffset = uiOffset;
+    return ERR_NONE;
+  }
+  // scaling bits used up: append up to 32 fresh bits to the offset
+  uint32_t uiVal = 0;
+  int32_t iNumBitsRead = 0;
+  iErrorInfo = Read32BitsCabac (pDecEngine, uiVal, iNumBitsRead);
+  pDecEngine->uiOffset = (uiOffset << iNumBitsRead) | uiVal;
+  pDecEngine->iBitsLeft += iNumBitsRead;
+  if (iErrorInfo && pDecEngine->iBitsLeft < 0) {
+    return iErrorInfo;
+  }
+  return ERR_NONE;
+}
+
+// Decodes one bypass (no context) bin into uiBinVal.
+// Refills the offset first when no scaling bit is left; fails only if that refill delivered no bits.
+int32_t DecodeBypassCabac (PWelsCabacDecEngine pDecEngine, uint32_t& uiBinVal) {
+  int32_t iErrorInfo = ERR_NONE;
+  int32_t iBitsLeft = pDecEngine->iBitsLeft;
+  uint64_t uiOffset = pDecEngine->uiOffset;
+  uint64_t uiRangeValue;
+
+
+  if (iBitsLeft <= 0) {
+    uint32_t uiVal = 0;
+    int32_t iNumBitsRead = 0;
+    iErrorInfo = Read32BitsCabac (pDecEngine, uiVal, iNumBitsRead);
+    uiOffset = (uiOffset << iNumBitsRead) | uiVal;
+    iBitsLeft = iNumBitsRead;
+    if (iErrorInfo && iBitsLeft == 0) {
+      return iErrorInfo;
+    }
+  }
+  iBitsLeft--;
+  uiRangeValue = (pDecEngine->uiRange << iBitsLeft);
+  if (uiOffset >= uiRangeValue) {
+    pDecEngine->iBitsLeft = iBitsLeft;
+    pDecEngine->uiOffset = uiOffset - uiRangeValue;
+    uiBinVal = 1;
+    return ERR_NONE;
+  }
+  pDecEngine->iBitsLeft = iBitsLeft;
+  pDecEngine->uiOffset = uiOffset;
+  uiBinVal = 0;
+  return ERR_NONE;
+}
+
+// Decodes the terminate bin into uiBinVal using a range reduced by 2.
+// When the bin is 1 the engine state is left untouched; when it is 0 the reduced range is
+// stored, renormalised (with a refill if needed) when it falls below WELS_CABAC_QUARTER.
+int32_t DecodeTerminateCabac (PWelsCabacDecEngine pDecEngine, uint32_t& uiBinVal) {
+  int32_t iErrorInfo = ERR_NONE;
+  uint64_t uiRange = pDecEngine->uiRange - 2;
+  uint64_t uiOffset = pDecEngine->uiOffset;
+
+  if (uiOffset >= (uiRange << pDecEngine->iBitsLeft)) {
+    uiBinVal = 1;
+  } else {
+    uiBinVal = 0;
+    // Renorm
+    if (uiRange < WELS_CABAC_QUARTER) {
+      int32_t iRenorm = g_kRenormTable256[uiRange];
+      pDecEngine->uiRange = (uiRange << iRenorm);
+      pDecEngine->iBitsLeft -= iRenorm;
+      if (pDecEngine->iBitsLeft < 0) {
+        uint32_t uiVal = 0;
+        int32_t iNumBitsRead = 0;
+        iErrorInfo = Read32BitsCabac (pDecEngine, uiVal, iNumBitsRead);
+        pDecEngine->uiOffset = (pDecEngine->uiOffset << iNumBitsRead) | uiVal;
+        pDecEngine->iBitsLeft += iNumBitsRead;
+      }
+      if (iErrorInfo && pDecEngine->iBitsLeft < 0) {
+        return iErrorInfo;
+      }
+      return ERR_NONE;
+    } else {
+      pDecEngine->uiRange = uiRange;
+      return ERR_NONE;
+    }
+  }
+  return ERR_NONE;
+}
+
+// Decodes a unary-coded symbol: the first bin uses pBinCtx, every later bin uses
+// pBinCtx + iCtxOffset. uiSymVal is 0 when the first bin is 0; otherwise it is the number of
+// bins decoded with the offset context (the terminating 0 bin included).
+// WELS_READ_VERIFY is defined elsewhere; presumably it returns the callee's error on failure.
+int32_t DecodeUnaryBinCabac (PWelsCabacDecEngine pDecEngine, PWelsCabacCtx pBinCtx, int32_t iCtxOffset,
+                             uint32_t& uiSymVal) {
+  uiSymVal = 0;
+  WELS_READ_VERIFY (DecodeBinCabac (pDecEngine, pBinCtx, uiSymVal));
+  if (uiSymVal == 0) {
+    return ERR_NONE;
+  } else {
+    uint32_t uiCode;
+    pBinCtx += iCtxOffset;
+    uiSymVal = 0;
+    do {
+      WELS_READ_VERIFY (DecodeBinCabac (pDecEngine, pBinCtx, uiCode));
+      ++uiSymVal;
+    } while (uiCode != 0);
+    return ERR_NONE;
+  }
+}
+
+// Decodes a bypass-coded escape value. iCount is the starting exponent: every leading 1 bin
+// adds (1 << iCount) and increments iCount; after the first 0 bin, iCount more bins are read
+// as a binary suffix (most significant bit first). uiSymVal is the sum of both parts.
+// Fails with ERR_CABAC_UNEXPECTED_VALUE when iCount reaches 16.
+int32_t DecodeExpBypassCabac (PWelsCabacDecEngine pDecEngine, int32_t iCount, uint32_t& uiSymVal) {
+  uint32_t uiCode;
+  int32_t iSymTmp = 0;  // value contributed by the unary prefix
+  int32_t iSymTmp2 = 0; // value contributed by the binary suffix
+  uiSymVal = 0;
+  do {
+    WELS_READ_VERIFY (DecodeBypassCabac (pDecEngine, uiCode));
+    if (uiCode == 1) {
+      iSymTmp += (1 << iCount);
+      ++iCount;
+    }
+  } while (uiCode != 0 && iCount != 16);
+  if (iCount == 16) {
+    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_CABAC_UNEXPECTED_VALUE);
+  }
+
+  while (iCount--) {
+    WELS_READ_VERIFY (DecodeBypassCabac (pDecEngine, uiCode));
+    if (uiCode == 1) {
+      iSymTmp2 |= (1 << iCount);
+    }
+  }
+  uiSymVal = (uint32_t) (iSymTmp + iSymTmp2);
+  return ERR_NONE;
+}
+
+// Decodes a level value into uiCode: context-coded bins on pBinCtx are counted until a 0 bin
+// or until uiCount reaches 13; if the last bin was still non-zero, an escape decoded with
+// DecodeExpBypassCabac (starting exponent 0) plus 1 is added.
+// NOTE(review): declared uint32_t while the sibling decoders return int32_t.
+uint32_t DecodeUEGLevelCabac (PWelsCabacDecEngine pDecEngine, PWelsCabacCtx pBinCtx, uint32_t& uiCode) {
+  uiCode = 0;
+  WELS_READ_VERIFY (DecodeBinCabac (pDecEngine, pBinCtx, uiCode));
+  if (uiCode == 0)
+    return ERR_NONE;
+  else {
+    uint32_t uiTmp, uiCount = 1;
+    uiCode = 0;
+    do {
+      WELS_READ_VERIFY (DecodeBinCabac (pDecEngine, pBinCtx, uiTmp));
+      ++uiCode;
+      ++uiCount;
+    } while (uiTmp != 0 && uiCount != 13);
+
+    if (uiTmp != 0) {
+      WELS_READ_VERIFY (DecodeExpBypassCabac (pDecEngine, 0, uiTmp));
+      uiCode += uiTmp + 1;
+    }
+    return ERR_NONE;
+  }
+  return ERR_NONE;
+}
+
+// Decodes a motion-vector-difference magnitude into uiCode. Bin i uses the context
+// pBinCtx + g_kMvdBinPos2Ctx[i]; counting stops at a 0 bin or when uiCount reaches 8, after
+// which an escape decoded with DecodeExpBypassCabac (starting exponent 3) plus 1 is added.
+// iMaxBin is not read by this function.
+int32_t DecodeUEGMvCabac (PWelsCabacDecEngine pDecEngine, PWelsCabacCtx pBinCtx, uint32_t iMaxBin, uint32_t& uiCode) {
+  WELS_READ_VERIFY (DecodeBinCabac (pDecEngine, pBinCtx + g_kMvdBinPos2Ctx[0], uiCode));
+  if (uiCode == 0)
+    return ERR_NONE;
+  else {
+    uint32_t uiTmp, uiCount = 1;
+    uiCode = 0;
+    do {
+      WELS_READ_VERIFY (DecodeBinCabac (pDecEngine, pBinCtx + g_kMvdBinPos2Ctx[uiCount++], uiTmp));
+      uiCode++;
+    } while (uiTmp != 0 && uiCount != 8);
+
+    if (uiTmp != 0) {
+      WELS_READ_VERIFY (DecodeExpBypassCabac (pDecEngine, 3, uiTmp));
+      uiCode += (uiTmp + 1);
+    }
+    return ERR_NONE;
+  }
+}
+}
diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/deblocking.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/deblocking.cpp
new file mode 100644
index 000000000..f4fffca4c
--- /dev/null
+++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/deblocking.cpp
@@ -0,0 +1,1422 @@
+/*!
+ * \copy
+ * Copyright (c) 2010-2013, Cisco Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file deblocking.c + * + * \brief Interfaces introduced in frame deblocking filtering + * + * \date 08/02/2010 + * + ************************************************************************************* + */ + +#include "deblocking.h" +#include "deblocking_common.h" +#include "cpu_core.h" + +namespace WelsDec { + +#define NO_SUPPORTED_FILTER_IDX (-1) +#define LEFT_FLAG_BIT 0 +#define TOP_FLAG_BIT 1 +#define LEFT_FLAG_MASK 0x01 +#define TOP_FLAG_MASK 0x02 + +#define SAME_MB_DIFF_REFIDX +#define g_kuiAlphaTable(x) g_kuiAlphaTable[(x)+12] +#define g_kiBetaTable(x) g_kiBetaTable[(x)+12] +#define g_kiTc0Table(x) g_kiTc0Table[(x)+12] + +#define MB_BS_MV(pRefPic0, pRefPic1, iMotionVector, iMbXy, iMbBn, iIndex, iNeighIndex) \ +(\ + ( pRefPic0 != pRefPic1) ||\ + ( WELS_ABS( iMotionVector[iMbXy][iIndex][0] - iMotionVector[iMbBn][iNeighIndex][0] ) >= 4 ) ||\ + ( WELS_ABS( iMotionVector[iMbXy][iIndex][1] - iMotionVector[iMbBn][iNeighIndex][1] ) >= 4 )\ +) + +#define ON_MB_BS_MV_DIFF(iMV_A, iMV_B, iMbXy, iMbBn, iIndex, iNeighIndex) \ +(\ + (( WELS_ABS( iMV_A[iMbXy][iIndex][0] - iMV_B[iMbBn][iNeighIndex][0] ) >= 4 ) || \ + ( WELS_ABS( iMV_A[iMbXy][iIndex][1] - iMV_B[iMbBn][iNeighIndex][1] ) >= 4 ))\ +) + +#define IN_MB_BS_MV_DIFF(iMV_A, iMV_B, iMbXy, iIndex, iNeighIndex) \ +(\ + (( WELS_ABS( iMV_A[iMbXy][iIndex][0] - iMV_B[iMbXy][iNeighIndex][0] ) >= 4 ) || \ + ( WELS_ABS( iMV_A[iMbXy][iIndex][1] - iMV_B[iMbXy][iNeighIndex][1] ) 
>= 4 )) \ +) + +//On MB Boundary strength +//Apply for B_SLICE +#define ON_MB_BS(ref_p0, ref_q0, ref_p1, ref_q1, mv0, mv1, iMbXy, iMbBn, iIndex, iNeighIndex) \ +(\ + (ref_p0 != ref_p1) ? \ + ((ref_p0 == ref_q0) ? \ + (ON_MB_BS_MV_DIFF (mv0, mv0, iMbXy, iMbBn, iIndex, iNeighIndex) || ON_MB_BS_MV_DIFF (mv1, mv1, iMbXy, iMbBn, iIndex, iNeighIndex)) : \ + (ON_MB_BS_MV_DIFF (mv0, mv1, iMbXy, iMbBn, iIndex, iNeighIndex) || ON_MB_BS_MV_DIFF (mv1, mv0, iMbXy, iMbBn, iIndex, iNeighIndex))) : \ + ((ON_MB_BS_MV_DIFF (mv0, mv0, iMbXy, iMbBn, iIndex, iNeighIndex) || ON_MB_BS_MV_DIFF (mv1, mv1, iMbXy, iMbBn, iIndex, iNeighIndex)) && \ + (ON_MB_BS_MV_DIFF (mv0, mv1, iMbXy, iMbBn, iIndex, iNeighIndex) || ON_MB_BS_MV_DIFF (mv1, mv0, iMbXy, iMbBn, iIndex, iNeighIndex))) \ +) + +#if defined(SAME_MB_DIFF_REFIDX) +#define SMB_EDGE_MV(pRefPics, iMotionVector, iIndex, iNeighIndex) \ +(\ + ( pRefPics[iIndex] != pRefPics[iNeighIndex] )||(\ + ( WELS_ABS( iMotionVector[iIndex][0] - iMotionVector[iNeighIndex][0] ) &(~3) ) |\ + ( WELS_ABS( iMotionVector[iIndex][1] - iMotionVector[iNeighIndex][1] ) &(~3) ))\ +) +#else +#define SMB_EDGE_MV(iRefIndex, iMotionVector, iIndex, iNeighIndex) \ +(\ + !!(( WELS_ABS( iMotionVector[iIndex][0] - iMotionVector[iNeighIndex][0] ) &(~3) ) |( WELS_ABS( iMotionVector[iIndex][1] - iMotionVector[iNeighIndex][1] ) &(~3) ))\ +) +#endif + +#if defined(SAME_MB_DIFF_REFIDX) +#define IN_SMB_EDGE_MV(refs, mv, iMbXy, iIndex, iNeigborIndex) \ +(\ + (((refs[LIST_0][iIndex] == refs[LIST_0][iNeigborIndex]) && (refs[LIST_1][iIndex] == refs[LIST_1][iNeigborIndex])) || \ + ((refs[LIST_0][iIndex] == refs[LIST_1][iNeigborIndex]) && (refs[LIST_1][iIndex] == refs[LIST_0][iNeigborIndex]))) ? \ + ((refs[LIST_0][iIndex] != refs[LIST_1][iIndex]) ? \ + ((refs[LIST_0][iIndex] == refs[LIST_0][iNeigborIndex]) ? 
\ + (IN_MB_BS_MV_DIFF (mv[LIST_0], mv[LIST_0], iMbXy, iIndex, iNeigborIndex) || IN_MB_BS_MV_DIFF (mv[LIST_1], mv[LIST_1], iMbXy, iIndex, iNeigborIndex)) : \ + (IN_MB_BS_MV_DIFF (mv[LIST_0], mv[LIST_1], iMbXy, iIndex, iNeigborIndex) || IN_MB_BS_MV_DIFF (mv[LIST_1], mv[LIST_0], iMbXy, iIndex, iNeigborIndex))) : \ + ((IN_MB_BS_MV_DIFF (mv[LIST_0], mv[LIST_0], iMbXy, iIndex, iNeigborIndex) || IN_MB_BS_MV_DIFF (mv[LIST_1], mv[LIST_1], iMbXy, iIndex, iNeigborIndex)) && \ + (IN_MB_BS_MV_DIFF (mv[LIST_0], mv[LIST_1], iMbXy, iIndex, iNeigborIndex) || IN_MB_BS_MV_DIFF (mv[LIST_1], mv[LIST_0], iMbXy, iIndex, iNeigborIndex)))) : 1 \ +) +#else +#define IN_SMB_EDGE_MV(refs, mv, iMbXy, iIndex, iNeigborIndex) \ +(\ + !!(((refs[LIST_0][iIndex] == refs[LIST_0][iNeigborIndex]) && (refs[LIST_1][iIndex] == refs[LIST_1][iNeigborIndex])) || \ + ((refs[LIST_0][iIndex] == refs[LIST_1][iNeigborIndex]) && (refs[LIST_1][iIndex] == refs[LIST_0][iNeigborIndex]))) ? \ + ((refs[LIST_0][iIndex] != refs[LIST_1][iIndex]) ? \ + ((refs[LIST_0][iIndex] == refs[LIST_0][iNeigborIndex]) ? 
\ + (IN_MB_BS_MV_DIFF (mv[LIST_0], mv[LIST_0], iMbXy, iIndex, iNeigborIndex) || IN_MB_BS_MV_DIFF (mv[LIST_1], mv[LIST_1], iMbXy, iIndex, iNeigborIndex)) : \ + (IN_MB_BS_MV_DIFF (mv[LIST_0], mv[LIST_1], iMbXy, iIndex, iNeigborIndex) || IN_MB_BS_MV_DIFF (mv[LIST_1], mv[LIST_0], iMbXy, iIndex, iNeigborIndex))) : \ + ((IN_MB_BS_MV_DIFF (mv[LIST_0], mv[LIST_0], iMbXy, iIndex, iNeigborIndex) || IN_MB_BS_MV_DIFF (mv[LIST_1], mv[LIST_1], iMbXy, iIndex, iNeigborIndex)) && \ + (IN_MB_BS_MV_DIFF (mv[LIST_0], mv[LIST_1], iMbXy, iIndex, iNeigborIndex) || IN_MB_BS_MV_DIFF (mv[LIST_1], mv[LIST_0], iMbXy, iIndex, iNeigborIndex)))) : 1 \ +) +#endif + +#define BS_EDGE(bsx1, pRefPics, iMotionVector, iIndex, iNeighIndex) \ +( (bsx1|SMB_EDGE_MV(pRefPics, iMotionVector, iIndex, iNeighIndex))<<((uint8_t)(!!bsx1))) + +//Inside MB Boundary strength +//Apply for B_SLICE +#define IN_BS_EDGE(bsx1, refs, mv, iMbXy, iIndex, iNeigborIndex) \ +( (bsx1|IN_SMB_EDGE_MV(refs, mv, iMbXy, iIndex, iNeigborIndex))<<((uint8_t)(!!bsx1))) + +#define GET_ALPHA_BETA_FROM_QP(iQp, iAlphaOffset, iBetaOffset, iIndex, iAlpha, iBeta) \ +{\ + iIndex = (iQp + iAlphaOffset);\ + iAlpha = g_kuiAlphaTable(iIndex);\ + iBeta = g_kiBetaTable((iQp + iBetaOffset));\ +} + +static const uint8_t g_kuiAlphaTable[52 + 24] = { //this table refers to Table 8-16 in H.264/AVC standard + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 4, 4, 5, 6, + 7, 8, 9, 10, 12, 13, 15, 17, 20, 22, + 25, 28, 32, 36, 40, 45, 50, 56, 63, 71, + 80, 90, 101, 113, 127, 144, 162, 182, 203, 226, + 255, 255 + , 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 +}; + +static const int8_t g_kiBetaTable[52 + 24] = { //this table refers to Table 8-16 in H.264/AVC standard + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 2, 2, 2, 3, + 3, 3, 3, 4, 4, 4, 6, 6, 7, 7, + 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, + 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, + 18, 18 + , 18, 18, 18, 18, 
18, 18, 18, 18, 18, 18, 18, 18 +}; + +static const int8_t g_kiTc0Table[52 + 24][4] = { //this table refers Table 8-17 in H.264/AVC standard + { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, + { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, + { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, + { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, + { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 1 }, + { -1, 0, 0, 1 }, { -1, 0, 0, 1 }, { -1, 0, 0, 1 }, { -1, 0, 1, 1 }, { -1, 0, 1, 1 }, { -1, 1, 1, 1 }, + { -1, 1, 1, 1 }, { -1, 1, 1, 1 }, { -1, 1, 1, 1 }, { -1, 1, 1, 2 }, { -1, 1, 1, 2 }, { -1, 1, 1, 2 }, + { -1, 1, 1, 2 }, { -1, 1, 2, 3 }, { -1, 1, 2, 3 }, { -1, 2, 2, 3 }, { -1, 2, 2, 4 }, { -1, 2, 3, 4 }, + { -1, 2, 3, 4 }, { -1, 3, 3, 5 }, { -1, 3, 4, 6 }, { -1, 3, 4, 6 }, { -1, 4, 5, 7 }, { -1, 4, 5, 8 }, + { -1, 4, 6, 9 }, { -1, 5, 7, 10 }, { -1, 6, 8, 11 }, { -1, 6, 8, 13 }, { -1, 7, 10, 14 }, { -1, 8, 11, 16 }, + { -1, 9, 12, 18 }, { -1, 10, 13, 20 }, { -1, 11, 15, 23 }, { -1, 13, 17, 25 } + , { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 } + , { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 } +}; + +static const uint8_t g_kuiTableBIdx[2][8] = { + { + 0, 4, 8, 12, + 3, 7, 11, 15 + }, + + { + 0, 1, 2, 3, + 12, 13, 14, 15 + }, +}; + +static const uint8_t g_kuiTableB8x8Idx[2][16] = { + { + 0, 1, 4, 5, 8, 9, 12, 13, // 0 1 | 2 3 + 2, 3, 6, 7, 10, 11, 14, 15 // 4 5 | 6 7 + }, // ------------ + // 8 9 | 10 11 + { + // 12 13 | 14 15 + 0, 1, 4, 5, 2, 3, 6, 7, + 8, 9, 12, 13, 10, 11, 14, 15 + }, +}; +//fix Bugzilla 1486223 +#define TC0_TBL_LOOKUP(tc, iIndexA, pBS, bChroma) \ +{\ 
+ tc[0] = g_kiTc0Table(iIndexA)[pBS[0] & 3] + bChroma;\ + tc[1] = g_kiTc0Table(iIndexA)[pBS[1] & 3] + bChroma;\ + tc[2] = g_kiTc0Table(iIndexA)[pBS[2] & 3] + bChroma;\ + tc[3] = g_kiTc0Table(iIndexA)[pBS[3] & 3] + bChroma;\ +} + +void inline DeblockingBSInsideMBAvsbase (int8_t* pNnzTab, uint8_t nBS[2][4][4], int32_t iLShiftFactor) { + uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3; + + uiNnz32b0 = * (uint32_t*) (pNnzTab + 0); + uiNnz32b1 = * (uint32_t*) (pNnzTab + 4); + uiNnz32b2 = * (uint32_t*) (pNnzTab + 8); + uiNnz32b3 = * (uint32_t*) (pNnzTab + 12); + + nBS[0][1][0] = (pNnzTab[0] | pNnzTab[1]) << iLShiftFactor; + nBS[0][2][0] = (pNnzTab[1] | pNnzTab[2]) << iLShiftFactor; + nBS[0][3][0] = (pNnzTab[2] | pNnzTab[3]) << iLShiftFactor; + + nBS[0][1][1] = (pNnzTab[4] | pNnzTab[5]) << iLShiftFactor; + nBS[0][2][1] = (pNnzTab[5] | pNnzTab[6]) << iLShiftFactor; + nBS[0][3][1] = (pNnzTab[6] | pNnzTab[7]) << iLShiftFactor; + * (uint32_t*)nBS[1][1] = (uiNnz32b0 | uiNnz32b1) << iLShiftFactor; + + nBS[0][1][2] = (pNnzTab[8] | pNnzTab[9]) << iLShiftFactor; + nBS[0][2][2] = (pNnzTab[9] | pNnzTab[10]) << iLShiftFactor; + nBS[0][3][2] = (pNnzTab[10] | pNnzTab[11]) << iLShiftFactor; + * (uint32_t*)nBS[1][2] = (uiNnz32b1 | uiNnz32b2) << iLShiftFactor; + + nBS[0][1][3] = (pNnzTab[12] | pNnzTab[13]) << iLShiftFactor; + nBS[0][2][3] = (pNnzTab[13] | pNnzTab[14]) << iLShiftFactor; + nBS[0][3][3] = (pNnzTab[14] | pNnzTab[15]) << iLShiftFactor; + * (uint32_t*)nBS[1][3] = (uiNnz32b2 | uiNnz32b3) << iLShiftFactor; +} + +void inline DeblockingBSInsideMBAvsbase8x8 (int8_t* pNnzTab, uint8_t nBS[2][4][4], int32_t iLShiftFactor) { + int8_t i8x8NnzTab[4]; + for (int32_t i = 0; i < 4; i++) { + int32_t iBlkIdx = i << 2; + i8x8NnzTab[i] = (pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 1]] | + pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 2]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 3]]); + } + + //vertical + nBS[0][2][0] = nBS[0][2][1] = (i8x8NnzTab[0] | 
i8x8NnzTab[1]) << iLShiftFactor; + nBS[0][2][2] = nBS[0][2][3] = (i8x8NnzTab[2] | i8x8NnzTab[3]) << iLShiftFactor; + //horizontal + nBS[1][2][0] = nBS[1][2][1] = (i8x8NnzTab[0] | i8x8NnzTab[2]) << iLShiftFactor; + nBS[1][2][2] = nBS[1][2][3] = (i8x8NnzTab[1] | i8x8NnzTab[3]) << iLShiftFactor; +} + +void static inline DeblockingBSInsideMBNormal (PDeblockingFilter pFilter, PDqLayer pCurDqLayer, uint8_t nBS[2][4][4], + int8_t* pNnzTab, + int32_t iMbXy) { + uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3; + int8_t* iRefIdx = pCurDqLayer->pDec->pRefIndex[LIST_0][iMbXy]; + void* iRefs[MB_BLOCK4x4_NUM]; + int i; + ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4); + + int8_t i8x8NnzTab[4]; + + /* Look up each reference picture based on indices */ + for (i = 0; i < MB_BLOCK4x4_NUM; i++) { + if (iRefIdx[i] > REF_NOT_IN_LIST) + iRefs[i] = pFilter->pRefPics[LIST_0][iRefIdx[i]]; + else + iRefs[i] = NULL; + } + + if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { + for (int32_t i = 0; i < 4; i++) { + int32_t iBlkIdx = i << 2; + i8x8NnzTab[i] = (pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 1]] | + pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 2]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 3]]); + } + //vertical + nBS[0][2][0] = nBS[0][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[1]), iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], + g_kuiMbCountScan4Idx[1 << 2], g_kuiMbCountScan4Idx[0]); + nBS[0][2][2] = nBS[0][2][3] = BS_EDGE ((i8x8NnzTab[2] | i8x8NnzTab[3]), iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], + g_kuiMbCountScan4Idx[3 << 2], g_kuiMbCountScan4Idx[2 << 2]); + + //horizontal + nBS[1][2][0] = nBS[1][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[2]), iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], + g_kuiMbCountScan4Idx[2 << 2], g_kuiMbCountScan4Idx[0]); + nBS[1][2][2] = nBS[1][2][3] = BS_EDGE ((i8x8NnzTab[1] | i8x8NnzTab[3]), iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], + g_kuiMbCountScan4Idx[3 << 2], g_kuiMbCountScan4Idx[1 << 2]); + } else { + 
uiNnz32b0 = * (uint32_t*) (pNnzTab + 0); + uiNnz32b1 = * (uint32_t*) (pNnzTab + 4); + uiNnz32b2 = * (uint32_t*) (pNnzTab + 8); + uiNnz32b3 = * (uint32_t*) (pNnzTab + 12); + + for (int i = 0; i < 3; i++) + uiBsx4[i] = pNnzTab[i] | pNnzTab[i + 1]; + nBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 1, 0); + nBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 2, 1); + nBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 3, 2); + + for (int i = 0; i < 3; i++) + uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[4 + i + 1]; + nBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 5, 4); + nBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 6, 5); + nBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 7, 6); + + for (int i = 0; i < 3; i++) + uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[8 + i + 1]; + nBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 9, 8); + nBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 10, 9); + nBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 11, 10); + + for (int i = 0; i < 3; i++) + uiBsx4[i] = pNnzTab[12 + i] | pNnzTab[12 + i + 1]; + nBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 13, 12); + nBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 14, 13); + nBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 15, 14); + + // horizontal + * (uint32_t*)uiBsx4 = (uiNnz32b0 | uiNnz32b1); + nBS[1][1][0] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 4, 0); + nBS[1][1][1] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 5, 1); + nBS[1][1][2] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 6, 2); + nBS[1][1][3] = BS_EDGE (uiBsx4[3], iRefs, 
pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 7, 3); + + * (uint32_t*)uiBsx4 = (uiNnz32b1 | uiNnz32b2); + nBS[1][2][0] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 8, 4); + nBS[1][2][1] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 9, 5); + nBS[1][2][2] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 10, 6); + nBS[1][2][3] = BS_EDGE (uiBsx4[3], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 11, 7); + + * (uint32_t*)uiBsx4 = (uiNnz32b2 | uiNnz32b3); + nBS[1][3][0] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 12, 8); + nBS[1][3][1] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 13, 9); + nBS[1][3][2] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 14, 10); + nBS[1][3][3] = BS_EDGE (uiBsx4[3], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 15, 11); + } +} + +void static inline DeblockingBSliceBSInsideMBNormal (PDeblockingFilter pFilter, PDqLayer pCurDqLayer, + uint8_t nBS[2][4][4], int8_t* pNnzTab, + int32_t iMbXy) { + uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3; + void* iRefs[LIST_A][MB_BLOCK4x4_NUM]; + + ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4); + int8_t i8x8NnzTab[4]; + int l; + + for (l = 0; l < LIST_A; l++) { + int8_t* iRefIdx = pCurDqLayer->pDec->pRefIndex[l][iMbXy]; + int i; + /* Look up each reference picture based on indices */ + for (i = 0; i < MB_BLOCK4x4_NUM; i++) { + if (iRefIdx[i] > REF_NOT_IN_LIST) + iRefs[l][i] = pFilter->pRefPics[l][iRefIdx[i]]; + else + iRefs[l][i] = NULL; + } + } + + if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { + for (int32_t i = 0; i < 4; i++) { + int32_t iBlkIdx = i << 2; + i8x8NnzTab[i] = (pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 1]] | + pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 2]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 3]]); + } + //vertical + int8_t iIndex = g_kuiMbCountScan4Idx[1 << 2]; + int8_t iNeigborIndex = g_kuiMbCountScan4Idx[0]; + nBS[0][2][0] = 
nBS[0][2][1] = IN_BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[1]), iRefs, pCurDqLayer->pDec->pMv, iMbXy, + iIndex, iNeigborIndex); + iIndex = g_kuiMbCountScan4Idx[3 << 2]; + iNeigborIndex = g_kuiMbCountScan4Idx[2 << 2]; + nBS[0][2][2] = nBS[0][2][3] = IN_BS_EDGE ((i8x8NnzTab[2] | i8x8NnzTab[3]), iRefs, pCurDqLayer->pDec->pMv, iMbXy, + iIndex, iNeigborIndex); + + //horizontal + iIndex = g_kuiMbCountScan4Idx[2 << 2]; + iNeigborIndex = g_kuiMbCountScan4Idx[0]; + nBS[1][2][0] = nBS[1][2][1] = IN_BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[2]), iRefs, pCurDqLayer->pDec->pMv, iMbXy, + iIndex, iNeigborIndex); + + iIndex = g_kuiMbCountScan4Idx[3 << 2]; + iNeigborIndex = g_kuiMbCountScan4Idx[1 << 2]; + nBS[1][2][2] = nBS[1][2][3] = IN_BS_EDGE ((i8x8NnzTab[1] | i8x8NnzTab[3]), iRefs, pCurDqLayer->pDec->pMv, iMbXy, + iIndex, iNeigborIndex); + } else { + uiNnz32b0 = * (uint32_t*) (pNnzTab + 0); + uiNnz32b1 = * (uint32_t*) (pNnzTab + 4); + uiNnz32b2 = * (uint32_t*) (pNnzTab + 8); + uiNnz32b3 = * (uint32_t*) (pNnzTab + 12); + + for (int i = 0; i < 3; i++) + uiBsx4[i] = pNnzTab[i] | pNnzTab[i + 1]; + nBS[0][1][0] = IN_BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 1, 0); + nBS[0][2][0] = IN_BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 2, 1); + nBS[0][3][0] = IN_BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 3, 2); + + for (int i = 0; i < 3; i++) + uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[4 + i + 1]; + nBS[0][1][1] = IN_BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 5, 4); + nBS[0][2][1] = IN_BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 6, 5); + nBS[0][3][1] = IN_BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 7, 6); + + for (int i = 0; i < 3; i++) + uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[8 + i + 1]; + nBS[0][1][2] = IN_BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 9, 8); + nBS[0][2][2] = IN_BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 10, 9); + nBS[0][3][2] = IN_BS_EDGE (uiBsx4[2], iRefs, 
pCurDqLayer->pDec->pMv, iMbXy, 11, 10); + + for (int i = 0; i < 3; i++) + uiBsx4[i] = pNnzTab[12 + i] | pNnzTab[12 + i + 1]; + nBS[0][1][3] = IN_BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 13, 12); + nBS[0][2][3] = IN_BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 14, 13); + nBS[0][3][3] = IN_BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 15, 14); + + // horizontal + * (uint32_t*)uiBsx4 = (uiNnz32b0 | uiNnz32b1); + nBS[1][1][0] = IN_BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 4, 0); + nBS[1][1][1] = IN_BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 5, 1); + nBS[1][1][2] = IN_BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 6, 2); + nBS[1][1][3] = IN_BS_EDGE (uiBsx4[3], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 7, 3); + + * (uint32_t*)uiBsx4 = (uiNnz32b1 | uiNnz32b2); + nBS[1][2][0] = IN_BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 8, 4); + nBS[1][2][1] = IN_BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 9, 5); + nBS[1][2][2] = IN_BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 10, 6); + nBS[1][2][3] = IN_BS_EDGE (uiBsx4[3], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 11, 7); + + * (uint32_t*)uiBsx4 = (uiNnz32b2 | uiNnz32b3); + nBS[1][3][0] = IN_BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 12, 8); + nBS[1][3][1] = IN_BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 13, 9); + nBS[1][3][2] = IN_BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 14, 10); + nBS[1][3][3] = IN_BS_EDGE (uiBsx4[3], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 15, 11); + for (int ii = 0; ii < 2; ii++) + for (int jj = 1; jj < 4; jj++) + for (int kk = 0; kk < 4; kk++) + if (nBS[ii][jj][kk] > 1) + nBS[ii][jj][kk] = nBS[ii][jj][kk]; + } +} + + +uint32_t DeblockingBsMarginalMBAvcbase (PDeblockingFilter pFilter, PDqLayer pCurDqLayer, int32_t iEdge, + int32_t iNeighMb, int32_t iMbXy) { + int32_t i, j; + uint32_t uiBSx4; + uint8_t* pBS = (uint8_t*) (&uiBSx4); + const uint8_t* pBIdx = 
&g_kuiTableBIdx[iEdge][0]; + const uint8_t* pBnIdx = &g_kuiTableBIdx[iEdge][4]; + const uint8_t* pB8x8Idx = &g_kuiTableB8x8Idx[iEdge][0]; + const uint8_t* pBn8x8Idx = &g_kuiTableB8x8Idx[iEdge][8]; + int8_t (*iRefIdx)[MB_BLOCK4x4_NUM] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pRefIndex[LIST_0] : + pCurDqLayer->pRefIndex[LIST_0]; + + if (pCurDqLayer->pTransformSize8x8Flag[iMbXy] && pCurDqLayer->pTransformSize8x8Flag[iNeighMb]) { + for (i = 0; i < 2; i++) { + uint8_t uiNzc = 0; + for (j = 0; uiNzc == 0 && j < 4; j++) { + uiNzc |= (GetPNzc (pCurDqLayer, iMbXy)[* (pB8x8Idx + j)] | GetPNzc (pCurDqLayer, iNeighMb)[* (pBn8x8Idx + j)]); + } + if (uiNzc) { + pBS[i << 1] = pBS[1 + (i << 1)] = 2; + } else { + PPicture ref0, ref1; + ref0 = (iRefIdx[iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iMbXy][*pB8x8Idx]] : NULL; + ref1 = (iRefIdx[iNeighMb][*pBn8x8Idx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iNeighMb][*pBn8x8Idx]] : + NULL; + pBS[i << 1] = pBS[1 + (i << 1)] = MB_BS_MV (ref0, ref1, pCurDqLayer->pDec->pMv[LIST_0], iMbXy, iNeighMb, + *pB8x8Idx, *pBn8x8Idx); + } + pB8x8Idx += 4; + pBn8x8Idx += 4; + } + } else if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { + for (i = 0; i < 2; i++) { + uint8_t uiNzc = 0; + for (j = 0; uiNzc == 0 && j < 4; j++) { + uiNzc |= GetPNzc (pCurDqLayer, iMbXy)[* (pB8x8Idx + j)]; + } + for (j = 0; j < 2; j++) { + if (uiNzc | GetPNzc (pCurDqLayer, iNeighMb)[*pBnIdx]) { + pBS[j + (i << 1)] = 2; + } else { + PPicture ref0, ref1; + ref0 = (iRefIdx[iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iMbXy][*pB8x8Idx]] : NULL; + ref1 = (iRefIdx[iNeighMb][*pBnIdx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iNeighMb][*pBnIdx]] : NULL; + pBS[j + (i << 1)] = MB_BS_MV (ref0, ref1, + (pCurDqLayer->pDec != NULL ? 
pCurDqLayer->pDec->pMv[LIST_0] : pCurDqLayer->pMv[LIST_0]), iMbXy, iNeighMb, *pB8x8Idx, + *pBnIdx); + } + pBnIdx++; + } + pB8x8Idx += 4; + } + } else if (pCurDqLayer->pTransformSize8x8Flag[iNeighMb]) { + for (i = 0; i < 2; i++) { + uint8_t uiNzc = 0; + for (j = 0; uiNzc == 0 && j < 4; j++) { + uiNzc |= GetPNzc (pCurDqLayer, iNeighMb)[* (pBn8x8Idx + j)]; + } + for (j = 0; j < 2; j++) { + if (uiNzc | GetPNzc (pCurDqLayer, iMbXy)[*pBIdx]) { + pBS[j + (i << 1)] = 2; + } else { + PPicture ref0, ref1; + ref0 = (iRefIdx[iMbXy][*pBIdx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iMbXy][*pBIdx]] : NULL; + ref1 = (iRefIdx[iNeighMb][*pBn8x8Idx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iNeighMb][*pBn8x8Idx]] : + NULL; + pBS[j + (i << 1)] = MB_BS_MV (ref0, ref1, + (pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_0] : pCurDqLayer->pMv[LIST_0]), iMbXy, iNeighMb, *pBIdx, + *pBn8x8Idx); + } + pBIdx++; + } + pBn8x8Idx += 4; + } + } else { + // only 4x4 transform + for (i = 0; i < 4; i++) { + if (GetPNzc (pCurDqLayer, iMbXy)[*pBIdx] | GetPNzc (pCurDqLayer, iNeighMb)[*pBnIdx]) { + pBS[i] = 2; + } else { + PPicture ref0, ref1; + ref0 = (iRefIdx[iMbXy][*pBIdx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iMbXy][*pBIdx]] : NULL; + ref1 = (iRefIdx[iNeighMb][*pBnIdx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iNeighMb][*pBnIdx]] : NULL; + pBS[i] = MB_BS_MV (ref0, ref1, (pCurDqLayer->pDec != NULL ? 
pCurDqLayer->pDec->pMv[LIST_0] : pCurDqLayer->pMv[LIST_0]), + iMbXy, iNeighMb, *pBIdx, *pBnIdx); + } + pBIdx++; + pBnIdx++; + } + } + + return uiBSx4; +} +uint32_t DeblockingBSliceBsMarginalMBAvcbase (PDeblockingFilter pFilter, PDqLayer pCurDqLayer, int32_t iEdge, + int32_t iNeighMb, int32_t iMbXy) { + int32_t i, j; + uint32_t uiBSx4; + uint8_t* pBS = (uint8_t*) (&uiBSx4); + const uint8_t* pBIdx = &g_kuiTableBIdx[iEdge][0]; + const uint8_t* pBnIdx = &g_kuiTableBIdx[iEdge][4]; + const uint8_t* pB8x8Idx = &g_kuiTableB8x8Idx[iEdge][0]; + const uint8_t* pBn8x8Idx = &g_kuiTableB8x8Idx[iEdge][8]; + PPicture ref_p0, ref_p1, ref_q0, ref_q1; + int8_t (*iRefIdx0)[MB_BLOCK4x4_NUM] = pCurDqLayer->pDec->pRefIndex[LIST_0]; + int8_t (*iRefIdx1)[MB_BLOCK4x4_NUM] = pCurDqLayer->pDec->pRefIndex[LIST_1]; + + if (pCurDqLayer->pTransformSize8x8Flag[iMbXy] && pCurDqLayer->pTransformSize8x8Flag[iNeighMb]) { + for (i = 0; i < 2; i++) { + uint8_t uiNzc = 0; + for (j = 0; uiNzc == 0 && j < 4; j++) { + uiNzc |= (GetPNzc (pCurDqLayer, iMbXy)[* (pB8x8Idx + j)] | GetPNzc (pCurDqLayer, iNeighMb)[* (pBn8x8Idx + j)]); + } + if (uiNzc) { + pBS[i << 1] = pBS[1 + (i << 1)] = 2; + } else { + pBS[i << 1] = pBS[1 + (i << 1)] = 1; + ref_p0 = iRefIdx0[iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_0][iRefIdx0[iMbXy][*pB8x8Idx]] : NULL; + ref_q0 = iRefIdx0[iNeighMb][*pBn8x8Idx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_0][iRefIdx0[iNeighMb][*pBn8x8Idx]] : + NULL; + ref_p1 = iRefIdx1[iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_1][iRefIdx1[iMbXy][*pB8x8Idx]] : NULL; + ref_q1 = iRefIdx1[iNeighMb][*pBn8x8Idx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_1][iRefIdx1[iNeighMb][*pBn8x8Idx]] : + NULL; + if (((ref_p0 == ref_q0) && (ref_p1 == ref_q1)) || ((ref_p0 == ref_q1) && (ref_p1 == ref_q0))) { + int16_t (*pMv0)[MB_BLOCK4x4_NUM][MV_A] = pCurDqLayer->pDec != NULL ? 
pCurDqLayer->pDec->pMv[LIST_0] : + pCurDqLayer->pMv[LIST_0]; + int16_t (*pMv1)[MB_BLOCK4x4_NUM][MV_A] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_1] : + pCurDqLayer->pMv[LIST_1]; + pBS[i << 1] = pBS[1 + (i << 1)] = ON_MB_BS (ref_p0, ref_q0, ref_p1, ref_q1, pMv0, pMv1, iMbXy, iNeighMb, *pB8x8Idx, + *pBn8x8Idx); + } + } + pB8x8Idx += 4; + pBn8x8Idx += 4; + } + } else if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { + for (i = 0; i < 2; i++) { + uint8_t uiNzc = 0; + for (j = 0; uiNzc == 0 && j < 4; j++) { + uiNzc |= GetPNzc (pCurDqLayer, iMbXy)[* (pB8x8Idx + j)]; + } + for (j = 0; j < 2; j++) { + if (uiNzc | GetPNzc (pCurDqLayer, iNeighMb)[*pBnIdx]) { + pBS[j + (i << 1)] = 2; + } else { + pBS[j + (i << 1)] = 1; + ref_p0 = iRefIdx0[iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_0][iRefIdx0[iMbXy][*pB8x8Idx]] : NULL; + ref_q0 = iRefIdx0[iNeighMb][*pBnIdx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_0][iRefIdx0[iNeighMb][*pBnIdx]] : + NULL; + ref_p1 = iRefIdx1[iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_1][iRefIdx1[iMbXy][*pB8x8Idx]] : NULL; + ref_q1 = iRefIdx1[iNeighMb][*pBnIdx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_1][iRefIdx1[iNeighMb][*pBnIdx]] : + NULL; + if (((ref_p0 == ref_q0) && (ref_p1 == ref_q1)) || ((ref_p0 == ref_q1) && (ref_p1 == ref_q0))) { + int16_t (*pMv0)[MB_BLOCK4x4_NUM][MV_A] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_0] : + pCurDqLayer->pMv[LIST_0]; + int16_t (*pMv1)[MB_BLOCK4x4_NUM][MV_A] = pCurDqLayer->pDec != NULL ? 
pCurDqLayer->pDec->pMv[LIST_1] : + pCurDqLayer->pMv[LIST_1]; + pBS[j + (i << 1)] = ON_MB_BS (ref_p0, ref_q0, ref_p1, ref_q1, pMv0, pMv1, iMbXy, iNeighMb, *pB8x8Idx, *pBnIdx); + } + } + pBnIdx++; + } + pB8x8Idx += 4; + } + } else if (pCurDqLayer->pTransformSize8x8Flag[iNeighMb]) { + for (i = 0; i < 2; i++) { + uint8_t uiNzc = 0; + for (j = 0; uiNzc == 0 && j < 4; j++) { + uiNzc |= GetPNzc (pCurDqLayer, iNeighMb)[* (pBn8x8Idx + j)]; + } + for (j = 0; j < 2; j++) { + if (uiNzc | GetPNzc (pCurDqLayer, iMbXy)[*pBIdx]) { + pBS[j + (i << 1)] = 2; + } else { + pBS[j + (i << 1)] = 1; + ref_p0 = iRefIdx0[iMbXy][*pBIdx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_0][iRefIdx0[iMbXy][*pBIdx]] : NULL; + ref_q0 = iRefIdx0[iNeighMb][*pBn8x8Idx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_0][iRefIdx0[iNeighMb][*pBn8x8Idx]] : + NULL; + ref_p1 = iRefIdx1[iMbXy][*pBIdx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_1][iRefIdx1[iMbXy][*pBIdx]] : NULL; + ref_q1 = iRefIdx1[iNeighMb][*pBn8x8Idx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_1][iRefIdx1[iNeighMb][*pBn8x8Idx]] : + NULL; + if (((ref_p0 == ref_q0) && (ref_p1 == ref_q1)) || ((ref_p0 == ref_q1) && (ref_p1 == ref_q0))) { + int16_t (*pMv0)[MB_BLOCK4x4_NUM][MV_A] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_0] : + pCurDqLayer->pMv[LIST_0]; + int16_t (*pMv1)[MB_BLOCK4x4_NUM][MV_A] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_1] : + pCurDqLayer->pMv[LIST_1]; + pBS[j + (i << 1)] = ON_MB_BS (ref_p0, ref_q0, ref_p1, ref_q1, pMv0, pMv1, iMbXy, iNeighMb, *pBIdx, *pBn8x8Idx); + } + } + pBIdx++; + } + pBn8x8Idx += 4; + } + } else { + // only 4x4 transform + for (i = 0; i < 4; i++) { + if (GetPNzc (pCurDqLayer, iMbXy)[*pBIdx] | GetPNzc (pCurDqLayer, iNeighMb)[*pBnIdx]) { + pBS[i] = 2; + } else { + pBS[i] = 1; + ref_p0 = iRefIdx0[iMbXy][*pBIdx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_0][iRefIdx0[iMbXy][*pBIdx]] : NULL; + ref_q0 = iRefIdx0[iNeighMb][*pBnIdx] > REF_NOT_IN_LIST ? 
pFilter->pRefPics[LIST_0][iRefIdx0[iNeighMb][*pBnIdx]] : + NULL; + ref_p1 = iRefIdx1[iMbXy][*pBIdx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_1][iRefIdx1[iMbXy][*pBIdx]] : NULL; + ref_q1 = iRefIdx1[iNeighMb][*pBnIdx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_1][iRefIdx1[iNeighMb][*pBnIdx]] : + NULL; + if (((ref_p0 == ref_q0) && (ref_p1 == ref_q1)) || ((ref_p0 == ref_q1) && (ref_p1 == ref_q0))) { + int16_t (*pMv0)[MB_BLOCK4x4_NUM][MV_A] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_0] : + pCurDqLayer->pMv[LIST_0]; + int16_t (*pMv1)[MB_BLOCK4x4_NUM][MV_A] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_1] : + pCurDqLayer->pMv[LIST_1]; + pBS[i] = ON_MB_BS (ref_p0, ref_q0, ref_p1, ref_q1, pMv0, pMv1, iMbXy, iNeighMb, *pBIdx, *pBnIdx); + } + } + pBIdx++; + pBnIdx++; + } + } + + return uiBSx4; +} +int32_t DeblockingAvailableNoInterlayer (PDqLayer pCurDqLayer, int32_t iFilterIdc) { + int32_t iMbY = pCurDqLayer->iMbY; + int32_t iMbX = pCurDqLayer->iMbX; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + bool bLeftFlag = false; + bool bTopFlag = false; + + if (2 == iFilterIdc) { + bLeftFlag = (iMbX > 0) && (pCurDqLayer->pSliceIdc[iMbXy] == pCurDqLayer->pSliceIdc[iMbXy - 1]); + bTopFlag = (iMbY > 0) && (pCurDqLayer->pSliceIdc[iMbXy] == pCurDqLayer->pSliceIdc[iMbXy - pCurDqLayer->iMbWidth]); + } else { //if ( 0 == iFilterIdc ) + bLeftFlag = (iMbX > 0); + bTopFlag = (iMbY > 0); + } + return (bLeftFlag << LEFT_FLAG_BIT) | (bTopFlag << TOP_FLAG_BIT); +} + +void FilteringEdgeLumaH (SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iStride, uint8_t* pBS) { + int32_t iIndexA; + int32_t iAlpha; + int32_t iBeta; + ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16); + + GET_ALPHA_BETA_FROM_QP (pFilter->iLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha, + iBeta); + + if (iAlpha | iBeta) { + TC0_TBL_LOOKUP (tc, iIndexA, pBS, 0); + pFilter->pLoopf->pfLumaDeblockingLT4Ver (pPix, iStride, iAlpha, iBeta, tc); + } + return; +} + + +void 
FilteringEdgeLumaV (SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iStride, uint8_t* pBS) { + int32_t iIndexA; + int32_t iAlpha; + int32_t iBeta; + ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16); + + GET_ALPHA_BETA_FROM_QP (pFilter->iLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha, + iBeta); + + if (iAlpha | iBeta) { + TC0_TBL_LOOKUP (tc, iIndexA, pBS, 0); + pFilter->pLoopf->pfLumaDeblockingLT4Hor (pPix, iStride, iAlpha, iBeta, tc); + } + return; +} + + +void FilteringEdgeLumaIntraH (SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iStride, uint8_t* pBS) { + int32_t iIndexA; + int32_t iAlpha; + int32_t iBeta; + + GET_ALPHA_BETA_FROM_QP (pFilter->iLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha, + iBeta); + + if (iAlpha | iBeta) { + pFilter->pLoopf->pfLumaDeblockingEQ4Ver (pPix, iStride, iAlpha, iBeta); + } + return; +} + +void FilteringEdgeLumaIntraV (SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iStride, uint8_t* pBS) { + int32_t iIndexA; + int32_t iAlpha; + int32_t iBeta; + + GET_ALPHA_BETA_FROM_QP (pFilter->iLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha, + iBeta); + + if (iAlpha | iBeta) { + pFilter->pLoopf->pfLumaDeblockingEQ4Hor (pPix, iStride, iAlpha, iBeta); + } + return; +} +void FilteringEdgeChromaH (SDeblockingFilter* pFilter, uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, + uint8_t* pBS) { + int32_t iIndexA; + int32_t iAlpha; + int32_t iBeta; + ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16); + if (pFilter->iChromaQP[0] == pFilter->iChromaQP[1]) { + + GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[0], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha, + iBeta); + + if (iAlpha | iBeta) { + TC0_TBL_LOOKUP (tc, iIndexA, pBS, 1); + pFilter->pLoopf->pfChromaDeblockingLT4Ver (pPixCb, pPixCr, iStride, iAlpha, iBeta, tc); + } + } else { + + for (int i = 0; i < 2; i++) { + + + GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[i], 
pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha, + iBeta); + + if (iAlpha | iBeta) { + uint8_t* pPixCbCr = (i == 0) ? pPixCb : pPixCr; + TC0_TBL_LOOKUP (tc, iIndexA, pBS, 1); + pFilter->pLoopf->pfChromaDeblockingLT4Ver2 (pPixCbCr, iStride, iAlpha, iBeta, tc); + } + + + + } + + } + return; +} +void FilteringEdgeChromaV (SDeblockingFilter* pFilter, uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, + uint8_t* pBS) { + int32_t iIndexA; + int32_t iAlpha; + int32_t iBeta; + ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16); + if (pFilter->iChromaQP[0] == pFilter->iChromaQP[1]) { + + + GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[0], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha, + iBeta); + + if (iAlpha | iBeta) { + TC0_TBL_LOOKUP (tc, iIndexA, pBS, 1); + pFilter->pLoopf->pfChromaDeblockingLT4Hor (pPixCb, pPixCr, iStride, iAlpha, iBeta, tc); + } + + + } else { + + for (int i = 0; i < 2; i++) { + + GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[i], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha, + iBeta); + + if (iAlpha | iBeta) { + uint8_t* pPixCbCr = (i == 0) ? 
pPixCb : pPixCr; + TC0_TBL_LOOKUP (tc, iIndexA, pBS, 1); + pFilter->pLoopf->pfChromaDeblockingLT4Hor2 (pPixCbCr, iStride, iAlpha, iBeta, tc); + } + + + } + } + return; +} + +void FilteringEdgeChromaIntraH (SDeblockingFilter* pFilter, uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, + uint8_t* pBS) { + int32_t iIndexA; + int32_t iAlpha; + int32_t iBeta; + if (pFilter->iChromaQP[0] == pFilter->iChromaQP[1]) { + + GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[0], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha, + iBeta); + + if (iAlpha | iBeta) { + pFilter->pLoopf->pfChromaDeblockingEQ4Ver (pPixCb, pPixCr, iStride, iAlpha, iBeta); + } + } else { + + for (int i = 0; i < 2; i++) { + + GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[i], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha, + iBeta); + + if (iAlpha | iBeta) { + uint8_t* pPixCbCr = (i == 0) ? pPixCb : pPixCr; + pFilter->pLoopf->pfChromaDeblockingEQ4Ver2 (pPixCbCr, iStride, iAlpha, iBeta); + } + + } + } + return; +} + +void FilteringEdgeChromaIntraV (SDeblockingFilter* pFilter, uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, + uint8_t* pBS) { + int32_t iIndexA; + int32_t iAlpha; + int32_t iBeta; + if (pFilter->iChromaQP[0] == pFilter->iChromaQP[1]) { // QP of cb and cr are the same + + + + + GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[0], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha, + iBeta); + if (iAlpha | iBeta) { + pFilter->pLoopf->pfChromaDeblockingEQ4Hor (pPixCb, pPixCr, iStride, iAlpha, iBeta); + } + } else { + + for (int i = 0; i < 2; i++) { + + + GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[i], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha, + iBeta); + if (iAlpha | iBeta) { + uint8_t* pPixCbCr = (i == 0) ? 
pPixCb : pPixCr; + pFilter->pLoopf->pfChromaDeblockingEQ4Hor2 (pPixCbCr, iStride, iAlpha, iBeta); + } + } + + } + return; +} + + +static void DeblockingInterMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, uint8_t nBS[2][4][4], + int32_t iBoundryFlag) { + int32_t iMbXyIndex = pCurDqLayer->iMbXyIndex; + int32_t iMbX = pCurDqLayer->iMbX; + int32_t iMbY = pCurDqLayer->iMbY; + + int32_t iCurLumaQp = pCurDqLayer->pLumaQp[iMbXyIndex]; + //int32_t* iCurChromaQp = pCurDqLayer->pChromaQp[iMbXyIndex]; + int8_t* pCurChromaQp = pCurDqLayer->pChromaQp[iMbXyIndex]; + int32_t iLineSize = pFilter->iCsStride[0]; + int32_t iLineSizeUV = pFilter->iCsStride[1]; + + uint8_t* pDestY, * pDestCb, * pDestCr; + pDestY = pFilter->pCsData[0] + ((iMbY * iLineSize + iMbX) << 4); + pDestCb = pFilter->pCsData[1] + ((iMbY * iLineSizeUV + iMbX) << 3); + pDestCr = pFilter->pCsData[2] + ((iMbY * iLineSizeUV + iMbX) << 3); + + //Vertical margin + if (iBoundryFlag & LEFT_FLAG_MASK) { + int32_t iLeftXyIndex = iMbXyIndex - 1; + pFilter->iLumaQP = (iCurLumaQp + pCurDqLayer->pLumaQp[iLeftXyIndex] + 1) >> 1; + for (int i = 0; i < 2; i++) { + pFilter->iChromaQP[i] = (pCurChromaQp[i] + pCurDqLayer->pChromaQp[iLeftXyIndex][i] + 1) >> 1; + } + if (nBS[0][0][0] == 0x04) { + FilteringEdgeLumaIntraV (pFilter, pDestY, iLineSize, NULL); + FilteringEdgeChromaIntraV (pFilter, pDestCb, pDestCr, iLineSizeUV, NULL); + } else { + if (* (uint32_t*)nBS[0][0] != 0) { + FilteringEdgeLumaV (pFilter, pDestY, iLineSize, nBS[0][0]); + FilteringEdgeChromaV (pFilter, pDestCb, pDestCr, iLineSizeUV, nBS[0][0]); + } + } + } + + pFilter->iLumaQP = iCurLumaQp; + pFilter->iChromaQP[0] = pCurChromaQp[0]; + pFilter->iChromaQP[1] = pCurChromaQp[1]; + + if (* (uint32_t*)nBS[0][1] != 0 && !pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) { + FilteringEdgeLumaV (pFilter, &pDestY[1 << 2], iLineSize, nBS[0][1]); + } + + if (* (uint32_t*)nBS[0][2] != 0) { + FilteringEdgeLumaV (pFilter, &pDestY[2 << 2], iLineSize, nBS[0][2]); + 
FilteringEdgeChromaV (pFilter, &pDestCb[2 << 1], &pDestCr[2 << 1], iLineSizeUV, nBS[0][2]); + } + + if (* (uint32_t*)nBS[0][3] != 0 && !pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) { + FilteringEdgeLumaV (pFilter, &pDestY[3 << 2], iLineSize, nBS[0][3]); + } + + if (iBoundryFlag & TOP_FLAG_MASK) { + int32_t iTopXyIndex = iMbXyIndex - pCurDqLayer->iMbWidth; + pFilter->iLumaQP = (iCurLumaQp + pCurDqLayer->pLumaQp[iTopXyIndex] + 1) >> 1; + for (int i = 0; i < 2; i++) { + pFilter->iChromaQP[i] = (pCurChromaQp[i] + pCurDqLayer->pChromaQp[iTopXyIndex][i] + 1) >> 1; + } + + if (nBS[1][0][0] == 0x04) { + FilteringEdgeLumaIntraH (pFilter, pDestY, iLineSize, NULL); + FilteringEdgeChromaIntraH (pFilter, pDestCb, pDestCr, iLineSizeUV, NULL); + } else { + if (* (uint32_t*)nBS[1][0] != 0) { + FilteringEdgeLumaH (pFilter, pDestY, iLineSize, nBS[1][0]); + FilteringEdgeChromaH (pFilter, pDestCb, pDestCr, iLineSizeUV, nBS[1][0]); + } + } + } + + pFilter->iLumaQP = iCurLumaQp; + pFilter->iChromaQP[0] = pCurChromaQp[0]; + pFilter->iChromaQP[1] = pCurChromaQp[1]; + + if (* (uint32_t*)nBS[1][1] != 0 && !pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) { + FilteringEdgeLumaH (pFilter, &pDestY[ (1 << 2)*iLineSize], iLineSize, nBS[1][1]); + } + + if (* (uint32_t*)nBS[1][2] != 0) { + FilteringEdgeLumaH (pFilter, &pDestY[ (2 << 2)*iLineSize], iLineSize, nBS[1][2]); + FilteringEdgeChromaH (pFilter, &pDestCb[ (2 << 1)*iLineSizeUV], &pDestCr[ (2 << 1)*iLineSizeUV], iLineSizeUV, + nBS[1][2]); + } + + if (* (uint32_t*)nBS[1][3] != 0 && !pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) { + FilteringEdgeLumaH (pFilter, &pDestY[ (3 << 2)*iLineSize], iLineSize, nBS[1][3]); + } +} + +void FilteringEdgeLumaHV (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int32_t iBoundryFlag) { + int32_t iMbXyIndex = pCurDqLayer->iMbXyIndex; + int32_t iMbX = pCurDqLayer->iMbX; + int32_t iMbY = pCurDqLayer->iMbY; + int32_t iMbWidth = pCurDqLayer->iMbWidth; + int32_t iLineSize = pFilter->iCsStride[0]; + + uint8_t* 
pDestY; + int32_t iCurQp; + int32_t iIndexA, iAlpha, iBeta; + + ENFORCE_STACK_ALIGN_1D (int8_t, iTc, 4, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, uiBSx4, 4, 4); + + pDestY = pFilter->pCsData[0] + ((iMbY * iLineSize + iMbX) << 4); + iCurQp = pCurDqLayer->pLumaQp[iMbXyIndex]; + + * (uint32_t*)uiBSx4 = 0x03030303; + + // luma v + if (iBoundryFlag & LEFT_FLAG_MASK) { + pFilter->iLumaQP = (iCurQp + pCurDqLayer->pLumaQp[iMbXyIndex - 1] + 1) >> 1; + FilteringEdgeLumaIntraV (pFilter, pDestY, iLineSize, NULL); + } + + pFilter->iLumaQP = iCurQp; + GET_ALPHA_BETA_FROM_QP (pFilter->iLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha, + iBeta); + if (iAlpha | iBeta) { + TC0_TBL_LOOKUP (iTc, iIndexA, uiBSx4, 0); + + if (!pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) { + pFilter->pLoopf->pfLumaDeblockingLT4Hor (&pDestY[1 << 2], iLineSize, iAlpha, iBeta, iTc); + } + + pFilter->pLoopf->pfLumaDeblockingLT4Hor (&pDestY[2 << 2], iLineSize, iAlpha, iBeta, iTc); + + if (!pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) { + pFilter->pLoopf->pfLumaDeblockingLT4Hor (&pDestY[3 << 2], iLineSize, iAlpha, iBeta, iTc); + } + } + + // luma h + if (iBoundryFlag & TOP_FLAG_MASK) { + pFilter->iLumaQP = (iCurQp + pCurDqLayer->pLumaQp[iMbXyIndex - iMbWidth] + 1) >> 1; + FilteringEdgeLumaIntraH (pFilter, pDestY, iLineSize, NULL); + } + + pFilter->iLumaQP = iCurQp; + if (iAlpha | iBeta) { + if (!pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) { + pFilter->pLoopf->pfLumaDeblockingLT4Ver (&pDestY[ (1 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc); + } + + pFilter->pLoopf->pfLumaDeblockingLT4Ver (&pDestY[ (2 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc); + + if (!pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) { + pFilter->pLoopf->pfLumaDeblockingLT4Ver (&pDestY[ (3 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc); + } + } +} +void FilteringEdgeChromaHV (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int32_t iBoundryFlag) { + int32_t iMbXyIndex = 
pCurDqLayer->iMbXyIndex; + int32_t iMbX = pCurDqLayer->iMbX; + int32_t iMbY = pCurDqLayer->iMbY; + int32_t iMbWidth = pCurDqLayer->iMbWidth; + int32_t iLineSize = pFilter->iCsStride[1]; + + uint8_t* pDestCb; + uint8_t* pDestCr; + //int32_t iCurQp; + int8_t* pCurQp; + int32_t iIndexA, iAlpha, iBeta; + + ENFORCE_STACK_ALIGN_1D (int8_t, iTc, 4, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, uiBSx4, 4, 4); + + pDestCb = pFilter->pCsData[1] + ((iMbY * iLineSize + iMbX) << 3); + pDestCr = pFilter->pCsData[2] + ((iMbY * iLineSize + iMbX) << 3); + pCurQp = pCurDqLayer->pChromaQp[iMbXyIndex]; + + * (uint32_t*)uiBSx4 = 0x03030303; + + +// chroma v + if (iBoundryFlag & LEFT_FLAG_MASK) { + + for (int i = 0; i < 2; i++) { + pFilter->iChromaQP[i] = (pCurQp[i] + pCurDqLayer->pChromaQp[iMbXyIndex - 1][i] + 1) >> 1; + + } + FilteringEdgeChromaIntraV (pFilter, pDestCb, pDestCr, iLineSize, NULL); + } + + pFilter->iChromaQP[0] = pCurQp[0]; + pFilter->iChromaQP[1] = pCurQp[1]; + if (pFilter->iChromaQP[0] == pFilter->iChromaQP[1]) { + GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[0], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha, + iBeta); + if (iAlpha | iBeta) { + TC0_TBL_LOOKUP (iTc, iIndexA, uiBSx4, 1); + pFilter->pLoopf->pfChromaDeblockingLT4Hor (&pDestCb[2 << 1], &pDestCr[2 << 1], iLineSize, iAlpha, iBeta, iTc); + } + } else { + + for (int i = 0; i < 2; i++) { + GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[i], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha, + iBeta); + if (iAlpha | iBeta) { + uint8_t* pDestCbCr = (i == 0) ? 
&pDestCb[2 << 1] : &pDestCr[2 << 1]; + TC0_TBL_LOOKUP (iTc, iIndexA, uiBSx4, 1); + pFilter->pLoopf->pfChromaDeblockingLT4Hor2 (pDestCbCr, iLineSize, iAlpha, iBeta, iTc); + } + + } + } + + // chroma h + + if (iBoundryFlag & TOP_FLAG_MASK) { + for (int i = 0; i < 2; i++) { + pFilter->iChromaQP[i] = (pCurQp[i] + pCurDqLayer->pChromaQp[iMbXyIndex - iMbWidth][i] + 1) >> 1; + } + FilteringEdgeChromaIntraH (pFilter, pDestCb, pDestCr, iLineSize, NULL); + } + + pFilter->iChromaQP[0] = pCurQp[0]; + pFilter->iChromaQP[1] = pCurQp[1]; + + if (pFilter->iChromaQP[0] == pFilter->iChromaQP[1]) { + GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[0], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha, + iBeta); + if (iAlpha | iBeta) { + TC0_TBL_LOOKUP (iTc, iIndexA, uiBSx4, 1); + pFilter->pLoopf->pfChromaDeblockingLT4Ver (&pDestCb[ (2 << 1)*iLineSize], &pDestCr[ (2 << 1)*iLineSize], iLineSize, + iAlpha, iBeta, iTc); + } + } else { + for (int i = 0; i < 2; i++) { + + GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[i], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha, + iBeta); + if (iAlpha | iBeta) { + TC0_TBL_LOOKUP (iTc, iIndexA, uiBSx4, 1); + uint8_t* pDestCbCr = (i == 0) ? &pDestCb[ (2 << 1) * iLineSize] : &pDestCr[ (2 << 1) * iLineSize]; + pFilter->pLoopf->pfChromaDeblockingLT4Ver2 (pDestCbCr, iLineSize, + iAlpha, iBeta, iTc); + } + } + + + } +} + +// merge h&v lookup table operation to save performance +static void DeblockingIntraMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int32_t iBoundryFlag) { + FilteringEdgeLumaHV (pCurDqLayer, pFilter, iBoundryFlag); + FilteringEdgeChromaHV (pCurDqLayer, pFilter, iBoundryFlag); +} + +void WelsDeblockingMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int32_t iBoundryFlag) { + uint8_t nBS[2][4][4] = {{{ 0 }}}; + + int32_t iMbXyIndex = pCurDqLayer->iMbXyIndex; + uint32_t iCurMbType = pCurDqLayer->pDec != NULL ? 
pCurDqLayer->pDec->pMbType[iMbXyIndex] : + pCurDqLayer->pMbType[iMbXyIndex]; + int32_t iMbNb; + + PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; + PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader; + bool bBSlice = pSliceHeader->eSliceType == B_SLICE; + + switch (iCurMbType) { + case MB_TYPE_INTRA4x4: + case MB_TYPE_INTRA8x8: + case MB_TYPE_INTRA16x16: + case MB_TYPE_INTRA_PCM: + DeblockingIntraMb (pCurDqLayer, pFilter, iBoundryFlag); + break; + default: + + if (iBoundryFlag & LEFT_FLAG_MASK) { + iMbNb = iMbXyIndex - 1; + uint32_t uiMbType = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMbType[iMbNb] : pCurDqLayer->pMbType[iMbNb]; + if (bBSlice) { + * (uint32_t*)nBS[0][0] = IS_INTRA (uiMbType) ? 0x04040404 : + DeblockingBSliceBsMarginalMBAvcbase ( + pFilter, pCurDqLayer, 0, iMbNb, iMbXyIndex); + } else { + * (uint32_t*)nBS[0][0] = IS_INTRA (uiMbType) ? 0x04040404 : DeblockingBsMarginalMBAvcbase ( + pFilter, pCurDqLayer, 0, iMbNb, iMbXyIndex); + } + } else { + * (uint32_t*)nBS[0][0] = 0; + } + if (iBoundryFlag & TOP_FLAG_MASK) { + iMbNb = iMbXyIndex - pCurDqLayer->iMbWidth; + uint32_t uiMbType = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMbType[iMbNb] : pCurDqLayer->pMbType[iMbNb]; + if (bBSlice) { + * (uint32_t*)nBS[1][0] = IS_INTRA (uiMbType) ? 0x04040404 : + DeblockingBSliceBsMarginalMBAvcbase ( + pFilter, pCurDqLayer, 1, iMbNb, iMbXyIndex); + } else { + * (uint32_t*)nBS[1][0] = IS_INTRA (uiMbType) ? 
0x04040404 : DeblockingBsMarginalMBAvcbase ( + pFilter, pCurDqLayer, 1, iMbNb, iMbXyIndex); + } + } else { + * (uint32_t*)nBS[1][0] = 0; + } + //SKIP MB_16x16 or others + if (IS_SKIP (iCurMbType)) { + * (uint32_t*)nBS[0][1] = * (uint32_t*)nBS[0][2] = * (uint32_t*)nBS[0][3] = + * (uint32_t*)nBS[1][1] = * (uint32_t*)nBS[1][2] = * (uint32_t*)nBS[1][3] = 0; + } else { + if (IS_INTER_16x16 (iCurMbType)) { + if (!pCurDqLayer->pTransformSize8x8Flag[pCurDqLayer->iMbXyIndex]) { + DeblockingBSInsideMBAvsbase (GetPNzc (pCurDqLayer, iMbXyIndex), nBS, 1); + } else { + DeblockingBSInsideMBAvsbase8x8 (GetPNzc (pCurDqLayer, iMbXyIndex), nBS, 1); + } + } else { + + if (bBSlice) { + DeblockingBSliceBSInsideMBNormal (pFilter, pCurDqLayer, nBS, GetPNzc (pCurDqLayer, iMbXyIndex), iMbXyIndex); + } else { + DeblockingBSInsideMBNormal (pFilter, pCurDqLayer, nBS, GetPNzc (pCurDqLayer, iMbXyIndex), iMbXyIndex); + } + } + } + DeblockingInterMb (pCurDqLayer, pFilter, nBS, iBoundryFlag); + break; + } +} + +/*! + * \brief AVC slice deblocking filtering target layer + * + * \param dec Wels avc decoder context + * + * \return NONE + */ +void WelsDeblockingFilterSlice (PWelsDecoderContext pCtx, PDeblockingFilterMbFunc pDeblockMb) { + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + PSliceHeaderExt pSliceHeaderExt = &pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt; + int32_t iMbWidth = pCurDqLayer->iMbWidth; + int32_t iTotalMbCount = pSliceHeaderExt->sSliceHeader.pSps->uiTotalMbCount; + + SDeblockingFilter pFilter; + memset (&pFilter, 0, sizeof (pFilter)); + PFmo pFmo = pCtx->pFmo; + int32_t iNextMbXyIndex = 0; + int32_t iTotalNumMb = pCurDqLayer->sLayerInfo.sSliceInLayer.iTotalMbInCurSlice; + int32_t iCountNumMb = 0; + int32_t iBoundryFlag; + int32_t iFilterIdc = pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.uiDisableDeblockingFilterIdc; + + /* Step1: parameters set */ + pFilter.pCsData[0] = pCtx->pDec->pData[0]; + pFilter.pCsData[1] = pCtx->pDec->pData[1]; + 
pFilter.pCsData[2] = pCtx->pDec->pData[2]; + + pFilter.iCsStride[0] = pCtx->pDec->iLinesize[0]; + pFilter.iCsStride[1] = pCtx->pDec->iLinesize[1]; + + pFilter.eSliceType = (EWelsSliceType) pCurDqLayer->sLayerInfo.sSliceInLayer.eSliceType; + + pFilter.iSliceAlphaC0Offset = pSliceHeaderExt->sSliceHeader.iSliceAlphaC0Offset; + pFilter.iSliceBetaOffset = pSliceHeaderExt->sSliceHeader.iSliceBetaOffset; + + pFilter.pLoopf = &pCtx->sDeblockingFunc; + pFilter.pRefPics[0] = pCtx->sRefPic.pRefList[0]; + pFilter.pRefPics[1] = pCtx->sRefPic.pRefList[1]; + + /* Step2: macroblock deblocking */ + if (0 == iFilterIdc || 2 == iFilterIdc) { + iNextMbXyIndex = pSliceHeaderExt->sSliceHeader.iFirstMbInSlice; + pCurDqLayer->iMbX = iNextMbXyIndex % iMbWidth; + pCurDqLayer->iMbY = iNextMbXyIndex / iMbWidth; + pCurDqLayer->iMbXyIndex = iNextMbXyIndex; + + do { + iBoundryFlag = DeblockingAvailableNoInterlayer (pCurDqLayer, iFilterIdc); + + pDeblockMb (pCurDqLayer, &pFilter, iBoundryFlag); + + ++iCountNumMb; + if (iCountNumMb >= iTotalNumMb) { + break; + } + + if (pSliceHeaderExt->sSliceHeader.pPps->uiNumSliceGroups > 1) { + iNextMbXyIndex = FmoNextMb (pFmo, iNextMbXyIndex); + } else { + ++iNextMbXyIndex; + } + if (-1 == iNextMbXyIndex || iNextMbXyIndex >= iTotalMbCount) { // slice group boundary or end of a frame + break; + } + + pCurDqLayer->iMbX = iNextMbXyIndex % iMbWidth; + pCurDqLayer->iMbY = iNextMbXyIndex / iMbWidth; + pCurDqLayer->iMbXyIndex = iNextMbXyIndex; + } while (1); + } +} + +/*! 
+* \brief AVC slice init deblocking filtering target layer +* +* \in and out param SDeblockingFilter +* \in and out param iFilterIdc +* +* \return NONE +*/ +void WelsDeblockingInitFilter (PWelsDecoderContext pCtx, SDeblockingFilter& pFilter, int32_t& iFilterIdc) { + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + PSliceHeaderExt pSliceHeaderExt = &pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt; + + memset (&pFilter, 0, sizeof (pFilter)); + + iFilterIdc = pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.uiDisableDeblockingFilterIdc; + + /* Step1: parameters set */ + pFilter.pCsData[0] = pCtx->pDec->pData[0]; + pFilter.pCsData[1] = pCtx->pDec->pData[1]; + pFilter.pCsData[2] = pCtx->pDec->pData[2]; + + pFilter.iCsStride[0] = pCtx->pDec->iLinesize[0]; + pFilter.iCsStride[1] = pCtx->pDec->iLinesize[1]; + + pFilter.eSliceType = (EWelsSliceType)pCurDqLayer->sLayerInfo.sSliceInLayer.eSliceType; + + pFilter.iSliceAlphaC0Offset = pSliceHeaderExt->sSliceHeader.iSliceAlphaC0Offset; + pFilter.iSliceBetaOffset = pSliceHeaderExt->sSliceHeader.iSliceBetaOffset; + + pFilter.pLoopf = &pCtx->sDeblockingFunc; + pFilter.pRefPics[0] = pCtx->sRefPic.pRefList[0]; + pFilter.pRefPics[1] = pCtx->sRefPic.pRefList[1]; +} + +/*! +* \brief AVC MB deblocking filtering target layer +* +* \param DqLayer which has the current location of MB to be deblocked. +* +* \return NONE +*/ +void WelsDeblockingFilterMB (PDqLayer pCurDqLayer, SDeblockingFilter& pFilter, int32_t& iFilterIdc, + PDeblockingFilterMbFunc pDeblockMb) { + /* macroblock deblocking */ + if (0 == iFilterIdc || 2 == iFilterIdc) { + int32_t iBoundryFlag = DeblockingAvailableNoInterlayer (pCurDqLayer, iFilterIdc); + pDeblockMb (pCurDqLayer, &pFilter, iBoundryFlag); + } +} +/*! 
+ * \brief deblocking module initialize + * + * \param pf + * cpu + * + * \return NONE + */ + +void DeblockingInit (SDeblockingFunc* pFunc, int32_t iCpu) { + pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_c; + pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_c; + pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_c; + pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_c; + + pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_c; + pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_c; + pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_c; + pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_c; + + pFunc->pfChromaDeblockingLT4Ver2 = DeblockChromaLt4V2_c; + pFunc->pfChromaDeblockingEQ4Ver2 = DeblockChromaEq4V2_c; + pFunc->pfChromaDeblockingLT4Hor2 = DeblockChromaLt4H2_c; + pFunc->pfChromaDeblockingEQ4Hor2 = DeblockChromaEq4H2_c; + +#ifdef X86_ASM + if (iCpu & WELS_CPU_SSSE3) { + pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_ssse3; + pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_ssse3; + pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_ssse3; + pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_ssse3; + pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_ssse3; + pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_ssse3; + pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_ssse3; + pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_ssse3; + } +#endif + +#if defined(HAVE_NEON) + if (iCpu & WELS_CPU_NEON) { + pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_neon; + pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_neon; + pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_neon; + pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_neon; + + pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_neon; + pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_neon; + pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_neon; + pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_neon; + } +#endif + +#if defined(HAVE_NEON_AARCH64) + if (iCpu & WELS_CPU_NEON) { + pFunc->pfLumaDeblockingLT4Ver 
= DeblockLumaLt4V_AArch64_neon; + pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_AArch64_neon; + pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_AArch64_neon; + pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_AArch64_neon; + + pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_AArch64_neon; + pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_AArch64_neon; + pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_AArch64_neon; + pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_AArch64_neon; + } +#endif + +#if defined(HAVE_MMI) + if (iCpu & WELS_CPU_MMI) { + pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_mmi; + pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_mmi; + pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_mmi; + pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_mmi; + pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_mmi; + pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_mmi; + pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_mmi; + pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_mmi; + } +#endif//HAVE_MMI + +#if defined(HAVE_MSA) + if (iCpu & WELS_CPU_MSA) { + pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_msa; + pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_msa; + pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_msa; + pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_msa; + pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_msa; + pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_msa; + pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_msa; + pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_msa; + } +#endif//HAVE_MSA +} + +} // namespace WelsDec diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/decode_mb_aux.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/decode_mb_aux.cpp new file mode 100644 index 000000000..8d4ef3c91 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/decode_mb_aux.cpp @@ -0,0 +1,190 @@ +/*! 
+ * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include + +#include "decode_mb_aux.h" +#include "wels_common_basis.h" + +namespace WelsDec { + +//NOTE::: p_RS should NOT be modified and it will lead to mismatch with JSVM. +// so should allocate kA array to store the temporary value (idct). 
+void IdctResAddPred_c (uint8_t* pPred, const int32_t kiStride, int16_t* pRs) { + int16_t iSrc[16]; + + uint8_t* pDst = pPred; + const int32_t kiStride2 = kiStride << 1; + const int32_t kiStride3 = kiStride + kiStride2; + int32_t i; + + for (i = 0; i < 4; i++) { + const int32_t kiY = i << 2; + const int32_t kiT0 = pRs[kiY] + pRs[kiY + 2]; + const int32_t kiT1 = pRs[kiY] - pRs[kiY + 2]; + const int32_t kiT2 = (pRs[kiY + 1] >> 1) - pRs[kiY + 3]; + const int32_t kiT3 = pRs[kiY + 1] + (pRs[kiY + 3] >> 1); + + iSrc[kiY] = kiT0 + kiT3; + iSrc[kiY + 1] = kiT1 + kiT2; + iSrc[kiY + 2] = kiT1 - kiT2; + iSrc[kiY + 3] = kiT0 - kiT3; + } + + for (i = 0; i < 4; i++) { + int32_t kT1 = iSrc[i] + iSrc[i + 8]; + int32_t kT2 = iSrc[i + 4] + (iSrc[i + 12] >> 1); + int32_t kT3 = (32 + kT1 + kT2) >> 6; + int32_t kT4 = (32 + kT1 - kT2) >> 6; + + pDst[i] = WelsClip1 (kT3 + pPred[i]); + pDst[i + kiStride3] = WelsClip1 (kT4 + pPred[i + kiStride3]); + + kT1 = iSrc[i] - iSrc[i + 8]; + kT2 = (iSrc[i + 4] >> 1) - iSrc[i + 12]; + pDst[i + kiStride] = WelsClip1 (((32 + kT1 + kT2) >> 6) + pDst[i + kiStride]); + pDst[i + kiStride2] = WelsClip1 (((32 + kT1 - kT2) >> 6) + pDst[i + kiStride2]); + } +} + +void IdctResAddPred8x8_c (uint8_t* pPred, const int32_t kiStride, int16_t* pRs) { + // To make the ASM code easy to write, should using one funciton to apply hor and ver together, such as we did on HEVC + // Ugly code, just for easy debug, the final version need optimization + int16_t p[8], b[8]; + int16_t a[4]; + + int16_t iTmp[64]; + int16_t iRes[64]; + + // Horizontal + for (int i = 0; i < 8; i++) { + for (int j = 0; j < 8; j++) { + p[j] = pRs[j + (i << 3)]; + } + a[0] = p[0] + p[4]; + a[1] = p[0] - p[4]; + a[2] = p[6] - (p[2] >> 1); + a[3] = p[2] + (p[6] >> 1); + + b[0] = a[0] + a[3]; + b[2] = a[1] - a[2]; + b[4] = a[1] + a[2]; + b[6] = a[0] - a[3]; + + a[0] = -p[3] + p[5] - p[7] - (p[7] >> 1); + a[1] = p[1] + p[7] - p[3] - (p[3] >> 1); + a[2] = -p[1] + p[7] + p[5] + (p[5] >> 1); + a[3] = p[3] + 
p[5] + p[1] + (p[1] >> 1); + + b[1] = a[0] + (a[3] >> 2); + b[3] = a[1] + (a[2] >> 2); + b[5] = a[2] - (a[1] >> 2); + b[7] = a[3] - (a[0] >> 2); + + iTmp[0 + (i << 3)] = b[0] + b[7]; + iTmp[1 + (i << 3)] = b[2] - b[5]; + iTmp[2 + (i << 3)] = b[4] + b[3]; + iTmp[3 + (i << 3)] = b[6] + b[1]; + iTmp[4 + (i << 3)] = b[6] - b[1]; + iTmp[5 + (i << 3)] = b[4] - b[3]; + iTmp[6 + (i << 3)] = b[2] + b[5]; + iTmp[7 + (i << 3)] = b[0] - b[7]; + } + + //Vertical + for (int i = 0; i < 8; i++) { + for (int j = 0; j < 8; j++) { + p[j] = iTmp[i + (j << 3)]; + } + + a[0] = p[0] + p[4]; + a[1] = p[0] - p[4]; + a[2] = p[6] - (p[2] >> 1); + a[3] = p[2] + (p[6] >> 1); + + b[0] = a[0] + a[3]; + b[2] = a[1] - a[2]; + b[4] = a[1] + a[2]; + b[6] = a[0] - a[3]; + + a[0] = -p[3] + p[5] - p[7] - (p[7] >> 1); + a[1] = p[1] + p[7] - p[3] - (p[3] >> 1); + a[2] = -p[1] + p[7] + p[5] + (p[5] >> 1); + a[3] = p[3] + p[5] + p[1] + (p[1] >> 1); + + + b[1] = a[0] + (a[3] >> 2); + b[7] = a[3] - (a[0] >> 2); + b[3] = a[1] + (a[2] >> 2); + b[5] = a[2] - (a[1] >> 2); + + iRes[ (0 << 3) + i] = b[0] + b[7]; + iRes[ (1 << 3) + i] = b[2] - b[5]; + iRes[ (2 << 3) + i] = b[4] + b[3]; + iRes[ (3 << 3) + i] = b[6] + b[1]; + iRes[ (4 << 3) + i] = b[6] - b[1]; + iRes[ (5 << 3) + i] = b[4] - b[3]; + iRes[ (6 << 3) + i] = b[2] + b[5]; + iRes[ (7 << 3) + i] = b[0] - b[7]; + } + + uint8_t* pDst = pPred; + for (int i = 0; i < 8; i++) { + for (int j = 0; j < 8; j++) { + pDst[i * kiStride + j] = WelsClip1 (((32 + iRes[ (i << 3) + j]) >> 6) + pDst[i * kiStride + j]); + } + } + +} + +void GetI4LumaIChromaAddrTable (int32_t* pBlockOffset, const int32_t kiYStride, const int32_t kiUVStride) { + int32_t* pOffset = pBlockOffset; + int32_t i; + const uint8_t kuiScan0 = g_kuiScan8[0]; + + for (i = 0; i < 16; i++) { + const uint32_t kuiA = g_kuiScan8[i] - kuiScan0; + const uint32_t kuiX = kuiA & 0x07; + const uint32_t kuiY = kuiA >> 3; + + pOffset[i] = (kuiX + kiYStride * kuiY) << 2; + } + + for (i = 0; i < 4; i++) { + const uint32_t 
kuiA = g_kuiScan8[i] - kuiScan0; + + pOffset[16 + i] = + pOffset[20 + i] = ((kuiA & 0x07) + (kiUVStride/*>>1*/) * (kuiA >> 3)) << 2; + } +} + +} // namespace WelsDec diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/decode_slice.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/decode_slice.cpp new file mode 100644 index 000000000..d06a7d77f --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/decode_slice.cpp @@ -0,0 +1,3064 @@ +/*! + * \copy + * Copyright (c) 2008-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * Abstract + * current slice decoding + * + * History + * 07/10/2008 Created + * 08/09/2013 Modified + * + *****************************************************************************/ + + +#include "deblocking.h" + +#include "decode_slice.h" + +#include "parse_mb_syn_cavlc.h" +#include "parse_mb_syn_cabac.h" +#include "rec_mb.h" +#include "mv_pred.h" + +#include "cpu_core.h" + +namespace WelsDec { + +extern void FreePicture (PPicture pPic, CMemoryAlign* pMa); + +extern PPicture AllocPicture (PWelsDecoderContext pCtx, const int32_t kiPicWidth, const int32_t kiPicHeight); + +static bool CheckRefPics (const PWelsDecoderContext& pCtx) { + int32_t listCount = 1; + if (pCtx->eSliceType == B_SLICE) { + ++listCount; + } + for (int32_t list = LIST_0; list < listCount; ++list) { + int32_t shortRefCount = pCtx->sRefPic.uiShortRefCount[list]; + for (int32_t refIdx = 0; refIdx < shortRefCount; ++refIdx) { + if (!pCtx->sRefPic.pShortRefList[list][refIdx]) { + return false; + } + } + int32_t longRefCount = pCtx->sRefPic.uiLongRefCount[list]; + for (int32_t refIdx = 0; refIdx < longRefCount; ++refIdx) { + if (!pCtx->sRefPic.pLongRefList[list][refIdx]) { + return false; + } + } + } + return true; +} + +int32_t WelsTargetSliceConstruction (PWelsDecoderContext pCtx) { + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + PSlice pCurSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; + PSliceHeader pSliceHeader = &pCurSlice->sSliceHeaderExt.sSliceHeader; + + int32_t iTotalMbTargetLayer = pSliceHeader->pSps->uiTotalMbCount; + + int32_t iCurLayerWidth = pCurDqLayer->iMbWidth << 4; + int32_t iCurLayerHeight = pCurDqLayer->iMbHeight << 4; + + int32_t iNextMbXyIndex = 0; + PFmo pFmo = pCtx->pFmo; + + int32_t iTotalNumMb = pCurSlice->iTotalMbInCurSlice; + int32_t iCountNumMb = 0; + PDeblockingFilterMbFunc pDeblockMb = WelsDeblockingMb; + + if (!pCtx->sSpsPpsCtx.bAvcBasedFlag && iCurLayerWidth != pCtx->iCurSeqIntervalMaxPicWidth) { + return ERR_INFO_WIDTH_MISMATCH; + } + + iNextMbXyIndex = 
pSliceHeader->iFirstMbInSlice; + pCurDqLayer->iMbX = iNextMbXyIndex % pCurDqLayer->iMbWidth; + pCurDqLayer->iMbY = iNextMbXyIndex / pCurDqLayer->iMbWidth; + pCurDqLayer->iMbXyIndex = iNextMbXyIndex; + + if (0 == iNextMbXyIndex) { + pCurDqLayer->pDec->iSpsId = pCtx->pSps->iSpsId; + pCurDqLayer->pDec->iPpsId = pCtx->pPps->iPpsId; + + pCurDqLayer->pDec->uiQualityId = pCurDqLayer->sLayerInfo.sNalHeaderExt.uiQualityId; + } + + do { + if (iCountNumMb >= iTotalNumMb) { + break; + } + + if (!pCtx->pParam->bParseOnly) { //for parse only, actual recon MB unnecessary + if (WelsTargetMbConstruction (pCtx)) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, + "WelsTargetSliceConstruction():::MB(%d, %d) construction error. pCurSlice_type:%d", + pCurDqLayer->iMbX, pCurDqLayer->iMbY, pCurSlice->eSliceType); + + return ERR_INFO_MB_RECON_FAIL; + } + } + + ++iCountNumMb; + if (!pCurDqLayer->pMbCorrectlyDecodedFlag[iNextMbXyIndex]) { //already con-ed, overwrite + pCurDqLayer->pMbCorrectlyDecodedFlag[iNextMbXyIndex] = true; + pCtx->pDec->iMbEcedPropNum += (pCurDqLayer->pMbRefConcealedFlag[iNextMbXyIndex] ? 
1 : 0); + ++pCtx->iTotalNumMbRec; + } + + if (pCtx->iTotalNumMbRec > iTotalMbTargetLayer) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, + "WelsTargetSliceConstruction():::pCtx->iTotalNumMbRec:%d, iTotalMbTargetLayer:%d", + pCtx->iTotalNumMbRec, iTotalMbTargetLayer); + + return ERR_INFO_MB_NUM_EXCEED_FAIL; + } + + if (pSliceHeader->pPps->uiNumSliceGroups > 1) { + iNextMbXyIndex = FmoNextMb (pFmo, iNextMbXyIndex); + } else { + ++iNextMbXyIndex; + } + if (-1 == iNextMbXyIndex || iNextMbXyIndex >= iTotalMbTargetLayer) { // slice group boundary or end of a frame + break; + } + pCurDqLayer->iMbX = iNextMbXyIndex % pCurDqLayer->iMbWidth; + pCurDqLayer->iMbY = iNextMbXyIndex / pCurDqLayer->iMbWidth; + pCurDqLayer->iMbXyIndex = iNextMbXyIndex; + } while (1); + + pCtx->pDec->iWidthInPixel = iCurLayerWidth; + pCtx->pDec->iHeightInPixel = iCurLayerHeight; + + if ((pCurSlice->eSliceType != I_SLICE) && (pCurSlice->eSliceType != P_SLICE) && (pCurSlice->eSliceType != B_SLICE)) + return ERR_NONE; //no error but just ignore the type unsupported + + if (pCtx->pParam->bParseOnly) //for parse only, deblocking should not go on + return ERR_NONE; + + if (1 == pSliceHeader->uiDisableDeblockingFilterIdc + || pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer.iTotalMbInCurSlice <= 0) { + return ERR_NONE;//NO_SUPPORTED_FILTER_IDX + } else { + WelsDeblockingFilterSlice (pCtx, pDeblockMb); + } + // any other filter_idc not supported here, 7/22/2010 + + return ERR_NONE; +} + +int32_t WelsMbInterSampleConstruction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer, + uint8_t* pDstY, uint8_t* pDstU, uint8_t* pDstV, int32_t iStrideL, int32_t iStrideC) { + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + int32_t i, iIndex, iOffset; + + if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { + for (i = 0; i < 4; i++) { + iIndex = g_kuiMbCountScan4Idx[i << 2]; + if (pCurDqLayer->pNzc[iMbXy][iIndex] || pCurDqLayer->pNzc[iMbXy][iIndex + 1] || pCurDqLayer->pNzc[iMbXy][iIndex + 4] + || pCurDqLayer->pNzc[iMbXy][iIndex + 5]) 
{ + iOffset = ((iIndex >> 2) << 2) * iStrideL + ((iIndex % 4) << 2); + pCtx->pIdctResAddPredFunc8x8 (pDstY + iOffset, iStrideL, pCurDqLayer->pScaledTCoeff[iMbXy] + (i << 6)); + } + } + } else { + // luma. + const int8_t* pNzc = pCurDqLayer->pNzc[iMbXy]; + int16_t* pScaledTCoeff = pCurDqLayer->pScaledTCoeff[iMbXy]; + pCtx->pIdctFourResAddPredFunc (pDstY + 0 * iStrideL + 0, iStrideL, pScaledTCoeff + 0 * 64, pNzc + 0); + pCtx->pIdctFourResAddPredFunc (pDstY + 0 * iStrideL + 8, iStrideL, pScaledTCoeff + 1 * 64, pNzc + 2); + pCtx->pIdctFourResAddPredFunc (pDstY + 8 * iStrideL + 0, iStrideL, pScaledTCoeff + 2 * 64, pNzc + 8); + pCtx->pIdctFourResAddPredFunc (pDstY + 8 * iStrideL + 8, iStrideL, pScaledTCoeff + 3 * 64, pNzc + 10); + } + + const int8_t* pNzc = pCurDqLayer->pNzc[iMbXy]; + int16_t* pScaledTCoeff = pCurDqLayer->pScaledTCoeff[iMbXy]; + // Cb. + pCtx->pIdctFourResAddPredFunc (pDstU, iStrideC, pScaledTCoeff + 4 * 64, pNzc + 16); + // Cr. + pCtx->pIdctFourResAddPredFunc (pDstV, iStrideC, pScaledTCoeff + 5 * 64, pNzc + 18); + + return ERR_NONE; +} +int32_t WelsMbInterConstruction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer) { + int32_t iMbX = pCurDqLayer->iMbX; + int32_t iMbY = pCurDqLayer->iMbY; + uint8_t* pDstY, *pDstCb, *pDstCr; + + int32_t iLumaStride = pCtx->pDec->iLinesize[0]; + int32_t iChromaStride = pCtx->pDec->iLinesize[1]; + + pDstY = pCurDqLayer->pDec->pData[0] + ((iMbY * iLumaStride + iMbX) << 4); + pDstCb = pCurDqLayer->pDec->pData[1] + ((iMbY * iChromaStride + iMbX) << 3); + pDstCr = pCurDqLayer->pDec->pData[2] + ((iMbY * iChromaStride + iMbX) << 3); + + if (pCtx->eSliceType == P_SLICE) { + WELS_B_MB_REC_VERIFY (GetInterPred (pDstY, pDstCb, pDstCr, pCtx)); + } else { + if (pCtx->pTempDec == NULL) + pCtx->pTempDec = AllocPicture (pCtx, pCtx->pSps->iMbWidth << 4, pCtx->pSps->iMbHeight << 4); + uint8_t* pTempDstYCbCr[3]; + uint8_t* pDstYCbCr[3]; + pTempDstYCbCr[0] = pCtx->pTempDec->pData[0] + ((iMbY * iLumaStride + iMbX) << 4); + pTempDstYCbCr[1] = 
pCtx->pTempDec->pData[1] + ((iMbY * iChromaStride + iMbX) << 3); + pTempDstYCbCr[2] = pCtx->pTempDec->pData[2] + ((iMbY * iChromaStride + iMbX) << 3); + pDstYCbCr[0] = pDstY; + pDstYCbCr[1] = pDstCb; + pDstYCbCr[2] = pDstCr; + WELS_B_MB_REC_VERIFY (GetInterBPred (pDstYCbCr, pTempDstYCbCr, pCtx)); + } + WelsMbInterSampleConstruction (pCtx, pCurDqLayer, pDstY, pDstCb, pDstCr, iLumaStride, iChromaStride); + + if (GetThreadCount (pCtx) <= 1) { + pCtx->sBlockFunc.pWelsSetNonZeroCountFunc ( + pCurDqLayer->pNzc[pCurDqLayer->iMbXyIndex]); // set all none-zero nzc to 1; dbk can be opti! + } + return ERR_NONE; +} + +void WelsLumaDcDequantIdct (int16_t* pBlock, int32_t iQp, PWelsDecoderContext pCtx) { + const int32_t kiQMul = pCtx->bUseScalingList ? pCtx->pDequant_coeff4x4[0][iQp][0] : (g_kuiDequantCoeff[iQp][0] << 4); +#define STRIDE 16 + int32_t i; + int32_t iTemp[16]; //FIXME check if this is a good idea + int16_t* pBlk = pBlock; + static const int32_t kiXOffset[4] = {0, STRIDE, STRIDE << 2, 5 * STRIDE}; + static const int32_t kiYOffset[4] = {0, STRIDE << 1, STRIDE << 3, 10 * STRIDE}; + + for (i = 0; i < 4; i++) { + const int32_t kiOffset = kiYOffset[i]; + const int32_t kiX1 = kiOffset + kiXOffset[2]; + const int32_t kiX2 = STRIDE + kiOffset; + const int32_t kiX3 = kiOffset + kiXOffset[3]; + const int32_t kiI4 = i << 2; // 4*i + const int32_t kiZ0 = pBlk[kiOffset] + pBlk[kiX1]; + const int32_t kiZ1 = pBlk[kiOffset] - pBlk[kiX1]; + const int32_t kiZ2 = pBlk[kiX2] - pBlk[kiX3]; + const int32_t kiZ3 = pBlk[kiX2] + pBlk[kiX3]; + + iTemp[kiI4] = kiZ0 + kiZ3; + iTemp[1 + kiI4] = kiZ1 + kiZ2; + iTemp[2 + kiI4] = kiZ1 - kiZ2; + iTemp[3 + kiI4] = kiZ0 - kiZ3; + } + + for (i = 0; i < 4; i++) { + const int32_t kiOffset = kiXOffset[i]; + const int32_t kiI4 = 4 + i; + const int32_t kiZ0 = iTemp[i] + iTemp[4 + kiI4]; + const int32_t kiZ1 = iTemp[i] - iTemp[4 + kiI4]; + const int32_t kiZ2 = iTemp[kiI4] - iTemp[8 + kiI4]; + const int32_t kiZ3 = iTemp[kiI4] + iTemp[8 + kiI4]; + + 
pBlk[kiOffset] = ((kiZ0 + kiZ3) * kiQMul + (1 << 5)) >> 6; //FIXME think about merging this into decode_resdual + pBlk[kiYOffset[1] + kiOffset] = ((kiZ1 + kiZ2) * kiQMul + (1 << 5)) >> 6; + pBlk[kiYOffset[2] + kiOffset] = ((kiZ1 - kiZ2) * kiQMul + (1 << 5)) >> 6; + pBlk[kiYOffset[3] + kiOffset] = ((kiZ0 - kiZ3) * kiQMul + (1 << 5)) >> 6; + } +#undef STRIDE +} + +int32_t WelsMbIntraPredictionConstruction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer, bool bOutput) { +//seems IPCM should not enter this path + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + + WelsFillRecNeededMbInfo (pCtx, bOutput, pCurDqLayer); + + if (IS_INTRA16x16 (pCurDqLayer->pDec->pMbType[iMbXy])) { + RecI16x16Mb (iMbXy, pCtx, pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer); + } else if (IS_INTRA8x8 (pCurDqLayer->pDec->pMbType[iMbXy])) { + RecI8x8Mb (iMbXy, pCtx, pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer); + } else if (IS_INTRA4x4 (pCurDqLayer->pDec->pMbType[iMbXy])) { + RecI4x4Mb (iMbXy, pCtx, pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer); + } + return ERR_NONE; +} + +int32_t WelsMbInterPrediction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer) { + int32_t iMbX = pCurDqLayer->iMbX; + int32_t iMbY = pCurDqLayer->iMbY; + uint8_t* pDstY, *pDstCb, *pDstCr; + + int32_t iLumaStride = pCtx->pDec->iLinesize[0]; + int32_t iChromaStride = pCtx->pDec->iLinesize[1]; + + pDstY = pCurDqLayer->pDec->pData[0] + ((iMbY * iLumaStride + iMbX) << 4); + pDstCb = pCurDqLayer->pDec->pData[1] + ((iMbY * iChromaStride + iMbX) << 3); + pDstCr = pCurDqLayer->pDec->pData[2] + ((iMbY * iChromaStride + iMbX) << 3); + + if (pCtx->eSliceType == P_SLICE) { + WELS_B_MB_REC_VERIFY (GetInterPred (pDstY, pDstCb, pDstCr, pCtx)); + } else { + if (pCtx->pTempDec == NULL) + pCtx->pTempDec = AllocPicture (pCtx, pCtx->pSps->iMbWidth << 4, pCtx->pSps->iMbHeight << 4); + uint8_t* pTempDstYCbCr[3]; + uint8_t* pDstYCbCr[3]; + pTempDstYCbCr[0] = pCtx->pTempDec->pData[0] + ((iMbY * iLumaStride + iMbX) << 4); + pTempDstYCbCr[1] = 
pCtx->pTempDec->pData[1] + ((iMbY * iChromaStride + iMbX) << 3); + pTempDstYCbCr[2] = pCtx->pTempDec->pData[2] + ((iMbY * iChromaStride + iMbX) << 3); + pDstYCbCr[0] = pDstY; + pDstYCbCr[1] = pDstCb; + pDstYCbCr[2] = pDstCr; + WELS_B_MB_REC_VERIFY (GetInterBPred (pDstYCbCr, pTempDstYCbCr, pCtx)); + } + return ERR_NONE; +} + +int32_t WelsTargetMbConstruction (PWelsDecoderContext pCtx) { + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + if (MB_TYPE_INTRA_PCM == pCurDqLayer->pDec->pMbType[pCurDqLayer->iMbXyIndex]) { + //already decoded and reconstructed when parsing + return ERR_NONE; + } else if (IS_INTRA (pCurDqLayer->pDec->pMbType[pCurDqLayer->iMbXyIndex])) { + WelsMbIntraPredictionConstruction (pCtx, pCurDqLayer, 1); + } else if (IS_INTER (pCurDqLayer->pDec->pMbType[pCurDqLayer->iMbXyIndex])) { //InterMB + if (0 == pCurDqLayer->pCbp[pCurDqLayer->iMbXyIndex]) { //uiCbp==0 include SKIP + if (!CheckRefPics (pCtx)) { + return ERR_INFO_MB_RECON_FAIL; + } + return WelsMbInterPrediction (pCtx, pCurDqLayer); + } else { + WelsMbInterConstruction (pCtx, pCurDqLayer); + } + } else { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "WelsTargetMbConstruction():::::Unknown MB type: %d", + pCurDqLayer->pDec->pMbType[pCurDqLayer->iMbXyIndex]); + return ERR_INFO_MB_RECON_FAIL; + } + + return ERR_NONE; +} + +void WelsChromaDcIdct (int16_t* pBlock) { + int32_t iStride = 32; + int32_t iXStride = 16; + int32_t iStride1 = iXStride + iStride; + int16_t* pBlk = pBlock; + int32_t iA, iB, iC, iD, iE; + + iA = pBlk[0]; + iB = pBlk[iXStride]; + iC = pBlk[iStride]; + iD = pBlk[iStride1]; + + iE = iA - iB; + iA += iB; + iB = iC - iD; + iC += iD; + + pBlk[0] = (iA + iC); + pBlk[iXStride] = (iE + iB); + pBlk[iStride] = (iA - iC); + pBlk[iStride1] = (iE - iB); +} + +void WelsMapNxNNeighToSampleNormal (PWelsNeighAvail pNeighAvail, int32_t* pSampleAvail) { + if (pNeighAvail->iLeftAvail) { //left + pSampleAvail[ 6] = + pSampleAvail[12] = + pSampleAvail[18] = + pSampleAvail[24] = 1; + } + if 
(pNeighAvail->iLeftTopAvail) { //top_left + pSampleAvail[0] = 1; + } + if (pNeighAvail->iTopAvail) { //top + pSampleAvail[1] = + pSampleAvail[2] = + pSampleAvail[3] = + pSampleAvail[4] = 1; + } + if (pNeighAvail->iRightTopAvail) { //top_right + pSampleAvail[5] = 1; + } +} + +void WelsMapNxNNeighToSampleConstrain1 (PWelsNeighAvail pNeighAvail, int32_t* pSampleAvail) { + if (pNeighAvail->iLeftAvail && IS_INTRA (pNeighAvail->iLeftType)) { //left + pSampleAvail[ 6] = + pSampleAvail[12] = + pSampleAvail[18] = + pSampleAvail[24] = 1; + } + if (pNeighAvail->iLeftTopAvail && IS_INTRA (pNeighAvail->iLeftTopType)) { //top_left + pSampleAvail[0] = 1; + } + if (pNeighAvail->iTopAvail && IS_INTRA (pNeighAvail->iTopType)) { //top + pSampleAvail[1] = + pSampleAvail[2] = + pSampleAvail[3] = + pSampleAvail[4] = 1; + } + if (pNeighAvail->iRightTopAvail && IS_INTRA (pNeighAvail->iRightTopType)) { //top_right + pSampleAvail[5] = 1; + } +} +void WelsMap16x16NeighToSampleNormal (PWelsNeighAvail pNeighAvail, uint8_t* pSampleAvail) { + if (pNeighAvail->iLeftAvail) { + *pSampleAvail = (1 << 2); + } + if (pNeighAvail->iLeftTopAvail) { + *pSampleAvail |= (1 << 1); + } + if (pNeighAvail->iTopAvail) { + *pSampleAvail |= 1; + } +} + +void WelsMap16x16NeighToSampleConstrain1 (PWelsNeighAvail pNeighAvail, uint8_t* pSampleAvail) { + if (pNeighAvail->iLeftAvail && IS_INTRA (pNeighAvail->iLeftType)) { + *pSampleAvail = (1 << 2); + } + if (pNeighAvail->iLeftTopAvail && IS_INTRA (pNeighAvail->iLeftTopType)) { + *pSampleAvail |= (1 << 1); + } + if (pNeighAvail->iTopAvail && IS_INTRA (pNeighAvail->iTopType)) { + *pSampleAvail |= 1; + } +} + +int32_t ParseIntra4x4Mode (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, int8_t* pIntraPredMode, + PBitStringAux pBs, + PDqLayer pCurDqLayer) { + int32_t iSampleAvail[5 * 6] = { 0 }; //initialize as 0 + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + int32_t iFinalMode, i; + + uint8_t uiNeighAvail = 0; + uint32_t uiCode; + int32_t iCode; + 
pCtx->pMapNxNNeighToSampleFunc (pNeighAvail, iSampleAvail); + uiNeighAvail = (iSampleAvail[6] << 2) | (iSampleAvail[0] << 1) | (iSampleAvail[1]); + for (i = 0; i < 16; i++) { + int32_t iPrevIntra4x4PredMode = 0; + if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) { + WELS_READ_VERIFY (ParseIntraPredModeLumaCabac (pCtx, iCode)); + iPrevIntra4x4PredMode = iCode; + } else { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); + iPrevIntra4x4PredMode = uiCode; + } + const int32_t kiPredMode = PredIntra4x4Mode (pIntraPredMode, i); + + int8_t iBestMode; + if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) { + if (iPrevIntra4x4PredMode == -1) + iBestMode = kiPredMode; + else + iBestMode = iPrevIntra4x4PredMode + (iPrevIntra4x4PredMode >= kiPredMode); + } else { + if (iPrevIntra4x4PredMode) { + iBestMode = kiPredMode; + } else { + WELS_READ_VERIFY (BsGetBits (pBs, 3, &uiCode)); + iBestMode = uiCode + ((int32_t) uiCode >= kiPredMode); + } + } + + iFinalMode = CheckIntraNxNPredMode (&iSampleAvail[0], &iBestMode, i, false); + if (iFinalMode == GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INVALID_INTRA4X4_MODE)) { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I4x4_PRED_MODE); + } + + pCurDqLayer->pIntra4x4FinalMode[iMbXy][g_kuiScan4[i]] = iFinalMode; + + pIntraPredMode[g_kuiScan8[i]] = iBestMode; + + iSampleAvail[g_kuiCache30ScanIdx[i]] = 1; + } + ST32 (&pCurDqLayer->pIntraPredMode[iMbXy][0], LD32 (&pIntraPredMode[1 + 8 * 4])); + pCurDqLayer->pIntraPredMode[iMbXy][4] = pIntraPredMode[4 + 8 * 1]; + pCurDqLayer->pIntraPredMode[iMbXy][5] = pIntraPredMode[4 + 8 * 2]; + pCurDqLayer->pIntraPredMode[iMbXy][6] = pIntraPredMode[4 + 8 * 3]; + + if (pCtx->pSps->uiChromaFormatIdc == 0)//no need parse chroma + return ERR_NONE; + + if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) { + WELS_READ_VERIFY (ParseIntraPredModeChromaCabac (pCtx, uiNeighAvail, iCode)); + if (iCode > MAX_PRED_MODE_ID_CHROMA) { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, 
ERR_INFO_INVALID_I_CHROMA_PRED_MODE); + } + pCurDqLayer->pChromaPredMode[iMbXy] = iCode; + } else { + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //intra_chroma_pred_mode + if (uiCode > MAX_PRED_MODE_ID_CHROMA) { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE); + } + pCurDqLayer->pChromaPredMode[iMbXy] = uiCode; + } + + if (-1 == pCurDqLayer->pChromaPredMode[iMbXy] + || CheckIntraChromaPredMode (uiNeighAvail, &pCurDqLayer->pChromaPredMode[iMbXy])) { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE); + } + return ERR_NONE; +} + +int32_t ParseIntra8x8Mode (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, int8_t* pIntraPredMode, + PBitStringAux pBs, + PDqLayer pCurDqLayer) { + // Similar with Intra_4x4, can put them together when needed + int32_t iSampleAvail[5 * 6] = { 0 }; //initialize as 0 + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + int32_t iFinalMode, i; + + uint8_t uiNeighAvail = 0; + uint32_t uiCode; + int32_t iCode; + pCtx->pMapNxNNeighToSampleFunc (pNeighAvail, iSampleAvail); + // Top-Right : Left : Top-Left : Top + uiNeighAvail = (iSampleAvail[5] << 3) | (iSampleAvail[6] << 2) | (iSampleAvail[0] << 1) | (iSampleAvail[1]); + + pCurDqLayer->pIntraNxNAvailFlag[iMbXy] = uiNeighAvail; + + for (i = 0; i < 4; i++) { + int32_t iPrevIntra4x4PredMode = 0; + if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) { + WELS_READ_VERIFY (ParseIntraPredModeLumaCabac (pCtx, iCode)); + iPrevIntra4x4PredMode = iCode; + } else { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); + iPrevIntra4x4PredMode = uiCode; + } + const int32_t kiPredMode = PredIntra4x4Mode (pIntraPredMode, i << 2); + + int8_t iBestMode; + if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) { + if (iPrevIntra4x4PredMode == -1) + iBestMode = kiPredMode; + else + iBestMode = iPrevIntra4x4PredMode + (iPrevIntra4x4PredMode >= kiPredMode); + } else { + if (iPrevIntra4x4PredMode) { + iBestMode = kiPredMode; + } else { + 
WELS_READ_VERIFY (BsGetBits (pBs, 3, &uiCode)); + iBestMode = uiCode + ((int32_t) uiCode >= kiPredMode); + } + } + + iFinalMode = CheckIntraNxNPredMode (&iSampleAvail[0], &iBestMode, i << 2, true); + + if (iFinalMode == GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INVALID_INTRA4X4_MODE)) { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I4x4_PRED_MODE); + } + + for (int j = 0; j < 4; j++) { + pCurDqLayer->pIntra4x4FinalMode[iMbXy][g_kuiScan4[ (i << 2) + j]] = iFinalMode; + pIntraPredMode[g_kuiScan8[ (i << 2) + j]] = iBestMode; + iSampleAvail[g_kuiCache30ScanIdx[ (i << 2) + j]] = 1; + } + } + ST32 (&pCurDqLayer->pIntraPredMode[iMbXy][0], LD32 (&pIntraPredMode[1 + 8 * 4])); + pCurDqLayer->pIntraPredMode[iMbXy][4] = pIntraPredMode[4 + 8 * 1]; + pCurDqLayer->pIntraPredMode[iMbXy][5] = pIntraPredMode[4 + 8 * 2]; + pCurDqLayer->pIntraPredMode[iMbXy][6] = pIntraPredMode[4 + 8 * 3]; + + if (pCtx->pSps->uiChromaFormatIdc == 0) + return ERR_NONE; + + if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) { + WELS_READ_VERIFY (ParseIntraPredModeChromaCabac (pCtx, uiNeighAvail, iCode)); + if (iCode > MAX_PRED_MODE_ID_CHROMA) { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE); + } + pCurDqLayer->pChromaPredMode[iMbXy] = iCode; + } else { + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //intra_chroma_pred_mode + if (uiCode > MAX_PRED_MODE_ID_CHROMA) { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE); + } + pCurDqLayer->pChromaPredMode[iMbXy] = uiCode; + } + + if (-1 == pCurDqLayer->pChromaPredMode[iMbXy] + || CheckIntraChromaPredMode (uiNeighAvail, &pCurDqLayer->pChromaPredMode[iMbXy])) { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE); + } + + return ERR_NONE; +} + +int32_t ParseIntra16x16Mode (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, PBitStringAux pBs, + PDqLayer pCurDqLayer) { + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + uint8_t uiNeighAvail = 
0; //0x07 = 0 1 1 1, means left, top-left, top avail or not. (1: avail, 0: unavail) + uint32_t uiCode; + int32_t iCode; + pCtx->pMap16x16NeighToSampleFunc (pNeighAvail, &uiNeighAvail); + + if (CheckIntra16x16PredMode (uiNeighAvail, + &pCurDqLayer->pIntraPredMode[iMbXy][7])) { //invalid iPredMode, must stop decoding + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I16x16_PRED_MODE); + } + if (pCtx->pSps->uiChromaFormatIdc == 0) + return ERR_NONE; + + if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) { + WELS_READ_VERIFY (ParseIntraPredModeChromaCabac (pCtx, uiNeighAvail, iCode)); + if (iCode > MAX_PRED_MODE_ID_CHROMA) { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE); + } + pCurDqLayer->pChromaPredMode[iMbXy] = iCode; + } else { + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //intra_chroma_pred_mode + if (uiCode > MAX_PRED_MODE_ID_CHROMA) { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE); + } + pCurDqLayer->pChromaPredMode[iMbXy] = uiCode; + } + if (-1 == pCurDqLayer->pChromaPredMode[iMbXy] + || CheckIntraChromaPredMode (uiNeighAvail, &pCurDqLayer->pChromaPredMode[iMbXy])) { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE); + } + + return ERR_NONE; +} + +int32_t WelsDecodeMbCabacISliceBaseMode0 (PWelsDecoderContext pCtx, uint32_t& uiEosFlag) { + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + PBitStringAux pBsAux = pCurDqLayer->pBitStringAux; + PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; + PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader; + SWelsNeighAvail sNeighAvail; + int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart; + int32_t iScanIdxEnd = pSlice->sSliceHeaderExt.uiScanIdxEnd; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + int32_t i; + uint32_t uiMbType = 0, uiCbp = 0, uiCbpLuma = 0, uiCbpChroma = 0; + + ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16); + + 
pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true; + pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false; + + pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0; + pCurDqLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag; + GetNeighborAvailMbType (&sNeighAvail, pCurDqLayer); + WELS_READ_VERIFY (ParseMBTypeISliceCabac (pCtx, &sNeighAvail, uiMbType)); + if (uiMbType > 25) { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE); + } else if (!pCtx->pSps->uiChromaFormatIdc && ((uiMbType >= 5 && uiMbType <= 12) || (uiMbType >= 17 + && uiMbType <= 24))) { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE); + } else if (25 == uiMbType) { //I_PCM + WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in I slice!"); + WELS_READ_VERIFY (ParseIPCMInfoCabac (pCtx)); + pSlice->iLastDeltaQp = 0; + WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag)); + if (uiEosFlag) { + RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux); + } + return ERR_NONE; + } else if (0 == uiMbType) { //I4x4 + ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16); + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA4x4; + if (pCtx->pPps->bTransform8x8ModeFlag) { + // Transform 8x8 cabac will be added soon + WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, &sNeighAvail, pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy])); + } + if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { + uiMbType = pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA8x8; + pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer); + WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBsAux, pCurDqLayer)); + } else { + pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer); + WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBsAux, pCurDqLayer)); + } + //get uiCbp for I4x4 + 
WELS_READ_VERIFY (ParseCbpInfoCabac (pCtx, &sNeighAvail, uiCbp)); + pCurDqLayer->pCbp[iMbXy] = uiCbp; + pSlice->iLastDeltaQp = uiCbp == 0 ? 0 : pSlice->iLastDeltaQp; + uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? uiCbp >> 4 : 0; + uiCbpLuma = uiCbp & 15; + } else { //I16x16; + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA16x16; + pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false; + pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true; + pCurDqLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3; + pCurDqLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2]; + uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurDqLayer->pCbp[iMbXy] >> 4 : 0 ; + uiCbpLuma = pCurDqLayer->pCbp[iMbXy] & 15; + WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurDqLayer); + WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, &sNeighAvail, pBsAux, pCurDqLayer)); + } + + ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][16], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][20], 0); + pCurDqLayer->pCbfDc[iMbXy] = 0; + + if (pCurDqLayer->pCbp[iMbXy] == 0 && IS_INTRANxN (pCurDqLayer->pDec->pMbType[iMbXy])) { + pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; + for (i = 0; i < 2; i++) { + pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 ((pCurDqLayer->pLumaQp[iMbXy] + + pSliceHeader->pPps->iChromaQpIndexOffset[i]), 0, 51)]; + } + } + + if (pCurDqLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) { + memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurDqLayer->pScaledTCoeff[iMbXy][0])); + int32_t iQpDelta, iId8x8, iId4x4; + WELS_READ_VERIFY (ParseDeltaQpCabac (pCtx, iQpDelta)); + if (iQpDelta > 25 || iQpDelta < -26) {//out of iQpDelta range + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP); + } + pCurDqLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; 
//update last_mb_qp + pSlice->iLastMbQp = pCurDqLayer->pLumaQp[iMbXy]; + for (i = 0; i < 2; i++) { + pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 ((pSlice->iLastMbQp + + pSliceHeader->pPps->iChromaQpIndexOffset[i]), 0, 51)]; + } + if (MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) { + //step1: Luma DC + WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, 0, 16, g_kuiLumaDcZigzagScan, + I16_LUMA_DC, pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer->pLumaQp[iMbXy], pCtx)); + //step2: Luma AC + if (uiCbpLuma) { + for (i = 0; i < 16; i++) { + WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, i, + iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, + pCurDqLayer->pScaledTCoeff[iMbXy] + (i << 4), pCurDqLayer->pLumaQp[iMbXy], pCtx)); + } + ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2])); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3])); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4])); + } else { //pNonZeroCount = 0 + ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], 0); + } + } else { //non-MB_TYPE_INTRA16x16 + if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { + // Transform 8x8 support for CABAC + for (iId8x8 = 0; iId8x8 < 4; iId8x8++) { + if (uiCbpLuma & (1 << iId8x8)) { + WELS_READ_VERIFY (ParseResidualBlockCabac8x8 (&sNeighAvail, pNonZeroCount, pBsAux, (iId8x8 << 2), + iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan8x8 + iScanIdxStart, LUMA_DC_AC_INTRA_8, + pCurDqLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), pCurDqLayer->pLumaQp[iMbXy], pCtx)); + } else { + ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0); + ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 
2]], 0); + } + } + ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2])); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3])); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4])); + } else { + for (iId8x8 = 0; iId8x8 < 4; iId8x8++) { + if (uiCbpLuma & (1 << iId8x8)) { + int32_t iIdx = (iId8x8 << 2); + for (iId4x4 = 0; iId4x4 < 4; iId4x4++) { + //Luma (DC and AC decoding together) + WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - iScanIdxStart + 1, + g_kuiZigzagScan + iScanIdxStart, LUMA_DC_AC_INTRA, pCurDqLayer->pScaledTCoeff[iMbXy] + (iIdx << 4), + pCurDqLayer->pLumaQp[iMbXy], pCtx)); + iIdx++; + } + } else { + ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0); + ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0); + } + } + ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2])); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3])); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4])); + } + } + int32_t iMbResProperty; + //chroma + //step1: DC + if (1 == uiCbpChroma || 2 == uiCbpChroma) { + //Cb Cr + for (i = 0; i < 2; i++) { + iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U; + WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, 16 + (i << 2), 4, g_kuiChromaDcScan, + iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurDqLayer->pChromaQp[iMbXy][i], pCtx)); + } + } + + //step2: AC + if (2 == uiCbpChroma) { + for (i = 0; i < 2; i++) { //Cb Cr + iMbResProperty = i ? 
CHROMA_AC_V : CHROMA_AC_U; + int32_t iIdx = 16 + (i << 2); + for (iId4x4 = 0; iId4x4 < 4; iId4x4++) { + WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, iIdx, + iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), iMbResProperty, + pCurDqLayer->pScaledTCoeff[iMbXy] + (iIdx << 4), pCurDqLayer->pChromaQp[iMbXy][i], pCtx)); + iIdx++; + } + } + ST16 (&pCurDqLayer->pNzc[iMbXy][16], LD16 (&pNonZeroCount[6 + 8 * 1])); + ST16 (&pCurDqLayer->pNzc[iMbXy][20], LD16 (&pNonZeroCount[6 + 8 * 2])); + ST16 (&pCurDqLayer->pNzc[iMbXy][18], LD16 (&pNonZeroCount[6 + 8 * 4])); + ST16 (&pCurDqLayer->pNzc[iMbXy][22], LD16 (&pNonZeroCount[6 + 8 * 5])); + } else { + ST16 (&pCurDqLayer->pNzc[iMbXy][16], 0); + ST16 (&pCurDqLayer->pNzc[iMbXy][20], 0); + ST16 (&pCurDqLayer->pNzc[iMbXy][18], 0); + ST16 (&pCurDqLayer->pNzc[iMbXy][22], 0); + } + } else { + ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][16], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][20], 0); + } + + WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag)); + if (uiEosFlag) { + RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux); + } + return ERR_NONE; +} + +int32_t WelsDecodeMbCabacISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) { + WELS_READ_VERIFY (WelsDecodeMbCabacISliceBaseMode0 (pCtx, uiEosFlag)); + return ERR_NONE; +} + +int32_t WelsDecodeMbCabacPSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiEosFlag) { + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + PBitStringAux pBsAux = pCurDqLayer->pBitStringAux; + PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; + PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader; + + int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart; + int32_t iScanIdxEnd = 
pSlice->sSliceHeaderExt.uiScanIdxEnd; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + int32_t iMbResProperty; + int32_t i; + uint32_t uiMbType = 0, uiCbp = 0, uiCbpLuma = 0, uiCbpChroma = 0; + + ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16); + + pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0; + + WELS_READ_VERIFY (ParseMBTypePSliceCabac (pCtx, pNeighAvail, uiMbType)); + // uiMbType = 4 is not allowded. + if (uiMbType < 4) { //Inter mode + int16_t pMotionVector[LIST_A][30][MV_A]; + int16_t pMvdCache[LIST_A][30][MV_A]; + int8_t pRefIndex[LIST_A][30]; + pCurDqLayer->pDec->pMbType[iMbXy] = g_ksInterPMbTypeInfo[uiMbType].iType; + WelsFillCacheInterCabac (pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex, pCurDqLayer); + WELS_READ_VERIFY (ParseInterPMotionInfoCabac (pCtx, pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex)); + pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0; + } else { //Intra mode + uiMbType -= 5; + if (uiMbType > 25) + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE); + if (!pCtx->pSps->uiChromaFormatIdc && ((uiMbType >= 5 && uiMbType <= 12) || (uiMbType >= 17 && uiMbType <= 24))) + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE); + + if (25 == uiMbType) { //I_PCM + WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in P slice!"); + WELS_READ_VERIFY (ParseIPCMInfoCabac (pCtx)); + pSlice->iLastDeltaQp = 0; + WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag)); + if (uiEosFlag) { + RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux); + } + return ERR_NONE; + } else { //normal Intra mode + if (0 == uiMbType) { //Intra4x4 + ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16); + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA4x4; + if (pCtx->pPps->bTransform8x8ModeFlag) { + WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, pNeighAvail, pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy])); + } + if 
(pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { + uiMbType = pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA8x8; + pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer); + WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurDqLayer)); + } else { + pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer); + WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurDqLayer)); + } + } else { //Intra16x16 + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA16x16; + pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false; + pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true; + pCurDqLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3; + pCurDqLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2]; + uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurDqLayer->pCbp[iMbXy] >> 4 : 0; + uiCbpLuma = pCurDqLayer->pCbp[iMbXy] & 15; + WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurDqLayer); + WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, pNeighAvail, pBsAux, pCurDqLayer)); + } + } + } + + ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][16], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][20], 0); + + if (MB_TYPE_INTRA16x16 != pCurDqLayer->pDec->pMbType[iMbXy]) { + WELS_READ_VERIFY (ParseCbpInfoCabac (pCtx, pNeighAvail, uiCbp)); + + pCurDqLayer->pCbp[iMbXy] = uiCbp; + pSlice->iLastDeltaQp = uiCbp == 0 ? 0 : pSlice->iLastDeltaQp; + uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? 
pCurDqLayer->pCbp[iMbXy] >> 4 : 0 ; + uiCbpLuma = pCurDqLayer->pCbp[iMbXy] & 15; + } + + if (pCurDqLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) { + + if (MB_TYPE_INTRA16x16 != pCurDqLayer->pDec->pMbType[iMbXy]) { + // Need modification when B picutre add in + bool bNeedParseTransformSize8x8Flag = + (((pCurDqLayer->pDec->pMbType[iMbXy] >= MB_TYPE_16x16 && pCurDqLayer->pDec->pMbType[iMbXy] <= MB_TYPE_8x16) + || pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy]) + && (pCurDqLayer->pDec->pMbType[iMbXy] != MB_TYPE_INTRA8x8) + && (pCurDqLayer->pDec->pMbType[iMbXy] != MB_TYPE_INTRA4x4) + && ((pCurDqLayer->pCbp[iMbXy] & 0x0F) > 0) + && (pCtx->pPps->bTransform8x8ModeFlag)); + + if (bNeedParseTransformSize8x8Flag) { + WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, pNeighAvail, + pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy])); //transform_size_8x8_flag + } + } + + memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurDqLayer->pScaledTCoeff[iMbXy][0])); + + int32_t iQpDelta, iId8x8, iId4x4; + + WELS_READ_VERIFY (ParseDeltaQpCabac (pCtx, iQpDelta)); + if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP); + } + pCurDqLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp + pSlice->iLastMbQp = pCurDqLayer->pLumaQp[iMbXy]; + for (i = 0; i < 2; i++) { + pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp + + pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)]; + } + + if (MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) { + //step1: Luma DC + WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, 0, 16, g_kuiLumaDcZigzagScan, + I16_LUMA_DC, pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer->pLumaQp[iMbXy], pCtx)); + //step2: Luma AC + if (uiCbpLuma) { + for (i = 0; i < 16; i++) { + WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, 
pBsAux, i, iScanIdxEnd - WELS_MAX (iScanIdxStart, + 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurDqLayer->pScaledTCoeff[iMbXy] + (i << 4), + pCurDqLayer->pLumaQp[iMbXy], pCtx)); + } + ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2])); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3])); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4])); + } else { + ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], 0); + } + } else { //non-MB_TYPE_INTRA16x16 + if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { + // Transform 8x8 support for CABAC + for (iId8x8 = 0; iId8x8 < 4; iId8x8++) { + if (uiCbpLuma & (1 << iId8x8)) { + WELS_READ_VERIFY (ParseResidualBlockCabac8x8 (pNeighAvail, pNonZeroCount, pBsAux, (iId8x8 << 2), + iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan8x8 + iScanIdxStart, + IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy]) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8, + pCurDqLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), pCurDqLayer->pLumaQp[iMbXy], pCtx)); + } else { + ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0); + ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0); + } + } + ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2])); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3])); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4])); + } else { + iMbResProperty = (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) ? 
LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER; + for (iId8x8 = 0; iId8x8 < 4; iId8x8++) { + if (uiCbpLuma & (1 << iId8x8)) { + int32_t iIdx = (iId8x8 << 2); + for (iId4x4 = 0; iId4x4 < 4; iId4x4++) { + //Luma (DC and AC decoding together) + WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - iScanIdxStart + 1, + g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (iIdx << 4), + pCurDqLayer->pLumaQp[iMbXy], + pCtx)); + iIdx++; + } + } else { + ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[iId8x8 << 2]], 0); + ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0); + } + } + ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2])); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3])); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4])); + } + } + + //chroma + //step1: DC + if (1 == uiCbpChroma || 2 == uiCbpChroma) { + for (i = 0; i < 2; i++) { + if (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) + iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U; + else + iMbResProperty = i ? CHROMA_DC_V_INTER : CHROMA_DC_U_INTER; + + WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, 16 + (i << 2), 4, g_kuiChromaDcScan, + iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurDqLayer->pChromaQp[iMbXy][i], pCtx)); + } + } + //step2: AC + if (2 == uiCbpChroma) { + for (i = 0; i < 2; i++) { + if (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) + iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U; + else + iMbResProperty = i ? 
CHROMA_AC_V_INTER : CHROMA_AC_U_INTER; + int32_t index = 16 + (i << 2); + for (iId4x4 = 0; iId4x4 < 4; iId4x4++) { + WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, index, + iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), + iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (index << 4), pCurDqLayer->pChromaQp[iMbXy][i], pCtx)); + index++; + } + } + ST16 (&pCurDqLayer->pNzc[iMbXy][16], LD16 (&pNonZeroCount[6 + 8 * 1])); + ST16 (&pCurDqLayer->pNzc[iMbXy][20], LD16 (&pNonZeroCount[6 + 8 * 2])); + ST16 (&pCurDqLayer->pNzc[iMbXy][18], LD16 (&pNonZeroCount[6 + 8 * 4])); + ST16 (&pCurDqLayer->pNzc[iMbXy][22], LD16 (&pNonZeroCount[6 + 8 * 5])); + } else { + ST32 (&pCurDqLayer->pNzc[iMbXy][16], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][20], 0); + } + } else { + pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; + for (i = 0; i < 2; i++) { + pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurDqLayer->pLumaQp[iMbXy] + + pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)]; + } + } + + WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag)); + if (uiEosFlag) { + RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux); + } + + return ERR_NONE; +} + +int32_t WelsDecodeMbCabacBSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiEosFlag) { + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + PBitStringAux pBsAux = pCurDqLayer->pBitStringAux; + PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; + PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader; + + int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart; + int32_t iScanIdxEnd = pSlice->sSliceHeaderExt.uiScanIdxEnd; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + int32_t iMbResProperty; + int32_t i; + uint32_t uiMbType = 0, uiCbp = 0, uiCbpLuma = 0, uiCbpChroma = 0; + + ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16); + + 
pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0; + + WELS_READ_VERIFY (ParseMBTypeBSliceCabac (pCtx, pNeighAvail, uiMbType)); + + if (uiMbType < 23) { //Inter B mode + int16_t pMotionVector[LIST_A][30][MV_A]; + int16_t pMvdCache[LIST_A][30][MV_A]; + int8_t pRefIndex[LIST_A][30]; + int8_t pDirect[30]; + pCurDqLayer->pDec->pMbType[iMbXy] = g_ksInterBMbTypeInfo[uiMbType].iType; + WelsFillCacheInterCabac (pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex, pCurDqLayer); + WelsFillDirectCacheCabac (pNeighAvail, pDirect, pCurDqLayer); + WELS_READ_VERIFY (ParseInterBMotionInfoCabac (pCtx, pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex, + pDirect)); + pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0; + } else { //Intra mode + uiMbType -= 23; + if (uiMbType > 25) + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE); + if (!pCtx->pSps->uiChromaFormatIdc && ((uiMbType >= 5 && uiMbType <= 12) || (uiMbType >= 17 && uiMbType <= 24))) + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE); + + if (25 == uiMbType) { //I_PCM + WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in B slice!"); + WELS_READ_VERIFY (ParseIPCMInfoCabac (pCtx)); + pSlice->iLastDeltaQp = 0; + WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag)); + if (uiEosFlag) { + RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux); + } + return ERR_NONE; + } else { //normal Intra mode + if (0 == uiMbType) { //Intra4x4 + ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16); + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA4x4; + if (pCtx->pPps->bTransform8x8ModeFlag) { + WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, pNeighAvail, pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy])); + } + if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { + uiMbType = pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA8x8; + pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, 
pCurDqLayer); + WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurDqLayer)); + } else { + pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer); + WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurDqLayer)); + } + } else { //Intra16x16 + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA16x16; + pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false; + pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true; + pCurDqLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3; + pCurDqLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2]; + uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurDqLayer->pCbp[iMbXy] >> 4 : 0; + uiCbpLuma = pCurDqLayer->pCbp[iMbXy] & 15; + WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurDqLayer); + WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, pNeighAvail, pBsAux, pCurDqLayer)); + } + } + } + + ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][16], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][20], 0); + + if (MB_TYPE_INTRA16x16 != pCurDqLayer->pDec->pMbType[iMbXy]) { + WELS_READ_VERIFY (ParseCbpInfoCabac (pCtx, pNeighAvail, uiCbp)); + + pCurDqLayer->pCbp[iMbXy] = uiCbp; + pSlice->iLastDeltaQp = uiCbp == 0 ? 0 : pSlice->iLastDeltaQp; + uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? 
pCurDqLayer->pCbp[iMbXy] >> 4 : 0; + uiCbpLuma = pCurDqLayer->pCbp[iMbXy] & 15; + } + + if (pCurDqLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) { + + if (MB_TYPE_INTRA16x16 != pCurDqLayer->pDec->pMbType[iMbXy]) { + // Need modification when B picutre add in + bool bNeedParseTransformSize8x8Flag = + (((IS_INTER_16x16 (pCurDqLayer->pDec->pMbType[iMbXy]) || IS_DIRECT (pCurDqLayer->pDec->pMbType[iMbXy]) + || IS_INTER_16x8 (pCurDqLayer->pDec->pMbType[iMbXy]) || IS_INTER_8x16 (pCurDqLayer->pDec->pMbType[iMbXy])) + || pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy]) + && (pCurDqLayer->pDec->pMbType[iMbXy] != MB_TYPE_INTRA8x8) + && (pCurDqLayer->pDec->pMbType[iMbXy] != MB_TYPE_INTRA4x4) + && ((pCurDqLayer->pCbp[iMbXy] & 0x0F) > 0) + && (pCtx->pPps->bTransform8x8ModeFlag)); + + if (bNeedParseTransformSize8x8Flag) { + WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, pNeighAvail, + pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy])); //transform_size_8x8_flag + } + } + + memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurDqLayer->pScaledTCoeff[iMbXy][0])); + + int32_t iQpDelta, iId8x8, iId4x4; + + WELS_READ_VERIFY (ParseDeltaQpCabac (pCtx, iQpDelta)); + if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP); + } + pCurDqLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp + pSlice->iLastMbQp = pCurDqLayer->pLumaQp[iMbXy]; + for (i = 0; i < 2; i++) { + pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp + + pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)]; + } + + if (MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) { + //step1: Luma DC + WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, 0, 16, g_kuiLumaDcZigzagScan, + I16_LUMA_DC, pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer->pLumaQp[iMbXy], pCtx)); + //step2: Luma AC + if (uiCbpLuma) { + 
for (i = 0; i < 16; i++) { + WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, i, iScanIdxEnd - WELS_MAX (iScanIdxStart, + 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurDqLayer->pScaledTCoeff[iMbXy] + (i << 4), + pCurDqLayer->pLumaQp[iMbXy], pCtx)); + } + ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2])); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3])); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4])); + } else { + ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], 0); + } + } else { //non-MB_TYPE_INTRA16x16 + if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { + // Transform 8x8 support for CABAC + for (iId8x8 = 0; iId8x8 < 4; iId8x8++) { + if (uiCbpLuma & (1 << iId8x8)) { + WELS_READ_VERIFY (ParseResidualBlockCabac8x8 (pNeighAvail, pNonZeroCount, pBsAux, (iId8x8 << 2), + iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan8x8 + iScanIdxStart, + IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy]) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8, + pCurDqLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), pCurDqLayer->pLumaQp[iMbXy], pCtx)); + } else { + ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0); + ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0); + } + } + ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2])); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3])); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4])); + } else { + iMbResProperty = (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) ? 
LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER; + for (iId8x8 = 0; iId8x8 < 4; iId8x8++) { + if (uiCbpLuma & (1 << iId8x8)) { + int32_t iIdx = (iId8x8 << 2); + for (iId4x4 = 0; iId4x4 < 4; iId4x4++) { + //Luma (DC and AC decoding together) + WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - iScanIdxStart + 1, + g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (iIdx << 4), + pCurDqLayer->pLumaQp[iMbXy], + pCtx)); + iIdx++; + } + } else { + ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[iId8x8 << 2]], 0); + ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0); + } + } + ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2])); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3])); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4])); + } + } + + //chroma + //step1: DC + if (1 == uiCbpChroma || 2 == uiCbpChroma) { + for (i = 0; i < 2; i++) { + if (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) + iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U; + else + iMbResProperty = i ? CHROMA_DC_V_INTER : CHROMA_DC_U_INTER; + + WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, 16 + (i << 2), 4, g_kuiChromaDcScan, + iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurDqLayer->pChromaQp[iMbXy][i], pCtx)); + } + } + //step2: AC + if (2 == uiCbpChroma) { + for (i = 0; i < 2; i++) { + if (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) + iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U; + else + iMbResProperty = i ? 
CHROMA_AC_V_INTER : CHROMA_AC_U_INTER; + int32_t index = 16 + (i << 2); + for (iId4x4 = 0; iId4x4 < 4; iId4x4++) { + WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, index, + iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), + iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (index << 4), pCurDqLayer->pChromaQp[iMbXy][i], pCtx)); + index++; + } + } + ST16 (&pCurDqLayer->pNzc[iMbXy][16], LD16 (&pNonZeroCount[6 + 8 * 1])); + ST16 (&pCurDqLayer->pNzc[iMbXy][20], LD16 (&pNonZeroCount[6 + 8 * 2])); + ST16 (&pCurDqLayer->pNzc[iMbXy][18], LD16 (&pNonZeroCount[6 + 8 * 4])); + ST16 (&pCurDqLayer->pNzc[iMbXy][22], LD16 (&pNonZeroCount[6 + 8 * 5])); + } else { + ST32 (&pCurDqLayer->pNzc[iMbXy][16], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][20], 0); + } + } else { + pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; + for (i = 0; i < 2; i++) { + pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurDqLayer->pLumaQp[iMbXy] + + pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)]; + } + } + + WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag)); + if (uiEosFlag) { + RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux); + } + + return ERR_NONE; +} + + +int32_t WelsDecodeMbCabacPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) { + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; + PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader; + PPicture* ppRefPic = pCtx->sRefPic.pRefList[LIST_0]; + uint32_t uiCode; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + int32_t i; + SWelsNeighAvail uiNeighAvail; + pCurDqLayer->pCbp[iMbXy] = 0; + pCurDqLayer->pCbfDc[iMbXy] = 0; + pCurDqLayer->pChromaPredMode[iMbXy] = C_PRED_DC; + + pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true; + pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false; + + GetNeighborAvailMbType (&uiNeighAvail, 
pCurDqLayer); + WELS_READ_VERIFY (ParseSkipFlagCabac (pCtx, &uiNeighAvail, uiCode)); + + if (uiCode) { + int16_t pMv[2] = {0}; + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_SKIP; + ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][16], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][20], 0); + + pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0; + memset (pCurDqLayer->pDec->pRefIndex[0][iMbXy], 0, sizeof (int8_t) * 16); + bool bIsPending = GetThreadCount (pCtx) > 1; + pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[0] && (ppRefPic[0]->bIsComplete + || bIsPending)); + //predict mv + PredPSkipMvFromNeighbor (pCurDqLayer, pMv); + for (i = 0; i < 16; i++) { + ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][i], * (uint32_t*)pMv); + ST32 (pCurDqLayer->pMvd[0][iMbXy][i], 0); + } + + //if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag) { + // memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (int16_t)); + //} + + //reset rS + pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; //??????????????? 
dqaunt of previous mb + for (i = 0; i < 2; i++) { + pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurDqLayer->pLumaQp[iMbXy] + + pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)]; + } + + //for neighboring CABAC usage + pSlice->iLastDeltaQp = 0; + + WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag)); + + return ERR_NONE; + } + + WELS_READ_VERIFY (WelsDecodeMbCabacPSliceBaseMode0 (pCtx, &uiNeighAvail, uiEosFlag)); + return ERR_NONE; +} + + +int32_t WelsDecodeMbCabacBSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) { + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; + PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader; + PPicture* ppRefPicL0 = pCtx->sRefPic.pRefList[LIST_0]; + PPicture* ppRefPicL1 = pCtx->sRefPic.pRefList[LIST_1]; + uint32_t uiCode; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + int32_t i; + SWelsNeighAvail uiNeighAvail; + pCurDqLayer->pCbp[iMbXy] = 0; + pCurDqLayer->pCbfDc[iMbXy] = 0; + pCurDqLayer->pChromaPredMode[iMbXy] = C_PRED_DC; + + pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true; + pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false; + + GetNeighborAvailMbType (&uiNeighAvail, pCurDqLayer); + WELS_READ_VERIFY (ParseSkipFlagCabac (pCtx, &uiNeighAvail, uiCode)); + + memset (pCurDqLayer->pDirect[iMbXy], 0, sizeof (int8_t) * 16); + + bool bIsPending = GetThreadCount (pCtx) > 1; + + if (uiCode) { + int16_t pMv[LIST_A][2] = { {0, 0}, { 0, 0 } }; + int8_t ref[LIST_A] = { 0 }; + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_SKIP | MB_TYPE_DIRECT; + ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][16], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][20], 0); + + pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0; + memset (pCurDqLayer->pDec->pRefIndex[LIST_0][iMbXy], 0, sizeof (int8_t) * 
16); + memset (pCurDqLayer->pDec->pRefIndex[LIST_1][iMbXy], 0, sizeof (int8_t) * 16); + pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPicL0[0] && (ppRefPicL0[0]->bIsComplete + || bIsPending)) || ! (ppRefPicL1[0] && (ppRefPicL1[0]->bIsComplete || bIsPending)); + + if (pCtx->bMbRefConcealed) { + SLogContext* pLogCtx = & (pCtx->sLogCtx); + WelsLog (pLogCtx, WELS_LOG_ERROR, "Ref Picture for B-Slice is lost, B-Slice decoding cannot be continued!"); + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_DATA, ERR_INFO_REFERENCE_PIC_LOST); + } + + SubMbType subMbType; + if (pSliceHeader->iDirectSpatialMvPredFlag) { + + //predict direct spatial mv + int32_t ret = PredMvBDirectSpatial (pCtx, pMv, ref, subMbType); + if (ret != ERR_NONE) { + return ret; + } + } else { + //temporal direct mode + int32_t ret = PredBDirectTemporal (pCtx, pMv, ref, subMbType); + if (ret != ERR_NONE) { + return ret; + } + } + + + //reset rS + pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; //??????????????? 
dqaunt of previous mb + for (i = 0; i < 2; i++) { + pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurDqLayer->pLumaQp[iMbXy] + + pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)]; + } + + //for neighboring CABAC usage + pSlice->iLastDeltaQp = 0; + + WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag)); + + return ERR_NONE; + } + + WELS_READ_VERIFY (WelsDecodeMbCabacBSliceBaseMode0 (pCtx, &uiNeighAvail, uiEosFlag)); + return ERR_NONE; +} + +// Calculate deqaunt coeff scaling list value +int32_t WelsCalcDeqCoeffScalingList (PWelsDecoderContext pCtx) { + if (pCtx->pSps->bSeqScalingMatrixPresentFlag || pCtx->pPps->bPicScalingMatrixPresentFlag) { + pCtx->bUseScalingList = true; + + if (!pCtx->bDequantCoeff4x4Init || (pCtx->iDequantCoeffPpsid != pCtx->pPps->iPpsId)) { + int i, q, x, y; + //Init dequant coeff value for different QP + for (i = 0; i < 6; i++) { + pCtx->pDequant_coeff4x4[i] = pCtx->pDequant_coeff_buffer4x4[i]; + pCtx->pDequant_coeff8x8[i] = pCtx->pDequant_coeff_buffer8x8[i]; + for (q = 0; q < 51; q++) { + for (x = 0; x < 16; x++) { + pCtx->pDequant_coeff4x4[i][q][x] = pCtx->pPps->bPicScalingMatrixPresentFlag ? pCtx->pPps->iScalingList4x4[i][x] * + g_kuiDequantCoeff[q][x & 0x07] : pCtx->pSps->iScalingList4x4[i][x] * g_kuiDequantCoeff[q][x & 0x07]; + } + for (y = 0; y < 64; y++) { + pCtx->pDequant_coeff8x8[i][q][y] = pCtx->pPps->bPicScalingMatrixPresentFlag ? 
pCtx->pPps->iScalingList8x8[i][y] * + g_kuiMatrixV[q % 6][y / 8][y % 8] : pCtx->pSps->iScalingList8x8[i][y] * g_kuiMatrixV[q % 6][y / 8][y % 8]; + } + } + } + pCtx->bDequantCoeff4x4Init = true; + pCtx->iDequantCoeffPpsid = pCtx->pPps->iPpsId; + } + } else + pCtx->bUseScalingList = false; + return ERR_NONE; +} + +int32_t WelsDecodeSlice (PWelsDecoderContext pCtx, bool bFirstSliceInLayer, PNalUnit pNalCur) { + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + PFmo pFmo = pCtx->pFmo; + int32_t iRet; + int32_t iNextMbXyIndex, iSliceIdc; + + PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; + PSliceHeaderExt pSliceHeaderExt = &pSlice->sSliceHeaderExt; + PSliceHeader pSliceHeader = &pSliceHeaderExt->sSliceHeader; + int32_t iMbX, iMbY; + const int32_t kiCountNumMb = pSliceHeader->pSps->uiTotalMbCount; //need to be correct when fmo or multi slice + uint32_t uiEosFlag = 0; + PWelsDecMbFunc pDecMbFunc; + + pSlice->iTotalMbInCurSlice = 0; //initialize at the starting of slice decoding. + + if (pCtx->pPps->bEntropyCodingModeFlag) { + if (pSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag || + pSlice->sSliceHeaderExt.bAdaptiveBaseModeFlag || + pSlice->sSliceHeaderExt.bAdaptiveResidualPredFlag) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, + "WelsDecodeSlice()::::ILP flag exist, not supported with CABAC enabled!"); + pCtx->iErrorCode |= dsBitstreamError; + return dsBitstreamError; + } + if (P_SLICE == pSliceHeader->eSliceType) + pDecMbFunc = WelsDecodeMbCabacPSlice; + else if (B_SLICE == pSliceHeader->eSliceType) + pDecMbFunc = WelsDecodeMbCabacBSlice; + else //I_SLICE. 
B_SLICE is being supported + pDecMbFunc = WelsDecodeMbCabacISlice; + } else { + if (P_SLICE == pSliceHeader->eSliceType) { + pDecMbFunc = WelsDecodeMbCavlcPSlice; + } else if (B_SLICE == pSliceHeader->eSliceType) { + pDecMbFunc = WelsDecodeMbCavlcBSlice; + } else { //I_SLICE + pDecMbFunc = WelsDecodeMbCavlcISlice; + } + } + + if (pSliceHeader->pPps->bConstainedIntraPredFlag) { + pCtx->pFillInfoCacheIntraNxNFunc = WelsFillCacheConstrain1IntraNxN; + pCtx->pMapNxNNeighToSampleFunc = WelsMapNxNNeighToSampleConstrain1; + pCtx->pMap16x16NeighToSampleFunc = WelsMap16x16NeighToSampleConstrain1; + } else { + pCtx->pFillInfoCacheIntraNxNFunc = WelsFillCacheConstrain0IntraNxN; + pCtx->pMapNxNNeighToSampleFunc = WelsMapNxNNeighToSampleNormal; + pCtx->pMap16x16NeighToSampleFunc = WelsMap16x16NeighToSampleNormal; + } + + pCtx->eSliceType = pSliceHeader->eSliceType; + if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag == 1) { + int32_t iQp = pSlice->sSliceHeaderExt.sSliceHeader.iSliceQp; + int32_t iCabacInitIdc = pSlice->sSliceHeaderExt.sSliceHeader.iCabacInitIdc; + WelsCabacContextInit (pCtx, pSlice->eSliceType, iCabacInitIdc, iQp); + //InitCabacCtx (pCtx->pCabacCtx, pSlice->eSliceType, iCabacInitIdc, iQp); + pSlice->iLastDeltaQp = 0; + WELS_READ_VERIFY (InitCabacDecEngineFromBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux)); + } + //try to calculate the dequant_coeff + WelsCalcDeqCoeffScalingList (pCtx); + + iNextMbXyIndex = pSliceHeader->iFirstMbInSlice; + iMbX = iNextMbXyIndex % pCurDqLayer->iMbWidth; + iMbY = iNextMbXyIndex / pCurDqLayer->iMbWidth; // error is introduced by multiple slices case, 11/23/2009 + pSlice->iMbSkipRun = -1; + iSliceIdc = (pSliceHeader->iFirstMbInSlice << 7) + pCurDqLayer->uiLayerDqId; + + pCurDqLayer->iMbX = iMbX; + pCurDqLayer->iMbY = iMbY; + pCurDqLayer->iMbXyIndex = iNextMbXyIndex; + + do { + if ((-1 == iNextMbXyIndex) || (iNextMbXyIndex >= kiCountNumMb)) { // slice group boundary or end of a frame + break; + } + + 
pCurDqLayer->pSliceIdc[iNextMbXyIndex] = iSliceIdc; + pCtx->bMbRefConcealed = false; + iRet = pDecMbFunc (pCtx, pNalCur, uiEosFlag); + pCurDqLayer->pMbRefConcealedFlag[iNextMbXyIndex] = pCtx->bMbRefConcealed; + if (iRet != ERR_NONE) { + return iRet; + } + + ++pSlice->iTotalMbInCurSlice; + if (uiEosFlag) { //end of slice + break; + } + if (pSliceHeader->pPps->uiNumSliceGroups > 1) { + iNextMbXyIndex = FmoNextMb (pFmo, iNextMbXyIndex); + } else { + ++iNextMbXyIndex; + } + iMbX = iNextMbXyIndex % pCurDqLayer->iMbWidth; + iMbY = iNextMbXyIndex / pCurDqLayer->iMbWidth; + pCurDqLayer->iMbX = iMbX; + pCurDqLayer->iMbY = iMbY; + pCurDqLayer->iMbXyIndex = iNextMbXyIndex; + } while (1); + + return ERR_NONE; +} + +int32_t WelsDecodeAndConstructSlice (PWelsDecoderContext pCtx) { + PNalUnit pNalCur = pCtx->pNalCur; + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + PFmo pFmo = pCtx->pFmo; + int32_t iRet; + int32_t iNextMbXyIndex, iSliceIdc; + + PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; + PSliceHeaderExt pSliceHeaderExt = &pSlice->sSliceHeaderExt; + PSliceHeader pSliceHeader = &pSliceHeaderExt->sSliceHeader; + int32_t iMbX, iMbY; + const int32_t kiCountNumMb = pSliceHeader->pSps->uiTotalMbCount; //need to be correct when fmo or multi slice + int32_t iTotalMbTargetLayer = kiCountNumMb; + uint32_t uiEosFlag = 0; + PWelsDecMbFunc pDecMbFunc; + + pSlice->iTotalMbInCurSlice = 0; //initialize at the starting of slice decoding. 
+ + if (pCtx->pPps->bEntropyCodingModeFlag) { + if (pSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag || + pSlice->sSliceHeaderExt.bAdaptiveBaseModeFlag || + pSlice->sSliceHeaderExt.bAdaptiveResidualPredFlag) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, + "WelsDecodeSlice()::::ILP flag exist, not supported with CABAC enabled!"); + pCtx->iErrorCode |= dsBitstreamError; + return dsBitstreamError; + } + if (P_SLICE == pSliceHeader->eSliceType) + pDecMbFunc = WelsDecodeMbCabacPSlice; + else if (B_SLICE == pSliceHeader->eSliceType) + pDecMbFunc = WelsDecodeMbCabacBSlice; + else //I_SLICE. B_SLICE is being supported + pDecMbFunc = WelsDecodeMbCabacISlice; + } else { + if (P_SLICE == pSliceHeader->eSliceType) { + pDecMbFunc = WelsDecodeMbCavlcPSlice; + } else if (B_SLICE == pSliceHeader->eSliceType) { + pDecMbFunc = WelsDecodeMbCavlcBSlice; + } else { //I_SLICE + pDecMbFunc = WelsDecodeMbCavlcISlice; + } + } + + if (pSliceHeader->pPps->bConstainedIntraPredFlag) { + pCtx->pFillInfoCacheIntraNxNFunc = WelsFillCacheConstrain1IntraNxN; + pCtx->pMapNxNNeighToSampleFunc = WelsMapNxNNeighToSampleConstrain1; + pCtx->pMap16x16NeighToSampleFunc = WelsMap16x16NeighToSampleConstrain1; + } else { + pCtx->pFillInfoCacheIntraNxNFunc = WelsFillCacheConstrain0IntraNxN; + pCtx->pMapNxNNeighToSampleFunc = WelsMapNxNNeighToSampleNormal; + pCtx->pMap16x16NeighToSampleFunc = WelsMap16x16NeighToSampleNormal; + } + + pCtx->eSliceType = pSliceHeader->eSliceType; + if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag == 1) { + int32_t iQp = pSlice->sSliceHeaderExt.sSliceHeader.iSliceQp; + int32_t iCabacInitIdc = pSlice->sSliceHeaderExt.sSliceHeader.iCabacInitIdc; + WelsCabacContextInit (pCtx, pSlice->eSliceType, iCabacInitIdc, iQp); + //InitCabacCtx (pCtx->pCabacCtx, pSlice->eSliceType, iCabacInitIdc, iQp); + pSlice->iLastDeltaQp = 0; + WELS_READ_VERIFY (InitCabacDecEngineFromBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux)); + } + //try to calculate the dequant_coeff + 
WelsCalcDeqCoeffScalingList (pCtx); + + iNextMbXyIndex = pSliceHeader->iFirstMbInSlice; + iMbX = iNextMbXyIndex % pCurDqLayer->iMbWidth; + iMbY = iNextMbXyIndex / pCurDqLayer->iMbWidth; // error is introduced by multiple slices case, 11/23/2009 + pSlice->iMbSkipRun = -1; + iSliceIdc = (pSliceHeader->iFirstMbInSlice << 7) + pCurDqLayer->uiLayerDqId; + + pCurDqLayer->iMbX = iMbX; + pCurDqLayer->iMbY = iMbY; + pCurDqLayer->iMbXyIndex = iNextMbXyIndex; + + PDeblockingFilterMbFunc pDeblockMb = WelsDeblockingMb; + + SDeblockingFilter pFilter; + int32_t iFilterIdc = 1; + if (pSliceHeader->uiDisableDeblockingFilterIdc != 1) { + WelsDeblockingInitFilter (pCtx, pFilter, iFilterIdc); + } + + do { + if ((-1 == iNextMbXyIndex) || (iNextMbXyIndex >= kiCountNumMb)) { // slice group boundary or end of a frame + break; + } + + pCurDqLayer->pSliceIdc[iNextMbXyIndex] = iSliceIdc; + pCtx->bMbRefConcealed = false; + iRet = pDecMbFunc (pCtx, pNalCur, uiEosFlag); + pCurDqLayer->pMbRefConcealedFlag[iNextMbXyIndex] = pCtx->bMbRefConcealed; + if (iRet != ERR_NONE) { + return iRet; + } + if (WelsTargetMbConstruction (pCtx)) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, + "WelsTargetSliceConstruction():::MB(%d, %d) construction error. pCurSlice_type:%d", + pCurDqLayer->iMbX, pCurDqLayer->iMbY, pSlice->eSliceType); + + return ERR_INFO_MB_RECON_FAIL; + } + memcpy (pCtx->pDec->pNzc[pCurDqLayer->iMbXyIndex], pCurDqLayer->pNzc[pCurDqLayer->iMbXyIndex], 24); + if (pCtx->eSliceType != I_SLICE) { + pCtx->sBlockFunc.pWelsSetNonZeroCountFunc ( + pCtx->pDec->pNzc[pCurDqLayer->iMbXyIndex]); // set all none-zero nzc to 1; dbk can be opti! 
+ } + WelsDeblockingFilterMB (pCurDqLayer, pFilter, iFilterIdc, pDeblockMb); + if (pCtx->uiNalRefIdc > 0) { + if (pCurDqLayer->iMbX == 0 || pCurDqLayer->iMbX == pCurDqLayer->iMbWidth - 1 || pCurDqLayer->iMbY == 0 + || pCurDqLayer->iMbY == pCurDqLayer->iMbHeight - 1) { + PadMBLuma_c (pCurDqLayer->pDec->pData[0], pCurDqLayer->pDec->iLinesize[0], pCurDqLayer->pDec->iWidthInPixel, + pCurDqLayer->pDec->iHeightInPixel, pCurDqLayer->iMbX, pCurDqLayer->iMbY, pCurDqLayer->iMbWidth, pCurDqLayer->iMbHeight); + PadMBChroma_c (pCurDqLayer->pDec->pData[1], pCurDqLayer->pDec->iLinesize[1], pCurDqLayer->pDec->iWidthInPixel / 2, + pCurDqLayer->pDec->iHeightInPixel / 2, pCurDqLayer->iMbX, pCurDqLayer->iMbY, pCurDqLayer->iMbWidth, + pCurDqLayer->iMbHeight); + PadMBChroma_c (pCurDqLayer->pDec->pData[2], pCurDqLayer->pDec->iLinesize[2], pCurDqLayer->pDec->iWidthInPixel / 2, + pCurDqLayer->pDec->iHeightInPixel / 2, pCurDqLayer->iMbX, pCurDqLayer->iMbY, pCurDqLayer->iMbWidth, + pCurDqLayer->iMbHeight); + } + } + if (!pCurDqLayer->pMbCorrectlyDecodedFlag[iNextMbXyIndex]) { //already con-ed, overwrite + pCurDqLayer->pMbCorrectlyDecodedFlag[iNextMbXyIndex] = true; + pCtx->pDec->iMbEcedPropNum += (pCurDqLayer->pMbRefConcealedFlag[iNextMbXyIndex] ? 
1 : 0); + ++pCtx->iTotalNumMbRec; + } + + if (pCtx->iTotalNumMbRec > iTotalMbTargetLayer) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, + "WelsTargetSliceConstruction():::pCtx->iTotalNumMbRec:%d, iTotalMbTargetLayer:%d", + pCtx->iTotalNumMbRec, iTotalMbTargetLayer); + + return ERR_INFO_MB_NUM_EXCEED_FAIL; + } + + ++pSlice->iTotalMbInCurSlice; + if (uiEosFlag) { //end of slice + SET_EVENT (&pCtx->pDec->pReadyEvent[pCurDqLayer->iMbY]); + break; + } + if (pSliceHeader->pPps->uiNumSliceGroups > 1) { + iNextMbXyIndex = FmoNextMb (pFmo, iNextMbXyIndex); + } else { + ++iNextMbXyIndex; + } + int32_t iLastMby = iMbY; + int32_t iLastMbx = iMbX; + iMbX = iNextMbXyIndex % pCurDqLayer->iMbWidth; + iMbY = iNextMbXyIndex / pCurDqLayer->iMbWidth; + pCurDqLayer->iMbX = iMbX; + pCurDqLayer->iMbY = iMbY; + pCurDqLayer->iMbXyIndex = iNextMbXyIndex; + if (GetThreadCount (pCtx) > 1) { + if ((iMbY > iLastMby) && (iLastMbx == pCurDqLayer->iMbWidth - 1)) { + SET_EVENT (&pCtx->pDec->pReadyEvent[iLastMby]); + } + } + } while (1); + if (GetThreadCount (pCtx) > 1) { + SET_EVENT (&pCtx->pDec->pReadyEvent[pCurDqLayer->iMbY]); + } + return ERR_NONE; +} + +int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) { + SVlcTable* pVlcTable = pCtx->pVlcTable; + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + PBitStringAux pBs = pCurDqLayer->pBitStringAux; + PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; + PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader; + + SWelsNeighAvail sNeighAvail; + int32_t iMbResProperty; + + int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart; + int32_t iScanIdxEnd = pSlice->sSliceHeaderExt.uiScanIdxEnd; + + int32_t iMbX = pCurDqLayer->iMbX; + int32_t iMbY = pCurDqLayer->iMbY; + const int32_t iMbXy = pCurDqLayer->iMbXyIndex; + int8_t* pNzc = pCurDqLayer->pNzc[iMbXy]; + int32_t i; + int32_t iRet = ERR_NONE; + uint32_t uiMbType = 0, uiCbp = 0, uiCbpL = 0, uiCbpC = 0; + uint32_t uiCode; + int32_t iCode; + + ENFORCE_STACK_ALIGN_1D 
(uint8_t, pNonZeroCount, 48, 16); + GetNeighborAvailMbType (&sNeighAvail, pCurDqLayer); + pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0; + pCurDqLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag; + + pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true; + pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false; + + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //uiMbType + uiMbType = uiCode; + if (uiMbType > 25) + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE); + if (!pCtx->pSps->uiChromaFormatIdc && ((uiMbType >= 5 && uiMbType <= 12) || (uiMbType >= 17 && uiMbType <= 24))) + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE); + + if (25 == uiMbType) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in I slice!"); + int32_t iDecStrideL = pCurDqLayer->pDec->iLinesize[0]; + int32_t iDecStrideC = pCurDqLayer->pDec->iLinesize[1]; + + int32_t iOffsetL = (iMbX + iMbY * iDecStrideL) << 4; + int32_t iOffsetC = (iMbX + iMbY * iDecStrideC) << 3; + + uint8_t* pDecY = pCurDqLayer->pDec->pData[0] + iOffsetL; + uint8_t* pDecU = pCurDqLayer->pDec->pData[1] + iOffsetC; + uint8_t* pDecV = pCurDqLayer->pDec->pData[2] + iOffsetC; + + uint8_t* pTmpBsBuf; + + + int32_t i; + int32_t iCopySizeY = (sizeof (uint8_t) << 4); + int32_t iCopySizeUV = (sizeof (uint8_t) << 3); + + int32_t iIndex = ((-pBs->iLeftBits) >> 3) + 2; + + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA_PCM; + + //step 1: locating bit-stream pointer [must align into integer byte] + pBs->pCurBuf -= iIndex; + + //step 2: copy pixel from bit-stream into fdec [reconstruction] + pTmpBsBuf = pBs->pCurBuf; + if (!pCtx->pParam->bParseOnly) { + for (i = 0; i < 16; i++) { //luma + memcpy (pDecY, pTmpBsBuf, iCopySizeY); + pDecY += iDecStrideL; + pTmpBsBuf += 16; + } + for (i = 0; i < 8; i++) { //cb + memcpy (pDecU, pTmpBsBuf, iCopySizeUV); + pDecU += iDecStrideC; + pTmpBsBuf += 8; + } + for (i = 0; i < 8; i++) { //cr + memcpy (pDecV, 
pTmpBsBuf, iCopySizeUV); + pDecV += iDecStrideC; + pTmpBsBuf += 8; + } + } + + pBs->pCurBuf += 384; + + //step 3: update QP and pNonZeroCount + pCurDqLayer->pLumaQp[iMbXy] = 0; + memset (pCurDqLayer->pChromaQp[iMbXy], 0, sizeof (pCurDqLayer->pChromaQp[iMbXy])); + memset (pNzc, 16, sizeof (pCurDqLayer->pNzc[iMbXy])); //Rec. 9.2.1 for PCM, nzc=16 + WELS_READ_VERIFY (InitReadBits (pBs, 0)); + return ERR_NONE; + } else if (0 == uiMbType) { //reference to JM + ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16); + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA4x4; + if (pCtx->pPps->bTransform8x8ModeFlag) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag + pCurDqLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode; + if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { + uiMbType = pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA8x8; + } + } + if (!pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { + pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer); + WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurDqLayer)); + } else { + pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer); + WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurDqLayer)); + } + + //uiCbp + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //coded_block_pattern + uiCbp = uiCode; + //G.9.1 Alternative parsing process for coded pBlock pattern + if (pCtx->pSps->uiChromaFormatIdc && (uiCbp > 47)) + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP); + if (!pCtx->pSps->uiChromaFormatIdc && (uiCbp > 15)) + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP); + + if (pCtx->pSps->uiChromaFormatIdc) + uiCbp = g_kuiIntra4x4CbpTable[uiCbp]; + else + uiCbp = g_kuiIntra4x4CbpTable400[uiCbp]; + pCurDqLayer->pCbp[iMbXy] = uiCbp; + uiCbpC = uiCbp >> 4; + uiCbpL = uiCbp & 15; + } else { //I_PCM exclude, we can ignore it + 
pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA16x16; + pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false; + pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true; + pCurDqLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3; + pCurDqLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2]; + uiCbpC = pCtx->pSps->uiChromaFormatIdc ? pCurDqLayer->pCbp[iMbXy] >> 4 : 0; + uiCbpL = pCurDqLayer->pCbp[iMbXy] & 15; + WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurDqLayer); + WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, &sNeighAvail, pBs, pCurDqLayer)); + } + + ST32A4 (&pNzc[0], 0); + ST32A4 (&pNzc[4], 0); + ST32A4 (&pNzc[8], 0); + ST32A4 (&pNzc[12], 0); + ST32A4 (&pNzc[16], 0); + ST32A4 (&pNzc[20], 0); + + if (pCurDqLayer->pCbp[iMbXy] == 0 && IS_INTRANxN (pCurDqLayer->pDec->pMbType[iMbXy])) { + pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; + for (i = 0; i < 2; i++) { + pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurDqLayer->pLumaQp[iMbXy] + + pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)]; + } + + } + + if (pCurDqLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) { + memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurDqLayer->pScaledTCoeff[iMbXy][0])); + int32_t iQpDelta, iId8x8, iId4x4; + + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mb_qp_delta + iQpDelta = iCode; + + if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP); + } + + pCurDqLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp + pSlice->iLastMbQp = pCurDqLayer->pLumaQp[iMbXy]; + for (i = 0; i < 2; i++) { + pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp + + pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, + 51)]; + } + + + BsStartCavlc (pBs); + + if (MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) { + //step1: Luma DC + if ((iRet = WelsResidualBlockCavlc 
(pVlcTable, pNonZeroCount, pBs, 0, 16, g_kuiLumaDcZigzagScan, I16_LUMA_DC, + pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) { + return iRet;//abnormal + } + //step2: Luma AC + if (uiCbpL) { + for (i = 0; i < 16; i++) { + if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, i, iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, + g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurDqLayer->pScaledTCoeff[iMbXy] + (i << 4), + pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) { + return iRet;//abnormal + } + } + ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1])); + ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2])); + ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3])); + ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4])); + } + } else { //non-MB_TYPE_INTRA16x16 + if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { + for (iId8x8 = 0; iId8x8 < 4; iId8x8++) { + iMbResProperty = (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8; + if (uiCbpL & (1 << iId8x8)) { + int32_t iIndex = (iId8x8 << 2); + for (iId4x4 = 0; iId4x4 < 4; iId4x4++) { + if ((iRet = WelsResidualBlockCavlc8x8 (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1, + g_kuiZigzagScan8x8 + iScanIdxStart, iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), iId4x4, + pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) { + return iRet; + } + iIndex++; + } + } else { + ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0); + ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0); + } + } + ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1])); + ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2])); + ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3])); + ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4])); + } else { + for (iId8x8 = 0; iId8x8 < 4; iId8x8++) { + if (uiCbpL & (1 << iId8x8)) { + int32_t iIndex = (iId8x8 << 2); + for (iId4x4 = 0; iId4x4 < 4; iId4x4++) 
{ + //Luma (DC and AC decoding together) + if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1, + g_kuiZigzagScan + iScanIdxStart, LUMA_DC_AC_INTRA, pCurDqLayer->pScaledTCoeff[iMbXy] + (iIndex << 4), + pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) { + return iRet;//abnormal + } + iIndex++; + } + } else { + ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2)]], 0); + ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0); + } + } + ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1])); + ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2])); + ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3])); + ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4])); + } + } + + //chroma + //step1: DC + if (1 == uiCbpC || 2 == uiCbpC) { + for (i = 0; i < 2; i++) { //Cb Cr + iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U; + if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 16 + (i << 2), 4, g_kuiChromaDcScan, iMbResProperty, + pCurDqLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurDqLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) { + return iRet;//abnormal + } + } + } + + //step2: AC + if (2 == uiCbpC) { + for (i = 0; i < 2; i++) { //Cb Cr + iMbResProperty = i ? 
CHROMA_AC_V : CHROMA_AC_U; + int32_t iIndex = 16 + (i << 2); + for (iId4x4 = 0; iId4x4 < 4; iId4x4++) { + if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - WELS_MAX (iScanIdxStart, + 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), iMbResProperty, + pCurDqLayer->pScaledTCoeff[iMbXy] + (iIndex << 4), + pCurDqLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) { + return iRet;//abnormal + } + iIndex++; + } + } + ST16A2 (&pNzc[16], LD16A2 (&pNonZeroCount[6 + 8 * 1])); + ST16A2 (&pNzc[20], LD16A2 (&pNonZeroCount[6 + 8 * 2])); + ST16A2 (&pNzc[18], LD16A2 (&pNonZeroCount[6 + 8 * 4])); + ST16A2 (&pNzc[22], LD16A2 (&pNonZeroCount[6 + 8 * 5])); + } + BsEndCavlc (pBs); + } + + return ERR_NONE; +} + +int32_t WelsDecodeMbCavlcISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) { + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + PBitStringAux pBs = pCurDqLayer->pBitStringAux; + PSliceHeaderExt pSliceHeaderExt = &pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt; + int32_t iBaseModeFlag; + int32_t iRet = 0; //should have the return value to indicate decoding error or not, It's NECESSARY--2010.4.15 + uint32_t uiCode; + intX_t iUsedBits; + if (pSliceHeaderExt->bAdaptiveBaseModeFlag == 1) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //base_mode_flag + iBaseModeFlag = uiCode; + } else { + iBaseModeFlag = pSliceHeaderExt->bDefaultBaseModeFlag; + } + if (!iBaseModeFlag) { + iRet = WelsActualDecodeMbCavlcISlice (pCtx); + } else { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "iBaseModeFlag (%d) != 0, inter-layer prediction not supported.", + iBaseModeFlag); + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_ILP); + } + if (iRet) { //occur error when parsing, MUST STOP decoding + return iRet; + } + + // check whether there is left bits to read next time in case multiple slices + iUsedBits = ((pBs->pCurBuf - pBs->pStartBuf) << 3) - (16 - pBs->iLeftBits); + // sub 1, for stop bit + if ((iUsedBits 
== (pBs->iBits - 1)) && (0 >= pCurDqLayer->sLayerInfo.sSliceInLayer.iMbSkipRun)) { // slice boundary + uiEosFlag = 1; + } + if (iUsedBits > (pBs->iBits - + 1)) { //When BS incomplete, as long as find it, SHOULD stop decoding to avoid mosaic or crash. + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, + "WelsDecodeMbCavlcISlice()::::pBs incomplete, iUsedBits:%" PRId64 " > pBs->iBits:%d, MUST stop decoding.", + (int64_t) iUsedBits, pBs->iBits); + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_BS_INCOMPLETE); + } + return ERR_NONE; +} + +int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) { + SVlcTable* pVlcTable = pCtx->pVlcTable; + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + PBitStringAux pBs = pCurDqLayer->pBitStringAux; + PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; + PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader; + + int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart; + int32_t iScanIdxEnd = pSlice->sSliceHeaderExt.uiScanIdxEnd; + + SWelsNeighAvail sNeighAvail; + int32_t iMbX = pCurDqLayer->iMbX; + int32_t iMbY = pCurDqLayer->iMbY; + const int32_t iMbXy = pCurDqLayer->iMbXyIndex; + int8_t* pNzc = pCurDqLayer->pNzc[iMbXy]; + int32_t i; + int32_t iRet = ERR_NONE; + uint32_t uiMbType = 0, uiCbp = 0, uiCbpL = 0, uiCbpC = 0; + uint32_t uiCode; + int32_t iCode; + int32_t iMbResProperty; + + GetNeighborAvailMbType (&sNeighAvail, pCurDqLayer); + ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16); + pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;//2009.10.23 + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //uiMbType + uiMbType = uiCode; + if (uiMbType < 5) { //inter MB type + int16_t iMotionVector[LIST_A][30][MV_A]; + int8_t iRefIndex[LIST_A][30]; + pCurDqLayer->pDec->pMbType[iMbXy] = g_ksInterPMbTypeInfo[uiMbType].iType; + WelsFillCacheInter (&sNeighAvail, pNonZeroCount, iMotionVector, iRefIndex, pCurDqLayer); + + if ((iRet = ParseInterInfo (pCtx, iMotionVector, iRefIndex, pBs)) != ERR_NONE) { + return 
iRet;//abnormal + } + + if (pSlice->sSliceHeaderExt.bAdaptiveResidualPredFlag == 1) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //residual_prediction_flag + pCurDqLayer->pResidualPredFlag[iMbXy] = uiCode; + } else { + pCurDqLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag; + } + + if (pCurDqLayer->pResidualPredFlag[iMbXy] == 0) { + pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0; + } else { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "residual_pred_flag = 1 not supported."); + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP); + } + } else { //intra MB type + uiMbType -= 5; + if (uiMbType > 25) + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE); + if (!pCtx->pSps->uiChromaFormatIdc && ((uiMbType >= 5 && uiMbType <= 12) || (uiMbType >= 17 && uiMbType <= 24))) + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE); + + if (25 == uiMbType) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in P slice!"); + int32_t iDecStrideL = pCurDqLayer->pDec->iLinesize[0]; + int32_t iDecStrideC = pCurDqLayer->pDec->iLinesize[1]; + + int32_t iOffsetL = (iMbX + iMbY * iDecStrideL) << 4; + int32_t iOffsetC = (iMbX + iMbY * iDecStrideC) << 3; + + uint8_t* pDecY = pCurDqLayer->pDec->pData[0] + iOffsetL; + uint8_t* pDecU = pCurDqLayer->pDec->pData[1] + iOffsetC; + uint8_t* pDecV = pCurDqLayer->pDec->pData[2] + iOffsetC; + + uint8_t* pTmpBsBuf; + + int32_t i; + int32_t iCopySizeY = (sizeof (uint8_t) << 4); + int32_t iCopySizeUV = (sizeof (uint8_t) << 3); + + int32_t iIndex = ((-pBs->iLeftBits) >> 3) + 2; + + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA_PCM; + + //step 1: locating bit-stream pointer [must align into integer byte] + pBs->pCurBuf -= iIndex; + + //step 2: copy pixel from bit-stream into fdec [reconstruction] + pTmpBsBuf = pBs->pCurBuf; + if (!pCtx->pParam->bParseOnly) { + for (i = 0; i < 16; i++) { //luma + memcpy (pDecY, pTmpBsBuf, iCopySizeY); + 
pDecY += iDecStrideL; + pTmpBsBuf += 16; + } + + for (i = 0; i < 8; i++) { //cb + memcpy (pDecU, pTmpBsBuf, iCopySizeUV); + pDecU += iDecStrideC; + pTmpBsBuf += 8; + } + for (i = 0; i < 8; i++) { //cr + memcpy (pDecV, pTmpBsBuf, iCopySizeUV); + pDecV += iDecStrideC; + pTmpBsBuf += 8; + } + } + + pBs->pCurBuf += 384; + + //step 3: update QP and pNonZeroCount + pCurDqLayer->pLumaQp[iMbXy] = 0; + pCurDqLayer->pChromaQp[iMbXy][0] = pCurDqLayer->pChromaQp[iMbXy][1] = 0; + //Rec. 9.2.1 for PCM, nzc=16 + ST32A4 (&pNzc[0], 0x10101010); + ST32A4 (&pNzc[4], 0x10101010); + ST32A4 (&pNzc[8], 0x10101010); + ST32A4 (&pNzc[12], 0x10101010); + ST32A4 (&pNzc[16], 0x10101010); + ST32A4 (&pNzc[20], 0x10101010); + WELS_READ_VERIFY (InitReadBits (pBs, 0)); + return ERR_NONE; + } else { + if (0 == uiMbType) { + ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16); + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA4x4; + if (pCtx->pPps->bTransform8x8ModeFlag) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag + pCurDqLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode; + if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { + uiMbType = pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA8x8; + } + } + if (!pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { + pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer); + WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurDqLayer)); + } else { + pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer); + WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurDqLayer)); + } + } else { //I_PCM exclude, we can ignore it + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA16x16; + pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false; + pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true; + pCurDqLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3; + pCurDqLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 
1) >> 2]; + uiCbpC = pCtx->pSps->uiChromaFormatIdc ? pCurDqLayer->pCbp[iMbXy] >> 4 : 0; + uiCbpL = pCurDqLayer->pCbp[iMbXy] & 15; + WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurDqLayer); + if ((iRet = ParseIntra16x16Mode (pCtx, &sNeighAvail, pBs, pCurDqLayer)) != ERR_NONE) { + return iRet; + } + } + } + } + + if (MB_TYPE_INTRA16x16 != pCurDqLayer->pDec->pMbType[iMbXy]) { + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //coded_block_pattern + uiCbp = uiCode; + { + if (pCtx->pSps->uiChromaFormatIdc && (uiCbp > 47)) + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP); + if (!pCtx->pSps->uiChromaFormatIdc && (uiCbp > 15)) + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP); + if (MB_TYPE_INTRA4x4 == pCurDqLayer->pDec->pMbType[iMbXy] || MB_TYPE_INTRA8x8 == pCurDqLayer->pDec->pMbType[iMbXy]) { + + uiCbp = pCtx->pSps->uiChromaFormatIdc ? g_kuiIntra4x4CbpTable[uiCbp] : g_kuiIntra4x4CbpTable400[uiCbp]; + } else //inter + uiCbp = pCtx->pSps->uiChromaFormatIdc ? 
g_kuiInterCbpTable[uiCbp] : g_kuiInterCbpTable400[uiCbp]; + } + + pCurDqLayer->pCbp[iMbXy] = uiCbp; + uiCbpC = pCurDqLayer->pCbp[iMbXy] >> 4; + uiCbpL = pCurDqLayer->pCbp[iMbXy] & 15; + + // Need modification when B picutre add in + bool bNeedParseTransformSize8x8Flag = + (((pCurDqLayer->pDec->pMbType[iMbXy] >= MB_TYPE_16x16 && pCurDqLayer->pDec->pMbType[iMbXy] <= MB_TYPE_8x16) + || pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy]) + && (pCurDqLayer->pDec->pMbType[iMbXy] != MB_TYPE_INTRA8x8) + && (pCurDqLayer->pDec->pMbType[iMbXy] != MB_TYPE_INTRA4x4) + && (uiCbpL > 0) + && (pCtx->pPps->bTransform8x8ModeFlag)); + + if (bNeedParseTransformSize8x8Flag) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag + pCurDqLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode; + } + } + + ST32A4 (&pNzc[0], 0); + ST32A4 (&pNzc[4], 0); + ST32A4 (&pNzc[8], 0); + ST32A4 (&pNzc[12], 0); + ST32A4 (&pNzc[16], 0); + ST32A4 (&pNzc[20], 0); + if (pCurDqLayer->pCbp[iMbXy] == 0 && !IS_INTRA16x16 (pCurDqLayer->pDec->pMbType[iMbXy]) + && !IS_I_BL (pCurDqLayer->pDec->pMbType[iMbXy])) { + pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; + for (i = 0; i < 2; i++) { + pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurDqLayer->pLumaQp[iMbXy] + + pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)]; + } + } + + if (pCurDqLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) { + int32_t iQpDelta, iId8x8, iId4x4; + memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, MB_COEFF_LIST_SIZE * sizeof (int16_t)); + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mb_qp_delta + iQpDelta = iCode; + + if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP); + } + + pCurDqLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp + pSlice->iLastMbQp = pCurDqLayer->pLumaQp[iMbXy]; + for (i = 0; i < 2; i++) { + pCurDqLayer->pChromaQp[iMbXy][i] = 
g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp + + pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, + 51)]; + } + + BsStartCavlc (pBs); + + if (MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) { + //step1: Luma DC + if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 0, 16, g_kuiLumaDcZigzagScan, I16_LUMA_DC, + pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) { + return iRet;//abnormal + } + //step2: Luma AC + if (uiCbpL) { + for (i = 0; i < 16; i++) { + if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, i, iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, + g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurDqLayer->pScaledTCoeff[iMbXy] + (i << 4), + pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) { + return iRet;//abnormal + } + } + ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1])); + ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2])); + ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3])); + ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4])); + } + } else { //non-MB_TYPE_INTRA16x16 + if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { + for (iId8x8 = 0; iId8x8 < 4; iId8x8++) { + iMbResProperty = (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) ? 
LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8; + if (uiCbpL & (1 << iId8x8)) { + int32_t iIndex = (iId8x8 << 2); + for (iId4x4 = 0; iId4x4 < 4; iId4x4++) { + if ((iRet = WelsResidualBlockCavlc8x8 (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1, + g_kuiZigzagScan8x8 + iScanIdxStart, iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), iId4x4, + pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) { + return iRet; + } + iIndex++; + } + } else { + ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0); + ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0); + } + } + ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1])); + ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2])); + ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3])); + ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4])); + } else { // Normal T4x4 + for (iId8x8 = 0; iId8x8 < 4; iId8x8++) { + iMbResProperty = (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER; + if (uiCbpL & (1 << iId8x8)) { + int32_t iIndex = (iId8x8 << 2); + for (iId4x4 = 0; iId4x4 < 4; iId4x4++) { + //Luma (DC and AC decoding together) + if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1, + g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (iIndex << 4), + pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) { + return iRet;//abnormal + } + iIndex++; + } + } else { + ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0); + ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0); + } + } + ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1])); + ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2])); + ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3])); + ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4])); + } + } + + + //chroma + //step1: DC + if (1 == uiCbpC || 2 == uiCbpC) { + for (i = 0; i < 2; i++) { //Cb Cr + if (IS_INTRA 
(pCurDqLayer->pDec->pMbType[iMbXy]))
+          iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U;
+        else
+          iMbResProperty = i ? CHROMA_DC_V_INTER : CHROMA_DC_U_INTER;
+
+        if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 16 + (i << 2), 4, g_kuiChromaDcScan, iMbResProperty,
+                                            pCurDqLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurDqLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
+          return iRet;//abnormal
+        }
+      }
+    } else {
+    }
+    //step2: AC
+    if (2 == uiCbpC) {
+      for (i = 0; i < 2; i++) { //Cb Cr
+        if (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy]))
+          iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U;
+        else
+          iMbResProperty = i ? CHROMA_AC_V_INTER : CHROMA_AC_U_INTER;
+
+        int32_t iIndex = 16 + (i << 2);
+        for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+          if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - WELS_MAX (iScanIdxStart,
+                      1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), iMbResProperty,
+                      pCurDqLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
+                      pCurDqLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
+            return iRet;//abnormal
+          }
+          iIndex++;
+        }
+      }
+      ST16A2 (&pNzc[16], LD16A2 (&pNonZeroCount[6 + 8 * 1]));
+      ST16A2 (&pNzc[20], LD16A2 (&pNonZeroCount[6 + 8 * 2]));
+      ST16A2 (&pNzc[18], LD16A2 (&pNonZeroCount[6 + 8 * 4]));
+      ST16A2 (&pNzc[22], LD16A2 (&pNonZeroCount[6 + 8 * 5]));
+    }
+    BsEndCavlc (pBs);
+  }
+
+  return ERR_NONE;
+}
+
+// Decodes one macroblock of a CAVLC-coded P slice.
+//  - When pSlice->iMbSkipRun is -1, a new mb_skip_run is read from the bitstream first.
+//  - While the skip run is non-zero the MB is handled as a skipped MB: type MB_TYPE_SKIP,
+//    non-zero counts and list-0 reference indices cleared, MV taken from
+//    PredPSkipMvFromNeighbor(), CBP set to 0.
+//  - Otherwise base_mode_flag is resolved and WelsActualDecodeMbCavlcPSlice() parses the MB;
+//    a non-zero base_mode_flag is rejected with ERR_INFO_UNSUPPORTED_ILP.
+// \param pCtx      decoder context; pCtx->pCurDqLayer selects the MB via iMbXyIndex
+// \param pNalCur   current NAL unit; only its quality/dependency ids are read here
+// \param uiEosFlag set to 1 when exactly (iBits - 1) bits were consumed and no skip run is pending
+// \return ERR_NONE on success, otherwise the error generated by the failing parse step
+int32_t WelsDecodeMbCavlcPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) {
+  PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
+  PBitStringAux pBs = pCurDqLayer->pBitStringAux;
+  PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
+  PPicture* ppRefPic = pCtx->sRefPic.pRefList[LIST_0];
+  intX_t iUsedBits;
+  const int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+  int8_t* pNzc = pCurDqLayer->pNzc[iMbXy];
+  int32_t iBaseModeFlag, i;
+  int32_t iRet = 0; //should have the return value to indicate decoding error or not, It's NECESSARY--2010.4.15
+  uint32_t uiCode;
+
+  pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+  pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false;
+
+  if (-1 == pSlice->iMbSkipRun) {
+    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //mb_skip_run
+    pSlice->iMbSkipRun = uiCode;
+    if (-1 == pSlice->iMbSkipRun) {
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_SKIP_RUN);
+    }
+  }
+  if (pSlice->iMbSkipRun--) {
+    int16_t iMv[2];
+
+    pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_SKIP;
+    ST32A4 (&pNzc[0], 0);
+    ST32A4 (&pNzc[4], 0);
+    ST32A4 (&pNzc[8], 0);
+    ST32A4 (&pNzc[12], 0);
+    ST32A4 (&pNzc[16], 0);
+    ST32A4 (&pNzc[20], 0);
+
+    pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+    memset (pCurDqLayer->pDec->pRefIndex[0][iMbXy], 0, sizeof (int8_t) * 16);
+    bool bIsPending = GetThreadCount (pCtx) > 1;
+    pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[0] && (ppRefPic[0]->bIsComplete
+                            || bIsPending));
+    //predict iMv
+    PredPSkipMvFromNeighbor (pCurDqLayer, iMv);
+    for (i = 0; i < 16; i++) {
+      ST32A2 (pCurDqLayer->pDec->pMv[0][iMbXy][i], * (uint32_t*)iMv);
+    }
+
+    //if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag) {
+    //  memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (int16_t));
+    //}
+
+    //reset rS
+    if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag ||
+        (pNalCur->sNalHeaderExt.uiQualityId == 0 && pNalCur->sNalHeaderExt.uiDependencyId == 0)) {
+      pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
+      for (i = 0; i < 2; i++) {
+        pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurDqLayer->pLumaQp[iMbXy] +
+                                           pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
+      }
+    }
+
+    pCurDqLayer->pCbp[iMbXy] = 0;
+  } else {
+    if (pSlice->sSliceHeaderExt.bAdaptiveBaseModeFlag == 1) {
+      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //base_mode_flag
+      iBaseModeFlag = uiCode;
+    } else {
+      iBaseModeFlag = pSlice->sSliceHeaderExt.bDefaultBaseModeFlag;
+    }
+    if (!iBaseModeFlag) {
+      iRet = WelsActualDecodeMbCavlcPSlice (pCtx);
+    } else {
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "iBaseModeFlag (%d) != 0, inter-layer prediction not supported.",
+               iBaseModeFlag);
+      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_ILP);
+    }
+    if (iRet) { //occur error when parsing, MUST STOP decoding
+      return iRet;
+    }
+  }
+  // check whether there is left bits to read next time in case multiple slices
+  iUsedBits = ((pBs->pCurBuf - pBs->pStartBuf) << 3) - (16 - pBs->iLeftBits);
+  // sub 1, for stop bit
+  if ((iUsedBits == (pBs->iBits - 1)) && (0 >= pCurDqLayer->sLayerInfo.sSliceInLayer.iMbSkipRun)) { // slice boundary
+    uiEosFlag = 1;
+  }
+  if (iUsedBits > (pBs->iBits -
+                   1)) { //When BS incomplete, as long as find it, SHOULD stop decoding to avoid mosaic or crash.
+    // The message names this function so the warning can be traced to the P-slice path.
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
+             "WelsDecodeMbCavlcPSlice()::::pBs incomplete, iUsedBits:%" PRId64 " > pBs->iBits:%d, MUST stop decoding.",
+             (int64_t) iUsedBits, pBs->iBits);
+    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_BS_INCOMPLETE);
+  }
+  return ERR_NONE;
+}
+
+int32_t WelsDecodeMbCavlcBSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) {
+  PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
+  PBitStringAux pBs = pCurDqLayer->pBitStringAux;
+  PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
+  PPicture* ppRefPicL0 = pCtx->sRefPic.pRefList[LIST_0];
+  PPicture* ppRefPicL1 = pCtx->sRefPic.pRefList[LIST_1];
+  intX_t iUsedBits;
+  const int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+  int8_t* pNzc = pCurDqLayer->pNzc[iMbXy];
+  int32_t iBaseModeFlag, i;
+  int32_t iRet = 0; //should have the return value to indicate decoding error or not, It's NECESSARY--2010.4.15
+  uint32_t uiCode;
+
+  pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+  pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false;
+
+  if (-1 == pSlice->iMbSkipRun) {
+    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); 
//mb_skip_run + pSlice->iMbSkipRun = uiCode; + if (-1 == pSlice->iMbSkipRun) { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_SKIP_RUN); + } + } + if (pSlice->iMbSkipRun--) { + int16_t iMv[LIST_A][2] = { { 0, 0 }, { 0, 0 } }; + int8_t ref[LIST_A] = { 0 }; + + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_SKIP | MB_TYPE_DIRECT; + ST32A4 (&pNzc[0], 0); + ST32A4 (&pNzc[4], 0); + ST32A4 (&pNzc[8], 0); + ST32A4 (&pNzc[12], 0); + ST32A4 (&pNzc[16], 0); + ST32A4 (&pNzc[20], 0); + + pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0; + memset (pCurDqLayer->pDec->pRefIndex[LIST_0][iMbXy], 0, sizeof (int8_t) * 16); + memset (pCurDqLayer->pDec->pRefIndex[LIST_1][iMbXy], 0, sizeof (int8_t) * 16); + bool bIsPending = GetThreadCount (pCtx) > 1; + pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPicL0[0] && (ppRefPicL0[0]->bIsComplete + || bIsPending)) || ! (ppRefPicL1[0] && (ppRefPicL1[0]->bIsComplete || bIsPending)); + + /*if (pCtx->bMbRefConcealed) { + SLogContext* pLogCtx = & (pCtx->sLogCtx); + WelsLog (pLogCtx, WELS_LOG_ERROR, "Ref Picture for B-Slice is lost, B-Slice decoding cannot be continued!"); + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_DATA, ERR_INFO_REFERENCE_PIC_LOST); + }*/ + //predict iMv + SubMbType subMbType; + if (pSliceHeader->iDirectSpatialMvPredFlag) { + + //predict direct spatial mv + int32_t ret = PredMvBDirectSpatial (pCtx, iMv, ref, subMbType); + if (ret != ERR_NONE) { + return ret; + } + } else { + //temporal direct mode + int32_t ret = PredBDirectTemporal (pCtx, iMv, ref, subMbType); + if (ret != ERR_NONE) { + return ret; + } + } + + //if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag) { + // memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (int16_t)); + //} + + //reset rS + if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag || + (pNalCur->sNalHeaderExt.uiQualityId == 0 && pNalCur->sNalHeaderExt.uiDependencyId == 0)) { + pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; + for (i = 0; i < 2; 
i++) { + pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurDqLayer->pLumaQp[iMbXy] + + pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)]; + } + } + + pCurDqLayer->pCbp[iMbXy] = 0; + } else { + if (pSlice->sSliceHeaderExt.bAdaptiveBaseModeFlag == 1) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //base_mode_flag + iBaseModeFlag = uiCode; + } else { + iBaseModeFlag = pSlice->sSliceHeaderExt.bDefaultBaseModeFlag; + } + if (!iBaseModeFlag) { + iRet = WelsActualDecodeMbCavlcBSlice (pCtx); + } else { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "iBaseModeFlag (%d) != 0, inter-layer prediction not supported.", + iBaseModeFlag); + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_ILP); + } + if (iRet) { //occur error when parsing, MUST STOP decoding + return iRet; + } + } + // check whether there is left bits to read next time in case multiple slices + iUsedBits = ((pBs->pCurBuf - pBs->pStartBuf) << 3) - (16 - pBs->iLeftBits); + // sub 1, for stop bit + if ((iUsedBits == (pBs->iBits - 1)) && (0 >= pCurDqLayer->sLayerInfo.sSliceInLayer.iMbSkipRun)) { // slice boundary + uiEosFlag = 1; + } + if (iUsedBits > (pBs->iBits - + 1)) { //When BS incomplete, as long as find it, SHOULD stop decoding to avoid mosaic or crash. 
+ WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, + "WelsDecodeMbCavlcBSlice()::::pBs incomplete, iUsedBits:%" PRId64 " > pBs->iBits:%d, MUST stop decoding.", + (int64_t)iUsedBits, pBs->iBits); + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_BS_INCOMPLETE); + } + return ERR_NONE; +} + +int32_t WelsActualDecodeMbCavlcBSlice (PWelsDecoderContext pCtx) { + SVlcTable* pVlcTable = pCtx->pVlcTable; + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + PBitStringAux pBs = pCurDqLayer->pBitStringAux; + PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; + PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader; + + int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart; + int32_t iScanIdxEnd = pSlice->sSliceHeaderExt.uiScanIdxEnd; + + SWelsNeighAvail sNeighAvail; + int32_t iMbX = pCurDqLayer->iMbX; + int32_t iMbY = pCurDqLayer->iMbY; + const int32_t iMbXy = pCurDqLayer->iMbXyIndex; + int8_t* pNzc = pCurDqLayer->pNzc[iMbXy]; + int32_t i; + int32_t iRet = ERR_NONE; + uint32_t uiMbType = 0, uiCbp = 0, uiCbpL = 0, uiCbpC = 0; + uint32_t uiCode; + int32_t iCode; + int32_t iMbResProperty; + + GetNeighborAvailMbType (&sNeighAvail, pCurDqLayer); + ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16); + pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;//2009.10.23 + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //uiMbType + uiMbType = uiCode; + if (uiMbType < 23) { //inter MB type + int16_t iMotionVector[LIST_A][30][MV_A]; + int8_t iRefIndex[LIST_A][30]; + pCurDqLayer->pDec->pMbType[iMbXy] = g_ksInterBMbTypeInfo[uiMbType].iType; + WelsFillCacheInter (&sNeighAvail, pNonZeroCount, iMotionVector, iRefIndex, pCurDqLayer); + + if ((iRet = ParseInterBInfo (pCtx, iMotionVector, iRefIndex, pBs)) != ERR_NONE) { + return iRet;//abnormal + } + + if (pSlice->sSliceHeaderExt.bAdaptiveResidualPredFlag == 1) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //residual_prediction_flag + pCurDqLayer->pResidualPredFlag[iMbXy] = uiCode; + } else { + 
pCurDqLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag; + } + + if (pCurDqLayer->pResidualPredFlag[iMbXy] == 0) { + pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0; + } else { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "residual_pred_flag = 1 not supported."); + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP); + } + } else { //intra MB type + uiMbType -= 23; + if (uiMbType > 25) + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE); + if (!pCtx->pSps->uiChromaFormatIdc && ((uiMbType >= 5 && uiMbType <= 12) || (uiMbType >= 17 && uiMbType <= 24))) + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE); + + if (25 == uiMbType) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in B slice!"); + int32_t iDecStrideL = pCurDqLayer->pDec->iLinesize[0]; + int32_t iDecStrideC = pCurDqLayer->pDec->iLinesize[1]; + + int32_t iOffsetL = (iMbX + iMbY * iDecStrideL) << 4; + int32_t iOffsetC = (iMbX + iMbY * iDecStrideC) << 3; + + uint8_t* pDecY = pCurDqLayer->pDec->pData[0] + iOffsetL; + uint8_t* pDecU = pCurDqLayer->pDec->pData[1] + iOffsetC; + uint8_t* pDecV = pCurDqLayer->pDec->pData[2] + iOffsetC; + + uint8_t* pTmpBsBuf; + + int32_t i; + int32_t iCopySizeY = (sizeof (uint8_t) << 4); + int32_t iCopySizeUV = (sizeof (uint8_t) << 3); + + int32_t iIndex = ((-pBs->iLeftBits) >> 3) + 2; + + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA_PCM; + + //step 1: locating bit-stream pointer [must align into integer byte] + pBs->pCurBuf -= iIndex; + + //step 2: copy pixel from bit-stream into fdec [reconstruction] + pTmpBsBuf = pBs->pCurBuf; + if (!pCtx->pParam->bParseOnly) { + for (i = 0; i < 16; i++) { //luma + memcpy (pDecY, pTmpBsBuf, iCopySizeY); + pDecY += iDecStrideL; + pTmpBsBuf += 16; + } + + for (i = 0; i < 8; i++) { //cb + memcpy (pDecU, pTmpBsBuf, iCopySizeUV); + pDecU += iDecStrideC; + pTmpBsBuf += 8; + } + for (i = 0; i < 8; i++) { //cr + memcpy (pDecV, 
pTmpBsBuf, iCopySizeUV); + pDecV += iDecStrideC; + pTmpBsBuf += 8; + } + } + + pBs->pCurBuf += 384; + + //step 3: update QP and pNonZeroCount + pCurDqLayer->pLumaQp[iMbXy] = 0; + pCurDqLayer->pChromaQp[iMbXy][0] = pCurDqLayer->pChromaQp[iMbXy][1] = 0; + //Rec. 9.2.1 for PCM, nzc=16 + ST32A4 (&pNzc[0], 0x10101010); + ST32A4 (&pNzc[4], 0x10101010); + ST32A4 (&pNzc[8], 0x10101010); + ST32A4 (&pNzc[12], 0x10101010); + ST32A4 (&pNzc[16], 0x10101010); + ST32A4 (&pNzc[20], 0x10101010); + WELS_READ_VERIFY (InitReadBits (pBs, 0)); + return ERR_NONE; + } else { + if (0 == uiMbType) { + ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16); + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA4x4; + if (pCtx->pPps->bTransform8x8ModeFlag) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag + pCurDqLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode; + if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { + uiMbType = pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA8x8; + } + } + if (!pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { + pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer); + WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurDqLayer)); + } else { + pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer); + WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurDqLayer)); + } + } else { //I_PCM exclude, we can ignore it + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA16x16; + pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false; + pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true; + pCurDqLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3; + pCurDqLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2]; + uiCbpC = pCtx->pSps->uiChromaFormatIdc ? 
pCurDqLayer->pCbp[iMbXy] >> 4 : 0; + uiCbpL = pCurDqLayer->pCbp[iMbXy] & 15; + WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurDqLayer); + if ((iRet = ParseIntra16x16Mode (pCtx, &sNeighAvail, pBs, pCurDqLayer)) != ERR_NONE) { + return iRet; + } + } + } + } + + if (MB_TYPE_INTRA16x16 != pCurDqLayer->pDec->pMbType[iMbXy]) { + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //coded_block_pattern + uiCbp = uiCode; + { + if (pCtx->pSps->uiChromaFormatIdc && (uiCbp > 47)) + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP); + if (!pCtx->pSps->uiChromaFormatIdc && (uiCbp > 15)) + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP); + if (MB_TYPE_INTRA4x4 == pCurDqLayer->pDec->pMbType[iMbXy] || MB_TYPE_INTRA8x8 == pCurDqLayer->pDec->pMbType[iMbXy]) { + + uiCbp = pCtx->pSps->uiChromaFormatIdc ? g_kuiIntra4x4CbpTable[uiCbp] : g_kuiIntra4x4CbpTable400[uiCbp]; + } else //inter + uiCbp = pCtx->pSps->uiChromaFormatIdc ? g_kuiInterCbpTable[uiCbp] : g_kuiInterCbpTable400[uiCbp]; + } + + pCurDqLayer->pCbp[iMbXy] = uiCbp; + uiCbpC = pCurDqLayer->pCbp[iMbXy] >> 4; + uiCbpL = pCurDqLayer->pCbp[iMbXy] & 15; + + // Need modification when B picutre add in + bool bNeedParseTransformSize8x8Flag = + (((pCurDqLayer->pDec->pMbType[iMbXy] >= MB_TYPE_16x16 && pCurDqLayer->pDec->pMbType[iMbXy] <= MB_TYPE_8x16) + || pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy]) + && (pCurDqLayer->pDec->pMbType[iMbXy] != MB_TYPE_INTRA8x8) + && (pCurDqLayer->pDec->pMbType[iMbXy] != MB_TYPE_INTRA4x4) + && (uiCbpL > 0) + && (pCtx->pPps->bTransform8x8ModeFlag)); + + if (bNeedParseTransformSize8x8Flag) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag + pCurDqLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode; + } + } + + ST32A4 (&pNzc[0], 0); + ST32A4 (&pNzc[4], 0); + ST32A4 (&pNzc[8], 0); + ST32A4 (&pNzc[12], 0); + ST32A4 (&pNzc[16], 0); + ST32A4 (&pNzc[20], 0); + if (pCurDqLayer->pCbp[iMbXy] == 0 && !IS_INTRA16x16 
(pCurDqLayer->pDec->pMbType[iMbXy]) + && !IS_I_BL (pCurDqLayer->pDec->pMbType[iMbXy])) { + pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; + for (i = 0; i < 2; i++) { + pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurDqLayer->pLumaQp[iMbXy] + + pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)]; + } + } + + if (pCurDqLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) { + int32_t iQpDelta, iId8x8, iId4x4; + memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, MB_COEFF_LIST_SIZE * sizeof (int16_t)); + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mb_qp_delta + iQpDelta = iCode; + + if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP); + } + + pCurDqLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp + pSlice->iLastMbQp = pCurDqLayer->pLumaQp[iMbXy]; + for (i = 0; i < 2; i++) { + pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp + + pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, + 51)]; + } + + BsStartCavlc (pBs); + + if (MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) { + //step1: Luma DC + if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 0, 16, g_kuiLumaDcZigzagScan, I16_LUMA_DC, + pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) { + return iRet;//abnormal + } + //step2: Luma AC + if (uiCbpL) { + for (i = 0; i < 16; i++) { + if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, i, iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, + g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurDqLayer->pScaledTCoeff[iMbXy] + (i << 4), + pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) { + return iRet;//abnormal + } + } + ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1])); + ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2])); + ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3])); + ST32A4 (&pNzc[12], LD32 
(&pNonZeroCount[1 + 8 * 4])); + } + } else { //non-MB_TYPE_INTRA16x16 + if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { + for (iId8x8 = 0; iId8x8 < 4; iId8x8++) { + iMbResProperty = (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8; + if (uiCbpL & (1 << iId8x8)) { + int32_t iIndex = (iId8x8 << 2); + for (iId4x4 = 0; iId4x4 < 4; iId4x4++) { + if ((iRet = WelsResidualBlockCavlc8x8 (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1, + g_kuiZigzagScan8x8 + iScanIdxStart, iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), iId4x4, + pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) { + return iRet; + } + iIndex++; + } + } else { + ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0); + ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0); + } + } + ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1])); + ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2])); + ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3])); + ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4])); + } else { // Normal T4x4 + for (iId8x8 = 0; iId8x8 < 4; iId8x8++) { + iMbResProperty = (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) ? 
LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER; + if (uiCbpL & (1 << iId8x8)) { + int32_t iIndex = (iId8x8 << 2); + for (iId4x4 = 0; iId4x4 < 4; iId4x4++) { + //Luma (DC and AC decoding together) + if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1, + g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (iIndex << 4), + pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) { + return iRet;//abnormal + } + iIndex++; + } + } else { + ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0); + ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0); + } + } + ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1])); + ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2])); + ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3])); + ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4])); + } + } + + + //chroma + //step1: DC + if (1 == uiCbpC || 2 == uiCbpC) { + for (i = 0; i < 2; i++) { //Cb Cr + if (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) + iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U; + else + iMbResProperty = i ? CHROMA_DC_V_INTER : CHROMA_DC_U_INTER; + + if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 16 + (i << 2), 4, g_kuiChromaDcScan, iMbResProperty, + pCurDqLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurDqLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) { + return iRet;//abnormal + } + } + } else { + } + //step2: AC + if (2 == uiCbpC) { + for (i = 0; i < 2; i++) { //Cb Cr + if (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) + iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U; + else + iMbResProperty = i ? 
CHROMA_AC_V_INTER : CHROMA_AC_U_INTER;
+
+        int32_t iIndex = 16 + (i << 2);
+        for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+          if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - WELS_MAX (iScanIdxStart,
+                      1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), iMbResProperty,
+                      pCurDqLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
+                      pCurDqLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
+            return iRet;//abnormal
+          }
+          iIndex++;
+        }
+      }
+      ST16A2 (&pNzc[16], LD16A2 (&pNonZeroCount[6 + 8 * 1]));
+      ST16A2 (&pNzc[20], LD16A2 (&pNonZeroCount[6 + 8 * 2]));
+      ST16A2 (&pNzc[18], LD16A2 (&pNonZeroCount[6 + 8 * 4]));
+      ST16A2 (&pNzc[22], LD16A2 (&pNonZeroCount[6 + 8 * 5]));
+    }
+    BsEndCavlc (pBs);
+  }
+
+  return ERR_NONE;
+}
+
+// Fills the block-function table with the portable C implementations, then
+// replaces all three entries with a NEON, AArch64-NEON or SSE2 variant for each
+// section below that is compiled in and whose capability bit is set in iCpu.
+// \param pFunc table to populate (all three pointers are always written)
+// \param iCpu  bit mask of WELS_CPU_* capability flags
+void WelsBlockFuncInit (SBlockFunc* pFunc, int32_t iCpu) {
+  pFunc->pWelsSetNonZeroCountFunc = WelsNonZeroCount_c;
+  pFunc->pWelsBlockZero16x16Func  = WelsBlockZero16x16_c;
+  pFunc->pWelsBlockZero8x8Func    = WelsBlockZero8x8_c;
+
+#ifdef HAVE_NEON
+  if (iCpu & WELS_CPU_NEON) {
+    pFunc->pWelsSetNonZeroCountFunc = WelsNonZeroCount_neon;
+    pFunc->pWelsBlockZero16x16Func  = WelsBlockZero16x16_neon;
+    pFunc->pWelsBlockZero8x8Func    = WelsBlockZero8x8_neon;
+  }
+#endif
+
+#ifdef HAVE_NEON_AARCH64
+  if (iCpu & WELS_CPU_NEON) {
+    pFunc->pWelsSetNonZeroCountFunc = WelsNonZeroCount_AArch64_neon;
+    pFunc->pWelsBlockZero16x16Func  = WelsBlockZero16x16_AArch64_neon;
+    pFunc->pWelsBlockZero8x8Func    = WelsBlockZero8x8_AArch64_neon;
+  }
+#endif
+
+#if defined(X86_ASM)
+  if (iCpu & WELS_CPU_SSE2) {
+    pFunc->pWelsSetNonZeroCountFunc = WelsNonZeroCount_sse2;
+    pFunc->pWelsBlockZero16x16Func  = WelsBlockZero16x16_sse2;
+    pFunc->pWelsBlockZero8x8Func    = WelsBlockZero8x8_sse2;
+  }
+#endif
+
+}
+
+// Fills iH rows of iW int16_t coefficients starting at pBlock; consecutive rows
+// are iStride elements apart.
+// The fill uses memset, so uiVal is written to every BYTE of each row: an
+// element only ends up equal to uiVal when uiVal is 0 (the value passed by
+// WelsBlockZero16x16_c).
+void WelsBlockInit (int16_t* pBlock, int iW, int iH, int iStride, uint8_t uiVal) {
+  int32_t i;
+  int16_t* pDst = pBlock;
+
+  for (i = 0; i < iH; i++) {
+    memset (pDst, uiVal, iW * sizeof (int16_t));
+    pDst += iStride; // advance by elements, not bytes
+  }
+}
+// Zeroes a 16x16 block of int16_t coefficients whose rows are iStride elements apart.
+void WelsBlockZero16x16_c (int16_t* 
pBlock, int32_t iStride) { + WelsBlockInit (pBlock, 16, 16, iStride, 0); +} + +void WelsBlockZero8x8_c (int16_t* pBlock, int32_t iStride) { + WelsBlockInit (pBlock, 8, 8, iStride, 0); +} + +// Compute the temporal-direct scaling factor that's common +// to all direct MBs in this slice, as per clause 8.4.1.2.3 +// of T-REC H.264 201704 +bool ComputeColocatedTemporalScaling (PWelsDecoderContext pCtx) { + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + PSlice pCurSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; + PSliceHeader pSliceHeader = &pCurSlice->sSliceHeaderExt.sSliceHeader; + if (!pSliceHeader->iDirectSpatialMvPredFlag) { + uint32_t uiRefCount = pSliceHeader->uiRefCount[LIST_0]; + if (pCtx->sRefPic.pRefList[LIST_1][0] != NULL) { + for (uint32_t i = 0; i < uiRefCount; ++i) { + if (pCtx->sRefPic.pRefList[LIST_0][i] != NULL) { + const int32_t poc0 = pCtx->sRefPic.pRefList[LIST_0][i]->iFramePoc; + const int32_t poc1 = pCtx->sRefPic.pRefList[LIST_1][0]->iFramePoc; + const int32_t poc = pSliceHeader->iPicOrderCntLsb; + const int32_t td = WELS_CLIP3 (poc1 - poc0, -128, 127); + if (td == 0) { + pCurSlice->iMvScale[LIST_0][i] = 1 << 8; + } else { + int32_t tb = WELS_CLIP3 (poc - poc0, -128, 127); + int32_t tx = (16384 + (abs (td) >> 1)) / td; + pCurSlice->iMvScale[LIST_0][i] = WELS_CLIP3 ((tb * tx + 32) >> 6, -1024, 1023); + } + } + } + } + } + return true; +} +} // namespace WelsDec diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/decoder.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/decoder.cpp new file mode 100644 index 000000000..d22c276d2 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/decoder.cpp @@ -0,0 +1,1250 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file decoder.c + * + * \brief Interfaces implementation introduced in decoder system architecture + * + * \date 03/10/2009 Created + * + ************************************************************************************* + */ +#include "codec_def.h" +#include "decoder.h" +#include "cpu.h" +#include "au_parser.h" +#include "get_intra_predictor.h" +#include "rec_mb.h" +#include "mc.h" +#include "decode_mb_aux.h" +#include "manage_dec_ref.h" +#include "decoder_core.h" +#include "deblocking.h" +#include "expand_pic.h" +#include "decode_slice.h" +#include "error_concealment.h" +#include "memory_align.h" +#include "wels_decoder_thread.h" + +namespace WelsDec { + +extern PPicture AllocPicture (PWelsDecoderContext pCtx, const int32_t kiPicWidth, const int32_t kiPicHeight); + +extern void FreePicture (PPicture pPic, CMemoryAlign* pMa); + +static int32_t CreatePicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, const int32_t kiSize, + const int32_t kiPicWidth, const int32_t kiPicHeight) { + + PPicBuff pPicBuf = NULL; + int32_t iPicIdx = 0; + if (kiSize <= 0 || kiPicWidth <= 0 || kiPicHeight <= 0) { + return ERR_INFO_INVALID_PARAM; + } + + CMemoryAlign* pMa = pCtx->pMemAlign; + + pPicBuf = (PPicBuff)pMa->WelsMallocz (sizeof (SPicBuff), "PPicBuff"); + + if (NULL == pPicBuf) { + return ERR_INFO_OUT_OF_MEMORY; + } + + pPicBuf->ppPic = (PPicture*)pMa->WelsMallocz (kiSize * sizeof (PPicture), "PPicture*"); + + if (NULL == pPicBuf->ppPic) { + pPicBuf->iCapacity = 0; + DestroyPicBuff (pCtx, &pPicBuf, pMa); + return ERR_INFO_OUT_OF_MEMORY; + } + + for (iPicIdx = 0; iPicIdx < kiSize; ++ iPicIdx) { + PPicture pPic = AllocPicture (pCtx, kiPicWidth, kiPicHeight); + if (NULL == pPic) { + // init capacity first for free memory + pPicBuf->iCapacity = iPicIdx; + DestroyPicBuff (pCtx, &pPicBuf, pMa); + return ERR_INFO_OUT_OF_MEMORY; + } + pPicBuf->ppPic[iPicIdx] = pPic; + } + +// initialize context in queue + pPicBuf->iCapacity = kiSize; + pPicBuf->iCurrentIdx = 0; + * 
ppPicBuf = pPicBuf; + + return ERR_NONE; +} + +static int32_t IncreasePicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, const int32_t kiOldSize, + const int32_t kiPicWidth, const int32_t kiPicHeight, const int32_t kiNewSize) { + PPicBuff pPicOldBuf = *ppPicBuf; + PPicBuff pPicNewBuf = NULL; + int32_t iPicIdx = 0; + if (kiOldSize <= 0 || kiNewSize <= 0 || kiPicWidth <= 0 || kiPicHeight <= 0) { + return ERR_INFO_INVALID_PARAM; + } + + CMemoryAlign* pMa = pCtx->pMemAlign; + pPicNewBuf = (PPicBuff)pMa->WelsMallocz (sizeof (SPicBuff), "PPicBuff"); + + if (NULL == pPicNewBuf) { + return ERR_INFO_OUT_OF_MEMORY; + } + + pPicNewBuf->ppPic = (PPicture*)pMa->WelsMallocz (kiNewSize * sizeof (PPicture), "PPicture*"); + + if (NULL == pPicNewBuf->ppPic) { + pPicNewBuf->iCapacity = 0; + DestroyPicBuff (pCtx, &pPicNewBuf, pMa); + return ERR_INFO_OUT_OF_MEMORY; + } + + // increase new PicBuf + for (iPicIdx = kiOldSize; iPicIdx < kiNewSize; ++ iPicIdx) { + PPicture pPic = AllocPicture (pCtx, kiPicWidth, kiPicHeight); + if (NULL == pPic) { + // Set maximum capacity as the new malloc memory at the tail + pPicNewBuf->iCapacity = iPicIdx; + DestroyPicBuff (pCtx, &pPicNewBuf, pMa); + return ERR_INFO_OUT_OF_MEMORY; + } + pPicNewBuf->ppPic[iPicIdx] = pPic; + } + + // copy old PicBuf to new PicBuf + memcpy (pPicNewBuf->ppPic, pPicOldBuf->ppPic, kiOldSize * sizeof (PPicture)); + +// initialize context in queue + pPicNewBuf->iCapacity = kiNewSize; + pPicNewBuf->iCurrentIdx = pPicOldBuf->iCurrentIdx; + * ppPicBuf = pPicNewBuf; + + for (int32_t i = 0; i < pPicNewBuf->iCapacity; i++) { + pPicNewBuf->ppPic[i]->bUsedAsRef = false; + pPicNewBuf->ppPic[i]->bIsLongRef = false; + pPicNewBuf->ppPic[i]->iRefCount = 0; + pPicNewBuf->ppPic[i]->bIsComplete = false; + } +// remove old PicBuf + if (pPicOldBuf->ppPic != NULL) { + pMa->WelsFree (pPicOldBuf->ppPic, "pPicOldBuf->queue"); + pPicOldBuf->ppPic = NULL; + } + pPicOldBuf->iCapacity = 0; + pPicOldBuf->iCurrentIdx = 0; + pMa->WelsFree (pPicOldBuf, 
"pPicOldBuf"); + pPicOldBuf = NULL; + return ERR_NONE; +} + +static int32_t DecreasePicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, const int32_t kiOldSize, + const int32_t kiPicWidth, const int32_t kiPicHeight, const int32_t kiNewSize) { + PPicBuff pPicOldBuf = *ppPicBuf; + PPicBuff pPicNewBuf = NULL; + int32_t iPicIdx = 0; + if (kiOldSize <= 0 || kiNewSize <= 0 || kiPicWidth <= 0 || kiPicHeight <= 0) { + return ERR_INFO_INVALID_PARAM; + } + + CMemoryAlign* pMa = pCtx->pMemAlign; + + pPicNewBuf = (PPicBuff)pMa->WelsMallocz (sizeof (SPicBuff), "PPicBuff"); + + if (NULL == pPicNewBuf) { + return ERR_INFO_OUT_OF_MEMORY; + } + + pPicNewBuf->ppPic = (PPicture*)pMa->WelsMallocz (kiNewSize * sizeof (PPicture), "PPicture*"); + + if (NULL == pPicNewBuf->ppPic) { + pPicNewBuf->iCapacity = 0; + DestroyPicBuff (pCtx, &pPicNewBuf, pMa); + return ERR_INFO_OUT_OF_MEMORY; + } + + ResetReorderingPictureBuffers (pCtx->pPictReoderingStatus, pCtx->pPictInfoList, false); + + int32_t iPrevPicIdx = -1; + for (iPrevPicIdx = 0; iPrevPicIdx < kiOldSize; ++iPrevPicIdx) { + if (pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb == pPicOldBuf->ppPic[iPrevPicIdx]) { + break; + } + } + int32_t iDelIdx; + if (iPrevPicIdx < kiOldSize && iPrevPicIdx >= kiNewSize) { + // found pPreviousDecodedPictureInDpb, + pPicNewBuf->ppPic[0] = pPicOldBuf->ppPic[iPrevPicIdx]; + pPicNewBuf->iCurrentIdx = 0; + memcpy (pPicNewBuf->ppPic + 1, pPicOldBuf->ppPic, (kiNewSize - 1) * sizeof (PPicture)); + iDelIdx = kiNewSize - 1; + } else { + memcpy (pPicNewBuf->ppPic, pPicOldBuf->ppPic, kiNewSize * sizeof (PPicture)); + pPicNewBuf->iCurrentIdx = iPrevPicIdx < kiNewSize ? 
iPrevPicIdx : 0; + iDelIdx = kiNewSize; + } + + //update references due to allocation changes + //all references' references have to be reset oss-buzz 14423 + for (int32_t i = 0; i < kiNewSize; i++) { + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + int32_t j = -1; + while (++j < MAX_DPB_COUNT && pPicNewBuf->ppPic[i]->pRefPic[listIdx][j] != NULL) { + pPicNewBuf->ppPic[i]->pRefPic[listIdx][j] = NULL; + } + } + } + + for (iPicIdx = iDelIdx; iPicIdx < kiOldSize; iPicIdx++) { + if (iPrevPicIdx != iPicIdx) { + if (pPicOldBuf->ppPic[iPicIdx] != NULL) { + FreePicture (pPicOldBuf->ppPic[iPicIdx], pMa); + pPicOldBuf->ppPic[iPicIdx] = NULL; + } + } + } + + // initialize context in queue + pPicNewBuf->iCapacity = kiNewSize; + * ppPicBuf = pPicNewBuf; + + for (int32_t i = 0; i < pPicNewBuf->iCapacity; i++) { + pPicNewBuf->ppPic[i]->bUsedAsRef = false; + pPicNewBuf->ppPic[i]->bIsLongRef = false; + pPicNewBuf->ppPic[i]->iRefCount = 0; + pPicNewBuf->ppPic[i]->bIsComplete = false; + } + // remove old PicBuf + if (pPicOldBuf->ppPic != NULL) { + pMa->WelsFree (pPicOldBuf->ppPic, "pPicOldBuf->queue"); + pPicOldBuf->ppPic = NULL; + } + pPicOldBuf->iCapacity = 0; + pPicOldBuf->iCurrentIdx = 0; + pMa->WelsFree (pPicOldBuf, "pPicOldBuf"); + pPicOldBuf = NULL; + + return ERR_NONE; +} + +void DestroyPicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, CMemoryAlign* pMa) { + PPicBuff pPicBuf = NULL; + + ResetReorderingPictureBuffers (pCtx->pPictReoderingStatus, pCtx->pPictInfoList, false); + if (pCtx->pDstInfo) pCtx->pDstInfo->iBufferStatus = 0; + + if (NULL == ppPicBuf || NULL == *ppPicBuf) + return; + + pPicBuf = *ppPicBuf; + while (pPicBuf->ppPic != NULL) { + int32_t iPicIdx = 0; + while (iPicIdx < pPicBuf->iCapacity) { + PPicture pPic = pPicBuf->ppPic[iPicIdx]; + if (pPic != NULL) { + FreePicture (pPic, pMa); + } + pPic = NULL; + ++ iPicIdx; + } + + pMa->WelsFree (pPicBuf->ppPic, "pPicBuf->queue"); + + pPicBuf->ppPic = NULL; + } + pPicBuf->iCapacity = 0; + 
pPicBuf->iCurrentIdx = 0; + + pMa->WelsFree (pPicBuf, "pPicBuf"); + + pPicBuf = NULL; + *ppPicBuf = NULL; +} + +//reset picture reodering buffer list +void ResetReorderingPictureBuffers (PPictReoderingStatus pPictReoderingStatus, PPictInfo pPictInfo, + const bool& fullReset) { + if (pPictReoderingStatus != NULL && pPictInfo != NULL) { + int32_t pictInfoListCount = fullReset ? 16 : (pPictReoderingStatus->iLargestBufferedPicIndex + 1); + pPictReoderingStatus->iPictInfoIndex = 0; + pPictReoderingStatus->iMinPOC = IMinInt32; + pPictReoderingStatus->iNumOfPicts = 0; + pPictReoderingStatus->iLastGOPRemainPicts = 0; + pPictReoderingStatus->iLastWrittenPOC = IMinInt32; + pPictReoderingStatus->iLargestBufferedPicIndex = 0; + for (int32_t i = 0; i < pictInfoListCount; ++i) { + pPictInfo[i].bLastGOP = false; + pPictInfo[i].iPOC = IMinInt32; + } + pPictInfo->sBufferInfo.iBufferStatus = 0; + } +} + +/* + * fill data fields in default for decoder context + */ +void WelsDecoderDefaults (PWelsDecoderContext pCtx, SLogContext* pLogCtx) { + int32_t iCpuCores = 1; + pCtx->sLogCtx = *pLogCtx; + + pCtx->pArgDec = NULL; + + pCtx->bHaveGotMemory = false; // not ever request memory blocks for decoder context related + pCtx->uiCpuFlag = 0; + + pCtx->bAuReadyFlag = 0; // au data is not ready + pCtx->bCabacInited = false; + + pCtx->uiCpuFlag = WelsCPUFeatureDetect (&iCpuCores); + + pCtx->iImgWidthInPixel = 0; + pCtx->iImgHeightInPixel = 0; // alloc picture data when picture size is available + pCtx->iLastImgWidthInPixel = 0; + pCtx->iLastImgHeightInPixel = 0; + pCtx->bFreezeOutput = true; + + pCtx->iFrameNum = -1; + pCtx->pLastDecPicInfo->iPrevFrameNum = -1; + pCtx->iErrorCode = ERR_NONE; + + pCtx->pDec = NULL; + + pCtx->pTempDec = NULL; + + WelsResetRefPic (pCtx); + + pCtx->iActiveFmoNum = 0; + + pCtx->pPicBuff = NULL; + + //pCtx->sSpsPpsCtx.bAvcBasedFlag = true; + pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb = NULL; + pCtx->pDecoderStatistics->iAvgLumaQp = -1; + 
pCtx->pDecoderStatistics->iStatisticsLogInterval = 1000; + pCtx->bUseScalingList = false; + /*pCtx->sSpsPpsCtx.iSpsErrorIgnored = 0; + pCtx->sSpsPpsCtx.iSubSpsErrorIgnored = 0; + pCtx->sSpsPpsCtx.iPpsErrorIgnored = 0; + pCtx->sSpsPpsCtx.iPPSInvalidNum = 0; + pCtx->sSpsPpsCtx.iPPSLastInvalidId = -1; + pCtx->sSpsPpsCtx.iSPSInvalidNum = 0; + pCtx->sSpsPpsCtx.iSPSLastInvalidId = -1; + pCtx->sSpsPpsCtx.iSubSPSInvalidNum = 0; + pCtx->sSpsPpsCtx.iSubSPSLastInvalidId = -1; + */ + pCtx->iFeedbackNalRefIdc = -1; //initialize + pCtx->pLastDecPicInfo->iPrevPicOrderCntMsb = 0; + pCtx->pLastDecPicInfo->iPrevPicOrderCntLsb = 0; + +} + +/* +* fill data fields in SPS and PPS default for decoder context +*/ +void WelsDecoderSpsPpsDefaults (SWelsDecoderSpsPpsCTX& sSpsPpsCtx) { + sSpsPpsCtx.bSpsExistAheadFlag = false; + sSpsPpsCtx.bSubspsExistAheadFlag = false; + sSpsPpsCtx.bPpsExistAheadFlag = false; + sSpsPpsCtx.bAvcBasedFlag = true; + sSpsPpsCtx.iSpsErrorIgnored = 0; + sSpsPpsCtx.iSubSpsErrorIgnored = 0; + sSpsPpsCtx.iPpsErrorIgnored = 0; + sSpsPpsCtx.iPPSInvalidNum = 0; + sSpsPpsCtx.iPPSLastInvalidId = -1; + sSpsPpsCtx.iSPSInvalidNum = 0; + sSpsPpsCtx.iSPSLastInvalidId = -1; + sSpsPpsCtx.iSubSPSInvalidNum = 0; + sSpsPpsCtx.iSubSPSLastInvalidId = -1; + sSpsPpsCtx.iSeqId = -1; +} + +/* +* fill last decoded picture info +*/ +void WelsDecoderLastDecPicInfoDefaults (SWelsLastDecPicInfo& sLastDecPicInfo) { + sLastDecPicInfo.iPrevPicOrderCntMsb = 0; + sLastDecPicInfo.iPrevPicOrderCntLsb = 0; + sLastDecPicInfo.pPreviousDecodedPictureInDpb = NULL; + sLastDecPicInfo.iPrevFrameNum = -1; + sLastDecPicInfo.bLastHasMmco5 = false; + sLastDecPicInfo.uiDecodingTimeStamp = 0; +} + +/*! 
+* \brief copy SpsPps from one Ctx to another ctx for threaded code +*/ +void CopySpsPps (PWelsDecoderContext pFromCtx, PWelsDecoderContext pToCtx) { + pToCtx->sSpsPpsCtx = pFromCtx->sSpsPpsCtx; + PAccessUnit pFromCurAu = pFromCtx->pAccessUnitList; + PSps pTmpLayerSps[MAX_LAYER_NUM]; + for (int i = 0; i < MAX_LAYER_NUM; i++) { + pTmpLayerSps[i] = NULL; + } + // track the layer sps for the current au + for (unsigned int i = pFromCurAu->uiStartPos; i <= pFromCurAu->uiEndPos; i++) { + uint32_t uiDid = pFromCurAu->pNalUnitsList[i]->sNalHeaderExt.uiDependencyId; + pTmpLayerSps[uiDid] = pFromCurAu->pNalUnitsList[i]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.pSps; + for (unsigned int j = 0; j < MAX_SPS_COUNT + 1; ++j) { + if (&pFromCtx->sSpsPpsCtx.sSpsBuffer[j] == pTmpLayerSps[uiDid]) { + pTmpLayerSps[uiDid] = &pToCtx->sSpsPpsCtx.sSpsBuffer[j]; + break; + } + } + } + for (int i = 0; i < MAX_LAYER_NUM; i++) { + if (pTmpLayerSps[i] != NULL) { + pToCtx->sSpsPpsCtx.pActiveLayerSps[i] = pTmpLayerSps[i]; + } + } +} + +/* + * destory_mb_blocks + */ + +/* + * get size of reference picture list in target layer incoming, = (iNumRefFrames + */ +static inline int32_t GetTargetRefListSize (PWelsDecoderContext pCtx) { + int32_t iNumRefFrames = 0; + // +2 for EC MV Copy buffer exchange + if ((pCtx == NULL) || (pCtx->pSps == NULL)) { + iNumRefFrames = MAX_REF_PIC_COUNT + 2; + } else { + iNumRefFrames = pCtx->pSps->iNumRefFrames + 2; + int32_t iThreadCount = GetThreadCount (pCtx); + if (iThreadCount > 1) { + iNumRefFrames = MAX_REF_PIC_COUNT; + } + } + +#ifdef LONG_TERM_REF + //pic_queue size minimum set 2 + if (iNumRefFrames < 2) { + iNumRefFrames = 2; + } +#endif + + return iNumRefFrames; +} + +/* + * request memory blocks for decoder avc part + */ +int32_t WelsRequestMem (PWelsDecoderContext pCtx, const int32_t kiMbWidth, const int32_t kiMbHeight, + bool& bReallocFlag) { + const int32_t kiPicWidth = kiMbWidth << 4; + const int32_t kiPicHeight = kiMbHeight << 4; + int32_t iErr = 
ERR_NONE; + + int32_t iPicQueueSize = 0; // adaptive size of picture queue, = (pSps->iNumRefFrames x 2) + bReallocFlag = false; + bool bNeedChangePicQueue = true; + CMemoryAlign* pMa = pCtx->pMemAlign; + + WELS_VERIFY_RETURN_IF (ERR_INFO_INVALID_PARAM, (NULL == pCtx || kiPicWidth <= 0 || kiPicHeight <= 0)) + + // Fixed the issue about different gop size over last, 5/17/2010 + // get picture queue size currently + iPicQueueSize = GetTargetRefListSize (pCtx); // adaptive size of picture queue, = (pSps->iNumRefFrames x 2) + pCtx->iPicQueueNumber = iPicQueueSize; + if (pCtx->pPicBuff != NULL + && pCtx->pPicBuff->iCapacity == + iPicQueueSize) // comparing current picture queue size requested and previous allocation picture queue + bNeedChangePicQueue = false; + // HD based pic buffer need consider memory size consumed when switch from 720p to other lower size + WELS_VERIFY_RETURN_IF (ERR_NONE, pCtx->bHaveGotMemory && (kiPicWidth == pCtx->iImgWidthInPixel + && kiPicHeight == pCtx->iImgHeightInPixel) && (!bNeedChangePicQueue)) // have same scaled buffer + + // sync update pRefList + if (GetThreadCount (pCtx) <= 1) { + WelsResetRefPic (pCtx); // added to sync update ref list due to pictures are free + } + + if (pCtx->bHaveGotMemory && (kiPicWidth == pCtx->iImgWidthInPixel && kiPicHeight == pCtx->iImgHeightInPixel) + && pCtx->pPicBuff != NULL && pCtx->pPicBuff->iCapacity != iPicQueueSize) { + // currently only active for LIST_0 due to have no B frames + // Actually just need one memory allocation for the PicBuff. While it needs two pointer list (LIST_0 and LIST_1). 
+ WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO, + "WelsRequestMem(): memory re-alloc for no resolution change (size = %d * %d), ref list size change from %d to %d", + kiPicWidth, kiPicHeight, pCtx->pPicBuff->iCapacity, iPicQueueSize); + if (pCtx->pPicBuff->iCapacity < iPicQueueSize) { + iErr = IncreasePicBuff (pCtx, &pCtx->pPicBuff, pCtx->pPicBuff->iCapacity, kiPicWidth, kiPicHeight, + iPicQueueSize); + } else { + iErr = DecreasePicBuff (pCtx, &pCtx->pPicBuff, pCtx->pPicBuff->iCapacity, kiPicWidth, kiPicHeight, + iPicQueueSize); + } + } else { + if (pCtx->bHaveGotMemory) + WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO, + "WelsRequestMem(): memory re-alloc for resolution change, size change from %d * %d to %d * %d, ref list size change from %d to %d", + pCtx->iImgWidthInPixel, pCtx->iImgHeightInPixel, kiPicWidth, kiPicHeight, pCtx->pPicBuff->iCapacity, + iPicQueueSize); + else + WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO, "WelsRequestMem(): memory alloc size = %d * %d, ref list size = %d", + kiPicWidth, kiPicHeight, iPicQueueSize); + // for Recycled_Pic_Queue + PPicBuff* ppPic = &pCtx->pPicBuff; + if (NULL != ppPic && NULL != *ppPic) { + DestroyPicBuff (pCtx, ppPic, pMa); + } + + + pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb = NULL; + + // currently only active for LIST_0 due to have no B frames + iErr = CreatePicBuff (pCtx, &pCtx->pPicBuff, iPicQueueSize, kiPicWidth, kiPicHeight); + } + + if (iErr != ERR_NONE) + return iErr; + + + pCtx->iImgWidthInPixel = kiPicWidth; // target width of image to be reconstruted while decoding + pCtx->iImgHeightInPixel = kiPicHeight; // target height of image to be reconstruted while decoding + + pCtx->bHaveGotMemory = true; // global memory for decoder context related is requested + pCtx->pDec = NULL; // need prefetch a new pic due to spatial size changed + + if (pCtx->pCabacDecEngine == NULL) + pCtx->pCabacDecEngine = (SWelsCabacDecEngine*) pMa->WelsMallocz (sizeof (SWelsCabacDecEngine), "pCtx->pCabacDecEngine"); + 
WELS_VERIFY_RETURN_IF (ERR_INFO_OUT_OF_MEMORY, (NULL == pCtx->pCabacDecEngine)) + + bReallocFlag = true; // memory re-allocation successfully finished + return ERR_NONE; +} + +/* + * free memory dynamically allocated during decoder + */ +void WelsFreeDynamicMemory (PWelsDecoderContext pCtx) { + + CMemoryAlign* pMa = pCtx->pMemAlign; + + //free dq layer memory + UninitialDqLayersContext (pCtx); + + //free FMO memory + ResetFmoList (pCtx); + + //free ref-pic list & picture memory + WelsResetRefPic (pCtx); + + PPicBuff* pPicBuff = &pCtx->pPicBuff; + if (NULL != pPicBuff && NULL != *pPicBuff) { + DestroyPicBuff (pCtx, pPicBuff, pMa); + } + if (GetThreadCount (pCtx) > 1) { + //prevent from double destruction of PPicBuff + PWelsDecoderThreadCTX pThreadCtx = (PWelsDecoderThreadCTX) (pCtx->pThreadCtx); + int32_t threadCount = pThreadCtx->sThreadInfo.uiThrMaxNum; + int32_t id = pThreadCtx->sThreadInfo.uiThrNum; + for (int32_t i = 0; i < threadCount; ++i) { + if (pThreadCtx[i - id].pCtx != NULL) { + pThreadCtx[i - id].pCtx->pPicBuff = NULL; + } + } + } + + if (pCtx->pTempDec) { + FreePicture (pCtx->pTempDec, pCtx->pMemAlign); + pCtx->pTempDec = NULL; + } + + // added for safe memory + pCtx->iImgWidthInPixel = 0; + pCtx->iImgHeightInPixel = 0; + pCtx->iLastImgWidthInPixel = 0; + pCtx->iLastImgHeightInPixel = 0; + pCtx->bFreezeOutput = true; + pCtx->bHaveGotMemory = false; + + //free CABAC memory + pMa->WelsFree (pCtx->pCabacDecEngine, "pCtx->pCabacDecEngine"); +} + +/*! 
+ * \brief Open decoder + */ +int32_t WelsOpenDecoder (PWelsDecoderContext pCtx, SLogContext* pLogCtx) { + int iRet = ERR_NONE; + // function pointers + InitDecFuncs (pCtx, pCtx->uiCpuFlag); + + // vlc tables + InitVlcTable (pCtx->pVlcTable); + + // static memory + iRet = WelsInitStaticMemory (pCtx); + if (ERR_NONE != iRet) { + pCtx->iErrorCode |= dsOutOfMemory; + WelsLog (pLogCtx, WELS_LOG_ERROR, "WelsInitStaticMemory() failed in WelsOpenDecoder()."); + return iRet; + } + +#ifdef LONG_TERM_REF + pCtx->bParamSetsLostFlag = true; +#else + pCtx->bReferenceLostAtT0Flag = true; // should be true to waiting IDR at incoming AU bits following, 6/4/2010 +#endif //LONG_TERM_REF + pCtx->bNewSeqBegin = true; + pCtx->bPrintFrameErrorTraceFlag = true; + pCtx->iIgnoredErrorInfoPacketCount = 0; + pCtx->bFrameFinish = true; + return iRet; +} + +/*! + * \brief Close decoder + */ +void WelsCloseDecoder (PWelsDecoderContext pCtx) { + WelsFreeDynamicMemory (pCtx); + + WelsFreeStaticMemory (pCtx); + +#ifdef LONG_TERM_REF + pCtx->bParamSetsLostFlag = false; +#else + pCtx->bReferenceLostAtT0Flag = false; +#endif + pCtx->bNewSeqBegin = false; + pCtx->bPrintFrameErrorTraceFlag = false; +} + +/*! + * \brief configure decoder parameters + */ +int32_t DecoderConfigParam (PWelsDecoderContext pCtx, const SDecodingParam* kpParam) { + if (NULL == pCtx || NULL == kpParam) + return ERR_INFO_INVALID_PARAM; + + memcpy (pCtx->pParam, kpParam, sizeof (SDecodingParam)); + if ((pCtx->pParam->eEcActiveIdc > ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE) + || (pCtx->pParam->eEcActiveIdc < ERROR_CON_DISABLE)) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, + "eErrorConMethod (%d) not in range: (%d - %d). 
Set as default value: (%d).", pCtx->pParam->eEcActiveIdc, + ERROR_CON_DISABLE, ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE, + ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE); + pCtx->pParam->eEcActiveIdc = ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE; + } + + if (pCtx->pParam->bParseOnly) //parse only, disable EC method + pCtx->pParam->eEcActiveIdc = ERROR_CON_DISABLE; + InitErrorCon (pCtx); + + if (VIDEO_BITSTREAM_SVC == pCtx->pParam->sVideoProperty.eVideoBsType || + VIDEO_BITSTREAM_AVC == pCtx->pParam->sVideoProperty.eVideoBsType) { + pCtx->eVideoType = pCtx->pParam->sVideoProperty.eVideoBsType; + } else { + pCtx->eVideoType = VIDEO_BITSTREAM_DEFAULT; + } + + WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO, "eVideoType: %d", pCtx->eVideoType); + + return ERR_NONE; +} + +/*! + ************************************************************************************* + * \brief Initialize Wels decoder parameters and memory + * + * \param pCtx input context to be initialized at first stage + * + * \return 0 - successed + * \return 1 - failed + * + * \note N/A + ************************************************************************************* + */ +int32_t WelsInitDecoder (PWelsDecoderContext pCtx, SLogContext* pLogCtx) { + if (pCtx == NULL) { + return ERR_INFO_INVALID_PTR; + } + + // open decoder + return WelsOpenDecoder (pCtx, pLogCtx); +} + +/*! 
+ ************************************************************************************* + * \brief Uninitialize Wels decoder parameters and memory + * + * \param pCtx input context to be uninitialized at release stage + * + * \return NONE + * + * \note N/A + ************************************************************************************* + */ +void WelsEndDecoder (PWelsDecoderContext pCtx) { + // close decoder + WelsCloseDecoder (pCtx); +} + +void GetVclNalTemporalId (PWelsDecoderContext pCtx) { + PAccessUnit pAccessUnit = pCtx->pAccessUnitList; + int32_t idx = pAccessUnit->uiStartPos; + + pCtx->iFeedbackVclNalInAu = FEEDBACK_VCL_NAL; + pCtx->iFeedbackTidInAu = pAccessUnit->pNalUnitsList[idx]->sNalHeaderExt.uiTemporalId; + pCtx->iFeedbackNalRefIdc = pAccessUnit->pNalUnitsList[idx]->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc; +} + +/*! + ************************************************************************************* + * \brief First entrance to decoding core interface. + * + * \param pCtx decoder context + * \param pBufBs bit streaming buffer + * \param kBsLen size in bytes length of bit streaming buffer input + * \param ppDst picture payload data to be output + * \param pDstBufInfo buf information of ouput data + * + * \return 0 - successed + * \return 1 - failed + * + * \note N/A + ************************************************************************************* + */ +int32_t WelsDecodeBs (PWelsDecoderContext pCtx, const uint8_t* kpBsBuf, const int32_t kiBsLen, + uint8_t** ppDst, SBufferInfo* pDstBufInfo, SParserBsInfo* pDstBsInfo) { + if (!pCtx->bEndOfStreamFlag) { + SDataBuffer* pRawData = &pCtx->sRawData; + SDataBuffer* pSavedData = NULL; + + int32_t iSrcIdx = 0; //the index of source bit-stream till now after parsing one or more NALs + int32_t iSrcConsumed = 0; // consumed bit count of source bs + int32_t iDstIdx = 0; //the size of current NAL after 0x03 removal and 00 00 01 removal + int32_t iSrcLength = 0; //the total size of current AU or NAL + 
int32_t iRet = 0; + int32_t iConsumedBytes = 0; + int32_t iOffset = 0; + + uint8_t* pSrcNal = NULL; + uint8_t* pDstNal = NULL; + uint8_t* pNalPayload = NULL; + + + if (NULL == DetectStartCodePrefix (kpBsBuf, &iOffset, + kiBsLen)) { //CAN'T find the 00 00 01 start prefix from the source buffer + pCtx->iErrorCode |= dsBitstreamError; + return dsBitstreamError; + } + + pSrcNal = const_cast (kpBsBuf) + iOffset; + iSrcLength = kiBsLen - iOffset; + + if ((kiBsLen + 4) > (pRawData->pEnd - pRawData->pCurPos)) { + pRawData->pCurPos = pRawData->pHead; + } + + if (pCtx->pParam->bParseOnly) { + pSavedData = &pCtx->sSavedData; + if ((kiBsLen + 4) > (pSavedData->pEnd - pSavedData->pCurPos)) { + pSavedData->pCurPos = pSavedData->pHead; + } + } + //copy raw data from source buffer (application) to raw data buffer (codec inside) + //0x03 removal and extract all of NAL Unit from current raw data + pDstNal = pRawData->pCurPos; + + bool bNalStartBytes = false; + + while (iSrcConsumed < iSrcLength) { + if ((2 + iSrcConsumed < iSrcLength) && (0 == LD16 (pSrcNal + iSrcIdx)) && (pSrcNal[2 + iSrcIdx] <= 0x03)) { + if (bNalStartBytes && (pSrcNal[2 + iSrcIdx] != 0x00 && pSrcNal[2 + iSrcIdx] != 0x01)) { + pCtx->iErrorCode |= dsBitstreamError; + return pCtx->iErrorCode; + } + + if (pSrcNal[2 + iSrcIdx] == 0x02) { + pCtx->iErrorCode |= dsBitstreamError; + return pCtx->iErrorCode; + } else if (pSrcNal[2 + iSrcIdx] == 0x00) { + pDstNal[iDstIdx++] = pSrcNal[iSrcIdx++]; + iSrcConsumed++; + bNalStartBytes = true; + } else if (pSrcNal[2 + iSrcIdx] == 0x03) { + if ((3 + iSrcConsumed < iSrcLength) && pSrcNal[3 + iSrcIdx] > 0x03) { + pCtx->iErrorCode |= dsBitstreamError; + return pCtx->iErrorCode; + } else { + ST16 (pDstNal + iDstIdx, 0); + iDstIdx += 2; + iSrcIdx += 3; + iSrcConsumed += 3; + } + } else { // 0x01 + bNalStartBytes = false; + + iConsumedBytes = 0; + pDstNal[iDstIdx] = pDstNal[iDstIdx + 1] = pDstNal[iDstIdx + 2] = pDstNal[iDstIdx + 3] = + 0; // set 4 reserved bytes to zero + pNalPayload = 
ParseNalHeader (pCtx, &pCtx->sCurNalHead, pDstNal, iDstIdx, pSrcNal - 3, iSrcIdx + 3, &iConsumedBytes); + if (pNalPayload) { //parse correct + if (IS_PARAM_SETS_NALS (pCtx->sCurNalHead.eNalUnitType)) { + iRet = ParseNonVclNal (pCtx, pNalPayload, iDstIdx - iConsumedBytes, pSrcNal - 3, iSrcIdx + 3); + } + CheckAndFinishLastPic (pCtx, ppDst, pDstBufInfo); + if (pCtx->bAuReadyFlag && pCtx->pAccessUnitList->uiAvailUnitsNum != 0) { + if (GetThreadCount (pCtx) <= 1) { + ConstructAccessUnit (pCtx, ppDst, pDstBufInfo); + } else { + pCtx->pAccessUnitList->uiAvailUnitsNum = 1; + } + } + } + DecodeFinishUpdate (pCtx); + + if ((dsOutOfMemory | dsNoParamSets) & pCtx->iErrorCode) { +#ifdef LONG_TERM_REF + pCtx->bParamSetsLostFlag = true; +#else + pCtx->bReferenceLostAtT0Flag = true; +#endif + if (dsOutOfMemory & pCtx->iErrorCode) { + return pCtx->iErrorCode; + } + } + if (iRet) { + iRet = 0; + if (dsNoParamSets & pCtx->iErrorCode) { +#ifdef LONG_TERM_REF + pCtx->bParamSetsLostFlag = true; +#else + pCtx->bReferenceLostAtT0Flag = true; +#endif + } + return pCtx->iErrorCode; + } + + pDstNal += (iDstIdx + 4); //init, increase 4 reserved zero bytes, used to store the next NAL + if ((iSrcLength - iSrcConsumed + 4) > (pRawData->pEnd - pDstNal)) { + pDstNal = pRawData->pCurPos = pRawData->pHead; + } else { + pRawData->pCurPos = pDstNal; + } + + pSrcNal += iSrcIdx + 3; + iSrcConsumed += 3; + iSrcIdx = 0; + iDstIdx = 0; //reset 0, used to statistic the length of next NAL + } + continue; + } + pDstNal[iDstIdx++] = pSrcNal[iSrcIdx++]; + iSrcConsumed++; + } + + //last NAL decoding + + iConsumedBytes = 0; + pDstNal[iDstIdx] = pDstNal[iDstIdx + 1] = pDstNal[iDstIdx + 2] = pDstNal[iDstIdx + 3] = + 0; // set 4 reserved bytes to zero + pRawData->pCurPos = pDstNal + iDstIdx + 4; //init, increase 4 reserved zero bytes, used to store the next NAL + pNalPayload = ParseNalHeader (pCtx, &pCtx->sCurNalHead, pDstNal, iDstIdx, pSrcNal - 3, iSrcIdx + 3, &iConsumedBytes); + if (pNalPayload) { //parse correct 
+ if (IS_PARAM_SETS_NALS (pCtx->sCurNalHead.eNalUnitType)) { + iRet = ParseNonVclNal (pCtx, pNalPayload, iDstIdx - iConsumedBytes, pSrcNal - 3, iSrcIdx + 3); + } + if (GetThreadCount (pCtx) <= 1) { + CheckAndFinishLastPic (pCtx, ppDst, pDstBufInfo); + } + if (pCtx->bAuReadyFlag && pCtx->pAccessUnitList->uiAvailUnitsNum != 0) { + if (GetThreadCount (pCtx) <= 1) { + ConstructAccessUnit (pCtx, ppDst, pDstBufInfo); + } else { + pCtx->pAccessUnitList->uiAvailUnitsNum = 1; + } + } + } + DecodeFinishUpdate (pCtx); + + if ((dsOutOfMemory | dsNoParamSets) & pCtx->iErrorCode) { +#ifdef LONG_TERM_REF + pCtx->bParamSetsLostFlag = true; +#else + pCtx->bReferenceLostAtT0Flag = true; +#endif + return pCtx->iErrorCode; + } + if (iRet) { + iRet = 0; + if (dsNoParamSets & pCtx->iErrorCode) { +#ifdef LONG_TERM_REF + pCtx->bParamSetsLostFlag = true; +#else + pCtx->bReferenceLostAtT0Flag = true; +#endif + } + return pCtx->iErrorCode; + } + } else { /* no supplementary picture payload input, but stored a picture */ + PAccessUnit pCurAu = + pCtx->pAccessUnitList; // current access unit, it will never point to NULL after decode's successful initialization + + if (pCurAu->uiAvailUnitsNum == 0) { + return pCtx->iErrorCode; + } else { + pCtx->pAccessUnitList->uiEndPos = pCtx->pAccessUnitList->uiAvailUnitsNum - 1; + + ConstructAccessUnit (pCtx, ppDst, pDstBufInfo); + } + DecodeFinishUpdate (pCtx); + + if ((dsOutOfMemory | dsNoParamSets) & pCtx->iErrorCode) { +#ifdef LONG_TERM_REF + pCtx->bParamSetsLostFlag = true; +#else + pCtx->bReferenceLostAtT0Flag = true; +#endif + return pCtx->iErrorCode; + } + } + + return pCtx->iErrorCode; +} + +/*! 
+ * \brief make sure synchonozization picture resolution (get from slice header) among different parts (i.e, memory related and so on) + * over decoder internal + * ( MB coordinate and parts of data within decoder context structure ) + * \param pCtx Wels decoder context + * \param iMbWidth MB width + * \pram iMbHeight MB height + * \return 0 - successful; none 0 - something wrong + */ +int32_t SyncPictureResolutionExt (PWelsDecoderContext pCtx, const int32_t kiMbWidth, const int32_t kiMbHeight) { + int32_t iErr = ERR_NONE; + const int32_t kiPicWidth = kiMbWidth << 4; + const int32_t kiPicHeight = kiMbHeight << 4; + //fix Bugzilla Bug1479656 reallocate temp dec picture + if (pCtx->pTempDec != NULL && (pCtx->pTempDec->iWidthInPixel != kiPicWidth + || pCtx->pTempDec->iHeightInPixel != kiPicHeight)) { + FreePicture (pCtx->pTempDec, pCtx->pMemAlign); + pCtx->pTempDec = AllocPicture (pCtx, pCtx->pSps->iMbWidth << 4, pCtx->pSps->iMbHeight << 4); + } + bool bReallocFlag = false; + iErr = WelsRequestMem (pCtx, kiMbWidth, kiMbHeight, bReallocFlag); // common memory used + if (ERR_NONE != iErr) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, + "SyncPictureResolutionExt()::WelsRequestMem--buffer allocated failure."); + pCtx->iErrorCode |= dsOutOfMemory; + return iErr; + } + + iErr = InitialDqLayersContext (pCtx, kiPicWidth, kiPicHeight); + if (ERR_NONE != iErr) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, + "SyncPictureResolutionExt()::InitialDqLayersContext--buffer allocated failure."); + pCtx->iErrorCode |= dsOutOfMemory; + } +#if defined(MEMORY_MONITOR) + if (bReallocFlag) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO, "SyncPictureResolutionExt(), overall memory usage: %llu bytes", + static_cast (sizeof (SWelsDecoderContext) + pCtx->pMemAlign->WelsGetMemoryUsage())); + } +#endif//MEMORY_MONITOR + return iErr; +} + +void InitDecFuncs (PWelsDecoderContext pCtx, uint32_t uiCpuFlag) { + WelsBlockFuncInit (&pCtx->sBlockFunc, uiCpuFlag); + InitPredFunc (pCtx, uiCpuFlag); + 
InitMcFunc (& (pCtx->sMcFunc), uiCpuFlag); + InitExpandPictureFunc (& (pCtx->sExpandPicFunc), uiCpuFlag); + DeblockingInit (&pCtx->sDeblockingFunc, uiCpuFlag); +} + +namespace { + +template +void IdctFourResAddPred_ (uint8_t* pPred, int32_t iStride, int16_t* pRs, const int8_t* pNzc) { + if (pNzc[0] || pRs[0 * 16]) + pfIdctResAddPred (pPred + 0 * iStride + 0, iStride, pRs + 0 * 16); + if (pNzc[1] || pRs[1 * 16]) + pfIdctResAddPred (pPred + 0 * iStride + 4, iStride, pRs + 1 * 16); + if (pNzc[4] || pRs[2 * 16]) + pfIdctResAddPred (pPred + 4 * iStride + 0, iStride, pRs + 2 * 16); + if (pNzc[5] || pRs[3 * 16]) + pfIdctResAddPred (pPred + 4 * iStride + 4, iStride, pRs + 3 * 16); +} + +} // anon ns + +void InitPredFunc (PWelsDecoderContext pCtx, uint32_t uiCpuFlag) { + pCtx->pGetI16x16LumaPredFunc[I16_PRED_V ] = WelsI16x16LumaPredV_c; + pCtx->pGetI16x16LumaPredFunc[I16_PRED_H ] = WelsI16x16LumaPredH_c; + pCtx->pGetI16x16LumaPredFunc[I16_PRED_DC ] = WelsI16x16LumaPredDc_c; + pCtx->pGetI16x16LumaPredFunc[I16_PRED_P ] = WelsI16x16LumaPredPlane_c; + pCtx->pGetI16x16LumaPredFunc[I16_PRED_DC_L ] = WelsI16x16LumaPredDcLeft_c; + pCtx->pGetI16x16LumaPredFunc[I16_PRED_DC_T ] = WelsI16x16LumaPredDcTop_c; + pCtx->pGetI16x16LumaPredFunc[I16_PRED_DC_128] = WelsI16x16LumaPredDcNA_c; + + pCtx->pGetI4x4LumaPredFunc[I4_PRED_V ] = WelsI4x4LumaPredV_c; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_H ] = WelsI4x4LumaPredH_c; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_DC ] = WelsI4x4LumaPredDc_c; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_DC_L ] = WelsI4x4LumaPredDcLeft_c; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_DC_T ] = WelsI4x4LumaPredDcTop_c; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_DC_128] = WelsI4x4LumaPredDcNA_c; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_DDL ] = WelsI4x4LumaPredDDL_c; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_DDL_TOP] = WelsI4x4LumaPredDDLTop_c; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_DDR ] = WelsI4x4LumaPredDDR_c; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_VL ] = WelsI4x4LumaPredVL_c; + 
pCtx->pGetI4x4LumaPredFunc[I4_PRED_VL_TOP] = WelsI4x4LumaPredVLTop_c; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_VR ] = WelsI4x4LumaPredVR_c; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_HU ] = WelsI4x4LumaPredHU_c; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_HD ] = WelsI4x4LumaPredHD_c; + + pCtx->pGetI8x8LumaPredFunc[I4_PRED_V ] = WelsI8x8LumaPredV_c; + pCtx->pGetI8x8LumaPredFunc[I4_PRED_H ] = WelsI8x8LumaPredH_c; + pCtx->pGetI8x8LumaPredFunc[I4_PRED_DC ] = WelsI8x8LumaPredDc_c; + pCtx->pGetI8x8LumaPredFunc[I4_PRED_DC_L ] = WelsI8x8LumaPredDcLeft_c; + pCtx->pGetI8x8LumaPredFunc[I4_PRED_DC_T ] = WelsI8x8LumaPredDcTop_c; + pCtx->pGetI8x8LumaPredFunc[I4_PRED_DC_128] = WelsI8x8LumaPredDcNA_c; + pCtx->pGetI8x8LumaPredFunc[I4_PRED_DDL ] = WelsI8x8LumaPredDDL_c; + pCtx->pGetI8x8LumaPredFunc[I4_PRED_DDL_TOP] = WelsI8x8LumaPredDDLTop_c; + pCtx->pGetI8x8LumaPredFunc[I4_PRED_DDR ] = WelsI8x8LumaPredDDR_c; + pCtx->pGetI8x8LumaPredFunc[I4_PRED_VL ] = WelsI8x8LumaPredVL_c; + pCtx->pGetI8x8LumaPredFunc[I4_PRED_VL_TOP] = WelsI8x8LumaPredVLTop_c; + pCtx->pGetI8x8LumaPredFunc[I4_PRED_VR ] = WelsI8x8LumaPredVR_c; + pCtx->pGetI8x8LumaPredFunc[I4_PRED_HU ] = WelsI8x8LumaPredHU_c; + pCtx->pGetI8x8LumaPredFunc[I4_PRED_HD ] = WelsI8x8LumaPredHD_c; + + pCtx->pGetIChromaPredFunc[C_PRED_DC ] = WelsIChromaPredDc_c; + pCtx->pGetIChromaPredFunc[C_PRED_H ] = WelsIChromaPredH_c; + pCtx->pGetIChromaPredFunc[C_PRED_V ] = WelsIChromaPredV_c; + pCtx->pGetIChromaPredFunc[C_PRED_P ] = WelsIChromaPredPlane_c; + pCtx->pGetIChromaPredFunc[C_PRED_DC_L ] = WelsIChromaPredDcLeft_c; + pCtx->pGetIChromaPredFunc[C_PRED_DC_T ] = WelsIChromaPredDcTop_c; + pCtx->pGetIChromaPredFunc[C_PRED_DC_128] = WelsIChromaPredDcNA_c; + + pCtx->pIdctResAddPredFunc = IdctResAddPred_c; + pCtx->pIdctFourResAddPredFunc = IdctFourResAddPred_; + + pCtx->pIdctResAddPredFunc8x8 = IdctResAddPred8x8_c; + +#if defined(HAVE_NEON) + if (uiCpuFlag & WELS_CPU_NEON) { + pCtx->pIdctResAddPredFunc = IdctResAddPred_neon; + pCtx->pIdctFourResAddPredFunc = 
IdctFourResAddPred_; + + pCtx->pGetI16x16LumaPredFunc[I16_PRED_DC] = WelsDecoderI16x16LumaPredDc_neon; + pCtx->pGetI16x16LumaPredFunc[I16_PRED_P] = WelsDecoderI16x16LumaPredPlane_neon; + pCtx->pGetI16x16LumaPredFunc[I16_PRED_H] = WelsDecoderI16x16LumaPredH_neon; + pCtx->pGetI16x16LumaPredFunc[I16_PRED_V] = WelsDecoderI16x16LumaPredV_neon; + + pCtx->pGetI4x4LumaPredFunc[I4_PRED_V ] = WelsDecoderI4x4LumaPredV_neon; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_H ] = WelsDecoderI4x4LumaPredH_neon; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_DDL ] = WelsDecoderI4x4LumaPredDDL_neon; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_DDR ] = WelsDecoderI4x4LumaPredDDR_neon; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_VL ] = WelsDecoderI4x4LumaPredVL_neon; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_VR ] = WelsDecoderI4x4LumaPredVR_neon; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_HU ] = WelsDecoderI4x4LumaPredHU_neon; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_HD ] = WelsDecoderI4x4LumaPredHD_neon; + + pCtx->pGetIChromaPredFunc[C_PRED_H] = WelsDecoderIChromaPredH_neon; + pCtx->pGetIChromaPredFunc[C_PRED_V] = WelsDecoderIChromaPredV_neon; + pCtx->pGetIChromaPredFunc[C_PRED_P ] = WelsDecoderIChromaPredPlane_neon; + pCtx->pGetIChromaPredFunc[C_PRED_DC] = WelsDecoderIChromaPredDc_neon; + } +#endif//HAVE_NEON + +#if defined(HAVE_NEON_AARCH64) + if (uiCpuFlag & WELS_CPU_NEON) { + pCtx->pIdctResAddPredFunc = IdctResAddPred_AArch64_neon; + pCtx->pIdctFourResAddPredFunc = IdctFourResAddPred_; + + pCtx->pGetI16x16LumaPredFunc[I16_PRED_DC] = WelsDecoderI16x16LumaPredDc_AArch64_neon; + pCtx->pGetI16x16LumaPredFunc[I16_PRED_P] = WelsDecoderI16x16LumaPredPlane_AArch64_neon; + pCtx->pGetI16x16LumaPredFunc[I16_PRED_H] = WelsDecoderI16x16LumaPredH_AArch64_neon; + pCtx->pGetI16x16LumaPredFunc[I16_PRED_V] = WelsDecoderI16x16LumaPredV_AArch64_neon; + pCtx->pGetI16x16LumaPredFunc[I16_PRED_DC_L] = WelsDecoderI16x16LumaPredDcLeft_AArch64_neon; + pCtx->pGetI16x16LumaPredFunc[I16_PRED_DC_T] = WelsDecoderI16x16LumaPredDcTop_AArch64_neon; + + 
pCtx->pGetI4x4LumaPredFunc[I4_PRED_H ] = WelsDecoderI4x4LumaPredH_AArch64_neon; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_DDL ] = WelsDecoderI4x4LumaPredDDL_AArch64_neon; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_DDL_TOP] = WelsDecoderI4x4LumaPredDDLTop_AArch64_neon; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_VL ] = WelsDecoderI4x4LumaPredVL_AArch64_neon; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_VL_TOP ] = WelsDecoderI4x4LumaPredVLTop_AArch64_neon; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_VR ] = WelsDecoderI4x4LumaPredVR_AArch64_neon; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_HU ] = WelsDecoderI4x4LumaPredHU_AArch64_neon; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_HD ] = WelsDecoderI4x4LumaPredHD_AArch64_neon; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_DC ] = WelsDecoderI4x4LumaPredDc_AArch64_neon; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_DC_T ] = WelsDecoderI4x4LumaPredDcTop_AArch64_neon; + + pCtx->pGetIChromaPredFunc[C_PRED_H] = WelsDecoderIChromaPredH_AArch64_neon; + pCtx->pGetIChromaPredFunc[C_PRED_V] = WelsDecoderIChromaPredV_AArch64_neon; + pCtx->pGetIChromaPredFunc[C_PRED_P ] = WelsDecoderIChromaPredPlane_AArch64_neon; + pCtx->pGetIChromaPredFunc[C_PRED_DC] = WelsDecoderIChromaPredDc_AArch64_neon; + pCtx->pGetIChromaPredFunc[C_PRED_DC_T] = WelsDecoderIChromaPredDcTop_AArch64_neon; + } +#endif//HAVE_NEON_AARCH64 + +#if defined(X86_ASM) + if (uiCpuFlag & WELS_CPU_MMXEXT) { + pCtx->pIdctResAddPredFunc = IdctResAddPred_mmx; + pCtx->pIdctFourResAddPredFunc = IdctFourResAddPred_; + + ///////mmx code opt--- + pCtx->pGetIChromaPredFunc[C_PRED_H] = WelsDecoderIChromaPredH_mmx; + pCtx->pGetIChromaPredFunc[C_PRED_V] = WelsDecoderIChromaPredV_mmx; + pCtx->pGetIChromaPredFunc[C_PRED_DC_L ] = WelsDecoderIChromaPredDcLeft_mmx; + pCtx->pGetIChromaPredFunc[C_PRED_DC_128] = WelsDecoderIChromaPredDcNA_mmx; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_DDR] = WelsDecoderI4x4LumaPredDDR_mmx; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_HD ] = WelsDecoderI4x4LumaPredHD_mmx; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_HU ] = 
WelsDecoderI4x4LumaPredHU_mmx; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_VR ] = WelsDecoderI4x4LumaPredVR_mmx; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_DDL] = WelsDecoderI4x4LumaPredDDL_mmx; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_VL ] = WelsDecoderI4x4LumaPredVL_mmx; + } + if (uiCpuFlag & WELS_CPU_SSE2) { + pCtx->pIdctResAddPredFunc = IdctResAddPred_sse2; + pCtx->pIdctFourResAddPredFunc = IdctFourResAddPred_; + + pCtx->pGetI16x16LumaPredFunc[I16_PRED_DC] = WelsDecoderI16x16LumaPredDc_sse2; + pCtx->pGetI16x16LumaPredFunc[I16_PRED_P] = WelsDecoderI16x16LumaPredPlane_sse2; + pCtx->pGetI16x16LumaPredFunc[I16_PRED_H] = WelsDecoderI16x16LumaPredH_sse2; + pCtx->pGetI16x16LumaPredFunc[I16_PRED_V] = WelsDecoderI16x16LumaPredV_sse2; + pCtx->pGetI16x16LumaPredFunc[I16_PRED_DC_T ] = WelsDecoderI16x16LumaPredDcTop_sse2; + pCtx->pGetI16x16LumaPredFunc[I16_PRED_DC_128] = WelsDecoderI16x16LumaPredDcNA_sse2; + pCtx->pGetIChromaPredFunc[C_PRED_P ] = WelsDecoderIChromaPredPlane_sse2; + pCtx->pGetIChromaPredFunc[C_PRED_DC] = WelsDecoderIChromaPredDc_sse2; + pCtx->pGetIChromaPredFunc[C_PRED_DC_T] = WelsDecoderIChromaPredDcTop_sse2; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_H] = WelsDecoderI4x4LumaPredH_sse2; + } +#if defined(HAVE_AVX2) + if (uiCpuFlag & WELS_CPU_AVX2) { + pCtx->pIdctResAddPredFunc = IdctResAddPred_avx2; + pCtx->pIdctFourResAddPredFunc = IdctFourResAddPred_avx2; + } +#endif + +#endif + +#if defined(HAVE_MMI) + if (uiCpuFlag & WELS_CPU_MMI) { + pCtx->pIdctResAddPredFunc = IdctResAddPred_mmi; + pCtx->pIdctFourResAddPredFunc = IdctFourResAddPred_; + + pCtx->pGetI16x16LumaPredFunc[I16_PRED_DC] = WelsDecoderI16x16LumaPredDc_mmi; + pCtx->pGetI16x16LumaPredFunc[I16_PRED_P] = WelsDecoderI16x16LumaPredPlane_mmi; + pCtx->pGetI16x16LumaPredFunc[I16_PRED_H] = WelsDecoderI16x16LumaPredH_mmi; + pCtx->pGetI16x16LumaPredFunc[I16_PRED_V] = WelsDecoderI16x16LumaPredV_mmi; + pCtx->pGetI16x16LumaPredFunc[I16_PRED_DC_T ] = WelsDecoderI16x16LumaPredDcTop_mmi; + pCtx->pGetI16x16LumaPredFunc[I16_PRED_DC_128] 
= WelsDecoderI16x16LumaPredDcNA_mmi; + pCtx->pGetIChromaPredFunc[C_PRED_P ] = WelsDecoderIChromaPredPlane_mmi; + pCtx->pGetIChromaPredFunc[C_PRED_DC] = WelsDecoderIChromaPredDc_mmi; + pCtx->pGetIChromaPredFunc[C_PRED_DC_T] = WelsDecoderIChromaPredDcTop_mmi; + pCtx->pGetI4x4LumaPredFunc[I4_PRED_H] = WelsDecoderI4x4LumaPredH_mmi; + } +#endif//HAVE_MMI +} + +//reset decoder number related statistics info +void ResetDecStatNums (SDecoderStatistics* pDecStat) { + uint32_t uiWidth = pDecStat->uiWidth; + uint32_t uiHeight = pDecStat->uiHeight; + int32_t iAvgLumaQp = pDecStat->iAvgLumaQp; + uint32_t iLogInterval = pDecStat->iStatisticsLogInterval; + uint32_t uiProfile = pDecStat->uiProfile; + uint32_t uiLevel = pDecStat->uiLevel; + memset (pDecStat, 0, sizeof (SDecoderStatistics)); + pDecStat->uiWidth = uiWidth; + pDecStat->uiHeight = uiHeight; + pDecStat->iAvgLumaQp = iAvgLumaQp; + pDecStat->iStatisticsLogInterval = iLogInterval; + pDecStat->uiProfile = uiProfile; + pDecStat->uiLevel = uiLevel; +} + +//update information when freezing occurs, including IDR/non-IDR number +void UpdateDecStatFreezingInfo (const bool kbIdrFlag, SDecoderStatistics* pDecStat) { + if (kbIdrFlag) + pDecStat->uiFreezingIDRNum++; + else + pDecStat->uiFreezingNonIDRNum++; +} + +//update information when no freezing occurs, including QP, correct IDR number, ECed IDR number +void UpdateDecStatNoFreezingInfo (PWelsDecoderContext pCtx) { + PDqLayer pCurDq = pCtx->pCurDqLayer; + PPicture pPic = pCtx->pDec; + SDecoderStatistics* pDecStat = pCtx->pDecoderStatistics; + + if (pDecStat->iAvgLumaQp == -1) //first correct frame received + pDecStat->iAvgLumaQp = 0; + + //update QP info + int32_t iTotalQp = 0; + const int32_t kiMbNum = pCurDq->iMbWidth * pCurDq->iMbHeight; + if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) { //all correct + for (int32_t iMb = 0; iMb < kiMbNum; ++iMb) { + iTotalQp += pCurDq->pLumaQp[iMb]; + } + iTotalQp /= kiMbNum; + } else { + int32_t iCorrectMbNum = 0; + for (int32_t iMb = 
0; iMb < kiMbNum; ++iMb) { + iCorrectMbNum += (int32_t) pCurDq->pMbCorrectlyDecodedFlag[iMb]; + iTotalQp += pCurDq->pLumaQp[iMb] * pCurDq->pMbCorrectlyDecodedFlag[iMb]; + } + if (iCorrectMbNum == 0) //non MB is correct, should remain QP statistic info + iTotalQp = pDecStat->iAvgLumaQp; + else + iTotalQp /= iCorrectMbNum; + } + if (pDecStat->uiDecodedFrameCount + 1 == 0) { //maximum uint32_t reached + ResetDecStatNums (pDecStat); + pDecStat->iAvgLumaQp = iTotalQp; + } else + pDecStat->iAvgLumaQp = (int) ((uint64_t) (pDecStat->iAvgLumaQp * pDecStat->uiDecodedFrameCount + iTotalQp) / + (pDecStat->uiDecodedFrameCount + 1)); + + //update IDR number + if (pCurDq->sLayerInfo.sNalHeaderExt.bIdrFlag) { + pDecStat->uiIDRCorrectNum += (pPic->bIsComplete); + if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) + pDecStat->uiEcIDRNum += (!pPic->bIsComplete); + } +} + +//update decoder statistics information +void UpdateDecStat (PWelsDecoderContext pCtx, const bool kbOutput) { + if (pCtx->bFreezeOutput) + UpdateDecStatFreezingInfo (pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt.bIdrFlag, pCtx->pDecoderStatistics); + else if (kbOutput) + UpdateDecStatNoFreezingInfo (pCtx); +} + + +} // namespace WelsDec diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/decoder_core.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/decoder_core.cpp new file mode 100644 index 000000000..eac41bfb2 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/decoder_core.cpp @@ -0,0 +1,3000 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * decoder_core.c: Wels decoder framework core implementation + */ + +#include "decoder_core.h" +#include "error_code.h" +#include "memmgr_nal_unit.h" +#include "au_parser.h" +#include "decode_slice.h" +#include "manage_dec_ref.h" +#include "expand_pic.h" +#include "decoder.h" +#include "decode_mb_aux.h" +#include "memory_align.h" +#include "error_concealment.h" + +namespace WelsDec { +static inline int32_t DecodeFrameConstruction (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferInfo* pDstInfo) { + PDqLayer pCurDq = pCtx->pCurDqLayer; + PPicture pPic = pCtx->pDec; + + const int32_t kiWidth = pCurDq->iMbWidth << 4; + const int32_t kiHeight = pCurDq->iMbHeight << 4; + + const int32_t kiTotalNumMbInCurLayer = pCurDq->iMbWidth * pCurDq->iMbHeight; + bool bFrameCompleteFlag = true; + + if (pPic->bNewSeqBegin) { + memcpy (& (pCtx->sFrameCrop), & (pCurDq->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.pSps->sFrameCrop), + sizeof (SPosOffset)); //confirmed_safe_unsafe_usage +#ifdef LONG_TERM_REF + pCtx->bParamSetsLostFlag = false; +#else + pCtx->bReferenceLostAtT0Flag = false; // need initialize it due new seq, 6/4/2010 +#endif //LONG_TERM_REF + if (pCtx->iTotalNumMbRec == kiTotalNumMbInCurLayer) { + pCtx->bPrintFrameErrorTraceFlag = true; + WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO, + "DecodeFrameConstruction(): will output first frame of new sequence, %d x %d, crop_left:%d, crop_right:%d, crop_top:%d, crop_bottom:%d, ignored error packet:%d.", + kiWidth, kiHeight, pCtx->sFrameCrop.iLeftOffset, pCtx->sFrameCrop.iRightOffset, pCtx->sFrameCrop.iTopOffset, + pCtx->sFrameCrop.iBottomOffset, pCtx->iIgnoredErrorInfoPacketCount); + pCtx->iIgnoredErrorInfoPacketCount = 0; + } + } + + const int32_t kiActualWidth = kiWidth - (pCtx->sFrameCrop.iLeftOffset + pCtx->sFrameCrop.iRightOffset) * 2; + const int32_t kiActualHeight = kiHeight - (pCtx->sFrameCrop.iTopOffset + pCtx->sFrameCrop.iBottomOffset) * 2; + + + if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) { + if 
((pCtx->pDecoderStatistics->uiWidth != (unsigned int) kiActualWidth) + || (pCtx->pDecoderStatistics->uiHeight != (unsigned int) kiActualHeight)) { + pCtx->pDecoderStatistics->uiResolutionChangeTimes++; + pCtx->pDecoderStatistics->uiWidth = kiActualWidth; + pCtx->pDecoderStatistics->uiHeight = kiActualHeight; + } + UpdateDecStatNoFreezingInfo (pCtx); + } + + if (pCtx->pParam->bParseOnly) { //should exit for parse only to prevent access NULL pDstInfo + PAccessUnit pCurAu = pCtx->pAccessUnitList; + if (dsErrorFree == pCtx->iErrorCode) { //correct decoding, add to data buffer + SParserBsInfo* pParser = pCtx->pParserBsInfo; + SNalUnit* pCurNal = NULL; + int32_t iTotalNalLen = 0; + int32_t iNalLen = 0; + int32_t iNum = 0; + while (iNum < pParser->iNalNum) { + iTotalNalLen += pParser->pNalLenInByte[iNum++]; + } + uint8_t* pDstBuf = pParser->pDstBuff + iTotalNalLen; + int32_t iIdx = pCurAu->uiStartPos; + int32_t iEndIdx = pCurAu->uiEndPos; + uint8_t* pNalBs = NULL; + pParser->uiOutBsTimeStamp = (pCurAu->pNalUnitsList [iIdx]) ? pCurAu->pNalUnitsList [iIdx]->uiTimeStamp : 0; + //pParser->iNalNum = 0; + pParser->iSpsWidthInPixel = (pCtx->pSps->iMbWidth << 4) - ((pCtx->pSps->sFrameCrop.iLeftOffset + + pCtx->pSps->sFrameCrop.iRightOffset) << 1); + pParser->iSpsHeightInPixel = (pCtx->pSps->iMbHeight << 4) - ((pCtx->pSps->sFrameCrop.iTopOffset + + pCtx->pSps->sFrameCrop.iBottomOffset) << 1); + + if (pCurAu->pNalUnitsList [iIdx]->sNalHeaderExt.bIdrFlag) { //IDR + if (pCtx->bFrameFinish) { //add required sps/pps + if (pParser->iNalNum > pCtx->iMaxNalNum - 2) { //2 reserved for sps+pps + WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO, + "DecodeFrameConstruction(): current NAL num (%d) plus sps & pps exceeds permitted num (%d). 
Will expand", + pParser->iNalNum, pCtx->iMaxNalNum); + WELS_VERIFY_RETURN_IF (ERR_INFO_OUT_OF_MEMORY, ExpandBsLenBuffer (pCtx, pParser->iNalNum + 2)) + } + bool bSubSps = (NAL_UNIT_CODED_SLICE_EXT == pCurAu->pNalUnitsList [iIdx]->sNalHeaderExt.sNalUnitHeader.eNalUnitType); + SSpsBsInfo* pSpsBs = NULL; + SPpsBsInfo* pPpsBs = NULL; + int32_t iSpsId = pCtx->pSps->iSpsId; + int32_t iPpsId = pCtx->pPps->iPpsId; + pCtx->bParamSetsLostFlag = false; + //find required sps, pps and write into dst buff + pSpsBs = bSubSps ? &pCtx->sSubsetSpsBsInfo [iSpsId] : &pCtx->sSpsBsInfo [iSpsId]; + pPpsBs = &pCtx->sPpsBsInfo [iPpsId]; + if (pDstBuf - pParser->pDstBuff + pSpsBs->uiSpsBsLen + pPpsBs->uiPpsBsLen >= MAX_ACCESS_UNIT_CAPACITY) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, + "DecodeFrameConstruction(): sps pps size: (%d %d) too large. Failed to parse. \n", pSpsBs->uiSpsBsLen, + pPpsBs->uiPpsBsLen); + pCtx->iErrorCode |= dsOutOfMemory; + pCtx->pParserBsInfo->iNalNum = 0; + return ERR_INFO_OUT_OF_MEMORY; + } + memcpy (pDstBuf, pSpsBs->pSpsBsBuf, pSpsBs->uiSpsBsLen); + pParser->pNalLenInByte [pParser->iNalNum ++] = pSpsBs->uiSpsBsLen; + pDstBuf += pSpsBs->uiSpsBsLen; + memcpy (pDstBuf, pPpsBs->pPpsBsBuf, pPpsBs->uiPpsBsLen); + pParser->pNalLenInByte [pParser->iNalNum ++] = pPpsBs->uiPpsBsLen; + pDstBuf += pPpsBs->uiPpsBsLen; + pCtx->bFrameFinish = false; + } + } + //then VCL data re-write + if (pParser->iNalNum + iEndIdx - iIdx + 1 > pCtx->iMaxNalNum) { //calculate total NAL num + WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO, + "DecodeFrameConstruction(): current NAL num (%d) exceeds permitted num (%d). 
Will expand", + pParser->iNalNum + iEndIdx - iIdx + 1, pCtx->iMaxNalNum); + WELS_VERIFY_RETURN_IF (ERR_INFO_OUT_OF_MEMORY, ExpandBsLenBuffer (pCtx, pParser->iNalNum + iEndIdx - iIdx + 1)) + } + while (iIdx <= iEndIdx) { + pCurNal = pCurAu->pNalUnitsList [iIdx ++]; + iNalLen = pCurNal->sNalData.sVclNal.iNalLength; + pNalBs = pCurNal->sNalData.sVclNal.pNalPos; + pParser->pNalLenInByte [pParser->iNalNum ++] = iNalLen; + if (pDstBuf - pParser->pDstBuff + iNalLen >= MAX_ACCESS_UNIT_CAPACITY) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, + "DecodeFrameConstruction(): composed output size (%ld) exceeds (%d). Failed to parse. current data pos %d out of %d:, previously accumulated num: %d, total num: %d, previously accumulated len: %d, current len: %d, current buf pos: %p, header buf pos: %p \n", + (long) (pDstBuf - pParser->pDstBuff + iNalLen), MAX_ACCESS_UNIT_CAPACITY, iIdx, iEndIdx, iNum, pParser->iNalNum, + iTotalNalLen, iNalLen, pDstBuf, pParser->pDstBuff); + pCtx->iErrorCode |= dsOutOfMemory; + pCtx->pParserBsInfo->iNalNum = 0; + return ERR_INFO_OUT_OF_MEMORY; + } + + memcpy (pDstBuf, pNalBs, iNalLen); + pDstBuf += iNalLen; + } + if (pCtx->iTotalNumMbRec == kiTotalNumMbInCurLayer) { //frame complete + pCtx->iTotalNumMbRec = 0; + pCtx->bFramePending = false; + pCtx->bFrameFinish = true; //finish current frame and mark it + } else if (pCtx->iTotalNumMbRec != 0) { //frame incomplete + pCtx->bFramePending = true; + pCtx->pDec->bIsComplete = false; + pCtx->bFrameFinish = false; //current frame not finished + pCtx->iErrorCode |= dsFramePending; + return ERR_INFO_PARSEONLY_PENDING; + //pCtx->pParserBsInfo->iNalNum = 0; + } + } else { //error + pCtx->pParserBsInfo->uiOutBsTimeStamp = 0; + pCtx->pParserBsInfo->iNalNum = 0; + pCtx->pParserBsInfo->iSpsWidthInPixel = 0; + pCtx->pParserBsInfo->iSpsHeightInPixel = 0; + return ERR_INFO_PARSEONLY_ERROR; + } + return ERR_NONE; + } + + if (pCtx->iTotalNumMbRec != kiTotalNumMbInCurLayer) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, 
+ "DecodeFrameConstruction(): iTotalNumMbRec:%d, total_num_mb_sps:%d, cur_layer_mb_width:%d, cur_layer_mb_height:%d ", + pCtx->iTotalNumMbRec, kiTotalNumMbInCurLayer, pCurDq->iMbWidth, pCurDq->iMbHeight); + bFrameCompleteFlag = false; //return later after output buffer is done + if (pCtx->bInstantDecFlag) { //no-delay decoding, wait for new slice + return ERR_INFO_MB_NUM_INADEQUATE; + } + } else if (pCurDq->sLayerInfo.sNalHeaderExt.bIdrFlag + && (pCtx->iErrorCode == dsErrorFree)) { //complete non-ECed IDR frame done + pCtx->pDec->bIsComplete = true; + pCtx->bFreezeOutput = false; + } + + pCtx->iTotalNumMbRec = 0; + + //////output:::normal path + pDstInfo->uiOutYuvTimeStamp = pPic->uiTimeStamp; + ppDst[0] = pPic->pData[0]; + ppDst[1] = pPic->pData[1]; + ppDst[2] = pPic->pData[2]; + + pDstInfo->UsrData.sSystemBuffer.iFormat = videoFormatI420; + + pDstInfo->UsrData.sSystemBuffer.iWidth = kiActualWidth; + pDstInfo->UsrData.sSystemBuffer.iHeight = kiActualHeight; + pDstInfo->UsrData.sSystemBuffer.iStride[0] = pPic->iLinesize[0]; + pDstInfo->UsrData.sSystemBuffer.iStride[1] = pPic->iLinesize[1]; + ppDst[0] = ppDst[0] + pCtx->sFrameCrop.iTopOffset * 2 * pPic->iLinesize[0] + pCtx->sFrameCrop.iLeftOffset * 2; + ppDst[1] = ppDst[1] + pCtx->sFrameCrop.iTopOffset * pPic->iLinesize[1] + pCtx->sFrameCrop.iLeftOffset; + ppDst[2] = ppDst[2] + pCtx->sFrameCrop.iTopOffset * pPic->iLinesize[1] + pCtx->sFrameCrop.iLeftOffset; + for (int i = 0; i < 3; ++i) { + pDstInfo->pDst[i] = ppDst[i]; + } + pDstInfo->iBufferStatus = 1; + if (GetThreadCount (pCtx) > 1 && pPic->bIsComplete == false) { + pPic->bIsComplete = true; + } + if (GetThreadCount (pCtx) > 1) { + uint32_t uiMbHeight = (pCtx->pDec->iHeightInPixel + 15) >> 4; + for (uint32_t i = 0; i < uiMbHeight; ++i) { + SET_EVENT (&pCtx->pDec->pReadyEvent[i]); + } + } + bool bOutResChange = false; + if (GetThreadCount (pCtx) <= 1 || pCtx->pLastThreadCtx == NULL) { + bOutResChange = (pCtx->iLastImgWidthInPixel != 
pDstInfo->UsrData.sSystemBuffer.iWidth) + || (pCtx->iLastImgHeightInPixel != pDstInfo->UsrData.sSystemBuffer.iHeight); + } else { + if (pCtx->pLastThreadCtx != NULL) { + PWelsDecoderThreadCTX pLastThreadCtx = (PWelsDecoderThreadCTX) (pCtx->pLastThreadCtx); + bOutResChange = (pLastThreadCtx->pCtx->iLastImgWidthInPixel != pDstInfo->UsrData.sSystemBuffer.iWidth) + || (pLastThreadCtx->pCtx->iLastImgHeightInPixel != pDstInfo->UsrData.sSystemBuffer.iHeight); + } + } + pCtx->iLastImgWidthInPixel = pDstInfo->UsrData.sSystemBuffer.iWidth; + pCtx->iLastImgHeightInPixel = pDstInfo->UsrData.sSystemBuffer.iHeight; + if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) //no buffer output if EC is disabled and frame incomplete + pDstInfo->iBufferStatus = (int32_t) (bFrameCompleteFlag + && pPic->bIsComplete); // When EC disable, ECed picture not output + else if ((pCtx->pParam->eEcActiveIdc == ERROR_CON_SLICE_COPY_CROSS_IDR_FREEZE_RES_CHANGE + || pCtx->pParam->eEcActiveIdc == ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE) + && pCtx->iErrorCode && bOutResChange) + pCtx->bFreezeOutput = true; + + if (pDstInfo->iBufferStatus == 0) { + if (!bFrameCompleteFlag) + pCtx->iErrorCode |= dsBitstreamError; + return ERR_INFO_MB_NUM_INADEQUATE; + } + if (pCtx->bFreezeOutput) { + pDstInfo->iBufferStatus = 0; + if (pPic->bNewSeqBegin) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO, + "DecodeFrameConstruction():New sequence detected, but freezed, correct MBs (%d) out of whole MBs (%d).", + kiTotalNumMbInCurLayer - pCtx->iMbEcedNum, kiTotalNumMbInCurLayer); + } + } + pCtx->iMbEcedNum = pPic->iMbEcedNum; + pCtx->iMbNum = pPic->iMbNum; + pCtx->iMbEcedPropNum = pPic->iMbEcedPropNum; + if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) { + if (pDstInfo->iBufferStatus && ((pCtx->pDecoderStatistics->uiWidth != (unsigned int) kiActualWidth) + || (pCtx->pDecoderStatistics->uiHeight != (unsigned int) kiActualHeight))) { + pCtx->pDecoderStatistics->uiResolutionChangeTimes++; + 
pCtx->pDecoderStatistics->uiWidth = kiActualWidth; + pCtx->pDecoderStatistics->uiHeight = kiActualHeight; + } + UpdateDecStat (pCtx, pDstInfo->iBufferStatus != 0); + } + return ERR_NONE; +} + +inline bool CheckSliceNeedReconstruct (uint8_t uiLayerDqId, uint8_t uiTargetDqId) { + return (uiLayerDqId == uiTargetDqId); // target layer +} + +inline uint8_t GetTargetDqId (uint8_t uiTargetDqId, SDecodingParam* psParam) { + uint8_t uiRequiredDqId = psParam ? psParam->uiTargetDqLayer : (uint8_t)255; + + return WELS_MIN (uiTargetDqId, uiRequiredDqId); +} + + +inline void HandleReferenceLostL0 (PWelsDecoderContext pCtx, PNalUnit pCurNal) { + if (0 == pCurNal->sNalHeaderExt.uiTemporalId) { + pCtx->bReferenceLostAtT0Flag = true; + } + pCtx->iErrorCode |= dsBitstreamError; +} + +inline void HandleReferenceLost (PWelsDecoderContext pCtx, PNalUnit pCurNal) { + if ((0 == pCurNal->sNalHeaderExt.uiTemporalId) || (1 == pCurNal->sNalHeaderExt.uiTemporalId)) { + pCtx->bReferenceLostAtT0Flag = true; + } + pCtx->iErrorCode |= dsRefLost; +} + +inline int32_t WelsDecodeConstructSlice (PWelsDecoderContext pCtx, PNalUnit pCurNal) { + int32_t iRet = WelsTargetSliceConstruction (pCtx); + + if (iRet) { + HandleReferenceLostL0 (pCtx, pCurNal); + } + + return iRet; +} + +int32_t ParsePredWeightedTable (PBitStringAux pBs, PSliceHeader pSh) { + uint32_t uiCode; + int32_t iList = 0; + int32_t iCode; + + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); + WELS_CHECK_SE_BOTH_ERROR_NOLOG (uiCode, 0, 7, "luma_log2_weight_denom", + GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_LUMA_LOG2_WEIGHT_DENOM)); + pSh->sPredWeightTable.uiLumaLog2WeightDenom = uiCode; + if (pSh->pSps->uiChromaArrayType != 0) { + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); + WELS_CHECK_SE_BOTH_ERROR_NOLOG (uiCode, 0, 7, "chroma_log2_weight_denom", + GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_CHROMA_LOG2_WEIGHT_DENOM)); + pSh->sPredWeightTable.uiChromaLog2WeightDenom = uiCode; + } + + if 
((pSh->sPredWeightTable.uiLumaLog2WeightDenom | pSh->sPredWeightTable.uiChromaLog2WeightDenom) > 7) + return ERR_NONE; + + do { + + for (int i = 0; i < pSh->uiRefCount[iList]; i++) { + //luma + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); + if (!!uiCode) { + + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); + WELS_CHECK_SE_BOTH_ERROR_NOLOG (iCode, -128, 127, "luma_weight", + GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_LUMA_WEIGHT)); + pSh->sPredWeightTable.sPredList[iList].iLumaWeight[i] = iCode; + + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); + WELS_CHECK_SE_BOTH_ERROR_NOLOG (iCode, -128, 127, "luma_offset", + GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_LUMA_OFFSET)); + pSh->sPredWeightTable.sPredList[iList].iLumaOffset[i] = iCode; + } else { + pSh->sPredWeightTable.sPredList[iList].iLumaWeight[i] = 1 << (pSh->sPredWeightTable.uiLumaLog2WeightDenom); + pSh->sPredWeightTable.sPredList[iList].iLumaOffset[i] = 0; + + } + //chroma + if (pSh->pSps->uiChromaArrayType == 0) + continue; + + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); + if (!!uiCode) { + for (int j = 0; j < 2; j++) { + + + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); + WELS_CHECK_SE_BOTH_ERROR_NOLOG (iCode, -128, 127, "chroma_weight", + GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_CHROMA_WEIGHT)); + pSh->sPredWeightTable.sPredList[iList].iChromaWeight[i][j] = iCode; + + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); + WELS_CHECK_SE_BOTH_ERROR_NOLOG (iCode, -128, 127, "chroma_offset", + GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_CHROMA_OFFSET)); + pSh->sPredWeightTable.sPredList[iList].iChromaOffset[i][j] = iCode; + } + } else { + for (int j = 0; j < 2; j++) { + + + pSh->sPredWeightTable.sPredList[iList].iChromaWeight[i][j] = 1 << (pSh->sPredWeightTable.uiChromaLog2WeightDenom); + pSh->sPredWeightTable.sPredList[iList].iChromaOffset[i][j] = 0; + } + } + + } + ++iList; + if (pSh->eSliceType != B_SLICE) { + break; + } + } while (iList < LIST_A);//TODO: 
SUPPORT LIST_A + return ERR_NONE; +} + +void CreateImplicitWeightTable (PWelsDecoderContext pCtx) { + + PSlice pSlice = &pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer; + PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader; + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + if (pCurDqLayer->bUseWeightedBiPredIdc && pSliceHeader->pPps->uiWeightedBipredIdc == 2) { + int32_t iPoc = pSliceHeader->iPicOrderCntLsb; + + //fix Bugzilla 1485229 check if pointers are NULL + if (pCtx->sRefPic.pRefList[LIST_0][0] && pCtx->sRefPic.pRefList[LIST_1][0]) { + if (pSliceHeader->uiRefCount[0] == 1 && pSliceHeader->uiRefCount[1] == 1 + && int64_t(pCtx->sRefPic.pRefList[LIST_0][0]->iFramePoc) + int64_t(pCtx->sRefPic.pRefList[LIST_1][0]->iFramePoc) == 2 * int64_t(iPoc)) { + pCurDqLayer->bUseWeightedBiPredIdc = false; + return; + } + } + + pCurDqLayer->pPredWeightTable->uiLumaLog2WeightDenom = 5; + pCurDqLayer->pPredWeightTable->uiChromaLog2WeightDenom = 5; + for (int32_t iRef0 = 0; iRef0 < pSliceHeader->uiRefCount[0]; iRef0++) { + if (pCtx->sRefPic.pRefList[LIST_0][iRef0]) { + const int32_t iPoc0 = pCtx->sRefPic.pRefList[LIST_0][iRef0]->iFramePoc; + bool bIsLongRef0 = pCtx->sRefPic.pRefList[LIST_0][iRef0]->bIsLongRef; + for (int32_t iRef1 = 0; iRef1 < pSliceHeader->uiRefCount[1]; iRef1++) { + if (pCtx->sRefPic.pRefList[LIST_1][iRef1]) { + const int32_t iPoc1 = pCtx->sRefPic.pRefList[LIST_1][iRef1]->iFramePoc; + bool bIsLongRef1 = pCtx->sRefPic.pRefList[LIST_1][iRef1]->bIsLongRef; + pCurDqLayer->pPredWeightTable->iImplicitWeight[iRef0][iRef1] = 32; + if (!bIsLongRef0 && !bIsLongRef1) { + const int32_t iTd = WELS_CLIP3 (iPoc1 - iPoc0, -128, 127); + if (iTd) { + int32_t iTb = WELS_CLIP3 (iPoc - iPoc0, -128, 127); + int32_t iTx = (16384 + (WELS_ABS (iTd) >> 1)) / iTd; + int32_t iDistScaleFactor = (iTb * iTx + 32) >> 8; + if (iDistScaleFactor >= -64 && iDistScaleFactor <= 128) { + pCurDqLayer->pPredWeightTable->iImplicitWeight[iRef0][iRef1] = 64 - iDistScaleFactor; + } + } + } + } + } 
+ } + } + } + return; +} + +/* + * Predeclared function routines .. + */ +int32_t ParseRefPicListReordering (PBitStringAux pBs, PSliceHeader pSh) { + int32_t iList = 0; + const EWelsSliceType keSt = pSh->eSliceType; + PRefPicListReorderSyn pRefPicListReordering = &pSh->pRefPicListReordering; + PSps pSps = pSh->pSps; + uint32_t uiCode; + if (keSt == I_SLICE || keSt == SI_SLICE) + return ERR_NONE; + + // Common syntaxs for P or B slices: list0, list1 followed if B slices used. + do { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //ref_pic_list_modification_flag_l0 + pRefPicListReordering->bRefPicListReorderingFlag[iList] = !!uiCode; + + if (pRefPicListReordering->bRefPicListReorderingFlag[iList]) { + int32_t iIdx = 0; + do { + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //modification_of_pic_nums_idc + const uint32_t kuiIdc = uiCode; + + //Fixed the referrence list reordering crash issue.(fault kIdc value > 3 case)--- + if ((iIdx >= MAX_REF_PIC_COUNT) || (kuiIdc > 3)) { + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_REF_REORDERING); + } + pRefPicListReordering->sReorderingSyn[iList][iIdx].uiReorderingOfPicNumsIdc = kuiIdc; + if (kuiIdc == 3) + break; + + if (iIdx >= pSh->uiRefCount[iList] || iIdx >= MAX_REF_PIC_COUNT) + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_REF_REORDERING); + + if (kuiIdc == 0 || kuiIdc == 1) { + // abs_diff_pic_num_minus1 should be in range 0 to MaxPicNum-1, MaxPicNum is derived as + // 2^(4+log2_max_frame_num_minus4) + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //abs_diff_pic_num_minus1 + WELS_CHECK_SE_UPPER_ERROR_NOLOG (uiCode, (uint32_t) (1 << pSps->uiLog2MaxFrameNum), "abs_diff_pic_num_minus1", + GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_REF_REORDERING)); + pRefPicListReordering->sReorderingSyn[iList][iIdx].uiAbsDiffPicNumMinus1 = uiCode; // uiAbsDiffPicNumMinus1 + } else if (kuiIdc == 2) { + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //long_term_pic_num + 
pRefPicListReordering->sReorderingSyn[iList][iIdx].uiLongTermPicNum = uiCode; + } + + ++ iIdx; + } while (true); + } + if (keSt != B_SLICE) + break; + ++ iList; + } while (iList < LIST_A); + + return ERR_NONE; +} + +int32_t ParseDecRefPicMarking (PWelsDecoderContext pCtx, PBitStringAux pBs, PSliceHeader pSh, PSps pSps, + const bool kbIdrFlag) { + PRefPicMarking const kpRefMarking = &pSh->sRefMarking; + uint32_t uiCode; + if (kbIdrFlag) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //no_output_of_prior_pics_flag + kpRefMarking->bNoOutputOfPriorPicsFlag = !!uiCode; + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //long_term_reference_flag + kpRefMarking->bLongTermRefFlag = !!uiCode; + } else { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //adaptive_ref_pic_marking_mode_flag + kpRefMarking->bAdaptiveRefPicMarkingModeFlag = !!uiCode; + if (kpRefMarking->bAdaptiveRefPicMarkingModeFlag) { + int32_t iIdx = 0; + bool bAllowMmco5 = true, bMmco4Exist = false, bMmco5Exist = false, bMmco6Exist = false; + do { + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //memory_management_control_operation + const uint32_t kuiMmco = uiCode; + + kpRefMarking->sMmcoRef[iIdx].uiMmcoType = kuiMmco; + if (kuiMmco == MMCO_END) + break; + + if (kuiMmco == MMCO_SHORT2UNUSED || kuiMmco == MMCO_SHORT2LONG) { + bAllowMmco5 = false; + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //difference_of_pic_nums_minus1 + kpRefMarking->sMmcoRef[iIdx].iDiffOfPicNum = 1 + uiCode; + kpRefMarking->sMmcoRef[iIdx].iShortFrameNum = (pSh->iFrameNum - kpRefMarking->sMmcoRef[iIdx].iDiffOfPicNum) & (( + 1 << pSps->uiLog2MaxFrameNum) - 1); + } else if (kuiMmco == MMCO_LONG2UNUSED) { + bAllowMmco5 = false; + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //long_term_pic_num + kpRefMarking->sMmcoRef[iIdx].uiLongTermPicNum = uiCode; + } + if (kuiMmco == MMCO_SHORT2LONG || kuiMmco == MMCO_LONG) { + if (kuiMmco == MMCO_LONG) { + WELS_VERIFY_RETURN_IF (-1, bMmco6Exist); + bMmco6Exist = true; + } + WELS_READ_VERIFY (BsGetUe 
(pBs, &uiCode)); //long_term_frame_idx + kpRefMarking->sMmcoRef[iIdx].iLongTermFrameIdx = uiCode; + } else if (kuiMmco == MMCO_SET_MAX_LONG) { + WELS_VERIFY_RETURN_IF (-1, bMmco4Exist); + bMmco4Exist = true; + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //max_long_term_frame_idx_plus1 + int32_t iMaxLongTermFrameIdx = -1 + uiCode; + if (iMaxLongTermFrameIdx > int32_t (pSps->uiLog2MaxFrameNum)) { + //ISO/IEC 14496-10:2009(E) 7.4.3.3 Decoded reference picture marking semantics page 96 + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_REF_MARKING); + } + kpRefMarking->sMmcoRef[iIdx].iMaxLongTermFrameIdx = iMaxLongTermFrameIdx; + } else if (kuiMmco == MMCO_RESET) { + WELS_VERIFY_RETURN_IF (-1, (!bAllowMmco5 || bMmco5Exist)); + bMmco5Exist = true; + + pCtx->pLastDecPicInfo->iPrevPicOrderCntLsb = 0; + pCtx->pLastDecPicInfo->iPrevPicOrderCntMsb = 0; + pSh->iPicOrderCntLsb = 0; + if (pCtx->pSliceHeader) + pCtx->pSliceHeader->iPicOrderCntLsb = 0; + } + ++ iIdx; + + } while (iIdx < MAX_MMCO_COUNT); + } + } + + return ERR_NONE; +} + +bool FillDefaultSliceHeaderExt (PSliceHeaderExt pShExt, PNalUnitHeaderExt pNalExt) { + if (pShExt == NULL || pNalExt == NULL) + return false; + + if (pNalExt->iNoInterLayerPredFlag || pNalExt->uiQualityId > 0) + pShExt->bBasePredWeightTableFlag = false; + else + pShExt->bBasePredWeightTableFlag = true; + pShExt->uiRefLayerDqId = (uint8_t) - 1; + pShExt->uiDisableInterLayerDeblockingFilterIdc = 0; + pShExt->iInterLayerSliceAlphaC0Offset = 0; + pShExt->iInterLayerSliceBetaOffset = 0; + pShExt->bConstrainedIntraResamplingFlag = false; + pShExt->uiRefLayerChromaPhaseXPlus1Flag = 0; + pShExt->uiRefLayerChromaPhaseYPlus1 = 1; + //memset(&pShExt->sScaledRefLayer, 0, sizeof(SPosOffset)); + + pShExt->iScaledRefLayerPicWidthInSampleLuma = pShExt->sSliceHeader.iMbWidth << 4; + pShExt->iScaledRefLayerPicHeightInSampleLuma = pShExt->sSliceHeader.iMbHeight << 4; + + pShExt->bSliceSkipFlag = false; + pShExt->bAdaptiveBaseModeFlag = false; + 
pShExt->bDefaultBaseModeFlag = false; + pShExt->bAdaptiveMotionPredFlag = false; + pShExt->bDefaultMotionPredFlag = false; + pShExt->bAdaptiveResidualPredFlag = false; + pShExt->bDefaultResidualPredFlag = false; + pShExt->bTCoeffLevelPredFlag = false; + pShExt->uiScanIdxStart = 0; + pShExt->uiScanIdxEnd = 15; + + return true; +} + +int32_t InitBsBuffer (PWelsDecoderContext pCtx) { + if (pCtx == NULL) + return ERR_INFO_INVALID_PTR; + + CMemoryAlign* pMa = pCtx->pMemAlign; + + pCtx->iMaxBsBufferSizeInByte = MIN_ACCESS_UNIT_CAPACITY * MAX_BUFFERED_NUM; + if ((pCtx->sRawData.pHead = static_cast (pMa->WelsMallocz (pCtx->iMaxBsBufferSizeInByte, + "pCtx->sRawData.pHead"))) == NULL) { + return ERR_INFO_OUT_OF_MEMORY; + } + pCtx->sRawData.pStartPos = pCtx->sRawData.pCurPos = pCtx->sRawData.pHead; + pCtx->sRawData.pEnd = pCtx->sRawData.pHead + pCtx->iMaxBsBufferSizeInByte; + if (pCtx->pParam->bParseOnly) { + pCtx->pParserBsInfo = static_cast (pMa->WelsMallocz (sizeof (SParserBsInfo), "pCtx->pParserBsInfo")); + if (pCtx->pParserBsInfo == NULL) { + return ERR_INFO_OUT_OF_MEMORY; + } + memset (pCtx->pParserBsInfo, 0, sizeof (SParserBsInfo)); + pCtx->pParserBsInfo->pDstBuff = static_cast (pMa->WelsMallocz (MAX_ACCESS_UNIT_CAPACITY * sizeof (uint8_t), + "pCtx->pParserBsInfo->pDstBuff")); + if (pCtx->pParserBsInfo->pDstBuff == NULL) { + return ERR_INFO_OUT_OF_MEMORY; + } + memset (pCtx->pParserBsInfo->pDstBuff, 0, MAX_ACCESS_UNIT_CAPACITY * sizeof (uint8_t)); + + if ((pCtx->sSavedData.pHead = static_cast (pMa->WelsMallocz (pCtx->iMaxBsBufferSizeInByte, + "pCtx->sSavedData.pHead"))) == NULL) { + return ERR_INFO_OUT_OF_MEMORY; + } + pCtx->sSavedData.pStartPos = pCtx->sSavedData.pCurPos = pCtx->sSavedData.pHead; + pCtx->sSavedData.pEnd = pCtx->sSavedData.pHead + pCtx->iMaxBsBufferSizeInByte; + + pCtx->iMaxNalNum = MAX_NAL_UNITS_IN_LAYER + 2; //2 reserved for SPS+PPS + pCtx->pParserBsInfo->pNalLenInByte = static_cast (pMa->WelsMallocz (pCtx->iMaxNalNum * sizeof (int), + 
"pCtx->pParserBsInfo->pNalLenInByte")); + if (pCtx->pParserBsInfo->pNalLenInByte == NULL) { + return ERR_INFO_OUT_OF_MEMORY; + } + } + return ERR_NONE; +} + +int32_t ExpandBsBuffer (PWelsDecoderContext pCtx, const int kiSrcLen) { + if (pCtx == NULL) + return ERR_INFO_INVALID_PTR; + int32_t iExpandStepShift = 1; + int32_t iNewBuffLen = WELS_MAX ((kiSrcLen * MAX_BUFFERED_NUM), (pCtx->iMaxBsBufferSizeInByte << iExpandStepShift)); + //allocate new bs buffer + CMemoryAlign* pMa = pCtx->pMemAlign; + + //Realloc sRawData + uint8_t* pNewBsBuff = static_cast (pMa->WelsMallocz (iNewBuffLen, "pCtx->sRawData.pHead")); + if (pNewBsBuff == NULL) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "ExpandBsBuffer() Failed for malloc pNewBsBuff (%d)", iNewBuffLen); + pCtx->iErrorCode |= dsOutOfMemory; + return ERR_INFO_OUT_OF_MEMORY; + } + + //Calculate and set the bs start and end position + for (uint32_t i = 0; i <= pCtx->pAccessUnitList->uiActualUnitsNum; i++) { + PBitStringAux pSliceBitsRead = &pCtx->pAccessUnitList->pNalUnitsList[i]->sNalData.sVclNal.sSliceBitsRead; + pSliceBitsRead->pStartBuf = pSliceBitsRead->pStartBuf - pCtx->sRawData.pHead + pNewBsBuff; + pSliceBitsRead->pEndBuf = pSliceBitsRead->pEndBuf - pCtx->sRawData.pHead + pNewBsBuff; + pSliceBitsRead->pCurBuf = pSliceBitsRead->pCurBuf - pCtx->sRawData.pHead + pNewBsBuff; + } + + //Copy current buffer status to new buffer + memcpy (pNewBsBuff, pCtx->sRawData.pHead, pCtx->iMaxBsBufferSizeInByte); + pCtx->sRawData.pStartPos = pNewBsBuff + (pCtx->sRawData.pStartPos - pCtx->sRawData.pHead); + pCtx->sRawData.pCurPos = pNewBsBuff + (pCtx->sRawData.pCurPos - pCtx->sRawData.pHead); + pCtx->sRawData.pEnd = pNewBsBuff + iNewBuffLen; + pMa->WelsFree (pCtx->sRawData.pHead, "pCtx->sRawData.pHead"); + pCtx->sRawData.pHead = pNewBsBuff; + + if (pCtx->pParam->bParseOnly) { + //Realloc sSavedData + uint8_t* pNewSavedBsBuff = static_cast (pMa->WelsMallocz (iNewBuffLen, "pCtx->sSavedData.pHead")); + if (pNewSavedBsBuff == NULL) { + 
WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "ExpandBsBuffer() Failed for malloc pNewSavedBsBuff (%d)", iNewBuffLen); + pCtx->iErrorCode |= dsOutOfMemory; + return ERR_INFO_OUT_OF_MEMORY; + } + + //Copy current buffer status to new buffer + memcpy (pNewSavedBsBuff, pCtx->sSavedData.pHead, pCtx->iMaxBsBufferSizeInByte); + pCtx->sSavedData.pStartPos = pNewSavedBsBuff + (pCtx->sSavedData.pStartPos - pCtx->sSavedData.pHead); + pCtx->sSavedData.pCurPos = pNewSavedBsBuff + (pCtx->sSavedData.pCurPos - pCtx->sSavedData.pHead); + pCtx->sSavedData.pEnd = pNewSavedBsBuff + iNewBuffLen; + pMa->WelsFree (pCtx->sSavedData.pHead, "pCtx->sSavedData.pHead"); + pCtx->sSavedData.pHead = pNewSavedBsBuff; + } + + pCtx->iMaxBsBufferSizeInByte = iNewBuffLen; + return ERR_NONE; +} + +int32_t ExpandBsLenBuffer (PWelsDecoderContext pCtx, const int kiCurrLen) { + SParserBsInfo* pParser = pCtx->pParserBsInfo; + if (!pParser->pNalLenInByte) + return ERR_INFO_INVALID_ACCESS; + + int iNewLen = kiCurrLen; + if (kiCurrLen >= MAX_MB_SIZE + 2) { //exceeds the max MB number of level 5.2 + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "Current nal num (%d) exceededs %d.", kiCurrLen, MAX_MB_SIZE); + pCtx->iErrorCode |= dsOutOfMemory; + return ERR_INFO_OUT_OF_MEMORY; + } else { + iNewLen = kiCurrLen << 1; + iNewLen = WELS_MIN (iNewLen, MAX_MB_SIZE + 2); + } + + CMemoryAlign* pMa = pCtx->pMemAlign; + int* pNewLenBuffer = static_cast (pMa->WelsMallocz (iNewLen * sizeof (int), + "pCtx->pParserBsInfo->pNalLenInByte")); + if (pNewLenBuffer == NULL) { + pCtx->iErrorCode |= dsOutOfMemory; + return ERR_INFO_OUT_OF_MEMORY; + } + + //copy existing data from old length buffer to new + memcpy (pNewLenBuffer, pParser->pNalLenInByte, pCtx->iMaxNalNum * sizeof (int)); + pMa->WelsFree (pParser->pNalLenInByte, "pCtx->pParserBsInfo->pNalLenInByte"); + pParser->pNalLenInByte = pNewLenBuffer; + pCtx->iMaxNalNum = iNewLen; + return ERR_NONE; +} + +int32_t CheckBsBuffer (PWelsDecoderContext pCtx, const int32_t kiSrcLen) { + 
if (kiSrcLen > MAX_ACCESS_UNIT_CAPACITY) { //exceeds max allowed data + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "Max AU size exceeded. Allowed size = %d, current size = %d", + MAX_ACCESS_UNIT_CAPACITY, + kiSrcLen); + pCtx->iErrorCode |= dsBitstreamError; + return ERR_INFO_INVALID_ACCESS; + } else if (kiSrcLen > pCtx->iMaxBsBufferSizeInByte / + MAX_BUFFERED_NUM) { //may lead to buffer overwrite, prevent it by expanding buffer + if (ExpandBsBuffer (pCtx, kiSrcLen)) { + return ERR_INFO_OUT_OF_MEMORY; + } + } + + return ERR_NONE; +} + +/* + * WelsInitStaticMemory + * Memory request for new introduced data + * Especially for: + * rbsp_au_buffer, cur_dq_layer_ptr and ref_dq_layer_ptr in MB info cache. + * return: + * 0 - success; otherwise returned error_no defined in error_no.h. +*/ +int32_t WelsInitStaticMemory (PWelsDecoderContext pCtx) { + if (pCtx == NULL) { + return ERR_INFO_INVALID_PTR; + } + + if (MemInitNalList (&pCtx->pAccessUnitList, MAX_NAL_UNIT_NUM_IN_AU, pCtx->pMemAlign) != 0) + return ERR_INFO_OUT_OF_MEMORY; + + if (InitBsBuffer (pCtx) != 0) + return ERR_INFO_OUT_OF_MEMORY; + + pCtx->uiTargetDqId = (uint8_t) - 1; + pCtx->bEndOfStreamFlag = false; + + return ERR_NONE; +} + +/* + * WelsFreeStaticMemory + * Free memory introduced in WelsInitStaticMemory at destruction of decoder. 
+ * + */ +void WelsFreeStaticMemory (PWelsDecoderContext pCtx) { + if (pCtx == NULL) + return; + + CMemoryAlign* pMa = pCtx->pMemAlign; + + MemFreeNalList (&pCtx->pAccessUnitList, pMa); + + if (pCtx->sRawData.pHead) { + pMa->WelsFree (pCtx->sRawData.pHead, "pCtx->sRawData->pHead"); + } + pCtx->sRawData.pHead = NULL; + pCtx->sRawData.pEnd = NULL; + pCtx->sRawData.pStartPos = NULL; + pCtx->sRawData.pCurPos = NULL; + if (pCtx->pParam->bParseOnly) { + if (pCtx->sSavedData.pHead) { + pMa->WelsFree (pCtx->sSavedData.pHead, "pCtx->sSavedData->pHead"); + } + pCtx->sSavedData.pHead = NULL; + pCtx->sSavedData.pEnd = NULL; + pCtx->sSavedData.pStartPos = NULL; + pCtx->sSavedData.pCurPos = NULL; + if (pCtx->pParserBsInfo) { + if (pCtx->pParserBsInfo->pNalLenInByte) { + pMa->WelsFree (pCtx->pParserBsInfo->pNalLenInByte, "pCtx->pParserBsInfo->pNalLenInByte"); + pCtx->pParserBsInfo->pNalLenInByte = NULL; + pCtx->iMaxNalNum = 0; + } + if (pCtx->pParserBsInfo->pDstBuff) { + pMa->WelsFree (pCtx->pParserBsInfo->pDstBuff, "pCtx->pParserBsInfo->pDstBuff"); + pCtx->pParserBsInfo->pDstBuff = NULL; + } + pMa->WelsFree (pCtx->pParserBsInfo, "pCtx->pParserBsInfo"); + pCtx->pParserBsInfo = NULL; + } + } + + if (NULL != pCtx->pParam) { + pMa->WelsFree (pCtx->pParam, "pCtx->pParam"); + + pCtx->pParam = NULL; + } +} +/* + * DecodeNalHeaderExt + * Trigger condition: NAL_UNIT_TYPE = NAL_UNIT_PREFIX or NAL_UNIT_CODED_SLICE_EXT + * Parameter: + * pNal: target NALUnit ptr + * pSrc: NAL Unit bitstream + */ +void DecodeNalHeaderExt (PNalUnit pNal, uint8_t* pSrc) { + PNalUnitHeaderExt pHeaderExt = &pNal->sNalHeaderExt; + + uint8_t uiCurByte = *pSrc; + pHeaderExt->bIdrFlag = !! 
(uiCurByte & 0x40); + pHeaderExt->uiPriorityId = uiCurByte & 0x3F; + + uiCurByte = * (++pSrc); + pHeaderExt->iNoInterLayerPredFlag = uiCurByte >> 7; + pHeaderExt->uiDependencyId = (uiCurByte & 0x70) >> 4; + pHeaderExt->uiQualityId = uiCurByte & 0x0F; + uiCurByte = * (++pSrc); + pHeaderExt->uiTemporalId = uiCurByte >> 5; + pHeaderExt->bUseRefBasePicFlag = !! (uiCurByte & 0x10); + pHeaderExt->bDiscardableFlag = !! (uiCurByte & 0x08); + pHeaderExt->bOutputFlag = !! (uiCurByte & 0x04); + pHeaderExt->uiReservedThree2Bits = uiCurByte & 0x03; + pHeaderExt->uiLayerDqId = (pHeaderExt->uiDependencyId << 4) | pHeaderExt->uiQualityId; +} + + +void UpdateDecoderStatisticsForActiveParaset (SDecoderStatistics* pDecoderStatistics, + PSps pSps, PPps pPps) { + pDecoderStatistics->iCurrentActiveSpsId = pSps->iSpsId; + + pDecoderStatistics->iCurrentActivePpsId = pPps->iPpsId; + pDecoderStatistics->uiProfile = static_cast (pSps->uiProfileIdc); + pDecoderStatistics->uiLevel = pSps->uiLevelIdc; +} + +#define SLICE_HEADER_IDR_PIC_ID_MAX 65535 +#define SLICE_HEADER_REDUNDANT_PIC_CNT_MAX 127 +#define SLICE_HEADER_ALPHAC0_BETA_OFFSET_MIN -12 +#define SLICE_HEADER_ALPHAC0_BETA_OFFSET_MAX 12 +#define SLICE_HEADER_INTER_LAYER_ALPHAC0_BETA_OFFSET_MIN -12 +#define SLICE_HEADER_INTER_LAYER_ALPHAC0_BETA_OFFSET_MAX 12 +#define MAX_NUM_REF_IDX_L0_ACTIVE_MINUS1 15 +#define MAX_NUM_REF_IDX_L1_ACTIVE_MINUS1 15 +#define SLICE_HEADER_CABAC_INIT_IDC_MAX 2 +/* + * decode_slice_header_avc + * Parse slice header of bitstream in avc for storing data structure + */ +int32_t ParseSliceHeaderSyntaxs (PWelsDecoderContext pCtx, PBitStringAux pBs, const bool kbExtensionFlag) { + PNalUnit const kpCurNal = + pCtx->pAccessUnitList->pNalUnitsList[pCtx->pAccessUnitList->uiAvailUnitsNum - + 1]; + + PNalUnitHeaderExt pNalHeaderExt = NULL; + PSliceHeader pSliceHead = NULL; + PSliceHeaderExt pSliceHeadExt = NULL; + PSubsetSps pSubsetSps = NULL; + PSps pSps = NULL; + PPps pPps = NULL; + EWelsNalUnitType eNalType = static_cast 
(0); + int32_t iPpsId = 0; + int32_t iRet = ERR_NONE; + uint8_t uiSliceType = 0; + uint8_t uiQualityId = BASE_QUALITY_ID; + bool bIdrFlag = false; + bool bSgChangeCycleInvolved = false; // involved slice group change cycle ? + uint32_t uiCode; + int32_t iCode; + SLogContext* pLogCtx = & (pCtx->sLogCtx); + + if (kpCurNal == NULL) { + return ERR_INFO_OUT_OF_MEMORY; + } + + pNalHeaderExt = &kpCurNal->sNalHeaderExt; + pSliceHead = &kpCurNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader; + eNalType = pNalHeaderExt->sNalUnitHeader.eNalUnitType; + + pSliceHeadExt = &kpCurNal->sNalData.sVclNal.sSliceHeaderExt; + + if (pSliceHeadExt) { + SRefBasePicMarking sBaseMarking; + const bool kbStoreRefBaseFlag = pSliceHeadExt->bStoreRefBasePicFlag; + memcpy (&sBaseMarking, &pSliceHeadExt->sRefBasePicMarking, sizeof (SRefBasePicMarking)); //confirmed_safe_unsafe_usage + memset (pSliceHeadExt, 0, sizeof (SSliceHeaderExt)); + pSliceHeadExt->bStoreRefBasePicFlag = kbStoreRefBaseFlag; + memcpy (&pSliceHeadExt->sRefBasePicMarking, &sBaseMarking, sizeof (SRefBasePicMarking)); //confirmed_safe_unsafe_usage + } + + kpCurNal->sNalData.sVclNal.bSliceHeaderExtFlag = kbExtensionFlag; + + // first_mb_in_slice + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //first_mb_in_slice + WELS_CHECK_SE_UPPER_ERROR (uiCode, 36863u, "first_mb_in_slice", GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, + ERR_INFO_INVALID_FIRST_MB_IN_SLICE)); + pSliceHead->iFirstMbInSlice = uiCode; + + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //slice_type + uiSliceType = uiCode; + if (uiSliceType > 9) { + WelsLog (pLogCtx, WELS_LOG_WARNING, "slice type too large (%d) at first_mb(%d)", uiSliceType, + pSliceHead->iFirstMbInSlice); + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SLICE_TYPE); + } + if (uiSliceType > 4) + uiSliceType -= 5; + + if ((NAL_UNIT_CODED_SLICE_IDR == eNalType) && (I_SLICE != uiSliceType)) { + WelsLog (pLogCtx, WELS_LOG_WARNING, "Invalid slice type(%d) in IDR picture. 
", uiSliceType); + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SLICE_TYPE); + } + + if (kbExtensionFlag) { + if (uiSliceType > 2) { + WelsLog (pLogCtx, WELS_LOG_WARNING, "Invalid slice type(%d).", uiSliceType); + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SLICE_TYPE); + } + } + + pSliceHead->eSliceType = static_cast (uiSliceType); + + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //pic_parameter_set_id + WELS_CHECK_SE_UPPER_ERROR (uiCode, (MAX_PPS_COUNT - 1), "iPpsId out of range", + GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, + ERR_INFO_PPS_ID_OVERFLOW)); + iPpsId = uiCode; + + //add check PPS available here + if (pCtx->sSpsPpsCtx.bPpsAvailFlags[iPpsId] == false) { + pCtx->pDecoderStatistics->iPpsReportErrorNum++; + if (pCtx->sSpsPpsCtx.iPPSLastInvalidId != iPpsId) { + WelsLog (pLogCtx, WELS_LOG_ERROR, "PPS id (%d) is invalid, previous id (%d) error ignored (%d)!", iPpsId, + pCtx->sSpsPpsCtx.iPPSLastInvalidId, pCtx->sSpsPpsCtx.iPPSInvalidNum); + pCtx->sSpsPpsCtx.iPPSLastInvalidId = iPpsId; + pCtx->sSpsPpsCtx.iPPSInvalidNum = 0; + } else { + pCtx->sSpsPpsCtx.iPPSInvalidNum++; + } + pCtx->iErrorCode |= dsNoParamSets; + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_PPS_ID); + } + pCtx->sSpsPpsCtx.iPPSLastInvalidId = -1; + + pPps = &pCtx->sSpsPpsCtx.sPpsBuffer[iPpsId]; + + if (pPps->uiNumSliceGroups == 0) { + WelsLog (pLogCtx, WELS_LOG_WARNING, "Invalid PPS referenced"); + pCtx->iErrorCode |= dsNoParamSets; + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_NO_PARAM_SETS); + } + + if (kbExtensionFlag) { + pSubsetSps = &pCtx->sSpsPpsCtx.sSubsetSpsBuffer[pPps->iSpsId]; + pSps = &pSubsetSps->sSps; + if (pCtx->sSpsPpsCtx.bSubspsAvailFlags[pPps->iSpsId] == false) { + pCtx->pDecoderStatistics->iSubSpsReportErrorNum++; + if (pCtx->sSpsPpsCtx.iSubSPSLastInvalidId != pPps->iSpsId) { + WelsLog (pLogCtx, WELS_LOG_ERROR, "Sub SPS id (%d) is invalid, previous id (%d) error ignored (%d)!", pPps->iSpsId, + 
pCtx->sSpsPpsCtx.iSubSPSLastInvalidId, pCtx->sSpsPpsCtx.iSubSPSInvalidNum); + pCtx->sSpsPpsCtx.iSubSPSLastInvalidId = pPps->iSpsId; + pCtx->sSpsPpsCtx.iSubSPSInvalidNum = 0; + } else { + pCtx->sSpsPpsCtx.iSubSPSInvalidNum++; + } + pCtx->iErrorCode |= dsNoParamSets; + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SPS_ID); + } + pCtx->sSpsPpsCtx.iSubSPSLastInvalidId = -1; + } else { + if (pCtx->sSpsPpsCtx.bSpsAvailFlags[pPps->iSpsId] == false) { + pCtx->pDecoderStatistics->iSpsReportErrorNum++; + if (pCtx->sSpsPpsCtx.iSPSLastInvalidId != pPps->iSpsId) { + WelsLog (pLogCtx, WELS_LOG_ERROR, "SPS id (%d) is invalid, previous id (%d) error ignored (%d)!", pPps->iSpsId, + pCtx->sSpsPpsCtx.iSPSLastInvalidId, pCtx->sSpsPpsCtx.iSPSInvalidNum); + pCtx->sSpsPpsCtx.iSPSLastInvalidId = pPps->iSpsId; + pCtx->sSpsPpsCtx.iSPSInvalidNum = 0; + } else { + pCtx->sSpsPpsCtx.iSPSInvalidNum++; + } + pCtx->iErrorCode |= dsNoParamSets; + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SPS_ID); + } + pCtx->sSpsPpsCtx.iSPSLastInvalidId = -1; + pSps = &pCtx->sSpsPpsCtx.sSpsBuffer[pPps->iSpsId]; + } + pSliceHead->iPpsId = iPpsId; + pSliceHead->iSpsId = pPps->iSpsId; + pSliceHead->pPps = pPps; + pSliceHead->pSps = pSps; + + pSliceHeadExt->pSubsetSps = pSubsetSps; + + if (pSps->iNumRefFrames == 0) { + if ((uiSliceType != I_SLICE) && (uiSliceType != SI_SLICE)) { + WelsLog (pLogCtx, WELS_LOG_WARNING, "slice_type (%d) not supported for num_ref_frames = 0.", uiSliceType); + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SLICE_TYPE); + } + } + + bIdrFlag = (!kbExtensionFlag && eNalType == NAL_UNIT_CODED_SLICE_IDR) || (kbExtensionFlag && pNalHeaderExt->bIdrFlag); + pSliceHead->bIdrFlag = bIdrFlag; + + if (pSps->uiLog2MaxFrameNum == 0) { + WelsLog (pLogCtx, WELS_LOG_WARNING, "non existing SPS referenced"); + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_NO_PARAM_SETS); + } + // check first_mb_in_slice + WELS_CHECK_SE_UPPER_ERROR 
((uint32_t) (pSliceHead->iFirstMbInSlice), (pSps->uiTotalMbCount - 1), "first_mb_in_slice", + GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_FIRST_MB_IN_SLICE)); + WELS_READ_VERIFY (BsGetBits (pBs, pSps->uiLog2MaxFrameNum, &uiCode)); //frame_num + pSliceHead->iFrameNum = uiCode; + + pSliceHead->bFieldPicFlag = false; + pSliceHead->bBottomFiledFlag = false; + if (!pSps->bFrameMbsOnlyFlag) { + WelsLog (pLogCtx, WELS_LOG_WARNING, "ParseSliceHeaderSyntaxs(): frame_mbs_only_flag = %d not supported. ", + pSps->bFrameMbsOnlyFlag); + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_MBAFF); + } + pSliceHead->iMbWidth = pSps->iMbWidth; + pSliceHead->iMbHeight = pSps->iMbHeight / (1 + pSliceHead->bFieldPicFlag); + + if (bIdrFlag) { + if (pSliceHead->iFrameNum != 0) { + WelsLog (pLogCtx, WELS_LOG_WARNING, + "ParseSliceHeaderSyntaxs(), invaild frame number: %d due to IDR frame introduced!", + pSliceHead->iFrameNum); + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_FRAME_NUM); + } + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //idr_pic_id + // standard 7.4.3 idr_pic_id should be in range 0 to 65535, inclusive. 
+ WELS_CHECK_SE_UPPER_ERROR (uiCode, SLICE_HEADER_IDR_PIC_ID_MAX, "idr_pic_id", GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, + ERR_INFO_INVALID_IDR_PIC_ID)); + pSliceHead->uiIdrPicId = uiCode; /* uiIdrPicId */ +#ifdef LONG_TERM_REF + pCtx->uiCurIdrPicId = pSliceHead->uiIdrPicId; +#endif + } + + pSliceHead->iDeltaPicOrderCntBottom = 0; + pSliceHead->iDeltaPicOrderCnt[0] = + pSliceHead->iDeltaPicOrderCnt[1] = 0; + if (pSps->uiPocType == 0) { + WELS_READ_VERIFY (BsGetBits (pBs, pSps->iLog2MaxPocLsb, &uiCode)); //pic_order_cnt_lsb + const int32_t iMaxPocLsb = 1 << (pSps->iLog2MaxPocLsb); + pSliceHead->iPicOrderCntLsb = uiCode; + if (pPps->bPicOrderPresentFlag && !pSliceHead->bFieldPicFlag) { + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //delta_pic_order_cnt_bottom + pSliceHead->iDeltaPicOrderCntBottom = iCode; + } + //Calculate poc if necessary + int32_t pocLsb = pSliceHead->iPicOrderCntLsb; + if (pSliceHead->bIdrFlag || kpCurNal->sNalHeaderExt.sNalUnitHeader.eNalUnitType == NAL_UNIT_CODED_SLICE_IDR) { + pCtx->pLastDecPicInfo->iPrevPicOrderCntMsb = 0; + pCtx->pLastDecPicInfo->iPrevPicOrderCntLsb = 0; + } + int32_t pocMsb; + if (pocLsb < pCtx->pLastDecPicInfo->iPrevPicOrderCntLsb + && pCtx->pLastDecPicInfo->iPrevPicOrderCntLsb - pocLsb >= iMaxPocLsb / 2) + pocMsb = pCtx->pLastDecPicInfo->iPrevPicOrderCntMsb + iMaxPocLsb; + else if (pocLsb > pCtx->pLastDecPicInfo->iPrevPicOrderCntLsb + && pocLsb - pCtx->pLastDecPicInfo->iPrevPicOrderCntLsb > iMaxPocLsb / 2) + pocMsb = pCtx->pLastDecPicInfo->iPrevPicOrderCntMsb - iMaxPocLsb; + else + pocMsb = pCtx->pLastDecPicInfo->iPrevPicOrderCntMsb; + pSliceHead->iPicOrderCntLsb = pocMsb + pocLsb; + + if (pPps->bPicOrderPresentFlag && !pSliceHead->bFieldPicFlag) { + pSliceHead->iPicOrderCntLsb += pSliceHead->iDeltaPicOrderCntBottom; + } + + if (kpCurNal->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc != 0) { + pCtx->pLastDecPicInfo->iPrevPicOrderCntLsb = pocLsb; + pCtx->pLastDecPicInfo->iPrevPicOrderCntMsb = pocMsb; + } + //End of Calculating 
poc + } else if (pSps->uiPocType == 1 && !pSps->bDeltaPicOrderAlwaysZeroFlag) { + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //delta_pic_order_cnt[ 0 ] + pSliceHead->iDeltaPicOrderCnt[0] = iCode; + if (pPps->bPicOrderPresentFlag && !pSliceHead->bFieldPicFlag) { + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //delta_pic_order_cnt[ 1 ] + pSliceHead->iDeltaPicOrderCnt[1] = iCode; + } + } + pSliceHead->iRedundantPicCnt = 0; + if (pPps->bRedundantPicCntPresentFlag) { + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //redundant_pic_cnt + // standard section 7.4.3, redundant_pic_cnt should be in range 0 to 127, inclusive. + WELS_CHECK_SE_UPPER_ERROR (uiCode, SLICE_HEADER_REDUNDANT_PIC_CNT_MAX, "redundant_pic_cnt", + GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_REDUNDANT_PIC_CNT)); + pSliceHead->iRedundantPicCnt = uiCode; + if (pSliceHead->iRedundantPicCnt > 0) { + WelsLog (pLogCtx, WELS_LOG_WARNING, "Redundant picture not supported!"); + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_REDUNDANT_PIC_CNT); + } + } + + if (B_SLICE == uiSliceType) { + //fix me: it needs to use the this flag somewhere for B-Sclice + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //direct_spatial_mv_pred_flag + pSliceHead->iDirectSpatialMvPredFlag = uiCode; + } + + //set defaults, might be overriden a few line later + pSliceHead->uiRefCount[0] = pPps->uiNumRefIdxL0Active; + pSliceHead->uiRefCount[1] = pPps->uiNumRefIdxL1Active; + + bool bReadNumRefFlag = (P_SLICE == uiSliceType || B_SLICE == uiSliceType); + if (kbExtensionFlag) { + bReadNumRefFlag &= (BASE_QUALITY_ID == pNalHeaderExt->uiQualityId); + } + if (bReadNumRefFlag) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //num_ref_idx_active_override_flag + pSliceHead->bNumRefIdxActiveOverrideFlag = !!uiCode; + if (pSliceHead->bNumRefIdxActiveOverrideFlag) { + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //num_ref_idx_l0_active_minus1 + WELS_CHECK_SE_UPPER_ERROR (uiCode, MAX_NUM_REF_IDX_L0_ACTIVE_MINUS1, 
"num_ref_idx_l0_active_minus1", + GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_NUM_REF_IDX_L0_ACTIVE_MINUS1)); + pSliceHead->uiRefCount[0] = 1 + uiCode; + if (B_SLICE == uiSliceType) { + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //num_ref_idx_l1_active_minus1 + WELS_CHECK_SE_UPPER_ERROR (uiCode, MAX_NUM_REF_IDX_L1_ACTIVE_MINUS1, "num_ref_idx_l1_active_minus1", + GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_NUM_REF_IDX_L1_ACTIVE_MINUS1)); + pSliceHead->uiRefCount[1] = 1 + uiCode; + } + } + } + + if (pSliceHead->uiRefCount[0] > MAX_REF_PIC_COUNT || pSliceHead->uiRefCount[1] > MAX_REF_PIC_COUNT) { + WelsLog (pLogCtx, WELS_LOG_WARNING, "reference overflow"); + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_REF_COUNT_OVERFLOW); + } + + if (BASE_QUALITY_ID == uiQualityId) { + iRet = ParseRefPicListReordering (pBs, pSliceHead); + if (iRet != ERR_NONE) { + WelsLog (pLogCtx, WELS_LOG_WARNING, "invalid ref pPic list reordering syntaxs!"); + return iRet; + } + + if ((pPps->bWeightedPredFlag && uiSliceType == P_SLICE) || (pPps->uiWeightedBipredIdc == 1 && uiSliceType == B_SLICE)) { + iRet = ParsePredWeightedTable (pBs, pSliceHead); + if (iRet != ERR_NONE) { + WelsLog (pLogCtx, WELS_LOG_WARNING, "invalid weighted prediction syntaxs!"); + return iRet; + } + } + + if (kbExtensionFlag) { + if (pNalHeaderExt->iNoInterLayerPredFlag || pNalHeaderExt->uiQualityId > 0) + pSliceHeadExt->bBasePredWeightTableFlag = false; + else + pSliceHeadExt->bBasePredWeightTableFlag = true; + } + + if (kpCurNal->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc != 0) { + iRet = ParseDecRefPicMarking (pCtx, pBs, pSliceHead, pSps, bIdrFlag); + if (iRet != ERR_NONE) { + return iRet; + } + + if (kbExtensionFlag && !pSubsetSps->sSpsSvcExt.bSliceHeaderRestrictionFlag) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //store_ref_base_pic_flag + pSliceHeadExt->bStoreRefBasePicFlag = !!uiCode; + if ((pNalHeaderExt->bUseRefBasePicFlag || pSliceHeadExt->bStoreRefBasePicFlag) 
&& !bIdrFlag) { + WelsLog (pLogCtx, WELS_LOG_WARNING, + "ParseSliceHeaderSyntaxs(): bUseRefBasePicFlag or bStoreRefBasePicFlag = 1 not supported."); + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_ILP); + } + } + } + } + + if (pPps->bEntropyCodingModeFlag) { + if (pSliceHead->eSliceType != I_SLICE && pSliceHead->eSliceType != SI_SLICE) { + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); + WELS_CHECK_SE_UPPER_ERROR (uiCode, SLICE_HEADER_CABAC_INIT_IDC_MAX, "cabac_init_idc", ERR_INFO_INVALID_CABAC_INIT_IDC); + pSliceHead->iCabacInitIdc = uiCode; + } else + pSliceHead->iCabacInitIdc = 0; + } + + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //slice_qp_delta + pSliceHead->iSliceQpDelta = iCode; + pSliceHead->iSliceQp = pPps->iPicInitQp + pSliceHead->iSliceQpDelta; + if (pSliceHead->iSliceQp < 0 || pSliceHead->iSliceQp > 51) { + WelsLog (pLogCtx, WELS_LOG_WARNING, "QP %d out of range", pSliceHead->iSliceQp); + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_QP); + } + + //FIXME qscale / qp ... 
stuff + if (!kbExtensionFlag) { + if (uiSliceType == SP_SLICE || uiSliceType == SI_SLICE) { + WelsLog (pLogCtx, WELS_LOG_WARNING, "SP/SI not supported"); + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_SPSI); + } + } + + pSliceHead->uiDisableDeblockingFilterIdc = 0; + pSliceHead->iSliceAlphaC0Offset = 0; + pSliceHead->iSliceBetaOffset = 0; + if (pPps->bDeblockingFilterControlPresentFlag) { + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //disable_deblocking_filter_idc + pSliceHead->uiDisableDeblockingFilterIdc = uiCode; + //refer to JVT-X201wcm1.doc G.7.4.3.4--2010.4.20 + if (pSliceHead->uiDisableDeblockingFilterIdc > 6) { + WelsLog (pLogCtx, WELS_LOG_WARNING, "disable_deblock_filter_idc (%d) out of range [0, 6]", + pSliceHead->uiDisableDeblockingFilterIdc); + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_DBLOCKING_IDC); + } + if (pSliceHead->uiDisableDeblockingFilterIdc != 1) { + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //slice_alpha_c0_offset_div2 + pSliceHead->iSliceAlphaC0Offset = iCode * 2; + WELS_CHECK_SE_BOTH_ERROR (pSliceHead->iSliceAlphaC0Offset, SLICE_HEADER_ALPHAC0_BETA_OFFSET_MIN, + SLICE_HEADER_ALPHAC0_BETA_OFFSET_MAX, "slice_alpha_c0_offset_div2 * 2", GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, + ERR_INFO_INVALID_SLICE_ALPHA_C0_OFFSET_DIV2)); + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //slice_beta_offset_div2 + pSliceHead->iSliceBetaOffset = iCode * 2; + WELS_CHECK_SE_BOTH_ERROR (pSliceHead->iSliceBetaOffset, SLICE_HEADER_ALPHAC0_BETA_OFFSET_MIN, + SLICE_HEADER_ALPHAC0_BETA_OFFSET_MAX, "slice_beta_offset_div2 * 2", GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, + ERR_INFO_INVALID_SLICE_BETA_OFFSET_DIV2)); + } + } + + bSgChangeCycleInvolved = (pPps->uiNumSliceGroups > 1 && pPps->uiSliceGroupMapType >= 3 + && pPps->uiSliceGroupMapType <= 5); + if (kbExtensionFlag && bSgChangeCycleInvolved) + bSgChangeCycleInvolved = (bSgChangeCycleInvolved && (uiQualityId == BASE_QUALITY_ID)); + if (bSgChangeCycleInvolved) { + if 
(pPps->uiSliceGroupChangeRate > 0) { + const int32_t kiNumBits = (int32_t)WELS_CEIL (log (static_cast (1 + pPps->uiPicSizeInMapUnits / + pPps->uiSliceGroupChangeRate))); + WELS_READ_VERIFY (BsGetBits (pBs, kiNumBits, &uiCode)); //lice_group_change_cycle + pSliceHead->iSliceGroupChangeCycle = uiCode; + } else + pSliceHead->iSliceGroupChangeCycle = 0; + } + + if (!kbExtensionFlag) { + FillDefaultSliceHeaderExt (pSliceHeadExt, pNalHeaderExt); + } else { + /* Extra syntax elements newly introduced */ + pSliceHeadExt->pSubsetSps = pSubsetSps; + + if (!pNalHeaderExt->iNoInterLayerPredFlag && BASE_QUALITY_ID == uiQualityId) { + //the following should be deleted for CODE_CLEAN + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //ref_layer_dq_id + pSliceHeadExt->uiRefLayerDqId = uiCode; + if (pSubsetSps->sSpsSvcExt.bInterLayerDeblockingFilterCtrlPresentFlag) { + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //disable_inter_layer_deblocking_filter_idc + pSliceHeadExt->uiDisableInterLayerDeblockingFilterIdc = uiCode; + //refer to JVT-X201wcm1.doc G.7.4.3.4--2010.4.20 + if (pSliceHeadExt->uiDisableInterLayerDeblockingFilterIdc > 6) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "disable_inter_layer_deblock_filter_idc (%d) out of range [0, 6]", + pSliceHeadExt->uiDisableInterLayerDeblockingFilterIdc); + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_DBLOCKING_IDC); + } + if (pSliceHeadExt->uiDisableInterLayerDeblockingFilterIdc != 1) { + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //inter_layer_slice_alpha_c0_offset_div2 + pSliceHeadExt->iInterLayerSliceAlphaC0Offset = iCode * 2; + WELS_CHECK_SE_BOTH_ERROR (pSliceHeadExt->iInterLayerSliceAlphaC0Offset, + SLICE_HEADER_INTER_LAYER_ALPHAC0_BETA_OFFSET_MIN, SLICE_HEADER_INTER_LAYER_ALPHAC0_BETA_OFFSET_MAX, + "inter_layer_alpha_c0_offset_div2 * 2", GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, + ERR_INFO_INVALID_SLICE_ALPHA_C0_OFFSET_DIV2)); + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //inter_layer_slice_beta_offset_div2 + 
pSliceHeadExt->iInterLayerSliceBetaOffset = iCode * 2; + WELS_CHECK_SE_BOTH_ERROR (pSliceHeadExt->iInterLayerSliceBetaOffset, SLICE_HEADER_INTER_LAYER_ALPHAC0_BETA_OFFSET_MIN, + SLICE_HEADER_INTER_LAYER_ALPHAC0_BETA_OFFSET_MAX, "inter_layer_slice_beta_offset_div2 * 2", + GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SLICE_BETA_OFFSET_DIV2)); + } + } + + pSliceHeadExt->uiRefLayerChromaPhaseXPlus1Flag = pSubsetSps->sSpsSvcExt.uiSeqRefLayerChromaPhaseXPlus1Flag; + pSliceHeadExt->uiRefLayerChromaPhaseYPlus1 = pSubsetSps->sSpsSvcExt.uiSeqRefLayerChromaPhaseYPlus1; + + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //constrained_intra_resampling_flag + pSliceHeadExt->bConstrainedIntraResamplingFlag = !!uiCode; + + { + SPosOffset pos; + pos.iLeftOffset = pSubsetSps->sSpsSvcExt.sSeqScaledRefLayer.iLeftOffset; + pos.iTopOffset = pSubsetSps->sSpsSvcExt.sSeqScaledRefLayer.iTopOffset * (2 - pSps->bFrameMbsOnlyFlag); + pos.iRightOffset = pSubsetSps->sSpsSvcExt.sSeqScaledRefLayer.iRightOffset; + pos.iBottomOffset = pSubsetSps->sSpsSvcExt.sSeqScaledRefLayer.iBottomOffset * (2 - pSps->bFrameMbsOnlyFlag); + //memcpy(&pSliceHeadExt->sScaledRefLayer, &pos, sizeof(SPosOffset));//confirmed_safe_unsafe_usage + pSliceHeadExt->iScaledRefLayerPicWidthInSampleLuma = (pSliceHead->iMbWidth << 4) - + (pos.iLeftOffset + pos.iRightOffset); + pSliceHeadExt->iScaledRefLayerPicHeightInSampleLuma = (pSliceHead->iMbHeight << 4) - + (pos.iTopOffset + pos.iBottomOffset) / (1 + pSliceHead->bFieldPicFlag); + } + } else if (uiQualityId > BASE_QUALITY_ID) { + WelsLog (pLogCtx, WELS_LOG_WARNING, "MGS not supported."); + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_MGS); + } else { + pSliceHeadExt->uiRefLayerDqId = (uint8_t) - 1; + } + + pSliceHeadExt->bSliceSkipFlag = false; + pSliceHeadExt->bAdaptiveBaseModeFlag = false; + pSliceHeadExt->bDefaultBaseModeFlag = false; + pSliceHeadExt->bAdaptiveMotionPredFlag = false; + pSliceHeadExt->bDefaultMotionPredFlag = false; + 
pSliceHeadExt->bAdaptiveResidualPredFlag = false; + pSliceHeadExt->bDefaultResidualPredFlag = false; + if (pNalHeaderExt->iNoInterLayerPredFlag) + pSliceHeadExt->bTCoeffLevelPredFlag = false; + else + pSliceHeadExt->bTCoeffLevelPredFlag = pSubsetSps->sSpsSvcExt.bSeqTCoeffLevelPredFlag; + + if (!pNalHeaderExt->iNoInterLayerPredFlag) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //slice_skip_flag + pSliceHeadExt->bSliceSkipFlag = !!uiCode; + if (pSliceHeadExt->bSliceSkipFlag) { + WelsLog (pLogCtx, WELS_LOG_WARNING, "bSliceSkipFlag == 1 not supported."); + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_SLICESKIP); + } else { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //adaptive_base_mode_flag + pSliceHeadExt->bAdaptiveBaseModeFlag = !!uiCode; + if (!pSliceHeadExt->bAdaptiveBaseModeFlag) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //default_base_mode_flag + pSliceHeadExt->bDefaultBaseModeFlag = !!uiCode; + } + if (!pSliceHeadExt->bDefaultBaseModeFlag) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //adaptive_motion_prediction_flag + pSliceHeadExt->bAdaptiveMotionPredFlag = !!uiCode; + if (!pSliceHeadExt->bAdaptiveMotionPredFlag) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //default_motion_prediction_flag + pSliceHeadExt->bDefaultMotionPredFlag = !!uiCode; + } + } + + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //adaptive_residual_prediction_flag + pSliceHeadExt->bAdaptiveResidualPredFlag = !!uiCode; + if (!pSliceHeadExt->bAdaptiveResidualPredFlag) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //default_residual_prediction_flag + pSliceHeadExt->bDefaultResidualPredFlag = !!uiCode; + } + } + if (pSubsetSps->sSpsSvcExt.bAdaptiveTCoeffLevelPredFlag) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //tcoeff_level_prediction_flag + pSliceHeadExt->bTCoeffLevelPredFlag = !!uiCode; + } + } + + if (!pSubsetSps->sSpsSvcExt.bSliceHeaderRestrictionFlag) { + WELS_READ_VERIFY (BsGetBits (pBs, 4, &uiCode)); 
//scan_idx_start + pSliceHeadExt->uiScanIdxStart = uiCode; + WELS_READ_VERIFY (BsGetBits (pBs, 4, &uiCode)); //scan_idx_end + pSliceHeadExt->uiScanIdxEnd = uiCode; + if (pSliceHeadExt->uiScanIdxStart != 0 || pSliceHeadExt->uiScanIdxEnd != 15) { + WelsLog (pLogCtx, WELS_LOG_WARNING, "uiScanIdxStart (%d) != 0 and uiScanIdxEnd (%d) !=15 not supported here", + pSliceHeadExt->uiScanIdxStart, pSliceHeadExt->uiScanIdxEnd); + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_MGS); + } + } else { + pSliceHeadExt->uiScanIdxStart = 0; + pSliceHeadExt->uiScanIdxEnd = 15; + } + } + + return ERR_NONE; +} + +/* + * Copy relative syntax elements of NALUnitHeaderExt, sRefPicBaseMarking and bStoreRefBasePicFlag in prefix nal unit. + * pSrc: mark as decoded prefix NAL + * ppDst: succeeded VCL NAL based AVC (I/P Slice) + */ +bool PrefetchNalHeaderExtSyntax (PWelsDecoderContext pCtx, PNalUnit const kppDst, PNalUnit const kpSrc) { + PNalUnitHeaderExt pNalHdrExtD = NULL, pNalHdrExtS = NULL; + PSliceHeaderExt pShExtD = NULL; + PPrefixNalUnit pPrefixS = NULL; + PSps pSps = NULL; + int32_t iIdx = 0; + + if (kppDst == NULL || kpSrc == NULL) + return false; + + pNalHdrExtD = &kppDst->sNalHeaderExt; + pNalHdrExtS = &kpSrc->sNalHeaderExt; + pShExtD = &kppDst->sNalData.sVclNal.sSliceHeaderExt; + pPrefixS = &kpSrc->sNalData.sPrefixNal; + pSps = &pCtx->sSpsPpsCtx.sSpsBuffer[pCtx->sSpsPpsCtx.sPpsBuffer[pShExtD->sSliceHeader.iPpsId].iSpsId]; + + pNalHdrExtD->uiDependencyId = pNalHdrExtS->uiDependencyId; + pNalHdrExtD->uiQualityId = pNalHdrExtS->uiQualityId; + pNalHdrExtD->uiTemporalId = pNalHdrExtS->uiTemporalId; + pNalHdrExtD->uiPriorityId = pNalHdrExtS->uiPriorityId; + pNalHdrExtD->bIdrFlag = pNalHdrExtS->bIdrFlag; + pNalHdrExtD->iNoInterLayerPredFlag = pNalHdrExtS->iNoInterLayerPredFlag; + pNalHdrExtD->bDiscardableFlag = pNalHdrExtS->bDiscardableFlag; + pNalHdrExtD->bOutputFlag = pNalHdrExtS->bOutputFlag; + pNalHdrExtD->bUseRefBasePicFlag = pNalHdrExtS->bUseRefBasePicFlag; + 
pNalHdrExtD->uiLayerDqId = pNalHdrExtS->uiLayerDqId; + + pShExtD->bStoreRefBasePicFlag = pPrefixS->bStoreRefBasePicFlag; + memcpy (&pShExtD->sRefBasePicMarking, &pPrefixS->sRefPicBaseMarking, + sizeof (SRefBasePicMarking)); //confirmed_safe_unsafe_usage + if (pShExtD->sRefBasePicMarking.bAdaptiveRefBasePicMarkingModeFlag) { + PRefBasePicMarking pRefBasePicMarking = &pShExtD->sRefBasePicMarking; + iIdx = 0; + do { + if (pRefBasePicMarking->mmco_base[iIdx].uiMmcoType == MMCO_END) + break; + if (pRefBasePicMarking->mmco_base[iIdx].uiMmcoType == MMCO_SHORT2UNUSED) + pRefBasePicMarking->mmco_base[iIdx].iShortFrameNum = (pShExtD->sSliceHeader.iFrameNum - + pRefBasePicMarking->mmco_base[iIdx].uiDiffOfPicNums) & ((1 << pSps->uiLog2MaxFrameNum) - 1); + ++ iIdx; + } while (iIdx < MAX_MMCO_COUNT); + } + + return true; +} + + + +int32_t UpdateAccessUnit (PWelsDecoderContext pCtx) { + PAccessUnit pCurAu = pCtx->pAccessUnitList; + int32_t iIdx = pCurAu->uiEndPos; + + // Conversed iterator + pCtx->uiTargetDqId = pCurAu->pNalUnitsList[iIdx]->sNalHeaderExt.uiLayerDqId; + pCurAu->uiActualUnitsNum = iIdx + 1; + pCurAu->bCompletedAuFlag = true; + + // Added for mosaic avoidance, 11/19/2009 +#ifdef LONG_TERM_REF + if (pCtx->bParamSetsLostFlag || pCtx->bNewSeqBegin) +#else + if (pCtx->bReferenceLostAtT0Flag || pCtx->bNewSeqBegin) +#endif + { + uint32_t uiActualIdx = 0; + while (uiActualIdx < pCurAu->uiActualUnitsNum) { + PNalUnit nal = pCurAu->pNalUnitsList[uiActualIdx]; + + if (nal->sNalHeaderExt.sNalUnitHeader.eNalUnitType == NAL_UNIT_CODED_SLICE_IDR || nal->sNalHeaderExt.bIdrFlag) { + break; + } + ++ uiActualIdx; + } + if (uiActualIdx == + pCurAu->uiActualUnitsNum) { // no found IDR nal within incoming AU, need exit to avoid mosaic issue, 11/19/2009 + + pCtx->pDecoderStatistics->uiIDRLostNum++; + if (!pCtx->bParamSetsLostFlag) + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, + "UpdateAccessUnit():::::Key frame lost.....CAN NOT find IDR from current AU."); + pCtx->iErrorCode |= 
dsRefLost; + if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) { +#ifdef LONG_TERM_REF + pCtx->iErrorCode |= dsNoParamSets; + return dsNoParamSets; +#else + pCtx->iErrorCode |= dsRefLost; + return ERR_INFO_REFERENCE_PIC_LOST; +#endif + } + } + } + + return ERR_NONE; +} + +int32_t InitialDqLayersContext (PWelsDecoderContext pCtx, const int32_t kiMaxWidth, const int32_t kiMaxHeight) { + int32_t i = 0; + WELS_VERIFY_RETURN_IF (ERR_INFO_INVALID_PARAM, (NULL == pCtx || kiMaxWidth <= 0 || kiMaxHeight <= 0)) + pCtx->sMb.iMbWidth = (kiMaxWidth + 15) >> 4; + pCtx->sMb.iMbHeight = (kiMaxHeight + 15) >> 4; + + if (pCtx->bInitialDqLayersMem && kiMaxWidth <= pCtx->iPicWidthReq + && kiMaxHeight <= pCtx->iPicHeightReq) // have same dimension memory, skipped + return ERR_NONE; + + CMemoryAlign* pMa = pCtx->pMemAlign; + + UninitialDqLayersContext (pCtx); + + do { + PDqLayer pDq = (PDqLayer)pMa->WelsMallocz (sizeof (SDqLayer), "PDqLayer"); + + if (pDq == NULL) + return ERR_INFO_OUT_OF_MEMORY; + + pCtx->pDqLayersList[i] = pDq; //to keep consistence with in UninitialDqLayersContext() + memset (pDq, 0, sizeof (SDqLayer)); + + pCtx->sMb.pMbType[i] = (uint32_t*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (uint32_t), + "pCtx->sMb.pMbType[]"); + pCtx->sMb.pMv[i][LIST_0] = (int16_t (*)[16][2])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof ( + int16_t) * MV_A * MB_BLOCK4x4_NUM, "pCtx->sMb.pMv[][]"); + pCtx->sMb.pMv[i][LIST_1] = (int16_t (*)[16][2])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof ( + int16_t) * MV_A * MB_BLOCK4x4_NUM, "pCtx->sMb.pMv[][]"); + + pCtx->sMb.pRefIndex[i][LIST_0] = (int8_t (*)[MB_BLOCK4x4_NUM])pMa->WelsMallocz (pCtx->sMb.iMbWidth * + pCtx->sMb.iMbHeight * + sizeof ( + int8_t) * MB_BLOCK4x4_NUM, "pCtx->sMb.pRefIndex[][]"); + pCtx->sMb.pRefIndex[i][LIST_1] = (int8_t (*)[MB_BLOCK4x4_NUM])pMa->WelsMallocz (pCtx->sMb.iMbWidth * + pCtx->sMb.iMbHeight * + sizeof ( + int8_t) * MB_BLOCK4x4_NUM, 
"pCtx->sMb.pRefIndex[][]"); + pCtx->sMb.pDirect[i] = (int8_t (*)[MB_BLOCK4x4_NUM])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * + sizeof ( + int8_t) * MB_BLOCK4x4_NUM, "pCtx->sMb.pDirect[]"); + pCtx->sMb.pLumaQp[i] = (int8_t*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t), + "pCtx->sMb.pLumaQp[]"); + pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[i] = (bool*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * + sizeof ( + bool), + "pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[]"); + pCtx->sMb.pTransformSize8x8Flag[i] = (bool*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof ( + bool), + "pCtx->sMb.pTransformSize8x8Flag[]"); + pCtx->sMb.pChromaQp[i] = (int8_t (*)[2])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof ( + int8_t) * 2, + "pCtx->sMb.pChromaQp[]"); + pCtx->sMb.pMvd[i][LIST_0] = (int16_t (*)[16][2])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof ( + int16_t) * MV_A * MB_BLOCK4x4_NUM, "pCtx->sMb.pMvd[][]"); + pCtx->sMb.pMvd[i][LIST_1] = (int16_t (*)[16][2])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof ( + int16_t) * MV_A * MB_BLOCK4x4_NUM, "pCtx->sMb.pMvd[][]"); + pCtx->sMb.pCbfDc[i] = (uint16_t*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (uint16_t), + "pCtx->sMb.pCbfDc[]"); + pCtx->sMb.pNzc[i] = (int8_t (*)[24])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof ( + int8_t) * 24, + "pCtx->sMb.pNzc[]"); + pCtx->sMb.pNzcRs[i] = (int8_t (*)[24])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof ( + int8_t) * 24, + "pCtx->sMb.pNzcRs[]"); + pCtx->sMb.pScaledTCoeff[i] = (int16_t (*)[MB_COEFF_LIST_SIZE])pMa->WelsMallocz (pCtx->sMb.iMbWidth * + pCtx->sMb.iMbHeight * + sizeof (int16_t) * MB_COEFF_LIST_SIZE, "pCtx->sMb.pScaledTCoeff[]"); + pCtx->sMb.pIntraPredMode[i] = (int8_t (*)[8])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof ( + int8_t) * 8, + 
"pCtx->sMb.pIntraPredMode[]"); + pCtx->sMb.pIntra4x4FinalMode[i] = (int8_t (*)[MB_BLOCK4x4_NUM])pMa->WelsMallocz (pCtx->sMb.iMbWidth * + pCtx->sMb.iMbHeight * + sizeof (int8_t) * MB_BLOCK4x4_NUM, "pCtx->sMb.pIntra4x4FinalMode[]"); + pCtx->sMb.pIntraNxNAvailFlag[i] = (uint8_t (*))pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof ( + int8_t), + "pCtx->sMb.pIntraNxNAvailFlag"); + pCtx->sMb.pChromaPredMode[i] = (int8_t*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t), + "pCtx->sMb.pChromaPredMode[]"); + pCtx->sMb.pCbp[i] = (int8_t*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t), + "pCtx->sMb.pCbp[]"); + pCtx->sMb.pSubMbType[i] = (uint32_t (*)[MB_PARTITION_SIZE])pMa->WelsMallocz (pCtx->sMb.iMbWidth * + pCtx->sMb.iMbHeight * + sizeof ( + uint32_t) * MB_PARTITION_SIZE, "pCtx->sMb.pSubMbType[]"); + pCtx->sMb.pSliceIdc[i] = (int32_t*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int32_t), + "pCtx->sMb.pSliceIdc[]"); // using int32_t for slice_idc, 4/21/2010 + pCtx->sMb.pResidualPredFlag[i] = (int8_t*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof ( + int8_t), + "pCtx->sMb.pResidualPredFlag[]"); + pCtx->sMb.pInterPredictionDoneFlag[i] = (int8_t*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * + sizeof ( + int8_t), "pCtx->sMb.pInterPredictionDoneFlag[]"); + + pCtx->sMb.pMbCorrectlyDecodedFlag[i] = (bool*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof ( + bool), + "pCtx->sMb.pMbCorrectlyDecodedFlag[]"); + pCtx->sMb.pMbRefConcealedFlag[i] = (bool*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof ( + bool), + "pCtx->pMbRefConcealedFlag[]"); + + // check memory block valid due above allocated.. 
+ WELS_VERIFY_RETURN_IF (ERR_INFO_OUT_OF_MEMORY, + ((NULL == pCtx->sMb.pMbType[i]) || + (NULL == pCtx->sMb.pMv[i][LIST_0]) || + (NULL == pCtx->sMb.pMv[i][LIST_1]) || + (NULL == pCtx->sMb.pRefIndex[i][LIST_0]) || + (NULL == pCtx->sMb.pRefIndex[i][LIST_1]) || + (NULL == pCtx->sMb.pDirect[i]) || + (NULL == pCtx->sMb.pLumaQp[i]) || + (NULL == pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[i]) || + (NULL == pCtx->sMb.pTransformSize8x8Flag[i]) || + (NULL == pCtx->sMb.pChromaQp[i]) || + (NULL == pCtx->sMb.pMvd[i][LIST_0]) || + (NULL == pCtx->sMb.pMvd[i][LIST_1]) || + (NULL == pCtx->sMb.pCbfDc[i]) || + (NULL == pCtx->sMb.pNzc[i]) || + (NULL == pCtx->sMb.pNzcRs[i]) || + (NULL == pCtx->sMb.pScaledTCoeff[i]) || + (NULL == pCtx->sMb.pIntraPredMode[i]) || + (NULL == pCtx->sMb.pIntra4x4FinalMode[i]) || + (NULL == pCtx->sMb.pIntraNxNAvailFlag[i]) || + (NULL == pCtx->sMb.pChromaPredMode[i]) || + (NULL == pCtx->sMb.pCbp[i]) || + (NULL == pCtx->sMb.pSubMbType[i]) || + (NULL == pCtx->sMb.pSliceIdc[i]) || + (NULL == pCtx->sMb.pResidualPredFlag[i]) || + (NULL == pCtx->sMb.pInterPredictionDoneFlag[i]) || + (NULL == pCtx->sMb.pMbRefConcealedFlag[i]) || + (NULL == pCtx->sMb.pMbCorrectlyDecodedFlag[i]) + ) + ) + + memset (pCtx->sMb.pSliceIdc[i], 0xff, (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int32_t))); + + ++ i; + } while (i < LAYER_NUM_EXCHANGEABLE); + + pCtx->bInitialDqLayersMem = true; + pCtx->iPicWidthReq = kiMaxWidth; + pCtx->iPicHeightReq = kiMaxHeight; + + return ERR_NONE; +} + + + +void UninitialDqLayersContext (PWelsDecoderContext pCtx) { + int32_t i = 0; + CMemoryAlign* pMa = pCtx->pMemAlign; + + do { + PDqLayer pDq = pCtx->pDqLayersList[i]; + if (pDq == NULL) { + ++ i; + continue; + } + + if (pCtx->sMb.pMbType[i]) { + pMa->WelsFree (pCtx->sMb.pMbType[i], "pCtx->sMb.pMbType[]"); + + pCtx->sMb.pMbType[i] = NULL; + } + + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + if (pCtx->sMb.pMv[i][listIdx]) { + pMa->WelsFree (pCtx->sMb.pMv[i][listIdx], 
"pCtx->sMb.pMv[][]"); + pCtx->sMb.pMv[i][listIdx] = NULL; + } + + if (pCtx->sMb.pRefIndex[i][listIdx]) { + pMa->WelsFree (pCtx->sMb.pRefIndex[i][listIdx], "pCtx->sMb.pRefIndex[][]"); + pCtx->sMb.pRefIndex[i][listIdx] = NULL; + } + + if (pCtx->sMb.pDirect[i]) { + pMa->WelsFree (pCtx->sMb.pDirect[i], "pCtx->sMb.pDirect[]"); + pCtx->sMb.pDirect[i] = NULL; + } + + if (pCtx->sMb.pMvd[i][listIdx]) { + pMa->WelsFree (pCtx->sMb.pMvd[i][listIdx], "pCtx->sMb.pMvd[][]"); + pCtx->sMb.pMvd[i][listIdx] = NULL; + } + } + + if (pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[i]) { + pMa->WelsFree (pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[i], "pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[]"); + + pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[i] = NULL; + } + + if (pCtx->sMb.pTransformSize8x8Flag[i]) { + pMa->WelsFree (pCtx->sMb.pTransformSize8x8Flag[i], "pCtx->sMb.pTransformSize8x8Flag[]"); + + pCtx->sMb.pTransformSize8x8Flag[i] = NULL; + } + + if (pCtx->sMb.pLumaQp[i]) { + pMa->WelsFree (pCtx->sMb.pLumaQp[i], "pCtx->sMb.pLumaQp[]"); + + pCtx->sMb.pLumaQp[i] = NULL; + } + + if (pCtx->sMb.pChromaQp[i]) { + pMa->WelsFree (pCtx->sMb.pChromaQp[i], "pCtx->sMb.pChromaQp[]"); + + pCtx->sMb.pChromaQp[i] = NULL; + } + + if (pCtx->sMb.pCbfDc[i]) { + pMa->WelsFree (pCtx->sMb.pCbfDc[i], "pCtx->sMb.pCbfDc[]"); + pCtx->sMb.pCbfDc[i] = NULL; + } + + if (pCtx->sMb.pNzc[i]) { + pMa->WelsFree (pCtx->sMb.pNzc[i], "pCtx->sMb.pNzc[]"); + + pCtx->sMb.pNzc[i] = NULL; + } + + if (pCtx->sMb.pNzcRs[i]) { + pMa->WelsFree (pCtx->sMb.pNzcRs[i], "pCtx->sMb.pNzcRs[]"); + + pCtx->sMb.pNzcRs[i] = NULL; + } + + if (pCtx->sMb.pScaledTCoeff[i]) { + pMa->WelsFree (pCtx->sMb.pScaledTCoeff[i], "pCtx->sMb.pScaledTCoeff[]"); + + pCtx->sMb.pScaledTCoeff[i] = NULL; + } + + if (pCtx->sMb.pIntraPredMode[i]) { + pMa->WelsFree (pCtx->sMb.pIntraPredMode[i], "pCtx->sMb.pIntraPredMode[]"); + + pCtx->sMb.pIntraPredMode[i] = NULL; + } + + if (pCtx->sMb.pIntra4x4FinalMode[i]) { + pMa->WelsFree (pCtx->sMb.pIntra4x4FinalMode[i], 
"pCtx->sMb.pIntra4x4FinalMode[]"); + + pCtx->sMb.pIntra4x4FinalMode[i] = NULL; + } + + if (pCtx->sMb.pIntraNxNAvailFlag[i]) { + pMa->WelsFree (pCtx->sMb.pIntraNxNAvailFlag[i], "pCtx->sMb.pIntraNxNAvailFlag"); + + pCtx->sMb.pIntraNxNAvailFlag[i] = NULL; + } + + if (pCtx->sMb.pChromaPredMode[i]) { + pMa->WelsFree (pCtx->sMb.pChromaPredMode[i], "pCtx->sMb.pChromaPredMode[]"); + + pCtx->sMb.pChromaPredMode[i] = NULL; + } + + if (pCtx->sMb.pCbp[i]) { + pMa->WelsFree (pCtx->sMb.pCbp[i], "pCtx->sMb.pCbp[]"); + + pCtx->sMb.pCbp[i] = NULL; + } + + // if (pCtx->sMb.pMotionPredFlag[i]) + //{ + // pMa->WelsFree( pCtx->sMb.pMotionPredFlag[i], "pCtx->sMb.pMotionPredFlag[]" ); + + // pCtx->sMb.pMotionPredFlag[i] = NULL; + //} + + if (pCtx->sMb.pSubMbType[i]) { + pMa->WelsFree (pCtx->sMb.pSubMbType[i], "pCtx->sMb.pSubMbType[]"); + + pCtx->sMb.pSubMbType[i] = NULL; + } + + if (pCtx->sMb.pSliceIdc[i]) { + pMa->WelsFree (pCtx->sMb.pSliceIdc[i], "pCtx->sMb.pSliceIdc[]"); + + pCtx->sMb.pSliceIdc[i] = NULL; + } + + if (pCtx->sMb.pResidualPredFlag[i]) { + pMa->WelsFree (pCtx->sMb.pResidualPredFlag[i], "pCtx->sMb.pResidualPredFlag[]"); + + pCtx->sMb.pResidualPredFlag[i] = NULL; + } + + if (pCtx->sMb.pInterPredictionDoneFlag[i]) { + pMa->WelsFree (pCtx->sMb.pInterPredictionDoneFlag[i], "pCtx->sMb.pInterPredictionDoneFlag[]"); + + pCtx->sMb.pInterPredictionDoneFlag[i] = NULL; + } + + if (pCtx->sMb.pMbCorrectlyDecodedFlag[i]) { + pMa->WelsFree (pCtx->sMb.pMbCorrectlyDecodedFlag[i], "pCtx->sMb.pMbCorrectlyDecodedFlag[]"); + pCtx->sMb.pMbCorrectlyDecodedFlag[i] = NULL; + } + + if (pCtx->sMb.pMbRefConcealedFlag[i]) { + pMa->WelsFree (pCtx->sMb.pMbRefConcealedFlag[i], "pCtx->sMb.pMbRefConcealedFlag[]"); + pCtx->sMb.pMbRefConcealedFlag[i] = NULL; + } + pMa->WelsFree (pDq, "pDq"); + + pDq = NULL; + pCtx->pDqLayersList[i] = NULL; + + ++ i; + } while (i < LAYER_NUM_EXCHANGEABLE); + + pCtx->iPicWidthReq = 0; + pCtx->iPicHeightReq = 0; + pCtx->bInitialDqLayersMem = false; +} + +void 
ResetCurrentAccessUnit (PWelsDecoderContext pCtx) { + PAccessUnit pCurAu = pCtx->pAccessUnitList; + pCurAu->uiStartPos = 0; + pCurAu->uiEndPos = 0; + pCurAu->bCompletedAuFlag = false; + if (pCurAu->uiActualUnitsNum > 0) { + uint32_t iIdx = 0; + const uint32_t kuiActualNum = pCurAu->uiActualUnitsNum; + // a more simpler method to do nal units list management prefered here + const uint32_t kuiAvailNum = pCurAu->uiAvailUnitsNum; + const uint32_t kuiLeftNum = kuiAvailNum - kuiActualNum; + + // Swapping active nal unit nodes of succeeding AU with leading of list + while (iIdx < kuiLeftNum) { + PNalUnit t = pCurAu->pNalUnitsList[kuiActualNum + iIdx]; + pCurAu->pNalUnitsList[kuiActualNum + iIdx] = pCurAu->pNalUnitsList[iIdx]; + pCurAu->pNalUnitsList[iIdx] = t; + ++ iIdx; + } + pCurAu->uiActualUnitsNum = pCurAu->uiAvailUnitsNum = kuiLeftNum; + } +} + +/*! + * \brief Force reset current Acess Unit Nal list in case error parsing/decoding in current AU + * \author + * \history 11/16/2009 + */ +void ForceResetCurrentAccessUnit (PAccessUnit pAu) { + uint32_t uiSucAuIdx = pAu->uiEndPos + 1; + uint32_t uiCurAuIdx = 0; + + // swap the succeeding AU's nal units to the front + while (uiSucAuIdx < pAu->uiAvailUnitsNum) { + PNalUnit t = pAu->pNalUnitsList[uiSucAuIdx]; + pAu->pNalUnitsList[uiSucAuIdx] = pAu->pNalUnitsList[uiCurAuIdx]; + pAu->pNalUnitsList[uiCurAuIdx] = t; + ++ uiSucAuIdx; + ++ uiCurAuIdx; + } + + // Update avail/actual units num accordingly for next AU parsing + if (pAu->uiAvailUnitsNum > pAu->uiEndPos) + pAu->uiAvailUnitsNum -= (pAu->uiEndPos + 1); + else + pAu->uiAvailUnitsNum = 0; + pAu->uiActualUnitsNum = 0; + pAu->uiStartPos = 0; + pAu->uiEndPos = 0; + pAu->bCompletedAuFlag = false; +} + +//clear current corrupted NAL from pNalUnitsList +void ForceClearCurrentNal (PAccessUnit pAu) { + if (pAu->uiAvailUnitsNum > 0) + -- pAu->uiAvailUnitsNum; +} + +void ForceResetParaSetStatusAndAUList (PWelsDecoderContext pCtx) { + pCtx->sSpsPpsCtx.bSpsExistAheadFlag = false; + 
pCtx->sSpsPpsCtx.bSubspsExistAheadFlag = false; + pCtx->sSpsPpsCtx.bPpsExistAheadFlag = false; + + // Force clear the AU list + pCtx->pAccessUnitList->uiAvailUnitsNum = 0; + pCtx->pAccessUnitList->uiActualUnitsNum = 0; + pCtx->pAccessUnitList->uiStartPos = 0; + pCtx->pAccessUnitList->uiEndPos = 0; + pCtx->pAccessUnitList->bCompletedAuFlag = false; +} + +void CheckAvailNalUnitsListContinuity (PWelsDecoderContext pCtx, int32_t iStartIdx, int32_t iEndIdx) { + PAccessUnit pCurAu = pCtx->pAccessUnitList; + + uint8_t uiLastNuDependencyId, uiLastNuLayerDqId; + uint8_t uiCurNuDependencyId, uiCurNuQualityId, uiCurNuLayerDqId, uiCurNuRefLayerDqId; + + int32_t iCurNalUnitIdx = 0; + + //check the continuity of pNalUnitsList forwards (from pIdxNoInterLayerPred to end_postion) + uiLastNuDependencyId = pCurAu->pNalUnitsList[iStartIdx]->sNalHeaderExt.uiDependencyId;//starting nal unit + uiLastNuLayerDqId = pCurAu->pNalUnitsList[iStartIdx]->sNalHeaderExt.uiLayerDqId;//starting nal unit + iCurNalUnitIdx = iStartIdx + 1;//current nal unit + while (iCurNalUnitIdx <= iEndIdx) { + uiCurNuDependencyId = pCurAu->pNalUnitsList[iCurNalUnitIdx]->sNalHeaderExt.uiDependencyId; + uiCurNuQualityId = pCurAu->pNalUnitsList[iCurNalUnitIdx]->sNalHeaderExt.uiQualityId; + uiCurNuLayerDqId = pCurAu->pNalUnitsList[iCurNalUnitIdx]->sNalHeaderExt.uiLayerDqId; + uiCurNuRefLayerDqId = pCurAu->pNalUnitsList[iCurNalUnitIdx]->sNalData.sVclNal.sSliceHeaderExt.uiRefLayerDqId; + + if (uiCurNuDependencyId == uiLastNuDependencyId) { + uiLastNuLayerDqId = uiCurNuLayerDqId; + ++ iCurNalUnitIdx; + } else { //uiCurNuDependencyId != uiLastNuDependencyId, new dependency arrive + if (uiCurNuQualityId == 0) { + uiLastNuDependencyId = uiCurNuDependencyId; + if (uiCurNuRefLayerDqId == uiLastNuLayerDqId) { + uiLastNuLayerDqId = uiCurNuLayerDqId; + ++ iCurNalUnitIdx; + } else { //cur_nu_layer_id != next_nu_ref_layer_dq_id, the chain is broken at this point + break; + } + } else { //new dependency arrive, but no base quality 
layer, so we must stop in this point + break; + } + } + } + + -- iCurNalUnitIdx; + pCurAu->uiEndPos = iCurNalUnitIdx; + pCtx->uiTargetDqId = pCurAu->pNalUnitsList[iCurNalUnitIdx]->sNalHeaderExt.uiLayerDqId; +} + +//main purpose: to support multi-slice and to include all slice which have the same uiDependencyId, uiQualityId and frame_num +//for single slice, pIdxNoInterLayerPred SHOULD NOT be modified +void RefineIdxNoInterLayerPred (PAccessUnit pCurAu, int32_t* pIdxNoInterLayerPred) { + int32_t iLastNalDependId = pCurAu->pNalUnitsList[*pIdxNoInterLayerPred]->sNalHeaderExt.uiDependencyId; + int32_t iLastNalQualityId = pCurAu->pNalUnitsList[*pIdxNoInterLayerPred]->sNalHeaderExt.uiQualityId; + uint8_t uiLastNalTId = pCurAu->pNalUnitsList[*pIdxNoInterLayerPred]->sNalHeaderExt.uiTemporalId; + int32_t iLastNalFrameNum = + pCurAu->pNalUnitsList[*pIdxNoInterLayerPred]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iFrameNum; + int32_t iLastNalPoc = + pCurAu->pNalUnitsList[*pIdxNoInterLayerPred]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iPicOrderCntLsb; + int32_t iLastNalFirstMb = + pCurAu->pNalUnitsList[*pIdxNoInterLayerPred]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iFirstMbInSlice; + int32_t iCurNalDependId, iCurNalQualityId, iCurNalTId, iCurNalFrameNum, iCurNalPoc, iCurNalFirstMb, iCurIdx, + iFinalIdxNoInterLayerPred; + + bool bMultiSliceFind = false; + + iFinalIdxNoInterLayerPred = 0; + iCurIdx = *pIdxNoInterLayerPred - 1; + while (iCurIdx >= 0) { + if (pCurAu->pNalUnitsList[iCurIdx]->sNalHeaderExt.iNoInterLayerPredFlag) { + iCurNalDependId = pCurAu->pNalUnitsList[iCurIdx]->sNalHeaderExt.uiDependencyId; + iCurNalQualityId = pCurAu->pNalUnitsList[iCurIdx]->sNalHeaderExt.uiQualityId; + iCurNalTId = pCurAu->pNalUnitsList[iCurIdx]->sNalHeaderExt.uiTemporalId; + iCurNalFrameNum = pCurAu->pNalUnitsList[iCurIdx]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iFrameNum; + iCurNalPoc = 
pCurAu->pNalUnitsList[iCurIdx]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iPicOrderCntLsb; + iCurNalFirstMb = pCurAu->pNalUnitsList[iCurIdx]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iFirstMbInSlice; + + if (iCurNalDependId == iLastNalDependId && + iCurNalQualityId == iLastNalQualityId && + iCurNalTId == uiLastNalTId && + iCurNalFrameNum == iLastNalFrameNum && + iCurNalPoc == iLastNalPoc && + iCurNalFirstMb != iLastNalFirstMb) { + bMultiSliceFind = true; + iFinalIdxNoInterLayerPred = iCurIdx; + --iCurIdx; + continue; + } else { + break; + } + } + --iCurIdx; + } + + if (bMultiSliceFind && *pIdxNoInterLayerPred != iFinalIdxNoInterLayerPred) { + *pIdxNoInterLayerPred = iFinalIdxNoInterLayerPred; + } +} + +bool CheckPocOfCurValidNalUnits (PAccessUnit pCurAu, int32_t pIdxNoInterLayerPred) { + int32_t iEndIdx = pCurAu->uiEndPos; + int32_t iCurAuPoc = + pCurAu->pNalUnitsList[pIdxNoInterLayerPred]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iPicOrderCntLsb; + int32_t iTmpPoc, i; + for (i = pIdxNoInterLayerPred + 1; i < iEndIdx; i++) { + iTmpPoc = pCurAu->pNalUnitsList[i]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iPicOrderCntLsb; + if (iTmpPoc != iCurAuPoc) { + return false; + } + } + + return true; +} + +bool CheckIntegrityNalUnitsList (PWelsDecoderContext pCtx) { + PAccessUnit pCurAu = pCtx->pAccessUnitList; + const int32_t kiEndPos = pCurAu->uiEndPos; + int32_t iIdxNoInterLayerPred = 0; + + if (!pCurAu->bCompletedAuFlag) + return false; + + if (pCtx->bNewSeqBegin) { + pCurAu->uiStartPos = 0; + //step1: search the pNalUnit whose iNoInterLayerPredFlag equal to 1 backwards (from uiEndPos to 0) + iIdxNoInterLayerPred = kiEndPos; + while (iIdxNoInterLayerPred >= 0) { + if (pCurAu->pNalUnitsList[iIdxNoInterLayerPred]->sNalHeaderExt.iNoInterLayerPredFlag) { + break; + } + --iIdxNoInterLayerPred; + } + if (iIdxNoInterLayerPred < 0) { + //can not find the Nal Unit whose no_inter_pred_falg equal to 1, MUST STOP decode + return false; + } + + //step2: support 
multi-slice, to include all base layer slice + RefineIdxNoInterLayerPred (pCurAu, &iIdxNoInterLayerPred); + pCurAu->uiStartPos = iIdxNoInterLayerPred; + CheckAvailNalUnitsListContinuity (pCtx, iIdxNoInterLayerPred, kiEndPos); + + if (!CheckPocOfCurValidNalUnits (pCurAu, iIdxNoInterLayerPred)) { + return false; + } + + pCtx->iCurSeqIntervalTargetDependId = pCurAu->pNalUnitsList[pCurAu->uiEndPos]->sNalHeaderExt.uiDependencyId; + pCtx->iCurSeqIntervalMaxPicWidth = + pCurAu->pNalUnitsList[pCurAu->uiEndPos]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iMbWidth << 4; + pCtx->iCurSeqIntervalMaxPicHeight = + pCurAu->pNalUnitsList[pCurAu->uiEndPos]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iMbHeight << 4; + } else { //P_SLICE + //step 1: search uiDependencyId equal to pCtx->cur_seq_interval_target_dependency_id + bool bGetDependId = false; + int32_t iIdxDependId = 0; + + iIdxDependId = kiEndPos; + while (iIdxDependId >= 0) { + if (pCtx->iCurSeqIntervalTargetDependId == pCurAu->pNalUnitsList[iIdxDependId]->sNalHeaderExt.uiDependencyId) { + bGetDependId = true; + break; + } else { + --iIdxDependId; + } + } + + //step 2: switch according to whether or not find the index of pNalUnit whose uiDependencyId equal to iCurSeqIntervalTargetDependId + if (bGetDependId) { //get the index of pNalUnit whose uiDependencyId equal to iCurSeqIntervalTargetDependId + bool bGetNoInterPredFront = false; + //step 2a: search iNoInterLayerPredFlag [0....iIdxDependId] + iIdxNoInterLayerPred = iIdxDependId; + while (iIdxNoInterLayerPred >= 0) { + if (pCurAu->pNalUnitsList[iIdxNoInterLayerPred]->sNalHeaderExt.iNoInterLayerPredFlag) { + bGetNoInterPredFront = true; + break; + } + --iIdxNoInterLayerPred; + } + //step 2b: switch, whether or not find the NAL unit whose no_inter_pred_flag equal to 1 among [0....iIdxDependId] + if (bGetNoInterPredFront) { //YES + RefineIdxNoInterLayerPred (pCurAu, &iIdxNoInterLayerPred); + pCurAu->uiStartPos = iIdxNoInterLayerPred; + CheckAvailNalUnitsListContinuity 
(pCtx, iIdxNoInterLayerPred, iIdxDependId); + + if (!CheckPocOfCurValidNalUnits (pCurAu, iIdxNoInterLayerPred)) { + return false; + } + } else { //NO, should find the NAL unit whose no_inter_pred_flag equal to 1 among [iIdxDependId....uiEndPos] + iIdxNoInterLayerPred = iIdxDependId; + while (iIdxNoInterLayerPred <= kiEndPos) { + if (pCurAu->pNalUnitsList[iIdxNoInterLayerPred]->sNalHeaderExt.iNoInterLayerPredFlag) { + break; + } + ++iIdxNoInterLayerPred; + } + + if (iIdxNoInterLayerPred > kiEndPos) { + return false; //cann't find the index of pNalUnit whose no_inter_pred_flag = 1 + } + + RefineIdxNoInterLayerPred (pCurAu, &iIdxNoInterLayerPred); + pCurAu->uiStartPos = iIdxNoInterLayerPred; + CheckAvailNalUnitsListContinuity (pCtx, iIdxNoInterLayerPred, kiEndPos); + + if (!CheckPocOfCurValidNalUnits (pCurAu, iIdxNoInterLayerPred)) { + return false; + } + } + } else { //without the index of pNalUnit, should process this AU as common case + iIdxNoInterLayerPred = kiEndPos; + while (iIdxNoInterLayerPred >= 0) { + if (pCurAu->pNalUnitsList[iIdxNoInterLayerPred]->sNalHeaderExt.iNoInterLayerPredFlag) { + break; + } + --iIdxNoInterLayerPred; + } + if (iIdxNoInterLayerPred < 0) { + return false; //cann't find the index of pNalUnit whose iNoInterLayerPredFlag = 1 + } + + RefineIdxNoInterLayerPred (pCurAu, &iIdxNoInterLayerPred); + pCurAu->uiStartPos = iIdxNoInterLayerPred; + CheckAvailNalUnitsListContinuity (pCtx, iIdxNoInterLayerPred, kiEndPos); + + if (!CheckPocOfCurValidNalUnits (pCurAu, iIdxNoInterLayerPred)) { + return false; + } + } + } + + return true; +} + +void CheckOnlyOneLayerInAu (PWelsDecoderContext pCtx) { + PAccessUnit pCurAu = pCtx->pAccessUnitList; + + int32_t iEndIdx = pCurAu->uiEndPos; + int32_t iCurIdx = pCurAu->uiStartPos; + uint8_t uiDId = pCurAu->pNalUnitsList[iCurIdx]->sNalHeaderExt.uiDependencyId; + uint8_t uiQId = pCurAu->pNalUnitsList[iCurIdx]->sNalHeaderExt.uiQualityId; + uint8_t uiTId = pCurAu->pNalUnitsList[iCurIdx]->sNalHeaderExt.uiTemporalId; + 
+ uint8_t uiCurDId, uiCurQId, uiCurTId; + + pCtx->bOnlyOneLayerInCurAuFlag = true; + + if (iEndIdx == iCurIdx) { //only one NAL in pNalUnitsList + return; + } + + ++iCurIdx; + while (iCurIdx <= iEndIdx) { + uiCurDId = pCurAu->pNalUnitsList[iCurIdx]->sNalHeaderExt.uiDependencyId; + uiCurQId = pCurAu->pNalUnitsList[iCurIdx]->sNalHeaderExt.uiQualityId; + uiCurTId = pCurAu->pNalUnitsList[iCurIdx]->sNalHeaderExt.uiTemporalId; + + if (uiDId != uiCurDId || uiQId != uiCurQId || uiTId != uiCurTId) { + pCtx->bOnlyOneLayerInCurAuFlag = false; + return; + } + + ++iCurIdx; + } +} + +int32_t WelsDecodeAccessUnitStart (PWelsDecoderContext pCtx) { + // Roll back NAL units not being belong to current access unit list for proceeded access unit + int32_t iRet = UpdateAccessUnit (pCtx); + if (iRet != ERR_NONE) + return iRet; + + pCtx->pAccessUnitList->uiStartPos = 0; + if (!pCtx->sSpsPpsCtx.bAvcBasedFlag && !CheckIntegrityNalUnitsList (pCtx)) { + pCtx->iErrorCode |= dsBitstreamError; + return dsBitstreamError; + } + + //check current AU has only one layer or not + //If YES, can use deblocking based on AVC + if (!pCtx->sSpsPpsCtx.bAvcBasedFlag) { + CheckOnlyOneLayerInAu (pCtx); + } + + return ERR_NONE; +} + +void WelsDecodeAccessUnitEnd (PWelsDecoderContext pCtx) { + //save previous header info + PAccessUnit pCurAu = pCtx->pAccessUnitList; + PNalUnit pCurNal = pCurAu->pNalUnitsList[pCurAu->uiEndPos]; + memcpy (&pCtx->pLastDecPicInfo->sLastNalHdrExt, &pCurNal->sNalHeaderExt, sizeof (SNalUnitHeaderExt)); + memcpy (&pCtx->pLastDecPicInfo->sLastSliceHeader, + &pCurNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader, sizeof (SSliceHeader)); + // uninitialize context of current access unit and rbsp buffer clean + ResetCurrentAccessUnit (pCtx); +} + +/* CheckNewSeqBeginAndUpdateActiveLayerSps + * return: + * true - the AU to be construct is the start of new sequence; false - not + */ +static bool CheckNewSeqBeginAndUpdateActiveLayerSps (PWelsDecoderContext pCtx) { + bool bNewSeq = false; + 
PAccessUnit pCurAu = pCtx->pAccessUnitList; + PSps pTmpLayerSps[MAX_LAYER_NUM]; + for (int i = 0; i < MAX_LAYER_NUM; i++) { + pTmpLayerSps[i] = NULL; + } + // track the layer sps for the current au + for (unsigned int i = pCurAu->uiStartPos; i <= pCurAu->uiEndPos; i++) { + uint32_t uiDid = pCurAu->pNalUnitsList[i]->sNalHeaderExt.uiDependencyId; + pTmpLayerSps[uiDid] = pCurAu->pNalUnitsList[i]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.pSps; + if ((pCurAu->pNalUnitsList[i]->sNalHeaderExt.sNalUnitHeader.eNalUnitType == NAL_UNIT_CODED_SLICE_IDR) + || (pCurAu->pNalUnitsList[i]->sNalHeaderExt.bIdrFlag)) + bNewSeq = true; + } + int iMaxActiveLayer = 0, iMaxCurrentLayer = 0; + for (int i = MAX_LAYER_NUM - 1; i >= 0; i--) { + if (pCtx->sSpsPpsCtx.pActiveLayerSps[i] != NULL) { + iMaxActiveLayer = i; + break; + } + } + for (int i = MAX_LAYER_NUM - 1; i >= 0; i--) { + if (pTmpLayerSps[i] != NULL) { + iMaxCurrentLayer = i; + break; + } + } + if ((iMaxCurrentLayer != iMaxActiveLayer) + || (pTmpLayerSps[iMaxCurrentLayer] != pCtx->sSpsPpsCtx.pActiveLayerSps[iMaxActiveLayer])) { + bNewSeq = true; + } + // fill active sps if the current sps is not null while active layer is null + if (!bNewSeq) { + for (int i = 0; i < MAX_LAYER_NUM; i++) { + if (pCtx->sSpsPpsCtx.pActiveLayerSps[i] == NULL && pTmpLayerSps[i] != NULL) { + pCtx->sSpsPpsCtx.pActiveLayerSps[i] = pTmpLayerSps[i]; + } + } + } else { + // UpdateActiveLayerSps if new sequence start + memcpy (&pCtx->sSpsPpsCtx.pActiveLayerSps[0], &pTmpLayerSps[0], MAX_LAYER_NUM * sizeof (PSps)); + } + return bNewSeq; +} + +static void WriteBackActiveParameters (PWelsDecoderContext pCtx) { + if (pCtx->sSpsPpsCtx.iOverwriteFlags & OVERWRITE_PPS) { + memcpy (&pCtx->sSpsPpsCtx.sPpsBuffer[pCtx->sSpsPpsCtx.sPpsBuffer[MAX_PPS_COUNT].iPpsId], + &pCtx->sSpsPpsCtx.sPpsBuffer[MAX_PPS_COUNT], sizeof (SPps)); + } + if (pCtx->sSpsPpsCtx.iOverwriteFlags & OVERWRITE_SPS) { + memcpy 
(&pCtx->sSpsPpsCtx.sSpsBuffer[pCtx->sSpsPpsCtx.sSpsBuffer[MAX_SPS_COUNT].iSpsId], + &pCtx->sSpsPpsCtx.sSpsBuffer[MAX_SPS_COUNT], sizeof (SSps)); + pCtx->bNewSeqBegin = true; + } + if (pCtx->sSpsPpsCtx.iOverwriteFlags & OVERWRITE_SUBSETSPS) { + memcpy (&pCtx->sSpsPpsCtx.sSubsetSpsBuffer[pCtx->sSpsPpsCtx.sSubsetSpsBuffer[MAX_SPS_COUNT].sSps.iSpsId], + &pCtx->sSpsPpsCtx.sSubsetSpsBuffer[MAX_SPS_COUNT], sizeof (SSubsetSps)); + pCtx->bNewSeqBegin = true; + } + pCtx->sSpsPpsCtx.iOverwriteFlags = OVERWRITE_NONE; +} + +/* + * DecodeFinishUpdate + * decoder finish decoding, update active parameter sets and new seq status + * + */ + +void DecodeFinishUpdate (PWelsDecoderContext pCtx) { + pCtx->bNewSeqBegin = false; + WriteBackActiveParameters (pCtx); + pCtx->bNewSeqBegin = pCtx->bNewSeqBegin || pCtx->bNextNewSeqBegin; + pCtx->bNextNewSeqBegin = false; // reset it + if (pCtx->bNewSeqBegin) + ResetActiveSPSForEachLayer (pCtx); +} + +/* +* WelsDecodeInitAccessUnitStart +* check and (re)allocate picture buffers on new sequence begin +* bit_len: size in bit length of data +* buf_len: size in byte length of data +* coded_au: mark an Access Unit decoding finished +* return: +* 0 - success; otherwise returned error_no defined in error_no.h +*/ +int32_t WelsDecodeInitAccessUnitStart (PWelsDecoderContext pCtx, SBufferInfo* pDstInfo) { + int32_t iErr = ERR_NONE; + PAccessUnit pCurAu = pCtx->pAccessUnitList; + pCtx->bAuReadyFlag = false; + pCtx->pLastDecPicInfo->bLastHasMmco5 = false; + bool bTmpNewSeqBegin = CheckNewSeqBeginAndUpdateActiveLayerSps (pCtx); + pCtx->bNewSeqBegin = pCtx->bNewSeqBegin || bTmpNewSeqBegin; + iErr = WelsDecodeAccessUnitStart (pCtx); + GetVclNalTemporalId (pCtx); + + if (ERR_NONE != iErr) { + ForceResetCurrentAccessUnit (pCtx->pAccessUnitList); + if (!pCtx->pParam->bParseOnly) + pDstInfo->iBufferStatus = 0; + pCtx->bNewSeqBegin = pCtx->bNewSeqBegin || pCtx->bNextNewSeqBegin; + pCtx->bNextNewSeqBegin = false; // reset it + if (pCtx->bNewSeqBegin) + 
ResetActiveSPSForEachLayer (pCtx); + return iErr; + } + + pCtx->pSps = pCurAu->pNalUnitsList[pCurAu->uiStartPos]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.pSps; + pCtx->pPps = pCurAu->pNalUnitsList[pCurAu->uiStartPos]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.pPps; + + return iErr; +} + +/* +* AllocPicBuffOnNewSeqBegin +* check and (re)allocate picture buffers on new sequence begin +* return: +* 0 - success; otherwise returned error_no defined in error_no.h +*/ +int32_t AllocPicBuffOnNewSeqBegin (PWelsDecoderContext pCtx) { + //try to allocate or relocate DPB memory only when new sequence is coming. + if (GetThreadCount (pCtx) <= 1) { + WelsResetRefPic (pCtx); //clear ref pPic when IDR NAL + } + int32_t iErr = SyncPictureResolutionExt (pCtx, pCtx->pSps->iMbWidth, pCtx->pSps->iMbHeight); + + if (ERR_NONE != iErr) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "sync picture resolution ext failed, the error is %d", iErr); + return iErr; + } + + return iErr; +} + +/* +* InitConstructAccessUnit +* Init before constructing an access unit for given input bitstream, maybe partial NAL Unit, one or more Units are involved to +* joint a collective access unit. +* parameter\ +* SBufferInfo: Buffer info +* return: +* 0 - success; otherwise returned error_no defined in error_no.h +*/ +int32_t InitConstructAccessUnit (PWelsDecoderContext pCtx, SBufferInfo* pDstInfo) { + int32_t iErr = ERR_NONE; + + iErr = WelsDecodeInitAccessUnitStart (pCtx, pDstInfo); + if (ERR_NONE != iErr) { + return iErr; + } + if (pCtx->bNewSeqBegin) { + iErr = AllocPicBuffOnNewSeqBegin (pCtx); + if (ERR_NONE != iErr) { + return iErr; + } + } + + return iErr; +} + +/* + * ConstructAccessUnit + * construct an access unit for given input bitstream, maybe partial NAL Unit, one or more Units are involved to + * joint a collective access unit. 
+ * parameter\ + * buf: bitstream data buffer + * bit_len: size in bit length of data + * buf_len: size in byte length of data + * coded_au: mark an Access Unit decoding finished + * return: + * 0 - success; otherwise returned error_no defined in error_no.h + */ +int32_t ConstructAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferInfo* pDstInfo) { + int32_t iErr = ERR_NONE; + if (GetThreadCount (pCtx) <= 1) { + iErr = InitConstructAccessUnit (pCtx, pDstInfo); + if (ERR_NONE != iErr) { + return iErr; + } + } + if (pCtx->pCabacDecEngine == NULL) { + pCtx->pCabacDecEngine = (SWelsCabacDecEngine*)pCtx->pMemAlign->WelsMallocz (sizeof (SWelsCabacDecEngine), + "pCtx->pCabacDecEngine"); + WELS_VERIFY_RETURN_IF (ERR_INFO_OUT_OF_MEMORY, (NULL == pCtx->pCabacDecEngine)) + } + + iErr = DecodeCurrentAccessUnit (pCtx, ppDst, pDstInfo); + + WelsDecodeAccessUnitEnd (pCtx); + + if (ERR_NONE != iErr) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "returned error from decoding:[0x%x]", iErr); + return iErr; + } + + return ERR_NONE; +} + +static inline void InitDqLayerInfo (PDqLayer pDqLayer, PLayerInfo pLayerInfo, PNalUnit pNalUnit, PPicture pPicDec) { + PNalUnitHeaderExt pNalHdrExt = &pNalUnit->sNalHeaderExt; + PSliceHeaderExt pShExt = &pNalUnit->sNalData.sVclNal.sSliceHeaderExt; + PSliceHeader pSh = &pShExt->sSliceHeader; + const uint8_t kuiQualityId = pNalHdrExt->uiQualityId; + + memcpy (&pDqLayer->sLayerInfo, pLayerInfo, sizeof (SLayerInfo)); //confirmed_safe_unsafe_usage + + pDqLayer->pDec = pPicDec; + pDqLayer->iMbWidth = pSh->iMbWidth; // MB width of this picture + pDqLayer->iMbHeight = pSh->iMbHeight;// MB height of this picture + + pDqLayer->iSliceIdcBackup = (pSh->iFirstMbInSlice << 7) | (pNalHdrExt->uiDependencyId << 4) | (pNalHdrExt->uiQualityId); + + /* Common syntax elements across all slices of a DQLayer */ + pDqLayer->uiPpsId = pLayerInfo->pPps->iPpsId; + pDqLayer->uiDisableInterLayerDeblockingFilterIdc = pShExt->uiDisableInterLayerDeblockingFilterIdc; + 
pDqLayer->iInterLayerSliceAlphaC0Offset = pShExt->iInterLayerSliceAlphaC0Offset; + pDqLayer->iInterLayerSliceBetaOffset = pShExt->iInterLayerSliceBetaOffset; + pDqLayer->iSliceGroupChangeCycle = pSh->iSliceGroupChangeCycle; + pDqLayer->bStoreRefBasePicFlag = pShExt->bStoreRefBasePicFlag; + pDqLayer->bTCoeffLevelPredFlag = pShExt->bTCoeffLevelPredFlag; + pDqLayer->bConstrainedIntraResamplingFlag = pShExt->bConstrainedIntraResamplingFlag; + pDqLayer->uiRefLayerDqId = pShExt->uiRefLayerDqId; + pDqLayer->uiRefLayerChromaPhaseXPlus1Flag = pShExt->uiRefLayerChromaPhaseXPlus1Flag; + pDqLayer->uiRefLayerChromaPhaseYPlus1 = pShExt->uiRefLayerChromaPhaseYPlus1; + pDqLayer->bUseWeightPredictionFlag = false; + pDqLayer->bUseWeightedBiPredIdc = false; + //memcpy(&pDqLayer->sScaledRefLayer, &pShExt->sScaledRefLayer, sizeof(SPosOffset));//confirmed_safe_unsafe_usage + + if (kuiQualityId == BASE_QUALITY_ID) { + pDqLayer->pRefPicListReordering = &pSh->pRefPicListReordering; + pDqLayer->pRefPicMarking = &pSh->sRefMarking; + + pDqLayer->bUseWeightPredictionFlag = pSh->pPps->bWeightedPredFlag; + pDqLayer->bUseWeightedBiPredIdc = pSh->pPps->uiWeightedBipredIdc != 0; + if (pSh->pPps->bWeightedPredFlag || pSh->pPps->uiWeightedBipredIdc) { + pDqLayer->pPredWeightTable = &pSh->sPredWeightTable; + } + pDqLayer->pRefPicBaseMarking = &pShExt->sRefBasePicMarking; + } + + pDqLayer->uiLayerDqId = pNalHdrExt->uiLayerDqId; // dq_id of current layer + pDqLayer->bUseRefBasePicFlag = pNalHdrExt->bUseRefBasePicFlag; +} + +void WelsDqLayerDecodeStart (PWelsDecoderContext pCtx, PNalUnit pCurNal, PSps pSps, PPps pPps) { + PSliceHeader pSh = &pCurNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader; + + pCtx->eSliceType = pSh->eSliceType; + pCtx->pSliceHeader = pSh; + pCtx->bUsedAsRef = false; + + pCtx->iFrameNum = pSh->iFrameNum; + UpdateDecoderStatisticsForActiveParaset (pCtx->pDecoderStatistics, pSps, pPps); +} + +int32_t InitRefPicList (PWelsDecoderContext pCtx, const uint8_t kuiNRi, int32_t iPoc) { + 
int32_t iRet = ERR_NONE; + if (pCtx->eSliceType == B_SLICE) { + iRet = WelsInitBSliceRefList (pCtx, iPoc); + CreateImplicitWeightTable (pCtx); + } else + iRet = WelsInitRefList (pCtx, iPoc); + if ((pCtx->eSliceType != I_SLICE && pCtx->eSliceType != SI_SLICE)) { +#if 0 + if (pCtx->pSps->uiProfileIdc != 66 && pCtx->pPps->bEntropyCodingModeFlag) + iRet = WelsReorderRefList2 (pCtx); + else +#endif + iRet = WelsReorderRefList (pCtx); + } + + return iRet; +} + +void InitCurDqLayerData (PWelsDecoderContext pCtx, PDqLayer pCurDq) { + if (NULL != pCtx && NULL != pCurDq) { + pCurDq->pMbType = pCtx->sMb.pMbType[0]; + pCurDq->pSliceIdc = pCtx->sMb.pSliceIdc[0]; + pCurDq->pMv[LIST_0] = pCtx->sMb.pMv[0][LIST_0]; + pCurDq->pMv[LIST_1] = pCtx->sMb.pMv[0][LIST_1]; + pCurDq->pRefIndex[LIST_0] = pCtx->sMb.pRefIndex[0][LIST_0]; + pCurDq->pRefIndex[LIST_1] = pCtx->sMb.pRefIndex[0][LIST_1]; + pCurDq->pDirect = pCtx->sMb.pDirect[0]; + pCurDq->pNoSubMbPartSizeLessThan8x8Flag = pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[0]; + pCurDq->pTransformSize8x8Flag = pCtx->sMb.pTransformSize8x8Flag[0]; + pCurDq->pLumaQp = pCtx->sMb.pLumaQp[0]; + pCurDq->pChromaQp = pCtx->sMb.pChromaQp[0]; + pCurDq->pMvd[LIST_0] = pCtx->sMb.pMvd[0][LIST_0]; + pCurDq->pMvd[LIST_1] = pCtx->sMb.pMvd[0][LIST_1]; + pCurDq->pCbfDc = pCtx->sMb.pCbfDc[0]; + pCurDq->pNzc = pCtx->sMb.pNzc[0]; + pCurDq->pNzcRs = pCtx->sMb.pNzcRs[0]; + pCurDq->pScaledTCoeff = pCtx->sMb.pScaledTCoeff[0]; + pCurDq->pIntraPredMode = pCtx->sMb.pIntraPredMode[0]; + pCurDq->pIntra4x4FinalMode = pCtx->sMb.pIntra4x4FinalMode[0]; + pCurDq->pIntraNxNAvailFlag = pCtx->sMb.pIntraNxNAvailFlag[0]; + pCurDq->pChromaPredMode = pCtx->sMb.pChromaPredMode[0]; + pCurDq->pCbp = pCtx->sMb.pCbp[0]; + pCurDq->pSubMbType = pCtx->sMb.pSubMbType[0]; + pCurDq->pInterPredictionDoneFlag = pCtx->sMb.pInterPredictionDoneFlag[0]; + pCurDq->pResidualPredFlag = pCtx->sMb.pResidualPredFlag[0]; + pCurDq->pMbCorrectlyDecodedFlag = pCtx->sMb.pMbCorrectlyDecodedFlag[0]; + 
pCurDq->pMbRefConcealedFlag = pCtx->sMb.pMbRefConcealedFlag[0]; + } +} + +/* + * DecodeCurrentAccessUnit + * Decode current access unit when current AU is completed. + */ +int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferInfo* pDstInfo) { + PNalUnit pNalCur = pCtx->pNalCur = NULL; + PAccessUnit pCurAu = pCtx->pAccessUnitList; + + int32_t iIdx = pCurAu->uiStartPos; + int32_t iEndIdx = pCurAu->uiEndPos; + + //get current thread ctx + PWelsDecoderThreadCTX pThreadCtx = NULL; + if (pCtx->pThreadCtx != NULL) { + pThreadCtx = (PWelsDecoderThreadCTX)pCtx->pThreadCtx; + } + //get last thread ctx + PWelsDecoderThreadCTX pLastThreadCtx = NULL; + if (pCtx->pLastThreadCtx != NULL) { + pLastThreadCtx = (PWelsDecoderThreadCTX) (pCtx->pLastThreadCtx); + if (pLastThreadCtx->pDec == NULL) { + pLastThreadCtx->pDec = PrefetchLastPicForThread (pCtx->pPicBuff, + pLastThreadCtx->iPicBuffIdx); + } + } + int32_t iThreadCount = GetThreadCount (pCtx); + int32_t iPpsId = 0; + int32_t iRet = ERR_NONE; + + bool bAllRefComplete = true; // Assume default all ref picutres are complete + + const uint8_t kuiTargetLayerDqId = GetTargetDqId (pCtx->uiTargetDqId, pCtx->pParam); + const uint8_t kuiDependencyIdMax = (kuiTargetLayerDqId & 0x7F) >> 4; + int16_t iLastIdD = -1, iLastIdQ = -1; + int16_t iCurrIdD = 0, iCurrIdQ = 0; + pCtx->uiNalRefIdc = 0; + bool bFreshSliceAvailable = + true; // Another fresh slice comingup for given dq layer, for multiple slices in case of header parts of slices sometimes loss over error-prone channels, 8/14/2008 + + //update pCurDqLayer at the starting of AU decoding + if (pCtx->bInitialDqLayersMem || pCtx->pCurDqLayer == NULL) { + pCtx->pCurDqLayer = pCtx->pDqLayersList[0]; + } + + InitCurDqLayerData (pCtx, pCtx->pCurDqLayer); + + pNalCur = pCurAu->pNalUnitsList[iIdx]; + while (iIdx <= iEndIdx) { + PDqLayer dq_cur = pCtx->pCurDqLayer; + SLayerInfo pLayerInfo; + PSliceHeaderExt pShExt = NULL; + PSliceHeader pSh = NULL; + bool isNewFrame = 
true; + if (iThreadCount > 1) { + isNewFrame = pCtx->pDec == NULL; + } + if (pCtx->pDec == NULL) { + //make call PrefetchPic first before updating reference lists in threaded mode + //this prevents from possible thread-decoding hanging + pCtx->pDec = PrefetchPic (pCtx->pPicBuff); + if (pLastThreadCtx != NULL) { + pLastThreadCtx->pDec->bUsedAsRef = pLastThreadCtx->pCtx->uiNalRefIdc > 0; + if (pLastThreadCtx->pDec->bUsedAsRef) { + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + uint32_t i = 0; + while (i < MAX_REF_PIC_COUNT && pLastThreadCtx->pCtx->sRefPic.pRefList[listIdx][i]) { + pLastThreadCtx->pDec->pRefPic[listIdx][i] = pLastThreadCtx->pCtx->sRefPic.pRefList[listIdx][i]; + ++i; + } + } + pLastThreadCtx->pCtx->sTmpRefPic = pLastThreadCtx->pCtx->sRefPic; + WelsMarkAsRef (pLastThreadCtx->pCtx, pLastThreadCtx->pDec); + pCtx->sRefPic = pLastThreadCtx->pCtx->sTmpRefPic; + } else { + pCtx->sRefPic = pLastThreadCtx->pCtx->sRefPic; + } + } + //WelsResetRefPic needs to be called when a new sequence is encountered + //Otherwise artifacts is observed in decoded yuv in couple of unit tests with multiple-slice frame + if (GetThreadCount (pCtx) > 1 && pCtx->bNewSeqBegin) { + WelsResetRefPic (pCtx); + } + if (pCtx->iTotalNumMbRec != 0) + pCtx->iTotalNumMbRec = 0; + + if (NULL == pCtx->pDec) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, + "DecodeCurrentAccessUnit()::::::PrefetchPic ERROR, pSps->iNumRefFrames:%d.", + pCtx->pSps->iNumRefFrames); + // The error code here need to be separated from the dsOutOfMemory + pCtx->iErrorCode |= dsOutOfMemory; + return ERR_INFO_REF_COUNT_OVERFLOW; + } + if (pThreadCtx != NULL) { + pThreadCtx->pDec = pCtx->pDec; + if (iThreadCount > 1) ++pCtx->pDec->iRefCount; + uint32_t uiMbHeight = (pCtx->pDec->iHeightInPixel + 15) >> 4; + for (uint32_t i = 0; i < uiMbHeight; ++i) { + RESET_EVENT (&pCtx->pDec->pReadyEvent[i]); + } + } + pCtx->pDec->bNewSeqBegin = pCtx->bNewSeqBegin; //set flag for start decoding + } else if 
(pCtx->iTotalNumMbRec == 0) { //pDec != NULL, already start + pCtx->pDec->bNewSeqBegin = pCtx->bNewSeqBegin; //set flag for start decoding + } + pCtx->pDec->uiTimeStamp = pNalCur->uiTimeStamp; + pCtx->pDec->uiDecodingTimeStamp = pCtx->uiDecodingTimeStamp; + if (pThreadCtx != NULL) { + pThreadCtx->iPicBuffIdx = pCtx->pDec->iPicBuffIdx; + pCtx->pCurDqLayer->pMbCorrectlyDecodedFlag = pCtx->pDec->pMbCorrectlyDecodedFlag; + } + + if (pCtx->iTotalNumMbRec == 0) { //Picture start to decode + for (int32_t i = 0; i < LAYER_NUM_EXCHANGEABLE; ++ i) + memset (pCtx->sMb.pSliceIdc[i], 0xff, (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int32_t))); + memset (pCtx->pCurDqLayer->pMbCorrectlyDecodedFlag, 0, pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight * sizeof (bool)); + memset (pCtx->pCurDqLayer->pMbRefConcealedFlag, 0, pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight * sizeof (bool)); + memset (pCtx->pDec->pRefPic[LIST_0], 0, sizeof (PPicture) * MAX_DPB_COUNT); + memset (pCtx->pDec->pRefPic[LIST_1], 0, sizeof (PPicture) * MAX_DPB_COUNT); + pCtx->pDec->iMbNum = pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight; + pCtx->pDec->iMbEcedNum = 0; + pCtx->pDec->iMbEcedPropNum = 0; + } + pCtx->bRPLRError = false; + GetI4LumaIChromaAddrTable (pCtx->iDecBlockOffsetArray, pCtx->pDec->iLinesize[0], pCtx->pDec->iLinesize[1]); + + if (pNalCur->sNalHeaderExt.uiLayerDqId > kuiTargetLayerDqId) { // confirmed pNalCur will never be NULL + break; // Per formance it need not to decode the remaining bits any more due to given uiLayerDqId required, 9/2/2009 + } + + memset (&pLayerInfo, 0, sizeof (SLayerInfo)); + + /* + * Loop decoding for slices (even FMO and/ multiple slices) within a dq layer + */ + while (iIdx <= iEndIdx) { + bool bReconstructSlice; + iCurrIdQ = pNalCur->sNalHeaderExt.uiQualityId; + iCurrIdD = pNalCur->sNalHeaderExt.uiDependencyId; + pSh = &pNalCur->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader; + pShExt = &pNalCur->sNalData.sVclNal.sSliceHeaderExt; + pCtx->bRPLRError = false; + 
bReconstructSlice = CheckSliceNeedReconstruct (pNalCur->sNalHeaderExt.uiLayerDqId, kuiTargetLayerDqId); + + memcpy (&pLayerInfo.sNalHeaderExt, &pNalCur->sNalHeaderExt, sizeof (SNalUnitHeaderExt)); //confirmed_safe_unsafe_usage + + pCtx->pDec->iFrameNum = pSh->iFrameNum; + pCtx->pDec->iFramePoc = pSh->iPicOrderCntLsb; // still can not obtain correct, because current do not support POCtype 2 + pCtx->pDec->bIdrFlag = pNalCur->sNalHeaderExt.bIdrFlag; + pCtx->pDec->eSliceType = pSh->eSliceType; + + memcpy (&pLayerInfo.sSliceInLayer.sSliceHeaderExt, pShExt, sizeof (SSliceHeaderExt)); //confirmed_safe_unsafe_usage + pLayerInfo.sSliceInLayer.bSliceHeaderExtFlag = pNalCur->sNalData.sVclNal.bSliceHeaderExtFlag; + pLayerInfo.sSliceInLayer.eSliceType = pSh->eSliceType; + pLayerInfo.sSliceInLayer.iLastMbQp = pSh->iSliceQp; + dq_cur->pBitStringAux = &pNalCur->sNalData.sVclNal.sSliceBitsRead; + + pCtx->uiNalRefIdc = pNalCur->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc; + + iPpsId = pSh->iPpsId; + + pLayerInfo.pPps = pSh->pPps; + pLayerInfo.pSps = pSh->pSps; + pLayerInfo.pSubsetSps = pShExt->pSubsetSps; + + pCtx->pFmo = &pCtx->sFmoList[iPpsId]; + iRet = FmoParamUpdate (pCtx->pFmo, pLayerInfo.pSps, pLayerInfo.pPps, &pCtx->iActiveFmoNum, pCtx->pMemAlign); + if (ERR_NONE != iRet) { + if (iRet == ERR_INFO_OUT_OF_MEMORY) { + pCtx->iErrorCode |= dsOutOfMemory; + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "DecodeCurrentAccessUnit(), Fmo param alloc failed"); + } else { + pCtx->iErrorCode |= dsBitstreamError; + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "DecodeCurrentAccessUnit(), FmoParamUpdate failed, eSliceType: %d.", + pSh->eSliceType); + } + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_FMO_INIT_FAIL); + } + + bFreshSliceAvailable = (iCurrIdD != iLastIdD + || iCurrIdQ != iLastIdQ); // do not need condition of (first_mb == 0) due multiple slices might be disorder + + + WelsDqLayerDecodeStart (pCtx, pNalCur, pLayerInfo.pSps, pLayerInfo.pPps); + + + if ((iLastIdD < 0) || 
//case 1: first layer + (iLastIdD == iCurrIdD)) { //case 2: same uiDId + InitDqLayerInfo (dq_cur, &pLayerInfo, pNalCur, pCtx->pDec); + + if (!dq_cur->sLayerInfo.pSps->bGapsInFrameNumValueAllowedFlag) { + const bool kbIdrFlag = dq_cur->sLayerInfo.sNalHeaderExt.bIdrFlag + || (dq_cur->sLayerInfo.sNalHeaderExt.sNalUnitHeader.eNalUnitType == NAL_UNIT_CODED_SLICE_IDR); + // Subclause 8.2.5.2 Decoding process for gaps in frame_num + int32_t iPrevFrameNum = pCtx->pLastDecPicInfo->iPrevFrameNum; + if (pLastThreadCtx != NULL) { + //call GetPrevFrameNum() to get correct iPrevFrameNum to prevent frame gap warning + iPrevFrameNum = pCtx->bNewSeqBegin ? 0 : GetPrevFrameNum (pCtx); + } + if (!kbIdrFlag && + pSh->iFrameNum != iPrevFrameNum && + pSh->iFrameNum != ((iPrevFrameNum + 1) & ((1 << dq_cur->sLayerInfo.pSps->uiLog2MaxFrameNum) - + 1))) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, + "referencing pictures lost due frame gaps exist, prev_frame_num: %d, curr_frame_num: %d", + iPrevFrameNum, + pSh->iFrameNum); + + bAllRefComplete = false; + pCtx->iErrorCode |= dsRefLost; + if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) { +#ifdef LONG_TERM_REF + pCtx->bParamSetsLostFlag = true; +#else + pCtx->bReferenceLostAtT0Flag = true; +#endif + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_REFERENCE_PIC_LOST); + } + } + } + + if (iCurrIdD == kuiDependencyIdMax && iCurrIdQ == BASE_QUALITY_ID && isNewFrame) { + iRet = InitRefPicList (pCtx, pCtx->uiNalRefIdc, pSh->iPicOrderCntLsb); + if (iThreadCount > 1) isNewFrame = false; + if (iRet) { + pCtx->bRPLRError = true; + bAllRefComplete = false; // RPLR error, set ref pictures complete flag false + HandleReferenceLost (pCtx, pNalCur); + WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, + "reference picture introduced by this frame is lost during transmission! 
uiTId: %d", + pNalCur->sNalHeaderExt.uiTemporalId); + if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) { + if (pCtx->iTotalNumMbRec == 0) + pCtx->pDec = NULL; + return iRet; + } + } + } + //calculate Colocated mv scaling factor for temporal direct prediction + if (pSh->eSliceType == B_SLICE && !pSh->iDirectSpatialMvPredFlag) + ComputeColocatedTemporalScaling (pCtx); + + if (iThreadCount > 1) { + if (iIdx == 0) { + memset (&pCtx->lastReadyHeightOffset[0][0], -1, LIST_A * MAX_REF_PIC_COUNT * sizeof (int16_t)); + SET_EVENT (&pThreadCtx->sSliceDecodeStart); + } + iRet = WelsDecodeAndConstructSlice (pCtx); + } else { + iRet = WelsDecodeSlice (pCtx, bFreshSliceAvailable, pNalCur); + } + + //Output good store_base reconstruction when enhancement quality layer occurred error for MGS key picture case + if (iRet != ERR_NONE) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, + "DecodeCurrentAccessUnit() failed (%d) in frame: %d uiDId: %d uiQId: %d", + iRet, pSh->iFrameNum, iCurrIdD, iCurrIdQ); + bAllRefComplete = false; + HandleReferenceLostL0 (pCtx, pNalCur); + if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) { + if (pCtx->iTotalNumMbRec == 0) + pCtx->pDec = NULL; + return iRet; + } + } + + if (iThreadCount <= 1 && bReconstructSlice) { + if ((iRet = WelsDecodeConstructSlice (pCtx, pNalCur)) != ERR_NONE) { + pCtx->pDec->bIsComplete = false; // reconstruction error, directly set the flag false + return iRet; + } + } + if (bAllRefComplete && pCtx->eSliceType != I_SLICE) { + if (iThreadCount <= 1) { + if (pCtx->sRefPic.uiRefCount[LIST_0] > 0) { + bAllRefComplete &= CheckRefPicturesComplete (pCtx); + } else { + bAllRefComplete = false; + } + } + } + } +#if defined (_DEBUG) && !defined (CODEC_FOR_TESTBED) + WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "cur_frame : %d\tiCurrIdD : %d\n ", + dq_cur->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.iFrameNum, iCurrIdD); +#endif//#if !CODEC_FOR_TESTBED + iLastIdD = iCurrIdD; + iLastIdQ = iCurrIdQ; + + //pNalUnitsList 
overflow. + ++ iIdx; + if (iIdx <= iEndIdx) { + pNalCur = pCurAu->pNalUnitsList[iIdx]; + } else { + pNalCur = NULL; + } + + if (pNalCur == NULL || + iLastIdD != pNalCur->sNalHeaderExt.uiDependencyId || + iLastIdQ != pNalCur->sNalHeaderExt.uiQualityId) + break; + } + + // Set the current dec picture complete flag. The flag will be reset when current picture need do ErrorCon. + pCtx->pDec->bIsComplete = bAllRefComplete; + if (!pCtx->pDec->bIsComplete) { // Ref pictures ECed, result in ECed + pCtx->iErrorCode |= dsDataErrorConcealed; + } + + // A dq layer decoded here +#if defined (_DEBUG) && !defined (CODEC_FOR_TESTBED) +#undef fprintf + WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "POC: #%d, FRAME: #%d, D: %d, Q: %d, T: %d, P: %d, %d\n", + pSh->iPicOrderCntLsb, pSh->iFrameNum, iCurrIdD, iCurrIdQ, dq_cur->sLayerInfo.sNalHeaderExt.uiTemporalId, + dq_cur->sLayerInfo.sNalHeaderExt.uiPriorityId, dq_cur->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.iSliceQp); +#endif//#if !CODEC_FOR_TESTBED + + if (dq_cur->uiLayerDqId == kuiTargetLayerDqId) { + if (!pCtx->bInstantDecFlag) { + if (!pCtx->pParam->bParseOnly) { + //Do error concealment here + if ((NeedErrorCon (pCtx)) && (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE)) { + ImplementErrorCon (pCtx); + pCtx->iTotalNumMbRec = pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight; + pCtx->pDec->iSpsId = pCtx->pSps->iSpsId; + pCtx->pDec->iPpsId = pCtx->pPps->iPpsId; + } + } + } + + if (iThreadCount >= 1) { + int32_t id = pThreadCtx->sThreadInfo.uiThrNum; + for (int32_t i = 0; i < iThreadCount; ++i) { + if (i == id || pThreadCtx[i - id].pCtx->uiDecodingTimeStamp == 0) continue; + if (pThreadCtx[i - id].pCtx->uiDecodingTimeStamp < pCtx->uiDecodingTimeStamp) { + WAIT_EVENT (&pThreadCtx[i - id].sSliceDecodeFinish, WELS_DEC_THREAD_WAIT_INFINITE); + } + } + pCtx->pLastDecPicInfo->uiDecodingTimeStamp = pCtx->uiDecodingTimeStamp; + } + iRet = DecodeFrameConstruction (pCtx, ppDst, pDstInfo); + if (iRet) { + if (iThreadCount > 1) { + 
SET_EVENT (&pThreadCtx->sSliceDecodeFinish); + } + return iRet; + } + + pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb = pCtx->pDec; //store latest decoded picture for EC + pCtx->bUsedAsRef = pCtx->uiNalRefIdc > 0; + if (iThreadCount <= 1) { + if (pCtx->bUsedAsRef) { + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + uint32_t i = 0; + while (i < MAX_DPB_COUNT && pCtx->sRefPic.pRefList[listIdx][i]) { + pCtx->pDec->pRefPic[listIdx][i] = pCtx->sRefPic.pRefList[listIdx][i]; + ++i; + } + } + iRet = WelsMarkAsRef (pCtx); + if (iRet != ERR_NONE) { + if (iRet == ERR_INFO_DUPLICATE_FRAME_NUM) + pCtx->iErrorCode |= dsBitstreamError; + if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) { + pCtx->pDec = NULL; + return iRet; + } + } + if (!pCtx->pParam->bParseOnly) + ExpandReferencingPicture (pCtx->pDec->pData, pCtx->pDec->iWidthInPixel, pCtx->pDec->iHeightInPixel, + pCtx->pDec->iLinesize, + pCtx->sExpandPicFunc.pfExpandLumaPicture, pCtx->sExpandPicFunc.pfExpandChromaPicture); + } + } else if (iThreadCount > 1) { + SET_EVENT (&pThreadCtx->sImageReady); + } + pCtx->pDec = NULL; //after frame decoding, always set to NULL + } + + // need update frame_num due current frame is well decoded + if (pCurAu->pNalUnitsList[pCurAu->uiStartPos]->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc > 0) + pCtx->pLastDecPicInfo->iPrevFrameNum = pSh->iFrameNum; + if (pCtx->pLastDecPicInfo->bLastHasMmco5) + pCtx->pLastDecPicInfo->iPrevFrameNum = 0; + if (iThreadCount > 1) { + int32_t id = pThreadCtx->sThreadInfo.uiThrNum; + for (int32_t i = 0; i < iThreadCount; ++i) { + if (pThreadCtx[i - id].pCtx != NULL) { + unsigned long long uiTimeStamp = pThreadCtx[i - id].pCtx->uiTimeStamp; + if (uiTimeStamp > 0 && pThreadCtx[i - id].pCtx->sSpsPpsCtx.iSeqId > pCtx->sSpsPpsCtx.iSeqId) { + CopySpsPps (pThreadCtx[i - id].pCtx, pCtx); + if (pCtx->pPicBuff != pThreadCtx[i - id].pCtx->pPicBuff) { + pCtx->pPicBuff = pThreadCtx[i - id].pCtx->pPicBuff; + } + InitialDqLayersContext (pCtx, 
pCtx->pSps->iMbWidth << 4, pCtx->pSps->iMbHeight << 4); + break; + } + } + } + } + if (iThreadCount > 1) { + SET_EVENT (&pThreadCtx->sSliceDecodeFinish); + } + } + return ERR_NONE; +} + +bool CheckAndFinishLastPic (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferInfo* pDstInfo) { + PAccessUnit pAu = pCtx->pAccessUnitList; + bool bAuBoundaryFlag = false; + if (IS_VCL_NAL (pCtx->sCurNalHead.eNalUnitType, 1)) { //VCL data, AU list should have data + PNalUnit pCurNal = pAu->pNalUnitsList[pAu->uiEndPos]; + bAuBoundaryFlag = (pCtx->iTotalNumMbRec != 0) + && (CheckAccessUnitBoundaryExt (&pCtx->pLastDecPicInfo->sLastNalHdrExt, &pCurNal->sNalHeaderExt, + &pCtx->pLastDecPicInfo->sLastSliceHeader, + &pCurNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader)); + } else { //non VCL + if (pCtx->sCurNalHead.eNalUnitType == NAL_UNIT_AU_DELIMITER) { + bAuBoundaryFlag = true; + } else if (pCtx->sCurNalHead.eNalUnitType == NAL_UNIT_SEI) { + bAuBoundaryFlag = true; + } else if (pCtx->sCurNalHead.eNalUnitType == NAL_UNIT_SPS) { + bAuBoundaryFlag = !! (pCtx->sSpsPpsCtx.iOverwriteFlags & OVERWRITE_SPS); + } else if (pCtx->sCurNalHead.eNalUnitType == NAL_UNIT_SUBSET_SPS) { + bAuBoundaryFlag = !! (pCtx->sSpsPpsCtx.iOverwriteFlags & OVERWRITE_SUBSETSPS); + } else if (pCtx->sCurNalHead.eNalUnitType == NAL_UNIT_PPS) { + bAuBoundaryFlag = !! 
(pCtx->sSpsPpsCtx.iOverwriteFlags & OVERWRITE_PPS); + } + if (bAuBoundaryFlag && pCtx->pAccessUnitList->uiAvailUnitsNum != 0) { //Construct remaining data first + ConstructAccessUnit (pCtx, ppDst, pDstInfo); + } + } + + //Do Error Concealment here + if (bAuBoundaryFlag && (pCtx->iTotalNumMbRec != 0) && NeedErrorCon (pCtx)) { //AU ready but frame not completely reconed + if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) { + ImplementErrorCon (pCtx); + pCtx->iTotalNumMbRec = pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight; + pCtx->pDec->iSpsId = pCtx->pSps->iSpsId; + pCtx->pDec->iPpsId = pCtx->pPps->iPpsId; + + DecodeFrameConstruction (pCtx, ppDst, pDstInfo); + pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb = pCtx->pDec; //save ECed pic for future use + if (pCtx->pLastDecPicInfo->sLastNalHdrExt.sNalUnitHeader.uiNalRefIdc > 0) { + if (MarkECFrameAsRef (pCtx) == ERR_INFO_INVALID_PTR) { + pCtx->iErrorCode |= dsRefListNullPtrs; + return false; + } + } + } else if (pCtx->pParam->bParseOnly) { //clear parse only internal data status + pCtx->pParserBsInfo->iNalNum = 0; + pCtx->bFrameFinish = true; //clear frame pending status here! 
+ } else { + if (DecodeFrameConstruction (pCtx, ppDst, pDstInfo)) { + if ((pCtx->pLastDecPicInfo->sLastNalHdrExt.sNalUnitHeader.uiNalRefIdc > 0) + && (pCtx->pLastDecPicInfo->sLastNalHdrExt.uiTemporalId == 0)) + pCtx->iErrorCode |= dsNoParamSets; + else + pCtx->iErrorCode |= dsBitstreamError; + pCtx->pDec = NULL; + return false; + } + } + pCtx->pDec = NULL; + if (pAu->pNalUnitsList[pAu->uiStartPos]->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc > 0) + pCtx->pLastDecPicInfo->iPrevFrameNum = pCtx->pLastDecPicInfo->sLastSliceHeader.iFrameNum; //save frame_num + if (pCtx->pLastDecPicInfo->bLastHasMmco5) + pCtx->pLastDecPicInfo->iPrevFrameNum = 0; + } + return ERR_NONE; +} + +bool CheckRefPicturesComplete (PWelsDecoderContext pCtx) { + // Multi Reference, RefIdx may differ + bool bAllRefComplete = true; + int32_t iRealMbIdx = pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.iFirstMbInSlice; + for (int32_t iMbIdx = 0; bAllRefComplete + && iMbIdx < pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer.iTotalMbInCurSlice; iMbIdx++) { + switch (pCtx->pCurDqLayer->pDec->pMbType[iRealMbIdx]) { + case MB_TYPE_SKIP: + case MB_TYPE_16x16: + bAllRefComplete &= + pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete; + break; + + case MB_TYPE_16x8: + bAllRefComplete &= + pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete; + bAllRefComplete &= + pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][8] ]->bIsComplete; + break; + + case MB_TYPE_8x16: + bAllRefComplete &= + pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete; + bAllRefComplete &= + pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][2] ]->bIsComplete; + break; + + case MB_TYPE_8x8: + case MB_TYPE_8x8_REF0: + bAllRefComplete &= + pCtx->sRefPic.pRefList[ LIST_0 ][ 
pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete; + bAllRefComplete &= + pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][2] ]->bIsComplete; + bAllRefComplete &= + pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][8] ]->bIsComplete; + bAllRefComplete &= + pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][10] ]->bIsComplete; + break; + + default: + break; + } + iRealMbIdx = (pCtx->pPps->uiNumSliceGroups > 1) ? FmoNextMb (pCtx->pFmo, iRealMbIdx) : + (pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.iFirstMbInSlice + iMbIdx); + if (iRealMbIdx == -1) //caused by abnormal return of FmoNextMb() + return false; + } + + return bAllRefComplete; +} +} // namespace WelsDec diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/decoder_data_tables.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/decoder_data_tables.cpp new file mode 100644 index 000000000..84ba73566 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/decoder_data_tables.cpp @@ -0,0 +1,568 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +// exp_data.c +// export date cross various modules (.c) +#include "wels_common_basis.h" +#include "mb_cache.h" +#include "vlc_decoder.h" + +namespace WelsDec { + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + +const uint8_t g_kuiScan4[16] = { //for mb cache in sMb (only current element, without neighbor) + // 4*4block scan mb cache order + 0, 1, 4, 5, // 0 1 | 4 5 0 1 | 2 3 + 2, 3, 6, 7, // 2 3 | 6 7 4 5 | 6 7 + 8, 9, 12, 13, //----------------->----------- + 10, 11, 14, 15 // 8 9 |12 13 8 9 |10 11 +}; //10 11 |14 15 12 13 |14 15 + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +// extern at wels_common_basis.h + +/*common use table*/ +const uint8_t g_kMbNonZeroCountIdx[24] = { + // 0 1 | 4 5 luma 8*8 block pNonZeroCount[16+8] + 0, 1, 4, 5, // 2 3 | 6 7 0 | 1 0 1 2 3 + 2, 3, 6, 7, //--------------- --------- 4 5 6 7 + 8, 9, 12, 13, // 8 9 | 12 13 2 | 3 8 9 10 11 + 10, 11, 14, 15, // 10 11 | 14 15-----------------------------> 12 13 14 15 + 16, 17, 20, 21, //---------------- chroma 8*8 
block 16 17 18 19 + 18, 19, 22, 23 // 16 17 | 20 21 0 1 20 21 22 23 +}; +//cache element equal to 26 + +const uint8_t g_kCacheNzcScanIdx[4 * 4 + 4 + 4 + 3] = { + /* Luma */ + 9, 10, 17, 18, // 1+1*8, 2+1*8, 1+2*8, 2+2*8, + 11, 12, 19, 20, // 3+1*8, 4+1*8, 3+2*8, 4+2*8, + 25, 26, 33, 34, // 1+3*8, 2+3*8, 1+4*8, 2+4*8, + 27, 28, 35, 36, // 3+3*8, 4+3*8, 3+4*8, 4+4*8, + /* Cb */ + 14, 15, // 6+1*8, 7+1*8, + 22, 23, // 6+2*8, 7+2*8, + + /* Cr */ + 38, 39, // 6+4*8, 7+4*8, + 46, 47, // 6+5*8, 7+5*8, + /* Luma DC */ + 41, // 1+5*8 + /* Chroma DC */ + 42, 43 // 2+5*8, 3+5*8, +}; + +const uint8_t g_kCache26ScanIdx[16] = { //intra4*4_pred_mode and pNonZeroCount cache scan index, 4*4 block as basic unit + 6, 7, 11, 12, + 8, 9, 13, 14, + 16, 17, 21, 22, + 18, 19, 23, 24 +}; + +//cache element equal to 30 +const uint8_t g_kCache30ScanIdx[16] = { //mv or pRefIndex cache scan index, 4*4 block as basic unit + 7, 8, 13, 14, + 9, 10, 15, 16, + 19, 20, 25, 26, + 21, 22, 27, 28 +}; + +const uint8_t g_kNonZeroScanIdxC[4] = { //pNonZeroCount cache for chroma, 4*4 block as basic unit + 4, 5, + 7, 8 +}; + +const uint8_t g_kuiScan8[24] = { // [16 + 2*4] + 9, 10, 17, 18, // 1+1*8, 2+1*8, 1+2*8, 2+2*8, + 11, 12, 19, 20, // 3+1*8, 4+1*8, 3+2*8, 4+2*8, + 25, 26, 33, 34, // 1+3*8, 2+3*8, 1+4*8, 2+4*8, + 27, 28, 35, 36, // 3+3*8, 4+3*8, 3+4*8, 4+4*8, + 14, 15, // 6+1*8, 7+1*8, + 22, 23, // 6+2*8, 7+2*8, + 38, 39, // 6+4*8, 7+4*8, + 46, 47, // 6+5*8, 7+5*8, +}; + +const uint8_t g_kuiLumaDcZigzagScan[16] = { + 0, 16, 32, 128, // 0*16 + 0*64, 1*16 + 0*64, 2*16 + 0*64, 0*16 + 2*64, + 48, 64, 80, 96, // 3*16 + 0*64, 0*16 + 1*64, 1*16 + 1*64, 2*16 + 1*64, + 144, 160, 176, 192, // 1*16 + 2*64, 2*16 + 2*64, 3*16 + 2*64, 0*16 + 3*64, + 112, 208, 224, 240 // 3*16 + 1*64, 1*16 + 3*64, 2*16 + 3*64, 3*16 + 3*64, +}; + +const uint8_t g_kuiChromaDcScan[4] = { + 0, 16, 32, 48 +}; + 
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +const uint8_t g_kuiIntra4x4CbpTable[48] = { + 47, 31, 15, 0, 23, 27, 29, 30, 7, 11, 13, 14, 39, 43, 45, 46, //15 + 16, 3, 5, 10, 12, 19, 21, 26, 28, 35, 37, 42, 44, 1, 2, 4, //31 + 8, 17, 18, 20, 24, 6, 9, 22, 25, 32, 33, 34, 36, 40, 38, 41 //47 +}; + +const uint8_t g_kuiIntra4x4CbpTable400[16] = { + 15, 0, 7, 11, 13, 14, 3, 5, 10, 12, 1, 2, 4, 8, 6, 9 + +}; + +const uint8_t g_kuiInterCbpTable[48] = { + 0, 16, 1, 2, 4, 8, 32, 3, 5, 10, 12, 15, 47, 7, 11, 13, //15 + 14, 6, 9, 31, 35, 37, 42, 44, 33, 34, 36, 40, 39, 43, 45, 46, //31 + 17, 18, 20, 24, 19, 21, 26, 28, 23, 27, 29, 30, 22, 25, 38, 41 //47 +}; + +const uint8_t g_kuiInterCbpTable400[16] = { + 0, 1, 2, 4, 8, 3, 5, 10, 12, 15, 7, 11, 13, 14, 6, 9 +}; + +const uint8_t g_kuiLeadingZeroTable[256] = { + 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +// extern at vlc_decoder.h + +const uint8_t g_kuiVlcChromaTable[256][2] = { + {13, 7}, {13, 7}, {12, 8}, {11, 8}, {8, 7}, {8, 7}, {7, 7}, {7, 7}, {10, 6}, 
{10, 6}, {10, 6}, {10, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, //15 + { 3, 6}, { 3, 6}, { 3, 6}, { 3, 6}, {9, 6}, {9, 6}, {9, 6}, {9, 6}, { 4, 6}, { 4, 6}, { 4, 6}, { 4, 6}, {1, 6}, {1, 6}, {1, 6}, {1, 6}, //31 + { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, {5, 3}, {5, 3}, {5, 3}, {5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, {5, 3}, {5, 3}, {5, 3}, {5, 3}, //47 + { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, {5, 3}, {5, 3}, {5, 3}, {5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, {5, 3}, {5, 3}, {5, 3}, {5, 3}, //63 + { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, //79 + { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, //95 + { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, //111 + { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, //127 + { 2, 1}, { 2, 1}, { 2, 1}, { 2, 1}, {2, 1}, {2, 1}, {2, 1}, {2, 1}, { 2, 1}, { 2, 1}, { 2, 1}, { 2, 1}, {2, 1}, {2, 1}, {2, 1}, {2, 1}, //143 + { 2, 1}, { 2, 1}, { 2, 1}, { 2, 1}, {2, 1}, {2, 1}, {2, 1}, {2, 1}, { 2, 1}, { 2, 1}, { 2, 1}, { 2, 1}, {2, 1}, {2, 1}, {2, 1}, {2, 1}, //159 + { 2, 1}, { 2, 1}, { 2, 1}, { 2, 1}, {2, 1}, {2, 1}, {2, 1}, {2, 1}, { 2, 1}, { 2, 1}, { 2, 1}, { 2, 1}, {2, 1}, {2, 1}, {2, 1}, {2, 1}, //175 + { 2, 1}, { 2, 1}, { 2, 1}, { 2, 1}, {2, 1}, {2, 1}, {2, 1}, {2, 1}, { 2, 1}, { 2, 1}, { 2, 1}, { 2, 1}, {2, 1}, {2, 1}, {2, 1}, {2, 1}, //191 + { 2, 1}, { 2, 1}, { 2, 1}, { 2, 1}, {2, 1}, {2, 1}, {2, 1}, {2, 1}, { 2, 1}, { 2, 1}, { 2, 1}, { 2, 1}, {2, 1}, {2, 1}, {2, 1}, {2, 1}, //207 + { 2, 1}, { 2, 1}, { 2, 1}, { 2, 1}, {2, 1}, {2, 1}, {2, 1}, {2, 1}, { 2, 1}, { 2, 1}, { 2, 1}, { 2, 1}, {2, 1}, {2, 1}, {2, 1}, {2, 1}, //223 + { 2, 1}, { 2, 1}, { 2, 1}, { 2, 1}, {2, 1}, {2, 1}, {2, 1}, {2, 
1}, { 2, 1}, { 2, 1}, { 2, 1}, { 2, 1}, {2, 1}, {2, 1}, {2, 1}, {2, 1}, //239 + { 2, 1}, { 2, 1}, { 2, 1}, { 2, 1}, {2, 1}, {2, 1}, {2, 1}, {2, 1}, { 2, 1}, { 2, 1}, { 2, 1}, { 2, 1}, {2, 1}, {2, 1}, {2, 1}, {2, 1} //255 +}; + +const uint8_t g_kuiVlcTable_0[256][2] += { //[0] means the index of vlc table, [1] means the length of vlc code [256] value means the value of 8bits + { 0, 0}, { 0, 0}, { 0, 0}, {0, 0}, {21, 8}, {12, 8}, {7, 8}, {3, 8}, {17, 7}, {17, 7}, {8, 7}, {8, 7}, {13, 6}, {13, 6}, {13, 6}, {13, 6}, //15 + { 4, 6}, { 4, 6}, { 4, 6}, {4, 6}, { 1, 6}, { 1, 6}, {1, 6}, {1, 6}, { 9, 5}, { 9, 5}, {9, 5}, {9, 5}, { 9, 5}, { 9, 5}, { 9, 5}, { 9, 5}, //31 + { 5, 3}, { 5, 3}, { 5, 3}, {5, 3}, { 5, 3}, { 5, 3}, {5, 3}, {5, 3}, { 5, 3}, { 5, 3}, {5, 3}, {5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, //47 + { 5, 3}, { 5, 3}, { 5, 3}, {5, 3}, { 5, 3}, { 5, 3}, {5, 3}, {5, 3}, { 5, 3}, { 5, 3}, {5, 3}, {5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, //63 + { 2, 2}, { 2, 2}, { 2, 2}, {2, 2}, { 2, 2}, { 2, 2}, {2, 2}, {2, 2}, { 2, 2}, { 2, 2}, {2, 2}, {2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, //79 + { 2, 2}, { 2, 2}, { 2, 2}, {2, 2}, { 2, 2}, { 2, 2}, {2, 2}, {2, 2}, { 2, 2}, { 2, 2}, {2, 2}, {2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, //95 + { 2, 2}, { 2, 2}, { 2, 2}, {2, 2}, { 2, 2}, { 2, 2}, {2, 2}, {2, 2}, { 2, 2}, { 2, 2}, {2, 2}, {2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, //111 + { 2, 2}, { 2, 2}, { 2, 2}, {2, 2}, { 2, 2}, { 2, 2}, {2, 2}, {2, 2}, { 2, 2}, { 2, 2}, {2, 2}, {2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, //127 + { 0, 1}, { 0, 1}, { 0, 1}, {0, 1}, { 0, 1}, { 0, 1}, {0, 1}, {0, 1}, { 0, 1}, { 0, 1}, {0, 1}, {0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, //143 + { 0, 1}, { 0, 1}, { 0, 1}, {0, 1}, { 0, 1}, { 0, 1}, {0, 1}, {0, 1}, { 0, 1}, { 0, 1}, {0, 1}, {0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, //159 + { 0, 1}, { 0, 1}, { 0, 1}, {0, 1}, { 0, 1}, { 0, 1}, {0, 1}, {0, 1}, { 0, 1}, { 0, 1}, {0, 1}, {0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, //175 + { 0, 1}, 
{ 0, 1}, { 0, 1}, {0, 1}, { 0, 1}, { 0, 1}, {0, 1}, {0, 1}, { 0, 1}, { 0, 1}, {0, 1}, {0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, //191 + { 0, 1}, { 0, 1}, { 0, 1}, {0, 1}, { 0, 1}, { 0, 1}, {0, 1}, {0, 1}, { 0, 1}, { 0, 1}, {0, 1}, {0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, //207 + { 0, 1}, { 0, 1}, { 0, 1}, {0, 1}, { 0, 1}, { 0, 1}, {0, 1}, {0, 1}, { 0, 1}, { 0, 1}, {0, 1}, {0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, //223 + { 0, 1}, { 0, 1}, { 0, 1}, {0, 1}, { 0, 1}, { 0, 1}, {0, 1}, {0, 1}, { 0, 1}, { 0, 1}, {0, 1}, {0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, //239 + { 0, 1}, { 0, 1}, { 0, 1}, {0, 1}, { 0, 1}, { 0, 1}, {0, 1}, {0, 1}, { 0, 1}, { 0, 1}, {0, 1}, {0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1} //255 +}; + +const uint8_t g_kuiVlcTable_0_0[256][2] = { // read 8 bits // for g_kuiVlcTable_0[0] //checked no error-- + { 0, 0}, { 0, 0}, {47, 7}, {47, 7}, {58, 8}, {60, 8}, {59, 8}, {54, 8}, {61, 8}, {56, 8}, {55, 8}, {50, 8}, {57, 8}, {52, 8}, {51, 8}, {46, 8}, //15 + {53, 7}, {53, 7}, {48, 7}, {48, 7}, {43, 7}, {43, 7}, {42, 7}, {42, 7}, {49, 7}, {49, 7}, {44, 7}, {44, 7}, {39, 7}, {39, 7}, {38, 7}, {38, 7}, //31 + {45, 6}, {45, 6}, {45, 6}, {45, 6}, {40, 6}, {40, 6}, {40, 6}, {40, 6}, {35, 6}, {35, 6}, {35, 6}, {35, 6}, {34, 6}, {34, 6}, {34, 6}, {34, 6}, //47 + {41, 6}, {41, 6}, {41, 6}, {41, 6}, {36, 6}, {36, 6}, {36, 6}, {36, 6}, {31, 6}, {31, 6}, {31, 6}, {31, 6}, {30, 6}, {30, 6}, {30, 6}, {30, 6}, //63 + {26, 5}, {26, 5}, {26, 5}, {26, 5}, {26, 5}, {26, 5}, {26, 5}, {26, 5}, {32, 5}, {32, 5}, {32, 5}, {32, 5}, {32, 5}, {32, 5}, {32, 5}, {32, 5}, //79 + {27, 5}, {27, 5}, {27, 5}, {27, 5}, {27, 5}, {27, 5}, {27, 5}, {27, 5}, {22, 5}, {22, 5}, {22, 5}, {22, 5}, {22, 5}, {22, 5}, {22, 5}, {22, 5}, //95 + {37, 5}, {37, 5}, {37, 5}, {37, 5}, {37, 5}, {37, 5}, {37, 5}, {37, 5}, {28, 5}, {28, 5}, {28, 5}, {28, 5}, {28, 5}, {28, 5}, {28, 5}, {28, 5}, //111 + {23, 5}, {23, 5}, {23, 5}, {23, 5}, {23, 5}, {23, 5}, {23, 5}, {23, 5}, {18, 5}, {18, 5}, {18, 5}, {18, 
5}, {18, 5}, {18, 5}, {18, 5}, {18, 5}, //127 + {33, 3}, {33, 3}, {33, 3}, {33, 3}, {33, 3}, {33, 3}, {33, 3}, {33, 3}, {33, 3}, {33, 3}, {33, 3}, {33, 3}, {33, 3}, {33, 3}, {33, 3}, {33, 3}, //143 + {33, 3}, {33, 3}, {33, 3}, {33, 3}, {33, 3}, {33, 3}, {33, 3}, {33, 3}, {33, 3}, {33, 3}, {33, 3}, {33, 3}, {33, 3}, {33, 3}, {33, 3}, {33, 3}, //159 + {24, 3}, {24, 3}, {24, 3}, {24, 3}, {24, 3}, {24, 3}, {24, 3}, {24, 3}, {24, 3}, {24, 3}, {24, 3}, {24, 3}, {24, 3}, {24, 3}, {24, 3}, {24, 3}, //175 + {24, 3}, {24, 3}, {24, 3}, {24, 3}, {24, 3}, {24, 3}, {24, 3}, {24, 3}, {24, 3}, {24, 3}, {24, 3}, {24, 3}, {24, 3}, {24, 3}, {24, 3}, {24, 3}, //191 + {19, 3}, {19, 3}, {19, 3}, {19, 3}, {19, 3}, {19, 3}, {19, 3}, {19, 3}, {19, 3}, {19, 3}, {19, 3}, {19, 3}, {19, 3}, {19, 3}, {19, 3}, {19, 3}, //207 + {19, 3}, {19, 3}, {19, 3}, {19, 3}, {19, 3}, {19, 3}, {19, 3}, {19, 3}, {19, 3}, {19, 3}, {19, 3}, {19, 3}, {19, 3}, {19, 3}, {19, 3}, {19, 3}, //223 + {14, 3}, {14, 3}, {14, 3}, {14, 3}, {14, 3}, {14, 3}, {14, 3}, {14, 3}, {14, 3}, {14, 3}, {14, 3}, {14, 3}, {14, 3}, {14, 3}, {14, 3}, {14, 3}, //239 + {14, 3}, {14, 3}, {14, 3}, {14, 3}, {14, 3}, {14, 3}, {14, 3}, {14, 3}, {14, 3}, {14, 3}, {14, 3}, {14, 3}, {14, 3}, {14, 3}, {14, 3}, {14, 3} //255 +}; + +const uint8_t g_kuiVlcTable_0_1[4][2] = { // read 2 bits // for g_kuiVlcTable_0[1] //checked no error-- + {29, 2}, {20, 2}, {15, 2}, {10, 2} +}; + +const uint8_t g_kuiVlcTable_0_2[2][2] = { // read 1 bit // for g_kuiVlcTable_0[2] //checked no error-- + {25, 1}, {16, 1} +}; + +const uint8_t g_kuiVlcTable_0_3[2][2] = { // read 1 bit // for g_kuiVlcTable_0[3] //checked no error-- + {11, 1}, {6, 1} +}; + +const uint8_t g_kuiVlcTable_1[256][2] = { //checked no error-- + { 0, 0}, { 0, 0}, { 0, 0}, { 0, 0}, {14, 8}, {20, 8}, {19, 8}, {10, 8}, {29, 7}, {29, 7}, {16, 7}, {16, 7}, {15, 7}, {15, 7}, { 6, 7}, { 6, 7}, //15 + {25, 6}, {25, 6}, {25, 6}, {25, 6}, {12, 6}, {12, 6}, {12, 6}, {12, 6}, {11, 6}, {11, 6}, {11, 6}, {11, 6}, { 
3, 6}, { 3, 6}, { 3, 6}, { 3, 6}, //31 + {21, 6}, {21, 6}, {21, 6}, {21, 6}, { 8, 6}, { 8, 6}, { 8, 6}, { 8, 6}, { 7, 6}, { 7, 6}, { 7, 6}, { 7, 6}, { 1, 6}, { 1, 6}, { 1, 6}, { 1, 6}, //47 + {17, 5}, {17, 5}, {17, 5}, {17, 5}, {17, 5}, {17, 5}, {17, 5}, {17, 5}, { 4, 5}, { 4, 5}, { 4, 5}, { 4, 5}, { 4, 5}, { 4, 5}, { 4, 5}, { 4, 5}, //63 + {13, 4}, {13, 4}, {13, 4}, {13, 4}, {13, 4}, {13, 4}, {13, 4}, {13, 4}, {13, 4}, {13, 4}, {13, 4}, {13, 4}, {13, 4}, {13, 4}, {13, 4}, {13, 4}, //79 + { 9, 4}, { 9, 4}, { 9, 4}, { 9, 4}, { 9, 4}, { 9, 4}, { 9, 4}, { 9, 4}, { 9, 4}, { 9, 4}, { 9, 4}, { 9, 4}, { 9, 4}, { 9, 4}, { 9, 4}, { 9, 4}, //95 + { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, //111 + { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, //127 + { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, //143 + { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, //159 + { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, //175 + { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, //191 + { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, //207 + { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, //223 + { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 
0, 2}, //239 + { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2} //255 + +}; + +const uint8_t g_kuiVlcTable_1_0[64][2] = { // read 6 bits // for g_kuiVlcTable_1[0] //checked no error-- + { 0, 0}, { 0, 0}, {57, 5}, {57, 5}, {61, 6}, {60, 6}, {59, 6}, {58, 6}, {55, 6}, {54, 6}, {56, 6}, {51, 6}, {52, 5}, {52, 5}, {50, 5}, {50, 5}, //15 + {53, 5}, {53, 5}, {48, 5}, {48, 5}, {47, 5}, {47, 5}, {46, 5}, {46, 5}, {49, 5}, {49, 5}, {44, 5}, {44, 5}, {43, 5}, {43, 5}, {42, 5}, {42, 5}, //31 + {38, 4}, {38, 4}, {38, 4}, {38, 4}, {40, 4}, {40, 4}, {40, 4}, {40, 4}, {39, 4}, {39, 4}, {39, 4}, {39, 4}, {34, 4}, {34, 4}, {34, 4}, {34, 4}, //47 + {45, 4}, {45, 4}, {45, 4}, {45, 4}, {36, 4}, {36, 4}, {36, 4}, {36, 4}, {35, 4}, {35, 4}, {35, 4}, {35, 4}, {30, 4}, {30, 4}, {30, 4}, {30, 4} //63 +}; + +const uint8_t g_kuiVlcTable_1_1[8][2] = { // read 3 bits // for g_kuiVlcTable_1[1] //checked no error-- + {41, 3}, {32, 3}, {31, 3}, {26, 3}, {37, 3}, {28, 3}, {27, 3}, {22, 3} +}; + +const uint8_t g_kuiVlcTable_1_2[2][2] = { // read 1 bit // for g_kuiVlcTable_1[2] //checked no error-- + {33, 1}, {24, 1} +}; + +const uint8_t g_kuiVlcTable_1_3[2][2] = { // read 1 bit // for g_kuiVlcTable_1[3] //checked no error-- + {23, 1}, {18, 1} +}; + +const uint8_t g_kuiVlcTable_2[256][2] = { //checked no error-- + { 0, 0}, { 0, 0}, { 0, 0}, { 0, 0}, { 0, 0}, { 0, 0}, { 0, 0}, { 0, 0}, {45, 8}, {40, 8}, {35, 8}, {30, 8}, {41, 8}, {36, 8}, {31, 8}, {26, 8}, //15 + {22, 7}, {22, 7}, {18, 7}, {18, 7}, {32, 7}, {32, 7}, {14, 7}, {14, 7}, {37, 7}, {37, 7}, {28, 7}, {28, 7}, {27, 7}, {27, 7}, {10, 7}, {10, 7}, //31 + { 6, 6}, { 6, 6}, { 6, 6}, { 6, 6}, {24, 6}, {24, 6}, {24, 6}, {24, 6}, {23, 6}, {23, 6}, {23, 6}, {23, 6}, { 3, 6}, { 3, 6}, { 3, 6}, { 3, 6}, //47 + {33, 6}, {33, 6}, {33, 6}, {33, 6}, {20, 6}, {20, 6}, {20, 6}, {20, 6}, {19, 6}, {19, 6}, {19, 6}, {19, 6}, { 1, 6}, { 1, 6}, { 1, 6}, { 1, 6}, //63 + 
{15, 5}, {15, 5}, {15, 5}, {15, 5}, {15, 5}, {15, 5}, {15, 5}, {15, 5}, {16, 5}, {16, 5}, {16, 5}, {16, 5}, {16, 5}, {16, 5}, {16, 5}, {16, 5}, //79 + {11, 5}, {11, 5}, {11, 5}, {11, 5}, {11, 5}, {11, 5}, {11, 5}, {11, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, //95 + { 7, 5}, { 7, 5}, { 7, 5}, { 7, 5}, { 7, 5}, { 7, 5}, { 7, 5}, { 7, 5}, {29, 5}, {29, 5}, {29, 5}, {29, 5}, {29, 5}, {29, 5}, {29, 5}, {29, 5}, //111 + { 8, 5}, { 8, 5}, { 8, 5}, { 8, 5}, { 8, 5}, { 8, 5}, { 8, 5}, { 8, 5}, { 4, 5}, { 4, 5}, { 4, 5}, { 4, 5}, { 4, 5}, { 4, 5}, { 4, 5}, { 4, 5}, //127 + {25, 4}, {25, 4}, {25, 4}, {25, 4}, {25, 4}, {25, 4}, {25, 4}, {25, 4}, {25, 4}, {25, 4}, {25, 4}, {25, 4}, {25, 4}, {25, 4}, {25, 4}, {25, 4}, //143 + {21, 4}, {21, 4}, {21, 4}, {21, 4}, {21, 4}, {21, 4}, {21, 4}, {21, 4}, {21, 4}, {21, 4}, {21, 4}, {21, 4}, {21, 4}, {21, 4}, {21, 4}, {21, 4}, //159 + {17, 4}, {17, 4}, {17, 4}, {17, 4}, {17, 4}, {17, 4}, {17, 4}, {17, 4}, {17, 4}, {17, 4}, {17, 4}, {17, 4}, {17, 4}, {17, 4}, {17, 4}, {17, 4}, //175 + {13, 4}, {13, 4}, {13, 4}, {13, 4}, {13, 4}, {13, 4}, {13, 4}, {13, 4}, {13, 4}, {13, 4}, {13, 4}, {13, 4}, {13, 4}, {13, 4}, {13, 4}, {13, 4}, //191 + { 9, 4}, { 9, 4}, { 9, 4}, { 9, 4}, { 9, 4}, { 9, 4}, { 9, 4}, { 9, 4}, { 9, 4}, { 9, 4}, { 9, 4}, { 9, 4}, { 9, 4}, { 9, 4}, { 9, 4}, { 9, 4}, //207 + { 5, 4}, { 5, 4}, { 5, 4}, { 5, 4}, { 5, 4}, { 5, 4}, { 5, 4}, { 5, 4}, { 5, 4}, { 5, 4}, { 5, 4}, { 5, 4}, { 5, 4}, { 5, 4}, { 5, 4}, { 5, 4}, //223 + { 2, 4}, { 2, 4}, { 2, 4}, { 2, 4}, { 2, 4}, { 2, 4}, { 2, 4}, { 2, 4}, { 2, 4}, { 2, 4}, { 2, 4}, { 2, 4}, { 2, 4}, { 2, 4}, { 2, 4}, { 2, 4}, //239 + { 0, 4}, { 0, 4}, { 0, 4}, { 0, 4}, { 0, 4}, { 0, 4}, { 0, 4}, { 0, 4}, { 0, 4}, { 0, 4}, { 0, 4}, { 0, 4}, { 0, 4}, { 0, 4}, { 0, 4}, { 0, 4} //255 +}; + +const uint8_t g_kuiVlcTable_2_0[4][2] = { // read 2 bits // for g_kuiVlcTable_2[0] //checked + {0, 0}, {58, 2}, {61, 2}, {60, 2} +}; + + +const uint8_t 
g_kuiVlcTable_2_1[4][2] = { // read 2 bits // for g_kuiVlcTable_2[1] //checked + {59, 2}, {54, 2}, {57, 2}, {56, 2} +}; + +const uint8_t g_kuiVlcTable_2_2[4][2] = { // read 2 bits // for g_kuiVlcTable_2[2] //checked + {55, 2}, {50, 2}, {53, 2}, {52, 2} +}; + +const uint8_t g_kuiVlcTable_2_3[4][2] = { // read 2 bits // for g_kuiVlcTable_2[3] //checked + {51, 2}, {46, 2}, {47, 1}, {47, 1} +}; + +const uint8_t g_kuiVlcTable_2_4[2][2] = { // read 1 bit // for g_kuiVlcTable_2[4] //checked + {42, 1}, {48, 1} +}; + +const uint8_t g_kuiVlcTable_2_5[2][2] = { // read 1 bit // for g_kuiVlcTable_2[5] //checked + {43, 1}, {38, 1} +}; + +const uint8_t g_kuiVlcTable_2_6[2][2] = { // read 1 bit // for g_kuiVlcTable_2[6] //checked no error-- + {49, 1}, {44, 1} +}; + +const uint8_t g_kuiVlcTable_2_7[2][2] = { // read 1 bit // for g_kuiVlcTable_2[7] //checked no error-- + {39, 1}, {34, 1} +}; + +const uint8_t g_kuiVlcTable_3[64][2] = { // read 6 bits //corrected + { 1, 6}, { 2, 6}, { 0, 0}, { 0, 6}, { 3, 6}, { 4, 6}, { 5, 6}, { 0, 0}, { 6, 6}, { 7, 6}, { 8, 6}, { 9, 6}, {10, 6}, {11, 6}, {12, 6}, {13, 6}, //15 + {14, 6}, {15, 6}, {16, 6}, {17, 6}, {18, 6}, {19, 6}, {20, 6}, {21, 6}, {22, 6}, {23, 6}, {24, 6}, {25, 6}, {26, 6}, {27, 6}, {28, 6}, {29, 6}, //31 + {30, 6}, {31, 6}, {32, 6}, {33, 6}, {34, 6}, {35, 6}, {36, 6}, {37, 6}, {38, 6}, {39, 6}, {40, 6}, {41, 6}, {42, 6}, {43, 6}, {44, 6}, {45, 6}, //47 + {46, 6}, {47, 6}, {48, 6}, {49, 6}, {50, 6}, {51, 6}, {52, 6}, {53, 6}, {54, 6}, {55, 6}, {56, 6}, {57, 6}, {58, 6}, {59, 6}, {60, 6}, {61, 6}, //63 +}; + + +const uint8_t g_kuiVlcTableNeedMoreBitsThread[3] = { + 4, 4, 8 +}; + +const uint8_t g_kuiVlcTableMoreBitsCount0[4] = { + 8, 2, 1, 1 +}; + +const uint8_t g_kuiVlcTableMoreBitsCount1[4] = { + 6, 3, 1, 1 +}; + +const uint8_t g_kuiVlcTableMoreBitsCount2[8] = { + 2, 2, 2, 2, 1, 1, 1, 1 +}; + +const uint8_t g_kuiNcMapTable[17] = { + 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3 +}; + + +const uint8_t 
g_kuiVlcTrailingOneTotalCoeffTable[62][2] = { + {0, 0}, + {0, 1}, {1, 1}, + {0, 2}, {1, 2}, {2, 2}, + {0, 3}, {1, 3}, {2, 3}, {3, 3}, + {0, 4}, {1, 4}, {2, 4}, {3, 4}, + {0, 5}, {1, 5}, {2, 5}, {3, 5}, + {0, 6}, {1, 6}, {2, 6}, {3, 6}, + {0, 7}, {1, 7}, {2, 7}, {3, 7}, + {0, 8}, {1, 8}, {2, 8}, {3, 8}, + {0, 9}, {1, 9}, {2, 9}, {3, 9}, + {0, 10}, {1, 10}, {2, 10}, {3, 10}, + {0, 11}, {1, 11}, {2, 11}, {3, 11}, + {0, 12}, {1, 12}, {2, 12}, {3, 12}, + {0, 13}, {1, 13}, {2, 13}, {3, 13}, + {0, 14}, {1, 14}, {2, 14}, {3, 14}, + {0, 15}, {1, 15}, {2, 15}, {3, 15}, + {0, 16}, {1, 16}, {2, 16}, {3, 16} +}; + +const uint8_t g_kuiTotalZerosTable0[512][2] += { //read 9 bits, generated by tzVlcIndex=1 in Table 9-7 in H.264/AVC standard + {0, 0}, {15, 9}, {14, 9}, {13, 9}, {12, 8}, {12, 8}, {11, 8}, {11, 8}, {10, 7}, {10, 7}, {10, 7}, {10, 7}, {9, 7}, {9, 7}, {9, 7}, {9, 7}, //15 + {8, 6}, { 8, 6}, { 8, 6}, { 8, 6}, { 8, 6}, { 8, 6}, { 8, 6}, { 8, 6}, { 7, 6}, { 7, 6}, { 7, 6}, { 7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, //31 + {6, 5}, { 6, 5}, { 6, 5}, { 6, 5}, { 6, 5}, { 6, 5}, { 6, 5}, { 6, 5}, { 6, 5}, { 6, 5}, { 6, 5}, { 6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, //47 + {5, 5}, { 5, 5}, { 5, 5}, { 5, 5}, { 5, 5}, { 5, 5}, { 5, 5}, { 5, 5}, { 5, 5}, { 5, 5}, { 5, 5}, { 5, 5}, {5, 5}, {5, 5}, {5, 5}, {5, 5}, //63 + {4, 4}, { 4, 4}, { 4, 4}, { 4, 4}, { 4, 4}, { 4, 4}, { 4, 4}, { 4, 4}, { 4, 4}, { 4, 4}, { 4, 4}, { 4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4}, //79 + {4, 4}, { 4, 4}, { 4, 4}, { 4, 4}, { 4, 4}, { 4, 4}, { 4, 4}, { 4, 4}, { 4, 4}, { 4, 4}, { 4, 4}, { 4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4}, //95 + {3, 4}, { 3, 4}, { 3, 4}, { 3, 4}, { 3, 4}, { 3, 4}, { 3, 4}, { 3, 4}, { 3, 4}, { 3, 4}, { 3, 4}, { 3, 4}, {3, 4}, {3, 4}, {3, 4}, {3, 4}, //111 + {3, 4}, { 3, 4}, { 3, 4}, { 3, 4}, { 3, 4}, { 3, 4}, { 3, 4}, { 3, 4}, { 3, 4}, { 3, 4}, { 3, 4}, { 3, 4}, {3, 4}, {3, 4}, {3, 4}, {3, 4}, //127 + {2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 
2, 3}, { 2, 3}, {2, 3}, {2, 3}, {2, 3}, {2, 3}, //143 + {2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, {2, 3}, {2, 3}, {2, 3}, {2, 3}, //159 + {2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, {2, 3}, {2, 3}, {2, 3}, {2, 3}, //175 + {2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, {2, 3}, {2, 3}, {2, 3}, {2, 3}, //191 + {1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3}, //207 + {1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3}, //223 + {1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3}, //239 + {1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3}, //255 + {0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, //271 + {0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, //287 + {0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, //303 + {0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, //319 + {0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, //335 + {0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, //351 + {0, 1}, { 0, 1}, { 0, 1}, { 0, 
1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, //367 + {0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, //383 + {0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, //399 + {0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, //415 + {0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, //431 + {0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, //447 + {0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, //463 + {0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, //479 + {0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, //495 + {0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1} //511 +}; + +const uint8_t g_kuiTotalZerosTable1[64][2] += { //read 6 bits, generated by tzVlcIndex=2 in Table 9-7 in H.264/AVC standard + {14, 6}, {13, 6}, {12, 6}, {11, 6}, {10, 5}, {10, 5}, {9, 5}, {9, 5}, {8, 4}, {8, 4}, {8, 4}, {8, 4}, {7, 4}, {7, 4}, {7, 4}, {7, 4}, //15 + { 6, 4}, { 6, 4}, { 6, 4}, { 6, 4}, { 5, 4}, { 5, 4}, {5, 4}, {5, 4}, {4, 3}, {4, 3}, {4, 3}, {4, 3}, {4, 3}, {4, 3}, {4, 3}, {4, 3}, //31 + { 3, 3}, { 3, 3}, { 3, 3}, { 3, 3}, { 3, 3}, { 3, 3}, {3, 3}, {3, 3}, {2, 3}, {2, 3}, {2, 3}, {2, 3}, {2, 3}, {2, 3}, {2, 3}, {2, 3}, //47 + { 1, 
3}, { 1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, { 1, 3}, {1, 3}, {1, 3}, {0, 3}, {0, 3}, {0, 3}, {0, 3}, {0, 3}, {0, 3}, {0, 3}, {0, 3} //63 +}; + +const uint8_t g_kuiTotalZerosTable2[64][2] += { //read 6 bits, generated by tzVlcIndex=3 in Table 9-7 in H.264/AVC standard + {13, 6}, {11, 6}, {12, 5}, {12, 5}, {10, 5}, {10, 5}, {9, 5}, {9, 5}, {8, 4}, {8, 4}, {8, 4}, {8, 4}, {5, 4}, {5, 4}, {5, 4}, {5, 4}, //15 + { 4, 4}, { 4, 4}, { 4, 4}, { 4, 4}, { 0, 4}, { 0, 4}, {0, 4}, {0, 4}, {7, 3}, {7, 3}, {7, 3}, {7, 3}, {7, 3}, {7, 3}, {7, 3}, {7, 3}, //31 + { 6, 3}, { 6, 3}, { 6, 3}, { 6, 3}, { 6, 3}, { 6, 3}, {6, 3}, {6, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3}, //47 + { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, {2, 3}, {2, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3} //63 +}; + +const uint8_t g_kuiTotalZerosTable3[32][2] += { //read 5 bits, generated by tzVlcIndex=4 in Table 9-7 in H.264/AVC standard + {12, 5}, {11, 5}, {10, 5}, {0, 5}, {9, 4}, {9, 4}, {7, 4}, {7, 4}, {3, 4}, {3, 4}, {2, 4}, {2, 4}, {8, 3}, {8, 3}, {8, 3}, {8, 3}, //15 + { 6, 3}, { 6, 3}, { 6, 3}, {6, 3}, {5, 3}, {5, 3}, {5, 3}, {5, 3}, {4, 3}, {4, 3}, {4, 3}, {4, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3}, //31 +}; + +const uint8_t g_kuiTotalZerosTable4[32][2] += { //read 5 bits, generated by tzVlcIndex=5 in Table 9-7 in H.264/AVC standard + {11, 5}, { 9, 5}, {10, 4}, {10, 4}, { 8, 4}, { 8, 4}, { 2, 4}, { 2, 4}, { 1, 4}, { 1, 4}, { 0, 4}, { 0, 4}, { 7, 3}, { 7, 3}, { 7, 3}, { 7, 3}, //15 + { 6, 3}, { 6, 3}, { 6, 3}, { 6, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 4, 3}, { 4, 3}, { 4, 3}, { 4, 3}, { 3, 3}, { 3, 3}, { 3, 3}, { 3, 3} //31 +}; + +const uint8_t g_kuiTotalZerosTable5[64][2] += { //read 6 bits, generated by tzVlcIndex=6 in Table 9-7 in H.264/AVC standard + {10, 6}, { 0, 6}, { 1, 5}, { 1, 5}, { 8, 4}, { 8, 4}, { 8, 4}, { 8, 4}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, //15 + { 7, 3}, { 7, 3}, { 7, 3}, { 7, 3}, { 7, 3}, { 7, 
3}, { 7, 3}, { 7, 3}, { 6, 3}, { 6, 3}, { 6, 3}, { 6, 3}, { 6, 3}, { 6, 3}, { 6, 3}, { 6, 3}, //31 + { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 4, 3}, { 4, 3}, { 4, 3}, { 4, 3}, { 4, 3}, { 4, 3}, { 4, 3}, { 4, 3}, //47 + { 3, 3}, { 3, 3}, { 3, 3}, { 3, 3}, { 3, 3}, { 3, 3}, { 3, 3}, { 3, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3} //63 +}; + +const uint8_t g_kuiTotalZerosTable6[64][2] += { //read 6 bits, generated by tzVlcIndex=7 in Table 9-7 in H.264/AVC standard + { 9, 6}, { 0, 6}, { 1, 5}, { 1, 5}, { 7, 4}, { 7, 4}, { 7, 4}, { 7, 4}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, //15 + { 6, 3}, { 6, 3}, { 6, 3}, { 6, 3}, { 6, 3}, { 6, 3}, { 6, 3}, { 6, 3}, { 4, 3}, { 4, 3}, { 4, 3}, { 4, 3}, { 4, 3}, { 4, 3}, { 4, 3}, { 4, 3}, //31 + { 3, 3}, { 3, 3}, { 3, 3}, { 3, 3}, { 3, 3}, { 3, 3}, { 3, 3}, { 3, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, //47 + { 5, 2}, { 5, 2}, { 5, 2}, { 5, 2}, { 5, 2}, { 5, 2}, { 5, 2}, { 5, 2}, { 5, 2}, { 5, 2}, { 5, 2}, { 5, 2}, { 5, 2}, { 5, 2}, { 5, 2}, { 5, 2} //63 +}; + +const uint8_t g_kuiTotalZerosTable7[64][2] += { //read 6 bits, generated by tzVlcIndex=8 in Table 9-7 in H.264/AVC standard + { 8, 6}, { 0, 6}, { 2, 5}, { 2, 5}, { 1, 4}, { 1, 4}, { 1, 4}, { 1, 4}, { 7, 3}, { 7, 3}, { 7, 3}, { 7, 3}, { 7, 3}, { 7, 3}, { 7, 3}, { 7, 3}, //15 + { 6, 3}, { 6, 3}, { 6, 3}, { 6, 3}, { 6, 3}, { 6, 3}, { 6, 3}, { 6, 3}, { 3, 3}, { 3, 3}, { 3, 3}, { 3, 3}, { 3, 3}, { 3, 3}, { 3, 3}, { 3, 3}, //31 + { 5, 2}, { 5, 2}, { 5, 2}, { 5, 2}, { 5, 2}, { 5, 2}, { 5, 2}, { 5, 2}, { 5, 2}, { 5, 2}, { 5, 2}, { 5, 2}, { 5, 2}, { 5, 2}, { 5, 2}, { 5, 2}, //47 + { 4, 2}, { 4, 2}, { 4, 2}, { 4, 2}, { 4, 2}, { 4, 2}, { 4, 2}, { 4, 2}, { 4, 2}, { 4, 2}, { 4, 2}, { 4, 2}, { 4, 2}, { 4, 2}, { 4, 2}, { 4, 2} //63 +}; + +const uint8_t g_kuiTotalZerosTable8[64][2] += { //read 6 bits, generated by tzVlcIndex=9 in Table 9-7 in H.264/AVC standard 
+ { 1, 6}, { 0, 6}, { 7, 5}, { 7, 5}, { 2, 4}, { 2, 4}, { 2, 4}, { 2, 4}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, { 5, 3}, //15 + { 6, 2}, { 6, 2}, { 6, 2}, { 6, 2}, { 6, 2}, { 6, 2}, { 6, 2}, { 6, 2}, { 6, 2}, { 6, 2}, { 6, 2}, { 6, 2}, { 6, 2}, { 6, 2}, { 6, 2}, { 6, 2}, //31 + { 4, 2}, { 4, 2}, { 4, 2}, { 4, 2}, { 4, 2}, { 4, 2}, { 4, 2}, { 4, 2}, { 4, 2}, { 4, 2}, { 4, 2}, { 4, 2}, { 4, 2}, { 4, 2}, { 4, 2}, { 4, 2}, //47 + { 3, 2}, { 3, 2}, { 3, 2}, { 3, 2}, { 3, 2}, { 3, 2}, { 3, 2}, { 3, 2}, { 3, 2}, { 3, 2}, { 3, 2}, { 3, 2}, { 3, 2}, { 3, 2}, { 3, 2}, { 3, 2} //63 +}; + +const uint8_t g_kuiTotalZerosTable9[32][2] += { //read 5 bits, generated by tzVlcIndex=10 in Table 9-7 in H.264/AVC standard + { 1, 5}, { 0, 5}, { 6, 4}, { 6, 4}, { 2, 3}, { 2, 3}, { 2, 3}, { 2, 3}, { 5, 2}, { 5, 2}, { 5, 2}, { 5, 2}, { 5, 2}, { 5, 2}, { 5, 2}, { 5, 2}, //15 + { 4, 2}, { 4, 2}, { 4, 2}, { 4, 2}, { 4, 2}, { 4, 2}, { 4, 2}, { 4, 2}, { 3, 2}, { 3, 2}, { 3, 2}, { 3, 2}, { 3, 2}, { 3, 2}, { 3, 2}, { 3, 2} //31 +}; + +const uint8_t g_kuiTotalZerosTable10[16][2] += { //read 4 bits, generated by tzVlcIndex=11 in Table 9-7 in H.264/AVC standard + { 0, 4}, { 1, 4}, { 2, 3}, { 2, 3}, { 3, 3}, { 3, 3}, { 5, 3}, { 5, 3}, { 4, 1}, { 4, 1}, { 4, 1}, { 4, 1}, { 4, 1}, { 4, 1}, { 4, 1}, { 4, 1} //15 +}; + +const uint8_t g_kuiTotalZerosTable11[16][2] += { //read 4 bits, generated by tzVlcIndex=12 in Table 9-7 in H.264/AVC standard + { 0, 4}, { 1, 4}, { 4, 3}, { 4, 3}, { 2, 2}, { 2, 2}, { 2, 2}, { 2, 2}, { 3, 1}, { 3, 1}, { 3, 1}, { 3, 1}, { 3, 1}, { 3, 1}, { 3, 1}, { 3, 1} //15 +}; + +const uint8_t g_kuiTotalZerosTable12[8][2] += { //read 3 bits, generated by tzVlcIndex=13 in Table 9-7 in H.264/AVC standard + { 0, 3}, { 1, 3}, { 3, 2}, { 3, 2}, { 2, 1}, { 2, 1}, { 2, 1}, { 2, 1} //8 +}; + +const uint8_t g_kuiTotalZerosTable13[4][2] += { //read 2 bits, generated by tzVlcIndex=14 in Table 9-7 in H.264/AVC standard + { 0, 2}, { 1, 2}, { 2, 1}, { 2, 1} +}; + +const 
uint8_t g_kuiTotalZerosTable14[2][2] += { //read 1 bits generated by tzVlcIndex=15 in Table 9-7 in H.264/AVC standard + { 0, 1}, { 1, 1} +}; + +const uint8_t g_kuiTotalZerosBitNumMap[15] = { + 9, 6, 6, 5, 5, 6, 6, 6, 6, 5, 4, 4, 3, 2, 1 +}; + + +const uint8_t g_kuiTotalZerosChromaTable0[8][2] += { //read 3 bits, generated by tzVlcIndex=1 in Table 9-9(a) in H.264/AVC standard + { 3, 3}, { 2, 3}, { 1, 2}, { 1, 2}, { 0, 1}, { 0, 1}, { 0, 1}, { 0, 1} +}; + +const uint8_t g_kuiTotalZerosChromaTable1[4][2] += { //read 2 bits, generated by tzVlcIndex=2 in Table 9-9(a) in H.264/AVC standard + { 2, 2}, { 1, 2}, { 0, 1}, { 0, 1} +}; + +const uint8_t g_kuiTotalZerosChromaTable2[2][2] += { //read 1 bits, generated by tzVlcIndex=3 in Table 9-9(a) in H.264/AVC standard + { 1, 1}, { 0, 1} +}; + +const uint8_t g_kuiTotalZerosBitNumChromaMap[3] = { + 3, 2, 1 +}; + +const uint8_t g_kuiZeroLeftTable0[2][2] = { //read 1 bits + {1, 1}, {0, 1} +}; + +const uint8_t g_kuiZeroLeftTable1[4][2] = { //read 2 bits + {2, 2}, {1, 2}, {0, 1}, {0, 1} +}; + +const uint8_t g_kuiZeroLeftTable2[4][2] = { //read 2 bits + {3, 2}, {2, 2}, {1, 2}, {0, 2} +}; + +const uint8_t g_kuiZeroLeftTable3[8][2] = { //read 3 bits + {4, 3}, {3, 3}, {2, 2}, {2, 2}, {1, 2}, {1, 2}, {0, 2}, {0, 2} +}; + +const uint8_t g_kuiZeroLeftTable4[8][2] = { //read 3 bits + {5, 3}, {4, 3}, {3, 3}, {2, 3}, {1, 2}, {1, 2}, {0, 2}, {0, 2} +}; + +const uint8_t g_kuiZeroLeftTable5[8][2] = { //read 3 bits + {1, 3}, {2, 3}, {4, 3}, {3, 3}, {6, 3}, {5, 3}, {0, 2}, {0, 2} +}; + +const uint8_t g_kuiZeroLeftTable6[8][2] = { //read 3 bits + {7, 3}, {6, 3}, {5, 3}, {4, 3}, {3, 3}, {2, 3}, {1, 3}, {0, 3} +}; + +const uint8_t g_kuiZeroLeftBitNumMap[16] = { + 0, 1, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 +}; + +} // namespace WelsDec diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/error_concealment.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/error_concealment.cpp new file mode 100644 
index 000000000..64da75469 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/error_concealment.cpp @@ -0,0 +1,480 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * error_concealment.cpp: Wels decoder error concealment implementation + */ + +#include "error_code.h" +#include "expand_pic.h" +#include "manage_dec_ref.h" +#include "copy_mb.h" +#include "error_concealment.h" +#include "cpu_core.h" + +namespace WelsDec { +//Init: install the block-copy routines used by the slice-copy and slice-MV-copy EC modes; any other eEcActiveIdc leaves pCtx unchanged +void InitErrorCon (PWelsDecoderContext pCtx) { + if ((pCtx->pParam->eEcActiveIdc == ERROR_CON_SLICE_COPY) + || (pCtx->pParam->eEcActiveIdc == ERROR_CON_SLICE_COPY_CROSS_IDR) + || (pCtx->pParam->eEcActiveIdc == ERROR_CON_SLICE_MV_COPY_CROSS_IDR) + || (pCtx->pParam->eEcActiveIdc == ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE) + || (pCtx->pParam->eEcActiveIdc == ERROR_CON_SLICE_COPY_CROSS_IDR_FREEZE_RES_CHANGE)) { + if ((pCtx->pParam->eEcActiveIdc != ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE) + && (pCtx->pParam->eEcActiveIdc != ERROR_CON_SLICE_COPY_CROSS_IDR_FREEZE_RES_CHANGE)) { + pCtx->bFreezeOutput = false; //the two *_FREEZE_RES_CHANGE modes keep whatever bFreezeOutput currently holds + } + pCtx->sCopyFunc.pCopyLumaFunc = WelsCopy16x16_c; + pCtx->sCopyFunc.pCopyChromaFunc = WelsCopy8x8_c; //defaults; replaced below when a SIMD build option is defined and the matching uiCpuFlag bit is set + +#if defined(X86_ASM) + if (pCtx->uiCpuFlag & WELS_CPU_MMXEXT) { + pCtx->sCopyFunc.pCopyChromaFunc = WelsCopy8x8_mmx; //aligned + } + + if (pCtx->uiCpuFlag & WELS_CPU_SSE2) { + pCtx->sCopyFunc.pCopyLumaFunc = WelsCopy16x16_sse2; //this is aligned copy; + } +#endif //X86_ASM + +#if defined(HAVE_NEON) + if (pCtx->uiCpuFlag & WELS_CPU_NEON) { + pCtx->sCopyFunc.pCopyLumaFunc = WelsCopy16x16_neon; //aligned + pCtx->sCopyFunc.pCopyChromaFunc = WelsCopy8x8_neon; //aligned + } +#endif //HAVE_NEON + +#if defined(HAVE_NEON_AARCH64) + if (pCtx->uiCpuFlag & WELS_CPU_NEON) { + pCtx->sCopyFunc.pCopyLumaFunc = WelsCopy16x16_AArch64_neon; //aligned + pCtx->sCopyFunc.pCopyChromaFunc = WelsCopy8x8_AArch64_neon; //aligned + } +#endif //HAVE_NEON_AARCH64 + } //TODO add more methods here + return; +} + +//Do error concealment using frame copy method: the whole picture is filled with 128 or copied from the previous decoded picture, and every macroblock is counted as concealed +void DoErrorConFrameCopy (PWelsDecoderContext pCtx) { + PPicture pDstPic = pCtx->pDec; + PPicture pSrcPic = 
pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb; + uint32_t uiHeightInPixelY = (pCtx->pSps->iMbHeight) << 4; + int32_t iStrideY = pDstPic->iLinesize[0]; + int32_t iStrideUV = pDstPic->iLinesize[1]; + pCtx->pDec->iMbEcedNum = pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight; + if ((pCtx->pParam->eEcActiveIdc == ERROR_CON_FRAME_COPY) && (pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt.bIdrFlag)) + pSrcPic = NULL; //no cross IDR method, should fill in data instead of copy + if (pSrcPic == NULL) { //no ref pic, fill all three planes with the constant 128 + memset (pDstPic->pData[0], 128, uiHeightInPixelY * iStrideY); + memset (pDstPic->pData[1], 128, (uiHeightInPixelY >> 1) * iStrideUV); + memset (pDstPic->pData[2], 128, (uiHeightInPixelY >> 1) * iStrideUV); + } else if (pSrcPic == pDstPic) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "DoErrorConFrameCopy()::EC memcpy overlap."); //source and destination are the same picture: only warn, nothing is copied + } else { //has ref pic here; NOTE(review): copy lengths use the pDstPic strides only - assumes pSrcPic has the same iLinesize values, confirm + memcpy (pDstPic->pData[0], pSrcPic->pData[0], uiHeightInPixelY * iStrideY); + memcpy (pDstPic->pData[1], pSrcPic->pData[1], (uiHeightInPixelY >> 1) * iStrideUV); + memcpy (pDstPic->pData[2], pSrcPic->pData[2], (uiHeightInPixelY >> 1) * iStrideUV); + } +} + + +//Do error concealment using slice copy method: each macroblock not flagged as correctly decoded is copied from the previous decoded picture, or filled with 128 when that picture is NULL or must not be used (ERROR_CON_SLICE_COPY on an IDR) +void DoErrorConSliceCopy (PWelsDecoderContext pCtx) { + int32_t iMbWidth = (int32_t) pCtx->pSps->iMbWidth; + int32_t iMbHeight = (int32_t) pCtx->pSps->iMbHeight; + PPicture pDstPic = pCtx->pDec; + PPicture pSrcPic = pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb; + if ((pCtx->pParam->eEcActiveIdc == ERROR_CON_SLICE_COPY) && (pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt.bIdrFlag)) + pSrcPic = NULL; //no cross IDR method, should fill in data instead of copy + + //uint8_t *pDstData[3], *pSrcData[3]; + bool* pMbCorrectlyDecodedFlag = pCtx->pCurDqLayer->pMbCorrectlyDecodedFlag; + //Do slice copy late + int32_t iMbXyIndex; + uint8_t* pSrcData, *pDstData; + uint32_t iSrcStride; // = pSrcPic->iLinesize[0]; + uint32_t iDstStride = pDstPic->iLinesize[0]; + if (pSrcPic == 
pDstPic) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "DoErrorConSliceCopy()::EC memcpy overlap."); + return; + } + for (int32_t iMbY = 0; iMbY < iMbHeight; ++iMbY) { + for (int32_t iMbX = 0; iMbX < iMbWidth; ++iMbX) { + iMbXyIndex = iMbY * iMbWidth + iMbX; + if (!pMbCorrectlyDecodedFlag[iMbXyIndex]) { + pCtx->pDec->iMbEcedNum++; + if (pSrcPic != NULL) { + iSrcStride = pSrcPic->iLinesize[0]; + //Y component: 16x16 block at MB position (iMbX, iMbY) + pDstData = pDstPic->pData[0] + iMbY * 16 * iDstStride + iMbX * 16; + pSrcData = pSrcPic->pData[0] + iMbY * 16 * iSrcStride + iMbX * 16; + pCtx->sCopyFunc.pCopyLumaFunc (pDstData, iDstStride, pSrcData, iSrcStride); + //U component; chroma stride is taken as half the luma stride, it is not read from iLinesize[1] + pDstData = pDstPic->pData[1] + iMbY * 8 * iDstStride / 2 + iMbX * 8; + pSrcData = pSrcPic->pData[1] + iMbY * 8 * iSrcStride / 2 + iMbX * 8; + pCtx->sCopyFunc.pCopyChromaFunc (pDstData, iDstStride / 2, pSrcData, iSrcStride / 2); + //V component + pDstData = pDstPic->pData[2] + iMbY * 8 * iDstStride / 2 + iMbX * 8; + pSrcData = pSrcPic->pData[2] + iMbY * 8 * iSrcStride / 2 + iMbX * 8; + pCtx->sCopyFunc.pCopyChromaFunc (pDstData, iDstStride / 2, pSrcData, iSrcStride / 2); + } else { //pSrcPic == NULL: paint the MB with 128 (16 luma rows of 16 bytes, 8 rows of 8 bytes per chroma plane) + //Y component + pDstData = pDstPic->pData[0] + iMbY * 16 * iDstStride + iMbX * 16; + for (int32_t i = 0; i < 16; ++i) { + memset (pDstData, 128, 16); + pDstData += iDstStride; + } + //U component + pDstData = pDstPic->pData[1] + iMbY * 8 * iDstStride / 2 + iMbX * 8; + for (int32_t i = 0; i < 8; ++i) { + memset (pDstData, 128, 8); + pDstData += iDstStride / 2; + } + //V component + pDstData = pDstPic->pData[2] + iMbY * 8 * iDstStride / 2 + iMbX * 8; + for (int32_t i = 0; i < 8; ++i) { + memset (pDstData, 128, 8); + pDstData += iDstStride / 2; + } + } // + } //!pMbCorrectlyDecodedFlag[iMbXyIndex] + } //iMbX + } //iMbY +} + +//Do error concealment using slice MV copy method, one macroblock at a time: co-located copy for IDR pictures or when pECRefPic[0] is NULL, otherwise a BaseMC call with the index-0 MV from pCtx->iECMVs; iMbXy is not referenced in the body +void DoMbECMvCopy (PWelsDecoderContext pCtx, PPicture pDec, PPicture pRef, int32_t iMbXy, int32_t iMbX, int32_t iMbY, + sMCRefMember* pMCRefMem) { + if (pDec == 
pRef) { + return; // for protection, shall never go into this logic, error info printed outside. + } + int16_t iMVs[2]; + int32_t iMbXInPix = iMbX << 4; //MB coordinates to luma pixels (16 per MB) + int32_t iMbYInPix = iMbY << 4; + int32_t iScale0; + int32_t iScale1; + uint8_t* pDst[3]; + int32_t iCurrPoc = pDec->iFramePoc; + pDst[0] = pDec->pData[0] + iMbXInPix + iMbYInPix * pMCRefMem->iDstLineLuma; + pDst[1] = pDec->pData[1] + (iMbXInPix >> 1) + (iMbYInPix >> 1) * pMCRefMem->iDstLineChroma; + pDst[2] = pDec->pData[2] + (iMbXInPix >> 1) + (iMbYInPix >> 1) * pMCRefMem->iDstLineChroma; + if (pDec->bIdrFlag == true || pCtx->pECRefPic[0] == NULL) { + uint8_t* pSrcData; + //Y component: IDR picture or no EC reference recorded, so copy the co-located block from the pMCRefMem source planes without any motion vector + pSrcData = pMCRefMem->pSrcY + iMbY * 16 * pMCRefMem->iSrcLineLuma + iMbX * 16; + pCtx->sCopyFunc.pCopyLumaFunc (pDst[0], pMCRefMem->iDstLineLuma, pSrcData, pMCRefMem->iSrcLineLuma); + //U component + pSrcData = pMCRefMem->pSrcU + iMbY * 8 * pMCRefMem->iSrcLineChroma + iMbX * 8; + pCtx->sCopyFunc.pCopyChromaFunc (pDst[1], pMCRefMem->iDstLineChroma, pSrcData, pMCRefMem->iSrcLineChroma); + //V component + pSrcData = pMCRefMem->pSrcV + iMbY * 8 * pMCRefMem->iSrcLineChroma + iMbX * 8; + pCtx->sCopyFunc.pCopyChromaFunc (pDst[2], pMCRefMem->iDstLineChroma, pSrcData, pMCRefMem->iSrcLineChroma); + return; + } + + if (pCtx->pECRefPic[0]) { + if (pCtx->pECRefPic[0] == pRef) { + iMVs[0] = pCtx->iECMVs[0][0]; + iMVs[1] = pCtx->iECMVs[0][1]; //EC reference is pRef itself: use the stored index-0 MV as is + } else { + iScale0 = pCtx->pECRefPic[0]->iFramePoc - iCurrPoc; + iScale1 = pRef->iFramePoc - iCurrPoc; //otherwise rescale that MV by the ratio of the two iFramePoc distances (0 when iScale0 is 0) + iMVs[0] = iScale0 == 0 ? 0 : pCtx->iECMVs[0][0] * iScale1 / iScale0; + iMVs[1] = iScale0 == 0 ? 
0 : pCtx->iECMVs[0][1] * iScale1 / iScale0; + } + pMCRefMem->pDstY = pDst[0]; + pMCRefMem->pDstU = pDst[1]; + pMCRefMem->pDstV = pDst[2]; + int32_t iFullMVx = (iMbXInPix << 2) + iMVs[0]; //quarter pixel: MB origin times 4 plus the MV + int32_t iFullMVy = (iMbYInPix << 2) + iMVs[1]; + // only pixels that will be output are used for EC; + int32_t iPicWidthLeftLimit = 0; + int32_t iPicHeightTopLimit = 0; + int32_t iPicWidthRightLimit = pMCRefMem->iPicWidth; + int32_t iPicHeightBottomLimit = pMCRefMem->iPicHeight; + if (pCtx->pSps->bFrameCroppingFlag) { + iPicWidthLeftLimit = 0 + pCtx->sFrameCrop.iLeftOffset * 2; + iPicWidthRightLimit = (pMCRefMem->iPicWidth - pCtx->sFrameCrop.iRightOffset * 2); + iPicHeightTopLimit = 0 + pCtx->sFrameCrop.iTopOffset * 2; + iPicHeightBottomLimit = (pMCRefMem->iPicHeight - pCtx->sFrameCrop.iTopOffset * 2); // NOTE(review): subtracts iTopOffset again while the right limit uses iRightOffset - looks like a bottom crop offset was intended, confirm against the sFrameCrop declaration + } + // further make sure no need to expand picture + int32_t iMinLeftOffset = (iPicWidthLeftLimit + 2) * (1 << 2); + int32_t iMaxRightOffset = ((iPicWidthRightLimit - 18) * (1 << 2)); + int32_t iMinTopOffset = (iPicHeightTopLimit + 2) * (1 << 2); + int32_t iMaxBottomOffset = ((iPicHeightBottomLimit - 18) * (1 << 2)); + if (iFullMVx < iMinLeftOffset) { + iFullMVx = (iFullMVx >> 2) * (1 << 2); //clear the two fractional bits + iFullMVx = WELS_MAX (iPicWidthLeftLimit, iFullMVx); // NOTE(review): this lower bound is not multiplied by 4 like the upper bound below - confirm the intended units + } else if (iFullMVx > iMaxRightOffset) { + iFullMVx = (iFullMVx >> 2) * (1 << 2); + iFullMVx = WELS_MIN (((iPicWidthRightLimit - 16) * (1 << 2)), iFullMVx); + } + if (iFullMVy < iMinTopOffset) { + iFullMVy = (iFullMVy >> 2) * (1 << 2); + iFullMVy = WELS_MAX (iPicHeightTopLimit, iFullMVy); // NOTE(review): same unit question as for the left limit above + } else if (iFullMVy > iMaxBottomOffset) { + iFullMVy = (iFullMVy >> 2) * (1 << 2); + iFullMVy = WELS_MIN (((iPicHeightBottomLimit - 16) * (1 << 2)), iFullMVy); + } + iMVs[0] = iFullMVx - (iMbXInPix << 2); + iMVs[1] = iFullMVy - (iMbYInPix << 2); //back to an MV relative to the MB origin + BaseMC (pCtx, pMCRefMem, -1, -1, iMbXInPix, iMbYInPix, &pCtx->sMcFunc, 16, 16, iMVs); + } + return; +} + +void GetAvilInfoFromCorrectMb (PWelsDecoderContext pCtx) { //Averages, per list-0 reference index, the MVs of correctly decoded inter MBs into pCtx->iECMVs and records the matching pictures in pCtx->pECRefPic + int32_t iMbWidth = (int32_t) 
pCtx->pSps->iMbWidth; + int32_t iMbHeight = (int32_t) pCtx->pSps->iMbHeight; + bool* pMbCorrectlyDecodedFlag = pCtx->pCurDqLayer->pMbCorrectlyDecodedFlag; + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + int32_t iInterMbCorrectNum[16]; /* number of MVs accumulated so far, per reference index */ + int32_t iMbXyIndex; + + int8_t iRefIdx; + memset (pCtx->iECMVs, 0, sizeof (int32_t) * 32); /* NOTE(review): assumes iECMVs holds 32 int32_t values (16 reference indices times 2 components) - declaration not visible here, confirm */ + memset (pCtx->pECRefPic, 0, sizeof (PPicture) * 16); + memset (iInterMbCorrectNum, 0, sizeof (int32_t) * 16); + + for (int32_t iMbY = 0; iMbY < iMbHeight; ++iMbY) { + for (int32_t iMbX = 0; iMbX < iMbWidth; ++iMbX) { + iMbXyIndex = iMbY * iMbWidth + iMbX; + if (pMbCorrectlyDecodedFlag[iMbXyIndex] && IS_INTER (pCurDqLayer->pDec->pMbType[iMbXyIndex])) { + uint32_t iMBType = pCurDqLayer->pDec->pMbType[iMbXyIndex]; + switch (iMBType) { + case MB_TYPE_SKIP: + case MB_TYPE_16x16: + iRefIdx = pCurDqLayer->pDec->pRefIndex[0][iMbXyIndex][0]; /* NOTE(review): iRefIdx is used as an array index with no range check - presumably 0..15 for a correctly decoded inter MB, confirm */ + pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][0][0]; + pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][0][1]; + pCtx->pECRefPic[iRefIdx] = pCtx->sRefPic.pRefList[LIST_0][iRefIdx]; + iInterMbCorrectNum[iRefIdx]++; + break; + case MB_TYPE_16x8: + iRefIdx = pCurDqLayer->pDec->pRefIndex[0][iMbXyIndex][0]; + pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][0][0]; + pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][0][1]; + pCtx->pECRefPic[iRefIdx] = pCtx->sRefPic.pRefList[LIST_0][iRefIdx]; + iInterMbCorrectNum[iRefIdx]++; + + iRefIdx = pCurDqLayer->pDec->pRefIndex[0][iMbXyIndex][8]; /* second partition of the 16x8 MB is read at block index 8 */ + pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][8][0]; + pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][8][1]; + pCtx->pECRefPic[iRefIdx] = pCtx->sRefPic.pRefList[LIST_0][iRefIdx]; + iInterMbCorrectNum[iRefIdx]++; + break; + case MB_TYPE_8x16: + iRefIdx = pCurDqLayer->pDec->pRefIndex[0][iMbXyIndex][0]; + pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][0][0]; + pCtx->iECMVs[iRefIdx][1] += 
pCurDqLayer->pDec->pMv[0][iMbXyIndex][0][1]; + pCtx->pECRefPic[iRefIdx] = pCtx->sRefPic.pRefList[LIST_0][iRefIdx]; + iInterMbCorrectNum[iRefIdx]++; + + iRefIdx = pCurDqLayer->pDec->pRefIndex[0][iMbXyIndex][2]; /* second partition of the 8x16 MB is read at block index 2 */ + pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][2][0]; + pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][2][1]; + pCtx->pECRefPic[iRefIdx] = pCtx->sRefPic.pRefList[LIST_0][iRefIdx]; + iInterMbCorrectNum[iRefIdx]++; + break; + case MB_TYPE_8x8: + case MB_TYPE_8x8_REF0: { + uint32_t iSubMBType; + int32_t i, j, iIIdx, iJIdx; + + for (i = 0; i < 4; i++) { + iSubMBType = pCurDqLayer->pSubMbType[iMbXyIndex][i]; + iIIdx = ((i >> 1) << 3) + ((i & 1) << 1); /* block index of 8x8 sub-block i: 0, 2, 8, 10 */ + iRefIdx = pCurDqLayer->pDec->pRefIndex[0][iMbXyIndex][iIIdx]; + pCtx->pECRefPic[iRefIdx] = pCtx->sRefPic.pRefList[LIST_0][iRefIdx]; /* recorded before the sub-type switch, so also for sub-types that reach the default case and add no MV */ + switch (iSubMBType) { + case SUB_MB_TYPE_8x8: + pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx][0]; + pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx][1]; + iInterMbCorrectNum[iRefIdx]++; + + break; + case SUB_MB_TYPE_8x4: + pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx][0]; + pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx][1]; + + + pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx + 4][0]; + pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx + 4][1]; + iInterMbCorrectNum[iRefIdx] += 2; /* two MVs per 8x4 sub-block: entry iIIdx and the entry four positions further */ + + break; + case SUB_MB_TYPE_4x8: + pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx][0]; + pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx][1]; + + + pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx + 1][0]; + pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx + 1][1]; + iInterMbCorrectNum[iRefIdx] += 2; /* two MVs per 4x8 sub-block: entry iIIdx and the next entry */ + break; + case SUB_MB_TYPE_4x4: { + for (j = 0; j < 4; j++) { + iJIdx = ((j >> 1) << 2) + (j & 1); /* offsets 0, 1, 4, 5 from iIIdx */ + pCtx->iECMVs[iRefIdx][0] 
+= pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx + iJIdx][0]; + pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx + iJIdx][1]; + } + iInterMbCorrectNum[iRefIdx] += 4; + } + break; + default: + break; + } + } + } + break; + default: + break; + } + } //pMbCorrectlyDecodedFlag[iMbXyIndex] + } //iMbX + } //iMbY + for (int32_t i = 0; i < 16; i++) { //turn each per-reference MV sum into an average; indices with no contribution keep the 0 from the initial memset + if (iInterMbCorrectNum[i]) { + pCtx->iECMVs[i][0] = pCtx->iECMVs[i][0] / iInterMbCorrectNum[i]; + pCtx->iECMVs[i][1] = pCtx->iECMVs[i][1] / iInterMbCorrectNum[i]; + } + } +} + +void DoErrorConSliceMVCopy (PWelsDecoderContext pCtx) { //Slice MV copy EC: every MB not flagged as correctly decoded goes through DoMbECMvCopy, or is filled with 128 when there is no previous decoded picture + int32_t iMbWidth = (int32_t) pCtx->pSps->iMbWidth; + int32_t iMbHeight = (int32_t) pCtx->pSps->iMbHeight; + PPicture pDstPic = pCtx->pDec; + PPicture pSrcPic = pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb; + + bool* pMbCorrectlyDecodedFlag = pCtx->pCurDqLayer->pMbCorrectlyDecodedFlag; + int32_t iMbXyIndex; + uint8_t* pDstData; + uint32_t iDstStride = pDstPic->iLinesize[0]; + sMCRefMember sMCRefMem; + if (pSrcPic != NULL) { //sMCRefMem is only filled in when a source picture exists + sMCRefMem.iSrcLineLuma = pSrcPic->iLinesize[0]; + sMCRefMem.iSrcLineChroma = pSrcPic->iLinesize[1]; + sMCRefMem.pSrcY = pSrcPic->pData[0]; + sMCRefMem.pSrcU = pSrcPic->pData[1]; + sMCRefMem.pSrcV = pSrcPic->pData[2]; + sMCRefMem.iDstLineLuma = pDstPic->iLinesize[0]; + sMCRefMem.iDstLineChroma = pDstPic->iLinesize[1]; + sMCRefMem.iPicWidth = pDstPic->iWidthInPixel; + sMCRefMem.iPicHeight = pDstPic->iHeightInPixel; + if (pDstPic == pSrcPic) { + // output error info and return without concealing anything (DoMbECMvCopy carries the same pDec == pRef guard) + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "DoErrorConSliceMVCopy()::EC memcpy overlap."); + return; + } + } + + for (int32_t iMbY = 0; iMbY < iMbHeight; ++iMbY) { + for (int32_t iMbX = 0; iMbX < iMbWidth; ++iMbX) { + iMbXyIndex = iMbY * iMbWidth + iMbX; + if (!pMbCorrectlyDecodedFlag[iMbXyIndex]) { + pCtx->pDec->iMbEcedNum++; + if (pSrcPic != NULL) { + DoMbECMvCopy (pCtx, pDstPic, pSrcPic, iMbXyIndex, iMbX, iMbY, &sMCRefMem); + } else { //pSrcPic 
== NULL + //Y component: 16 rows of 16 bytes set to 128 + pDstData = pDstPic->pData[0] + iMbY * 16 * iDstStride + iMbX * 16; + for (int32_t i = 0; i < 16; ++i) { + memset (pDstData, 128, 16); + pDstData += iDstStride; + } + //U component: 8 rows of 8 bytes; chroma stride is taken as iDstStride / 2 + pDstData = pDstPic->pData[1] + iMbY * 8 * iDstStride / 2 + iMbX * 8; + for (int32_t i = 0; i < 8; ++i) { + memset (pDstData, 128, 8); + pDstData += iDstStride / 2; + } + //V component + pDstData = pDstPic->pData[2] + iMbY * 8 * iDstStride / 2 + iMbX * 8; + for (int32_t i = 0; i < 8; ++i) { + memset (pDstData, 128, 8); + pDstData += iDstStride / 2; + } + } // + + } //!pMbCorrectlyDecodedFlag[iMbXyIndex] + } //iMbX + } //iMbY +} + +//Mark erroneous frame as Ref Pic into DPB: calls WelsMarkAsRef, returns its code on failure, otherwise runs ExpandReferencingPicture on pCtx->pDec and returns ERR_NONE +int32_t MarkECFrameAsRef (PWelsDecoderContext pCtx) { + int32_t iRet = WelsMarkAsRef (pCtx); + // Under EC mode, the ERR_INFO_DUPLICATE_FRAME_NUM does not need to be processed; NOTE(review): the check below still returns every non-ERR_NONE code unchanged - confirm this matches the intent + if (iRet != ERR_NONE) { + return iRet; + } + ExpandReferencingPicture (pCtx->pDec->pData, pCtx->pDec->iWidthInPixel, pCtx->pDec->iHeightInPixel, + pCtx->pDec->iLinesize, + pCtx->sExpandPicFunc.pfExpandLumaPicture, pCtx->sExpandPicFunc.pfExpandChromaPicture); + + return ERR_NONE; +} + +bool NeedErrorCon (PWelsDecoderContext pCtx) { //true as soon as one of the iMbWidth * iMbHeight macroblocks is not flagged as correctly decoded + bool bNeedEC = false; + int32_t iMbNum = pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight; + for (int32_t i = 0; i < iMbNum; ++i) { + if (!pCtx->pCurDqLayer->pMbCorrectlyDecodedFlag[i]) { + bNeedEC = true; + break; + } + } + return bNeedEC; +} + +// ImplementErrorCon +// Do actual error concealment: dispatch on eEcActiveIdc to the frame copy, slice copy or slice MV copy routine +void ImplementErrorCon (PWelsDecoderContext pCtx) { + if (ERROR_CON_DISABLE == pCtx->pParam->eEcActiveIdc) { + pCtx->iErrorCode |= dsBitstreamError; //EC disabled: only flag the bitstream error and return, no concealment routine is called + return; + } else if ((ERROR_CON_FRAME_COPY == pCtx->pParam->eEcActiveIdc) + || (ERROR_CON_FRAME_COPY_CROSS_IDR == pCtx->pParam->eEcActiveIdc)) { + DoErrorConFrameCopy (pCtx); + } else if ((ERROR_CON_SLICE_COPY == pCtx->pParam->eEcActiveIdc) + || (ERROR_CON_SLICE_COPY_CROSS_IDR == pCtx->pParam->eEcActiveIdc) + || 
(ERROR_CON_SLICE_COPY_CROSS_IDR_FREEZE_RES_CHANGE == pCtx->pParam->eEcActiveIdc)) { + DoErrorConSliceCopy (pCtx); + } else if ((ERROR_CON_SLICE_MV_COPY_CROSS_IDR == pCtx->pParam->eEcActiveIdc) + || (ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE == pCtx->pParam->eEcActiveIdc)) { + GetAvilInfoFromCorrectMb (pCtx); + DoErrorConSliceMVCopy (pCtx); + } //TODO add other EC methods here in the future + pCtx->iErrorCode |= dsDataErrorConcealed; //set for every mode except ERROR_CON_DISABLE, even when no branch above matched + pCtx->pDec->bIsComplete = false; // Set complete flag to false after doing EC. +} + +} // namespace WelsDec diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/fmo.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/fmo.cpp new file mode 100644 index 000000000..efaa1f112 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/fmo.cpp @@ -0,0 +1,326 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file fmo.c + * + * \brief Flexible Macroblock Ordering implementation + * + * \date 2/4/2009 Created + * + ************************************************************************************* + */ + +#include "fmo.h" +#include "memory_align.h" +#include "error_code.h" + +namespace WelsDec { + +/*! + * \brief Generate MB allocated map for interleaved slice group (TYPE 0): MBs are assigned in index order in runs of pPps->uiRunLength[group], cycling through the slice groups until iCountMbNum entries are written + * + * \param pFmo fmo context; its pMbAllocMap is filled in and iCountMbNum gives the number of entries + * \param pPps pps context; uiNumSliceGroups and uiRunLength[] are read + * + * \return 0 - successful; non-zero - failed + */ +static inline int32_t FmoGenerateMbAllocMapType0 (PFmo pFmo, PPps pPps) { + uint32_t uiNumSliceGroups = 0; + int32_t iMbNum = 0; + int32_t i = 0; + + WELS_VERIFY_RETURN_IF (ERR_INFO_INVALID_PARAM, (NULL == pFmo || NULL == pPps)) + uiNumSliceGroups = pPps->uiNumSliceGroups; + iMbNum = pFmo->iCountMbNum; + WELS_VERIFY_RETURN_IF (ERR_INFO_INVALID_PARAM, (NULL == pFmo->pMbAllocMap || iMbNum <= 0 + || uiNumSliceGroups > MAX_SLICEGROUP_IDS)) + + do { + uint8_t uiGroup = 0; + do { + const int32_t kiRunIdx = pPps->uiRunLength[uiGroup]; /* run length of the current group; NOTE(review): the inner loop writes one entry even for a run length of 0, and i would stop advancing if every run length were 0 - presumably PPS parsing guarantees values of at least 1, confirm */ + int32_t j = 0; + do { + pFmo->pMbAllocMap[i + j] = uiGroup; + ++ j; + } while (j < kiRunIdx && i + j < iMbNum); + i += kiRunIdx; + ++ uiGroup; + } while (uiGroup < uiNumSliceGroups && i < iMbNum); + } while (i < iMbNum); + + return ERR_NONE; // well here +} + +/*! 
+ * \brief Generate MB allocated map for dispersed slice group (TYPE 1) + * + * \param pFmo fmo context + * \param pPps pps context + * \param iMbWidth MB width + * + * \return 0 - successful; none 0 - failed + */ +static inline int32_t FmoGenerateMbAllocMapType1 (PFmo pFmo, PPps pPps, const int32_t kiMbWidth) { + uint32_t uiNumSliceGroups = 0; + int32_t iMbNum = 0; + int32_t i = 0; + WELS_VERIFY_RETURN_IF (ERR_INFO_INVALID_PARAM, (NULL == pFmo || NULL == pPps)) + uiNumSliceGroups = pPps->uiNumSliceGroups; + iMbNum = pFmo->iCountMbNum; + WELS_VERIFY_RETURN_IF (ERR_INFO_INVALID_PARAM, (NULL == pFmo->pMbAllocMap || iMbNum <= 0 || kiMbWidth == 0 + || uiNumSliceGroups > MAX_SLICEGROUP_IDS)) + + do { + pFmo->pMbAllocMap[i] = (uint8_t) (((i % kiMbWidth) + (((i / kiMbWidth) * uiNumSliceGroups) >> 1)) % uiNumSliceGroups); + ++ i; + } while (i < iMbNum); + + return ERR_NONE; // map fully generated +} + +/*! + * \brief (Re)allocate the MB allocated map for kiMbWidth * kiMbHeight MBs and fill it for the PPS slice group map type (only TYPE 0 and TYPE 1 are generated; TYPE 2..6 are reserved and return 1) + * NOTE(review): the map is freed and re-allocated on every call but only regenerated when the map type or group count differs from the values cached in pFmo (bResolutionChanged is never set to true) - confirm a size-only change cannot leave the new map unfilled. + * \param pFmo fmo context + * \param kpPps pps context + * \param kiMbWidth MB width + * \param kiMbHeight MB height + * \param pMa memory allocator used to free and re-allocate pFmo->pMbAllocMap (dereferenced without a NULL check) + * \return 0 (ERR_NONE) - successful; non-zero error code - failed + */ +static inline int32_t FmoGenerateSliceGroup (PFmo pFmo, const PPps kpPps, const int32_t kiMbWidth, + const int32_t kiMbHeight, CMemoryAlign* pMa) { + int32_t iNumMb = 0; + int32_t iErr = 0; + bool bResolutionChanged = false; + + // reject NULL contexts up front + WELS_VERIFY_RETURN_IF (ERR_INFO_INVALID_PARAM, (NULL == pFmo || NULL == kpPps)) + + iNumMb = kiMbWidth * kiMbHeight; + + if (0 == iNumMb) + return ERR_INFO_INVALID_PARAM; + + pMa->WelsFree (pFmo->pMbAllocMap, "_fmo->pMbAllocMap"); + pFmo->pMbAllocMap = (uint8_t*)pMa->WelsMallocz (iNumMb * sizeof (uint8_t), "_fmo->pMbAllocMap"); + WELS_VERIFY_RETURN_IF (ERR_INFO_OUT_OF_MEMORY, (NULL == pFmo->pMbAllocMap)) // out of memory + + pFmo->iCountMbNum = iNumMb; + + if (kpPps->uiNumSliceGroups < 2 && iNumMb > 0) { // fewer than two slice groups: the whole picture is a single slice group 
+ memset (pFmo->pMbAllocMap, 0, iNumMb * sizeof (int8_t)); // every MB maps to slice group 0 + + pFmo->iSliceGroupCount = 1; + + return ERR_NONE; + } + + if (bResolutionChanged || ((int32_t)kpPps->uiSliceGroupMapType != pFmo->iSliceGroupType) + || ((int32_t)kpPps->uiNumSliceGroups != pFmo->iSliceGroupCount)) { + switch (kpPps->uiSliceGroupMapType) { + case 0: + iErr = FmoGenerateMbAllocMapType0 (pFmo, kpPps); + break; + case 1: + iErr = FmoGenerateMbAllocMapType1 (pFmo, kpPps, kiMbWidth); + break; + case 2: + case 3: + case 4: + case 5: + case 6: + // slice group map types 2..6 are reserved (not implemented): report failure + iErr = 1; + break; + default: + return ERR_INFO_UNSUPPORTED_FMOTYPE; + } + } + + if (0 == iErr) { // success: cache the group count and map type + pFmo->iSliceGroupCount = kpPps->uiNumSliceGroups; + pFmo->iSliceGroupType = kpPps->uiSliceGroupMapType; + } + + return iErr; +} + +/*! + * \brief Initialize Wels Flexible Macroblock Ordering (FMO) + * Thin wrapper that forwards all arguments to FmoGenerateSliceGroup(). + * \param pFmo Wels fmo to be initialized + * \param pPps pps argument + * \param kiMbWidth mb width + * \param kiMbHeight mb height + * \param pMa memory allocator passed through to FmoGenerateSliceGroup() + * \return 0 - successful; non-zero - failed; + */ +int32_t InitFmo (PFmo pFmo, PPps pPps, const int32_t kiMbWidth, const int32_t kiMbHeight, CMemoryAlign* pMa) { + return FmoGenerateSliceGroup (pFmo, pPps, kiMbWidth, kiMbHeight, pMa); +} + + +/*! 
+ * \brief Uninitialize Wels Flexible Macroblock Ordering (FMO) list + * + * \param pFmo Wels base fmo ptr to be uninitialized + * \param kiCnt count number of PPS per list + * \param kiAvail count available number of PPS in list + * + * \return NONE + */ +void UninitFmoList (PFmo pFmo, const int32_t kiCnt, const int32_t kiAvail, CMemoryAlign* pMa) { + PFmo pIter = pFmo; + int32_t i = 0; + int32_t iFreeNodes = 0; + + if (NULL == pIter || kiAvail <= 0 || kiCnt < kiAvail) + return; + + while (i < kiCnt) { + if (pIter != NULL && pIter->bActiveFlag) { + if (NULL != pIter->pMbAllocMap) { + pMa->WelsFree (pIter->pMbAllocMap, "pIter->pMbAllocMap"); + + pIter->pMbAllocMap = NULL; + } + pIter->iSliceGroupCount = 0; + pIter->iSliceGroupType = -1; + pIter->iCountMbNum = 0; + pIter->bActiveFlag = false; + ++ iFreeNodes; + if (iFreeNodes >= kiAvail) + break; + } + ++ pIter; + ++ i; + } +} + +/*! + * \brief Detect whether the FMO-relevant parameters differ from the values cached in pFmo + * + * \param pFmo fmo context + * \param kiCountNumMb (iMbWidth * iMbHeight) in Sps + * \param kiSliceGroupType slice group type if fmo is exactly enabled + * \param kiSliceGroupCount slice group count if fmo is exactly enabled + * + * \return true - changed or pFmo not active yet; false - nothing changed (also returned when pFmo is NULL) + */ +bool FmoParamSetsChanged (PFmo pFmo, const int32_t kiCountNumMb, const int32_t kiSliceGroupType, + const int32_t kiSliceGroupCount) { + WELS_VERIFY_RETURN_IF (false, (NULL == pFmo)) + + return ((!pFmo->bActiveFlag) + || (kiCountNumMb != pFmo->iCountMbNum) + || (kiSliceGroupType != pFmo->iSliceGroupType) + || (kiSliceGroupCount != pFmo->iSliceGroupCount)); +} + +/*! 
+ * \brief Re-initialize the FMO context when FmoParamSetsChanged() reports a change, and mark it active on first successful initialization + * + * \param pFmo FMO context + * \param pSps SPS (supplies iMbWidth / iMbHeight) + * \param pPps PPS (supplies uiSliceGroupMapType / uiNumSliceGroups) + * \param pActiveFmoNum int32_t* [in/out] count of active FMO contexts; incremented when pFmo becomes active and the count is below MAX_PPS_COUNT + * \param pMa memory allocator forwarded to InitFmo() + * \return ERR_NONE - updated successfully or nothing to update; otherwise the error code returned by InitFmo(); + */ +int32_t FmoParamUpdate (PFmo pFmo, PSps pSps, PPps pPps, int32_t* pActiveFmoNum, CMemoryAlign* pMa) { + const uint32_t kuiMbWidth = pSps->iMbWidth; + const uint32_t kuiMbHeight = pSps->iMbHeight; + int32_t iRet = ERR_NONE; + if (FmoParamSetsChanged (pFmo, kuiMbWidth * kuiMbHeight, pPps->uiSliceGroupMapType, pPps->uiNumSliceGroups)) { + iRet = InitFmo (pFmo, pPps, kuiMbWidth, kuiMbHeight, pMa); + WELS_VERIFY_RETURN_IF (iRet, iRet); + + if (!pFmo->bActiveFlag && *pActiveFmoNum < MAX_PPS_COUNT) { + ++ (*pActiveFmoNum); + pFmo->bActiveFlag = true; + } + } + return iRet; +} + +/*! + * \brief Convert kiMbXy to slice group idc correspondingly + * + * \param pFmo Wels fmo context + * \param kiMbXy MB index to be converted + * + * \return slice group idc - successful; -1 - kiMbXy out of range or map not allocated; + */ +int32_t FmoMbToSliceGroup (PFmo pFmo, const MB_XY_T kiMbXy) { + const int32_t kiMbNum = pFmo->iCountMbNum; + const uint8_t* kpMbMap = pFmo->pMbAllocMap; + + if (kiMbXy < 0 || kiMbXy >= kiMbNum || kpMbMap == NULL) + return -1; + + return kpMbMap[ kiMbXy ]; +} + +/*! 
+ * \brief Get the next MB after kiMbXy that belongs to the same slice group + * + * \param pFmo Wels fmo context + * \param kiMbXy current MB index + * + * \return index of the next MB in the same slice group - successful; -1 - kiMbXy invalid or no further MB in the group; + */ +MB_XY_T FmoNextMb (PFmo pFmo, const MB_XY_T kiMbXy) { + const int32_t kiTotalMb = pFmo->iCountMbNum; + const uint8_t* kpMbMap = pFmo->pMbAllocMap; + MB_XY_T iNextMb = kiMbXy; + const uint8_t kuiSliceGroupIdc = (uint8_t)FmoMbToSliceGroup (pFmo, kiMbXy); + + if (kuiSliceGroupIdc == (uint8_t) (-1)) + return -1; + + do { + ++ iNextMb; + if (iNextMb >= kiTotalMb) { + iNextMb = -1; + break; + } + if (kpMbMap[iNextMb] == kuiSliceGroupIdc) { + break; + } + } while (1); + + // -1: No further MB in this slice (could be end of picture) + return iNextMb; +} + +} // namespace WelsDec diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/get_intra_predictor.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/get_intra_predictor.cpp new file mode 100644 index 000000000..0ec74535b --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/get_intra_predictor.cpp @@ -0,0 +1,1157 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file get_intra_predictor.c + * + * \brief implementation for get intra predictor about 16x16, 4x4, chroma. + * + * \date 4/2/2009 Created + * 9/14/2009 C level based optimization with high performance gained. + * [const, using ST32/ST64 to replace memset, memcpy and memmove etc.] 
+ * + ************************************************************************************* + */ +#include + +#include "macros.h" +#include "ls_defines.h" +#include "get_intra_predictor.h" + +namespace WelsDec { + +#define I4x4_COUNT 4 +#define I8x8_COUNT 8 +#define I16x16_COUNT 16 + +void WelsI4x4LumaPredV_c (uint8_t* pPred, const int32_t kiStride) { + const uint32_t kuiVal = LD32A4 (pPred - kiStride); + + ST32A4 (pPred, kuiVal); + ST32A4 (pPred + kiStride, kuiVal); + ST32A4 (pPred + (kiStride << 1), kuiVal); + ST32A4 (pPred + (kiStride << 1) + kiStride, kuiVal); +} + +void WelsI4x4LumaPredH_c (uint8_t* pPred, const int32_t kiStride) { + const int32_t kiStride2 = kiStride << 1; + const int32_t kiStride3 = kiStride2 + kiStride; + const uint32_t kuiL0 = 0x01010101U * pPred[-1 ]; + const uint32_t kuiL1 = 0x01010101U * pPred[-1 + kiStride ]; + const uint32_t kuiL2 = 0x01010101U * pPred[-1 + kiStride2]; + const uint32_t kuiL3 = 0x01010101U * pPred[-1 + kiStride3]; + + ST32A4 (pPred, kuiL0); + ST32A4 (pPred + kiStride, kuiL1); + ST32A4 (pPred + kiStride2, kuiL2); + ST32A4 (pPred + kiStride3, kuiL3); +} + +void WelsI4x4LumaPredDc_c (uint8_t* pPred, const int32_t kiStride) { + const int32_t kiStride2 = kiStride << 1; + const int32_t kiStride3 = kiStride2 + kiStride; + const uint8_t kuiMean = (pPred[-1] + pPred[-1 + kiStride] + pPred[-1 + kiStride2] + pPred[-1 + kiStride3] + + pPred[-kiStride] + pPred[-kiStride + 1] + pPred[-kiStride + 2] + pPred[-kiStride + 3] + 4) >> 3; + const uint32_t kuiMean32 = 0x01010101U * kuiMean; + + ST32A4 (pPred, kuiMean32); + ST32A4 (pPred + kiStride, kuiMean32); + ST32A4 (pPred + kiStride2, kuiMean32); + ST32A4 (pPred + kiStride3, kuiMean32); +} + +void WelsI4x4LumaPredDcLeft_c (uint8_t* pPred, const int32_t kiStride) { + const int32_t kiStride2 = kiStride << 1; + const int32_t kiStride3 = kiStride2 + kiStride; + const uint8_t kuiMean = (pPred[-1] + pPred[-1 + kiStride] + pPred[-1 + kiStride2] + pPred[-1 + kiStride3] + 2) >> 2; + const 
uint32_t kuiMean32 = 0x01010101U * kuiMean; + + ST32A4 (pPred, kuiMean32); + ST32A4 (pPred + kiStride, kuiMean32); + ST32A4 (pPred + kiStride2, kuiMean32); + ST32A4 (pPred + kiStride3, kuiMean32); +} + +void WelsI4x4LumaPredDcTop_c (uint8_t* pPred, const int32_t kiStride) { + const int32_t kiStride2 = kiStride << 1; + const int32_t kiStride3 = kiStride2 + kiStride; + const uint8_t kuiMean = (pPred[-kiStride] + pPred[-kiStride + 1] + pPred[-kiStride + 2] + pPred[-kiStride + 3] + 2) + >> 2; + const uint32_t kuiMean32 = 0x01010101U * kuiMean; + + ST32A4 (pPred, kuiMean32); + ST32A4 (pPred + kiStride, kuiMean32); + ST32A4 (pPred + kiStride2, kuiMean32); + ST32A4 (pPred + kiStride3, kuiMean32); +} + +void WelsI4x4LumaPredDcNA_c (uint8_t* pPred, const int32_t kiStride) { + const uint32_t kuiDC32 = 0x80808080U; + + ST32A4 (pPred, kuiDC32); + ST32A4 (pPred + kiStride, kuiDC32); + ST32A4 (pPred + (kiStride << 1), kuiDC32); + ST32A4 (pPred + (kiStride << 1) + kiStride, kuiDC32); +} + +/*down pLeft*/ +void WelsI4x4LumaPredDDL_c (uint8_t* pPred, const int32_t kiStride) { + const int32_t kiStride2 = kiStride << 1; + const int32_t kiStride3 = kiStride + kiStride2; + /*get pTop*/ + uint8_t* ptop = &pPred[-kiStride]; + const uint8_t kuiT0 = *ptop; + const uint8_t kuiT1 = * (ptop + 1); + const uint8_t kuiT2 = * (ptop + 2); + const uint8_t kuiT3 = * (ptop + 3); + const uint8_t kuiT4 = * (ptop + 4); + const uint8_t kuiT5 = * (ptop + 5); + const uint8_t kuiT6 = * (ptop + 6); + const uint8_t kuiT7 = * (ptop + 7); + const uint8_t kuiDDL0 = (2 + kuiT0 + kuiT2 + (kuiT1 << 1)) >> 2; // kDDL0 + const uint8_t kuiDDL1 = (2 + kuiT1 + kuiT3 + (kuiT2 << 1)) >> 2; // kDDL1 + const uint8_t kuiDDL2 = (2 + kuiT2 + kuiT4 + (kuiT3 << 1)) >> 2; // kDDL2 + const uint8_t kuiDDL3 = (2 + kuiT3 + kuiT5 + (kuiT4 << 1)) >> 2; // kDDL3 + const uint8_t kuiDDL4 = (2 + kuiT4 + kuiT6 + (kuiT5 << 1)) >> 2; // kDDL4 + const uint8_t kuiDDL5 = (2 + kuiT5 + kuiT7 + (kuiT6 << 1)) >> 2; // kDDL5 + const uint8_t kuiDDL6 = 
(2 + kuiT6 + kuiT7 + (kuiT7 << 1)) >> 2; // kDDL6 + const uint8_t kuiList[8] = { kuiDDL0, kuiDDL1, kuiDDL2, kuiDDL3, kuiDDL4, kuiDDL5, kuiDDL6, 0 }; + + ST32A4 (pPred, LD32 (kuiList)); + ST32A4 (pPred + kiStride, LD32 (kuiList + 1)); + ST32A4 (pPred + kiStride2, LD32 (kuiList + 2)); + ST32A4 (pPred + kiStride3, LD32 (kuiList + 3)); +} + +/*down pLeft*/ +void WelsI4x4LumaPredDDLTop_c (uint8_t* pPred, const int32_t kiStride) { + const int32_t kiStride2 = kiStride << 1; + const int32_t kiStride3 = kiStride + kiStride2; + /*get pTop*/ + uint8_t* ptop = &pPred[-kiStride]; + const uint8_t kuiT0 = *ptop; + const uint8_t kuiT1 = * (ptop + 1); + const uint8_t kuiT2 = * (ptop + 2); + const uint8_t kuiT3 = * (ptop + 3); + const uint16_t kuiT01 = 1 + kuiT0 + kuiT1; + const uint16_t kuiT12 = 1 + kuiT1 + kuiT2; + const uint16_t kuiT23 = 1 + kuiT2 + kuiT3; + const uint16_t kuiT33 = 1 + (kuiT3 << 1); + const uint8_t kuiDLT0 = (kuiT01 + kuiT12) >> 2; // kDLT0 + const uint8_t kuiDLT1 = (kuiT12 + kuiT23) >> 2; // kDLT1 + const uint8_t kuiDLT2 = (kuiT23 + kuiT33) >> 2; // kDLT2 + const uint8_t kuiDLT3 = kuiT33 >> 1; // kDLT3 + const uint8_t kuiList[8] = { kuiDLT0, kuiDLT1, kuiDLT2, kuiDLT3, kuiDLT3, kuiDLT3, kuiDLT3, kuiDLT3 }; + + ST32A4 (pPred, LD32 (kuiList)); + ST32A4 (pPred + kiStride, LD32 (kuiList + 1)); + ST32A4 (pPred + kiStride2, LD32 (kuiList + 2)); + ST32A4 (pPred + kiStride3, LD32 (kuiList + 3)); +} + + +/*down right*/ +void WelsI4x4LumaPredDDR_c (uint8_t* pPred, const int32_t kiStride) { + const int32_t kiStride2 = kiStride << 1; + const int32_t kiStride3 = kiStride + kiStride2; + uint8_t* ptopleft = &pPred[- (kiStride + 1)]; + uint8_t* pleft = &pPred[-1]; + const uint8_t kuiLT = *ptopleft; + /*get pLeft and pTop*/ + const uint8_t kuiL0 = *pleft; + const uint8_t kuiL1 = * (pleft + kiStride); + const uint8_t kuiL2 = * (pleft + kiStride2); + const uint8_t kuiL3 = * (pleft + kiStride3); + const uint8_t kuiT0 = * (ptopleft + 1); + const uint8_t kuiT1 = * (ptopleft + 2); + 
const uint8_t kuiT2 = * (ptopleft + 3); + const uint8_t kuiT3 = * (ptopleft + 4); + const uint16_t kuiTL0 = 1 + kuiLT + kuiL0; + const uint16_t kuiLT0 = 1 + kuiLT + kuiT0; + const uint16_t kuiT01 = 1 + kuiT0 + kuiT1; + const uint16_t kuiT12 = 1 + kuiT1 + kuiT2; + const uint16_t kuiT23 = 1 + kuiT2 + kuiT3; + const uint16_t kuiL01 = 1 + kuiL0 + kuiL1; + const uint16_t kuiL12 = 1 + kuiL1 + kuiL2; + const uint16_t kuiL23 = 1 + kuiL2 + kuiL3; + const uint8_t kuiDDR0 = (kuiTL0 + kuiLT0) >> 2; // kuiDDR0 + const uint8_t kuiDDR1 = (kuiLT0 + kuiT01) >> 2; // kuiDDR1 + const uint8_t kuiDDR2 = (kuiT01 + kuiT12) >> 2; // kuiDDR2 + const uint8_t kuiDDR3 = (kuiT12 + kuiT23) >> 2; // kuiDDR3 + const uint8_t kuiDDR4 = (kuiTL0 + kuiL01) >> 2; // kuiDDR4 + const uint8_t kuiDDR5 = (kuiL01 + kuiL12) >> 2; // kuiDDR5 + const uint8_t kuiDDR6 = (kuiL12 + kuiL23) >> 2; // kuiDDR6 + const uint8_t kuiList[8] = { kuiDDR6, kuiDDR5, kuiDDR4, kuiDDR0, kuiDDR1, kuiDDR2, kuiDDR3, 0 }; + + ST32A4 (pPred, LD32 (kuiList + 3)); + ST32A4 (pPred + kiStride, LD32 (kuiList + 2)); + ST32A4 (pPred + kiStride2, LD32 (kuiList + 1)); + ST32A4 (pPred + kiStride3, LD32 (kuiList)); +} + + +/*vertical pLeft*/ +void WelsI4x4LumaPredVL_c (uint8_t* pPred, const int32_t kiStride) { + const int32_t kiStride2 = kiStride << 1; + const int32_t kiStride3 = kiStride + kiStride2; + uint8_t* ptopleft = &pPred[- (kiStride + 1)]; + /*get pTop*/ + const uint8_t kuiT0 = * (ptopleft + 1); + const uint8_t kuiT1 = * (ptopleft + 2); + const uint8_t kuiT2 = * (ptopleft + 3); + const uint8_t kuiT3 = * (ptopleft + 4); + const uint8_t kuiT4 = * (ptopleft + 5); + const uint8_t kuiT5 = * (ptopleft + 6); + const uint8_t kuiT6 = * (ptopleft + 7); + const uint16_t kuiT01 = 1 + kuiT0 + kuiT1; + const uint16_t kuiT12 = 1 + kuiT1 + kuiT2; + const uint16_t kuiT23 = 1 + kuiT2 + kuiT3; + const uint16_t kuiT34 = 1 + kuiT3 + kuiT4; + const uint16_t kuiT45 = 1 + kuiT4 + kuiT5; + const uint16_t kuiT56 = 1 + kuiT5 + kuiT6; + const uint8_t kuiVL0 = 
kuiT01 >> 1; // kuiVL0 + const uint8_t kuiVL1 = kuiT12 >> 1; // kuiVL1 + const uint8_t kuiVL2 = kuiT23 >> 1; // kuiVL2 + const uint8_t kuiVL3 = kuiT34 >> 1; // kuiVL3 + const uint8_t kuiVL4 = kuiT45 >> 1; // kuiVL4 + const uint8_t kuiVL5 = (kuiT01 + kuiT12) >> 2; // kuiVL5 + const uint8_t kuiVL6 = (kuiT12 + kuiT23) >> 2; // kuiVL6 + const uint8_t kuiVL7 = (kuiT23 + kuiT34) >> 2; // kuiVL7 + const uint8_t kuiVL8 = (kuiT34 + kuiT45) >> 2; // kuiVL8 + const uint8_t kuiVL9 = (kuiT45 + kuiT56) >> 2; // kuiVL9 + const uint8_t kuiList[10] = { kuiVL0, kuiVL1, kuiVL2, kuiVL3, kuiVL4, kuiVL5, kuiVL6, kuiVL7, kuiVL8, kuiVL9 }; + + ST32A4 (pPred, LD32 (kuiList)); + ST32A4 (pPred + kiStride, LD32 (kuiList + 5)); + ST32A4 (pPred + kiStride2, LD32 (kuiList + 1)); + ST32A4 (pPred + kiStride3, LD32 (kuiList + 6)); +} + +/*vertical pLeft*/ +void WelsI4x4LumaPredVLTop_c (uint8_t* pPred, const int32_t kiStride) { + const int32_t kiStride2 = kiStride << 1; + const int32_t kiStride3 = kiStride + kiStride2; + uint8_t* ptopleft = &pPred[- (kiStride + 1)]; + /*get pTop*/ + const uint8_t kuiT0 = * (ptopleft + 1); + const uint8_t kuiT1 = * (ptopleft + 2); + const uint8_t kuiT2 = * (ptopleft + 3); + const uint8_t kuiT3 = * (ptopleft + 4); + const uint16_t kuiT01 = 1 + kuiT0 + kuiT1; + const uint16_t kuiT12 = 1 + kuiT1 + kuiT2; + const uint16_t kuiT23 = 1 + kuiT2 + kuiT3; + const uint16_t kuiT33 = 1 + (kuiT3 << 1); + const uint8_t kuiVL0 = kuiT01 >> 1; + const uint8_t kuiVL1 = kuiT12 >> 1; + const uint8_t kuiVL2 = kuiT23 >> 1; + const uint8_t kuiVL3 = kuiT33 >> 1; + const uint8_t kuiVL4 = (kuiT01 + kuiT12) >> 2; + const uint8_t kuiVL5 = (kuiT12 + kuiT23) >> 2; + const uint8_t kuiVL6 = (kuiT23 + kuiT33) >> 2; + const uint8_t kuiVL7 = kuiVL3; + const uint8_t kuiList[10] = { kuiVL0, kuiVL1, kuiVL2, kuiVL3, kuiVL3, kuiVL4, kuiVL5, kuiVL6, kuiVL7, kuiVL7 }; + + ST32A4 (pPred, LD32 (kuiList)); + ST32A4 (pPred + kiStride, LD32 (kuiList + 5)); + ST32A4 (pPred + kiStride2, LD32 (kuiList + 1)); + ST32A4 
(pPred + kiStride3, LD32 (kuiList + 6)); +} + + +/*vertical right*/ +void WelsI4x4LumaPredVR_c (uint8_t* pPred, const int32_t kiStride) { + const int32_t kiStride2 = kiStride << 1; + const int32_t kiStride3 = kiStride + kiStride2; + const uint8_t kuiLT = pPred[-kiStride - 1]; + /*get pLeft and pTop*/ + const uint8_t kuiL0 = pPred[ - 1]; + const uint8_t kuiL1 = pPred[kiStride - 1]; + const uint8_t kuiL2 = pPred[kiStride2 - 1]; + const uint8_t kuiT0 = pPred[ -kiStride]; + const uint8_t kuiT1 = pPred[1 - kiStride]; + const uint8_t kuiT2 = pPred[2 - kiStride]; + const uint8_t kuiT3 = pPred[3 - kiStride]; + const uint8_t kuiVR0 = (1 + kuiLT + kuiT0) >> 1; // kuiVR0 + const uint8_t kuiVR1 = (1 + kuiT0 + kuiT1) >> 1; // kuiVR1 + const uint8_t kuiVR2 = (1 + kuiT1 + kuiT2) >> 1; // kuiVR2 + const uint8_t kuiVR3 = (1 + kuiT2 + kuiT3) >> 1; // kuiVR3 + const uint8_t kuiVR4 = (2 + kuiL0 + (kuiLT << 1) + kuiT0) >> 2; // kuiVR4 + const uint8_t kuiVR5 = (2 + kuiLT + (kuiT0 << 1) + kuiT1) >> 2; // kuiVR5 + const uint8_t kuiVR6 = (2 + kuiT0 + (kuiT1 << 1) + kuiT2) >> 2; // kuiVR6 + const uint8_t kuiVR7 = (2 + kuiT1 + (kuiT2 << 1) + kuiT3) >> 2; // kuiVR7 + const uint8_t kuiVR8 = (2 + kuiLT + (kuiL0 << 1) + kuiL1) >> 2; // kuiVR8 + const uint8_t kuiVR9 = (2 + kuiL0 + (kuiL1 << 1) + kuiL2) >> 2; // kuiVR9 + const uint8_t kuiList[10] = { kuiVR8, kuiVR0, kuiVR1, kuiVR2, kuiVR3, kuiVR9, kuiVR4, kuiVR5, kuiVR6, kuiVR7 }; + + ST32A4 (pPred, LD32 (kuiList + 1)); + ST32A4 (pPred + kiStride, LD32 (kuiList + 6)); + ST32A4 (pPred + kiStride2, LD32 (kuiList)); + ST32A4 (pPred + kiStride3, LD32 (kuiList + 5)); +} + +/*horizontal up*/ +void WelsI4x4LumaPredHU_c (uint8_t* pPred, const int32_t kiStride) { + const int32_t kiStride2 = kiStride << 1; + const int32_t kiStride3 = kiStride + kiStride2; + /*get pLeft*/ + const uint8_t kuiL0 = pPred[ - 1]; + const uint8_t kuiL1 = pPred[kiStride - 1]; + const uint8_t kuiL2 = pPred[kiStride2 - 1]; + const uint8_t kuiL3 = pPred[kiStride3 - 1]; + const 
uint16_t kuiL01 = 1 + kuiL0 + kuiL1; + const uint16_t kuiL12 = 1 + kuiL1 + kuiL2; + const uint16_t kuiL23 = 1 + kuiL2 + kuiL3; + const uint8_t kuiHU0 = kuiL01 >> 1; + const uint8_t kuiHU1 = (kuiL01 + kuiL12) >> 2; + const uint8_t kuiHU2 = kuiL12 >> 1; + const uint8_t kuiHU3 = (kuiL12 + kuiL23) >> 2; + const uint8_t kuiHU4 = kuiL23 >> 1; + const uint8_t kuiHU5 = (1 + kuiL23 + (kuiL3 << 1)) >> 2; + const uint8_t kuiList[10] = { kuiHU0, kuiHU1, kuiHU2, kuiHU3, kuiHU4, kuiHU5, kuiL3, kuiL3, kuiL3, kuiL3 }; + + ST32A4 (pPred, LD32 (kuiList)); + ST32A4 (pPred + kiStride, LD32 (kuiList + 2)); + ST32A4 (pPred + kiStride2, LD32 (kuiList + 4)); + ST32A4 (pPred + kiStride3, LD32 (kuiList + 6)); +} + +/*horizontal down*/ +void WelsI4x4LumaPredHD_c (uint8_t* pPred, const int32_t kiStride) { + const int32_t kiStride2 = kiStride << 1; + const int32_t kiStride3 = kiStride + kiStride2; + const uint8_t kuiLT = pPred[- (kiStride + 1)]; + /*get pLeft and pTop*/ + const uint8_t kuiL0 = pPred[-1 ]; + const uint8_t kuiL1 = pPred[-1 + kiStride ]; + const uint8_t kuiL2 = pPred[-1 + kiStride2]; + const uint8_t kuiL3 = pPred[-1 + kiStride3]; + const uint8_t kuiT0 = pPred[-kiStride ]; + const uint8_t kuiT1 = pPred[-kiStride + 1 ]; + const uint8_t kuiT2 = pPred[-kiStride + 2 ]; + const uint16_t kuiTL0 = 1 + kuiLT + kuiL0; + const uint16_t kuiLT0 = 1 + kuiLT + kuiT0; + const uint16_t kuiT01 = 1 + kuiT0 + kuiT1; + const uint16_t kuiT12 = 1 + kuiT1 + kuiT2; + const uint16_t kuiL01 = 1 + kuiL0 + kuiL1; + const uint16_t kuiL12 = 1 + kuiL1 + kuiL2; + const uint16_t kuiL23 = 1 + kuiL2 + kuiL3; + const uint8_t kuiHD0 = kuiTL0 >> 1; + const uint8_t kuiHD1 = (kuiTL0 + kuiLT0) >> 2; + const uint8_t kuiHD2 = (kuiLT0 + kuiT01) >> 2; + const uint8_t kuiHD3 = (kuiT01 + kuiT12) >> 2; + const uint8_t kuiHD4 = kuiL01 >> 1; + const uint8_t kuiHD5 = (kuiTL0 + kuiL01) >> 2; + const uint8_t kuiHD6 = kuiL12 >> 1; + const uint8_t kuiHD7 = (kuiL01 + kuiL12) >> 2; + const uint8_t kuiHD8 = kuiL23 >> 1; + const uint8_t 
kuiHD9 = (kuiL12 + kuiL23) >> 2; + const uint8_t kuiList[10] = { kuiHD8, kuiHD9, kuiHD6, kuiHD7, kuiHD4, kuiHD5, kuiHD0, kuiHD1, kuiHD2, kuiHD3 }; + + ST32A4 (pPred, LD32 (kuiList + 6)); + ST32A4 (pPred + kiStride, LD32 (kuiList + 4)); + ST32A4 (pPred + kiStride2, LD32 (kuiList + 2)); + ST32A4 (pPred + kiStride3, LD32 (kuiList)); +} + +void WelsI8x8LumaPredV_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) { + uint64_t uiTop = 0; + int32_t iStride[8]; + uint8_t uiPixelFilterT[8]; + int32_t i; + + for (iStride[0] = 0, i = 1; i < 8; i++) { + iStride[i] = iStride[i - 1] + kiStride; + } + + uiPixelFilterT[0] = bTLAvail ? ((pPred[-1 - kiStride] + (pPred[-kiStride] << 1) + pPred[1 - kiStride] + 2) >> 2) : (( + pPred[-kiStride] * 3 + pPred[1 - kiStride] + 2) >> 2); + for (i = 1; i < 7; i++) { + uiPixelFilterT[i] = ((pPred[i - 1 - kiStride] + (pPred[i - kiStride] << 1) + pPred[i + 1 - kiStride] + 2) >> 2); + } + uiPixelFilterT[7] = bTRAvail ? ((pPred[6 - kiStride] + (pPred[7 - kiStride] << 1) + pPred[8 - kiStride] + 2) >> 2) : (( + pPred[6 - kiStride] + pPred[7 - kiStride] * 3 + 2) >> 2); + + // 8-89 + for (i = 7; i >= 0; i--) { + uiTop = ((uiTop << 8) | uiPixelFilterT[i]); + } + + for (i = 0; i < 8; i++) { + ST64A8 (pPred + kiStride * i, uiTop); + } +} + +void WelsI8x8LumaPredH_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) { + uint64_t uiLeft; + int32_t iStride[8]; + uint8_t uiPixelFilterL[8]; + int32_t i; + + for (iStride[0] = 0, i = 1; i < 8; i++) { + iStride[i] = iStride[i - 1] + kiStride; + } + + uiPixelFilterL[0] = bTLAvail ? 
((pPred[-1 - kiStride] + (pPred[-1] << 1) + pPred[-1 + iStride[1]] + 2) >> 2) : (( + pPred[-1] * 3 + pPred[-1 + iStride[1]] + 2) >> 2); + for (i = 1; i < 7; i++) { + uiPixelFilterL[i] = ((pPred[-1 + iStride[i - 1]] + (pPred[-1 + iStride[i]] << 1) + pPred[-1 + iStride[i + 1]] + 2) >> + 2); + } + uiPixelFilterL[7] = ((pPred[-1 + iStride[6]] + pPred[-1 + iStride[7]] * 3 + 2) >> 2); + + // 8-90 + for (i = 0; i < 8; i++) { + uiLeft = 0x0101010101010101ULL * uiPixelFilterL[i]; + ST64A8 (pPred + iStride[i], uiLeft); + } +} + +void WelsI8x8LumaPredDc_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) { + int32_t iStride[8]; + uint8_t uiPixelFilterL[8]; + uint8_t uiPixelFilterT[8]; + uint16_t uiTotal = 0; + int32_t i; + + for (iStride[0] = 0, i = 1; i < 8; i++) { + iStride[i] = iStride[i - 1] + kiStride; + } + + uiPixelFilterL[0] = bTLAvail ? ((pPred[-1 - kiStride] + (pPred[-1] << 1) + pPred[-1 + iStride[1]] + 2) >> 2) : (( + pPred[-1] * 3 + pPred[-1 + iStride[1]] + 2) >> 2); + uiPixelFilterT[0] = bTLAvail ? ((pPred[-1 - kiStride] + (pPred[-kiStride] << 1) + pPred[1 - kiStride] + 2) >> 2) : (( + pPred[-kiStride] * 3 + pPred[1 - kiStride] + 2) >> 2); + for (i = 1; i < 7; i++) { + uiPixelFilterL[i] = ((pPred[-1 + iStride[i - 1]] + (pPred[-1 + iStride[i]] << 1) + pPred[-1 + iStride[i + 1]] + 2) >> + 2); + uiPixelFilterT[i] = ((pPred[i - 1 - kiStride] + (pPred[i - kiStride] << 1) + pPred[i + 1 - kiStride] + 2) >> 2); + } + uiPixelFilterL[7] = ((pPred[-1 + iStride[6]] + pPred[-1 + iStride[7]] * 3 + 2) >> 2); + uiPixelFilterT[7] = bTRAvail ? 
((pPred[6 - kiStride] + (pPred[7 - kiStride] << 1) + pPred[8 - kiStride] + 2) >> 2) : (( + pPred[6 - kiStride] + pPred[7 - kiStride] * 3 + 2) >> 2); + + // 8-91 + for (i = 0; i < 8; i++) { + uiTotal += uiPixelFilterL[i]; + uiTotal += uiPixelFilterT[i]; + } + + const uint8_t kuiMean = ((uiTotal + 8) >> 4); + const uint64_t kuiMean64 = 0x0101010101010101ULL * kuiMean; + + for (i = 0; i < 8; i++) { + ST64A8 (pPred + iStride[i], kuiMean64); + } +} + +void WelsI8x8LumaPredDcLeft_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) { + int32_t iStride[8]; + uint8_t uiPixelFilterL[8]; + uint16_t uiTotal = 0; + int32_t i; + + for (iStride[0] = 0, i = 1; i < 8; i++) { + iStride[i] = iStride[i - 1] + kiStride; + } + + uiPixelFilterL[0] = bTLAvail ? ((pPred[-1 - kiStride] + (pPred[-1] << 1) + pPred[-1 + iStride[1]] + 2) >> 2) : (( + pPred[-1] * 3 + pPred[-1 + iStride[1]] + 2) >> 2); + for (i = 1; i < 7; i++) { + uiPixelFilterL[i] = ((pPred[-1 + iStride[i - 1]] + (pPred[-1 + iStride[i]] << 1) + pPred[-1 + iStride[i + 1]] + 2) >> + 2); + } + uiPixelFilterL[7] = ((pPred[-1 + iStride[6]] + pPred[-1 + iStride[7]] * 3 + 2) >> 2); + + // 8-92 + for (i = 0; i < 8; i++) { + uiTotal += uiPixelFilterL[i]; + } + + const uint8_t kuiMean = ((uiTotal + 4) >> 3); + const uint64_t kuiMean64 = 0x0101010101010101ULL * kuiMean; + + for (i = 0; i < 8; i++) { + ST64A8 (pPred + iStride[i], kuiMean64); + } +} + +void WelsI8x8LumaPredDcTop_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) { + int32_t iStride[8]; + uint8_t uiPixelFilterT[8]; + uint16_t uiTotal = 0; + int32_t i; + + for (iStride[0] = 0, i = 1; i < 8; i++) { + iStride[i] = iStride[i - 1] + kiStride; + } + + uiPixelFilterT[0] = bTLAvail ? 
((pPred[-1 - kiStride] + (pPred[-kiStride] << 1) + pPred[1 - kiStride] + 2) >> 2) : (( + pPred[-kiStride] * 3 + pPred[1 - kiStride] + 2) >> 2); + for (i = 1; i < 7; i++) { + uiPixelFilterT[i] = ((pPred[i - 1 - kiStride] + (pPred[i - kiStride] << 1) + pPred[i + 1 - kiStride] + 2) >> 2); + } + uiPixelFilterT[7] = bTRAvail ? ((pPred[6 - kiStride] + (pPred[7 - kiStride] << 1) + pPred[8 - kiStride] + 2) >> 2) : (( + pPred[6 - kiStride] + pPred[7 - kiStride] * 3 + 2) >> 2); + + // 8-93 + for (i = 0; i < 8; i++) { + uiTotal += uiPixelFilterT[i]; + } + + const uint8_t kuiMean = ((uiTotal + 4) >> 3); + const uint64_t kuiMean64 = 0x0101010101010101ULL * kuiMean; + + for (i = 0; i < 8; i++) { + ST64A8 (pPred + iStride[i], kuiMean64); + } +} + +void WelsI8x8LumaPredDcNA_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) { + // for normal 8 bit depth, 8-94 + const uint64_t kuiDC64 = 0x8080808080808080ULL; + + int32_t iStride[8]; + int32_t i; + ST64A8 (pPred, kuiDC64); + for (iStride[0] = 0, i = 1; i < 8; i++) { + iStride[i] = iStride[i - 1] + kiStride; + ST64A8 (pPred + iStride[i], kuiDC64); + } +} + +/*down pLeft*/ +void WelsI8x8LumaPredDDL_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) { + // Top and Top-right available + int32_t iStride[8]; + uint8_t uiPixelFilterT[16]; + int32_t i, j; + + for (iStride[0] = 0, i = 1; i < 8; i++) { + iStride[i] = iStride[i - 1] + kiStride; + } + + uiPixelFilterT[0] = bTLAvail ? 
((pPred[-1 - kiStride] + (pPred[-kiStride] << 1) + pPred[1 - kiStride] + 2) >> 2) : (( + pPred[-kiStride] * 3 + pPred[1 - kiStride] + 2) >> 2); + for (i = 1; i < 15; i++) { + uiPixelFilterT[i] = ((pPred[i - 1 - kiStride] + (pPred[i - kiStride] << 1) + pPred[i + 1 - kiStride] + 2) >> 2); + } + uiPixelFilterT[15] = ((pPred[14 - kiStride] + pPred[15 - kiStride] * 3 + 2) >> 2); + + for (i = 0; i < 8; i++) { // y + for (j = 0; j < 8; j++) { // x + if (i == 7 && j == 7) { // 8-95 + pPred[j + iStride[i]] = (uiPixelFilterT[14] + 3 * uiPixelFilterT[15] + 2) >> 2; + } else { // 8-96 + pPred[j + iStride[i]] = (uiPixelFilterT[i + j] + (uiPixelFilterT[i + j + 1] << 1) + uiPixelFilterT[i + j + 2] + 2) >> 2; + } + } + } +} + +/*down pLeft*/ +void WelsI8x8LumaPredDDLTop_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) { + // Top available and Top-right unavailable + int32_t iStride[8]; + uint8_t uiPixelFilterT[16]; + int32_t i, j; + + for (iStride[0] = 0, i = 1; i < 8; i++) { + iStride[i] = iStride[i - 1] + kiStride; + } + + uiPixelFilterT[0] = bTLAvail ? 
((pPred[-1 - kiStride] + (pPred[-kiStride] << 1) + pPred[1 - kiStride] + 2) >> 2) : (( + pPred[-kiStride] * 3 + pPred[1 - kiStride] + 2) >> 2); + for (i = 1; i < 7; i++) { + uiPixelFilterT[i] = ((pPred[i - 1 - kiStride] + (pPred[i - kiStride] << 1) + pPred[i + 1 - kiStride] + 2) >> 2); + } + // p[x, -1] x=8...15 are replaced with p[7, -1] + uiPixelFilterT[7] = ((pPred[6 - kiStride] + pPred[7 - kiStride] * 3 + 2) >> 2); + for (i = 8; i < 16; i++) { + uiPixelFilterT[i] = pPred[7 - kiStride]; + } + + for (i = 0; i < 8; i++) { // y + for (j = 0; j < 8; j++) { // x + if (i == 7 && j == 7) { // 8-95 + pPred[j + iStride[i]] = (uiPixelFilterT[14] + 3 * uiPixelFilterT[15] + 2) >> 2; + } else { // 8-96 + pPred[j + iStride[i]] = (uiPixelFilterT[i + j] + (uiPixelFilterT[i + j + 1] << 1) + uiPixelFilterT[i + j + 2] + 2) >> 2; + } + } + } +} + +/*down right*/ +void WelsI8x8LumaPredDDR_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) { + // The TopLeft, Top, Left are all available under this mode + int32_t iStride[8]; + uint8_t uiPixelFilterTL; + uint8_t uiPixelFilterL[8]; + uint8_t uiPixelFilterT[8]; + int32_t i, j; + + for (iStride[0] = 0, i = 1; i < 8; i++) { + iStride[i] = iStride[i - 1] + kiStride; + } + + uiPixelFilterTL = (pPred[-1] + (pPred[-1 - kiStride] << 1) + pPred[-kiStride] + 2) >> 2; + + uiPixelFilterL[0] = ((pPred[-1 - kiStride] + (pPred[-1] << 1) + pPred[-1 + iStride[1]] + 2) >> 2); + uiPixelFilterT[0] = ((pPred[-1 - kiStride] + (pPred[-kiStride] << 1) + pPred[1 - kiStride] + 2) >> 2); + for (i = 1; i < 7; i++) { + uiPixelFilterL[i] = ((pPred[-1 + iStride[i - 1]] + (pPred[-1 + iStride[i]] << 1) + pPred[-1 + iStride[i + 1]] + 2) >> + 2); + uiPixelFilterT[i] = ((pPred[i - 1 - kiStride] + (pPred[i - kiStride] << 1) + pPred[i + 1 - kiStride] + 2) >> 2); + } + uiPixelFilterL[7] = ((pPred[-1 + iStride[6]] + pPred[-1 + iStride[7]] * 3 + 2) >> 2); + uiPixelFilterT[7] = bTRAvail ? 
((pPred[6 - kiStride] + (pPred[7 - kiStride] << 1) + pPred[8 - kiStride] + 2) >> 2) : (( + pPred[6 - kiStride] + pPred[7 - kiStride] * 3 + 2) >> 2); + + for (i = 0; i < 8; i++) { // y + // 8-98, x < y-1 + for (j = 0; j < (i - 1); j++) { + pPred[j + iStride[i]] = (uiPixelFilterL[i - j - 2] + (uiPixelFilterL[i - j - 1] << 1) + uiPixelFilterL[i - j] + 2) >> 2; + } + // 8-98, special case, x == y-1 + if (i >= 1) { + j = i - 1; + pPred[j + iStride[i]] = (uiPixelFilterTL + (uiPixelFilterL[0] << 1) + uiPixelFilterL[1] + 2) >> 2; + } + // 8-99, x==y + j = i; + pPred[j + iStride[i]] = (uiPixelFilterT[0] + (uiPixelFilterTL << 1) + uiPixelFilterL[0] + 2) >> 2; + // 8-97, special case, x == y+1 + if (i < 7) { + j = i + 1; + pPred[j + iStride[i]] = (uiPixelFilterTL + (uiPixelFilterT[0] << 1) + uiPixelFilterT[1] + 2) >> 2; + } + for (j = i + 2; j < 8; j++) { // 8-97, x > y+1 + pPred[j + iStride[i]] = (uiPixelFilterT[j - i - 2] + (uiPixelFilterT[j - i - 1] << 1) + uiPixelFilterT[j - i] + 2) >> 2; + } + } +} + +/*vertical pLeft*/ +void WelsI8x8LumaPredVL_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) { + // Top and Top-right available + int32_t iStride[8]; + uint8_t uiPixelFilterT[16]; + int32_t i, j; + + for (iStride[0] = 0, i = 1; i < 8; i++) { + iStride[i] = iStride[i - 1] + kiStride; + } + + uiPixelFilterT[0] = bTLAvail ? 
((pPred[-1 - kiStride] + (pPred[-kiStride] << 1) + pPred[1 - kiStride] + 2) >> 2) : (( + pPred[-kiStride] * 3 + pPred[1 - kiStride] + 2) >> 2); + for (i = 1; i < 15; i++) { + uiPixelFilterT[i] = ((pPred[i - 1 - kiStride] + (pPred[i - kiStride] << 1) + pPred[i + 1 - kiStride] + 2) >> 2); + } + uiPixelFilterT[15] = ((pPred[14 - kiStride] + pPred[15 - kiStride] * 3 + 2) >> 2); + + for (i = 0; i < 8; i++) { // y + if ((i & 0x01) == 0) { // 8-108 + for (j = 0; j < 8; j++) { // x + pPred[j + iStride[i]] = (uiPixelFilterT[j + (i >> 1)] + uiPixelFilterT[j + (i >> 1) + 1] + 1) >> 1; + } + } else { // 8-109 + for (j = 0; j < 8; j++) { // x + pPred[j + iStride[i]] = (uiPixelFilterT[j + (i >> 1)] + (uiPixelFilterT[j + (i >> 1) + 1] << 1) + uiPixelFilterT[j + + (i >> 1) + 2] + 2) >> 2; + } + } + } +} + +/*vertical pLeft*/ +void WelsI8x8LumaPredVLTop_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) { + // Top available and Top-right unavailable + int32_t iStride[8]; + uint8_t uiPixelFilterT[16]; + int32_t i, j; + + for (iStride[0] = 0, i = 1; i < 8; i++) { + iStride[i] = iStride[i - 1] + kiStride; + } + + uiPixelFilterT[0] = bTLAvail ? 
((pPred[-1 - kiStride] + (pPred[-kiStride] << 1) + pPred[1 - kiStride] + 2) >> 2) : (( + pPred[-kiStride] * 3 + pPred[1 - kiStride] + 2) >> 2); + for (i = 1; i < 7; i++) { + uiPixelFilterT[i] = ((pPred[i - 1 - kiStride] + (pPred[i - kiStride] << 1) + pPred[i + 1 - kiStride] + 2) >> 2); + } + // p[x, -1] x=8...15 are replaced with p[7, -1] + uiPixelFilterT[7] = ((pPred[6 - kiStride] + pPred[7 - kiStride] * 3 + 2) >> 2); + for (i = 8; i < 16; i++) { + uiPixelFilterT[i] = pPred[7 - kiStride]; + } + + for (i = 0; i < 8; i++) { // y + if ((i & 0x01) == 0) { // 8-108 + for (j = 0; j < 8; j++) { // x + pPred[j + iStride[i]] = (uiPixelFilterT[j + (i >> 1)] + uiPixelFilterT[j + (i >> 1) + 1] + 1) >> 1; + } + } else { // 8-109 + for (j = 0; j < 8; j++) { // x + pPred[j + iStride[i]] = (uiPixelFilterT[j + (i >> 1)] + (uiPixelFilterT[j + (i >> 1) + 1] << 1) + uiPixelFilterT[j + + (i >> 1) + 2] + 2) >> 2; + } + } + } +} + +/*vertical right*/ +void WelsI8x8LumaPredVR_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) { + // The TopLeft, Top, Left are always available under this mode + int32_t iStride[8]; + uint8_t uiPixelFilterTL; + uint8_t uiPixelFilterL[8]; + uint8_t uiPixelFilterT[8]; + int32_t i, j; + int32_t izVR, izVRDiv; + + for (iStride[0] = 0, i = 1; i < 8; i++) { + iStride[i] = iStride[i - 1] + kiStride; + } + + uiPixelFilterTL = (pPred[-1] + (pPred[-1 - kiStride] << 1) + pPred[-kiStride] + 2) >> 2; + + uiPixelFilterL[0] = ((pPred[-1 - kiStride] + (pPred[-1] << 1) + pPred[-1 + iStride[1]] + 2) >> 2); + uiPixelFilterT[0] = ((pPred[-1 - kiStride] + (pPred[-kiStride] << 1) + pPred[1 - kiStride] + 2) >> 2); + for (i = 1; i < 7; i++) { + uiPixelFilterL[i] = ((pPred[-1 + iStride[i - 1]] + (pPred[-1 + iStride[i]] << 1) + pPred[-1 + iStride[i + 1]] + 2) >> + 2); + uiPixelFilterT[i] = ((pPred[i - 1 - kiStride] + (pPred[i - kiStride] << 1) + pPred[i + 1 - kiStride] + 2) >> 2); + } + uiPixelFilterL[7] = ((pPred[-1 + iStride[6]] + pPred[-1 + iStride[7]] * 3 + 
2) >> 2); + uiPixelFilterT[7] = bTRAvail ? ((pPred[6 - kiStride] + (pPred[7 - kiStride] << 1) + pPred[8 - kiStride] + 2) >> 2) : (( + pPred[6 - kiStride] + pPred[7 - kiStride] * 3 + 2) >> 2); + + for (i = 0; i < 8; i++) { // y + for (j = 0; j < 8; j++) { // x + izVR = (j << 1) - i; // 2 * x - y + izVRDiv = j - (i >> 1); + if (izVR >= 0) { + if ((izVR & 0x01) == 0) { // 8-100 + if (izVRDiv > 0) { + pPred[j + iStride[i]] = (uiPixelFilterT[izVRDiv - 1] + uiPixelFilterT[izVRDiv] + 1) >> 1; + } else { + pPred[j + iStride[i]] = (uiPixelFilterTL + uiPixelFilterT[0] + 1) >> 1; + } + } else { // 8-101 + if (izVRDiv > 1) { + pPred[j + iStride[i]] = (uiPixelFilterT[izVRDiv - 2] + (uiPixelFilterT[izVRDiv - 1] << 1) + uiPixelFilterT[izVRDiv] + 2) + >> 2; + } else { + pPred[j + iStride[i]] = (uiPixelFilterTL + (uiPixelFilterT[0] << 1) + uiPixelFilterT[1] + 2) >> 2; + } + } + } else if (izVR == -1) { // 8-102 + pPred[j + iStride[i]] = (uiPixelFilterL[0] + (uiPixelFilterTL << 1) + uiPixelFilterT[0] + 2) >> 2; + } else if (izVR < -2) { // 8-103 + pPred[j + iStride[i]] = (uiPixelFilterL[-izVR - 1] + (uiPixelFilterL[-izVR - 2] << 1) + uiPixelFilterL[-izVR - 3] + 2) + >> 2; + } else { // izVR==-2, 8-103, special case + pPred[j + iStride[i]] = (uiPixelFilterL[1] + (uiPixelFilterL[0] << 1) + uiPixelFilterTL + 2) >> 2; + } + } + } +} + +/*horizontal up*/ +void WelsI8x8LumaPredHU_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) { + int32_t iStride[8]; + uint8_t uiPixelFilterL[8]; + int32_t i, j; + int32_t izHU; + + for (iStride[0] = 0, i = 1; i < 8; i++) { + iStride[i] = iStride[i - 1] + kiStride; + } + + uiPixelFilterL[0] = bTLAvail ? 
((pPred[-1 - kiStride] + (pPred[-1] << 1) + pPred[-1 + iStride[1]] + 2) >> 2) : (( + pPred[-1] * 3 + pPred[-1 + iStride[1]] + 2) >> 2); + for (i = 1; i < 7; i++) { + uiPixelFilterL[i] = ((pPred[-1 + iStride[i - 1]] + (pPred[-1 + iStride[i]] << 1) + pPred[-1 + iStride[i + 1]] + 2) >> + 2); + } + uiPixelFilterL[7] = ((pPred[-1 + iStride[6]] + pPred[-1 + iStride[7]] * 3 + 2) >> 2); + + for (i = 0; i < 8; i++) { // y + for (j = 0; j < 8; j++) { // x + izHU = j + (i << 1); // x + 2 * y + if (izHU < 13) { + if ((izHU & 0x01) == 0) { // 8-110 + pPred[j + iStride[i]] = (uiPixelFilterL[izHU >> 1] + uiPixelFilterL[1 + (izHU >> 1)] + 1) >> 1; + } else { // 8-111 + pPred[j + iStride[i]] = (uiPixelFilterL[izHU >> 1] + (uiPixelFilterL[1 + (izHU >> 1)] << 1) + uiPixelFilterL[2 + + (izHU >> 1)] + 2) >> 2; + } + } else if (izHU == 13) { // 8-112 + pPred[j + iStride[i]] = (uiPixelFilterL[6] + 3 * uiPixelFilterL[7] + 2) >> 2; + } else { // 8-113 + pPred[j + iStride[i]] = uiPixelFilterL[7]; + } + } + } +} + +/*horizontal down*/ +void WelsI8x8LumaPredHD_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) { + // The TopLeft, Top, Left are all available under this mode + int32_t iStride[8]; + uint8_t uiPixelFilterTL; + uint8_t uiPixelFilterL[8]; + uint8_t uiPixelFilterT[8]; + int32_t i, j; + int32_t izHD, izHDDiv; + + for (iStride[0] = 0, i = 1; i < 8; i++) { + iStride[i] = iStride[i - 1] + kiStride; + } + + uiPixelFilterTL = (pPred[-1] + (pPred[-1 - kiStride] << 1) + pPred[-kiStride] + 2) >> 2; + + uiPixelFilterL[0] = ((pPred[-1 - kiStride] + (pPred[-1] << 1) + pPred[-1 + iStride[1]] + 2) >> 2); + uiPixelFilterT[0] = ((pPred[-1 - kiStride] + (pPred[-kiStride] << 1) + pPred[1 - kiStride] + 2) >> 2); + for (i = 1; i < 7; i++) { + uiPixelFilterL[i] = ((pPred[-1 + iStride[i - 1]] + (pPred[-1 + iStride[i]] << 1) + pPred[-1 + iStride[i + 1]] + 2) >> + 2); + uiPixelFilterT[i] = ((pPred[i - 1 - kiStride] + (pPred[i - kiStride] << 1) + pPred[i + 1 - kiStride] + 2) >> 2); + } 
+ uiPixelFilterL[7] = ((pPred[-1 + iStride[6]] + pPred[-1 + iStride[7]] * 3 + 2) >> 2); + uiPixelFilterT[7] = bTRAvail ? ((pPred[6 - kiStride] + (pPred[7 - kiStride] << 1) + pPred[8 - kiStride] + 2) >> 2) : (( + pPred[6 - kiStride] + pPred[7 - kiStride] * 3 + 2) >> 2); + + for (i = 0; i < 8; i++) { // y + for (j = 0; j < 8; j++) { // x + izHD = (i << 1) - j; // 2*y - x + izHDDiv = i - (j >> 1); + if (izHD >= 0) { + if ((izHD & 0x01) == 0) { // 8-104 + if (izHDDiv == 0) { + pPred[j + iStride[i]] = (uiPixelFilterTL + uiPixelFilterL[0] + 1) >> 1; + } else { + pPred[j + iStride[i]] = (uiPixelFilterL[izHDDiv - 1] + uiPixelFilterL[izHDDiv] + 1) >> 1; + } + } else { // 8-105 + if (izHDDiv == 1) { + pPred[j + iStride[i]] = (uiPixelFilterTL + (uiPixelFilterL[0] << 1) + uiPixelFilterL[1] + 2) >> 2; + } else { + pPred[j + iStride[i]] = (uiPixelFilterL[izHDDiv - 2] + (uiPixelFilterL[izHDDiv - 1] << 1) + uiPixelFilterL[izHDDiv] + 2) + >> 2; + } + } + } else if (izHD == -1) { // 8-106 + pPred[j + iStride[i]] = (uiPixelFilterL[0] + (uiPixelFilterTL << 1) + uiPixelFilterT[0] + 2) >> 2; + } else if (izHD < -2) { // 8-107 + pPred[j + iStride[i]] = (uiPixelFilterT[-izHD - 1] + (uiPixelFilterT[-izHD - 2] << 1) + uiPixelFilterT[-izHD - 3] + 2) + >> 2; + } else { // 8-107 special case, izHD==-2 + pPred[j + iStride[i]] = (uiPixelFilterT[1] + (uiPixelFilterT[0] << 1) + uiPixelFilterTL + 2) >> 2; + } + } + } +} + + +void WelsIChromaPredV_c (uint8_t* pPred, const int32_t kiStride) { + const uint64_t kuiVal64 = LD64A8 (&pPred[-kiStride]); + const int32_t kiStride2 = kiStride << 1; + const int32_t kiStride4 = kiStride2 << 1; + + ST64A8 (pPred, kuiVal64); + ST64A8 (pPred + kiStride, kuiVal64); + ST64A8 (pPred + kiStride2, kuiVal64); + ST64A8 (pPred + kiStride2 + kiStride, kuiVal64); + ST64A8 (pPred + kiStride4, kuiVal64); + ST64A8 (pPred + kiStride4 + kiStride, kuiVal64); + ST64A8 (pPred + kiStride4 + kiStride2, kuiVal64); + ST64A8 (pPred + (kiStride << 3) - kiStride, kuiVal64); +} + +void 
WelsIChromaPredH_c (uint8_t* pPred, const int32_t kiStride) { + int32_t iTmp = (kiStride << 3) - kiStride; + uint8_t i = 7; + + do { + const uint8_t kuiVal8 = pPred[iTmp - 1]; + const uint64_t kuiVal64 = 0x0101010101010101ULL * kuiVal8; + + ST64A8 (pPred + iTmp, kuiVal64); + + iTmp -= kiStride; + } while (i-- > 0); +} + + +void WelsIChromaPredPlane_c (uint8_t* pPred, const int32_t kiStride) { + int32_t a = 0, b = 0, c = 0, H = 0, V = 0; + int32_t i, j; + uint8_t* pTop = &pPred[-kiStride]; + uint8_t* pLeft = &pPred[-1]; + + for (i = 0 ; i < 4 ; i ++) { + H += (i + 1) * (pTop[4 + i] - pTop[2 - i]); + V += (i + 1) * (pLeft[ (4 + i) * kiStride] - pLeft[ (2 - i) * kiStride]); + } + + a = (pLeft[7 * kiStride] + pTop[7]) << 4; + b = (17 * H + 16) >> 5; + c = (17 * V + 16) >> 5; + + for (i = 0 ; i < 8 ; i ++) { + for (j = 0 ; j < 8 ; j ++) { + int32_t iTmp = (a + b * (j - 3) + c * (i - 3) + 16) >> 5; + iTmp = WelsClip1 (iTmp); + pPred[j] = iTmp; + } + pPred += kiStride; + } +} + + +void WelsIChromaPredDc_c (uint8_t* pPred, const int32_t kiStride) { + const int32_t kiL1 = kiStride - 1; + const int32_t kiL2 = kiL1 + kiStride; + const int32_t kiL3 = kiL2 + kiStride; + const int32_t kiL4 = kiL3 + kiStride; + const int32_t kiL5 = kiL4 + kiStride; + const int32_t kiL6 = kiL5 + kiStride; + const int32_t kiL7 = kiL6 + kiStride; + /*caculate the kMean value*/ + const uint8_t kuiM1 = (pPred[-kiStride] + pPred[1 - kiStride] + pPred[2 - kiStride] + pPred[3 - kiStride] + + pPred[-1] + pPred[kiL1] + pPred[kiL2] + pPred[kiL3] + 4) >> 3 ; + const uint32_t kuiSum2 = pPred[4 - kiStride] + pPred[5 - kiStride] + pPred[6 - kiStride] + pPred[7 - kiStride]; + const uint32_t kuiSum3 = pPred[kiL4] + pPred[kiL5] + pPred[kiL6] + pPred[kiL7]; + const uint8_t kuiM2 = (kuiSum2 + 2) >> 2; + const uint8_t kuiM3 = (kuiSum3 + 2) >> 2; + const uint8_t kuiM4 = (kuiSum2 + kuiSum3 + 4) >> 3; + const uint8_t kuiMUP[8] = {kuiM1, kuiM1, kuiM1, kuiM1, kuiM2, kuiM2, kuiM2, kuiM2}; + const uint8_t kuiMDown[8] = 
{kuiM3, kuiM3, kuiM3, kuiM3, kuiM4, kuiM4, kuiM4, kuiM4}; + const uint64_t kuiUP64 = LD64 (kuiMUP); + const uint64_t kuiDN64 = LD64 (kuiMDown); + + ST64A8 (pPred, kuiUP64); + ST64A8 (pPred + kiL1 + 1, kuiUP64); + ST64A8 (pPred + kiL2 + 1, kuiUP64); + ST64A8 (pPred + kiL3 + 1, kuiUP64); + ST64A8 (pPred + kiL4 + 1, kuiDN64); + ST64A8 (pPred + kiL5 + 1, kuiDN64); + ST64A8 (pPred + kiL6 + 1, kuiDN64); + ST64A8 (pPred + kiL7 + 1, kuiDN64); +} + +void WelsIChromaPredDcLeft_c (uint8_t* pPred, const int32_t kiStride) { + const int32_t kiL1 = -1 + kiStride; + const int32_t kiL2 = kiL1 + kiStride; + const int32_t kiL3 = kiL2 + kiStride; + const int32_t kiL4 = kiL3 + kiStride; + const int32_t kiL5 = kiL4 + kiStride; + const int32_t kiL6 = kiL5 + kiStride; + const int32_t kiL7 = kiL6 + kiStride; + /*caculate the kMean value*/ + const uint8_t kuiMUP = (pPred[-1] + pPred[kiL1] + pPred[kiL2] + pPred[kiL3] + 2) >> 2 ; + const uint8_t kuiMDown = (pPred[kiL4] + pPred[kiL5] + pPred[kiL6] + pPred[kiL7] + 2) >> 2; + const uint64_t kuiUP64 = 0x0101010101010101ULL * kuiMUP; + const uint64_t kuiDN64 = 0x0101010101010101ULL * kuiMDown; + + ST64A8 (pPred, kuiUP64); + ST64A8 (pPred + kiL1 + 1, kuiUP64); + ST64A8 (pPred + kiL2 + 1, kuiUP64); + ST64A8 (pPred + kiL3 + 1, kuiUP64); + ST64A8 (pPred + kiL4 + 1, kuiDN64); + ST64A8 (pPred + kiL5 + 1, kuiDN64); + ST64A8 (pPred + kiL6 + 1, kuiDN64); + ST64A8 (pPred + kiL7 + 1, kuiDN64); +} + +void WelsIChromaPredDcTop_c (uint8_t* pPred, const int32_t kiStride) { + int32_t iTmp = (kiStride << 3) - kiStride; + /*caculate the kMean value*/ + const uint8_t kuiM1 = (pPred[-kiStride] + pPred[1 - kiStride] + pPred[2 - kiStride] + pPred[3 - kiStride] + 2) >> 2; + const uint8_t kuiM2 = (pPred[4 - kiStride] + pPred[5 - kiStride] + pPred[6 - kiStride] + pPred[7 - kiStride] + 2) >> + 2; + const uint8_t kuiM[8] = {kuiM1, kuiM1, kuiM1, kuiM1, kuiM2, kuiM2, kuiM2, kuiM2}; + + uint8_t i = 7; + + do { + ST64A8 (pPred + iTmp, LD64 (kuiM)); + + iTmp -= kiStride; + } 
while (i-- > 0); +} + +void WelsIChromaPredDcNA_c (uint8_t* pPred, const int32_t kiStride) { + int32_t iTmp = (kiStride << 3) - kiStride; + const uint64_t kuiDC64 = 0x8080808080808080ULL; + uint8_t i = 7; + + do { + ST64A8 (pPred + iTmp, kuiDC64); + + iTmp -= kiStride; + } while (i-- > 0); +} + +void WelsI16x16LumaPredV_c (uint8_t* pPred, const int32_t kiStride) { + int32_t iTmp = (kiStride << 4) - kiStride; + const uint64_t kuiTop1 = LD64A8 (pPred - kiStride); + const uint64_t kuiTop2 = LD64A8 (pPred - kiStride + 8); + uint8_t i = 15; + + do { + ST64A8 (pPred + iTmp, kuiTop1); + ST64A8 (pPred + iTmp + 8, kuiTop2); + + iTmp -= kiStride; + } while (i-- > 0); +} + +void WelsI16x16LumaPredH_c (uint8_t* pPred, const int32_t kiStride) { + int32_t iTmp = (kiStride << 4) - kiStride; + uint8_t i = 15; + + do { + const uint8_t kuiVal8 = pPred[iTmp - 1]; + const uint64_t kuiVal64 = 0x0101010101010101ULL * kuiVal8; + + ST64A8 (pPred + iTmp, kuiVal64); + ST64A8 (pPred + iTmp + 8, kuiVal64); + + iTmp -= kiStride; + } while (i-- > 0); +} + +void WelsI16x16LumaPredPlane_c (uint8_t* pPred, const int32_t kiStride) { + int32_t a = 0, b = 0, c = 0, H = 0, V = 0; + int32_t i, j; + uint8_t* pTop = &pPred[-kiStride]; + uint8_t* pLeft = &pPred[-1]; + + for (i = 0 ; i < 8 ; i ++) { + H += (i + 1) * (pTop[8 + i] - pTop[6 - i]); + V += (i + 1) * (pLeft[ (8 + i) * kiStride] - pLeft[ (6 - i) * kiStride]); + } + + a = (pLeft[15 * kiStride] + pTop[15]) << 4; + b = (5 * H + 32) >> 6; + c = (5 * V + 32) >> 6; + + for (i = 0 ; i < 16 ; i ++) { + for (j = 0 ; j < 16 ; j ++) { + int32_t iTmp = (a + b * (j - 7) + c * (i - 7) + 16) >> 5; + iTmp = WelsClip1 (iTmp); + pPred[j] = iTmp; + } + pPred += kiStride; + } +} + +void WelsI16x16LumaPredDc_c (uint8_t* pPred, const int32_t kiStride) { + int32_t iTmp = (kiStride << 4) - kiStride; + int32_t iSum = 0; + uint8_t i = 15; + uint8_t uiMean = 0; + + /*caculate the kMean value*/ + do { + iSum += pPred[-1 + iTmp] + pPred[-kiStride + i]; + iTmp -= kiStride; + 
} while (i-- > 0); + uiMean = (16 + iSum) >> 5; + + iTmp = (kiStride << 4) - kiStride; + i = 15; + do { + memset (&pPred[iTmp], uiMean, I16x16_COUNT); + iTmp -= kiStride; + } while (i-- > 0); +} + + +void WelsI16x16LumaPredDcTop_c (uint8_t* pPred, const int32_t kiStride) { + int32_t iTmp = (kiStride << 4) - kiStride; + int32_t iSum = 0; + uint8_t i = 15; + uint8_t uiMean = 0; + + /*caculate the kMean value*/ + do { + iSum += pPred[-kiStride + i]; + } while (i-- > 0); + uiMean = (8 + iSum) >> 4; + + i = 15; + do { + memset (&pPred[iTmp], uiMean, I16x16_COUNT); + iTmp -= kiStride; + } while (i-- > 0); +} + +void WelsI16x16LumaPredDcLeft_c (uint8_t* pPred, const int32_t kiStride) { + int32_t iTmp = (kiStride << 4) - kiStride; + int32_t iSum = 0; + uint64_t uiMean64 = 0; + uint8_t uiMean = 0; + uint8_t i = 15; + + /*caculate the kMean value*/ + do { + iSum += pPred[-1 + iTmp]; + iTmp -= kiStride; + } while (i-- > 0); + uiMean = (8 + iSum) >> 4; + uiMean64 = 0x0101010101010101ULL * uiMean; + + iTmp = (kiStride << 4) - kiStride; + i = 15; + do { + ST64A8 (pPred + iTmp, uiMean64); + ST64A8 (pPred + iTmp + 8, uiMean64); + + iTmp -= kiStride; + } while (i-- > 0); +} + +void WelsI16x16LumaPredDcNA_c (uint8_t* pPred, const int32_t kiStride) { + const uint64_t kuiDC64 = 0x8080808080808080ULL; + int32_t iTmp = (kiStride << 4) - kiStride; + uint8_t i = 15; + + do { + ST64A8 (pPred + iTmp, kuiDC64); + ST64A8 (pPred + iTmp + 8, kuiDC64); + + iTmp -= kiStride; + } while (i-- > 0); +} + +} // namespace WelsDec diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/manage_dec_ref.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/manage_dec_ref.cpp new file mode 100644 index 000000000..571ce41d7 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/manage_dec_ref.cpp @@ -0,0 +1,958 @@ +/*! + * \copy + * Copyright (c) 2008-2013, Cisco Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * manage_dec_ref.cpp + * + * Abstract + * Implementation for managing reference picture + * + * History + * 07/21/2008 Created + * + *****************************************************************************/ + +#include "manage_dec_ref.h" +#include "error_concealment.h" +#include "error_code.h" +#include "decoder.h" + +namespace WelsDec { + +static PPicture WelsDelShortFromList (PRefPic pRefPic, int32_t iFrameNum); +static PPicture WelsDelLongFromList (PRefPic pRefPic, uint32_t uiLongTermFrameIdx); +static PPicture WelsDelShortFromListSetUnref (PRefPic pRefPic, int32_t iFrameNum); +static PPicture WelsDelLongFromListSetUnref (PRefPic pRefPic, uint32_t uiLongTermFrameIdx); + +static int32_t MMCO (PWelsDecoderContext pCtx, PRefPic pRefPic, PRefPicMarking pRefPicMarking); +static int32_t MMCOProcess (PWelsDecoderContext pCtx, PRefPic pRefPic, uint32_t uiMmcoType, + int32_t iShortFrameNum, uint32_t uiLongTermPicNum, int32_t iLongTermFrameIdx, int32_t iMaxLongTermFrameIdx); +static int32_t SlidingWindow (PWelsDecoderContext pCtx, PRefPic pRefPic); + +static int32_t AddShortTermToList (PRefPic pRefPic, PPicture pPic); +static int32_t AddLongTermToList (PRefPic pRefPic, PPicture pPic, int32_t iLongTermFrameIdx, uint32_t uiLongTermPicNum); +static int32_t MarkAsLongTerm (PRefPic pRefPic, int32_t iFrameNum, int32_t iLongTermFrameIdx, + uint32_t uiLongTermPicNum); +static int32_t WelsCheckAndRecoverForFutureDecoding (PWelsDecoderContext pCtx); +#ifdef LONG_TERM_REF +int32_t GetLTRFrameIndex (PRefPic pRefPic, int32_t iAncLTRFrameNum); +#endif +static int32_t RemainOneBufferInDpbForEC (PWelsDecoderContext pCtx, PRefPic pRefPic); + +static void SetUnRef (PPicture pRef) { + if (NULL != pRef) { + pRef->bUsedAsRef = false; + pRef->bIsLongRef = false; + pRef->iFrameNum = -1; + pRef->iFrameWrapNum = -1; + //pRef->iFramePoc = 0; + pRef->iLongTermFrameIdx = -1; + pRef->uiLongTermPicNum = 0; + pRef->uiQualityId = -1; + pRef->uiTemporalId = -1; + pRef->uiSpatialId = -1; + 
pRef->iSpsId = -1; + pRef->bIsComplete = false; + pRef->iRefCount = 0; + + if (pRef->eSliceType == I_SLICE) { + return; + } + int32_t lists = pRef->eSliceType == P_SLICE ? 1 : 2; + for (int32_t i = 0; i < MAX_DPB_COUNT; ++i) { + for (int32_t list = 0; list < lists; ++list) { + if (pRef->pRefPic[list][i] != NULL) { + pRef->pRefPic[list][i]->iRefCount = 0; + pRef->pRefPic[list][i] = NULL; + } + } + } + } +} + +//reset pRefList when +// 1.sps arrived that is new sequence starting +// 2.IDR NAL i.e. 1st layer in IDR AU + +void WelsResetRefPic (PWelsDecoderContext pCtx) { + int32_t i = 0; + PRefPic pRefPic = &pCtx->sRefPic; + pCtx->sRefPic.uiLongRefCount[LIST_0] = pCtx->sRefPic.uiShortRefCount[LIST_0] = 0; + + pRefPic->uiRefCount[LIST_0] = 0; + pRefPic->uiRefCount[LIST_1] = 0; + + for (i = 0; i < MAX_DPB_COUNT; i++) { + if (pRefPic->pShortRefList[LIST_0][i] != NULL) { + SetUnRef (pRefPic->pShortRefList[LIST_0][i]); + pRefPic->pShortRefList[LIST_0][i] = NULL; + } + } + pRefPic->uiShortRefCount[LIST_0] = 0; + + for (i = 0; i < MAX_DPB_COUNT; i++) { + if (pRefPic->pLongRefList[LIST_0][i] != NULL) { + SetUnRef (pRefPic->pLongRefList[LIST_0][i]); + pRefPic->pLongRefList[LIST_0][i] = NULL; + } + } + pRefPic->uiLongRefCount[LIST_0] = 0; +} + +void WelsResetRefPicWithoutUnRef (PWelsDecoderContext pCtx) { + int32_t i = 0; + PRefPic pRefPic = &pCtx->sRefPic; + pCtx->sRefPic.uiLongRefCount[LIST_0] = pCtx->sRefPic.uiShortRefCount[LIST_0] = 0; + + pRefPic->uiRefCount[LIST_0] = 0; + pRefPic->uiRefCount[LIST_1] = 0; + + for (i = 0; i < MAX_DPB_COUNT; i++) { + pRefPic->pShortRefList[LIST_0][i] = NULL; + } + pRefPic->uiShortRefCount[LIST_0] = 0; + + for (i = 0; i < MAX_DPB_COUNT; i++) { + pRefPic->pLongRefList[LIST_0][i] = NULL; + } + pRefPic->uiLongRefCount[LIST_0] = 0; +} + +static int32_t WelsCheckAndRecoverForFutureDecoding (PWelsDecoderContext pCtx) { + if ((pCtx->sRefPic.uiShortRefCount[LIST_0] + pCtx->sRefPic.uiLongRefCount[LIST_0] <= 0) + && (pCtx->eSliceType != I_SLICE + && 
pCtx->eSliceType != SI_SLICE)) { + if (pCtx->pParam->eEcActiveIdc != + ERROR_CON_DISABLE) { //IDR lost!, recover it for future decoding with data all set to 0 + PPicture pRef = PrefetchPic (pCtx->pPicBuff); + if (pRef != NULL) { + // IDR lost, set new + pRef->bIsComplete = false; // Set complete flag to false for lost IDR ref picture + pRef->iSpsId = pCtx->pSps->iSpsId; + pRef->iPpsId = pCtx->pPps->iPpsId; + if (pCtx->eSliceType == B_SLICE) { + //reset reference's references when IDR is lost + for (int32_t list = LIST_0; list < LIST_A; ++list) { + for (int32_t i = 0; i < MAX_DPB_COUNT; ++i) { + pRef->pRefPic[list][i] = NULL; + } + } + } + pCtx->iErrorCode |= dsDataErrorConcealed; + bool bCopyPrevious = ((ERROR_CON_FRAME_COPY_CROSS_IDR == pCtx->pParam->eEcActiveIdc) + || (ERROR_CON_SLICE_COPY_CROSS_IDR == pCtx->pParam->eEcActiveIdc) + || (ERROR_CON_SLICE_COPY_CROSS_IDR_FREEZE_RES_CHANGE == pCtx->pParam->eEcActiveIdc) + || (ERROR_CON_SLICE_MV_COPY_CROSS_IDR == pCtx->pParam->eEcActiveIdc) + || (ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE == pCtx->pParam->eEcActiveIdc)) + && (NULL != pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb); + bCopyPrevious = bCopyPrevious + && (pRef->iWidthInPixel == pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->iWidthInPixel) + && (pRef->iHeightInPixel == pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->iHeightInPixel); + + if (!bCopyPrevious) { + memset (pRef->pData[0], 128, pRef->iLinesize[0] * pRef->iHeightInPixel); + memset (pRef->pData[1], 128, pRef->iLinesize[1] * pRef->iHeightInPixel / 2); + memset (pRef->pData[2], 128, pRef->iLinesize[2] * pRef->iHeightInPixel / 2); + } else if (pRef == pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "WelsInitRefList()::EC memcpy overlap."); + } else { + memcpy (pRef->pData[0], pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->pData[0], + pRef->iLinesize[0] * pRef->iHeightInPixel); + memcpy (pRef->pData[1], 
pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->pData[1], + pRef->iLinesize[1] * pRef->iHeightInPixel / 2); + memcpy (pRef->pData[2], pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->pData[2], + pRef->iLinesize[2] * pRef->iHeightInPixel / 2); + } + pRef->iFrameNum = 0; + pRef->iFramePoc = 0; + pRef->uiTemporalId = pRef->uiQualityId = 0; + pRef->eSliceType = pCtx->eSliceType; + ExpandReferencingPicture (pRef->pData, pRef->iWidthInPixel, pRef->iHeightInPixel, pRef->iLinesize, + pCtx->sExpandPicFunc.pfExpandLumaPicture, pCtx->sExpandPicFunc.pfExpandChromaPicture); + AddShortTermToList (&pCtx->sRefPic, pRef); + } else { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "WelsInitRefList()::PrefetchPic for EC errors."); + pCtx->iErrorCode |= dsOutOfMemory; + return ERR_INFO_REF_COUNT_OVERFLOW; + } + } + } + return ERR_NONE; +} + +static void WrapShortRefPicNum (PWelsDecoderContext pCtx) { + int32_t i; + PSliceHeader pSliceHeader = &pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader; + int32_t iMaxPicNum = 1 << pSliceHeader->pSps->uiLog2MaxFrameNum; + PPicture* ppShoreRefList = pCtx->sRefPic.pShortRefList[LIST_0]; + int32_t iShortRefCount = pCtx->sRefPic.uiShortRefCount[LIST_0]; + //wrap pic num + for (i = 0; i < iShortRefCount; i++) { + if (ppShoreRefList[i]) { + if (ppShoreRefList[i]->iFrameNum > pSliceHeader->iFrameNum) + ppShoreRefList[i]->iFrameWrapNum = ppShoreRefList[i]->iFrameNum - iMaxPicNum; + else + ppShoreRefList[i]->iFrameWrapNum = ppShoreRefList[i]->iFrameNum; + } + } +} + +/** +* fills the pRefPic.pRefList LIST_0 and LIST_0 for B-Slice. 
+*/ +int32_t WelsInitBSliceRefList (PWelsDecoderContext pCtx, int32_t iPoc) { + + int32_t err = WelsCheckAndRecoverForFutureDecoding (pCtx); + if (err != ERR_NONE) return err; + + WrapShortRefPicNum (pCtx); + + PPicture* ppShoreRefList = pCtx->sRefPic.pShortRefList[LIST_0]; + PPicture* ppLongRefList = pCtx->sRefPic.pLongRefList[LIST_0]; + memset (pCtx->sRefPic.pRefList[LIST_0], 0, MAX_DPB_COUNT * sizeof (PPicture)); + memset (pCtx->sRefPic.pRefList[LIST_1], 0, MAX_DPB_COUNT * sizeof (PPicture)); + int32_t iLSCurrPocCount = 0; + int32_t iLTCurrPocCount = 0; + PPicture pLSCurrPocList0[MAX_DPB_COUNT]; + PPicture pLTCurrPocList0[MAX_DPB_COUNT]; + for (int32_t i = 0; i < pCtx->sRefPic.uiShortRefCount[LIST_0]; ++i) { + if (ppShoreRefList[i]->iFramePoc < iPoc) { + pLSCurrPocList0[iLSCurrPocCount++] = ppShoreRefList[i]; + } + } + for (int32_t i = pCtx->sRefPic.uiShortRefCount[LIST_0] - 1; i >= 0; --i) { + if (ppShoreRefList[i]->iFramePoc > iPoc) { + pLTCurrPocList0[iLTCurrPocCount++] = ppShoreRefList[i]; + } + } + if (pCtx->sRefPic.uiLongRefCount[LIST_0] > 1) { + //long sorts in increasing order + PPicture pTemp; + for (int32_t i = 0; i < pCtx->sRefPic.uiLongRefCount[LIST_0]; ++i) { + for (int32_t j = i + 1; j < pCtx->sRefPic.uiLongRefCount[LIST_0]; ++j) { + if (ppLongRefList[j]->iFramePoc < ppLongRefList[i]->iFramePoc) { + pTemp = ppLongRefList[i]; + ppLongRefList[i] = ppLongRefList[j]; + ppLongRefList[j] = pTemp; + } + } + } + } + int32_t iCurrPocCount = iLSCurrPocCount + iLTCurrPocCount; + int32_t iCount = 0; + //LIST_0 + //short + //It may need to sort LIST_0 and LIST_1 so that they will have the right default orders. 
+ for (int32_t i = 0; i < iLSCurrPocCount; ++i) { + pCtx->sRefPic.pRefList[LIST_0][iCount++] = pLSCurrPocList0[i]; + } + if (iLSCurrPocCount > 1) { + //LIST_0 short sorts in decreasing order + PPicture pTemp; + for (int32_t i = 0; i < iLSCurrPocCount; ++i) { + for (int32_t j = i + 1; j < iLSCurrPocCount; ++j) { + if (pCtx->sRefPic.pRefList[LIST_0][j]->iFramePoc > pCtx->sRefPic.pRefList[LIST_0][i]->iFramePoc) { + pTemp = pCtx->sRefPic.pRefList[LIST_0][i]; + pCtx->sRefPic.pRefList[LIST_0][i] = pCtx->sRefPic.pRefList[LIST_0][j]; + pCtx->sRefPic.pRefList[LIST_0][j] = pTemp; + } + } + } + } + for (int32_t i = 0; i < iLTCurrPocCount; ++i) { + pCtx->sRefPic.pRefList[LIST_0][iCount++] = pLTCurrPocList0[i]; + } + if (iLTCurrPocCount > 1) { + //LIST_0 short sorts in increasing order + PPicture pTemp; + for (int32_t i = iLSCurrPocCount; i < iCurrPocCount; ++i) { + for (int32_t j = i + 1; j < iCurrPocCount; ++j) { + if (pCtx->sRefPic.pRefList[LIST_0][j]->iFramePoc < pCtx->sRefPic.pRefList[LIST_0][i]->iFramePoc) { + pTemp = pCtx->sRefPic.pRefList[LIST_0][i]; + pCtx->sRefPic.pRefList[LIST_0][i] = pCtx->sRefPic.pRefList[LIST_0][j]; + pCtx->sRefPic.pRefList[LIST_0][j] = pTemp; + } + } + } + } + //long + for (int32_t i = 0; i < pCtx->sRefPic.uiLongRefCount[LIST_0]; ++i) { + pCtx->sRefPic.pRefList[LIST_0][iCount++] = ppLongRefList[i]; + } + pCtx->sRefPic.uiRefCount[LIST_0] = iCount; + + iCount = 0; + //LIST_1 + //short + for (int32_t i = 0; i < iLTCurrPocCount; ++i) { + pCtx->sRefPic.pRefList[LIST_1][iCount++] = pLTCurrPocList0[i]; + } + if (iLTCurrPocCount > 1) { + //LIST_1 short sorts in increasing order + PPicture pTemp; + for (int32_t i = 0; i < iLTCurrPocCount; ++i) { + for (int32_t j = i + 1; j < iLTCurrPocCount; ++j) { + if (pCtx->sRefPic.pRefList[LIST_1][j]->iFramePoc < pCtx->sRefPic.pRefList[LIST_1][i]->iFramePoc) { + pTemp = pCtx->sRefPic.pRefList[LIST_1][i]; + pCtx->sRefPic.pRefList[LIST_1][i] = pCtx->sRefPic.pRefList[LIST_1][j]; + pCtx->sRefPic.pRefList[LIST_1][j] = 
pTemp; + } + } + } + } + for (int32_t i = 0; i < iLSCurrPocCount; ++i) { + pCtx->sRefPic.pRefList[LIST_1][iCount++] = pLSCurrPocList0[i]; + } + if (iLSCurrPocCount > 1) { + //LIST_1 short sorts in decreasing order + PPicture pTemp; + for (int32_t i = iLTCurrPocCount; i < iCurrPocCount; ++i) { + for (int32_t j = i + 1; j < iCurrPocCount; ++j) { + if (pCtx->sRefPic.pRefList[LIST_1][j]->iFramePoc > pCtx->sRefPic.pRefList[LIST_1][i]->iFramePoc) { + pTemp = pCtx->sRefPic.pRefList[LIST_1][i]; + pCtx->sRefPic.pRefList[LIST_1][i] = pCtx->sRefPic.pRefList[LIST_1][j]; + pCtx->sRefPic.pRefList[LIST_1][j] = pTemp; + } + } + } + } + //long + for (int32_t i = 0; i < pCtx->sRefPic.uiLongRefCount[LIST_0]; ++i) { + pCtx->sRefPic.pRefList[LIST_1][iCount++] = ppLongRefList[i]; + } + pCtx->sRefPic.uiRefCount[LIST_1] = iCount; + return ERR_NONE; +} + +/** + * fills the pRefPic.pRefList. + */ +int32_t WelsInitRefList (PWelsDecoderContext pCtx, int32_t iPoc) { + + int32_t err = WelsCheckAndRecoverForFutureDecoding (pCtx); + if (err != ERR_NONE) return err; + + WrapShortRefPicNum (pCtx); + + PPicture* ppShoreRefList = pCtx->sRefPic.pShortRefList[LIST_0]; + PPicture* ppLongRefList = pCtx->sRefPic.pLongRefList[LIST_0]; + memset (pCtx->sRefPic.pRefList[LIST_0], 0, MAX_DPB_COUNT * sizeof (PPicture)); + + int32_t i, iCount = 0; + //short + for (i = 0; i < pCtx->sRefPic.uiShortRefCount[LIST_0]; ++i) { + pCtx->sRefPic.pRefList[LIST_0][iCount++ ] = ppShoreRefList[i]; + } + + //long + for (i = 0; i < pCtx->sRefPic.uiLongRefCount[LIST_0] ; ++i) { + pCtx->sRefPic.pRefList[LIST_0][iCount++ ] = ppLongRefList[i]; + } + pCtx->sRefPic.uiRefCount[LIST_0] = iCount; + + return ERR_NONE; +} + +int32_t WelsReorderRefList (PWelsDecoderContext pCtx) { + + if (pCtx->eSliceType == I_SLICE || pCtx->eSliceType == SI_SLICE) { + return ERR_NONE; + } + + PRefPicListReorderSyn pRefPicListReorderSyn = pCtx->pCurDqLayer->pRefPicListReordering; + PNalUnitHeaderExt pNalHeaderExt = 
&pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt; + PSliceHeader pSliceHeader = &pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader; + int32_t ListCount = 1; + if (pCtx->eSliceType == B_SLICE) ListCount = 2; + for (int32_t listIdx = 0; listIdx < ListCount; ++listIdx) { + PPicture pPic = NULL; + PPicture* ppRefList = pCtx->sRefPic.pRefList[listIdx]; + int32_t iMaxRefIdx = pCtx->iPicQueueNumber; + if (iMaxRefIdx >= MAX_REF_PIC_COUNT) { + iMaxRefIdx = MAX_REF_PIC_COUNT - 1; + } + int32_t iRefCount = pSliceHeader->uiRefCount[listIdx]; + int32_t iPredFrameNum = pSliceHeader->iFrameNum; + int32_t iMaxPicNum = 1 << pSliceHeader->pSps->uiLog2MaxFrameNum; + int32_t iAbsDiffPicNum = -1; + int32_t iReorderingIndex = 0; + int32_t i = 0; + + if (iRefCount <= 0) { + pCtx->iErrorCode = dsNoParamSets; //No any reference for decoding, SHOULD request IDR + return ERR_INFO_REFERENCE_PIC_LOST; + } + + if (pRefPicListReorderSyn->bRefPicListReorderingFlag[listIdx]) { + while ((iReorderingIndex < iMaxRefIdx) + && (pRefPicListReorderSyn->sReorderingSyn[listIdx][iReorderingIndex].uiReorderingOfPicNumsIdc != 3)) { + uint16_t uiReorderingOfPicNumsIdc = + pRefPicListReorderSyn->sReorderingSyn[listIdx][iReorderingIndex].uiReorderingOfPicNumsIdc; + if (uiReorderingOfPicNumsIdc < 2) { + iAbsDiffPicNum = pRefPicListReorderSyn->sReorderingSyn[listIdx][iReorderingIndex].uiAbsDiffPicNumMinus1 + 1; + + if (uiReorderingOfPicNumsIdc == 0) { + iPredFrameNum -= iAbsDiffPicNum; + } else { + iPredFrameNum += iAbsDiffPicNum; + } + iPredFrameNum &= iMaxPicNum - 1; + + for (i = iMaxRefIdx - 1; i >= 0; i--) { + if (ppRefList[i] != NULL && ppRefList[i]->iFrameNum == iPredFrameNum && !ppRefList[i]->bIsLongRef) { + if ((pNalHeaderExt->uiQualityId == ppRefList[i]->uiQualityId) + && (pSliceHeader->iSpsId != ppRefList[i]->iSpsId)) { //check; + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "WelsReorderRefList()::::BASE LAYER::::iSpsId:%d, ref_sps_id:%d", + pSliceHeader->iSpsId, 
ppRefList[i]->iSpsId); + pCtx->iErrorCode = dsNoParamSets; //cross-IDR reference frame selection, SHOULD request IDR.-- + return ERR_INFO_REFERENCE_PIC_LOST; + } else { + break; + } + } + } + + } else if (uiReorderingOfPicNumsIdc == 2) { + for (i = iMaxRefIdx - 1; i >= 0; i--) { + if (ppRefList[i] != NULL && ppRefList[i]->bIsLongRef + && ppRefList[i]->iLongTermFrameIdx == + pRefPicListReorderSyn->sReorderingSyn[listIdx][iReorderingIndex].uiLongTermPicNum) { + if ((pNalHeaderExt->uiQualityId == ppRefList[i]->uiQualityId) + && (pSliceHeader->iSpsId != ppRefList[i]->iSpsId)) { //check; + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "WelsReorderRefList()::::BASE LAYER::::iSpsId:%d, ref_sps_id:%d", + pSliceHeader->iSpsId, ppRefList[i]->iSpsId); + pCtx->iErrorCode = dsNoParamSets; //cross-IDR reference frame selection, SHOULD request IDR.-- + return ERR_INFO_REFERENCE_PIC_LOST; + } else { + break; + } + } + } + } + if (i < 0) { + return ERR_INFO_REFERENCE_PIC_LOST; + } + pPic = ppRefList[i]; + if (i > iReorderingIndex) { + memmove (&ppRefList[1 + iReorderingIndex], &ppRefList[iReorderingIndex], + (i - iReorderingIndex) * sizeof (PPicture)); //confirmed_safe_unsafe_usage + } else if (i < iReorderingIndex) { + memmove (&ppRefList[1 + iReorderingIndex], &ppRefList[iReorderingIndex], + (iMaxRefIdx - iReorderingIndex) * sizeof (PPicture)); + } + ppRefList[iReorderingIndex] = pPic; + iReorderingIndex++; + } + } + } + return ERR_NONE; +} + +//WelsReorderRefList2 is the test code: applies the slice's reordering syntax to pCtx->sRefPic.pRefList[listIdx], taking pictures from the LIST_0 short-term and long-term lists; returns ERR_NONE +int32_t WelsReorderRefList2 (PWelsDecoderContext pCtx) { + + if (pCtx->eSliceType == I_SLICE || pCtx->eSliceType == SI_SLICE) { + return ERR_NONE; + } + + PRefPicListReorderSyn pRefPicListReorderSyn = pCtx->pCurDqLayer->pRefPicListReordering; + PSliceHeader pSliceHeader = &pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader; + + PPicture* ppShoreRefList = pCtx->sRefPic.pShortRefList[LIST_0]; + int32_t iShortRefCount = pCtx->sRefPic.uiShortRefCount[LIST_0]; + PPicture*
ppLongRefList = pCtx->sRefPic.pLongRefList[LIST_0]; + int32_t iLongRefCount = pCtx->sRefPic.uiLongRefCount[LIST_0]; + int32_t i = 0; + int32_t j = 0; + int32_t k = 0; + int32_t iMaxRefIdx = pCtx->pSps->iNumRefFrames; + const int32_t iCurFrameNum = pSliceHeader->iFrameNum; + const int32_t iMaxPicNum = 1 << pSliceHeader->pSps->uiLog2MaxFrameNum; + int32_t iListCount = 1; + if (pCtx->eSliceType == B_SLICE) iListCount = 2; + for (int32_t listIdx = 0; listIdx < iListCount; ++listIdx) { + PPicture* ppRefList = pCtx->sRefPic.pRefList[listIdx]; + int32_t iCount = 0; + int32_t iRefCount = pSliceHeader->uiRefCount[listIdx]; + int32_t iAbsDiffPicNum = -1; + + if (pRefPicListReorderSyn->bRefPicListReorderingFlag[listIdx]) { + int32_t iPredFrameNum = iCurFrameNum; + for (i = 0; pRefPicListReorderSyn->sReorderingSyn[listIdx][i].uiReorderingOfPicNumsIdc != 3; i++) { + if (iCount >= iMaxRefIdx) + break; + + for (j = iRefCount; j > iCount; j--) + ppRefList[j] = ppRefList[j - 1]; + + uint16_t uiReorderingOfPicNumsIdc = + pRefPicListReorderSyn->sReorderingSyn[listIdx][i].uiReorderingOfPicNumsIdc; + + if (uiReorderingOfPicNumsIdc < 2) { // reorder short references + iAbsDiffPicNum = (int32_t) (pRefPicListReorderSyn->sReorderingSyn[listIdx][i].uiAbsDiffPicNumMinus1 + 1); + if (uiReorderingOfPicNumsIdc == 0) { + if (iPredFrameNum - iAbsDiffPicNum < 0) + iPredFrameNum -= (iAbsDiffPicNum - iMaxPicNum); + else + iPredFrameNum -= iAbsDiffPicNum; + } else { + if (iPredFrameNum + iAbsDiffPicNum >= iMaxPicNum) + iPredFrameNum += (iAbsDiffPicNum - iMaxPicNum); + else + iPredFrameNum += iAbsDiffPicNum; + } + + if (iPredFrameNum > iCurFrameNum) { + iPredFrameNum -= iMaxPicNum; + } + + for (j = 0; j < iShortRefCount; j++) { + if (ppShoreRefList[j]) { + if (ppShoreRefList[j]->iFrameWrapNum == iPredFrameNum) { + ppRefList[iCount++] = ppShoreRefList[j]; + break; + } + } + } + k = iCount; + for (j = k; j <= iRefCount; j++) { + if (ppRefList[j] != NULL) { + if (ppRefList[j]->bIsLongRef ||
ppRefList[j]->iFrameWrapNum != iPredFrameNum) + ppRefList[k++] = ppRefList[j]; + } + } + } else { // reorder long term references uiReorderingOfPicNumsIdc == 2 + iPredFrameNum = pRefPicListReorderSyn->sReorderingSyn[listIdx][i].uiLongTermPicNum; + for (j = 0; j < iLongRefCount; j++) { + if (ppLongRefList[j] != NULL) { + if (ppLongRefList[j]->uiLongTermPicNum == (uint32_t)iPredFrameNum) { + ppRefList[iCount++] = ppLongRefList[j]; + break; + } + } + } + k = iCount; + for (j = k; j <= iRefCount; j++) { + if (ppRefList[j] != NULL) { + if (!ppRefList[j]->bIsLongRef || ppRefList[j]->uiLongTermPicNum != (uint32_t)iPredFrameNum) // test the ppRefList entry itself: j indexes ppRefList, not ppLongRefList (whose slot j may be NULL or unrelated) + ppRefList[k++] = ppRefList[j]; + } + } + } + } + } + + for (i = WELS_MAX (1, WELS_MAX (iCount, pCtx->sRefPic.uiRefCount[listIdx])); i < iRefCount; i++) + ppRefList[i] = ppRefList[i - 1]; + pCtx->sRefPic.uiRefCount[listIdx] = (uint8_t)WELS_MIN (WELS_MAX (iCount, pCtx->sRefPic.uiRefCount[listIdx]), + iRefCount); + } + return ERR_NONE; +} + +int32_t WelsMarkAsRef (PWelsDecoderContext pCtx, PPicture pLastDec) { + PPicture pDec = pLastDec; + bool isThreadCtx = true; + if (pDec == NULL) { + pDec = pCtx->pDec; + isThreadCtx = false; + } + PRefPic pRefPic = isThreadCtx ?
&pCtx->sTmpRefPic : &pCtx->sRefPic; + PRefPicMarking pRefPicMarking = pCtx->pCurDqLayer->pRefPicMarking; + PAccessUnit pCurAU = pCtx->pAccessUnitList; + bool bIsIDRAU = false; + uint32_t j; + + int32_t iRet = ERR_NONE; + + pDec->uiQualityId = pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt.uiQualityId; + pDec->uiTemporalId = pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt.uiTemporalId; + pDec->iSpsId = pCtx->pSps->iSpsId; + pDec->iPpsId = pCtx->pPps->iPpsId; + + for (j = pCurAU->uiStartPos; j <= pCurAU->uiEndPos; j++) { + if (pCurAU->pNalUnitsList[j]->sNalHeaderExt.sNalUnitHeader.eNalUnitType == NAL_UNIT_CODED_SLICE_IDR + || pCurAU->pNalUnitsList[j]->sNalHeaderExt.bIdrFlag) { + bIsIDRAU = true; + break; + } + } + if (bIsIDRAU) { + if (pRefPicMarking->bLongTermRefFlag) { + pRefPic->iMaxLongTermFrameIdx = 0; + AddLongTermToList (pRefPic, pDec, 0, 0); + } else { + pRefPic->iMaxLongTermFrameIdx = -1; + } + } else { + if (pRefPicMarking->bAdaptiveRefPicMarkingModeFlag) { + iRet = MMCO (pCtx, pRefPic, pRefPicMarking); + if (iRet != ERR_NONE) { + if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) { + iRet = RemainOneBufferInDpbForEC (pCtx, pRefPic); + WELS_VERIFY_RETURN_IF (iRet, iRet); + } else { + return iRet; + } + } + + if (pCtx->pLastDecPicInfo->bLastHasMmco5) { + pDec->iFrameNum = 0; + pDec->iFramePoc = 0; + } + + } else { + iRet = SlidingWindow (pCtx, pRefPic); + if (iRet != ERR_NONE) { + if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) { + iRet = RemainOneBufferInDpbForEC (pCtx, pRefPic); + WELS_VERIFY_RETURN_IF (iRet, iRet); + } else { + return iRet; + } + } + } + } + + if (!pDec->bIsLongRef) { + if (pRefPic->uiLongRefCount[LIST_0] + pRefPic->uiShortRefCount[LIST_0] >= WELS_MAX (1, pCtx->pSps->iNumRefFrames)) { + if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) { + iRet = RemainOneBufferInDpbForEC (pCtx, pRefPic); + WELS_VERIFY_RETURN_IF (iRet, iRet); + } else { + return ERR_INFO_INVALID_MMCO_REF_NUM_OVERFLOW; + } + } + iRet = AddShortTermToList (pRefPic, 
pDec); + } + + return iRet; +} + +static int32_t MMCO (PWelsDecoderContext pCtx, PRefPic pRefPic, PRefPicMarking pRefPicMarking) { + PSps pSps = pCtx->pCurDqLayer->sLayerInfo.pSps; + int32_t i = 0; + int32_t iRet = ERR_NONE; + for (i = 0; i < MAX_MMCO_COUNT && pRefPicMarking->sMmcoRef[i].uiMmcoType != MMCO_END; i++) { + uint32_t uiMmcoType = pRefPicMarking->sMmcoRef[i].uiMmcoType; + int32_t iShortFrameNum = (pCtx->iFrameNum - pRefPicMarking->sMmcoRef[i].iDiffOfPicNum) & (( + 1 << pSps->uiLog2MaxFrameNum) - 1); + uint32_t uiLongTermPicNum = pRefPicMarking->sMmcoRef[i].uiLongTermPicNum; + int32_t iLongTermFrameIdx = pRefPicMarking->sMmcoRef[i].iLongTermFrameIdx; + int32_t iMaxLongTermFrameIdx = pRefPicMarking->sMmcoRef[i].iMaxLongTermFrameIdx; + if (uiMmcoType > MMCO_LONG) { + return ERR_INFO_INVALID_MMCO_OPCODE_BASE; + } + iRet = MMCOProcess (pCtx, pRefPic, uiMmcoType, iShortFrameNum, uiLongTermPicNum, iLongTermFrameIdx, + iMaxLongTermFrameIdx); + if (iRet != ERR_NONE) { + return iRet; + } + } + if (i == MAX_MMCO_COUNT) { //although Rec does not handle this condition, we here prohibit too many MMCO op + return ERR_INFO_INVALID_MMCO_NUM; + } + + return ERR_NONE; +} +static int32_t MMCOProcess (PWelsDecoderContext pCtx, PRefPic pRefPic, uint32_t uiMmcoType, + int32_t iShortFrameNum, uint32_t uiLongTermPicNum, int32_t iLongTermFrameIdx, int32_t iMaxLongTermFrameIdx) { + PPicture pPic = NULL; + int32_t i = 0; + int32_t iRet = ERR_NONE; + + switch (uiMmcoType) { + case MMCO_SHORT2UNUSED: + pPic = WelsDelShortFromListSetUnref (pRefPic, iShortFrameNum); + if (pPic == NULL) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "MMCO_SHORT2UNUSED: delete an empty entry from short term list"); + } + break; + case MMCO_LONG2UNUSED: + pPic = WelsDelLongFromListSetUnref (pRefPic, uiLongTermPicNum); + if (pPic == NULL) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "MMCO_LONG2UNUSED: delete an empty entry from long term list"); + } + break; + case MMCO_SHORT2LONG: + if 
(iLongTermFrameIdx > pRefPic->iMaxLongTermFrameIdx) { + return ERR_INFO_INVALID_MMCO_LONG_TERM_IDX_EXCEED_MAX; + } + pPic = WelsDelShortFromList (pRefPic, iShortFrameNum); + if (pPic == NULL) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "MMCO_LONG2LONG: delete an empty entry from short term list"); + break; + } + WelsDelLongFromListSetUnref (pRefPic, iLongTermFrameIdx); +#ifdef LONG_TERM_REF + pCtx->bCurAuContainLtrMarkSeFlag = true; + pCtx->iFrameNumOfAuMarkedLtr = iShortFrameNum; + WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO, "ex_mark_avc():::MMCO_SHORT2LONG:::LTR marking....iFrameNum: %d", + pCtx->iFrameNumOfAuMarkedLtr); +#endif + + MarkAsLongTerm (pRefPic, iShortFrameNum, iLongTermFrameIdx, uiLongTermPicNum); + break; + case MMCO_SET_MAX_LONG: + pRefPic->iMaxLongTermFrameIdx = iMaxLongTermFrameIdx; + for (i = 0 ; i < pRefPic->uiLongRefCount[LIST_0]; i++) { + if (pRefPic->pLongRefList[LIST_0][i]->iLongTermFrameIdx > pRefPic->iMaxLongTermFrameIdx) { + WelsDelLongFromListSetUnref (pRefPic, pRefPic->pLongRefList[LIST_0][i]->iLongTermFrameIdx); + } + } + break; + case MMCO_RESET: + WelsResetRefPic (pCtx); + pCtx->pLastDecPicInfo->bLastHasMmco5 = true; + break; + case MMCO_LONG: + if (iLongTermFrameIdx > pRefPic->iMaxLongTermFrameIdx) { + return ERR_INFO_INVALID_MMCO_LONG_TERM_IDX_EXCEED_MAX; + } + WelsDelLongFromListSetUnref (pRefPic, iLongTermFrameIdx); + if (pRefPic->uiLongRefCount[LIST_0] + pRefPic->uiShortRefCount[LIST_0] >= WELS_MAX (1, pCtx->pSps->iNumRefFrames)) { + return ERR_INFO_INVALID_MMCO_REF_NUM_OVERFLOW; + } +#ifdef LONG_TERM_REF + pCtx->bCurAuContainLtrMarkSeFlag = true; + pCtx->iFrameNumOfAuMarkedLtr = pCtx->iFrameNum; + WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO, "ex_mark_avc():::MMCO_LONG:::LTR marking....iFrameNum: %d", + pCtx->iFrameNum); +#endif + iRet = AddLongTermToList (pRefPic, pCtx->pDec, iLongTermFrameIdx, uiLongTermPicNum); + break; + default : + break; + } + + return iRet; +} + +static int32_t SlidingWindow (PWelsDecoderContext pCtx, 
PRefPic pRefPic) { + PPicture pPic = NULL; + int32_t i = 0; + + if (pRefPic->uiShortRefCount[LIST_0] + pRefPic->uiLongRefCount[LIST_0] >= pCtx->pSps->iNumRefFrames) { + if (pRefPic->uiShortRefCount[LIST_0] == 0) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "No reference picture in short term list when sliding window"); + return ERR_INFO_INVALID_MMCO_REF_NUM_NOT_ENOUGH; + } + for (i = pRefPic->uiShortRefCount[LIST_0] - 1; i >= 0; i--) { + pPic = WelsDelShortFromList (pRefPic, pRefPic->pShortRefList[LIST_0][i]->iFrameNum); + if (pPic) { + SetUnRef (pPic); + break; + } else { + return ERR_INFO_INVALID_MMCO_REF_NUM_OVERFLOW; + } + } + } + return ERR_NONE; +} + +static PPicture WelsDelShortFromList (PRefPic pRefPic, int32_t iFrameNum) { + int32_t i = 0; + int32_t iMoveSize = 0; + PPicture pPic = NULL; + + for (i = 0; i < pRefPic->uiShortRefCount[LIST_0]; i++) { + if (pRefPic->pShortRefList[LIST_0][i]->iFrameNum == iFrameNum) { + iMoveSize = pRefPic->uiShortRefCount[LIST_0] - i - 1; + pPic = pRefPic->pShortRefList[LIST_0][i]; + pPic->bUsedAsRef = false; + pRefPic->pShortRefList[LIST_0][i] = NULL; + if (iMoveSize > 0) { + memmove (&pRefPic->pShortRefList[LIST_0][i], &pRefPic->pShortRefList[LIST_0][i + 1], + iMoveSize * sizeof (PPicture)); //confirmed_safe_unsafe_usage + } + pRefPic->uiShortRefCount[LIST_0]--; + pRefPic->pShortRefList[LIST_0][pRefPic->uiShortRefCount[LIST_0]] = NULL; + break; + } + } + return pPic; +} + +static PPicture WelsDelShortFromListSetUnref (PRefPic pRefPic, int32_t iFrameNum) { + PPicture pPic = WelsDelShortFromList (pRefPic, iFrameNum); + if (pPic) { + SetUnRef (pPic); + } + return pPic; +} + +static PPicture WelsDelLongFromList (PRefPic pRefPic, uint32_t uiLongTermFrameIdx) { + PPicture pPic = NULL; + int32_t i = 0; + for (i = 0; i < pRefPic->uiLongRefCount[LIST_0]; i++) { + pPic = pRefPic->pLongRefList[LIST_0][i]; + if (pPic->iLongTermFrameIdx == (int32_t)uiLongTermFrameIdx) { + int32_t iMoveSize = pRefPic->uiLongRefCount[LIST_0] - i - 1; + 
pPic->bUsedAsRef = false; + pPic->bIsLongRef = false; + if (iMoveSize > 0) { + memmove (&pRefPic->pLongRefList[LIST_0][i], &pRefPic->pLongRefList[LIST_0][i + 1], + iMoveSize * sizeof (PPicture)); //confirmed_safe_unsafe_usage + } + pRefPic->uiLongRefCount[LIST_0]--; + pRefPic->pLongRefList[LIST_0][pRefPic->uiLongRefCount[LIST_0]] = NULL; + return pPic; + } + } + return NULL; +} + +static PPicture WelsDelLongFromListSetUnref (PRefPic pRefPic, uint32_t uiLongTermFrameIdx) { + PPicture pPic = WelsDelLongFromList (pRefPic, uiLongTermFrameIdx); + if (pPic) { + SetUnRef (pPic); + } + return pPic; +} + +static int32_t AddShortTermToList (PRefPic pRefPic, PPicture pPic) { + pPic->bUsedAsRef = true; + pPic->bIsLongRef = false; + pPic->iLongTermFrameIdx = -1; + if (pRefPic->uiShortRefCount[LIST_0] > 0) { + // Check the duplicate frame_num in short ref list + for (int32_t iPos = 0; iPos < pRefPic->uiShortRefCount[LIST_0]; iPos++) { + if (!pRefPic->pShortRefList[LIST_0][iPos]) { + return ERR_INFO_INVALID_PTR; + } + if (pPic->iFrameNum == pRefPic->pShortRefList[LIST_0][iPos]->iFrameNum) { + // Replace the previous ref pic with the new one with the same frame_num + pRefPic->pShortRefList[LIST_0][iPos] = pPic; + return ERR_INFO_DUPLICATE_FRAME_NUM; + } + } + + memmove (&pRefPic->pShortRefList[LIST_0][1], &pRefPic->pShortRefList[LIST_0][0], + pRefPic->uiShortRefCount[LIST_0]*sizeof (PPicture));//confirmed_safe_unsafe_usage + } + pRefPic->pShortRefList[LIST_0][0] = pPic; + pRefPic->uiShortRefCount[LIST_0]++; + return ERR_NONE; +} + +static int32_t AddLongTermToList (PRefPic pRefPic, PPicture pPic, int32_t iLongTermFrameIdx, + uint32_t uiLongTermPicNum) { + int32_t i = 0; + + pPic->bUsedAsRef = true; + pPic->bIsLongRef = true; + pPic->iLongTermFrameIdx = iLongTermFrameIdx; + pPic->uiLongTermPicNum = uiLongTermPicNum; + if (pRefPic->uiLongRefCount[LIST_0] == 0) { + pRefPic->pLongRefList[LIST_0][pRefPic->uiLongRefCount[LIST_0]] = pPic; + } else { + for (i = 0; i < 
pRefPic->uiLongRefCount[LIST_0]; i++) { + if (!pRefPic->pLongRefList[LIST_0][i]) { + return ERR_INFO_INVALID_PTR; + } + if (pRefPic->pLongRefList[LIST_0][i]->iLongTermFrameIdx > pPic->iLongTermFrameIdx) { + break; + } + } + memmove (&pRefPic->pLongRefList[LIST_0][i + 1], &pRefPic->pLongRefList[LIST_0][i], + (pRefPic->uiLongRefCount[LIST_0] - i)*sizeof (PPicture)); //confirmed_safe_unsafe_usage + pRefPic->pLongRefList[LIST_0][i] = pPic; + } + + pRefPic->uiLongRefCount[LIST_0]++; + return ERR_NONE; +} + +static int32_t MarkAsLongTerm (PRefPic pRefPic, int32_t iFrameNum, int32_t iLongTermFrameIdx, + uint32_t uiLongTermPicNum) { + PPicture pPic = NULL; + int32_t i = 0; + int32_t iRet = ERR_NONE; + WelsDelLongFromListSetUnref (pRefPic, iLongTermFrameIdx); + + for (i = 0; i < pRefPic->uiRefCount[LIST_0]; i++) { + pPic = pRefPic->pRefList[LIST_0][i]; + if (pPic->iFrameNum == iFrameNum && !pPic->bIsLongRef) { + iRet = AddLongTermToList (pRefPic, pPic, iLongTermFrameIdx, uiLongTermPicNum); + break; + } + } + + return iRet; +} + +#ifdef LONG_TERM_REF +int32_t GetLTRFrameIndex (PRefPic pRefPic, int32_t iAncLTRFrameNum) { + int32_t iLTRFrameIndex = -1; + PPicture pPic; + for (int i = 0; i < pRefPic->uiLongRefCount[0]; ++i) { + pPic = pRefPic->pLongRefList[LIST_0][i]; + if (pPic->iFrameNum == iAncLTRFrameNum) { + return (pPic->iLongTermFrameIdx); + } + } + return iLTRFrameIndex; +} +#endif + +static int32_t RemainOneBufferInDpbForEC (PWelsDecoderContext pCtx, PRefPic pRefPic) { + int32_t iRet = ERR_NONE; + if (pRefPic->uiShortRefCount[0] + pRefPic->uiLongRefCount[0] < pCtx->pSps->iNumRefFrames) + return iRet; + + if (pRefPic->uiShortRefCount[0] > 0) { + iRet = SlidingWindow (pCtx, pRefPic); + } else { //all LTR, remove the smallest long_term_frame_idx + int32_t iLongTermFrameIdx = 0; + int32_t iMaxLongTermFrameIdx = pRefPic->iMaxLongTermFrameIdx; +#ifdef LONG_TERM_REF + int32_t iCurrLTRFrameIdx = GetLTRFrameIndex (pRefPic, pCtx->iFrameNumOfAuMarkedLtr); +#endif + while 
((pRefPic->uiLongRefCount[0] >= pCtx->pSps->iNumRefFrames) && (iLongTermFrameIdx <= iMaxLongTermFrameIdx)) { +#ifdef LONG_TERM_REF + if (iLongTermFrameIdx == iCurrLTRFrameIdx) { + iLongTermFrameIdx++; + continue; + } +#endif + WelsDelLongFromListSetUnref (pRefPic, iLongTermFrameIdx); + iLongTermFrameIdx++; + } + } + if (pRefPic->uiShortRefCount[0] + pRefPic->uiLongRefCount[0] >= + pCtx->pSps->iNumRefFrames) { //fail to remain one empty buffer in DPB + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "RemainOneBufferInDpbForEC(): empty one DPB failed for EC!"); + iRet = ERR_INFO_REF_COUNT_OVERFLOW; + } + + return iRet; +} + +} // namespace WelsDec diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/memmgr_nal_unit.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/memmgr_nal_unit.cpp new file mode 100644 index 000000000..c274b71a6 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/memmgr_nal_unit.cpp @@ -0,0 +1,148 @@ +/*! + * \copy + * Copyright (c) 2008-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * memmgr_nal_unit.c + * + * Abstract + * memory manager utils for NAL Unit list available + * + * History + * 07/10/2008 Created + * + *****************************************************************************/ +#include "memmgr_nal_unit.h" +#include "memory_align.h" +#include "error_code.h" + +namespace WelsDec { + +int32_t MemInitNalList (PAccessUnit* ppAu, const uint32_t kuiSize, CMemoryAlign* pMa) { + uint32_t uiIdx = 0; + uint8_t* pBase = NULL, *pPtr = NULL; + const uint32_t kuiSizeAu = sizeof (SAccessUnit); + const uint32_t kuiSizeNalUnitPtr = kuiSize * sizeof (PNalUnit); + const uint32_t kuiSizeNalUnit = sizeof (SNalUnit); + const uint32_t kuiCountSize = (kuiSizeAu + kuiSizeNalUnitPtr + kuiSize * kuiSizeNalUnit) * sizeof (uint8_t); + + if (kuiSize == 0) + return ERR_INFO_INVALID_PARAM; + + if (*ppAu != NULL) { + MemFreeNalList (ppAu, pMa); + } + + pBase = (uint8_t*)pMa->WelsMallocz (kuiCountSize, "Access Unit"); + if (pBase == NULL) + return ERR_INFO_OUT_OF_MEMORY; + pPtr = pBase; + *ppAu = (PAccessUnit)pPtr; + pPtr += kuiSizeAu; + (*ppAu)->pNalUnitsList = (PNalUnit*)pPtr; + pPtr += kuiSizeNalUnitPtr; + do { + (*ppAu)->pNalUnitsList[uiIdx] = (PNalUnit)pPtr; + pPtr += kuiSizeNalUnit; + ++ uiIdx; + } while (uiIdx < kuiSize); + + (*ppAu)->uiCountUnitsNum = kuiSize; + (*ppAu)->uiAvailUnitsNum = 0; + (*ppAu)->uiActualUnitsNum = 0; + (*ppAu)->uiStartPos = 0; + (*ppAu)->uiEndPos = 0; + (*ppAu)->bCompletedAuFlag = 
false; + + return ERR_NONE; +} + +int32_t MemFreeNalList (PAccessUnit* ppAu, CMemoryAlign* pMa) { + if (ppAu != NULL) { + PAccessUnit pAu = *ppAu; + if (pAu != NULL) { + pMa->WelsFree (pAu, "Access Unit"); + *ppAu = NULL; + } + } + return ERR_NONE; +} + + +int32_t ExpandNalUnitList (PAccessUnit* ppAu, const int32_t kiOrgSize, const int32_t kiExpSize, CMemoryAlign* pMa) { + if (kiExpSize <= kiOrgSize) + return ERR_INFO_INVALID_PARAM; + else { + PAccessUnit pTmp = NULL; + int32_t iIdx = 0; + int32_t iRet = ERR_NONE; + if ((iRet = MemInitNalList (&pTmp, kiExpSize, pMa)) != ERR_NONE) // request new list with expanding + return iRet; + + do { + memcpy (pTmp->pNalUnitsList[iIdx], (*ppAu)->pNalUnitsList[iIdx], sizeof (SNalUnit)); //confirmed_safe_unsafe_usage + ++ iIdx; + } while (iIdx < kiOrgSize); + + pTmp->uiCountUnitsNum = kiExpSize; + pTmp->uiAvailUnitsNum = (*ppAu)->uiAvailUnitsNum; + pTmp->uiActualUnitsNum = (*ppAu)->uiActualUnitsNum; + pTmp->uiEndPos = (*ppAu)->uiEndPos; + pTmp->bCompletedAuFlag = (*ppAu)->bCompletedAuFlag; + + MemFreeNalList (ppAu, pMa); // free old list + *ppAu = pTmp; + return ERR_NONE; + } +} + +/* + * MemGetNextNal + * Get next NAL Unit for using. + * Need expand NAL Unit list if exceeding count number of available NAL Units withing an Access Unit + */ +PNalUnit MemGetNextNal (PAccessUnit* ppAu, CMemoryAlign* pMa) { + PAccessUnit pAu = *ppAu; + PNalUnit pNu = NULL; + + if (pAu->uiAvailUnitsNum >= pAu->uiCountUnitsNum) { // need expand list + const uint32_t kuiExpandingSize = pAu->uiCountUnitsNum + (MAX_NAL_UNIT_NUM_IN_AU >> 1); + if (ExpandNalUnitList (ppAu, pAu->uiCountUnitsNum, kuiExpandingSize, pMa)) + return NULL; // out of memory + pAu = *ppAu; + } + + pNu = pAu->pNalUnitsList[pAu->uiAvailUnitsNum++]; // ready for next nal position + + memset (pNu, 0, sizeof (SNalUnit)); // Please do not remove this for cache intend!! 
+ + return pNu; +} + +} // namespace WelsDec diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/mv_pred.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/mv_pred.cpp new file mode 100644 index 000000000..642a982d9 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/mv_pred.cpp @@ -0,0 +1,1183 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file mv_pred.c + * + * \brief Get MV predictor and update motion vector of mb cache + * + * \date 05/22/2009 Created + * + ************************************************************************************* + */ + +#include "mv_pred.h" +#include "ls_defines.h" +#include "mb_cache.h" +#include "parse_mb_syn_cabac.h" + +namespace WelsDec { + +static inline void SetRectBlock (void* vp, int32_t w, const int32_t h, int32_t stride, const uint32_t val, + const int32_t size) { + uint8_t* p = (uint8_t*)vp; + w *= size; + if (w == 1 && h == 4) { + * (uint8_t*) (p + 0 * stride) = + * (uint8_t*) (p + 1 * stride) = + * (uint8_t*) (p + 2 * stride) = + * (uint8_t*) (p + 3 * stride) = (uint8_t)val; + } else if (w == 2 && h == 2) { + * (uint16_t*) (p + 0 * stride) = + * (uint16_t*) (p + 1 * stride) = size == 4 ? (uint16_t)val : (uint16_t) (val * 0x0101U); + } else if (w == 2 && h == 4) { + * (uint16_t*) (p + 0 * stride) = + * (uint16_t*) (p + 1 * stride) = + * (uint16_t*) (p + 2 * stride) = + * (uint16_t*) (p + 3 * stride) = size == 4 ? (uint16_t)val : (uint16_t) (val * 0x0101U); + } else if (w == 4 && h == 2) { + * (uint32_t*) (p + 0 * stride) = + * (uint32_t*) (p + 1 * stride) = size == 4 ? val : (uint32_t) (val * 0x01010101UL); + } else if (w == 4 && h == 4) { + * (uint32_t*) (p + 0 * stride) = + * (uint32_t*) (p + 1 * stride) = + * (uint32_t*) (p + 2 * stride) = + * (uint32_t*) (p + 3 * stride) = size == 4 ? val : (uint32_t) (val * 0x01010101UL); + } else if (w == 8 && h == 1) { + * (uint32_t*) (p + 0 * stride) = + * (uint32_t*) (p + 0 * stride + 4) = size == 4 ? val : (uint32_t) (val * 0x01010101UL); + } else if (w == 8 && h == 2) { + * (uint32_t*) (p + 0 * stride) = + * (uint32_t*) (p + 0 * stride + 4) = + * (uint32_t*) (p + 1 * stride) = + * (uint32_t*) (p + 1 * stride + 4) = size == 4 ? 
val : (uint32_t) (val * 0x01010101UL); + } else if (w == 8 && h == 4) { + * (uint32_t*) (p + 0 * stride) = + * (uint32_t*) (p + 0 * stride + 4) = + * (uint32_t*) (p + 1 * stride) = + * (uint32_t*) (p + 1 * stride + 4) = + * (uint32_t*) (p + 2 * stride) = + * (uint32_t*) (p + 2 * stride + 4) = + * (uint32_t*) (p + 3 * stride) = + * (uint32_t*) (p + 3 * stride + 4) = size == 4 ? val : (uint32_t) (val * 0x01010101UL); + } else if (w == 16 && h == 2) { + * (uint32_t*) (p + 0 * stride + 0) = + * (uint32_t*) (p + 0 * stride + 4) = + * (uint32_t*) (p + 0 * stride + 8) = + * (uint32_t*) (p + 0 * stride + 12) = + * (uint32_t*) (p + 1 * stride + 0) = + * (uint32_t*) (p + 1 * stride + 4) = + * (uint32_t*) (p + 1 * stride + 8) = + * (uint32_t*) (p + 1 * stride + 12) = size == 4 ? val : (uint32_t) (val * 0x01010101UL); + } else if (w == 16 && h == 3) { + * (uint32_t*) (p + 0 * stride + 0) = + * (uint32_t*) (p + 0 * stride + 4) = + * (uint32_t*) (p + 0 * stride + 8) = + * (uint32_t*) (p + 0 * stride + 12) = + * (uint32_t*) (p + 1 * stride + 0) = + * (uint32_t*) (p + 1 * stride + 4) = + * (uint32_t*) (p + 1 * stride + 8) = + * (uint32_t*) (p + 1 * stride + 12) = + * (uint32_t*) (p + 2 * stride + 0) = + * (uint32_t*) (p + 2 * stride + 4) = + * (uint32_t*) (p + 2 * stride + 8) = + * (uint32_t*) (p + 2 * stride + 12) = size == 4 ? 
val : (uint32_t) (val * 0x01010101UL); + } else if (w == 16 && h == 4) { + * (uint32_t*) (p + 0 * stride + 0) = + * (uint32_t*) (p + 0 * stride + 4) = + * (uint32_t*) (p + 0 * stride + 8) = + * (uint32_t*) (p + 0 * stride + 12) = + * (uint32_t*) (p + 1 * stride + 0) = + * (uint32_t*) (p + 1 * stride + 4) = + * (uint32_t*) (p + 1 * stride + 8) = + * (uint32_t*) (p + 1 * stride + 12) = + * (uint32_t*) (p + 2 * stride + 0) = + * (uint32_t*) (p + 2 * stride + 4) = + * (uint32_t*) (p + 2 * stride + 8) = + * (uint32_t*) (p + 2 * stride + 12) = + * (uint32_t*) (p + 3 * stride + 0) = + * (uint32_t*) (p + 3 * stride + 4) = + * (uint32_t*) (p + 3 * stride + 8) = + * (uint32_t*) (p + 3 * stride + 12) = size == 4 ? val : (uint32_t) (val * 0x01010101UL); + } +} +void CopyRectBlock4Cols (void* vdst, void* vsrc, const int32_t stride_dst, const int32_t stride_src, int32_t w, + const int32_t size) { + uint8_t* dst = (uint8_t*)vdst; + uint8_t* src = (uint8_t*)vsrc; + w *= size; + if (w == 1) { + dst[stride_dst * 0] = src[stride_src * 0]; + dst[stride_dst * 1] = src[stride_src * 1]; + dst[stride_dst * 2] = src[stride_src * 2]; + dst[stride_dst * 3] = src[stride_src * 3]; + } else if (w == 2) { + * (uint16_t*) (&dst[stride_dst * 0]) = * (uint16_t*) (&src[stride_src * 0]); + * (uint16_t*) (&dst[stride_dst * 1]) = * (uint16_t*) (&src[stride_src * 1]); + * (uint16_t*) (&dst[stride_dst * 2]) = * (uint16_t*) (&src[stride_src * 2]); + * (uint16_t*) (&dst[stride_dst * 3]) = * (uint16_t*) (&src[stride_src * 3]); + } else if (w == 4) { + * (uint32_t*) (&dst[stride_dst * 0]) = * (uint32_t*) (&src[stride_src * 0]); + * (uint32_t*) (&dst[stride_dst * 1]) = * (uint32_t*) (&src[stride_src * 1]); + * (uint32_t*) (&dst[stride_dst * 2]) = * (uint32_t*) (&src[stride_src * 2]); + * (uint32_t*) (&dst[stride_dst * 3]) = * (uint32_t*) (&src[stride_src * 3]); + } else if (w == 16) { + memcpy (&dst[stride_dst * 0], &src[stride_src * 0], 16); + memcpy (&dst[stride_dst * 1], &src[stride_src * 1], 16); + memcpy 
(&dst[stride_dst * 2], &src[stride_src * 2], 16); + memcpy (&dst[stride_dst * 3], &src[stride_src * 3], 16); + } +} +void PredPSkipMvFromNeighbor (PDqLayer pCurDqLayer, int16_t iMvp[2]) { + bool bTopAvail, bLeftTopAvail, bRightTopAvail, bLeftAvail; + + int32_t iCurSliceIdc, iTopSliceIdc, iLeftTopSliceIdc, iRightTopSliceIdc, iLeftSliceIdc; + int32_t iLeftTopType, iRightTopType, iTopType, iLeftType; + int32_t iCurX, iCurY, iCurXy, iLeftXy, iTopXy = 0, iLeftTopXy = 0, iRightTopXy = 0; + + int8_t iLeftRef; + int8_t iTopRef; + int8_t iRightTopRef; + int8_t iLeftTopRef; + int8_t iDiagonalRef; + int8_t iMatchRef; + int16_t iMvA[2], iMvB[2], iMvC[2], iMvD[2]; + + iCurXy = pCurDqLayer->iMbXyIndex; + iCurX = pCurDqLayer->iMbX; + iCurY = pCurDqLayer->iMbY; + iCurSliceIdc = pCurDqLayer->pSliceIdc[iCurXy]; + + if (iCurX != 0) { + iLeftXy = iCurXy - 1; + iLeftSliceIdc = pCurDqLayer->pSliceIdc[iLeftXy]; + bLeftAvail = (iLeftSliceIdc == iCurSliceIdc); + } else { + bLeftAvail = 0; + bLeftTopAvail = 0; + } + + if (iCurY != 0) { + iTopXy = iCurXy - pCurDqLayer->iMbWidth; + iTopSliceIdc = pCurDqLayer->pSliceIdc[iTopXy]; + bTopAvail = (iTopSliceIdc == iCurSliceIdc); + if (iCurX != 0) { + iLeftTopXy = iTopXy - 1; + iLeftTopSliceIdc = pCurDqLayer->pSliceIdc[iLeftTopXy]; + bLeftTopAvail = (iLeftTopSliceIdc == iCurSliceIdc); + } else { + bLeftTopAvail = 0; + } + if (iCurX != (pCurDqLayer->iMbWidth - 1)) { + iRightTopXy = iTopXy + 1; + iRightTopSliceIdc = pCurDqLayer->pSliceIdc[iRightTopXy]; + bRightTopAvail = (iRightTopSliceIdc == iCurSliceIdc); + } else { + bRightTopAvail = 0; + } + } else { + bTopAvail = 0; + bLeftTopAvail = 0; + bRightTopAvail = 0; + } + + iLeftType = ((iCurX != 0 && bLeftAvail) ? GetMbType (pCurDqLayer)[iLeftXy] : 0); + iTopType = ((iCurY != 0 && bTopAvail) ? GetMbType (pCurDqLayer)[iTopXy] : 0); + iLeftTopType = ((iCurX != 0 && iCurY != 0 && bLeftTopAvail) + ? 
GetMbType (pCurDqLayer)[iLeftTopXy] : 0); + iRightTopType = ((iCurX != pCurDqLayer->iMbWidth - 1 && iCurY != 0 && bRightTopAvail) + ? GetMbType (pCurDqLayer)[iRightTopXy] : 0); + + /*get neb mv&iRefIdxArray*/ + /*left*/ + if (bLeftAvail && IS_INTER (iLeftType)) { + ST32 (iMvA, LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[0][iLeftXy][3] : pCurDqLayer->pMv[0][iLeftXy][3])); + iLeftRef = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[0][iLeftXy][3] : pCurDqLayer->pRefIndex[0][iLeftXy][3]; + } else { + ST32 (iMvA, 0); + if (0 == bLeftAvail) { //not available + iLeftRef = REF_NOT_AVAIL; + } else { //available but is intra mb type + iLeftRef = REF_NOT_IN_LIST; + } + } + if (REF_NOT_AVAIL == iLeftRef || + (0 == iLeftRef && 0 == * (int32_t*)iMvA)) { + ST32 (iMvp, 0); + return; + } + + /*top*/ + if (bTopAvail && IS_INTER (iTopType)) { + ST32 (iMvB, LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[0][iTopXy][12] : pCurDqLayer->pMv[0][iTopXy][12])); + iTopRef = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[0][iTopXy][12] : pCurDqLayer->pRefIndex[0][iTopXy][12]; + } else { + ST32 (iMvB, 0); + if (0 == bTopAvail) { //not available + iTopRef = REF_NOT_AVAIL; + } else { //available but is intra mb type + iTopRef = REF_NOT_IN_LIST; + } + } + if (REF_NOT_AVAIL == iTopRef || + (0 == iTopRef && 0 == * (int32_t*)iMvB)) { + ST32 (iMvp, 0); + return; + } + + /*right_top*/ + if (bRightTopAvail && IS_INTER (iRightTopType)) { + ST32 (iMvC, LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[0][iRightTopXy][12] : + pCurDqLayer->pMv[0][iRightTopXy][12])); + iRightTopRef = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[0][iRightTopXy][12] : + pCurDqLayer->pRefIndex[0][iRightTopXy][12]; + } else { + ST32 (iMvC, 0); + if (0 == bRightTopAvail) { //not available + iRightTopRef = REF_NOT_AVAIL; + } else { //available but is intra mb type + iRightTopRef = REF_NOT_IN_LIST; + } + } + + /*left_top*/ + if (bLeftTopAvail && IS_INTER (iLeftTopType)) { + ST32 (iMvD, LD32 (pCurDqLayer->pDec ? 
pCurDqLayer->pDec->pMv[0][iLeftTopXy][15] : pCurDqLayer->pMv[0][iLeftTopXy][15])); + iLeftTopRef = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[0][iLeftTopXy][15] : + pCurDqLayer->pRefIndex[0][iLeftTopXy][15]; + } else { + ST32 (iMvD, 0); + if (0 == bLeftTopAvail) { //not available + iLeftTopRef = REF_NOT_AVAIL; + } else { //available but is intra mb type + iLeftTopRef = REF_NOT_IN_LIST; + } + } + + iDiagonalRef = iRightTopRef; + if (REF_NOT_AVAIL == iDiagonalRef) { + iDiagonalRef = iLeftTopRef; + * (int32_t*)iMvC = * (int32_t*)iMvD; + } + + if (REF_NOT_AVAIL == iTopRef && REF_NOT_AVAIL == iDiagonalRef && iLeftRef >= REF_NOT_IN_LIST) { + ST32 (iMvp, LD32 (iMvA)); + return; + } + + iMatchRef = (0 == iLeftRef) + (0 == iTopRef) + (0 == iDiagonalRef); + if (1 == iMatchRef) { + if (0 == iLeftRef) { + ST32 (iMvp, LD32 (iMvA)); + } else if (0 == iTopRef) { + ST32 (iMvp, LD32 (iMvB)); + } else { + ST32 (iMvp, LD32 (iMvC)); + } + } else { + iMvp[0] = WelsMedian (iMvA[0], iMvB[0], iMvC[0]); + iMvp[1] = WelsMedian (iMvA[1], iMvB[1], iMvC[1]); + } +} + +int32_t GetColocatedMb (PWelsDecoderContext pCtx, MbType& mbType, SubMbType& subMbType) { + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + + uint32_t is8x8 = IS_Inter_8x8 (GetMbType (pCurDqLayer)[iMbXy]); + mbType = GetMbType (pCurDqLayer)[iMbXy]; + + PPicture colocPic = pCtx->sRefPic.pRefList[LIST_1][0]; + if (GetThreadCount (pCtx) > 1) { + if (16 * pCurDqLayer->iMbY > pCtx->lastReadyHeightOffset[1][0]) { + if (colocPic->pReadyEvent[pCurDqLayer->iMbY].isSignaled != 1) { + WAIT_EVENT (&colocPic->pReadyEvent[pCurDqLayer->iMbY], WELS_DEC_THREAD_WAIT_INFINITE); + } + pCtx->lastReadyHeightOffset[1][0] = 16 * pCurDqLayer->iMbY; + } + } + + if (colocPic == NULL) { + SLogContext* pLogCtx = & (pCtx->sLogCtx); + WelsLog (pLogCtx, WELS_LOG_ERROR, "Colocated Ref Picture for B-Slice is lost, B-Slice decoding cannot be continued!"); + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_DATA, 
ERR_INFO_REFERENCE_PIC_LOST); + } + + MbType coloc_mbType = colocPic->pMbType[iMbXy]; + if (coloc_mbType == MB_TYPE_SKIP) { + //This indicates the colocated MB is P SKIP MB + coloc_mbType |= MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P1L0; + } + if (IS_Inter_8x8 (coloc_mbType) && !pCtx->pSps->bDirect8x8InferenceFlag) { + subMbType = SUB_MB_TYPE_4x4 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_DIRECT; + mbType |= MB_TYPE_8x8 | MB_TYPE_L0 | MB_TYPE_L1; + } else if (!is8x8 && (IS_INTER_16x16 (coloc_mbType) || IS_INTRA (coloc_mbType)/* || IS_SKIP(coloc_mbType)*/)) { + subMbType = SUB_MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_DIRECT; + mbType |= MB_TYPE_16x16 | MB_TYPE_L0 | MB_TYPE_L1; + } else { + subMbType = SUB_MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_DIRECT; + mbType |= MB_TYPE_8x8 | MB_TYPE_L0 | MB_TYPE_L1; + } + + if (IS_INTRA (coloc_mbType)) { + SetRectBlock (pCurDqLayer->iColocIntra, 4, 4, 4 * sizeof (int8_t), 1, sizeof (int8_t)); + return ERR_NONE; + } + SetRectBlock (pCurDqLayer->iColocIntra, 4, 4, 4 * sizeof (int8_t), 0, sizeof (int8_t)); + + if (IS_INTER_16x16 (mbType)) { + int16_t iMVZero[2] = { 0 }; + int16_t* pMv = IS_TYPE_L1 (coloc_mbType) ? colocPic->pMv[LIST_1][iMbXy][0] : iMVZero; + ST32 (pCurDqLayer->iColocMv[LIST_0][0], LD32 (colocPic->pMv[LIST_0][iMbXy][0])); + ST32 (pCurDqLayer->iColocMv[LIST_1][0], LD32 (pMv)); + pCurDqLayer->iColocRefIndex[LIST_0][0] = colocPic->pRefIndex[LIST_0][iMbXy][0]; + pCurDqLayer->iColocRefIndex[LIST_1][0] = IS_TYPE_L1 (coloc_mbType) ? 
colocPic->pRefIndex[LIST_1][iMbXy][0] : + REF_NOT_IN_LIST; + } else { + if (!pCtx->pSps->bDirect8x8InferenceFlag) { + CopyRectBlock4Cols (pCurDqLayer->iColocMv[LIST_0], colocPic->pMv[LIST_0][iMbXy], 16, 16, 4, 4); + CopyRectBlock4Cols (pCurDqLayer->iColocRefIndex[LIST_0], colocPic->pRefIndex[LIST_0][iMbXy], 4, 4, 4, 1); + if (IS_TYPE_L1 (coloc_mbType)) { + CopyRectBlock4Cols (pCurDqLayer->iColocMv[LIST_1], colocPic->pMv[LIST_1][iMbXy], 16, 16, 4, 4); + CopyRectBlock4Cols (pCurDqLayer->iColocRefIndex[LIST_1], colocPic->pRefIndex[LIST_1][iMbXy], 4, 4, 4, 1); + } else { // only forward prediction + SetRectBlock (pCurDqLayer->iColocRefIndex[LIST_1], 4, 4, 4, (uint8_t)REF_NOT_IN_LIST, 1); + } + } else { + for (int32_t listIdx = 0; listIdx < 1 + !! (coloc_mbType & MB_TYPE_L1); listIdx++) { + SetRectBlock (pCurDqLayer->iColocMv[listIdx][0], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][0]), 4); + SetRectBlock (pCurDqLayer->iColocMv[listIdx][2], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][3]), 4); + SetRectBlock (pCurDqLayer->iColocMv[listIdx][8], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][12]), 4); + SetRectBlock (pCurDqLayer->iColocMv[listIdx][10], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][15]), 4); + + SetRectBlock (&pCurDqLayer->iColocRefIndex[listIdx][0], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][0], 1); + SetRectBlock (&pCurDqLayer->iColocRefIndex[listIdx][2], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][3], 1); + SetRectBlock (&pCurDqLayer->iColocRefIndex[listIdx][8], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][12], 1); + SetRectBlock (&pCurDqLayer->iColocRefIndex[listIdx][10], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][15], 1); + } + if (! 
(coloc_mbType & MB_TYPE_L1)) // only forward prediction + SetRectBlock (&pCurDqLayer->iColocRefIndex[1][0], 4, 4, 4, (uint8_t)REF_NOT_IN_LIST, 1); + } + } + return ERR_NONE; +} + +int32_t PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2], int8_t ref[LIST_A], + SubMbType& subMbType) { + + int32_t ret = ERR_NONE; + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + bool bSkipOrDirect = (IS_SKIP (GetMbType (pCurDqLayer)[iMbXy]) | IS_DIRECT (GetMbType (pCurDqLayer)[iMbXy])) > 0; + + MbType mbType; + ret = GetColocatedMb (pCtx, mbType, subMbType); + if (ret != ERR_NONE) { + return ret; + } + + bool bTopAvail, bLeftTopAvail, bRightTopAvail, bLeftAvail; + int32_t iLeftTopType, iRightTopType, iTopType, iLeftType; + int32_t iCurSliceIdc, iTopSliceIdc, iLeftTopSliceIdc, iRightTopSliceIdc, iLeftSliceIdc; + int32_t iCurX, iCurY, iCurXy, iLeftXy = 0, iTopXy = 0, iLeftTopXy = 0, iRightTopXy = 0; + + int8_t iLeftRef[LIST_A]; + int8_t iTopRef[LIST_A]; + int8_t iRightTopRef[LIST_A]; + int8_t iLeftTopRef[LIST_A]; + int8_t iDiagonalRef[LIST_A]; + int16_t iMvA[LIST_A][2], iMvB[LIST_A][2], iMvC[LIST_A][2], iMvD[LIST_A][2]; + + iCurXy = pCurDqLayer->iMbXyIndex; + + iCurX = pCurDqLayer->iMbX; + iCurY = pCurDqLayer->iMbY; + iCurSliceIdc = pCurDqLayer->pSliceIdc[iCurXy]; + + if (iCurX != 0) { + iLeftXy = iCurXy - 1; + iLeftSliceIdc = pCurDqLayer->pSliceIdc[iLeftXy]; + bLeftAvail = (iLeftSliceIdc == iCurSliceIdc); + } else { + bLeftAvail = 0; + bLeftTopAvail = 0; + } + + if (iCurY != 0) { + iTopXy = iCurXy - pCurDqLayer->iMbWidth; + iTopSliceIdc = pCurDqLayer->pSliceIdc[iTopXy]; + bTopAvail = (iTopSliceIdc == iCurSliceIdc); + if (iCurX != 0) { + iLeftTopXy = iTopXy - 1; + iLeftTopSliceIdc = pCurDqLayer->pSliceIdc[iLeftTopXy]; + bLeftTopAvail = (iLeftTopSliceIdc == iCurSliceIdc); + } else { + bLeftTopAvail = 0; + } + if (iCurX != (pCurDqLayer->iMbWidth - 1)) { + iRightTopXy = iTopXy + 1; + iRightTopSliceIdc = 
pCurDqLayer->pSliceIdc[iRightTopXy]; + bRightTopAvail = (iRightTopSliceIdc == iCurSliceIdc); + } else { + bRightTopAvail = 0; + } + } else { + bTopAvail = 0; + bLeftTopAvail = 0; + bRightTopAvail = 0; + } + + iLeftType = ((iCurX != 0 && bLeftAvail) ? GetMbType (pCurDqLayer)[iLeftXy] : 0); + iTopType = ((iCurY != 0 && bTopAvail) ? GetMbType (pCurDqLayer)[iTopXy] : 0); + iLeftTopType = ((iCurX != 0 && iCurY != 0 && bLeftTopAvail) + ? GetMbType (pCurDqLayer)[iLeftTopXy] : 0); + iRightTopType = ((iCurX != pCurDqLayer->iMbWidth - 1 && iCurY != 0 && bRightTopAvail) + ? GetMbType (pCurDqLayer)[iRightTopXy] : 0); + + /*get neb mv&iRefIdxArray*/ + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + + /*left*/ + if (bLeftAvail && IS_INTER (iLeftType)) { + ST32 (iMvA[listIdx], LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[listIdx][iLeftXy][3] : + pCurDqLayer->pMv[listIdx][iLeftXy][3])); + iLeftRef[listIdx] = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][3] : + pCurDqLayer->pRefIndex[listIdx][iLeftXy][3]; + } else { + ST32 (iMvA[listIdx], 0); + if (0 == bLeftAvail) { //not available + iLeftRef[listIdx] = REF_NOT_AVAIL; + } else { //available but is intra mb type + iLeftRef[listIdx] = REF_NOT_IN_LIST; + } + } + + /*top*/ + if (bTopAvail && IS_INTER (iTopType)) { + ST32 (iMvB[listIdx], LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[listIdx][iTopXy][12] : + pCurDqLayer->pMv[listIdx][iTopXy][12])); + iTopRef[listIdx] = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[listIdx][iTopXy][12] : + pCurDqLayer->pRefIndex[listIdx][iTopXy][12]; + } else { + ST32 (iMvB[listIdx], 0); + if (0 == bTopAvail) { //not available + iTopRef[listIdx] = REF_NOT_AVAIL; + } else { //available but is intra mb type + iTopRef[listIdx] = REF_NOT_IN_LIST; + } + } + + /*right_top*/ + if (bRightTopAvail && IS_INTER (iRightTopType)) { + ST32 (iMvC[listIdx], LD32 (pCurDqLayer->pDec ? 
pCurDqLayer->pDec->pMv[listIdx][iRightTopXy][12] : + pCurDqLayer->pMv[listIdx][iRightTopXy][12])); + iRightTopRef[listIdx] = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[listIdx][iRightTopXy][12] : + pCurDqLayer->pRefIndex[listIdx][iRightTopXy][12]; + } else { + ST32 (iMvC[listIdx], 0); + if (0 == bRightTopAvail) { //not available + iRightTopRef[listIdx] = REF_NOT_AVAIL; + } else { //available but is intra mb type + iRightTopRef[listIdx] = REF_NOT_IN_LIST; + } + } + /*left_top*/ + if (bLeftTopAvail && IS_INTER (iLeftTopType)) { + ST32 (iMvD[listIdx], LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[listIdx][iLeftTopXy][15] : + pCurDqLayer->pMv[listIdx][iLeftTopXy][15])); + iLeftTopRef[listIdx] = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[listIdx][iLeftTopXy][15] : + pCurDqLayer->pRefIndex[listIdx][iLeftTopXy][15]; + } else { + ST32 (iMvD[listIdx], 0); + if (0 == bLeftTopAvail) { //not available + iLeftTopRef[listIdx] = REF_NOT_AVAIL; + } else { //available but is intra mb type + iLeftTopRef[listIdx] = REF_NOT_IN_LIST; + } + } + + iDiagonalRef[listIdx] = iRightTopRef[listIdx]; + if (REF_NOT_AVAIL == iDiagonalRef[listIdx]) { + iDiagonalRef[listIdx] = iLeftTopRef[listIdx]; + ST32 (iMvC[listIdx], LD32 (iMvD[listIdx])); + } + + int8_t ref_temp = WELS_MIN_POSITIVE (iTopRef[listIdx], iDiagonalRef[listIdx]); + ref[listIdx] = WELS_MIN_POSITIVE (iLeftRef[listIdx], ref_temp); + if (ref[listIdx] >= 0) { + + uint32_t match_count = (iLeftRef[listIdx] == ref[listIdx]) + (iTopRef[listIdx] == ref[listIdx]) + + (iDiagonalRef[listIdx] == ref[listIdx]); + if (match_count == 1) { + if (iLeftRef[listIdx] == ref[listIdx]) { + ST32 (iMvp[listIdx], LD32 (iMvA[listIdx])); + } else if (iTopRef[listIdx] == ref[listIdx]) { + ST32 (iMvp[listIdx], LD32 (iMvB[listIdx])); + } else { + ST32 (iMvp[listIdx], LD32 (iMvC[listIdx])); + } + } else { + iMvp[listIdx][0] = WelsMedian (iMvA[listIdx][0], iMvB[listIdx][0], iMvC[listIdx][0]); + iMvp[listIdx][1] = WelsMedian (iMvA[listIdx][1], 
iMvB[listIdx][1], iMvC[listIdx][1]); + } + } else { + iMvp[listIdx][0] = 0; + iMvp[listIdx][1] = 0; + ref[listIdx] = REF_NOT_IN_LIST; + } + } + if (ref[LIST_0] <= REF_NOT_IN_LIST && ref[LIST_1] <= REF_NOT_IN_LIST) { + ref[LIST_0] = ref[LIST_1] = 0; + } else if (ref[LIST_1] < 0) { + mbType &= ~MB_TYPE_L1; + subMbType &= ~MB_TYPE_L1; + } else if (ref[LIST_0] < 0) { + mbType &= ~MB_TYPE_L0; + subMbType &= ~MB_TYPE_L0; + } + GetMbType (pCurDqLayer)[iMbXy] = mbType; + + int16_t pMvd[4] = { 0 }; + + bool bIsLongRef = pCtx->sRefPic.pRefList[LIST_1][0]->bIsLongRef; + + if (IS_INTER_16x16 (mbType)) { + if ((* (int32_t*)iMvp[LIST_0] | * (int32_t*)iMvp[LIST_1])) { + if (0 == pCurDqLayer->iColocIntra[0] && !bIsLongRef + && ((pCurDqLayer->iColocRefIndex[LIST_0][0] == 0 && (unsigned) (pCurDqLayer->iColocMv[LIST_0][0][0] + 1) <= 2 + && (unsigned) (pCurDqLayer->iColocMv[LIST_0][0][1] + 1) <= 2) + || (pCurDqLayer->iColocRefIndex[LIST_0][0] < 0 && pCurDqLayer->iColocRefIndex[LIST_1][0] == 0 + && (unsigned) (pCurDqLayer->iColocMv[LIST_1][0][0] + 1) <= 2 + && (unsigned) (pCurDqLayer->iColocMv[LIST_1][0][1] + 1) <= 2))) { + if (0 >= ref[0]) * (uint32_t*)iMvp[LIST_0] = 0; + if (0 >= ref[1]) * (uint32_t*)iMvp[LIST_1] = 0; + } + } + UpdateP16x16DirectCabac (pCurDqLayer); + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + UpdateP16x16MotionInfo (pCurDqLayer, listIdx, ref[listIdx], iMvp[listIdx]); + UpdateP16x16MvdCabac (pCurDqLayer, pMvd, listIdx); + } + } else { + if (bSkipOrDirect) { + int8_t pSubPartCount[4], pPartW[4]; + for (int32_t i = 0; i < 4; i++) { //Direct 8x8 Ref and mv + int16_t iIdx8 = i << 2; + pCurDqLayer->pSubMbType[iMbXy][i] = subMbType; + int8_t pRefIndex[LIST_A][30]; + UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, ref[LIST_0], LIST_0); + UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, ref[LIST_1], LIST_1); + UpdateP8x8DirectCabac (pCurDqLayer, iIdx8); + + pSubPartCount[i] = g_ksInterBSubMbTypeInfo[0].iPartCount; + pPartW[i] = 
g_ksInterBSubMbTypeInfo[0].iPartWidth; + + if (IS_SUB_4x4 (subMbType)) { + pSubPartCount[i] = 4; + pPartW[i] = 1; + } + FillSpatialDirect8x8Mv (pCurDqLayer, iIdx8, pSubPartCount[i], pPartW[i], subMbType, bIsLongRef, iMvp, ref, NULL, NULL); + } + } + } + return ret; +} + +int32_t PredBDirectTemporal (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2], int8_t ref[LIST_A], + SubMbType& subMbType) { + int32_t ret = ERR_NONE; + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + bool bSkipOrDirect = (IS_SKIP (GetMbType (pCurDqLayer)[iMbXy]) | IS_DIRECT (GetMbType (pCurDqLayer)[iMbXy])) > 0; + + MbType mbType; + ret = GetColocatedMb (pCtx, mbType, subMbType); + if (ret != ERR_NONE) { + return ret; + } + + GetMbType (pCurDqLayer)[iMbXy] = mbType; + + PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; + PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader; + int16_t pMvd[4] = { 0 }; + const int32_t ref0Count = WELS_MIN (pSliceHeader->uiRefCount[LIST_0], pCtx->sRefPic.uiRefCount[LIST_0]); + if (IS_INTER_16x16 (mbType)) { + ref[LIST_0] = 0; + ref[LIST_1] = 0; + UpdateP16x16DirectCabac (pCurDqLayer); + UpdateP16x16RefIdx (pCurDqLayer, LIST_1, ref[LIST_1]); + ST64 (iMvp, 0); + if (pCurDqLayer->iColocIntra[0]) { + UpdateP16x16MotionOnly (pCurDqLayer, LIST_0, iMvp[LIST_0]); + UpdateP16x16MotionOnly (pCurDqLayer, LIST_1, iMvp[LIST_1]); + UpdateP16x16RefIdx (pCurDqLayer, LIST_0, ref[LIST_0]); + } else { + ref[LIST_0] = 0; + int16_t* mv = pCurDqLayer->iColocMv[LIST_0][0]; + int8_t colocRefIndexL0 = pCurDqLayer->iColocRefIndex[LIST_0][0]; + if (colocRefIndexL0 >= 0) { + ref[LIST_0] = MapColToList0 (pCtx, colocRefIndexL0, ref0Count); + } else { + mv = pCurDqLayer->iColocMv[LIST_1][0]; + } + UpdateP16x16RefIdx (pCurDqLayer, LIST_0, ref[LIST_0]); + + iMvp[LIST_0][0] = (pSlice->iMvScale[LIST_0][ref[LIST_0]] * mv[0] + 128) >> 8; + iMvp[LIST_0][1] = (pSlice->iMvScale[LIST_0][ref[LIST_0]] * mv[1] + 128) >> 8; + UpdateP16x16MotionOnly 
(pCurDqLayer, LIST_0, iMvp[LIST_0]); + iMvp[LIST_1][0] = iMvp[LIST_0][0] - mv[0]; + iMvp[LIST_1][1] = iMvp[LIST_0][1] - mv[1]; + UpdateP16x16MotionOnly (pCurDqLayer, LIST_1, iMvp[LIST_1]); + } + UpdateP16x16MvdCabac (pCurDqLayer, pMvd, LIST_0); + UpdateP16x16MvdCabac (pCurDqLayer, pMvd, LIST_1); + } else { + if (bSkipOrDirect) { + int8_t pSubPartCount[4], pPartW[4]; + int8_t pRefIndex[LIST_A][30]; + for (int32_t i = 0; i < 4; i++) { + int16_t iIdx8 = i << 2; + const uint8_t iScan4Idx = g_kuiScan4[iIdx8]; + pCurDqLayer->pSubMbType[iMbXy][i] = subMbType; + + int16_t (*mvColoc)[2] = pCurDqLayer->iColocMv[LIST_0]; + + ref[LIST_1] = 0; + UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, ref[LIST_1], LIST_1); + if (pCurDqLayer->iColocIntra[iScan4Idx]) { + ref[LIST_0] = 0; + UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, ref[LIST_0], LIST_0); + ST64 (iMvp, 0); + } else { + ref[LIST_0] = 0; + int8_t colocRefIndexL0 = pCurDqLayer->iColocRefIndex[LIST_0][iScan4Idx]; + if (colocRefIndexL0 >= 0) { + ref[LIST_0] = MapColToList0 (pCtx, colocRefIndexL0, ref0Count); + } else { + mvColoc = pCurDqLayer->iColocMv[LIST_1]; + } + UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, ref[LIST_0], LIST_0); + } + UpdateP8x8DirectCabac (pCurDqLayer, iIdx8); + + pSubPartCount[i] = g_ksInterBSubMbTypeInfo[0].iPartCount; + pPartW[i] = g_ksInterBSubMbTypeInfo[0].iPartWidth; + + if (IS_SUB_4x4 (subMbType)) { + pSubPartCount[i] = 4; + pPartW[i] = 1; + } + FillTemporalDirect8x8Mv (pCurDqLayer, iIdx8, pSubPartCount[i], pPartW[i], subMbType, ref, mvColoc, NULL, NULL); + } + } + } + return ret; +} + +//basic iMVs prediction unit for iMVs partition width (4, 2, 1) +void PredMv (int16_t iMotionVector[LIST_A][30][MV_A], int8_t iRefIndex[LIST_A][30], + int32_t listIdx, int32_t iPartIdx, int32_t iPartWidth, int8_t iRef, int16_t iMVP[2]) { + const uint8_t kuiLeftIdx = g_kuiCache30ScanIdx[iPartIdx] - 1; + const uint8_t kuiTopIdx = g_kuiCache30ScanIdx[iPartIdx] - 6; + const uint8_t kuiRightTopIdx 
= kuiTopIdx + iPartWidth; + const uint8_t kuiLeftTopIdx = kuiTopIdx - 1; + + const int8_t kiLeftRef = iRefIndex[listIdx][kuiLeftIdx]; + const int8_t kiTopRef = iRefIndex[listIdx][ kuiTopIdx]; + const int8_t kiRightTopRef = iRefIndex[listIdx][kuiRightTopIdx]; + const int8_t kiLeftTopRef = iRefIndex[listIdx][ kuiLeftTopIdx]; + int8_t iDiagonalRef = kiRightTopRef; + + int8_t iMatchRef = 0; + + + int16_t iAMV[2], iBMV[2], iCMV[2]; + + ST32 (iAMV, LD32 (iMotionVector[listIdx][ kuiLeftIdx])); + ST32 (iBMV, LD32 (iMotionVector[listIdx][ kuiTopIdx])); + ST32 (iCMV, LD32 (iMotionVector[listIdx][kuiRightTopIdx])); + + if (REF_NOT_AVAIL == iDiagonalRef) { + iDiagonalRef = kiLeftTopRef; + ST32 (iCMV, LD32 (iMotionVector[listIdx][kuiLeftTopIdx])); + } + + iMatchRef = (iRef == kiLeftRef) + (iRef == kiTopRef) + (iRef == iDiagonalRef); + + if (REF_NOT_AVAIL == kiTopRef && REF_NOT_AVAIL == iDiagonalRef && kiLeftRef >= REF_NOT_IN_LIST) { + ST32 (iMVP, LD32 (iAMV)); + return; + } + + if (1 == iMatchRef) { + if (iRef == kiLeftRef) { + ST32 (iMVP, LD32 (iAMV)); + } else if (iRef == kiTopRef) { + ST32 (iMVP, LD32 (iBMV)); + } else { + ST32 (iMVP, LD32 (iCMV)); + } + } else { + iMVP[0] = WelsMedian (iAMV[0], iBMV[0], iCMV[0]); + iMVP[1] = WelsMedian (iAMV[1], iBMV[1], iCMV[1]); + } +} +void PredInter8x16Mv (int16_t iMotionVector[LIST_A][30][MV_A], int8_t iRefIndex[LIST_A][30], + int32_t listIdx, int32_t iPartIdx, int8_t iRef, int16_t iMVP[2]) { + if (0 == iPartIdx) { + const int8_t kiLeftRef = iRefIndex[listIdx][6]; + if (iRef == kiLeftRef) { + ST32 (iMVP, LD32 (&iMotionVector[listIdx][6][0])); + return; + } + } else { // 1 == iPartIdx + int8_t iDiagonalRef = iRefIndex[listIdx][5]; //top-right + int8_t index = 5; + if (REF_NOT_AVAIL == iDiagonalRef) { + iDiagonalRef = iRefIndex[listIdx][2]; //top-left for 8*8 block(index 1) + index = 2; + } + if (iRef == iDiagonalRef) { + ST32 (iMVP, LD32 (&iMotionVector[listIdx][index][0])); + return; + } + } + + PredMv (iMotionVector, iRefIndex, 
listIdx, iPartIdx, 2, iRef, iMVP); +} +void PredInter16x8Mv (int16_t iMotionVector[LIST_A][30][MV_A], int8_t iRefIndex[LIST_A][30], + int32_t listIdx, int32_t iPartIdx, int8_t iRef, int16_t iMVP[2]) { + if (0 == iPartIdx) { + const int8_t kiTopRef = iRefIndex[listIdx][1]; + if (iRef == kiTopRef) { + ST32 (iMVP, LD32 (&iMotionVector[listIdx][1][0])); + return; + } + } else { // 8 == iPartIdx + const int8_t kiLeftRef = iRefIndex[listIdx][18]; + if (iRef == kiLeftRef) { + ST32 (iMVP, LD32 (&iMotionVector[listIdx][18][0])); + return; + } + } + + PredMv (iMotionVector, iRefIndex, listIdx, iPartIdx, 4, iRef, iMVP); +} + +//update iMVs and iRefIndex cache for current MB, only for P_16*16 (SKIP inclusive) +/* can be further optimized */ +void UpdateP16x16MotionInfo (PDqLayer pCurDqLayer, int32_t listIdx, int8_t iRef, int16_t iMVs[2]) { + const int16_t kiRef2 = ((uint8_t)iRef << 8) | (uint8_t)iRef; + const int32_t kiMV32 = LD32 (iMVs); + int32_t i; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + + for (i = 0; i < 16; i += 4) { + //mb + const uint8_t kuiScan4Idx = g_kuiScan4[i]; + const uint8_t kuiScan4IdxPlus4 = 4 + kuiScan4Idx; + if (pCurDqLayer->pDec != NULL) { + ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2); + ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2); + + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32); + } else { + ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2); + ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2); + + ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32); + ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32); + ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32); + ST32 
(pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32); + } + } +} + +//update iRefIndex cache for current MB, only for P_16*16 (SKIP inclusive) +/* can be further optimized */ +void UpdateP16x16RefIdx (PDqLayer pCurDqLayer, int32_t listIdx, int8_t iRef) { + const int16_t kiRef2 = ((uint8_t)iRef << 8) | (uint8_t)iRef; + int32_t i; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + + for (i = 0; i < 16; i += 4) { + //mb + const uint8_t kuiScan4Idx = g_kuiScan4[i]; + const uint8_t kuiScan4IdxPlus4 = 4 + kuiScan4Idx; + + ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2); + ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2); + } +} + +//update iMVs only cache for current MB, only for P_16*16 (SKIP inclusive) +/* can be further optimized */ +void UpdateP16x16MotionOnly (PDqLayer pCurDqLayer, int32_t listIdx, int16_t iMVs[2]) { + const int32_t kiMV32 = LD32 (iMVs); + int32_t i; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + + for (i = 0; i < 16; i += 4) { + //mb + const uint8_t kuiScan4Idx = g_kuiScan4[i]; + const uint8_t kuiScan4IdxPlus4 = 4 + kuiScan4Idx; + if (pCurDqLayer->pDec != NULL) { + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32); + } else { + ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32); + ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32); + ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32); + ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32); + } + } +} + +//update iRefIndex and iMVs of Mb, only for P16x8 +/*need further optimization, mb_cache not work */ +void UpdateP16x8MotionInfo (PDqLayer pCurDqLayer, int16_t iMotionVector[LIST_A][30][MV_A], + int8_t iRefIndex[LIST_A][30], + int32_t listIdx, 
int32_t iPartIdx, int8_t iRef, int16_t iMVs[2]) { + const int16_t kiRef2 = ((uint8_t)iRef << 8) | (uint8_t)iRef; + const int32_t kiMV32 = LD32 (iMVs); + int32_t i; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + for (i = 0; i < 2; i++, iPartIdx += 4) { + const uint8_t kuiScan4Idx = g_kuiScan4[iPartIdx]; + const uint8_t kuiScan4IdxPlus4 = 4 + kuiScan4Idx; + const uint8_t kuiCacheIdx = g_kuiCache30ScanIdx[iPartIdx]; + const uint8_t kuiCacheIdxPlus6 = 6 + kuiCacheIdx; + + //mb + if (pCurDqLayer->pDec != NULL) { + ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2); + ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32); + } else { + ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2); + ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2); + ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32); + ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32); + ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32); + ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32); + } + //cache + ST16 (&iRefIndex[listIdx][kuiCacheIdx ], kiRef2); + ST16 (&iRefIndex[listIdx][kuiCacheIdxPlus6], kiRef2); + ST32 (iMotionVector[listIdx][ kuiCacheIdx ], kiMV32); + ST32 (iMotionVector[listIdx][1 + kuiCacheIdx ], kiMV32); + ST32 (iMotionVector[listIdx][ kuiCacheIdxPlus6], kiMV32); + ST32 (iMotionVector[listIdx][1 + kuiCacheIdxPlus6], kiMV32); + } +} +//update iRefIndex and iMVs of both Mb and Mb_cache, only for P8x16 +void UpdateP8x16MotionInfo (PDqLayer pCurDqLayer, int16_t iMotionVector[LIST_A][30][MV_A], + int8_t iRefIndex[LIST_A][30], + int32_t listIdx, int32_t iPartIdx, 
int8_t iRef, int16_t iMVs[2]) { + const int16_t kiRef2 = ((uint8_t)iRef << 8) | (uint8_t)iRef; + const int32_t kiMV32 = LD32 (iMVs); + int32_t i; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + + for (i = 0; i < 2; i++, iPartIdx += 8) { + const uint8_t kuiScan4Idx = g_kuiScan4[iPartIdx]; + const uint8_t kuiCacheIdx = g_kuiCache30ScanIdx[iPartIdx]; + const uint8_t kuiScan4IdxPlus4 = 4 + kuiScan4Idx; + const uint8_t kuiCacheIdxPlus6 = 6 + kuiCacheIdx; + + //mb + if (pCurDqLayer->pDec != NULL) { + ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2); + ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32); + } else { + ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2); + ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2); + ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32); + ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32); + ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32); + ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32); + } + //cache + ST16 (&iRefIndex[listIdx][kuiCacheIdx ], kiRef2); + ST16 (&iRefIndex[listIdx][kuiCacheIdxPlus6], kiRef2); + ST32 (iMotionVector[listIdx][ kuiCacheIdx ], kiMV32); + ST32 (iMotionVector[listIdx][1 + kuiCacheIdx ], kiMV32); + ST32 (iMotionVector[listIdx][ kuiCacheIdxPlus6], kiMV32); + ST32 (iMotionVector[listIdx][1 + kuiCacheIdxPlus6], kiMV32); + } +} + +void FillSpatialDirect8x8Mv (PDqLayer pCurDqLayer, const int16_t& iIdx8, const int8_t& iPartCount, const int8_t& iPartW, + const SubMbType& subMbType, const bool& bIsLongRef, int16_t pMvDirect[LIST_A][2], int8_t iRef[LIST_A], + int16_t 
pMotionVector[LIST_A][30][MV_A], int16_t pMvdCache[LIST_A][30][MV_A]) { + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + for (int32_t j = 0; j < iPartCount; j++) { + int8_t iPartIdx = iIdx8 + j * iPartW; + uint8_t iScan4Idx = g_kuiScan4[iPartIdx]; + uint8_t iColocIdx = g_kuiScan4[iPartIdx]; + uint8_t iCacheIdx = g_kuiCache30ScanIdx[iPartIdx]; + + int16_t pMV[4] = { 0 }; + if (IS_SUB_8x8 (subMbType)) { + * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_0]; + ST32 ((pMV + 2), LD32 (pMV)); + ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], LD64 (pMV)); + ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx + 4], LD64 (pMV)); + ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0); + ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0); + if (pMotionVector != NULL) { + ST64 (pMotionVector[LIST_0][iCacheIdx], LD64 (pMV)); + ST64 (pMotionVector[LIST_0][iCacheIdx + 6], LD64 (pMV)); + } + if (pMvdCache != NULL) { + ST64 (pMvdCache[LIST_0][iCacheIdx], 0); + ST64 (pMvdCache[LIST_0][iCacheIdx + 6], 0); + } + * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_1]; + ST32 ((pMV + 2), LD32 (pMV)); + ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], LD64 (pMV)); + ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx + 4], LD64 (pMV)); + ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0); + ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0); + if (pMotionVector != NULL) { + ST64 (pMotionVector[LIST_1][iCacheIdx], LD64 (pMV)); + ST64 (pMotionVector[LIST_1][iCacheIdx + 6], LD64 (pMV)); + } + if (pMvdCache != NULL) { + ST64 (pMvdCache[LIST_1][iCacheIdx], 0); + ST64 (pMvdCache[LIST_1][iCacheIdx + 6], 0); + } + } else { //SUB_4x4 + * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_0]; + ST32 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], LD32 (pMV)); + ST32 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0); + if (pMotionVector != NULL) { + ST32 (pMotionVector[LIST_0][iCacheIdx], LD32 (pMV)); + } + if (pMvdCache != NULL) { + ST32 (pMvdCache[LIST_0][iCacheIdx], 0); + } 
+ * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_1]; + ST32 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], LD32 (pMV)); + ST32 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0); + if (pMotionVector != NULL) { + ST32 (pMotionVector[LIST_1][iCacheIdx], LD32 (pMV)); + } + if (pMvdCache != NULL) { + ST32 (pMvdCache[LIST_1][iCacheIdx], 0); + } + } + if ((* (int32_t*)pMvDirect[LIST_0] | * (int32_t*)pMvDirect[LIST_1])) { + uint32_t uiColZeroFlag = (0 == pCurDqLayer->iColocIntra[iColocIdx]) && !bIsLongRef && + (pCurDqLayer->iColocRefIndex[LIST_0][iColocIdx] == 0 || (pCurDqLayer->iColocRefIndex[LIST_0][iColocIdx] < 0 + && pCurDqLayer->iColocRefIndex[LIST_1][iColocIdx] == 0)); + const int16_t (*mvColoc)[2] = 0 == pCurDqLayer->iColocRefIndex[LIST_0][iColocIdx] ? pCurDqLayer->iColocMv[LIST_0] : + pCurDqLayer->iColocMv[LIST_1]; + const int16_t* mv = mvColoc[iColocIdx]; + if (IS_SUB_8x8 (subMbType)) { + if (uiColZeroFlag && ((unsigned) (mv[0] + 1) <= 2 && (unsigned) (mv[1] + 1) <= 2)) { + if (iRef[LIST_0] == 0) { + ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], 0); + ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx + 4], 0); + ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0); + ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0); + if (pMotionVector != NULL) { + ST64 (pMotionVector[LIST_0][iCacheIdx], 0); + ST64 (pMotionVector[LIST_0][iCacheIdx + 6], 0); + } + if (pMvdCache != NULL) { + ST64 (pMvdCache[LIST_0][iCacheIdx], 0); + ST64 (pMvdCache[LIST_0][iCacheIdx + 6], 0); + } + } + + if (iRef[LIST_1] == 0) { + ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], 0); + ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx + 4], 0); + ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0); + ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0); + if (pMotionVector != NULL) { + ST64 (pMotionVector[LIST_1][iCacheIdx], 0); + ST64 (pMotionVector[LIST_1][iCacheIdx + 6], 0); + } + if (pMvdCache != NULL) { + ST64 (pMvdCache[LIST_1][iCacheIdx], 0); + ST64 
(pMvdCache[LIST_1][iCacheIdx + 6], 0); + } + } + } + } else { + if (uiColZeroFlag && ((unsigned) (mv[0] + 1) <= 2 && (unsigned) (mv[1] + 1) <= 2)) { + if (iRef[LIST_0] == 0) { + ST32 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], 0); + ST32 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0); + if (pMotionVector != NULL) { + ST32 (pMotionVector[LIST_0][iCacheIdx], 0); + } + if (pMvdCache != NULL) { + ST32 (pMvdCache[LIST_0][iCacheIdx], 0); + } + } + if (iRef[LIST_1] == 0) { + ST32 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], 0); + ST32 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0); + if (pMotionVector != NULL) { + ST32 (pMotionVector[LIST_1][iCacheIdx], 0); + } + if (pMvdCache != NULL) { + ST32 (pMvdCache[LIST_1][iCacheIdx], 0); + } + } + } + } + } + } +} + +void FillTemporalDirect8x8Mv (PDqLayer pCurDqLayer, const int16_t& iIdx8, const int8_t& iPartCount, + const int8_t& iPartW, + const SubMbType& subMbType, int8_t iRef[LIST_A], int16_t (*mvColoc)[2], int16_t pMotionVector[LIST_A][30][MV_A], + int16_t pMvdCache[LIST_A][30][MV_A]) { + PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + int16_t pMvDirect[LIST_A][2] = { { 0, 0 }, { 0, 0 } }; + for (int32_t j = 0; j < iPartCount; j++) { + int8_t iPartIdx = iIdx8 + j * iPartW; + uint8_t iScan4Idx = g_kuiScan4[iPartIdx]; + uint8_t iColocIdx = g_kuiScan4[iPartIdx]; + uint8_t iCacheIdx = g_kuiCache30ScanIdx[iPartIdx]; + + int16_t* mv = mvColoc[iColocIdx]; + + int16_t pMV[4] = { 0 }; + if (IS_SUB_8x8 (subMbType)) { + if (!pCurDqLayer->iColocIntra[iColocIdx]) { + pMvDirect[LIST_0][0] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[0] + 128) >> 8; + pMvDirect[LIST_0][1] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[1] + 128) >> 8; + } + ST32 (pMV, LD32 (pMvDirect[LIST_0])); + ST32 ((pMV + 2), LD32 (pMvDirect[LIST_0])); + ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], LD64 (pMV)); + ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx + 4], LD64 (pMV)); + 
ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0); + ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0); + if (pMotionVector != NULL) { + ST64 (pMotionVector[LIST_0][iCacheIdx], LD64 (pMV)); + ST64 (pMotionVector[LIST_0][iCacheIdx + 6], LD64 (pMV)); + } + if (pMvdCache != NULL) { + ST64 (pMvdCache[LIST_0][iCacheIdx], 0); + ST64 (pMvdCache[LIST_0][iCacheIdx + 6], 0); + } + if (!pCurDqLayer->iColocIntra[g_kuiScan4[iIdx8]]) { + pMvDirect[LIST_1][0] = pMvDirect[LIST_0][0] - mv[0]; + pMvDirect[LIST_1][1] = pMvDirect[LIST_0][1] - mv[1]; + } + ST32 (pMV, LD32 (pMvDirect[LIST_1])); + ST32 ((pMV + 2), LD32 (pMvDirect[LIST_1])); + ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], LD64 (pMV)); + ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx + 4], LD64 (pMV)); + ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0); + ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0); + if (pMotionVector != NULL) { + ST64 (pMotionVector[LIST_1][iCacheIdx], LD64 (pMV)); + ST64 (pMotionVector[LIST_1][iCacheIdx + 6], LD64 (pMV)); + } + if (pMvdCache != NULL) { + ST64 (pMvdCache[LIST_1][iCacheIdx], 0); + ST64 (pMvdCache[LIST_1][iCacheIdx + 6], 0); + } + } else { //SUB_4x4 + if (!pCurDqLayer->iColocIntra[iColocIdx]) { + pMvDirect[LIST_0][0] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[0] + 128) >> 8; + pMvDirect[LIST_0][1] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[1] + 128) >> 8; + } + ST32 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], LD32 (pMvDirect[LIST_0])); + ST32 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0); + if (pMotionVector != NULL) { + ST32 (pMotionVector[LIST_0][iCacheIdx], LD32 (pMvDirect[LIST_0])); + } + if (pMvdCache != NULL) { + ST32 (pMvdCache[LIST_0][iCacheIdx], 0); + } + if (!pCurDqLayer->iColocIntra[iColocIdx]) { + pMvDirect[LIST_1][0] = pMvDirect[LIST_0][0] - mv[0]; + pMvDirect[LIST_1][1] = pMvDirect[LIST_0][1] - mv[1]; + } + ST32 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], LD32 (pMvDirect[LIST_1])); + ST32 
(pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0); + if (pMotionVector != NULL) { + ST32 (pMotionVector[LIST_1][iCacheIdx], LD32 (pMvDirect[LIST_1])); + } + if (pMvdCache != NULL) { + ST32 (pMvdCache[LIST_1][iCacheIdx], 0); + } + } + } +} +int8_t MapColToList0 (PWelsDecoderContext& pCtx, const int8_t& colocRefIndexL0, + const int32_t& ref0Count) { //ISO/IEC 14496-10:2009(E) (8-193) + //When reference is lost, this function must be skipped. + if ((pCtx->iErrorCode & dsRefLost) == dsRefLost) { + return 0; + } + PPicture pic1 = pCtx->sRefPic.pRefList[LIST_1][0]; + if (pic1 && pic1->pRefPic[LIST_0][colocRefIndexL0]) { + const int32_t iFramePoc = pic1->pRefPic[LIST_0][colocRefIndexL0]->iFramePoc; + for (int32_t i = 0; i < ref0Count; i++) { + if (pCtx->sRefPic.pRefList[LIST_0][i]->iFramePoc == iFramePoc) { + return i; + } + } + } + return 0; +} +void Update8x8RefIdx (PDqLayer& pCurDqLayer, const int16_t& iPartIdx, const int32_t& listIdx, const int8_t& iRef) { + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + const uint8_t iScan4Idx = g_kuiScan4[iPartIdx]; + pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][iScan4Idx] = pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][iScan4Idx + 1] = + pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][iScan4Idx + 4] = pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][iScan4Idx + + 5] = iRef; + +} +} // namespace WelsDec diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/parse_mb_syn_cabac.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/parse_mb_syn_cabac.cpp new file mode 100644 index 000000000..177a5e82a --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/parse_mb_syn_cabac.cpp @@ -0,0 +1,1565 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * parse_mb_syn_cabac.cpp: cabac parse for syntax elements + */ +#include "parse_mb_syn_cabac.h" +#include "decode_slice.h" +#include "mv_pred.h" +#include "error_code.h" +#include + +namespace WelsDec { +#define IDX_UNUSED -1 + +static const int16_t g_kMaxPos [] = {IDX_UNUSED, 15, 14, 15, 3, 14, 63, 3, 3, 14, 14}; +static const int16_t g_kMaxC2 [] = {IDX_UNUSED, 4, 4, 4, 3, 4, 4, 3, 3, 4, 4}; +static const int16_t g_kBlockCat2CtxOffsetCBF[] = {IDX_UNUSED, 0, 4, 8, 12, 16, 0, 12, 12, 16, 16}; +static const int16_t g_kBlockCat2CtxOffsetMap [] = {IDX_UNUSED, 0, 15, 29, 44, 47, 0, 44, 44, 47, 47}; +static const int16_t g_kBlockCat2CtxOffsetLast[] = {IDX_UNUSED, 0, 15, 29, 44, 47, 0, 44, 44, 47, 47}; +static const int16_t g_kBlockCat2CtxOffsetOne [] = {IDX_UNUSED, 0, 10, 20, 30, 39, 0, 30, 30, 39, 39}; +static const int16_t g_kBlockCat2CtxOffsetAbs [] = {IDX_UNUSED, 0, 10, 20, 30, 39, 0, 30, 30, 39, 39}; + +const uint8_t g_kTopBlkInsideMb[24] = { //for index with z-order 0~23 + // 0 1 | 4 5 luma 8*8 block pNonZeroCount[16+8] + 0, 0, 1, 1, // 2 3 | 6 7 0 | 1 0 1 2 3 + 0, 0, 1, 1, //--------------- --------- 4 5 6 7 + 1, 1, 1, 1, // 8 9 | 12 13 2 | 3 8 9 10 11 + 1, 1, 1, 1, // 10 11 | 14 15-----------------------------> 12 13 14 15 + 0, 0, 1, 1, //---------------- chroma 8*8 block 16 17 18 19 + 0, 0, 1, 1 // 16 17 | 20 21 0 1 20 21 22 23 + // 18 19 | 22 23 +}; + +const uint8_t g_kLeftBlkInsideMb[24] = { //for index with z-order 0~23 + // 0 1 | 4 5 luma 8*8 block pNonZeroCount[16+8] + 0, 1, 0, 1, // 2 3 | 6 7 0 | 1 0 1 2 3 + 1, 1, 1, 1, //--------------- --------- 4 5 6 7 + 0, 1, 0, 1, // 8 9 | 12 13 2 | 3 8 9 10 11 + 1, 1, 1, 1, // 10 11 | 14 15-----------------------------> 12 13 14 15 + 0, 1, 0, 1, //---------------- chroma 8*8 block 16 17 18 19 + 0, 1, 0, 1 // 16 17 | 20 21 0 1 20 21 22 23 + // 18 19 | 22 23 +}; + +static uint32_t DecodeCabacIntraMbType (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, int ctx_base) { + uint32_t uiCode; + uint32_t uiMbType 
= 0; + + PWelsCabacDecEngine pCabacDecEngine = pCtx->pCabacDecEngine; + PWelsCabacCtx pBinCtx = pCtx->pCabacCtx + ctx_base; + + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx, uiCode)); + if (!uiCode) { + return 0; /* I4x4 */ + } + + WELS_READ_VERIFY (DecodeTerminateCabac (pCabacDecEngine, uiCode)); + if (uiCode) { + return 25; /* PCM */ + } + uiMbType = 1; /* I16x16 */ + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 1, uiCode)); /* cbp_luma != 0 */ + uiMbType += 12 * uiCode; + + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 2, uiCode)); + if (uiCode) { + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 2, uiCode)); + uiMbType += 4 + 4 * uiCode; + } + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 3, uiCode)); + uiMbType += 2 * uiCode; + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 3, uiCode)); + uiMbType += 1 * uiCode; + return uiMbType; +} + +void UpdateP16x8RefIdxCabac (PDqLayer pCurDqLayer, int8_t pRefIndex[LIST_A][30], int32_t iPartIdx, const int8_t iRef, + const int8_t iListIdx) { + uint32_t iRef32Bit = (uint32_t) iRef; + const int32_t iRef4Bytes = (iRef32Bit << 24) | (iRef32Bit << 16) | (iRef32Bit << 8) | iRef32Bit; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + const uint8_t iScan4Idx = g_kuiScan4[iPartIdx]; + const uint8_t iScan4Idx4 = 4 + iScan4Idx; + const uint8_t iCacheIdx = g_kuiCache30ScanIdx[iPartIdx]; + const uint8_t iCacheIdx6 = 6 + iCacheIdx; + //mb + ST32 (&pCurDqLayer->pDec->pRefIndex[iListIdx][iMbXy][iScan4Idx ], iRef4Bytes); + ST32 (&pCurDqLayer->pDec->pRefIndex[iListIdx][iMbXy][iScan4Idx4], iRef4Bytes); + //cache + ST32 (&pRefIndex[iListIdx][iCacheIdx ], iRef4Bytes); + ST32 (&pRefIndex[iListIdx][iCacheIdx6], iRef4Bytes); +} + +void UpdateP8x16RefIdxCabac (PDqLayer pCurDqLayer, int8_t pRefIndex[LIST_A][30], int32_t iPartIdx, const int8_t iRef, + const int8_t iListIdx) { + uint16_t iRef16Bit = (uint16_t) iRef; + const int16_t iRef2Bytes = (iRef16Bit << 8) | 
iRef16Bit; + int32_t i; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + for (i = 0; i < 2; i++, iPartIdx += 8) { + const uint8_t iScan4Idx = g_kuiScan4[iPartIdx]; + const uint8_t iCacheIdx = g_kuiCache30ScanIdx[iPartIdx]; + const uint8_t iScan4Idx4 = 4 + iScan4Idx; + const uint8_t iCacheIdx6 = 6 + iCacheIdx; + //mb + ST16 (&pCurDqLayer->pDec->pRefIndex[iListIdx][iMbXy][iScan4Idx ], iRef2Bytes); + ST16 (&pCurDqLayer->pDec->pRefIndex[iListIdx][iMbXy][iScan4Idx4], iRef2Bytes); + //cache + ST16 (&pRefIndex[iListIdx][iCacheIdx ], iRef2Bytes); + ST16 (&pRefIndex[iListIdx][iCacheIdx6], iRef2Bytes); + } +} + +void UpdateP8x8RefIdxCabac (PDqLayer pCurDqLayer, int8_t pRefIndex[LIST_A][30], int32_t iPartIdx, const int8_t iRef, + const int8_t iListIdx) { + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + const uint8_t iScan4Idx = g_kuiScan4[iPartIdx]; + pCurDqLayer->pDec->pRefIndex[iListIdx][iMbXy][iScan4Idx] = pCurDqLayer->pDec->pRefIndex[iListIdx][iMbXy][iScan4Idx + 1] + = + pCurDqLayer->pDec->pRefIndex[iListIdx][iMbXy][iScan4Idx + 4] = pCurDqLayer->pDec->pRefIndex[iListIdx][iMbXy][iScan4Idx + + 5] = iRef; +} + +void UpdateP8x8DirectCabac (PDqLayer pCurDqLayer, int32_t iPartIdx) { + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + const uint8_t iScan4Idx = g_kuiScan4[iPartIdx]; + pCurDqLayer->pDirect[iMbXy][iScan4Idx] = pCurDqLayer->pDirect[iMbXy][iScan4Idx + 1] = + pCurDqLayer->pDirect[iMbXy][iScan4Idx + 4] = pCurDqLayer->pDirect[iMbXy][iScan4Idx + 5] = 1; +} + +void UpdateP16x16DirectCabac (PDqLayer pCurDqLayer) { + int32_t i; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + const int16_t direct = (1 << 8) | 1; + for (i = 0; i < 16; i += 4) { + const uint8_t kuiScan4Idx = g_kuiScan4[i]; + const uint8_t kuiScan4IdxPlus4 = 4 + kuiScan4Idx; + ST16 (&pCurDqLayer->pDirect[iMbXy][kuiScan4Idx], direct); + ST16 (&pCurDqLayer->pDirect[iMbXy][kuiScan4IdxPlus4], direct); + } +} + +void UpdateP16x16MvdCabac (SDqLayer* pCurDqLayer, int16_t pMvd[2], const int8_t iListIdx) { + int32_t pMvd32[2]; + ST32 
(&pMvd32[0], LD32 (pMvd)); + ST32 (&pMvd32[1], LD32 (pMvd)); + int32_t i; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + for (i = 0; i < 16; i += 2) { + ST64 (pCurDqLayer->pMvd[iListIdx][iMbXy][i], LD64 (pMvd32)); + } +} + +void UpdateP16x8MvdCabac (SDqLayer* pCurDqLayer, int16_t pMvdCache[LIST_A][30][MV_A], int32_t iPartIdx, int16_t pMvd[2], + const int8_t iListIdx) { + int32_t pMvd32[2]; + ST32 (&pMvd32[0], LD32 (pMvd)); + ST32 (&pMvd32[1], LD32 (pMvd)); + int32_t i; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + for (i = 0; i < 2; i++, iPartIdx += 4) { + const uint8_t iScan4Idx = g_kuiScan4[iPartIdx]; + const uint8_t iScan4Idx4 = 4 + iScan4Idx; + const uint8_t iCacheIdx = g_kuiCache30ScanIdx[iPartIdx]; + const uint8_t iCacheIdx6 = 6 + iCacheIdx; + //mb + ST64 (pCurDqLayer->pMvd[iListIdx][iMbXy][ iScan4Idx ], LD64 (pMvd32)); + ST64 (pCurDqLayer->pMvd[iListIdx][iMbXy][ iScan4Idx4], LD64 (pMvd32)); + //cache + ST64 (pMvdCache[iListIdx][ iCacheIdx ], LD64 (pMvd32)); + ST64 (pMvdCache[iListIdx][ iCacheIdx6], LD64 (pMvd32)); + } +} + +void UpdateP8x16MvdCabac (SDqLayer* pCurDqLayer, int16_t pMvdCache[LIST_A][30][MV_A], int32_t iPartIdx, int16_t pMvd[2], + const int8_t iListIdx) { + int32_t pMvd32[2]; + ST32 (&pMvd32[0], LD32 (pMvd)); + ST32 (&pMvd32[1], LD32 (pMvd)); + int32_t i; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + + for (i = 0; i < 2; i++, iPartIdx += 8) { + const uint8_t iScan4Idx = g_kuiScan4[iPartIdx]; + const uint8_t iCacheIdx = g_kuiCache30ScanIdx[iPartIdx]; + const uint8_t iScan4Idx4 = 4 + iScan4Idx; + const uint8_t iCacheIdx6 = 6 + iCacheIdx; + //mb + ST64 (pCurDqLayer->pMvd[iListIdx][iMbXy][ iScan4Idx ], LD64 (pMvd32)); + ST64 (pCurDqLayer->pMvd[iListIdx][iMbXy][ iScan4Idx4], LD64 (pMvd32)); + //cache + ST64 (pMvdCache[iListIdx][ iCacheIdx ], LD64 (pMvd32)); + ST64 (pMvdCache[iListIdx][ iCacheIdx6], LD64 (pMvd32)); + } +} + +int32_t ParseEndOfSliceCabac (PWelsDecoderContext pCtx, uint32_t& uiBinVal) { + uiBinVal = 0; + WELS_READ_VERIFY 
(DecodeTerminateCabac (pCtx->pCabacDecEngine, uiBinVal)); + return ERR_NONE; +} + +int32_t ParseSkipFlagCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiSkip) { + uiSkip = 0; + int32_t iCtxInc = NEW_CTX_OFFSET_SKIP; + iCtxInc += (pNeighAvail->iLeftAvail && !IS_SKIP (pNeighAvail->iLeftType)) + (pNeighAvail->iTopAvail + && !IS_SKIP (pNeighAvail->iTopType)); + if (B_SLICE == pCtx->eSliceType) + iCtxInc += 13; + PWelsCabacCtx pBinCtx = (pCtx->pCabacCtx + iCtxInc); + WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pBinCtx, uiSkip)); + return ERR_NONE; +} + + +int32_t ParseMBTypeISliceCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiBinVal) { + uint32_t uiCode; + int32_t iIdxA = 0, iIdxB = 0; + int32_t iCtxInc; + uiBinVal = 0; + PWelsCabacDecEngine pCabacDecEngine = pCtx->pCabacDecEngine; + PWelsCabacCtx pBinCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_MB_TYPE_I; //I mode in I slice + iIdxA = (pNeighAvail->iLeftAvail) && (pNeighAvail->iLeftType != MB_TYPE_INTRA4x4 + && pNeighAvail->iLeftType != MB_TYPE_INTRA8x8); + iIdxB = (pNeighAvail->iTopAvail) && (pNeighAvail->iTopType != MB_TYPE_INTRA4x4 + && pNeighAvail->iTopType != MB_TYPE_INTRA8x8); + iCtxInc = iIdxA + iIdxB; + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + iCtxInc, uiCode)); + uiBinVal = uiCode; + if (uiBinVal != 0) { //I16x16 + WELS_READ_VERIFY (DecodeTerminateCabac (pCabacDecEngine, uiCode)); + if (uiCode == 1) + uiBinVal = 25; //I_PCM + else { + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 3, uiCode)); + uiBinVal = 1 + uiCode * 12; + //decoding of uiCbp:0,1,2 + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 4, uiCode)); + if (uiCode != 0) { + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 5, uiCode)); + uiBinVal += 4; + if (uiCode != 0) + uiBinVal += 4; + } + //decoding of I pred-mode: 0,1,2,3 + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 6, uiCode)); + uiBinVal += (uiCode << 1); + 
WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 7, uiCode)); + uiBinVal += uiCode; + } + } + //I4x4 + return ERR_NONE; +} + +int32_t ParseMBTypePSliceCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiMbType) { + uint32_t uiCode; + uiMbType = 0; + PWelsCabacDecEngine pCabacDecEngine = pCtx->pCabacDecEngine; + + PWelsCabacCtx pBinCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_SKIP; + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 3, uiCode)); + if (uiCode) { + // Intra MB + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 6, uiCode)); + if (uiCode) { // Intra 16x16 + WELS_READ_VERIFY (DecodeTerminateCabac (pCabacDecEngine, uiCode)); + if (uiCode) { + uiMbType = 30; + return ERR_NONE;//MB_TYPE_INTRA_PCM; + } + + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 7, uiCode)); + uiMbType = 6 + uiCode * 12; + + //uiCbp: 0,1,2 + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 8, uiCode)); + if (uiCode) { + uiMbType += 4; + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 8, uiCode)); + if (uiCode) + uiMbType += 4; + } + + //IPredMode: 0,1,2,3 + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 9, uiCode)); + uiMbType += (uiCode << 1); + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 9, uiCode)); + uiMbType += uiCode; + } else + // Intra 4x4 + uiMbType = 5; + } else { // P MB + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 4, uiCode)); + if (uiCode) { //second bit + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 6, uiCode)); + if (uiCode) + uiMbType = 1; + else + uiMbType = 2; + } else { + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 5, uiCode)); + if (uiCode) + uiMbType = 3; + else + uiMbType = 0; + } + } + return ERR_NONE; +} + +int32_t ParseMBTypeBSliceCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiMbType) { + uint32_t uiCode; + uiMbType = 0; + int32_t iIdxA = 0, iIdxB = 0; + int32_t 
iCtxInc; + + PWelsCabacDecEngine pCabacDecEngine = pCtx->pCabacDecEngine; + PWelsCabacCtx pBinCtx = pCtx->pCabacCtx + 27; //B slice + + iIdxA = (pNeighAvail->iLeftAvail) && !IS_DIRECT (pNeighAvail->iLeftType); + iIdxB = (pNeighAvail->iTopAvail) && !IS_DIRECT (pNeighAvail->iTopType); + + iCtxInc = iIdxA + iIdxB; + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + iCtxInc, uiCode)); + if (!uiCode) + uiMbType = 0; // Bi_Direct + else { + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 3, uiCode)); + if (!uiCode) { + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 5, uiCode)); + uiMbType = 1 + uiCode; // 16x16 L0L1 + } else { + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 4, uiCode)); + uiMbType = uiCode << 3; + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 5, uiCode)); + uiMbType |= uiCode << 2; + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 5, uiCode)); + uiMbType |= uiCode << 1; + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 5, uiCode)); + uiMbType |= uiCode; + if (uiMbType < 8) { + uiMbType += 3; + return ERR_NONE; + } else if (uiMbType == 13) { + uiMbType = DecodeCabacIntraMbType (pCtx, pNeighAvail, 32) + 23; + return ERR_NONE; + } else if (uiMbType == 14) { + uiMbType = 11; // Bi8x16 + return ERR_NONE; + } else if (uiMbType == 15) { + uiMbType = 22; // 8x8 + return ERR_NONE; + } + uiMbType <<= 1; + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 5, uiCode)); + uiMbType |= uiCode; + uiMbType -= 4; + } + } + return ERR_NONE; +} + +int32_t ParseTransformSize8x8FlagCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, + bool& bTransformSize8x8Flag) { + uint32_t uiCode; + int32_t iIdxA, iIdxB; + int32_t iCtxInc; + PWelsCabacDecEngine pCabacDecEngine = pCtx->pCabacDecEngine; + PWelsCabacCtx pBinCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_TS_8x8_FLAG; + iIdxA = (pNeighAvail->iLeftAvail) && 
(pCtx->pCurDqLayer->pTransformSize8x8Flag[pCtx->pCurDqLayer->iMbXyIndex - 1]); + iIdxB = (pNeighAvail->iTopAvail) + && (pCtx->pCurDqLayer->pTransformSize8x8Flag[pCtx->pCurDqLayer->iMbXyIndex - pCtx->pCurDqLayer->iMbWidth]); + iCtxInc = iIdxA + iIdxB; + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + iCtxInc, uiCode)); + bTransformSize8x8Flag = !!uiCode; + + return ERR_NONE; +} + +int32_t ParseSubMBTypeCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiSubMbType) { + uint32_t uiCode; + PWelsCabacDecEngine pCabacDecEngine = pCtx->pCabacDecEngine; + PWelsCabacCtx pBinCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_SUBMB_TYPE; + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx, uiCode)); + if (uiCode) + uiSubMbType = 0; + else { + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 1, uiCode)); + if (uiCode) { + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 2, uiCode)); + uiSubMbType = 3 - uiCode; + } else { + uiSubMbType = 1; + } + } + return ERR_NONE; +} + +int32_t ParseBSubMBTypeCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiSubMbType) { + uint32_t uiCode; + PWelsCabacDecEngine pCabacDecEngine = pCtx->pCabacDecEngine; + PWelsCabacCtx pBinCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_B_SUBMB_TYPE; + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx, uiCode)); + if (!uiCode) { + uiSubMbType = 0; /* B_Direct_8x8 */ + return ERR_NONE; + } + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 1, uiCode)); + if (!uiCode) { + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 3, uiCode)); + uiSubMbType = 1 + uiCode; /* B_L0_8x8, B_L1_8x8 */ + return ERR_NONE; + } + uiSubMbType = 3; + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 2, uiCode)); + if (uiCode) { + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 3, uiCode)); + if (uiCode) { + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 3, uiCode)); + uiSubMbType = 11 + uiCode; 
/* B_L1_4x4, B_Bi_4x4 */ + return ERR_NONE; + } + uiSubMbType += 4; + } + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 3, uiCode)); + uiSubMbType += 2 * uiCode; + WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 3, uiCode)); + uiSubMbType += uiCode; + + return ERR_NONE; +} + +int32_t ParseIntraPredModeLumaCabac (PWelsDecoderContext pCtx, int32_t& iBinVal) { + uint32_t uiCode; + iBinVal = 0; + WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_IPR, uiCode)); + if (uiCode == 1) + iBinVal = -1; + else { + WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_IPR + 1, uiCode)); + iBinVal |= uiCode; + WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_IPR + 1, uiCode)); + iBinVal |= (uiCode << 1); + WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_IPR + 1, uiCode)); + iBinVal |= (uiCode << 2); + } + return ERR_NONE; +} + +int32_t ParseIntraPredModeChromaCabac (PWelsDecoderContext pCtx, uint8_t uiNeighAvail, int32_t& iBinVal) { + uint32_t uiCode; + int32_t iIdxA, iIdxB, iCtxInc; + int8_t* pChromaPredMode = pCtx->pCurDqLayer->pChromaPredMode; + uint32_t* pMbType = pCtx->pCurDqLayer->pDec->pMbType; + int32_t iLeftAvail = uiNeighAvail & 0x04; + int32_t iTopAvail = uiNeighAvail & 0x01; + + int32_t iMbXy = pCtx->pCurDqLayer->iMbXyIndex; + int32_t iMbXyTop = iMbXy - pCtx->pCurDqLayer->iMbWidth; + int32_t iMbXyLeft = iMbXy - 1; + + iBinVal = 0; + + iIdxB = iTopAvail && (pChromaPredMode[iMbXyTop] > 0 && pChromaPredMode[iMbXyTop] <= 3) + && pMbType[iMbXyTop] != MB_TYPE_INTRA_PCM; + iIdxA = iLeftAvail && (pChromaPredMode[iMbXyLeft] > 0 && pChromaPredMode[iMbXyLeft] <= 3) + && pMbType[iMbXyLeft] != MB_TYPE_INTRA_PCM; + iCtxInc = iIdxA + iIdxB; + WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_CIPR + iCtxInc, uiCode)); + iBinVal = uiCode; + if (iBinVal != 0) { + 
uint32_t iSym; + WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_CIPR + 3, iSym)); + if (iSym == 0) { + iBinVal = (iSym + 1); + return ERR_NONE; + } + iSym = 0; + do { + WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_CIPR + 3, uiCode)); + ++iSym; + } while ((uiCode != 0) && (iSym < 1)); + + if ((uiCode != 0) && (iSym == 1)) + ++ iSym; + iBinVal = (iSym + 1); + return ERR_NONE; + } + return ERR_NONE; +} + +int32_t ParseInterPMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, + int16_t pMotionVector[LIST_A][30][MV_A], int16_t pMvdCache[LIST_A][30][MV_A], int8_t pRefIndex[LIST_A][30]) { + PSlice pSlice = &pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer; + PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader; + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + PPicture* ppRefPic = pCtx->sRefPic.pRefList[LIST_0]; + int32_t pRefCount[2]; + int32_t i, j; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + int16_t pMv[4] = {0}; + int16_t pMvd[4] = {0}; + int8_t iRef[2] = {0}; + int32_t iPartIdx; + int16_t iMinVmv = pSliceHeader->pSps->pSLevelLimits->iMinVmv; + int16_t iMaxVmv = pSliceHeader->pSps->pSLevelLimits->iMaxVmv; + pRefCount[0] = pSliceHeader->uiRefCount[0]; + pRefCount[1] = pSliceHeader->uiRefCount[1]; + + bool bIsPending = GetThreadCount (pCtx) > 1; + + switch (pCurDqLayer->pDec->pMbType[iMbXy]) { + case MB_TYPE_16x16: { + iPartIdx = 0; + WELS_READ_VERIFY (ParseRefIdxCabac (pCtx, pNeighAvail, pNonZeroCount, pRefIndex, 0, LIST_0, iPartIdx, pRefCount[0], 0, + iRef[0])); + if ((iRef[0] < 0) || (iRef[0] >= pRefCount[0]) || (ppRefPic[iRef[0]] == NULL)) { //error ref_idx + pCtx->bMbRefConcealed = true; + if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) { + iRef[0] = 0; + pCtx->iErrorCode |= dsBitstreamError; + } else { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX); + } + } + pCtx->bMbRefConcealed = 
pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRef[0]] + && (ppRefPic[iRef[0]]->bIsComplete || bIsPending)); + PredMv (pMotionVector, pRefIndex, LIST_0, 0, 4, iRef[0], pMv); + WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 0, pMvd[0])); + WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 1, pMvd[1])); + pMv[0] += pMvd[0]; + pMv[1] += pMvd[1]; + WELS_CHECK_SE_BOTH_WARNING (pMv[1], iMinVmv, iMaxVmv, "vertical mv"); + UpdateP16x16MotionInfo (pCurDqLayer, LIST_0, iRef[0], pMv); + UpdateP16x16MvdCabac (pCurDqLayer, pMvd, LIST_0); + } + break; + case MB_TYPE_16x8: + for (i = 0; i < 2; i++) { + iPartIdx = i << 3; + WELS_READ_VERIFY (ParseRefIdxCabac (pCtx, pNeighAvail, pNonZeroCount, pRefIndex, 0, LIST_0, iPartIdx, pRefCount[0], 0, + iRef[i])); + if ((iRef[i] < 0) || (iRef[i] >= pRefCount[0]) || (ppRefPic[iRef[i]] == NULL)) { //error ref_idx + pCtx->bMbRefConcealed = true; + if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) { + iRef[i] = 0; + pCtx->iErrorCode |= dsBitstreamError; + } else { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX); + } + } + pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! 
(ppRefPic[iRef[i]] + && (ppRefPic[iRef[i]]->bIsComplete || bIsPending)); + UpdateP16x8RefIdxCabac (pCurDqLayer, pRefIndex, iPartIdx, iRef[i], LIST_0); + } + for (i = 0; i < 2; i++) { + iPartIdx = i << 3; + PredInter16x8Mv (pMotionVector, pRefIndex, LIST_0, iPartIdx, iRef[i], pMv); + WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 0, pMvd[0])); + WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 1, pMvd[1])); + pMv[0] += pMvd[0]; + pMv[1] += pMvd[1]; + WELS_CHECK_SE_BOTH_WARNING (pMv[1], iMinVmv, iMaxVmv, "vertical mv"); + UpdateP16x8MotionInfo (pCurDqLayer, pMotionVector, pRefIndex, LIST_0, iPartIdx, iRef[i], pMv); + UpdateP16x8MvdCabac (pCurDqLayer, pMvdCache, iPartIdx, pMvd, LIST_0); + } + break; + case MB_TYPE_8x16: + for (i = 0; i < 2; i++) { + iPartIdx = i << 2; + WELS_READ_VERIFY (ParseRefIdxCabac (pCtx, pNeighAvail, pNonZeroCount, pRefIndex, 0, LIST_0, iPartIdx, pRefCount[0], 0, + iRef[i])); + if ((iRef[i] < 0) || (iRef[i] >= pRefCount[0]) || (ppRefPic[iRef[i]] == NULL)) { //error ref_idx + pCtx->bMbRefConcealed = true; + if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) { + iRef[i] = 0; + pCtx->iErrorCode |= dsBitstreamError; + } else { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX); + } + } + pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! 
(ppRefPic[iRef[i]] + && (ppRefPic[iRef[i]]->bIsComplete || bIsPending)); + UpdateP8x16RefIdxCabac (pCurDqLayer, pRefIndex, iPartIdx, iRef[i], LIST_0); + } + for (i = 0; i < 2; i++) { + iPartIdx = i << 2; + PredInter8x16Mv (pMotionVector, pRefIndex, LIST_0, i << 2, iRef[i], pMv/*&mv[0], &mv[1]*/); + + WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 0, pMvd[0])); + WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 1, pMvd[1])); + pMv[0] += pMvd[0]; + pMv[1] += pMvd[1]; + WELS_CHECK_SE_BOTH_WARNING (pMv[1], iMinVmv, iMaxVmv, "vertical mv"); + UpdateP8x16MotionInfo (pCurDqLayer, pMotionVector, pRefIndex, LIST_0, iPartIdx, iRef[i], pMv); + UpdateP8x16MvdCabac (pCurDqLayer, pMvdCache, iPartIdx, pMvd, LIST_0); + } + break; + case MB_TYPE_8x8: + case MB_TYPE_8x8_REF0: { + int8_t pRefIdx[4] = {0}, pSubPartCount[4], pPartW[4]; + uint32_t uiSubMbType; + //sub_mb_type, partition + for (i = 0; i < 4; i++) { + WELS_READ_VERIFY (ParseSubMBTypeCabac (pCtx, pNeighAvail, uiSubMbType)); + if (uiSubMbType >= 4) { //invalid sub_mb_type + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_SUB_MB_TYPE); + } + pCurDqLayer->pSubMbType[iMbXy][i] = g_ksInterPSubMbTypeInfo[uiSubMbType].iType; + pSubPartCount[i] = g_ksInterPSubMbTypeInfo[uiSubMbType].iPartCount; + pPartW[i] = g_ksInterPSubMbTypeInfo[uiSubMbType].iPartWidth; + + // Need modification when B picture add in, reference to 7.3.5 + pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] &= (uiSubMbType == 0); + } + + for (i = 0; i < 4; i++) { + int16_t iIdx8 = i << 2; + WELS_READ_VERIFY (ParseRefIdxCabac (pCtx, pNeighAvail, pNonZeroCount, pRefIndex, 0, LIST_0, iIdx8, pRefCount[0], 1, + pRefIdx[i])); + if ((pRefIdx[i] < 0) || (pRefIdx[i] >= pRefCount[0]) || (ppRefPic[pRefIdx[i]] == NULL)) { //error ref_idx + pCtx->bMbRefConcealed = true; + if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) { + pRefIdx[i] = 0; + pCtx->iErrorCode |= 
dsBitstreamError; + } else { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX); + } + } + pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[pRefIdx[i]] + && (ppRefPic[pRefIdx[i]]->bIsComplete || bIsPending)); + UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, pRefIdx[i], LIST_0); + } + //mv + for (i = 0; i < 4; i++) { + int8_t iPartCount = pSubPartCount[i]; + uiSubMbType = pCurDqLayer->pSubMbType[iMbXy][i]; + int16_t iPartIdx, iBlockW = pPartW[i]; + uint8_t iScan4Idx, iCacheIdx; + iCacheIdx = g_kuiCache30ScanIdx[i << 2]; + pRefIndex[0][iCacheIdx ] = pRefIndex[0][iCacheIdx + 1] + = pRefIndex[0][iCacheIdx + 6] = pRefIndex[0][iCacheIdx + 7] = pRefIdx[i]; + + for (j = 0; j < iPartCount; j++) { + iPartIdx = (i << 2) + j * iBlockW; + iScan4Idx = g_kuiScan4[iPartIdx]; + iCacheIdx = g_kuiCache30ScanIdx[iPartIdx]; + PredMv (pMotionVector, pRefIndex, LIST_0, iPartIdx, iBlockW, pRefIdx[i], pMv); + WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 0, pMvd[0])); + WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 1, pMvd[1])); + pMv[0] += pMvd[0]; + pMv[1] += pMvd[1]; + WELS_CHECK_SE_BOTH_WARNING (pMv[1], iMinVmv, iMaxVmv, "vertical mv"); + if (SUB_MB_TYPE_8x8 == uiSubMbType) { + ST32 ((pMv + 2), LD32 (pMv)); + ST32 ((pMvd + 2), LD32 (pMvd)); + ST64 (pCurDqLayer->pDec->pMv[0][iMbXy][iScan4Idx], LD64 (pMv)); + ST64 (pCurDqLayer->pDec->pMv[0][iMbXy][iScan4Idx + 4], LD64 (pMv)); + ST64 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx], LD64 (pMvd)); + ST64 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx + 4], LD64 (pMvd)); + ST64 (pMotionVector[0][iCacheIdx ], LD64 (pMv)); + ST64 (pMotionVector[0][iCacheIdx + 6], LD64 (pMv)); + ST64 (pMvdCache[0][iCacheIdx ], LD64 (pMvd)); + ST64 (pMvdCache[0][iCacheIdx + 6], LD64 (pMvd)); + } else if (SUB_MB_TYPE_8x4 == uiSubMbType) { + ST32 ((pMv + 2), LD32 (pMv)); + ST32 ((pMvd + 2), LD32 (pMvd)); + ST64 
(pCurDqLayer->pDec->pMv[0][iMbXy][iScan4Idx ], LD64 (pMv)); + ST64 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx ], LD64 (pMvd)); + ST64 (pMotionVector[0][iCacheIdx ], LD64 (pMv)); + ST64 (pMvdCache[0][iCacheIdx ], LD64 (pMvd)); + } else if (SUB_MB_TYPE_4x8 == uiSubMbType) { + ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][iScan4Idx ], LD32 (pMv)); + ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][iScan4Idx + 4], LD32 (pMv)); + ST32 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx ], LD32 (pMvd)); + ST32 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx + 4], LD32 (pMvd)); + ST32 (pMotionVector[0][iCacheIdx ], LD32 (pMv)); + ST32 (pMotionVector[0][iCacheIdx + 6], LD32 (pMv)); + ST32 (pMvdCache[0][iCacheIdx ], LD32 (pMvd)); + ST32 (pMvdCache[0][iCacheIdx + 6], LD32 (pMvd)); + } else { //SUB_MB_TYPE_4x4 + ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][iScan4Idx ], LD32 (pMv)); + ST32 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx ], LD32 (pMvd)); + ST32 (pMotionVector[0][iCacheIdx ], LD32 (pMv)); + ST32 (pMvdCache[0][iCacheIdx ], LD32 (pMvd)); + } + } + } + } + break; + default: + break; + } + return ERR_NONE; +} + +int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, + int16_t pMotionVector[LIST_A][30][MV_A], int16_t pMvdCache[LIST_A][30][MV_A], int8_t pRefIndex[LIST_A][30], + int8_t pDirect[30]) { + PSlice pSlice = &pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer; + PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader; + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + int32_t pRefCount[LIST_A]; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + int16_t pMv[4] = { 0 }; + int16_t pMvd[4] = { 0 }; + int8_t iRef[LIST_A] = { 0 }; + int32_t iPartIdx; + int16_t iMinVmv = pSliceHeader->pSps->pSLevelLimits->iMinVmv; + int16_t iMaxVmv = pSliceHeader->pSps->pSLevelLimits->iMaxVmv; + pRefCount[0] = pSliceHeader->uiRefCount[0]; + pRefCount[1] = pSliceHeader->uiRefCount[1]; + + MbType mbType = pCurDqLayer->pDec->pMbType[iMbXy]; + + bool bIsPending = GetThreadCount (pCtx) > 1; + 
+ if (IS_DIRECT (mbType)) { + + int16_t pMvDirect[LIST_A][2] = { { 0, 0 }, { 0, 0 } }; + SubMbType subMbType; + if (pSliceHeader->iDirectSpatialMvPredFlag) { + //predict direct spatial mv + int32_t ret = PredMvBDirectSpatial (pCtx, pMvDirect, iRef, subMbType); + if (ret != ERR_NONE) { + return ret; + } + } else { + //temporal direct 16x16 mode + int32_t ret = PredBDirectTemporal (pCtx, pMvDirect, iRef, subMbType); + if (ret != ERR_NONE) { + return ret; + } + } + } else if (IS_INTER_16x16 (mbType)) { + iPartIdx = 0; + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + iRef[listIdx] = REF_NOT_IN_LIST; + if (IS_DIR (mbType, 0, listIdx)) { + WELS_READ_VERIFY (ParseRefIdxCabac (pCtx, pNeighAvail, pNonZeroCount, pRefIndex, pDirect, listIdx, iPartIdx, + pRefCount[listIdx], 0, + iRef[listIdx])); + if ((iRef[listIdx] < 0) || (iRef[listIdx] >= pRefCount[listIdx]) + || (pCtx->sRefPic.pRefList[listIdx][iRef[listIdx]] == NULL)) { //error ref_idx + pCtx->bMbRefConcealed = true; + if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) { + iRef[listIdx] = 0; + pCtx->iErrorCode |= dsBitstreamError; + RETURN_ERR_IF_NULL(pCtx->sRefPic.pRefList[listIdx][iRef[listIdx]]); + } else { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX); + } + } + pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! 
(pCtx->sRefPic.pRefList[listIdx][iRef[listIdx]] + && (pCtx->sRefPic.pRefList[listIdx][iRef[listIdx]]->bIsComplete || bIsPending)); + } + } + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + if (IS_DIR (mbType, 0, listIdx)) { + PredMv (pMotionVector, pRefIndex, listIdx, 0, 4, iRef[listIdx], pMv); + WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, listIdx, 0, pMvd[0])); + WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, listIdx, 1, pMvd[1])); + pMv[0] += pMvd[0]; + pMv[1] += pMvd[1]; + WELS_CHECK_SE_BOTH_WARNING (pMv[1], iMinVmv, iMaxVmv, "vertical mv"); + } else { + * (uint32_t*)pMv = * (uint32_t*)pMvd = 0; + } + UpdateP16x16MotionInfo (pCurDqLayer, listIdx, iRef[listIdx], pMv); + UpdateP16x16MvdCabac (pCurDqLayer, pMvd, listIdx); + } + } else if (IS_INTER_16x8 (mbType)) { + int8_t ref_idx_list[LIST_A][2] = { {REF_NOT_IN_LIST, REF_NOT_IN_LIST}, { REF_NOT_IN_LIST, REF_NOT_IN_LIST } }; + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + for (int32_t i = 0; i < 2; ++i) { + iPartIdx = i << 3; + int8_t ref_idx = REF_NOT_IN_LIST; + if (IS_DIR (mbType, i, listIdx)) { + WELS_READ_VERIFY (ParseRefIdxCabac (pCtx, pNeighAvail, pNonZeroCount, pRefIndex, pDirect, listIdx, iPartIdx, + pRefCount[listIdx], 0, ref_idx)); + if ((ref_idx < 0) || (ref_idx >= pRefCount[listIdx]) + || (pCtx->sRefPic.pRefList[listIdx][ref_idx] == NULL)) { //error ref_idx + pCtx->bMbRefConcealed = true; + if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) { + ref_idx = 0; + pCtx->iErrorCode |= dsBitstreamError; + RETURN_ERR_IF_NULL(pCtx->sRefPic.pRefList[listIdx][ref_idx]); + } else { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX); + } + } + pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! 
(pCtx->sRefPic.pRefList[listIdx][ref_idx] + && (pCtx->sRefPic.pRefList[listIdx][ref_idx]->bIsComplete || bIsPending)); + } + UpdateP16x8RefIdxCabac (pCurDqLayer, pRefIndex, iPartIdx, ref_idx, listIdx); + ref_idx_list[listIdx][i] = ref_idx; + } + } + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + for (int32_t i = 0; i < 2; ++i) { + iPartIdx = i << 3; + int8_t ref_idx = ref_idx_list[listIdx][i]; + if (IS_DIR (mbType, i, listIdx)) { + PredInter16x8Mv (pMotionVector, pRefIndex, listIdx, iPartIdx, ref_idx, pMv); + WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, listIdx, 0, pMvd[0])); + WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, listIdx, 1, pMvd[1])); + pMv[0] += pMvd[0]; + pMv[1] += pMvd[1]; + WELS_CHECK_SE_BOTH_WARNING (pMv[1], iMinVmv, iMaxVmv, "vertical mv"); + } else { + * (uint32_t*)pMv = * (uint32_t*)pMvd = 0; + } + UpdateP16x8MotionInfo (pCurDqLayer, pMotionVector, pRefIndex, listIdx, iPartIdx, ref_idx, pMv); + UpdateP16x8MvdCabac (pCurDqLayer, pMvdCache, iPartIdx, pMvd, listIdx); + } + } + } else if (IS_INTER_8x16 (mbType)) { + int8_t ref_idx_list[LIST_A][2] = { { REF_NOT_IN_LIST, REF_NOT_IN_LIST }, { REF_NOT_IN_LIST, REF_NOT_IN_LIST } }; + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + for (int32_t i = 0; i < 2; ++i) { + iPartIdx = i << 2; + int8_t ref_idx = REF_NOT_IN_LIST; + if (IS_DIR (mbType, i, listIdx)) { + WELS_READ_VERIFY (ParseRefIdxCabac (pCtx, pNeighAvail, pNonZeroCount, pRefIndex, pDirect, listIdx, iPartIdx, + pRefCount[listIdx], 0, ref_idx)); + if ((ref_idx < 0) || (ref_idx >= pRefCount[listIdx]) + || (pCtx->sRefPic.pRefList[listIdx][ref_idx] == NULL)) { //error ref_idx + pCtx->bMbRefConcealed = true; + if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) { + ref_idx = 0; + pCtx->iErrorCode |= dsBitstreamError; + RETURN_ERR_IF_NULL(pCtx->sRefPic.pRefList[listIdx][ref_idx]); + } else { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, 
ERR_INFO_INVALID_REF_INDEX); + } + } + pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (pCtx->sRefPic.pRefList[listIdx][ref_idx] + && (pCtx->sRefPic.pRefList[listIdx][ref_idx]->bIsComplete || bIsPending)); + } + UpdateP8x16RefIdxCabac (pCurDqLayer, pRefIndex, iPartIdx, ref_idx, listIdx); + ref_idx_list[listIdx][i] = ref_idx; + } + } + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + for (int32_t i = 0; i < 2; ++i) { + iPartIdx = i << 2; + int8_t ref_idx = ref_idx_list[listIdx][i]; + if (IS_DIR (mbType, i, listIdx)) { + PredInter8x16Mv (pMotionVector, pRefIndex, listIdx, iPartIdx, ref_idx, pMv); + WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, listIdx, 0, pMvd[0])); + WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, listIdx, 1, pMvd[1])); + pMv[0] += pMvd[0]; + pMv[1] += pMvd[1]; + WELS_CHECK_SE_BOTH_WARNING (pMv[1], iMinVmv, iMaxVmv, "vertical mv"); + } else { + * (uint32_t*)pMv = * (uint32_t*)pMvd = 0; + } + UpdateP8x16MotionInfo (pCurDqLayer, pMotionVector, pRefIndex, listIdx, iPartIdx, ref_idx, pMv); + UpdateP8x16MvdCabac (pCurDqLayer, pMvdCache, iPartIdx, pMvd, listIdx); + } + } + } else if (IS_Inter_8x8 (mbType)) { + int8_t pSubPartCount[4], pPartW[4]; + uint32_t uiSubMbType; + //sub_mb_type, partition + int16_t pMvDirect[LIST_A][2] = { {0, 0}, {0, 0} }; + if (pCtx->sRefPic.pRefList[LIST_1][0] == NULL) { + SLogContext* pLogCtx = & (pCtx->sLogCtx); + WelsLog (pLogCtx, WELS_LOG_ERROR, "Colocated Ref Picture for B-Slice is lost, B-Slice decoding cannot be continued!"); + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_DATA, ERR_INFO_REFERENCE_PIC_LOST); + } + bool bIsLongRef = pCtx->sRefPic.pRefList[LIST_1][0]->bIsLongRef; + const int32_t ref0Count = WELS_MIN (pSliceHeader->uiRefCount[LIST_0], pCtx->sRefPic.uiRefCount[LIST_0]); + bool has_direct_called = false; + SubMbType directSubMbType = 0; + for (int32_t i = 0; i < 4; i++) { + WELS_READ_VERIFY 
(ParseBSubMBTypeCabac (pCtx, pNeighAvail, uiSubMbType)); + if (uiSubMbType >= 13) { //invalid sub_mb_type + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_SUB_MB_TYPE); + } +// pCurDqLayer->pSubMbType[iMbXy][i] = g_ksInterBSubMbTypeInfo[uiSubMbType].iType; + pSubPartCount[i] = g_ksInterBSubMbTypeInfo[uiSubMbType].iPartCount; + pPartW[i] = g_ksInterBSubMbTypeInfo[uiSubMbType].iPartWidth; + + // Need modification when B picture add in, reference to 7.3.5 + if (pSubPartCount[i] > 1) + pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = false; + + if (IS_DIRECT (g_ksInterBSubMbTypeInfo[uiSubMbType].iType)) { + if (!has_direct_called) { + if (pSliceHeader->iDirectSpatialMvPredFlag) { + int32_t ret = PredMvBDirectSpatial (pCtx, pMvDirect, iRef, directSubMbType); + if (ret != ERR_NONE) { + return ret; + } + + } else { + //temporal direct mode + int32_t ret = PredBDirectTemporal (pCtx, pMvDirect, iRef, directSubMbType); + if (ret != ERR_NONE) { + return ret; + } + } + has_direct_called = true; + } + pCurDqLayer->pSubMbType[iMbXy][i] = directSubMbType; + if (IS_SUB_4x4 (pCurDqLayer->pSubMbType[iMbXy][i])) { + pSubPartCount[i] = 4; + pPartW[i] = 1; + } + } else { + pCurDqLayer->pSubMbType[iMbXy][i] = g_ksInterBSubMbTypeInfo[uiSubMbType].iType; + } + } + for (int32_t i = 0; i < 4; i++) { //Direct 8x8 Ref and mv + int16_t iIdx8 = i << 2; + if (IS_DIRECT (pCurDqLayer->pSubMbType[iMbXy][i])) { + if (pSliceHeader->iDirectSpatialMvPredFlag) { + FillSpatialDirect8x8Mv (pCurDqLayer, iIdx8, pSubPartCount[i], pPartW[i], directSubMbType, bIsLongRef, pMvDirect, iRef, + pMotionVector, pMvdCache); + } else { + int16_t (*mvColoc)[2] = pCurDqLayer->iColocMv[LIST_0]; + iRef[LIST_1] = 0; + iRef[LIST_0] = 0; + const uint8_t uiColoc4Idx = g_kuiScan4[iIdx8]; + if (!pCurDqLayer->iColocIntra[uiColoc4Idx]) { + iRef[LIST_0] = 0; + int8_t colocRefIndexL0 = pCurDqLayer->iColocRefIndex[LIST_0][uiColoc4Idx]; + if (colocRefIndexL0 >= 0) { + iRef[LIST_0] = MapColToList0 (pCtx, 
colocRefIndexL0, ref0Count); + } else { + mvColoc = pCurDqLayer->iColocMv[LIST_1]; + } + } + Update8x8RefIdx (pCurDqLayer, iIdx8, LIST_0, iRef[LIST_0]); + Update8x8RefIdx (pCurDqLayer, iIdx8, LIST_1, iRef[LIST_1]); + UpdateP8x8RefCacheIdxCabac (pRefIndex, iIdx8, LIST_0, iRef[LIST_0]); + UpdateP8x8RefCacheIdxCabac (pRefIndex, iIdx8, LIST_1, iRef[LIST_1]); + FillTemporalDirect8x8Mv (pCurDqLayer, iIdx8, pSubPartCount[i], pPartW[i], directSubMbType, iRef, mvColoc, pMotionVector, + pMvdCache); + } + } + } + //ref no-direct + int8_t ref_idx_list[LIST_A][4] = { {REF_NOT_IN_LIST, REF_NOT_IN_LIST}, { REF_NOT_IN_LIST, REF_NOT_IN_LIST } }; + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + for (int32_t i = 0; i < 4; i++) { + int16_t iIdx8 = i << 2; + int32_t subMbType = pCurDqLayer->pSubMbType[iMbXy][i]; + int8_t iref = REF_NOT_IN_LIST; + if (IS_DIRECT (subMbType)) { + if (pSliceHeader->iDirectSpatialMvPredFlag) { + Update8x8RefIdx (pCurDqLayer, iIdx8, listIdx, iRef[listIdx]); + ref_idx_list[listIdx][i] = iRef[listIdx]; + } + UpdateP8x8DirectCabac (pCurDqLayer, iIdx8); + } else { + if (IS_DIR (subMbType, 0, listIdx)) { + WELS_READ_VERIFY (ParseRefIdxCabac (pCtx, pNeighAvail, pNonZeroCount, pRefIndex, pDirect, listIdx, iIdx8, + pRefCount[listIdx], 1, + iref)); + if ((iref < 0) || (iref >= pRefCount[listIdx]) || (pCtx->sRefPic.pRefList[listIdx][iref] == NULL)) { //error ref_idx + pCtx->bMbRefConcealed = true; + if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) { + iref = 0; + pCtx->iErrorCode |= dsBitstreamError; + RETURN_ERR_IF_NULL(pCtx->sRefPic.pRefList[listIdx][iref]); + } else { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX); + } + } + pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! 
(pCtx->sRefPic.pRefList[listIdx][iref] + && (pCtx->sRefPic.pRefList[listIdx][iref]->bIsComplete || bIsPending)); + } + Update8x8RefIdx (pCurDqLayer, iIdx8, listIdx, iref); + ref_idx_list[listIdx][i] = iref; + } + } + } + //mv + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + for (int32_t i = 0; i < 4; i++) { + int16_t iIdx8 = i << 2; + + uint32_t subMbType = pCurDqLayer->pSubMbType[iMbXy][i]; + if (IS_DIRECT (subMbType) && !pSliceHeader->iDirectSpatialMvPredFlag) + continue; + + int8_t iref = ref_idx_list[listIdx][i]; + UpdateP8x8RefCacheIdxCabac (pRefIndex, iIdx8, listIdx, iref); + + if (IS_DIRECT (subMbType)) + continue; + + bool is_dir = IS_DIR (subMbType, 0, listIdx) > 0; + int8_t iPartCount = pSubPartCount[i]; + int16_t iBlockW = pPartW[i]; + uint8_t iScan4Idx, iCacheIdx; + for (int32_t j = 0; j < iPartCount; j++) { + iPartIdx = (i << 2) + j * iBlockW; + iScan4Idx = g_kuiScan4[iPartIdx]; + iCacheIdx = g_kuiCache30ScanIdx[iPartIdx]; + if (is_dir) { + PredMv (pMotionVector, pRefIndex, listIdx, iPartIdx, iBlockW, iref, pMv); + WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, listIdx, 0, pMvd[0])); + WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, listIdx, 1, pMvd[1])); + pMv[0] += pMvd[0]; + pMv[1] += pMvd[1]; + WELS_CHECK_SE_BOTH_WARNING (pMv[1], iMinVmv, iMaxVmv, "vertical mv"); + } else { + * (uint32_t*)pMv = * (uint32_t*)pMvd = 0; + } + if (IS_SUB_8x8 (subMbType)) { //MB_TYPE_8x8 + ST32 ((pMv + 2), LD32 (pMv)); + ST32 ((pMvd + 2), LD32 (pMvd)); + ST64 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][iScan4Idx], LD64 (pMv)); + ST64 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][iScan4Idx + 4], LD64 (pMv)); + ST64 (pCurDqLayer->pMvd[listIdx][iMbXy][iScan4Idx], LD64 (pMvd)); + ST64 (pCurDqLayer->pMvd[listIdx][iMbXy][iScan4Idx + 4], LD64 (pMvd)); + ST64 (pMotionVector[listIdx][iCacheIdx], LD64 (pMv)); + ST64 (pMotionVector[listIdx][iCacheIdx + 6], LD64 (pMv)); + ST64 
(pMvdCache[listIdx][iCacheIdx], LD64 (pMvd)); + ST64 (pMvdCache[listIdx][iCacheIdx + 6], LD64 (pMvd)); + } else if (IS_SUB_4x4 (subMbType)) { //MB_TYPE_4x4 + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][iScan4Idx], LD32 (pMv)); + ST32 (pCurDqLayer->pMvd[listIdx][iMbXy][iScan4Idx], LD32 (pMvd)); + ST32 (pMotionVector[listIdx][iCacheIdx], LD32 (pMv)); + ST32 (pMvdCache[listIdx][iCacheIdx], LD32 (pMvd)); + } else if (IS_SUB_4x8 (subMbType)) { //MB_TYPE_4x8 5, 7, 9 + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][iScan4Idx], LD32 (pMv)); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][iScan4Idx + 4], LD32 (pMv)); + ST32 (pCurDqLayer->pMvd[listIdx][iMbXy][iScan4Idx], LD32 (pMvd)); + ST32 (pCurDqLayer->pMvd[listIdx][iMbXy][iScan4Idx + 4], LD32 (pMvd)); + ST32 (pMotionVector[listIdx][iCacheIdx], LD32 (pMv)); + ST32 (pMotionVector[listIdx][iCacheIdx + 6], LD32 (pMv)); + ST32 (pMvdCache[listIdx][iCacheIdx], LD32 (pMvd)); + ST32 (pMvdCache[listIdx][iCacheIdx + 6], LD32 (pMvd)); + } else { //MB_TYPE_8x4 4, 6, 8 + ST32 ((pMv + 2), LD32 (pMv)); + ST32 ((pMvd + 2), LD32 (pMvd)); + ST64 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][iScan4Idx], LD64 (pMv)); + ST64 (pCurDqLayer->pMvd[listIdx][iMbXy][iScan4Idx], LD64 (pMvd)); + ST64 (pMotionVector[listIdx][iCacheIdx], LD64 (pMv)); + ST64 (pMvdCache[listIdx][iCacheIdx], LD64 (pMvd)); + } + } + } + } + } + return ERR_NONE; +} + +int32_t ParseRefIdxCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint8_t* nzc, + int8_t ref_idx[LIST_A][30], int8_t direct[30], + int32_t iListIdx, int32_t iZOrderIdx, int32_t iActiveRefNum, int32_t b8mode, int8_t& iRefIdxVal) { + if (iActiveRefNum == 1) { + iRefIdxVal = 0; + return ERR_NONE; + } + uint32_t uiCode; + int32_t iIdxA = 0, iIdxB = 0; + int32_t iCtxInc = 0; + int8_t* pRefIdxInMB = pCtx->pCurDqLayer->pDec->pRefIndex[iListIdx][pCtx->pCurDqLayer->iMbXyIndex]; + int8_t* pDirect = pCtx->pCurDqLayer->pDirect[pCtx->pCurDqLayer->iMbXyIndex]; + if (iZOrderIdx == 0) { + iIdxB = (pNeighAvail->iTopAvail 
&& pNeighAvail->iTopType != MB_TYPE_INTRA_PCM + && ref_idx[iListIdx][g_kuiCache30ScanIdx[iZOrderIdx] - 6] > 0); + iIdxA = (pNeighAvail->iLeftAvail && pNeighAvail->iLeftType != MB_TYPE_INTRA_PCM + && ref_idx[iListIdx][g_kuiCache30ScanIdx[iZOrderIdx] - 1] > 0); + if (pCtx->eSliceType == B_SLICE) { + if (iIdxB > 0 && direct[g_kuiCache30ScanIdx[iZOrderIdx] - 6] == 0) { + iCtxInc += 2; + } + if (iIdxA > 0 && direct[g_kuiCache30ScanIdx[iZOrderIdx] - 1] == 0) { + iCtxInc++; + } + } + } else if (iZOrderIdx == 4) { + iIdxB = (pNeighAvail->iTopAvail && pNeighAvail->iTopType != MB_TYPE_INTRA_PCM + && ref_idx[iListIdx][g_kuiCache30ScanIdx[iZOrderIdx] - 6] > 0); + iIdxA = pRefIdxInMB[g_kuiScan4[iZOrderIdx] - 1] > 0; + if (pCtx->eSliceType == B_SLICE) { + if (iIdxB > 0 && direct[g_kuiCache30ScanIdx[iZOrderIdx] - 6] == 0) { + iCtxInc += 2; + } + if (iIdxA > 0 && pDirect[g_kuiScan4[iZOrderIdx] - 1] == 0) { + iCtxInc ++; + } + } + } else if (iZOrderIdx == 8) { + + iIdxB = pRefIdxInMB[g_kuiScan4[iZOrderIdx] - 4] > 0; + iIdxA = (pNeighAvail->iLeftAvail && pNeighAvail->iLeftType != MB_TYPE_INTRA_PCM + && ref_idx[iListIdx][g_kuiCache30ScanIdx[iZOrderIdx] - 1] > 0); + if (pCtx->eSliceType == B_SLICE) { + if (iIdxB > 0 && pDirect[g_kuiScan4[iZOrderIdx] - 4] == 0) { + iCtxInc += 2; + } + if (iIdxA > 0 && direct[g_kuiCache30ScanIdx[iZOrderIdx] - 1] == 0) { + iCtxInc++; + } + } + } else { + iIdxB = pRefIdxInMB[g_kuiScan4[iZOrderIdx] - 4] > 0; + iIdxA = pRefIdxInMB[g_kuiScan4[iZOrderIdx] - 1] > 0; + if (pCtx->eSliceType == B_SLICE) { + if (iIdxB > 0 && pDirect[g_kuiScan4[iZOrderIdx] - 4] == 0) { + iCtxInc += 2; + } + if (iIdxA > 0 && pDirect[g_kuiScan4[iZOrderIdx] - 1] == 0) { + iCtxInc++; + } + } + } + if (pCtx->eSliceType != B_SLICE) { + iCtxInc = iIdxA + (iIdxB << 1); + } + + WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_REF_NO + iCtxInc, uiCode)); + if (uiCode) { + WELS_READ_VERIFY (DecodeUnaryBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + 
NEW_CTX_OFFSET_REF_NO + 4, 1, uiCode)); + ++uiCode; + } + iRefIdxVal = (int8_t) uiCode; + return ERR_NONE; +} + +int32_t ParseMvdInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, int8_t pRefIndex[LIST_A][30], + int16_t pMvdCache[LIST_A][30][2], int32_t index, int8_t iListIdx, int8_t iMvComp, int16_t& iMvdVal) { + uint32_t uiCode; + int32_t iIdxA = 0; + //int32_t sym; + PWelsCabacCtx pBinCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_MVD + iMvComp * CTX_NUM_MVD; + iMvdVal = 0; + + if (pRefIndex[iListIdx][g_kuiCache30ScanIdx[index] - 6] >= 0) + iIdxA = WELS_ABS (pMvdCache[iListIdx][g_kuiCache30ScanIdx[index] - 6][iMvComp]); + if (pRefIndex[iListIdx][g_kuiCache30ScanIdx[index] - 1] >= 0) + iIdxA += WELS_ABS (pMvdCache[iListIdx][g_kuiCache30ScanIdx[index] - 1][iMvComp]); + + int32_t iCtxInc = 0; + if (iIdxA >= 3) + iCtxInc = 1 + (iIdxA > 32); + + WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pBinCtx + iCtxInc, uiCode)); + if (uiCode) { + WELS_READ_VERIFY (DecodeUEGMvCabac (pCtx->pCabacDecEngine, pBinCtx + 3, 3, uiCode)); + iMvdVal = (int16_t) (uiCode + 1); + WELS_READ_VERIFY (DecodeBypassCabac (pCtx->pCabacDecEngine, uiCode)); + if (uiCode) { + iMvdVal = -iMvdVal; + } + } else { + iMvdVal = 0; + } + return ERR_NONE; +} + +int32_t ParseCbpInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiCbp) { + int32_t iIdxA = 0, iIdxB = 0, pALeftMb[2], pBTopMb[2]; + uiCbp = 0; + uint32_t pCbpBit[6]; + int32_t iCtxInc; + + //Luma: bit by bit for 4 8x8 blocks in z-order + pBTopMb[0] = pNeighAvail->iTopAvail && pNeighAvail->iTopType != MB_TYPE_INTRA_PCM + && ((pNeighAvail->iTopCbp & (1 << 2)) == 0); + pBTopMb[1] = pNeighAvail->iTopAvail && pNeighAvail->iTopType != MB_TYPE_INTRA_PCM + && ((pNeighAvail->iTopCbp & (1 << 3)) == 0); + pALeftMb[0] = pNeighAvail->iLeftAvail && pNeighAvail->iLeftType != MB_TYPE_INTRA_PCM + && ((pNeighAvail->iLeftCbp & (1 << 1)) == 0); + pALeftMb[1] = pNeighAvail->iLeftAvail && pNeighAvail->iLeftType != 
MB_TYPE_INTRA_PCM + && ((pNeighAvail->iLeftCbp & (1 << 3)) == 0); + + //left_top 8x8 block + iCtxInc = pALeftMb[0] + (pBTopMb[0] << 1); + WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_CBP + iCtxInc, pCbpBit[0])); + if (pCbpBit[0]) + uiCbp += 0x01; + + //right_top 8x8 block + iIdxA = !pCbpBit[0]; + iCtxInc = iIdxA + (pBTopMb[1] << 1); + WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_CBP + iCtxInc, pCbpBit[1])); + if (pCbpBit[1]) + uiCbp += 0x02; + + //left_bottom 8x8 block + iIdxB = !pCbpBit[0]; + iCtxInc = pALeftMb[1] + (iIdxB << 1); + WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_CBP + iCtxInc, pCbpBit[2])); + if (pCbpBit[2]) + uiCbp += 0x04; + + //right_bottom 8x8 block + iIdxB = !pCbpBit[1]; + iIdxA = !pCbpBit[2]; + iCtxInc = iIdxA + (iIdxB << 1); + WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_CBP + iCtxInc, pCbpBit[3])); + if (pCbpBit[3]) + uiCbp += 0x08; + + if (pCtx->pSps->uiChromaFormatIdc == 0)//monochroma + return ERR_NONE; + + + //Chroma: bit by bit + iIdxB = pNeighAvail->iTopAvail && (pNeighAvail->iTopType == MB_TYPE_INTRA_PCM || (pNeighAvail->iTopCbp >> 4)); + iIdxA = pNeighAvail->iLeftAvail && (pNeighAvail->iLeftType == MB_TYPE_INTRA_PCM || (pNeighAvail->iLeftCbp >> 4)); + + //BitIdx = 0 + iCtxInc = iIdxA + (iIdxB << 1); + WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_CBP + CTX_NUM_CBP + iCtxInc, + pCbpBit[4])); + + //BitIdx = 1 + if (pCbpBit[4]) { + iIdxB = pNeighAvail->iTopAvail && (pNeighAvail->iTopType == MB_TYPE_INTRA_PCM || (pNeighAvail->iTopCbp >> 4) == 2); + iIdxA = pNeighAvail->iLeftAvail && (pNeighAvail->iLeftType == MB_TYPE_INTRA_PCM || (pNeighAvail->iLeftCbp >> 4) == 2); + iCtxInc = iIdxA + (iIdxB << 1); + WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, + pCtx->pCabacCtx + NEW_CTX_OFFSET_CBP + 2 * CTX_NUM_CBP + 
iCtxInc, + pCbpBit[5])); + uiCbp += 1 << (4 + pCbpBit[5]); + + } + + return ERR_NONE; +} + +int32_t ParseDeltaQpCabac (PWelsDecoderContext pCtx, int32_t& iQpDelta) { + uint32_t uiCode; + PSlice pCurrSlice = & (pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer); + iQpDelta = 0; + PWelsCabacCtx pBinCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_DELTA_QP; + int32_t iCtxInc = (pCurrSlice->iLastDeltaQp != 0); + WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pBinCtx + iCtxInc, uiCode)); + if (uiCode != 0) { + WELS_READ_VERIFY (DecodeUnaryBinCabac (pCtx->pCabacDecEngine, pBinCtx + 2, 1, uiCode)); + uiCode++; + iQpDelta = (uiCode + 1) >> 1; + if ((uiCode & 1) == 0) + iQpDelta = - iQpDelta; + } + pCurrSlice->iLastDeltaQp = iQpDelta; + return ERR_NONE; +} + +int32_t ParseCbfInfoCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNzcCache, int32_t iZIndex, int32_t iResProperty, + PWelsDecoderContext pCtx, uint32_t& uiCbfBit) { + int8_t nA, nB/*, zigzag_idx = 0*/; + int32_t iCurrBlkXy = pCtx->pCurDqLayer->iMbXyIndex; + int32_t iTopBlkXy = iCurrBlkXy - pCtx->pCurDqLayer->iMbWidth; //default value: MB neighboring + int32_t iLeftBlkXy = iCurrBlkXy - 1; //default value: MB neighboring + uint16_t* pCbfDc = pCtx->pCurDqLayer->pCbfDc; + uint32_t* pMbType = pCtx->pCurDqLayer->pDec->pMbType; + int32_t iCtxInc; + uiCbfBit = 0; + nA = nB = (int8_t)!!IS_INTRA (pMbType[iCurrBlkXy]); + + if (iResProperty == I16_LUMA_DC || iResProperty == CHROMA_DC_U || iResProperty == CHROMA_DC_V) { //DC + if (pNeighAvail->iTopAvail) + nB = (pMbType[iTopBlkXy] == MB_TYPE_INTRA_PCM) || ((pCbfDc[iTopBlkXy] >> iResProperty) & 1); + if (pNeighAvail->iLeftAvail) + nA = (pMbType[iLeftBlkXy] == MB_TYPE_INTRA_PCM) || ((pCbfDc[iLeftBlkXy] >> iResProperty) & 1); + iCtxInc = nA + (nB << 1); + WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, + pCtx->pCabacCtx + NEW_CTX_OFFSET_CBF + g_kBlockCat2CtxOffsetCBF[iResProperty] + iCtxInc, uiCbfBit)); + if (uiCbfBit) + pCbfDc[iCurrBlkXy] |= (1 << iResProperty); + } else { //AC + 
//for 4x4 blk, make sure blk-idx is correct + if (pNzcCache[g_kCacheNzcScanIdx[iZIndex] - 8] != 0xff) { //top blk available + if (g_kTopBlkInsideMb[iZIndex]) + iTopBlkXy = iCurrBlkXy; + nB = pNzcCache[g_kCacheNzcScanIdx[iZIndex] - 8] || pMbType[iTopBlkXy] == MB_TYPE_INTRA_PCM; + } + if (pNzcCache[g_kCacheNzcScanIdx[iZIndex] - 1] != 0xff) { //left blk available + if (g_kLeftBlkInsideMb[iZIndex]) + iLeftBlkXy = iCurrBlkXy; + nA = pNzcCache[g_kCacheNzcScanIdx[iZIndex] - 1] || pMbType[iLeftBlkXy] == MB_TYPE_INTRA_PCM; + } + + iCtxInc = nA + (nB << 1); + WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, + pCtx->pCabacCtx + NEW_CTX_OFFSET_CBF + g_kBlockCat2CtxOffsetCBF[iResProperty] + iCtxInc, uiCbfBit)); + } + return ERR_NONE; +} + +int32_t ParseSignificantMapCabac (int32_t* pSignificantMap, int32_t iResProperty, PWelsDecoderContext pCtx, + uint32_t& uiCoeffNum) { + uint32_t uiCode; + + PWelsCabacCtx pMapCtx = pCtx->pCabacCtx + (iResProperty == LUMA_DC_AC_8 ? NEW_CTX_OFFSET_MAP_8x8 : NEW_CTX_OFFSET_MAP) + + g_kBlockCat2CtxOffsetMap [iResProperty]; + PWelsCabacCtx pLastCtx = pCtx->pCabacCtx + (iResProperty == LUMA_DC_AC_8 ? NEW_CTX_OFFSET_LAST_8x8 : + NEW_CTX_OFFSET_LAST) + g_kBlockCat2CtxOffsetLast[iResProperty]; + + + int32_t i; + uiCoeffNum = 0; + int32_t i0 = 0; + int32_t i1 = g_kMaxPos[iResProperty]; + + int32_t iCtx; + + for (i = i0; i < i1; ++i) { + iCtx = (iResProperty == LUMA_DC_AC_8 ? g_kuiIdx2CtxSignificantCoeffFlag8x8[i] : i); + //read significant + WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pMapCtx + iCtx, uiCode)); + if (uiCode) { + * (pSignificantMap++) = 1; + ++ uiCoeffNum; + //read last significant + iCtx = (iResProperty == LUMA_DC_AC_8 ? 
g_kuiIdx2CtxLastSignificantCoeffFlag8x8[i] : i); + WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pLastCtx + iCtx, uiCode)); + if (uiCode) { + memset (pSignificantMap, 0, (i1 - i) * sizeof (int32_t)); + return ERR_NONE; + } + } else + * (pSignificantMap++) = 0; + } + + //deal with last pSignificantMap if no data + //if(i < i1+1) + { + *pSignificantMap = 1; + ++uiCoeffNum; + } + + return ERR_NONE; +} + +int32_t ParseSignificantCoeffCabac (int32_t* pSignificant, int32_t iResProperty, PWelsDecoderContext pCtx) { + uint32_t uiCode; + PWelsCabacCtx pOneCtx = pCtx->pCabacCtx + (iResProperty == LUMA_DC_AC_8 ? NEW_CTX_OFFSET_ONE_8x8 : NEW_CTX_OFFSET_ONE) + + g_kBlockCat2CtxOffsetOne[iResProperty]; + PWelsCabacCtx pAbsCtx = pCtx->pCabacCtx + (iResProperty == LUMA_DC_AC_8 ? NEW_CTX_OFFSET_ABS_8x8 : NEW_CTX_OFFSET_ABS) + + g_kBlockCat2CtxOffsetAbs[iResProperty]; + + const int16_t iMaxType = g_kMaxC2[iResProperty]; + int32_t i = g_kMaxPos[iResProperty]; + int32_t* pCoff = pSignificant + i; + int32_t c1 = 1; + int32_t c2 = 0; + for (; i >= 0; --i) { + if (*pCoff != 0) { + WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pOneCtx + c1, uiCode)); + *pCoff += uiCode; + if (*pCoff == 2) { + WELS_READ_VERIFY (DecodeUEGLevelCabac (pCtx->pCabacDecEngine, pAbsCtx + c2, uiCode)); + *pCoff += uiCode; + ++c2; + c2 = WELS_MIN (c2, iMaxType); + c1 = 0; + } else if (c1) { + ++c1; + c1 = WELS_MIN (c1, 4); + } + WELS_READ_VERIFY (DecodeBypassCabac (pCtx->pCabacDecEngine, uiCode)); + if (uiCode) + *pCoff = - *pCoff; + } + pCoff--; + } + return ERR_NONE; +} + +int32_t ParseResidualBlockCabac8x8 (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCountCache, SBitStringAux* pBsAux, + int32_t iIndex, int32_t iMaxNumCoeff, const uint8_t* pScanTable, int32_t iResProperty, + short* sTCoeff, /*int mb_mode*/ uint8_t uiQp, PWelsDecoderContext pCtx) { + uint32_t uiTotalCoeffNum = 0; + uint32_t uiCbpBit; + int32_t pSignificantMap[64] = {0}; + + int32_t iMbResProperty = 0; + GetMbResProperty 
(&iMbResProperty, &iResProperty, false); + const uint16_t* pDeQuantMul = (pCtx->bUseScalingList) ? pCtx->pDequant_coeff8x8[iMbResProperty - 6][uiQp] : + g_kuiDequantCoeff8x8[uiQp]; + + uiCbpBit = 1; // for 8x8, MaxNumCoeff == 64 && uiCbpBit == 1 + if (uiCbpBit) { //has coeff + WELS_READ_VERIFY (ParseSignificantMapCabac (pSignificantMap, iResProperty, pCtx, uiTotalCoeffNum)); + WELS_READ_VERIFY (ParseSignificantCoeffCabac (pSignificantMap, iResProperty, pCtx)); + } + + pNonZeroCountCache[g_kCacheNzcScanIdx[iIndex]] = + pNonZeroCountCache[g_kCacheNzcScanIdx[iIndex + 1]] = + pNonZeroCountCache[g_kCacheNzcScanIdx[iIndex + 2]] = + pNonZeroCountCache[g_kCacheNzcScanIdx[iIndex + 3]] = (uint8_t)uiTotalCoeffNum; + if (uiTotalCoeffNum == 0) { + return ERR_NONE; + } + int32_t j = 0, i; + if (iResProperty == LUMA_DC_AC_8) { + do { + if (pSignificantMap[j] != 0) { + i = pScanTable[ j ]; + sTCoeff[i] = uiQp >= 36 ? ((pSignificantMap[j] * pDeQuantMul[i]) * (1 << (uiQp / 6 - 6))) : (( + pSignificantMap[j] * pDeQuantMul[i] + (1 << (5 - uiQp / 6))) >> (6 - uiQp / 6)); + } + ++j; + } while (j < 64); + } + + return ERR_NONE; +} + +int32_t ParseResidualBlockCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCountCache, SBitStringAux* pBsAux, + int32_t iIndex, int32_t iMaxNumCoeff, + const uint8_t* pScanTable, int32_t iResProperty, short* sTCoeff, /*int mb_mode*/ uint8_t uiQp, + PWelsDecoderContext pCtx) { + int32_t iCurNzCacheIdx; + uint32_t uiTotalCoeffNum = 0; + uint32_t uiCbpBit; + int32_t pSignificantMap[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + + int32_t iMbResProperty = 0; + GetMbResProperty (&iMbResProperty, &iResProperty, false); + const uint16_t* pDeQuantMul = (pCtx->bUseScalingList) ? 
pCtx->pDequant_coeff4x4[iMbResProperty][uiQp] : + g_kuiDequantCoeff[uiQp]; + + WELS_READ_VERIFY (ParseCbfInfoCabac (pNeighAvail, pNonZeroCountCache, iIndex, iResProperty, pCtx, uiCbpBit)); + if (uiCbpBit) { //has coeff + WELS_READ_VERIFY (ParseSignificantMapCabac (pSignificantMap, iResProperty, pCtx, uiTotalCoeffNum)); + WELS_READ_VERIFY (ParseSignificantCoeffCabac (pSignificantMap, iResProperty, pCtx)); + } + + iCurNzCacheIdx = g_kCacheNzcScanIdx[iIndex]; + pNonZeroCountCache[iCurNzCacheIdx] = (uint8_t)uiTotalCoeffNum; + if (uiTotalCoeffNum == 0) { + return ERR_NONE; + } + int32_t j = 0; + if (iResProperty == I16_LUMA_DC) { + do { + sTCoeff[pScanTable[j]] = pSignificantMap[j]; + ++j; + } while (j < 16); + WelsLumaDcDequantIdct (sTCoeff, uiQp, pCtx); + } else if (iResProperty == CHROMA_DC_U || iResProperty == CHROMA_DC_V) { + do { + sTCoeff[pScanTable[j]] = pSignificantMap[j]; + ++j; + } while (j < 4); + //iHadamard2x2 + WelsChromaDcIdct (sTCoeff); + //scaling + if (!pCtx->bUseScalingList) { + for (j = 0; j < 4; ++j) { + sTCoeff[pScanTable[j]] = (int16_t) ((int64_t)sTCoeff[pScanTable[j]] * (int64_t)pDeQuantMul[0] >> 1); + } + } else { //with scaling list + for (j = 0; j < 4; ++j) { + sTCoeff[pScanTable[j]] = (int16_t) ((int64_t)sTCoeff[pScanTable[j]] * (int64_t)pDeQuantMul[0] >> 5); + } + } + } else { //luma ac, chroma ac + do { + if (pSignificantMap[j] != 0) { + if (!pCtx->bUseScalingList) { + sTCoeff[pScanTable[j]] = pSignificantMap[j] * pDeQuantMul[pScanTable[j] & 0x07]; + } else { + sTCoeff[pScanTable[j]] = (int16_t) (((int64_t)pSignificantMap[j] * (int64_t)pDeQuantMul[pScanTable[j]] + 8) >> 4); + } + } + ++j; + } while (j < 16); + } + return ERR_NONE; +} + +int32_t ParseIPCMInfoCabac (PWelsDecoderContext pCtx) { + int32_t i; + PWelsCabacDecEngine pCabacDecEngine = pCtx->pCabacDecEngine; + SBitStringAux* pBsAux = pCtx->pCurDqLayer->pBitStringAux; + SDqLayer* pCurDqLayer = pCtx->pCurDqLayer; + int32_t iDstStrideLuma = pCurDqLayer->pDec->iLinesize[0]; + int32_t 
iDstStrideChroma = pCurDqLayer->pDec->iLinesize[1]; + int32_t iMbX = pCurDqLayer->iMbX; + int32_t iMbY = pCurDqLayer->iMbY; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + + int32_t iMbOffsetLuma = (iMbX + iMbY * iDstStrideLuma) << 4; + int32_t iMbOffsetChroma = (iMbX + iMbY * iDstStrideChroma) << 3; + + uint8_t* pMbDstY = pCtx->pDec->pData[0] + iMbOffsetLuma; + uint8_t* pMbDstU = pCtx->pDec->pData[1] + iMbOffsetChroma; + uint8_t* pMbDstV = pCtx->pDec->pData[2] + iMbOffsetChroma; + + uint8_t* pPtrSrc; + + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA_PCM; + RestoreCabacDecEngineToBS (pCabacDecEngine, pBsAux); + intX_t iBytesLeft = pBsAux->pEndBuf - pBsAux->pCurBuf; + if (iBytesLeft < 384) { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_CABAC_NO_BS_TO_READ); + } + pPtrSrc = pBsAux->pCurBuf; + if (!pCtx->pParam->bParseOnly) { + for (i = 0; i < 16; i++) { //luma + memcpy (pMbDstY, pPtrSrc, 16); + pMbDstY += iDstStrideLuma; + pPtrSrc += 16; + } + for (i = 0; i < 8; i++) { //cb + memcpy (pMbDstU, pPtrSrc, 8); + pMbDstU += iDstStrideChroma; + pPtrSrc += 8; + } + for (i = 0; i < 8; i++) { //cr + memcpy (pMbDstV, pPtrSrc, 8); + pMbDstV += iDstStrideChroma; + pPtrSrc += 8; + } + } + + pBsAux->pCurBuf += 384; + + pCurDqLayer->pLumaQp[iMbXy] = 0; + pCurDqLayer->pChromaQp[iMbXy][0] = pCurDqLayer->pChromaQp[iMbXy][1] = 0; + memset (pCurDqLayer->pNzc[iMbXy], 16, sizeof (pCurDqLayer->pNzc[iMbXy])); + + //step 4: cabac engine init + WELS_READ_VERIFY (InitReadBits (pBsAux, 1)); + WELS_READ_VERIFY (InitCabacDecEngineFromBS (pCabacDecEngine, pBsAux)); + return ERR_NONE; +} +void UpdateP8x8RefCacheIdxCabac (int8_t pRefIndex[LIST_A][30], const int16_t& iPartIdx, + const int32_t& listIdx, const int8_t& iRef) { + const uint8_t uiCacheIdx = g_kuiCache30ScanIdx[iPartIdx]; + pRefIndex[listIdx][uiCacheIdx] = pRefIndex[listIdx][uiCacheIdx + 1] = pRefIndex[listIdx][uiCacheIdx + 6] = + pRefIndex[listIdx][uiCacheIdx + 7] = iRef; +} +} diff --git 
a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/parse_mb_syn_cavlc.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/parse_mb_syn_cavlc.cpp new file mode 100644 index 000000000..dc10d7273 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/parse_mb_syn_cavlc.cpp @@ -0,0 +1,1729 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file parse_mb_syn_cavlc.c + * + * \brief Interfaces implementation for parsing the syntax of MB + * + * \date 03/17/2009 Created + * + ************************************************************************************* + */ + + +#include "parse_mb_syn_cavlc.h" +#include "decode_slice.h" +#include "error_code.h" +#include "mv_pred.h" + +namespace WelsDec { +#define MAX_LEVEL_PREFIX 15 + +typedef struct TagReadBitsCache { + uint32_t uiCache32Bit; + uint8_t uiRemainBits; + uint8_t* pBuf; +} SReadBitsCache; + +void GetNeighborAvailMbType (PWelsNeighAvail pNeighAvail, PDqLayer pCurDqLayer) { + int32_t iCurSliceIdc, iTopSliceIdc, iLeftTopSliceIdc, iRightTopSliceIdc, iLeftSliceIdc; + int32_t iCurXy, iTopXy = 0, iLeftXy = 0, iLeftTopXy = 0, iRightTopXy = 0; + int32_t iCurX, iCurY; + + iCurXy = pCurDqLayer->iMbXyIndex; + iCurX = pCurDqLayer->iMbX; + iCurY = pCurDqLayer->iMbY; + iCurSliceIdc = pCurDqLayer->pSliceIdc[iCurXy]; + if (iCurX != 0) { + iLeftXy = iCurXy - 1; + iLeftSliceIdc = pCurDqLayer->pSliceIdc[iLeftXy]; + pNeighAvail->iLeftAvail = (iLeftSliceIdc == iCurSliceIdc); + pNeighAvail->iLeftCbp = pNeighAvail->iLeftAvail ? pCurDqLayer->pCbp[iLeftXy] : 0; + } else { + pNeighAvail->iLeftAvail = 0; + pNeighAvail->iLeftTopAvail = 0; + pNeighAvail->iLeftCbp = 0; + } + + if (iCurY != 0) { + iTopXy = iCurXy - pCurDqLayer->iMbWidth; + iTopSliceIdc = pCurDqLayer->pSliceIdc[iTopXy]; + pNeighAvail->iTopAvail = (iTopSliceIdc == iCurSliceIdc); + pNeighAvail->iTopCbp = pNeighAvail->iTopAvail ? 
pCurDqLayer->pCbp[iTopXy] : 0; + if (iCurX != 0) { + iLeftTopXy = iTopXy - 1; + iLeftTopSliceIdc = pCurDqLayer->pSliceIdc[iLeftTopXy]; + pNeighAvail->iLeftTopAvail = (iLeftTopSliceIdc == iCurSliceIdc); + } else { + pNeighAvail->iLeftTopAvail = 0; + } + if (iCurX != (pCurDqLayer->iMbWidth - 1)) { + iRightTopXy = iTopXy + 1; + iRightTopSliceIdc = pCurDqLayer->pSliceIdc[iRightTopXy]; + pNeighAvail->iRightTopAvail = (iRightTopSliceIdc == iCurSliceIdc); + } else { + pNeighAvail->iRightTopAvail = 0; + } + } else { + pNeighAvail->iTopAvail = 0; + pNeighAvail->iLeftTopAvail = 0; + pNeighAvail->iRightTopAvail = 0; + pNeighAvail->iTopCbp = 0; + } + + pNeighAvail->iLeftType = (pNeighAvail->iLeftAvail ? pCurDqLayer->pDec->pMbType[iLeftXy] : 0); + pNeighAvail->iTopType = (pNeighAvail->iTopAvail ? pCurDqLayer->pDec->pMbType[iTopXy] : 0); + pNeighAvail->iLeftTopType = (pNeighAvail->iLeftTopAvail ? pCurDqLayer->pDec->pMbType[iLeftTopXy] : 0); + pNeighAvail->iRightTopType = (pNeighAvail->iRightTopAvail ? 
pCurDqLayer->pDec->pMbType[iRightTopXy] : 0); +} +void WelsFillCacheNonZeroCount (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, + PDqLayer pCurDqLayer) { //no matter slice type, intra_pred_constrained_flag + int32_t iCurXy = pCurDqLayer->iMbXyIndex; + int32_t iTopXy = 0; + int32_t iLeftXy = 0; + if (pNeighAvail->iTopAvail) { + iTopXy = iCurXy - pCurDqLayer->iMbWidth; + } + if (pNeighAvail->iLeftAvail) { + iLeftXy = iCurXy - 1; + } + + //stuff non_zero_coeff_count from pNeighAvail(left and top) + if (pNeighAvail->iTopAvail) { + ST32 (&pNonZeroCount[1], LD32 (&pCurDqLayer->pNzc[iTopXy][12])); + pNonZeroCount[0] = pNonZeroCount[5] = pNonZeroCount[29] = 0; + ST16 (&pNonZeroCount[6], LD16 (&pCurDqLayer->pNzc[iTopXy][20])); + ST16 (&pNonZeroCount[30], LD16 (&pCurDqLayer->pNzc[iTopXy][22])); + } else { + ST32 (&pNonZeroCount[1], 0xFFFFFFFFU); + pNonZeroCount[0] = pNonZeroCount[5] = pNonZeroCount[29] = 0xFF; + ST16 (&pNonZeroCount[6], 0xFFFF); + ST16 (&pNonZeroCount[30], 0xFFFF); + } + + if (pNeighAvail->iLeftAvail) { + pNonZeroCount[8 * 1] = pCurDqLayer->pNzc[iLeftXy][3]; + pNonZeroCount[8 * 2] = pCurDqLayer->pNzc[iLeftXy][7]; + pNonZeroCount[8 * 3] = pCurDqLayer->pNzc[iLeftXy][11]; + pNonZeroCount[8 * 4] = pCurDqLayer->pNzc[iLeftXy][15]; + + pNonZeroCount[5 + 8 * 1] = pCurDqLayer->pNzc[iLeftXy][17]; + pNonZeroCount[5 + 8 * 2] = pCurDqLayer->pNzc[iLeftXy][21]; + pNonZeroCount[5 + 8 * 4] = pCurDqLayer->pNzc[iLeftXy][19]; + pNonZeroCount[5 + 8 * 5] = pCurDqLayer->pNzc[iLeftXy][23]; + } else { + pNonZeroCount[8 * 1] = + pNonZeroCount[8 * 2] = + pNonZeroCount[8 * 3] = + pNonZeroCount[8 * 4] = -1;//unavailable + + pNonZeroCount[5 + 8 * 1] = + pNonZeroCount[5 + 8 * 2] = -1;//unavailable + + pNonZeroCount[5 + 8 * 4] = + pNonZeroCount[5 + 8 * 5] = -1;//unavailable + } +} +void WelsFillCacheConstrain1IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode, + PDqLayer pCurDqLayer) { //no matter slice type + int32_t iCurXy = 
pCurDqLayer->iMbXyIndex; + int32_t iTopXy = 0; + int32_t iLeftXy = 0; + + //stuff non_zero_coeff_count from pNeighAvail(left and top) + WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurDqLayer); + + if (pNeighAvail->iTopAvail) { + iTopXy = iCurXy - pCurDqLayer->iMbWidth; + } + if (pNeighAvail->iLeftAvail) { + iLeftXy = iCurXy - 1; + } + + //intraNxN_pred_mode + if (pNeighAvail->iTopAvail && IS_INTRANxN (pNeighAvail->iTopType)) { //top + ST32 (pIntraPredMode + 1, LD32 (&pCurDqLayer->pIntraPredMode[iTopXy][0])); + } else { + int32_t iPred; + if (IS_INTRA16x16 (pNeighAvail->iTopType) || (MB_TYPE_INTRA_PCM == pNeighAvail->iTopType)) + iPred = 0x02020202; + else + iPred = 0xffffffff; + ST32 (pIntraPredMode + 1, iPred); + } + + if (pNeighAvail->iLeftAvail && IS_INTRANxN (pNeighAvail->iLeftType)) { //left + pIntraPredMode[ 0 + 8 ] = pCurDqLayer->pIntraPredMode[iLeftXy][4]; + pIntraPredMode[ 0 + 8 * 2] = pCurDqLayer->pIntraPredMode[iLeftXy][5]; + pIntraPredMode[ 0 + 8 * 3] = pCurDqLayer->pIntraPredMode[iLeftXy][6]; + pIntraPredMode[ 0 + 8 * 4] = pCurDqLayer->pIntraPredMode[iLeftXy][3]; + } else { + int8_t iPred; + if (IS_INTRA16x16 (pNeighAvail->iLeftType) || (MB_TYPE_INTRA_PCM == pNeighAvail->iLeftType)) + iPred = 2; + else + iPred = -1; + pIntraPredMode[ 0 + 8 ] = + pIntraPredMode[ 0 + 8 * 2] = + pIntraPredMode[ 0 + 8 * 3] = + pIntraPredMode[ 0 + 8 * 4] = iPred; + } +} + +void WelsFillCacheConstrain0IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode, + PDqLayer pCurDqLayer) { //no matter slice type + int32_t iCurXy = pCurDqLayer->iMbXyIndex; + int32_t iTopXy = 0; + int32_t iLeftXy = 0; + + //stuff non_zero_coeff_count from pNeighAvail(left and top) + WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurDqLayer); + + if (pNeighAvail->iTopAvail) { + iTopXy = iCurXy - pCurDqLayer->iMbWidth; + } + if (pNeighAvail->iLeftAvail) { + iLeftXy = iCurXy - 1; + } + + //intra4x4_pred_mode + if (pNeighAvail->iTopAvail && IS_INTRANxN 
(pNeighAvail->iTopType)) { //top + ST32 (pIntraPredMode + 1, LD32 (&pCurDqLayer->pIntraPredMode[iTopXy][0])); + } else { + int32_t iPred; + if (pNeighAvail->iTopAvail) + iPred = 0x02020202; + else + iPred = 0xffffffff; + ST32 (pIntraPredMode + 1, iPred); + } + + if (pNeighAvail->iLeftAvail && IS_INTRANxN (pNeighAvail->iLeftType)) { //left + pIntraPredMode[ 0 + 8 * 1] = pCurDqLayer->pIntraPredMode[iLeftXy][4]; + pIntraPredMode[ 0 + 8 * 2] = pCurDqLayer->pIntraPredMode[iLeftXy][5]; + pIntraPredMode[ 0 + 8 * 3] = pCurDqLayer->pIntraPredMode[iLeftXy][6]; + pIntraPredMode[ 0 + 8 * 4] = pCurDqLayer->pIntraPredMode[iLeftXy][3]; + } else { + int8_t iPred; + if (pNeighAvail->iLeftAvail) + iPred = 2; + else + iPred = -1; + pIntraPredMode[ 0 + 8 * 1] = + pIntraPredMode[ 0 + 8 * 2] = + pIntraPredMode[ 0 + 8 * 3] = + pIntraPredMode[ 0 + 8 * 4] = iPred; + } +} + +void WelsFillCacheInterCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int16_t iMvArray[LIST_A][30][MV_A], + int16_t iMvdCache[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], PDqLayer pCurDqLayer) { + int32_t iCurXy = pCurDqLayer->iMbXyIndex; + int32_t iTopXy = 0; + int32_t iLeftXy = 0; + int32_t iLeftTopXy = 0; + int32_t iRightTopXy = 0; + + PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; + PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader; + int32_t listCount = 1; + if (pSliceHeader->eSliceType == B_SLICE) { + listCount = 2; + } + //stuff non_zero_coeff_count from pNeighAvail(left and top) + WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurDqLayer); + + if (pNeighAvail->iTopAvail) { + iTopXy = iCurXy - pCurDqLayer->iMbWidth; + } + if (pNeighAvail->iLeftAvail) { + iLeftXy = iCurXy - 1; + } + if (pNeighAvail->iLeftTopAvail) { + iLeftTopXy = iCurXy - 1 - pCurDqLayer->iMbWidth; + } + if (pNeighAvail->iRightTopAvail) { + iRightTopXy = iCurXy + 1 - pCurDqLayer->iMbWidth; + } + + for (int32_t listIdx = 0; listIdx < listCount; ++listIdx) { + //stuff mv_cache and iRefIdxArray 
from left and top (inter) + if (pNeighAvail->iLeftAvail && IS_INTER (pNeighAvail->iLeftType)) { + ST32 (iMvArray[listIdx][6], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftXy][3])); + ST32 (iMvArray[listIdx][12], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftXy][7])); + ST32 (iMvArray[listIdx][18], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftXy][11])); + ST32 (iMvArray[listIdx][24], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftXy][15])); + + ST32 (iMvdCache[listIdx][6], LD32 (pCurDqLayer->pMvd[listIdx][iLeftXy][3])); + ST32 (iMvdCache[listIdx][12], LD32 (pCurDqLayer->pMvd[listIdx][iLeftXy][7])); + ST32 (iMvdCache[listIdx][18], LD32 (pCurDqLayer->pMvd[listIdx][iLeftXy][11])); + ST32 (iMvdCache[listIdx][24], LD32 (pCurDqLayer->pMvd[listIdx][iLeftXy][15])); + + iRefIdxArray[listIdx][6] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][3]; + iRefIdxArray[listIdx][12] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][7]; + iRefIdxArray[listIdx][18] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][11]; + iRefIdxArray[listIdx][24] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][15]; + } else { + ST32 (iMvArray[listIdx][6], 0); + ST32 (iMvArray[listIdx][12], 0); + ST32 (iMvArray[listIdx][18], 0); + ST32 (iMvArray[listIdx][24], 0); + + ST32 (iMvdCache[listIdx][6], 0); + ST32 (iMvdCache[listIdx][12], 0); + ST32 (iMvdCache[listIdx][18], 0); + ST32 (iMvdCache[listIdx][24], 0); + + + if (0 == pNeighAvail->iLeftAvail) { //not available + iRefIdxArray[listIdx][6] = + iRefIdxArray[listIdx][12] = + iRefIdxArray[listIdx][18] = + iRefIdxArray[listIdx][24] = REF_NOT_AVAIL; + } else { //available but is intra mb type + iRefIdxArray[listIdx][6] = + iRefIdxArray[listIdx][12] = + iRefIdxArray[listIdx][18] = + iRefIdxArray[listIdx][24] = REF_NOT_IN_LIST; + } + } + if (pNeighAvail->iLeftTopAvail && IS_INTER (pNeighAvail->iLeftTopType)) { + ST32 (iMvArray[listIdx][0], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftTopXy][15])); + ST32 (iMvdCache[listIdx][0], LD32 
(pCurDqLayer->pMvd[listIdx][iLeftTopXy][15])); + iRefIdxArray[listIdx][0] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftTopXy][15]; + } else { + ST32 (iMvArray[listIdx][0], 0); + ST32 (iMvdCache[listIdx][0], 0); + if (0 == pNeighAvail->iLeftTopAvail) { //not available + iRefIdxArray[listIdx][0] = REF_NOT_AVAIL; + } else { //available but is intra mb type + iRefIdxArray[listIdx][0] = REF_NOT_IN_LIST; + } + } + + if (pNeighAvail->iTopAvail && IS_INTER (pNeighAvail->iTopType)) { + ST64 (iMvArray[listIdx][1], LD64 (pCurDqLayer->pDec->pMv[listIdx][iTopXy][12])); + ST64 (iMvArray[listIdx][3], LD64 (pCurDqLayer->pDec->pMv[listIdx][iTopXy][14])); + ST64 (iMvdCache[listIdx][1], LD64 (pCurDqLayer->pMvd[listIdx][iTopXy][12])); + ST64 (iMvdCache[listIdx][3], LD64 (pCurDqLayer->pMvd[listIdx][iTopXy][14])); + ST32 (&iRefIdxArray[listIdx][1], LD32 (&pCurDqLayer->pDec->pRefIndex[listIdx][iTopXy][12])); + } else { + ST64 (iMvArray[listIdx][1], 0); + ST64 (iMvArray[listIdx][3], 0); + ST64 (iMvdCache[listIdx][1], 0); + ST64 (iMvdCache[listIdx][3], 0); + if (0 == pNeighAvail->iTopAvail) { //not available + iRefIdxArray[listIdx][1] = + iRefIdxArray[listIdx][2] = + iRefIdxArray[listIdx][3] = + iRefIdxArray[listIdx][4] = REF_NOT_AVAIL; + } else { //available but is intra mb type + iRefIdxArray[listIdx][1] = + iRefIdxArray[listIdx][2] = + iRefIdxArray[listIdx][3] = + iRefIdxArray[listIdx][4] = REF_NOT_IN_LIST; + } + } + + if (pNeighAvail->iRightTopAvail && IS_INTER (pNeighAvail->iRightTopType)) { + ST32 (iMvArray[listIdx][5], LD32 (pCurDqLayer->pDec->pMv[listIdx][iRightTopXy][12])); + ST32 (iMvdCache[listIdx][5], LD32 (pCurDqLayer->pMvd[listIdx][iRightTopXy][12])); + iRefIdxArray[listIdx][5] = pCurDqLayer->pDec->pRefIndex[listIdx][iRightTopXy][12]; + } else { + ST32 (iMvArray[listIdx][5], 0); + if (0 == pNeighAvail->iRightTopAvail) { //not available + iRefIdxArray[listIdx][5] = REF_NOT_AVAIL; + } else { //available but is intra mb type + iRefIdxArray[listIdx][5] = REF_NOT_IN_LIST; + } + } 
+ + //right-top 4*4 block unavailable + ST32 (iMvArray[listIdx][9], 0); + ST32 (iMvArray[listIdx][21], 0); + ST32 (iMvArray[listIdx][11], 0); + ST32 (iMvArray[listIdx][17], 0); + ST32 (iMvArray[listIdx][23], 0); + ST32 (iMvdCache[listIdx][9], 0); + ST32 (iMvdCache[listIdx][21], 0); + ST32 (iMvdCache[listIdx][11], 0); + ST32 (iMvdCache[listIdx][17], 0); + ST32 (iMvdCache[listIdx][23], 0); + iRefIdxArray[listIdx][9] = + iRefIdxArray[listIdx][21] = + iRefIdxArray[listIdx][11] = + iRefIdxArray[listIdx][17] = + iRefIdxArray[listIdx][23] = REF_NOT_AVAIL; + } +} + +void WelsFillDirectCacheCabac (PWelsNeighAvail pNeighAvail, int8_t iDirect[30], PDqLayer pCurDqLayer) { + + int32_t iCurXy = pCurDqLayer->iMbXyIndex; + int32_t iTopXy = 0; + int32_t iLeftXy = 0; + int32_t iLeftTopXy = 0; + int32_t iRightTopXy = 0; + + if (pNeighAvail->iTopAvail) { + iTopXy = iCurXy - pCurDqLayer->iMbWidth; + } + if (pNeighAvail->iLeftAvail) { + iLeftXy = iCurXy - 1; + } + if (pNeighAvail->iLeftTopAvail) { + iLeftTopXy = iCurXy - 1 - pCurDqLayer->iMbWidth; + } + if (pNeighAvail->iRightTopAvail) { + iRightTopXy = iCurXy + 1 - pCurDqLayer->iMbWidth; + } + memset (iDirect, 0, 30); + if (pNeighAvail->iLeftAvail && IS_INTER (pNeighAvail->iLeftType)) { + iDirect[6] = pCurDqLayer->pDirect[iLeftXy][3]; + iDirect[12] = pCurDqLayer->pDirect[iLeftXy][7]; + iDirect[18] = pCurDqLayer->pDirect[iLeftXy][11]; + iDirect[24] = pCurDqLayer->pDirect[iLeftXy][15]; + } + if (pNeighAvail->iLeftTopAvail && IS_INTER (pNeighAvail->iLeftTopType)) { + iDirect[0] = pCurDqLayer->pDirect[iLeftTopXy][15]; + } + + if (pNeighAvail->iTopAvail && IS_INTER (pNeighAvail->iTopType)) { + ST32 (&iDirect[1], LD32 (&pCurDqLayer->pDirect[iTopXy][12])); + } + + if (pNeighAvail->iRightTopAvail && IS_INTER (pNeighAvail->iRightTopType)) { + iDirect[5] = pCurDqLayer->pDirect[iRightTopXy][12]; + } + //right-top 4*4 block unavailable +} + +void WelsFillCacheInter (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, + int16_t 
iMvArray[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], PDqLayer pCurDqLayer) { + int32_t iCurXy = pCurDqLayer->iMbXyIndex; + int32_t iTopXy = 0; + int32_t iLeftXy = 0; + int32_t iLeftTopXy = 0; + int32_t iRightTopXy = 0; + + PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; + PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader; + int32_t listCount = 1; + if (pSliceHeader->eSliceType == B_SLICE) { + listCount = 2; + } + + //stuff non_zero_coeff_count from pNeighAvail(left and top) + WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurDqLayer); + + if (pNeighAvail->iTopAvail) { + iTopXy = iCurXy - pCurDqLayer->iMbWidth; + } + if (pNeighAvail->iLeftAvail) { + iLeftXy = iCurXy - 1; + } + if (pNeighAvail->iLeftTopAvail) { + iLeftTopXy = iCurXy - 1 - pCurDqLayer->iMbWidth; + } + if (pNeighAvail->iRightTopAvail) { + iRightTopXy = iCurXy + 1 - pCurDqLayer->iMbWidth; + } + + for (int32_t listIdx = 0; listIdx < listCount; ++listIdx) { + //stuff mv_cache and iRefIdxArray from left and top (inter) + if (pNeighAvail->iLeftAvail && IS_INTER (pNeighAvail->iLeftType)) { + ST32 (iMvArray[listIdx][6], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftXy][3])); + ST32 (iMvArray[listIdx][12], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftXy][7])); + ST32 (iMvArray[listIdx][18], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftXy][11])); + ST32 (iMvArray[listIdx][24], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftXy][15])); + iRefIdxArray[listIdx][6] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][3]; + iRefIdxArray[listIdx][12] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][7]; + iRefIdxArray[listIdx][18] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][11]; + iRefIdxArray[listIdx][24] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][15]; + } else { + ST32 (iMvArray[listIdx][6], 0); + ST32 (iMvArray[listIdx][12], 0); + ST32 (iMvArray[listIdx][18], 0); + ST32 (iMvArray[listIdx][24], 0); + + if (0 == pNeighAvail->iLeftAvail) { //not available + iRefIdxArray[listIdx][6] 
= + iRefIdxArray[listIdx][12] = + iRefIdxArray[listIdx][18] = + iRefIdxArray[listIdx][24] = REF_NOT_AVAIL; + } else { //available but is intra mb type + iRefIdxArray[listIdx][6] = + iRefIdxArray[listIdx][12] = + iRefIdxArray[listIdx][18] = + iRefIdxArray[listIdx][24] = REF_NOT_IN_LIST; + } + } + if (pNeighAvail->iLeftTopAvail && IS_INTER (pNeighAvail->iLeftTopType)) { + ST32 (iMvArray[listIdx][0], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftTopXy][15])); + iRefIdxArray[listIdx][0] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftTopXy][15]; + } else { + ST32 (iMvArray[listIdx][0], 0); + if (0 == pNeighAvail->iLeftTopAvail) { //not available + iRefIdxArray[listIdx][0] = REF_NOT_AVAIL; + } else { //available but is intra mb type + iRefIdxArray[listIdx][0] = REF_NOT_IN_LIST; + } + } + if (pNeighAvail->iTopAvail && IS_INTER (pNeighAvail->iTopType)) { + ST64 (iMvArray[listIdx][1], LD64 (pCurDqLayer->pDec->pMv[listIdx][iTopXy][12])); + ST64 (iMvArray[listIdx][3], LD64 (pCurDqLayer->pDec->pMv[listIdx][iTopXy][14])); + ST32 (&iRefIdxArray[listIdx][1], LD32 (&pCurDqLayer->pDec->pRefIndex[listIdx][iTopXy][12])); + } else { + ST64 (iMvArray[listIdx][1], 0); + ST64 (iMvArray[listIdx][3], 0); + if (0 == pNeighAvail->iTopAvail) { //not available + iRefIdxArray[listIdx][1] = + iRefIdxArray[listIdx][2] = + iRefIdxArray[listIdx][3] = + iRefIdxArray[listIdx][4] = REF_NOT_AVAIL; + } else { //available but is intra mb type + iRefIdxArray[listIdx][1] = + iRefIdxArray[listIdx][2] = + iRefIdxArray[listIdx][3] = + iRefIdxArray[listIdx][4] = REF_NOT_IN_LIST; + } + } + if (pNeighAvail->iRightTopAvail && IS_INTER (pNeighAvail->iRightTopType)) { + ST32 (iMvArray[listIdx][5], LD32 (pCurDqLayer->pDec->pMv[listIdx][iRightTopXy][12])); + iRefIdxArray[listIdx][5] = pCurDqLayer->pDec->pRefIndex[listIdx][iRightTopXy][12]; + } else { + ST32 (iMvArray[listIdx][5], 0); + if (0 == pNeighAvail->iRightTopAvail) { //not available + iRefIdxArray[listIdx][5] = REF_NOT_AVAIL; + } else { //available but is intra mb 
type + iRefIdxArray[listIdx][5] = REF_NOT_IN_LIST; + } + } + //right-top 4*4 block unavailable + ST32 (iMvArray[listIdx][9], 0); + ST32 (iMvArray[listIdx][21], 0); + ST32 (iMvArray[listIdx][11], 0); + ST32 (iMvArray[listIdx][17], 0); + ST32 (iMvArray[listIdx][23], 0); + iRefIdxArray[listIdx][9] = + iRefIdxArray[listIdx][21] = + iRefIdxArray[listIdx][11] = + iRefIdxArray[listIdx][17] = + iRefIdxArray[listIdx][23] = REF_NOT_AVAIL; + } +} + +int32_t PredIntra4x4Mode (int8_t* pIntraPredMode, int32_t iIdx4) { + int8_t iTopMode = pIntraPredMode[g_kuiScan8[iIdx4] - 8]; + int8_t iLeftMode = pIntraPredMode[g_kuiScan8[iIdx4] - 1]; + int8_t iBestMode; + + if (-1 == iLeftMode || -1 == iTopMode) { + iBestMode = 2; + } else { + iBestMode = WELS_MIN (iLeftMode, iTopMode); + } + return iBestMode; +} + +#define CHECK_I16_MODE(a, b, c, d) \ + ((a == g_ksI16PredInfo[a].iPredMode) && \ + (b >= g_ksI16PredInfo[a].iLeftAvail) && \ + (c >= g_ksI16PredInfo[a].iTopAvail) && \ + (d >= g_ksI16PredInfo[a].iLeftTopAvail)); +#define CHECK_CHROMA_MODE(a, b, c, d) \ + ((a == g_ksChromaPredInfo[a].iPredMode) && \ + (b >= g_ksChromaPredInfo[a].iLeftAvail) && \ + (c >= g_ksChromaPredInfo[a].iTopAvail) && \ + (d >= g_ksChromaPredInfo[a].iLeftTopAvail)); +#define CHECK_I4_MODE(a, b, c, d) \ + ((a == g_ksI4PredInfo[a].iPredMode) && \ + (b >= g_ksI4PredInfo[a].iLeftAvail) && \ + (c >= g_ksI4PredInfo[a].iTopAvail) && \ + (d >= g_ksI4PredInfo[a].iLeftTopAvail)); + + +int32_t CheckIntra16x16PredMode (uint8_t uiSampleAvail, int8_t* pMode) { + int32_t iLeftAvail = uiSampleAvail & 0x04; + int32_t bLeftTopAvail = uiSampleAvail & 0x02; + int32_t iTopAvail = uiSampleAvail & 0x01; + + if ((*pMode < 0) || (*pMode > MAX_PRED_MODE_ID_I16x16)) { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I16x16_PRED_MODE); + } + + if (I16_PRED_DC == *pMode) { + if (iLeftAvail && iTopAvail) { + return ERR_NONE; + } else if (iLeftAvail) { + *pMode = I16_PRED_DC_L; + } else if (iTopAvail) { + *pMode = I16_PRED_DC_T; 
+ } else { + *pMode = I16_PRED_DC_128; + } + } else { + bool bModeAvail = CHECK_I16_MODE (*pMode, iLeftAvail, iTopAvail, bLeftTopAvail); + if (0 == bModeAvail) { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I16x16_PRED_MODE); + } + } + return ERR_NONE; +} + + +int32_t CheckIntraChromaPredMode (uint8_t uiSampleAvail, int8_t* pMode) { + int32_t iLeftAvail = uiSampleAvail & 0x04; + int32_t bLeftTopAvail = uiSampleAvail & 0x02; + int32_t iTopAvail = uiSampleAvail & 0x01; + + if (C_PRED_DC == *pMode) { + if (iLeftAvail && iTopAvail) { + return ERR_NONE; + } else if (iLeftAvail) { + *pMode = C_PRED_DC_L; + } else if (iTopAvail) { + *pMode = C_PRED_DC_T; + } else { + *pMode = C_PRED_DC_128; + } + } else { + bool bModeAvail = CHECK_CHROMA_MODE (*pMode, iLeftAvail, iTopAvail, bLeftTopAvail); + if (0 == bModeAvail) { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE); + } + } + return ERR_NONE; +} + +int32_t CheckIntraNxNPredMode (int32_t* pSampleAvail, int8_t* pMode, int32_t iIndex, bool b8x8) { + int8_t iIdx = g_kuiCache30ScanIdx[iIndex]; + + int32_t iLeftAvail = pSampleAvail[iIdx - 1]; + int32_t iTopAvail = pSampleAvail[iIdx - 6]; + int32_t bLeftTopAvail = pSampleAvail[iIdx - 7]; + int32_t bRightTopAvail = pSampleAvail[iIdx - (b8x8 ? 
4 : 5)]; // Diff with 4x4 Pred + + int8_t iFinalMode; + + if ((*pMode < 0) || (*pMode > MAX_PRED_MODE_ID_I4x4)) { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INVALID_INTRA4X4_MODE); + } + + if (I4_PRED_DC == *pMode) { + if (iLeftAvail && iTopAvail) { + return *pMode; + } else if (iLeftAvail) { + iFinalMode = I4_PRED_DC_L; + } else if (iTopAvail) { + iFinalMode = I4_PRED_DC_T; + } else { + iFinalMode = I4_PRED_DC_128; + } + } else { + bool bModeAvail = CHECK_I4_MODE (*pMode, iLeftAvail, iTopAvail, bLeftTopAvail); + if (0 == bModeAvail) { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INVALID_INTRA4X4_MODE); + } + + iFinalMode = *pMode; + + //if right-top unavailable, modify mode DDL and VL (padding rightmost pixel of top) + if (I4_PRED_DDL == iFinalMode && 0 == bRightTopAvail) { + iFinalMode = I4_PRED_DDL_TOP; + } else if (I4_PRED_VL == iFinalMode && 0 == bRightTopAvail) { + iFinalMode = I4_PRED_VL_TOP; + } + } + return iFinalMode; +} + +void BsStartCavlc (PBitStringAux pBs) { + pBs->iIndex = ((pBs->pCurBuf - pBs->pStartBuf) << 3) - (16 - pBs->iLeftBits); +} +void BsEndCavlc (PBitStringAux pBs) { + pBs->pCurBuf = pBs->pStartBuf + (pBs->iIndex >> 3); + uint32_t uiCache32Bit = (uint32_t) ((((pBs->pCurBuf[0] << 8) | pBs->pCurBuf[1]) << 16) | + (pBs->pCurBuf[2] << 8) | pBs->pCurBuf[3]); + pBs->uiCurBits = uiCache32Bit << (pBs->iIndex & 0x07); + pBs->pCurBuf += 4; + pBs->iLeftBits = -16 + (pBs->iIndex & 0x07); +} + + +// return: used bits +static int32_t CavlcGetTrailingOnesAndTotalCoeff (uint8_t& uiTotalCoeff, uint8_t& uiTrailingOnes, + SReadBitsCache* pBitsCache, SVlcTable* pVlcTable, bool bChromaDc, int8_t nC) { + const uint8_t* kpVlcTableMoreBitsCountList[3] = {g_kuiVlcTableMoreBitsCount0, g_kuiVlcTableMoreBitsCount1, g_kuiVlcTableMoreBitsCount2}; + int32_t iUsedBits = 0; + int32_t iIndexVlc, iIndexValue, iNcMapIdx; + uint32_t uiCount; + uint32_t uiValue; + + if (bChromaDc) { + uiValue = pBitsCache->uiCache32Bit >> 24; + iIndexVlc = 
pVlcTable->kpChromaCoeffTokenVlcTable[uiValue][0]; + uiCount = pVlcTable->kpChromaCoeffTokenVlcTable[uiValue][1]; + POP_BUFFER (pBitsCache, uiCount); + iUsedBits += uiCount; + uiTrailingOnes = g_kuiVlcTrailingOneTotalCoeffTable[iIndexVlc][0]; + uiTotalCoeff = g_kuiVlcTrailingOneTotalCoeffTable[iIndexVlc][1]; + } else { //luma + iNcMapIdx = g_kuiNcMapTable[nC]; + if (iNcMapIdx <= 2) { + uiValue = pBitsCache->uiCache32Bit >> 24; + if (uiValue < g_kuiVlcTableNeedMoreBitsThread[iNcMapIdx]) { + POP_BUFFER (pBitsCache, 8); + iUsedBits += 8; + iIndexValue = pBitsCache->uiCache32Bit >> (32 - kpVlcTableMoreBitsCountList[iNcMapIdx][uiValue]); + iIndexVlc = pVlcTable->kpCoeffTokenVlcTable[iNcMapIdx + 1][uiValue][iIndexValue][0]; + uiCount = pVlcTable->kpCoeffTokenVlcTable[iNcMapIdx + 1][uiValue][iIndexValue][1]; + POP_BUFFER (pBitsCache, uiCount); + iUsedBits += uiCount; + } else { + iIndexVlc = pVlcTable->kpCoeffTokenVlcTable[0][iNcMapIdx][uiValue][0]; + uiCount = pVlcTable->kpCoeffTokenVlcTable[0][iNcMapIdx][uiValue][1]; + uiValue = pBitsCache->uiCache32Bit >> (32 - uiCount); + POP_BUFFER (pBitsCache, uiCount); + iUsedBits += uiCount; + } + } else { + uiValue = pBitsCache->uiCache32Bit >> (32 - 6); + POP_BUFFER (pBitsCache, 6); + iUsedBits += 6; + iIndexVlc = pVlcTable->kpCoeffTokenVlcTable[0][3][uiValue][0]; //differ + } + uiTrailingOnes = g_kuiVlcTrailingOneTotalCoeffTable[iIndexVlc][0]; + uiTotalCoeff = g_kuiVlcTrailingOneTotalCoeffTable[iIndexVlc][1]; + } + + return iUsedBits; +} + +static int32_t CavlcGetLevelVal (int32_t iLevel[16], SReadBitsCache* pBitsCache, uint8_t uiTotalCoeff, + uint8_t uiTrailingOnes) { + int32_t i, iUsedBits = 0; + int32_t iSuffixLength, iSuffixLengthSize, iLevelPrefix, iPrefixBits, iLevelCode, iThreshold; + for (i = 0; i < uiTrailingOnes; i++) { + iLevel[i] = 1 - ((pBitsCache->uiCache32Bit >> (30 - i)) & 0x02); + } + POP_BUFFER (pBitsCache, uiTrailingOnes); + iUsedBits += uiTrailingOnes; + + iSuffixLength = (uiTotalCoeff > 10 && uiTrailingOnes 
< 3); + + for (; i < uiTotalCoeff; i++) { + if (pBitsCache->uiRemainBits <= 16) SHIFT_BUFFER (pBitsCache); + WELS_GET_PREFIX_BITS (pBitsCache->uiCache32Bit, iPrefixBits); + if (iPrefixBits > MAX_LEVEL_PREFIX + 1) //iPrefixBits includes leading "0"s and first "1", should +1 + return -1; + POP_BUFFER (pBitsCache, iPrefixBits); + iUsedBits += iPrefixBits; + iLevelPrefix = iPrefixBits - 1; + + iLevelCode = iLevelPrefix << iSuffixLength; //differ + iSuffixLengthSize = iSuffixLength; + + if (iLevelPrefix >= 14) { + if (14 == iLevelPrefix && 0 == iSuffixLength) + iSuffixLengthSize = 4; + else if (15 == iLevelPrefix) { + iSuffixLengthSize = 12; + if (iSuffixLength == 0) + iLevelCode += 15; + } + } + + if (iSuffixLengthSize > 0) { + if (pBitsCache->uiRemainBits <= iSuffixLengthSize) SHIFT_BUFFER (pBitsCache); + iLevelCode += (pBitsCache->uiCache32Bit >> (32 - iSuffixLengthSize)); + POP_BUFFER (pBitsCache, iSuffixLengthSize); + iUsedBits += iSuffixLengthSize; + } + + iLevelCode += ((i == uiTrailingOnes) && (uiTrailingOnes < 3)) << 1; + iLevel[i] = ((iLevelCode + 2) >> 1); + iLevel[i] -= (iLevel[i] << 1) & (- (iLevelCode & 0x01)); + + iSuffixLength += !iSuffixLength; + iThreshold = 3 << (iSuffixLength - 1); + iSuffixLength += ((iLevel[i] > iThreshold) || (iLevel[i] < -iThreshold)) && (iSuffixLength < 6); + } + + return iUsedBits; +} + +static int32_t CavlcGetTotalZeros (int32_t& iZerosLeft, SReadBitsCache* pBitsCache, uint8_t uiTotalCoeff, + SVlcTable* pVlcTable, bool bChromaDc) { + int32_t iCount, iUsedBits = 0; + const uint8_t* kpBitNumMap; + uint32_t uiValue; + + int32_t iTotalZeroVlcIdx; + uint8_t uiTableType; + //chroma_dc (0 < uiTotalCoeff < 4); others (chroma_ac or luma: 0 < uiTotalCoeff < 16) + + if (bChromaDc) { + iTotalZeroVlcIdx = uiTotalCoeff; + kpBitNumMap = g_kuiTotalZerosBitNumChromaMap; + uiTableType = bChromaDc; + } else { + iTotalZeroVlcIdx = uiTotalCoeff; + kpBitNumMap = g_kuiTotalZerosBitNumMap; + uiTableType = 0; + } + + iCount = 
kpBitNumMap[iTotalZeroVlcIdx - 1]; + if (pBitsCache->uiRemainBits < iCount) SHIFT_BUFFER ( + pBitsCache); // if uiRemainBits+16 still smaller than iCount?? potential bug + uiValue = pBitsCache->uiCache32Bit >> (32 - iCount); + iCount = pVlcTable->kpTotalZerosTable[uiTableType][iTotalZeroVlcIdx - 1][uiValue][1]; + POP_BUFFER (pBitsCache, iCount); + iUsedBits += iCount; + iZerosLeft = pVlcTable->kpTotalZerosTable[uiTableType][iTotalZeroVlcIdx - 1][uiValue][0]; + + return iUsedBits; +} +static int32_t CavlcGetRunBefore (int32_t iRun[16], SReadBitsCache* pBitsCache, uint8_t uiTotalCoeff, + SVlcTable* pVlcTable, int32_t iZerosLeft) { + int32_t i, iUsedBits = 0; + uint32_t uiCount, uiValue, iPrefixBits; + + for (i = 0; i < uiTotalCoeff - 1; i++) { + if (iZerosLeft > 0) { + uiCount = g_kuiZeroLeftBitNumMap[iZerosLeft]; + if (pBitsCache->uiRemainBits < uiCount) SHIFT_BUFFER (pBitsCache); + uiValue = pBitsCache->uiCache32Bit >> (32 - uiCount); + if (iZerosLeft < 7) { + uiCount = pVlcTable->kpZeroTable[iZerosLeft - 1][uiValue][1]; + POP_BUFFER (pBitsCache, uiCount); + iUsedBits += uiCount; + iRun[i] = pVlcTable->kpZeroTable[iZerosLeft - 1][uiValue][0]; + } else { + POP_BUFFER (pBitsCache, uiCount); + iUsedBits += uiCount; + if (pVlcTable->kpZeroTable[6][uiValue][0] < 7) { + iRun[i] = pVlcTable->kpZeroTable[6][uiValue][0]; + } else { + if (pBitsCache->uiRemainBits < 16) SHIFT_BUFFER (pBitsCache); + WELS_GET_PREFIX_BITS (pBitsCache->uiCache32Bit, iPrefixBits); + iRun[i] = iPrefixBits + 6; + if (iRun[i] > iZerosLeft) + return -1; + POP_BUFFER (pBitsCache, iPrefixBits); + iUsedBits += iPrefixBits; + } + } + } else { + for (int j = i; j < uiTotalCoeff; j++) { + iRun[j] = 0; + } + return iUsedBits; + } + + iZerosLeft -= iRun[i]; + } + + iRun[uiTotalCoeff - 1] = iZerosLeft; + + return iUsedBits; +} + +int32_t WelsResidualBlockCavlc (SVlcTable* pVlcTable, uint8_t* pNonZeroCountCache, PBitStringAux pBs, int32_t iIndex, + int32_t iMaxNumCoeff, + const uint8_t* kpZigzagTable, int32_t 
iResidualProperty, int16_t* pTCoeff, uint8_t uiQp, + PWelsDecoderContext pCtx) { + int32_t iLevel[16], iZerosLeft, iCoeffNum; + int32_t iRun[16]; + int32_t iCurNonZeroCacheIdx, i; + + + int32_t iMbResProperty = 0; + GetMbResProperty (&iMbResProperty, &iResidualProperty, 1); + const uint16_t* kpDequantCoeff = pCtx->bUseScalingList ? pCtx->pDequant_coeff4x4[iMbResProperty][uiQp] : + g_kuiDequantCoeff[uiQp]; + + int8_t nA, nB, nC; + uint8_t uiTotalCoeff, uiTrailingOnes; + int32_t iUsedBits = 0; + intX_t iCurIdx = pBs->iIndex; + uint8_t* pBuf = ((uint8_t*)pBs->pStartBuf) + (iCurIdx >> 3); + bool bChromaDc = (CHROMA_DC == iResidualProperty); + uint8_t bChroma = (bChromaDc || CHROMA_AC == iResidualProperty); + SReadBitsCache sReadBitsCache; + + uint32_t uiCache32Bit = (uint32_t) ((((pBuf[0] << 8) | pBuf[1]) << 16) | (pBuf[2] << 8) | pBuf[3]); + sReadBitsCache.uiCache32Bit = uiCache32Bit << (iCurIdx & 0x07); + sReadBitsCache.uiRemainBits = 32 - (iCurIdx & 0x07); + sReadBitsCache.pBuf = pBuf; + ////////////////////////////////////////////////////////////////////////// + + if (bChroma) { + iCurNonZeroCacheIdx = g_kuiCache48CountScan4Idx[iIndex]; + nA = pNonZeroCountCache[iCurNonZeroCacheIdx - 1]; + nB = pNonZeroCountCache[iCurNonZeroCacheIdx - 8]; + } else { //luma + iCurNonZeroCacheIdx = g_kuiCache48CountScan4Idx[iIndex]; + nA = pNonZeroCountCache[iCurNonZeroCacheIdx - 1]; + nB = pNonZeroCountCache[iCurNonZeroCacheIdx - 8]; + } + + WELS_NON_ZERO_COUNT_AVERAGE (nC, nA, nB); + + iUsedBits += CavlcGetTrailingOnesAndTotalCoeff (uiTotalCoeff, uiTrailingOnes, &sReadBitsCache, pVlcTable, bChromaDc, + nC); + + if (iResidualProperty != CHROMA_DC && iResidualProperty != I16_LUMA_DC) { + pNonZeroCountCache[iCurNonZeroCacheIdx] = uiTotalCoeff; + ////////////////////////////////////////////////////////////////////////// + } + if (0 == uiTotalCoeff) { + pBs->iIndex += iUsedBits; + return ERR_NONE; + } + if ((uiTrailingOnes > 3) || (uiTotalCoeff > 16)) { /////////////////check 
uiTrailingOnes and uiTotalCoeff + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_CAVLC_INVALID_TOTAL_COEFF_OR_TRAILING_ONES); + } + if ((i = CavlcGetLevelVal (iLevel, &sReadBitsCache, uiTotalCoeff, uiTrailingOnes)) == -1) { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_CAVLC_INVALID_LEVEL); + } + iUsedBits += i; + if (uiTotalCoeff < iMaxNumCoeff) { + iUsedBits += CavlcGetTotalZeros (iZerosLeft, &sReadBitsCache, uiTotalCoeff, pVlcTable, bChromaDc); + } else { + iZerosLeft = 0; + } + + if ((iZerosLeft < 0) || ((iZerosLeft + uiTotalCoeff) > iMaxNumCoeff)) { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_CAVLC_INVALID_ZERO_LEFT); + } + if ((i = CavlcGetRunBefore (iRun, &sReadBitsCache, uiTotalCoeff, pVlcTable, iZerosLeft)) == -1) { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_CAVLC_INVALID_RUN_BEFORE); + } + iUsedBits += i; + pBs->iIndex += iUsedBits; + iCoeffNum = -1; + + if (iResidualProperty == CHROMA_DC) { + //chroma dc scaling process, is kpDequantCoeff[0]? LevelScale(qPdc%6,0,0))<<(qPdc/6-6), the transform is done at construction. + for (i = uiTotalCoeff - 1; i >= 0; --i) { + //FIXME merge into rundecode? + int32_t j; + iCoeffNum += iRun[i] + 1; //FIXME add 1 earlier ? + j = kpZigzagTable[ iCoeffNum ]; + pTCoeff[j] = iLevel[i]; + } + WelsChromaDcIdct (pTCoeff); + //scaling + if (!pCtx->bUseScalingList) { + for (int j = 0; j < 4; ++j) { + pTCoeff[kpZigzagTable[j]] = (pTCoeff[kpZigzagTable[j]] * kpDequantCoeff[0]) >> 1; + } + } else { + for (int j = 0; j < 4; ++j) { + pTCoeff[kpZigzagTable[j]] = ((int64_t) pTCoeff[kpZigzagTable[j]] * (int64_t) kpDequantCoeff[0]) >> 5; + } + } + } else if (iResidualProperty == I16_LUMA_DC) { //DC coefficent, only call in Intra_16x16, base_mode_flag = 0 + for (i = uiTotalCoeff - 1; i >= 0; --i) { //FIXME merge into rundecode? + int32_t j; + iCoeffNum += iRun[i] + 1; //FIXME add 1 earlier ? 
+ j = kpZigzagTable[ iCoeffNum ]; + pTCoeff[j] = iLevel[i]; + } + WelsLumaDcDequantIdct (pTCoeff, uiQp, pCtx); + } else { + for (i = uiTotalCoeff - 1; i >= 0; --i) { //FIXME merge into rundecode? + int32_t j; + iCoeffNum += iRun[i] + 1; //FIXME add 1 earlier ? + j = kpZigzagTable[ iCoeffNum ]; + if (!pCtx->bUseScalingList) { + pTCoeff[j] = (iLevel[i] * kpDequantCoeff[j & 0x07]); + } else { + pTCoeff[j] = (iLevel[i] * kpDequantCoeff[j] + 8) >> 4; + } + } + } + + return ERR_NONE; +} + +/* WelsResidualBlockCavlc8x8: runs the same CAVLC parsing sequence as WelsResidualBlockCavlc (read cache setup, nC from pNonZeroCountCache, total-coeff and trailing-ones, levels, total zeros, runs, then pBs->iIndex advanced by the bits used) but stores the result for an 8x8 dequantisation table. The table is pCtx->pDequant_coeff8x8[iMbResProperty - 6][uiQp] when pCtx->bUseScalingList is set, g_kuiDequantCoeff8x8[uiQp] otherwise. Each level is written to pTCoeff at kpZigzagTable[(iCoeffNum << 2) + iIdx4x4]; it is multiplied by the table entry and then either multiplied by 1 << (uiQp / 6 - 6) when uiQp >= 36, or rounded and shifted right by (6 - uiQp / 6) otherwise. Returns ERR_NONE or a GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ...) code on invalid data. NOTE(review): iIdx4x4 looks like the index of the 4x4 block inside the 8x8 block, which would make the stored positions interleaved - confirm against the caller. */ +int32_t WelsResidualBlockCavlc8x8 (SVlcTable* pVlcTable, uint8_t* pNonZeroCountCache, PBitStringAux pBs, int32_t iIndex, + int32_t iMaxNumCoeff, const uint8_t* kpZigzagTable, int32_t iResidualProperty, + int16_t* pTCoeff, int32_t iIdx4x4, uint8_t uiQp, + PWelsDecoderContext pCtx) { + int32_t iLevel[16], iZerosLeft, iCoeffNum; + int32_t iRun[16]; + int32_t iCurNonZeroCacheIdx, i; + + int32_t iMbResProperty = 0; + GetMbResProperty (&iMbResProperty, &iResidualProperty, 1); + + const uint16_t* kpDequantCoeff = pCtx->bUseScalingList ? 
pCtx->pDequant_coeff8x8[iMbResProperty - 6][uiQp] : + g_kuiDequantCoeff8x8[uiQp]; + + int8_t nA, nB, nC; + uint8_t uiTotalCoeff, uiTrailingOnes; + int32_t iUsedBits = 0; + intX_t iCurIdx = pBs->iIndex; + uint8_t* pBuf = ((uint8_t*)pBs->pStartBuf) + (iCurIdx >> 3); + bool bChromaDc = (CHROMA_DC == iResidualProperty); + uint8_t bChroma = (bChromaDc || CHROMA_AC == iResidualProperty); + SReadBitsCache sReadBitsCache; + + uint32_t uiCache32Bit = (uint32_t) ((((pBuf[0] << 8) | pBuf[1]) << 16) | (pBuf[2] << 8) | pBuf[3]); + sReadBitsCache.uiCache32Bit = uiCache32Bit << (iCurIdx & 0x07); + sReadBitsCache.uiRemainBits = 32 - (iCurIdx & 0x07); + sReadBitsCache.pBuf = pBuf; + ////////////////////////////////////////////////////////////////////////// + + if (bChroma) { + iCurNonZeroCacheIdx = g_kuiCache48CountScan4Idx[iIndex]; + nA = pNonZeroCountCache[iCurNonZeroCacheIdx - 1]; + nB = pNonZeroCountCache[iCurNonZeroCacheIdx - 8]; + } else { //luma + iCurNonZeroCacheIdx = g_kuiCache48CountScan4Idx[iIndex]; + nA = pNonZeroCountCache[iCurNonZeroCacheIdx - 1]; + nB = pNonZeroCountCache[iCurNonZeroCacheIdx - 8]; + } + + WELS_NON_ZERO_COUNT_AVERAGE (nC, nA, nB); + + iUsedBits += CavlcGetTrailingOnesAndTotalCoeff (uiTotalCoeff, uiTrailingOnes, &sReadBitsCache, pVlcTable, bChromaDc, + nC); + + if (iResidualProperty != CHROMA_DC && iResidualProperty != I16_LUMA_DC) { + pNonZeroCountCache[iCurNonZeroCacheIdx] = uiTotalCoeff; + ////////////////////////////////////////////////////////////////////////// + } + if (0 == uiTotalCoeff) { + pBs->iIndex += iUsedBits; + return ERR_NONE; + } + if ((uiTrailingOnes > 3) || (uiTotalCoeff > 16)) { /////////////////check uiTrailingOnes and uiTotalCoeff + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_CAVLC_INVALID_TOTAL_COEFF_OR_TRAILING_ONES); + } + if ((i = CavlcGetLevelVal (iLevel, &sReadBitsCache, uiTotalCoeff, uiTrailingOnes)) == -1) { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_CAVLC_INVALID_LEVEL); + } + iUsedBits += i; + if 
(uiTotalCoeff < iMaxNumCoeff) { + iUsedBits += CavlcGetTotalZeros (iZerosLeft, &sReadBitsCache, uiTotalCoeff, pVlcTable, bChromaDc); + } else { + iZerosLeft = 0; + } + + if ((iZerosLeft < 0) || ((iZerosLeft + uiTotalCoeff) > iMaxNumCoeff)) { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_CAVLC_INVALID_ZERO_LEFT); + } + if ((i = CavlcGetRunBefore (iRun, &sReadBitsCache, uiTotalCoeff, pVlcTable, iZerosLeft)) == -1) { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_CAVLC_INVALID_RUN_BEFORE); + } + iUsedBits += i; + pBs->iIndex += iUsedBits; + iCoeffNum = -1; + + for (i = uiTotalCoeff - 1; i >= 0; --i) { //FIXME merge into rundecode? + int32_t j; + iCoeffNum += iRun[i] + 1; //FIXME add 1 earlier ? + j = (iCoeffNum << 2) + iIdx4x4; + j = kpZigzagTable[ j ]; + pTCoeff[j] = uiQp >= 36 ? ((iLevel[i] * kpDequantCoeff[j]) * (1 << (uiQp / 6 - 6))) + : ((iLevel[i] * kpDequantCoeff[j] + (1 << (5 - uiQp / 6))) >> (6 - uiQp / 6)); + } + + return ERR_NONE; +} + +/* ParseInterInfo: parses the inter-prediction syntax (reference indices and motion vector differences) of the current macroblock, selected by pCurDqLayer->pDec->pMbType[iMbXy]: MB_TYPE_16x16, MB_TYPE_16x8, MB_TYPE_8x16 and MB_TYPE_8x8 / MB_TYPE_8x8_REF0; any other type falls through to the default case and returns ERR_NONE. Only LIST_0 is handled here. For each partition: a motion prediction flag is read with BsGetOneBit when bAdaptiveMotionPredFlag is set (it otherwise keeps bDefaultMotionPredFlag), and a resulting flag of 1 is logged and rejected with ERR_INFO_UNSUPPORTED_ILP; the reference index is read with BsGetTe0 and checked against iRefCount[0] and a non-NULL ppRefPic entry - on failure it is reset to 0 with dsBitstreamError set when error concealment is enabled, otherwise ERR_INFO_INVALID_REF_INDEX is returned; the motion vector is the predicted vector plus the two BsGetSe values, and its vertical component is checked against iMinVmv / iMaxVmv by WELS_CHECK_SE_BOTH_WARNING. Results are stored through the UpdateP16x16MotionInfo / UpdateP16x8MotionInfo / UpdateP8x16MotionInfo helpers or, for the 8x8 types, written directly into pCurDqLayer->pDec->pMv and pRefIndex and into the iMvArray / iRefIdxArray caches. pCtx->bMbRefConcealed is updated from the state of the referenced picture. */ +int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], + PBitStringAux pBs) { + PSlice pSlice = &pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer; + PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader; + PPicture* ppRefPic = pCtx->sRefPic.pRefList[LIST_0]; + int32_t iRefCount[2]; + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + int32_t i, j; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + int32_t iMotionPredFlag[4]; + int16_t iMv[2]; + uint32_t uiCode; + int32_t iCode; + int16_t iMinVmv = pSliceHeader->pSps->pSLevelLimits->iMinVmv; + int16_t iMaxVmv = pSliceHeader->pSps->pSLevelLimits->iMaxVmv; + iMotionPredFlag[0] = iMotionPredFlag[1] = iMotionPredFlag[2] = iMotionPredFlag[3] = + pSlice->sSliceHeaderExt.bDefaultMotionPredFlag; + iRefCount[0] = pSliceHeader->uiRefCount[0]; + iRefCount[1] = pSliceHeader->uiRefCount[1]; + + bool bIsPending = GetThreadCount (pCtx) > 1; + + switch (pCurDqLayer->pDec->pMbType[iMbXy]) { + case MB_TYPE_16x16: { + 
int32_t iRefIdx = 0; + if (pSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //motion_prediction_flag_l0[ mbPartIdx ] + iMotionPredFlag[0] = uiCode; + } + if (iMotionPredFlag[0] == 0) { + WELS_READ_VERIFY (BsGetTe0 (pBs, iRefCount[0], &uiCode)); //motion_prediction_flag_l1[ mbPartIdx ] + iRefIdx = uiCode; + // Security check: iRefIdx should be in range 0 to num_ref_idx_l0_active_minus1, includsive + // ref to standard section 7.4.5.1. iRefCount[0] is 1 + num_ref_idx_l0_active_minus1. + if ((iRefIdx < 0) || (iRefIdx >= iRefCount[0]) || (ppRefPic[iRefIdx] == NULL)) { //error ref_idx + pCtx->bMbRefConcealed = true; + if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) { + iRefIdx = 0; + pCtx->iErrorCode |= dsBitstreamError; + } else { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX); + } + } + pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRefIdx] + && (ppRefPic[iRefIdx]->bIsComplete || bIsPending)); + } else { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. 
"); + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP); + } + PredMv (iMvArray, iRefIdxArray, LIST_0, 0, 4, iRefIdx, iMv); + + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l0[ mbPartIdx ][ 0 ][ compIdx ] + iMv[0] += iCode; + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l1[ mbPartIdx ][ 0 ][ compIdx ] + iMv[1] += iCode; + WELS_CHECK_SE_BOTH_WARNING (iMv[1], iMinVmv, iMaxVmv, "vertical mv"); + UpdateP16x16MotionInfo (pCurDqLayer, LIST_0, iRefIdx, iMv); + } + break; + case MB_TYPE_16x8: { + int32_t iRefIdx[2]; + for (i = 0; i < 2; i++) { + if (pSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //motion_prediction_flag_l0[ mbPartIdx ] + iMotionPredFlag[i] = uiCode; + } + } + + for (i = 0; i < 2; i++) { + if (iMotionPredFlag[i]) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. "); + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP); + } + WELS_READ_VERIFY (BsGetTe0 (pBs, iRefCount[0], &uiCode)); //ref_idx_l0[ mbPartIdx ] + iRefIdx[i] = uiCode; + if ((iRefIdx[i] < 0) || (iRefIdx[i] >= iRefCount[0]) || (ppRefPic[iRefIdx[i]] == NULL)) { //error ref_idx + pCtx->bMbRefConcealed = true; + if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) { + iRefIdx[i] = 0; + pCtx->iErrorCode |= dsBitstreamError; + } else { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX); + } + } + pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! 
(ppRefPic[iRefIdx[i]] + && (ppRefPic[iRefIdx[i]]->bIsComplete || bIsPending)); + } + for (i = 0; i < 2; i++) { + PredInter16x8Mv (iMvArray, iRefIdxArray, LIST_0, i << 3, iRefIdx[i], iMv); + + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l0[ mbPartIdx ][ 0 ][ compIdx ] + iMv[0] += iCode; + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l1[ mbPartIdx ][ 0 ][ compIdx ] + iMv[1] += iCode; + WELS_CHECK_SE_BOTH_WARNING (iMv[1], iMinVmv, iMaxVmv, "vertical mv"); + UpdateP16x8MotionInfo (pCurDqLayer, iMvArray, iRefIdxArray, LIST_0, i << 3, iRefIdx[i], iMv); + } + } + break; + case MB_TYPE_8x16: { + int32_t iRefIdx[2]; + for (i = 0; i < 2; i++) { + if (pSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //motion_prediction_flag_l0[ mbPartIdx ] + iMotionPredFlag[i] = uiCode; + } + } + + for (i = 0; i < 2; i++) { + if (iMotionPredFlag[i] == 0) { + WELS_READ_VERIFY (BsGetTe0 (pBs, iRefCount[0], &uiCode)); //ref_idx_l0[ mbPartIdx ] + iRefIdx[i] = uiCode; + if ((iRefIdx[i] < 0) || (iRefIdx[i] >= iRefCount[0]) || (ppRefPic[iRefIdx[i]] == NULL)) { //error ref_idx + pCtx->bMbRefConcealed = true; + if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) { + iRefIdx[i] = 0; + pCtx->iErrorCode |= dsBitstreamError; + } else { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX); + } + } + pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRefIdx[i]] + && (ppRefPic[iRefIdx[i]]->bIsComplete || bIsPending)); + } else { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. 
"); + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP); + } + + } + for (i = 0; i < 2; i++) { + PredInter8x16Mv (iMvArray, iRefIdxArray, LIST_0, i << 2, iRefIdx[i], iMv); + + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l0[ mbPartIdx ][ 0 ][ compIdx ] + iMv[0] += iCode; + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l1[ mbPartIdx ][ 0 ][ compIdx ] + iMv[1] += iCode; + WELS_CHECK_SE_BOTH_WARNING (iMv[1], iMinVmv, iMaxVmv, "vertical mv"); + UpdateP8x16MotionInfo (pCurDqLayer, iMvArray, iRefIdxArray, LIST_0, i << 2, iRefIdx[i], iMv); + } + } + break; + case MB_TYPE_8x8: + case MB_TYPE_8x8_REF0: { + int32_t iRefIdx[4] = {0}, iSubPartCount[4], iPartWidth[4]; + uint32_t uiSubMbType; + + if (MB_TYPE_8x8_REF0 == pCurDqLayer->pDec->pMbType[iMbXy]) { + iRefCount[0] = + iRefCount[1] = 1; + } + + //uiSubMbType, partition + for (i = 0; i < 4; i++) { + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //sub_mb_type[ mbPartIdx ] + uiSubMbType = uiCode; + if (uiSubMbType >= 4) { //invalid uiSubMbType + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_SUB_MB_TYPE); + } + pCurDqLayer->pSubMbType[iMbXy][i] = g_ksInterPSubMbTypeInfo[uiSubMbType].iType; + iSubPartCount[i] = g_ksInterPSubMbTypeInfo[uiSubMbType].iPartCount; + iPartWidth[i] = g_ksInterPSubMbTypeInfo[uiSubMbType].iPartWidth; + + // Need modification when B picture add in, reference to 7.3.5 + pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] &= (uiSubMbType == 0); + } + + if (pSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag) { + for (i = 0; i < 4; i++) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //motion_prediction_flag_l0[ mbPartIdx ] + iMotionPredFlag[i] = uiCode; + } + } + + //iRefIdxArray + if (MB_TYPE_8x8_REF0 == pCurDqLayer->pDec->pMbType[iMbXy]) { + memset (pCurDqLayer->pDec->pRefIndex[0][iMbXy], 0, 16); + } else { + for (i = 0; i < 4; i++) { + int16_t iIndex8 = i << 2; + uint8_t uiScan4Idx = g_kuiScan4[iIndex8]; + + if (iMotionPredFlag[i] == 0) { + WELS_READ_VERIFY 
(BsGetTe0 (pBs, iRefCount[0], &uiCode)); //ref_idx_l0[ mbPartIdx ] + iRefIdx[i] = uiCode; + if ((iRefIdx[i] < 0) || (iRefIdx[i] >= iRefCount[0]) || (ppRefPic[iRefIdx[i]] == NULL)) { //error ref_idx + pCtx->bMbRefConcealed = true; + if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) { + iRefIdx[i] = 0; + pCtx->iErrorCode |= dsBitstreamError; + } else { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX); + } + } + pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRefIdx[i]] + && (ppRefPic[iRefIdx[i]]->bIsComplete || bIsPending)); + + pCurDqLayer->pDec->pRefIndex[0][iMbXy][uiScan4Idx ] = pCurDqLayer->pDec->pRefIndex[0][iMbXy][uiScan4Idx + 1] = + pCurDqLayer->pDec->pRefIndex[0][iMbXy][uiScan4Idx + 4] = pCurDqLayer->pDec->pRefIndex[0][iMbXy][uiScan4Idx + 5] = + iRefIdx[i]; + } else { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. "); + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP); + } + } + } + + //gain mv and update mv cache + for (i = 0; i < 4; i++) { + int8_t iPartCount = iSubPartCount[i]; + uint32_t uiSubMbType = pCurDqLayer->pSubMbType[iMbXy][i]; + int16_t iMv[2], iPartIdx, iBlockWidth = iPartWidth[i], iIdx = i << 2; + uint8_t uiScan4Idx, uiCacheIdx; + + uint8_t uiIdx4Cache = g_kuiCache30ScanIdx[iIdx]; + + iRefIdxArray[0][uiIdx4Cache ] = iRefIdxArray[0][uiIdx4Cache + 1] = + iRefIdxArray[0][uiIdx4Cache + 6] = iRefIdxArray[0][uiIdx4Cache + 7] = iRefIdx[i]; + + for (j = 0; j < iPartCount; j++) { + iPartIdx = iIdx + j * iBlockWidth; + uiScan4Idx = g_kuiScan4[iPartIdx]; + uiCacheIdx = g_kuiCache30ScanIdx[iPartIdx]; + PredMv (iMvArray, iRefIdxArray, LIST_0, iPartIdx, iBlockWidth, iRefIdx[i], iMv); + + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l0[ mbPartIdx ][ subMbPartIdx ][ compIdx ] + iMv[0] += iCode; + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l1[ mbPartIdx ][ subMbPartIdx ][ compIdx ] + iMv[1] += iCode; + 
WELS_CHECK_SE_BOTH_WARNING (iMv[1], iMinVmv, iMaxVmv, "vertical mv"); + if (SUB_MB_TYPE_8x8 == uiSubMbType) { + ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx], LD32 (iMv)); + ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx + 1], LD32 (iMv)); + ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx + 4], LD32 (iMv)); + ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx + 5], LD32 (iMv)); + ST32 (iMvArray[0][uiCacheIdx ], LD32 (iMv)); + ST32 (iMvArray[0][uiCacheIdx + 1], LD32 (iMv)); + ST32 (iMvArray[0][uiCacheIdx + 6], LD32 (iMv)); + ST32 (iMvArray[0][uiCacheIdx + 7], LD32 (iMv)); + } else if (SUB_MB_TYPE_8x4 == uiSubMbType) { + ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx ], LD32 (iMv)); + ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx + 1], LD32 (iMv)); + ST32 (iMvArray[0][uiCacheIdx ], LD32 (iMv)); + ST32 (iMvArray[0][uiCacheIdx + 1], LD32 (iMv)); + } else if (SUB_MB_TYPE_4x8 == uiSubMbType) { + ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx ], LD32 (iMv)); + ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx + 4], LD32 (iMv)); + ST32 (iMvArray[0][uiCacheIdx ], LD32 (iMv)); + ST32 (iMvArray[0][uiCacheIdx + 6], LD32 (iMv)); + } else { //SUB_MB_TYPE_4x4 == uiSubMbType + ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx ], LD32 (iMv)); + ST32 (iMvArray[0][uiCacheIdx ], LD32 (iMv)); + } + } + } + } + break; + default: + break; + } + + return ERR_NONE; +} +int32_t ParseInterBInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][MV_A], + int8_t iRefIdxArray[LIST_A][30], PBitStringAux pBs) { + PSlice pSlice = &pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer; + PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader; + PPicture* ppRefPic[2]; + ppRefPic[LIST_0] = pCtx->sRefPic.pRefList[LIST_0]; + ppRefPic[LIST_1] = pCtx->sRefPic.pRefList[LIST_1]; + int8_t ref_idx_list[LIST_A][4]; + int8_t iRef[2] = { 0, 0 }; + int32_t iRefCount[2]; + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + uint8_t 
iMotionPredFlag[LIST_A][4]; + int16_t iMv[2]; + uint32_t uiCode; + int32_t iCode; + int16_t iMinVmv = pSliceHeader->pSps->pSLevelLimits->iMinVmv; + int16_t iMaxVmv = pSliceHeader->pSps->pSLevelLimits->iMaxVmv; + memset (ref_idx_list, -1, LIST_A * 4); + memset (iMotionPredFlag, (pSlice->sSliceHeaderExt.bDefaultMotionPredFlag ? 1 : 0), LIST_A * 4); + iRefCount[0] = pSliceHeader->uiRefCount[0]; + iRefCount[1] = pSliceHeader->uiRefCount[1]; + + bool bIsPending = GetThreadCount (pCtx) > 1; + + MbType mbType = pCurDqLayer->pDec->pMbType[iMbXy]; + if (IS_DIRECT (mbType)) { + + int16_t pMvDirect[LIST_A][2] = { { 0, 0 }, { 0, 0 } }; + SubMbType subMbType; + if (pSliceHeader->iDirectSpatialMvPredFlag) { + //predict direct spatial mv + int32_t ret = PredMvBDirectSpatial (pCtx, pMvDirect, iRef, subMbType); + if (ret != ERR_NONE) { + return ret; + } + } else { + //temporal direct 16x16 mode + int32_t ret = PredBDirectTemporal (pCtx, pMvDirect, iRef, subMbType); + if (ret != ERR_NONE) { + return ret; + } + } + } else if (IS_INTER_16x16 (mbType)) { + if (pSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag) { + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + if (IS_DIR (mbType, 0, listIdx)) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //motion_prediction_flag_l0/l1[ mbPartIdx ] + iMotionPredFlag[listIdx][0] = uiCode; + } + } + } + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + if (IS_DIR (mbType, 0, listIdx)) { + if (iMotionPredFlag[listIdx][0] == 0) { + WELS_READ_VERIFY (BsGetTe0 (pBs, iRefCount[listIdx], &uiCode)); //motion_prediction_flag_l1[ mbPartIdx ] + ref_idx_list[listIdx][0] = uiCode; + // Security check: iRefIdx should be in range 0 to num_ref_idx_l0_active_minus1, includsive + // ref to standard section 7.4.5.1. iRefCount[0] is 1 + num_ref_idx_l0_active_minus1. 
+ if ((ref_idx_list[listIdx][0] < 0) || (ref_idx_list[listIdx][0] >= iRefCount[listIdx]) + || (ppRefPic[listIdx][ref_idx_list[listIdx][0]] == NULL)) { //error ref_idx + pCtx->bMbRefConcealed = true; + if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) { + ref_idx_list[listIdx][0] = 0; + pCtx->iErrorCode |= dsBitstreamError; + RETURN_ERR_IF_NULL(ppRefPic[listIdx][ref_idx_list[listIdx][0]]); + } else { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX); + } + } + pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[listIdx][ref_idx_list[listIdx][0]] + && (ppRefPic[listIdx][ref_idx_list[listIdx][0]]->bIsComplete || bIsPending)); + } else { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. "); + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP); + } + } + } + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + if (IS_DIR (mbType, 0, listIdx)) { + PredMv (iMvArray, iRefIdxArray, listIdx, 0, 4, ref_idx_list[listIdx][0], iMv); + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l0[ mbPartIdx ][ 0 ][ compIdx ] + iMv[0] += iCode; + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l1[ mbPartIdx ][ 0 ][ compIdx ] + iMv[1] += iCode; + WELS_CHECK_SE_BOTH_WARNING (iMv[1], iMinVmv, iMaxVmv, "vertical mv"); + } else { + * (uint32_t*)iMv = 0; + } + UpdateP16x16MotionInfo (pCurDqLayer, listIdx, ref_idx_list[listIdx][0], iMv); + } + } else if (IS_INTER_16x8 (mbType)) { + if (pSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag) { + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + for (int32_t i = 0; i < 2; ++i) { + if (IS_DIR (mbType, i, listIdx)) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //motion_prediction_flag_l0/l1[ mbPartIdx ] + iMotionPredFlag[listIdx][i] = uiCode; + } + } + } + } + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + for (int32_t i = 0; i < 2; ++i) { + if (IS_DIR (mbType, i, listIdx)) { + if 
(iMotionPredFlag[listIdx][i] == 0) { + WELS_READ_VERIFY (BsGetTe0 (pBs, iRefCount[listIdx], &uiCode)); //motion_prediction_flag_l1[ mbPartIdx ] + int32_t iRefIdx = uiCode; + // Security check: iRefIdx should be in range 0 to num_ref_idx_l0_active_minus1, includsive + // ref to standard section 7.4.5.1. iRefCount[0] is 1 + num_ref_idx_l0_active_minus1. + if ((iRefIdx < 0) || (iRefIdx >= iRefCount[listIdx]) || (ppRefPic[listIdx][iRefIdx] == NULL)) { //error ref_idx + pCtx->bMbRefConcealed = true; + if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) { + iRefIdx = 0; + pCtx->iErrorCode |= dsBitstreamError; + RETURN_ERR_IF_NULL(ppRefPic[listIdx][iRefIdx]); + } else { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX); + } + } + ref_idx_list[listIdx][i] = iRefIdx; + pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[listIdx][iRefIdx] + && (ppRefPic[listIdx][iRefIdx]->bIsComplete || bIsPending)); + } else { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. 
"); + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP); + } + } + } + } + // Read mvd_L0 then mvd_L1 + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + // Partitions + for (int32_t i = 0; i < 2; i++) { + int iPartIdx = i << 3; + int32_t iRefIdx = ref_idx_list[listIdx][i]; + if (IS_DIR (mbType, i, listIdx)) { + PredInter16x8Mv (iMvArray, iRefIdxArray, listIdx, iPartIdx, iRefIdx, iMv); + + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l{0,1}[ mbPartIdx ][ listIdx ][x] + iMv[0] += iCode; + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l{0,1}[ mbPartIdx ][ listIdx ][y] + iMv[1] += iCode; + + WELS_CHECK_SE_BOTH_WARNING (iMv[1], iMinVmv, iMaxVmv, "vertical mv"); + } else { + * (uint32_t*)iMv = 0; + } + UpdateP16x8MotionInfo (pCurDqLayer, iMvArray, iRefIdxArray, listIdx, iPartIdx, iRefIdx, iMv); + } + } + } else if (IS_INTER_8x16 (mbType)) { + if (pSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag) { + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + for (int32_t i = 0; i < 2; ++i) { + if (IS_DIR (mbType, i, listIdx)) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //motion_prediction_flag_l0/l1[ mbPartIdx ] + iMotionPredFlag[listIdx][i] = uiCode; + } + } + } + } + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + for (int32_t i = 0; i < 2; ++i) { + if (IS_DIR (mbType, i, listIdx)) { + if (iMotionPredFlag[listIdx][i] == 0) { + WELS_READ_VERIFY (BsGetTe0 (pBs, iRefCount[listIdx], &uiCode)); //motion_prediction_flag_l1[ mbPartIdx ] + int32_t iRefIdx = uiCode; + // Security check: iRefIdx should be in range 0 to num_ref_idx_l0_active_minus1, includsive + // ref to standard section 7.4.5.1. iRefCount[0] is 1 + num_ref_idx_l0_active_minus1. 
+ if ((iRefIdx < 0) || (iRefIdx >= iRefCount[listIdx]) || (ppRefPic[listIdx][iRefIdx] == NULL)) { //error ref_idx + pCtx->bMbRefConcealed = true; + if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) { + iRefIdx = 0; + pCtx->iErrorCode |= dsBitstreamError; + RETURN_ERR_IF_NULL(ppRefPic[listIdx][iRefIdx]); + } else { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX); + } + } + ref_idx_list[listIdx][i] = iRefIdx; + pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[listIdx][iRefIdx] + && (ppRefPic[listIdx][iRefIdx]->bIsComplete || bIsPending)); + } else { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. "); + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP); + } + } + } + } + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + for (int32_t i = 0; i < 2; i++) { + int iPartIdx = i << 2; + int32_t iRefIdx = ref_idx_list[listIdx][i]; + if (IS_DIR (mbType, i, listIdx)) { + PredInter8x16Mv (iMvArray, iRefIdxArray, listIdx, iPartIdx, iRefIdx, iMv); + + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l0[ mbPartIdx ][ 0 ][ compIdx ] + iMv[0] += iCode; + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l1[ mbPartIdx ][ 0 ][ compIdx ] + iMv[1] += iCode; + WELS_CHECK_SE_BOTH_WARNING (iMv[1], iMinVmv, iMaxVmv, "vertical mv"); + } else { + * (uint32_t*)iMv = 0; + } + UpdateP8x16MotionInfo (pCurDqLayer, iMvArray, iRefIdxArray, listIdx, iPartIdx, iRefIdx, iMv); + } + } + } else if (IS_Inter_8x8 (mbType)) { + int8_t pSubPartCount[4], pPartW[4]; + uint32_t uiSubMbType; + //sub_mb_type, partition + int16_t pMvDirect[LIST_A][2] = { { 0, 0 }, { 0, 0 } }; + if (pCtx->sRefPic.pRefList[LIST_1][0] == NULL) { + SLogContext* pLogCtx = & (pCtx->sLogCtx); + WelsLog (pLogCtx, WELS_LOG_ERROR, "Colocated Ref Picture for B-Slice is lost, B-Slice decoding cannot be continued!"); + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_DATA, ERR_INFO_REFERENCE_PIC_LOST); + } + 
bool bIsLongRef = pCtx->sRefPic.pRefList[LIST_1][0]->bIsLongRef; + const int32_t ref0Count = WELS_MIN (pSliceHeader->uiRefCount[LIST_0], pCtx->sRefPic.uiRefCount[LIST_0]); + bool has_direct_called = false; + SubMbType directSubMbType = 0; + + //uiSubMbType, partition + for (int32_t i = 0; i < 4; i++) { + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //sub_mb_type[ mbPartIdx ] + uiSubMbType = uiCode; + if (uiSubMbType >= 13) { //invalid uiSubMbType + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_SUB_MB_TYPE); + } + pSubPartCount[i] = g_ksInterBSubMbTypeInfo[uiSubMbType].iPartCount; + pPartW[i] = g_ksInterBSubMbTypeInfo[uiSubMbType].iPartWidth; + + // Need modification when B picture add in, reference to 7.3.5 + if (pSubPartCount[i] > 1) + pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = false; + + if (IS_DIRECT (g_ksInterBSubMbTypeInfo[uiSubMbType].iType)) { + if (!has_direct_called) { + if (pSliceHeader->iDirectSpatialMvPredFlag) { + int32_t ret = PredMvBDirectSpatial (pCtx, pMvDirect, iRef, directSubMbType); + if (ret != ERR_NONE) { + return ret; + } + + } else { + //temporal direct mode + int32_t ret = PredBDirectTemporal (pCtx, pMvDirect, iRef, directSubMbType); + if (ret != ERR_NONE) { + return ret; + } + } + has_direct_called = true; + } + pCurDqLayer->pSubMbType[iMbXy][i] = directSubMbType; + if (IS_SUB_4x4 (pCurDqLayer->pSubMbType[iMbXy][i])) { + pSubPartCount[i] = 4; + pPartW[i] = 1; + } + } else { + pCurDqLayer->pSubMbType[iMbXy][i] = g_ksInterBSubMbTypeInfo[uiSubMbType].iType; + } + } + if (pSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag) { + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + for (int32_t i = 0; i < 4; i++) { + bool is_dir = IS_DIR (pCurDqLayer->pSubMbType[iMbXy][i], 0, listIdx) > 0; + if (is_dir) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //motion_prediction_flag_l0[ mbPartIdx ] + iMotionPredFlag[listIdx][i] = uiCode; + } + } + } + } + for (int32_t i = 0; i < 4; i++) { //Direct 8x8 Ref and mv + 
int16_t iIdx8 = i << 2; + if (IS_DIRECT (pCurDqLayer->pSubMbType[iMbXy][i])) { + if (pSliceHeader->iDirectSpatialMvPredFlag) { + FillSpatialDirect8x8Mv (pCurDqLayer, iIdx8, pSubPartCount[i], pPartW[i], directSubMbType, bIsLongRef, pMvDirect, iRef, + iMvArray, NULL); + } else { + int16_t (*mvColoc)[2] = pCurDqLayer->iColocMv[LIST_0]; + iRef[LIST_1] = 0; + iRef[LIST_0] = 0; + const uint8_t uiColoc4Idx = g_kuiScan4[iIdx8]; + if (!pCurDqLayer->iColocIntra[uiColoc4Idx]) { + iRef[LIST_0] = 0; + int8_t colocRefIndexL0 = pCurDqLayer->iColocRefIndex[LIST_0][uiColoc4Idx]; + if (colocRefIndexL0 >= 0) { + iRef[LIST_0] = MapColToList0 (pCtx, colocRefIndexL0, ref0Count); + } else { + mvColoc = pCurDqLayer->iColocMv[LIST_1]; + } + } + Update8x8RefIdx (pCurDqLayer, iIdx8, LIST_0, iRef[LIST_0]); + Update8x8RefIdx (pCurDqLayer, iIdx8, LIST_1, iRef[LIST_1]); + FillTemporalDirect8x8Mv (pCurDqLayer, iIdx8, pSubPartCount[i], pPartW[i], directSubMbType, iRef, mvColoc, iMvArray, + NULL); + } + } + } + //ref no-direct + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + for (int32_t i = 0; i < 4; i++) { + int16_t iIdx8 = i << 2; + int32_t subMbType = pCurDqLayer->pSubMbType[iMbXy][i]; + int8_t iref = REF_NOT_IN_LIST; + if (IS_DIRECT (subMbType)) { + if (pSliceHeader->iDirectSpatialMvPredFlag) { + Update8x8RefIdx (pCurDqLayer, iIdx8, listIdx, iRef[listIdx]); + ref_idx_list[listIdx][i] = iRef[listIdx]; + } + } else { + if (IS_DIR (subMbType, 0, listIdx)) { + if (iMotionPredFlag[listIdx][i] == 0) { + WELS_READ_VERIFY (BsGetTe0 (pBs, iRefCount[listIdx], &uiCode)); //ref_idx_l0[ mbPartIdx ] + iref = uiCode; + if ((iref < 0) || (iref >= iRefCount[listIdx]) || (ppRefPic[listIdx][iref] == NULL)) { //error ref_idx + pCtx->bMbRefConcealed = true; + if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) { + iref = 0; + pCtx->iErrorCode |= dsBitstreamError; + RETURN_ERR_IF_NULL(ppRefPic[listIdx][iref]); + } else { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX); + 
} + } + pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[listIdx][iref] + && (ppRefPic[listIdx][iref]->bIsComplete || bIsPending)); + } else { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. "); + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP); + } + } + Update8x8RefIdx (pCurDqLayer, iIdx8, listIdx, iref); + ref_idx_list[listIdx][i] = iref; + } + } + } + //mv + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + for (int32_t i = 0; i < 4; i++) { + int8_t iPartCount = pSubPartCount[i]; + int16_t iPartIdx, iBlockW = pPartW[i]; + uint8_t uiScan4Idx, uiCacheIdx; + + uiCacheIdx = g_kuiCache30ScanIdx[i << 2]; + + int8_t iref = ref_idx_list[listIdx][i]; + iRefIdxArray[listIdx][uiCacheIdx] = iRefIdxArray[listIdx][uiCacheIdx + 1] = + iRefIdxArray[listIdx][uiCacheIdx + 6] = iRefIdxArray[listIdx][uiCacheIdx + 7] = iref; + + uint32_t subMbType = pCurDqLayer->pSubMbType[iMbXy][i]; + if (IS_DIRECT (subMbType)) { + continue; + } + bool is_dir = IS_DIR (subMbType, 0, listIdx) > 0; + for (int32_t j = 0; j < iPartCount; j++) { + iPartIdx = (i << 2) + j * iBlockW; + uiScan4Idx = g_kuiScan4[iPartIdx]; + uiCacheIdx = g_kuiCache30ScanIdx[iPartIdx]; + if (is_dir) { + PredMv (iMvArray, iRefIdxArray, listIdx, iPartIdx, iBlockW, iref, iMv); + + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l0[ mbPartIdx ][ subMbPartIdx ][ compIdx ] + iMv[0] += iCode; + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l1[ mbPartIdx ][ subMbPartIdx ][ compIdx ] + iMv[1] += iCode; + WELS_CHECK_SE_BOTH_WARNING (iMv[1], iMinVmv, iMaxVmv, "vertical mv"); + } else { + * (uint32_t*)iMv = 0; + } + if (IS_SUB_8x8 (subMbType)) { //MB_TYPE_8x8 + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx], LD32 (iMv)); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx + 1], LD32 (iMv)); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx + 4], LD32 (iMv)); + ST32 
(pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx + 5], LD32 (iMv)); + ST32 (iMvArray[listIdx][uiCacheIdx], LD32 (iMv)); + ST32 (iMvArray[listIdx][uiCacheIdx + 1], LD32 (iMv)); + ST32 (iMvArray[listIdx][uiCacheIdx + 6], LD32 (iMv)); + ST32 (iMvArray[listIdx][uiCacheIdx + 7], LD32 (iMv)); + } else if (IS_SUB_8x4 (subMbType)) { + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx], LD32 (iMv)); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx + 1], LD32 (iMv)); + ST32 (iMvArray[listIdx][uiCacheIdx], LD32 (iMv)); + ST32 (iMvArray[listIdx][uiCacheIdx + 1], LD32 (iMv)); + } else if (IS_SUB_4x8 (subMbType)) { + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx], LD32 (iMv)); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx + 4], LD32 (iMv)); + ST32 (iMvArray[listIdx][uiCacheIdx], LD32 (iMv)); + ST32 (iMvArray[listIdx][uiCacheIdx + 6], LD32 (iMv)); + } else { //SUB_MB_TYPE_4x4 == uiSubMbType + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx], LD32 (iMv)); + ST32 (iMvArray[listIdx][uiCacheIdx], LD32 (iMv)); + } + } + } + } + } + return ERR_NONE; +} +} // namespace WelsDec diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/pic_queue.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/pic_queue.cpp new file mode 100644 index 000000000..475df0ac0 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/pic_queue.cpp @@ -0,0 +1,244 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file pic_queue.c + * + * \brief Recycled piture queue implementation + * + * \date 03/13/2009 Created + * + ************************************************************************************* + */ +#include "pic_queue.h" +#include "decoder_context.h" +#include "codec_def.h" +#include "memory_align.h" + +namespace WelsDec { + +void FreePicture (PPicture pPic, CMemoryAlign* pMa); + + +///////////////////////////////////Recycled queue management for pictures/////////////////////////////////// +/* ______________________________________ + -->| P0 | P1 | P2 | P3 | P4 | .. | Pn-1 |--> + -------------------------------------- + * + * How does it work? 
+ * node <- next; ++ next; + * +*/ + + + +PPicture AllocPicture (PWelsDecoderContext pCtx, const int32_t kiPicWidth, const int32_t kiPicHeight) { + PPicture pPic = NULL; + int32_t iPicWidth = 0; + int32_t iPicHeight = 0; + + int32_t iPicChromaWidth = 0; + int32_t iPicChromaHeight = 0; + int32_t iLumaSize = 0; + int32_t iChromaSize = 0; + CMemoryAlign* pMa = pCtx->pMemAlign; + + pPic = (PPicture) pMa->WelsMallocz (sizeof (SPicture), "PPicture"); + WELS_VERIFY_RETURN_IF (NULL, NULL == pPic); + + memset (pPic, 0, sizeof (SPicture)); + + iPicWidth = WELS_ALIGN (kiPicWidth + (PADDING_LENGTH << 1), PICTURE_RESOLUTION_ALIGNMENT); + iPicHeight = WELS_ALIGN (kiPicHeight + (PADDING_LENGTH << 1), PICTURE_RESOLUTION_ALIGNMENT); + iPicChromaWidth = iPicWidth >> 1; + iPicChromaHeight = iPicHeight >> 1; + + iLumaSize = iPicWidth * iPicHeight; + iChromaSize = iPicChromaWidth * iPicChromaHeight; + + if (pCtx->pParam->bParseOnly) { + pPic->pBuffer[0] = pPic->pBuffer[1] = pPic->pBuffer[2] = NULL; + pPic->pData[0] = pPic->pData[1] = pPic->pData[2] = NULL; + pPic->iLinesize[0] = iPicWidth; + pPic->iLinesize[1] = pPic->iLinesize[2] = iPicChromaWidth; + } else { + pPic->pBuffer[0] = static_cast (pMa->WelsMallocz (iLumaSize /* luma */ + + (iChromaSize << 1) /* Cb,Cr */, "_pic->buffer[0]")); + WELS_VERIFY_RETURN_PROC_IF (NULL, NULL == pPic->pBuffer[0], FreePicture (pPic, pMa)); + + memset (pPic->pBuffer[0], 128, (iLumaSize + (iChromaSize << 1))); + pPic->iLinesize[0] = iPicWidth; + pPic->iLinesize[1] = pPic->iLinesize[2] = iPicChromaWidth; + pPic->pBuffer[1] = pPic->pBuffer[0] + iLumaSize; + pPic->pBuffer[2] = pPic->pBuffer[1] + iChromaSize; + pPic->pData[0] = pPic->pBuffer[0] + (1 + pPic->iLinesize[0]) * PADDING_LENGTH; + pPic->pData[1] = pPic->pBuffer[1] + /*WELS_ALIGN*/ (((1 + pPic->iLinesize[1]) * PADDING_LENGTH) >> 1); + pPic->pData[2] = pPic->pBuffer[2] + /*WELS_ALIGN*/ (((1 + pPic->iLinesize[2]) * PADDING_LENGTH) >> 1); + } + pPic->iPlanes = 3; // yv12 in default + 
pPic->iWidthInPixel = kiPicWidth; + pPic->iHeightInPixel = kiPicHeight; + pPic->iFrameNum = -1; + pPic->iRefCount = 0; + + uint32_t uiMbWidth = (kiPicWidth + 15) >> 4; + uint32_t uiMbHeight = (kiPicHeight + 15) >> 4; + uint32_t uiMbCount = uiMbWidth * uiMbHeight; + + pPic->pMbCorrectlyDecodedFlag = (bool*)pMa->WelsMallocz (uiMbCount * sizeof (bool), "pPic->pMbCorrectlyDecodedFlag"); + pPic->pNzc = GetThreadCount (pCtx) > 1 ? (int8_t (*)[24])pMa->WelsMallocz (uiMbCount * 24, "pPic->pNzc") : NULL; + pPic->pMbType = (uint32_t*)pMa->WelsMallocz (uiMbCount * sizeof (uint32_t), "pPic->pMbType"); + pPic->pMv[LIST_0] = (int16_t (*)[16][2])pMa->WelsMallocz (uiMbCount * sizeof ( + int16_t) * MV_A * MB_BLOCK4x4_NUM, "pPic->pMv[]"); + pPic->pMv[LIST_1] = (int16_t (*)[16][2])pMa->WelsMallocz (uiMbCount * sizeof ( + int16_t) * MV_A * MB_BLOCK4x4_NUM, "pPic->pMv[]"); + pPic->pRefIndex[LIST_0] = (int8_t (*)[16])pMa->WelsMallocz (uiMbCount * sizeof ( + int8_t) * MB_BLOCK4x4_NUM, "pCtx->sMb.pRefIndex[]"); + pPic->pRefIndex[LIST_1] = (int8_t (*)[16])pMa->WelsMallocz (uiMbCount * sizeof ( + int8_t) * MB_BLOCK4x4_NUM, "pCtx->sMb.pRefIndex[]"); + if (pCtx->pThreadCtx != NULL) { + pPic->pReadyEvent = (SWelsDecEvent*)pMa->WelsMallocz (uiMbHeight * sizeof (SWelsDecEvent), "pPic->pReadyEvent"); + for (uint32_t i = 0; i < uiMbHeight; ++i) { + CREATE_EVENT (&pPic->pReadyEvent[i], 1, 0, NULL); + } + } else { + pPic->pReadyEvent = NULL; + } + + return pPic; +} + +void FreePicture (PPicture pPic, CMemoryAlign* pMa) { + if (NULL != pPic) { + if (pPic->pBuffer[0]) { + pMa->WelsFree (pPic->pBuffer[0], "pPic->pBuffer[0]"); + pPic->pBuffer[0] = NULL; + } + + if (pPic->pMbCorrectlyDecodedFlag) { + pMa->WelsFree (pPic->pMbCorrectlyDecodedFlag, "pPic->pMbCorrectlyDecodedFlag"); + pPic->pMbCorrectlyDecodedFlag = NULL; + } + + if (pPic->pNzc) { + pMa->WelsFree (pPic->pNzc, "pPic->pNzc"); + pPic->pNzc = NULL; + } + + if (pPic->pMbType) { + pMa->WelsFree (pPic->pMbType, "pPic->pMbType"); + pPic->pMbType = 
NULL; + } + + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + if (pPic->pMv[listIdx]) { + pMa->WelsFree (pPic->pMv[listIdx], "pPic->pMv[]"); + pPic->pMv[listIdx] = NULL; + } + + if (pPic->pRefIndex[listIdx]) { + pMa->WelsFree (pPic->pRefIndex[listIdx], "pPic->pRefIndex[]"); + pPic->pRefIndex[listIdx] = NULL; + } + } + if (pPic->pReadyEvent != NULL) { + uint32_t uiMbHeight = (pPic->iHeightInPixel + 15) >> 4; + for (uint32_t i = 0; i < uiMbHeight; ++i) { + CLOSE_EVENT (&pPic->pReadyEvent[i]); + } + pMa->WelsFree (pPic->pReadyEvent, "pPic->pReadyEvent"); + pPic->pReadyEvent = NULL; + } + pMa->WelsFree (pPic, "pPic"); + pPic = NULL; + } +} +PPicture PrefetchPic (PPicBuff pPicBuf) { + int32_t iPicIdx = 0; + PPicture pPic = NULL; + + if (pPicBuf->iCapacity == 0) { + return NULL; + } + + for (iPicIdx = pPicBuf->iCurrentIdx + 1; iPicIdx < pPicBuf->iCapacity ; ++iPicIdx) { + if (pPicBuf->ppPic[iPicIdx] != NULL && !pPicBuf->ppPic[iPicIdx]->bUsedAsRef + && pPicBuf->ppPic[iPicIdx]->iRefCount <= 0) { + pPic = pPicBuf->ppPic[iPicIdx]; + break; + } + } + if (pPic != NULL) { + pPicBuf->iCurrentIdx = iPicIdx; + pPic->iPicBuffIdx = iPicIdx; + return pPic; + } + for (iPicIdx = 0 ; iPicIdx <= pPicBuf->iCurrentIdx ; ++iPicIdx) { + if (pPicBuf->ppPic[iPicIdx] != NULL && !pPicBuf->ppPic[iPicIdx]->bUsedAsRef + && pPicBuf->ppPic[iPicIdx]->iRefCount <= 0) { + pPic = pPicBuf->ppPic[iPicIdx]; + break; + } + } + + pPicBuf->iCurrentIdx = iPicIdx; + if (pPic != NULL) { + pPic->iPicBuffIdx = iPicIdx; + } + return pPic; +} + +PPicture PrefetchPicForThread (PPicBuff pPicBuf) { + PPicture pPic = NULL; + + if (pPicBuf->iCapacity == 0) { + return NULL; + } + pPic = pPicBuf->ppPic[pPicBuf->iCurrentIdx]; + pPic->iPicBuffIdx = pPicBuf->iCurrentIdx; + if (++pPicBuf->iCurrentIdx >= pPicBuf->iCapacity) { + pPicBuf->iCurrentIdx = 0; + } + return pPic; +} + +PPicture PrefetchLastPicForThread (PPicBuff pPicBuf, const int32_t& iLastPicBuffIdx) { + PPicture pPic = NULL; + + if (pPicBuf->iCapacity 
== 0) { + return NULL; + } + if (iLastPicBuffIdx >= 0 && iLastPicBuffIdx < pPicBuf->iCapacity) { + pPic = pPicBuf->ppPic[iLastPicBuffIdx]; + } + return pPic; +} + +} // namespace WelsDec diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/rec_mb.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/rec_mb.cpp new file mode 100644 index 000000000..9034cc4d7 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/rec_mb.cpp @@ -0,0 +1,1078 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file rec_mb.c + * + * \brief implementation for all macroblock decoding process after mb syntax parsing and residual decoding with cavlc. + * + * \date 3/18/2009 Created + * + ************************************************************************************* + */ + + +#include "rec_mb.h" +#include "decode_slice.h" + +namespace WelsDec { + +void WelsFillRecNeededMbInfo (PWelsDecoderContext pCtx, bool bOutput, PDqLayer pCurDqLayer) { + PPicture pCurPic = pCtx->pDec; + int32_t iLumaStride = pCurPic->iLinesize[0]; + int32_t iChromaStride = pCurPic->iLinesize[1]; + int32_t iMbX = pCurDqLayer->iMbX; + int32_t iMbY = pCurDqLayer->iMbY; + + pCurDqLayer->iLumaStride = iLumaStride; + pCurDqLayer->iChromaStride = iChromaStride; + + if (bOutput) { + pCurDqLayer->pPred[0] = pCurPic->pData[0] + ((iMbY * iLumaStride + iMbX) << 4); + pCurDqLayer->pPred[1] = pCurPic->pData[1] + ((iMbY * iChromaStride + iMbX) << 3); + pCurDqLayer->pPred[2] = pCurPic->pData[2] + ((iMbY * iChromaStride + iMbX) << 3); + } +} + +int32_t RecI8x8Mb (int32_t iMbXy, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer) { + RecI8x8Luma (iMbXy, pCtx, pScoeffLevel, pDqLayer); + RecI4x4Chroma (iMbXy, pCtx, pScoeffLevel, pDqLayer); + return ERR_NONE; +} + +int32_t RecI8x8Luma (int32_t iMbXy, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer) { + /*****get local variable from outer variable********/ + /*prediction info*/ + uint8_t* pPred = pDqLayer->pPred[0]; + + int32_t iLumaStride = pDqLayer->iLumaStride; + int32_t* pBlockOffset = pCtx->iDecBlockOffsetArray; + PGetIntraPred8x8Func* pGetI8x8LumaPredFunc = pCtx->pGetI8x8LumaPredFunc; + + int8_t* pIntra8x8PredMode = pDqLayer->pIntra4x4FinalMode[iMbXy]; // I_NxN + int16_t* pRS = pScoeffLevel; + /*itransform info*/ + PIdctResAddPredFunc pIdctResAddPredFunc = pCtx->pIdctResAddPredFunc8x8; + + /*************local variable********************/ + uint8_t i = 0; + bool bTLAvail[4], bTRAvail[4]; + // Top-Right : Left : 
Top-Left : Top + bTLAvail[0] = !! (pDqLayer->pIntraNxNAvailFlag[iMbXy] & 0x02); + bTLAvail[1] = !! (pDqLayer->pIntraNxNAvailFlag[iMbXy] & 0x01); + bTLAvail[2] = !! (pDqLayer->pIntraNxNAvailFlag[iMbXy] & 0x04); + bTLAvail[3] = true; + + bTRAvail[0] = !! (pDqLayer->pIntraNxNAvailFlag[iMbXy] & 0x01); + bTRAvail[1] = !! (pDqLayer->pIntraNxNAvailFlag[iMbXy] & 0x08); + bTRAvail[2] = true; + bTRAvail[3] = false; + + /*************real process*********************/ + for (i = 0; i < 4; i++) { + + uint8_t* pPredI8x8 = pPred + pBlockOffset[i << 2]; + uint8_t uiMode = pIntra8x8PredMode[g_kuiScan4[i << 2]]; + + pGetI8x8LumaPredFunc[uiMode] (pPredI8x8, iLumaStride, bTLAvail[i], bTRAvail[i]); + + int32_t iIndex = g_kuiMbCountScan4Idx[i << 2]; + if (pDqLayer->pNzc[iMbXy][iIndex] || pDqLayer->pNzc[iMbXy][iIndex + 1] || pDqLayer->pNzc[iMbXy][iIndex + 4] + || pDqLayer->pNzc[iMbXy][iIndex + 5]) { + int16_t* pRSI8x8 = &pRS[i << 6]; + pIdctResAddPredFunc (pPredI8x8, iLumaStride, pRSI8x8); + } + } + + return ERR_NONE; +} + +int32_t RecI4x4Mb (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer) { + RecI4x4Luma (iMBXY, pCtx, pScoeffLevel, pDqLayer); + RecI4x4Chroma (iMBXY, pCtx, pScoeffLevel, pDqLayer); + return ERR_NONE; +} + + +int32_t RecI4x4Luma (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer) { + /*****get local variable from outer variable********/ + /*prediction info*/ + uint8_t* pPred = pDqLayer->pPred[0]; + + int32_t iLumaStride = pDqLayer->iLumaStride; + int32_t* pBlockOffset = pCtx->iDecBlockOffsetArray; + PGetIntraPredFunc* pGetI4x4LumaPredFunc = pCtx->pGetI4x4LumaPredFunc; + + int8_t* pIntra4x4PredMode = pDqLayer->pIntra4x4FinalMode[iMBXY]; + int16_t* pRS = pScoeffLevel; + /*itransform info*/ + PIdctResAddPredFunc pIdctResAddPredFunc = pCtx->pIdctResAddPredFunc; + + + /*************local variable********************/ + uint8_t i = 0; + + /*************real process*********************/ + for (i = 0; i < 16; 
i++) { + + uint8_t* pPredI4x4 = pPred + pBlockOffset[i]; + uint8_t uiMode = pIntra4x4PredMode[g_kuiScan4[i]]; + + pGetI4x4LumaPredFunc[uiMode] (pPredI4x4, iLumaStride); + + if (pDqLayer->pNzc[iMBXY][g_kuiMbCountScan4Idx[i]]) { + int16_t* pRSI4x4 = &pRS[i << 4]; + pIdctResAddPredFunc (pPredI4x4, iLumaStride, pRSI4x4); + } + } + + return ERR_NONE; +} + + +int32_t RecI4x4Chroma (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer) { + int32_t iChromaStride = pCtx->pCurDqLayer->pDec->iLinesize[1]; + + int8_t iChromaPredMode = pDqLayer->pChromaPredMode[iMBXY]; + + PGetIntraPredFunc* pGetIChromaPredFunc = pCtx->pGetIChromaPredFunc; + + uint8_t* pPred = pDqLayer->pPred[1]; + + pGetIChromaPredFunc[iChromaPredMode] (pPred, iChromaStride); + pPred = pDqLayer->pPred[2]; + pGetIChromaPredFunc[iChromaPredMode] (pPred, iChromaStride); + + RecChroma (iMBXY, pCtx, pScoeffLevel, pDqLayer); + + return ERR_NONE; +} + + +int32_t RecI16x16Mb (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer) { + /*decoder use, encoder no use*/ + int8_t iI16x16PredMode = pDqLayer->pIntraPredMode[iMBXY][7]; + int8_t iChromaPredMode = pDqLayer->pChromaPredMode[iMBXY]; + PGetIntraPredFunc* pGetIChromaPredFunc = pCtx->pGetIChromaPredFunc; + PGetIntraPredFunc* pGetI16x16LumaPredFunc = pCtx->pGetI16x16LumaPredFunc; + int32_t iUVStride = pCtx->pCurDqLayer->pDec->iLinesize[1]; + + /*common use by decoder&encoder*/ + int32_t iYStride = pDqLayer->iLumaStride; + int16_t* pRS = pScoeffLevel; + + uint8_t* pPred = pDqLayer->pPred[0]; + + PIdctFourResAddPredFunc pIdctFourResAddPredFunc = pCtx->pIdctFourResAddPredFunc; + + /*decode i16x16 y*/ + pGetI16x16LumaPredFunc[iI16x16PredMode] (pPred, iYStride); + + /*1 mb is divided 16 4x4_block to idct*/ + const int8_t* pNzc = pDqLayer->pNzc[iMBXY]; + pIdctFourResAddPredFunc (pPred + 0 * iYStride + 0, iYStride, pRS + 0 * 64, pNzc + 0); + pIdctFourResAddPredFunc (pPred + 0 * iYStride + 8, iYStride, pRS + 1 * 64, 
pNzc + 2); + pIdctFourResAddPredFunc (pPred + 8 * iYStride + 0, iYStride, pRS + 2 * 64, pNzc + 8); + pIdctFourResAddPredFunc (pPred + 8 * iYStride + 8, iYStride, pRS + 3 * 64, pNzc + 10); + + /*decode intra mb cb&cr*/ + pPred = pDqLayer->pPred[1]; + pGetIChromaPredFunc[iChromaPredMode] (pPred, iUVStride); + pPred = pDqLayer->pPred[2]; + pGetIChromaPredFunc[iChromaPredMode] (pPred, iUVStride); + RecChroma (iMBXY, pCtx, pScoeffLevel, pDqLayer); + + return ERR_NONE; +} + + +//according to current 8*8 block ref_index to gain reference picture +static inline int32_t GetRefPic (sMCRefMember* pMCRefMem, PWelsDecoderContext pCtx, const int8_t& iRefIdx, + int32_t listIdx) { + PPicture pRefPic; + + if (iRefIdx >= 0) { + pRefPic = pCtx->sRefPic.pRefList[listIdx][iRefIdx]; + + if (pRefPic != NULL) { + pMCRefMem->iSrcLineLuma = pRefPic->iLinesize[0]; + pMCRefMem->iSrcLineChroma = pRefPic->iLinesize[1]; + + pMCRefMem->pSrcY = pRefPic->pData[0]; + pMCRefMem->pSrcU = pRefPic->pData[1]; + pMCRefMem->pSrcV = pRefPic->pData[2]; + if (!pMCRefMem->pSrcY || !pMCRefMem->pSrcU || !pMCRefMem->pSrcV) { + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_DATA, ERR_INFO_REFERENCE_PIC_LOST); + } + return ERR_NONE; + } + } + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_DATA, ERR_INFO_REFERENCE_PIC_LOST); +} + + +#ifndef MC_FLOW_SIMPLE_JUDGE +#define MC_FLOW_SIMPLE_JUDGE 1 +#endif //MC_FLOW_SIMPLE_JUDGE +void BaseMC (PWelsDecoderContext pCtx, sMCRefMember* pMCRefMem, const int32_t& listIdx, const int8_t& iRefIdx, + int32_t iXOffset, int32_t iYOffset, + SMcFunc* pMCFunc, + int32_t iBlkWidth, int32_t iBlkHeight, int16_t iMVs[2]) { + int32_t iFullMVx = (iXOffset << 2) + iMVs[0]; //quarter pixel + int32_t iFullMVy = (iYOffset << 2) + iMVs[1]; + iFullMVx = WELS_CLIP3 (iFullMVx, ((-PADDING_LENGTH + 2) * (1 << 2)), + ((pMCRefMem->iPicWidth + PADDING_LENGTH - 19) * (1 << 2))); + iFullMVy = WELS_CLIP3 (iFullMVy, ((-PADDING_LENGTH + 2) * (1 << 2)), + ((pMCRefMem->iPicHeight + PADDING_LENGTH - 19) * (1 << 2))); + + 
if (GetThreadCount (pCtx) > 1 && iRefIdx >= 0) { + // wait for the lines of reference macroblock (3 + 16). + PPicture pRefPic = pCtx->sRefPic.pRefList[listIdx][iRefIdx]; + if (pCtx->bNewSeqBegin && (pCtx->iErrorCode & dsRefLost)) { + //set event if refpic is lost to prevent from infinite waiting. + if (!pRefPic->pReadyEvent[0].isSignaled) { + for (uint32_t ln = 0; ln < pCtx->sMb.iMbHeight; ++ln) { + SET_EVENT (&pRefPic->pReadyEvent[ln]); + } + } + } + int32_t offset = (iFullMVy >> 2) + iBlkHeight + 3 + 16; + if (offset > pCtx->lastReadyHeightOffset[listIdx][iRefIdx]) { + const int32_t down_line = WELS_MIN (offset >> 4, int32_t (pCtx->sMb.iMbHeight) - 1); + if (pRefPic->pReadyEvent[down_line].isSignaled != 1) { + WAIT_EVENT (&pRefPic->pReadyEvent[down_line], WELS_DEC_THREAD_WAIT_INFINITE); + } + pCtx->lastReadyHeightOffset[listIdx][iRefIdx] = offset; + } + } + + int32_t iSrcPixOffsetLuma = (iFullMVx >> 2) + (iFullMVy >> 2) * pMCRefMem->iSrcLineLuma; + int32_t iSrcPixOffsetChroma = (iFullMVx >> 3) + (iFullMVy >> 3) * pMCRefMem->iSrcLineChroma; + + int32_t iBlkWidthChroma = iBlkWidth >> 1; + int32_t iBlkHeightChroma = iBlkHeight >> 1; + + uint8_t* pSrcY = pMCRefMem->pSrcY + iSrcPixOffsetLuma; + uint8_t* pSrcU = pMCRefMem->pSrcU + iSrcPixOffsetChroma; + uint8_t* pSrcV = pMCRefMem->pSrcV + iSrcPixOffsetChroma; + uint8_t* pDstY = pMCRefMem->pDstY; + uint8_t* pDstU = pMCRefMem->pDstU; + uint8_t* pDstV = pMCRefMem->pDstV; + + pMCFunc->pMcLumaFunc (pSrcY, pMCRefMem->iSrcLineLuma, pDstY, pMCRefMem->iDstLineLuma, iFullMVx, iFullMVy, iBlkWidth, + iBlkHeight); + pMCFunc->pMcChromaFunc (pSrcU, pMCRefMem->iSrcLineChroma, pDstU, pMCRefMem->iDstLineChroma, iFullMVx, iFullMVy, + iBlkWidthChroma, iBlkHeightChroma); + pMCFunc->pMcChromaFunc (pSrcV, pMCRefMem->iSrcLineChroma, pDstV, pMCRefMem->iDstLineChroma, iFullMVx, iFullMVy, + iBlkWidthChroma, iBlkHeightChroma); + +} + +static void WeightPrediction (PDqLayer pCurDqLayer, sMCRefMember* pMCRefMem, int32_t listIdx, int32_t iRefIdx, + 
int32_t iBlkWidth, + int32_t iBlkHeight) { + + + int32_t iLog2denom, iWoc, iOoc; + int32_t iPredTemp, iLineStride; + int32_t iPixel = 0; + uint8_t* pDst; + //luma + iLog2denom = pCurDqLayer->pPredWeightTable->uiLumaLog2WeightDenom; + iWoc = pCurDqLayer->pPredWeightTable->sPredList[listIdx].iLumaWeight[iRefIdx]; + iOoc = pCurDqLayer->pPredWeightTable->sPredList[listIdx].iLumaOffset[iRefIdx]; + iLineStride = pMCRefMem->iDstLineLuma; + + for (int i = 0; i < iBlkHeight; i++) { + for (int j = 0; j < iBlkWidth; j++) { + iPixel = j + i * (iLineStride); + if (iLog2denom >= 1) { + iPredTemp = ((pMCRefMem->pDstY[iPixel] * iWoc + (1 << (iLog2denom - 1))) >> iLog2denom) + iOoc; + + pMCRefMem->pDstY[iPixel] = WELS_CLIP3 (iPredTemp, 0, 255); + } else { + iPredTemp = pMCRefMem->pDstY[iPixel] * iWoc + iOoc; + + pMCRefMem->pDstY[iPixel] = WELS_CLIP3 (iPredTemp, 0, 255); + + } + } + } + + + //UV + iBlkWidth = iBlkWidth >> 1; + iBlkHeight = iBlkHeight >> 1; + iLog2denom = pCurDqLayer->pPredWeightTable->uiChromaLog2WeightDenom; + iLineStride = pMCRefMem->iDstLineChroma; + + for (int i = 0; i < 2; i++) { + + + //iLog2denom = pCurDqLayer->pPredWeightTable->uiChromaLog2WeightDenom; + iWoc = pCurDqLayer->pPredWeightTable->sPredList[listIdx].iChromaWeight[iRefIdx][i]; + iOoc = pCurDqLayer->pPredWeightTable->sPredList[listIdx].iChromaOffset[iRefIdx][i]; + pDst = i ? 
pMCRefMem->pDstV : pMCRefMem->pDstU; + //iLineStride = pMCRefMem->iDstLineChroma; + + for (int i = 0; i < iBlkHeight ; i++) { + for (int j = 0; j < iBlkWidth; j++) { + iPixel = j + i * (iLineStride); + if (iLog2denom >= 1) { + iPredTemp = ((pDst[iPixel] * iWoc + (1 << (iLog2denom - 1))) >> iLog2denom) + iOoc; + + pDst[iPixel] = WELS_CLIP3 (iPredTemp, 0, 255); + } else { + iPredTemp = pDst[iPixel] * iWoc + iOoc; + + pDst[iPixel] = WELS_CLIP3 (iPredTemp, 0, 255); + + } + } + + } + + + } +} + +static void BiWeightPrediction (PDqLayer pCurDqLayer, sMCRefMember* pMCRefMem, sMCRefMember* pTempMCRefMem, + int32_t iRefIdx1, int32_t iRefIdx2, bool bWeightedBipredIdcIs1, int32_t iBlkWidth, + int32_t iBlkHeight) { + int32_t iWoc1 = 0, iOoc1 = 0, iWoc2 = 0, iOoc2 = 0; + int32_t iPredTemp, iLineStride; + int32_t iPixel = 0; + //luma + int32_t iLog2denom = pCurDqLayer->pPredWeightTable->uiLumaLog2WeightDenom; + if (bWeightedBipredIdcIs1) { + iWoc1 = pCurDqLayer->pPredWeightTable->sPredList[LIST_0].iLumaWeight[iRefIdx1]; + iOoc1 = pCurDqLayer->pPredWeightTable->sPredList[LIST_0].iLumaOffset[iRefIdx1]; + iWoc2 = pCurDqLayer->pPredWeightTable->sPredList[LIST_1].iLumaWeight[iRefIdx2]; + iOoc2 = pCurDqLayer->pPredWeightTable->sPredList[LIST_1].iLumaOffset[iRefIdx2]; + } else { + iWoc1 = pCurDqLayer->pPredWeightTable->iImplicitWeight[iRefIdx1][iRefIdx2]; + iWoc2 = 64 - iWoc1; + } + iLineStride = pMCRefMem->iDstLineLuma; + + for (int i = 0; i < iBlkHeight; i++) { + for (int j = 0; j < iBlkWidth; j++) { + iPixel = j + i * (iLineStride); + iPredTemp = ((pMCRefMem->pDstY[iPixel] * iWoc1 + pTempMCRefMem->pDstY[iPixel] * iWoc2 + (1 << iLog2denom)) >> + (iLog2denom + 1)) + ((iOoc1 + iOoc2 + 1) >> 1); + pMCRefMem->pDstY[iPixel] = WELS_CLIP3 (iPredTemp, 0, 255); + } + } + + //UV + iBlkWidth = iBlkWidth >> 1; + iBlkHeight = iBlkHeight >> 1; + iLog2denom = pCurDqLayer->pPredWeightTable->uiChromaLog2WeightDenom; + iLineStride = pMCRefMem->iDstLineChroma; + + uint8_t* pDst; + uint8_t* pTempDst; + 
for (int k = 0; k < 2; k++) { + //iLog2denom = pCurDqLayer->pPredWeightTable->uiChromaLog2WeightDenom; + if (bWeightedBipredIdcIs1) { + iWoc1 = pCurDqLayer->pPredWeightTable->sPredList[LIST_0].iChromaWeight[iRefIdx1][k]; + iOoc1 = pCurDqLayer->pPredWeightTable->sPredList[LIST_0].iChromaOffset[iRefIdx1][k]; + iWoc2 = pCurDqLayer->pPredWeightTable->sPredList[LIST_1].iChromaWeight[iRefIdx2][k]; + iOoc2 = pCurDqLayer->pPredWeightTable->sPredList[LIST_1].iChromaOffset[iRefIdx2][k]; + } + pDst = k ? pMCRefMem->pDstV : pMCRefMem->pDstU; + pTempDst = k ? pTempMCRefMem->pDstV : pTempMCRefMem->pDstU; + //iLineStride = pMCRefMem->iDstLineChroma; + + for (int i = 0; i < iBlkHeight; i++) { + for (int j = 0; j < iBlkWidth; j++) { + iPixel = j + i * (iLineStride); + iPredTemp = ((pDst[iPixel] * iWoc1 + pTempDst[iPixel] * iWoc2 + (1 << iLog2denom)) >> (iLog2denom + 1)) + (( + iOoc1 + iOoc2 + 1) >> 1); + pDst[iPixel] = WELS_CLIP3 (iPredTemp, 0, 255); + } + } + } +} + +static void BiPrediction (PDqLayer pCurDqLayer, sMCRefMember* pMCRefMem, sMCRefMember* pTempMCRefMem, int32_t iBlkWidth, + int32_t iBlkHeight) { + int32_t iPredTemp, iLineStride; + int32_t iPixel = 0; + //luma + iLineStride = pMCRefMem->iDstLineLuma; + + for (int i = 0; i < iBlkHeight; i++) { + for (int j = 0; j < iBlkWidth; j++) { + iPixel = j + i * (iLineStride); + iPredTemp = (pMCRefMem->pDstY[iPixel] + pTempMCRefMem->pDstY[iPixel] + 1) >> 1; + pMCRefMem->pDstY[iPixel] = WELS_CLIP3 (iPredTemp, 0, 255); + } + } + + //UV + iBlkWidth = iBlkWidth >> 1; + iBlkHeight = iBlkHeight >> 1; + iLineStride = pMCRefMem->iDstLineChroma; + + uint8_t* pDst; + uint8_t* pTempDst; + for (int k = 0; k < 2; k++) { + pDst = k ? pMCRefMem->pDstV : pMCRefMem->pDstU; + pTempDst = k ? 
pTempMCRefMem->pDstV : pTempMCRefMem->pDstU; + //iLineStride = pMCRefMem->iDstLineChroma; + + for (int i = 0; i < iBlkHeight; i++) { + for (int j = 0; j < iBlkWidth; j++) { + iPixel = j + i * (iLineStride); + iPredTemp = (pDst[iPixel] + pTempDst[iPixel] + 1) >> 1; + pDst[iPixel] = WELS_CLIP3 (iPredTemp, 0, 255); + } + } + } +} + +int32_t GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWelsDecoderContext pCtx) { + sMCRefMember pMCRefMem; + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + SMcFunc* pMCFunc = &pCtx->sMcFunc; + + int32_t iMBXY = pCurDqLayer->iMbXyIndex; + + int16_t iMVs[2] = {0}; + + uint32_t iMBType = pCurDqLayer->pDec->pMbType[iMBXY]; + + int32_t iMBOffsetX = pCurDqLayer->iMbX << 4; + int32_t iMBOffsetY = pCurDqLayer->iMbY << 4; + + int32_t iDstLineLuma = pCtx->pDec->iLinesize[0]; + int32_t iDstLineChroma = pCtx->pDec->iLinesize[1]; + + int32_t iBlk8X, iBlk8Y, iBlk4X, iBlk4Y, i, j, iIIdx, iJIdx; + + pMCRefMem.iPicWidth = (pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.iMbWidth << 4); + pMCRefMem.iPicHeight = (pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.iMbHeight << 4); + + pMCRefMem.pDstY = pPredY; + pMCRefMem.pDstU = pPredCb; + pMCRefMem.pDstV = pPredCr; + + pMCRefMem.iDstLineLuma = iDstLineLuma; + pMCRefMem.iDstLineChroma = iDstLineChroma; + + int8_t iRefIndex = 0; + + switch (iMBType) { + case MB_TYPE_SKIP: + case MB_TYPE_16x16: + iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][0][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][0][1]; + iRefIndex = pCurDqLayer->pDec->pRefIndex[0][iMBXY][0]; + WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, LIST_0)); + BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 16, iMVs); + + if (pCurDqLayer->bUseWeightPredictionFlag) { + iRefIndex = pCurDqLayer->pDec->pRefIndex[0][iMBXY][0]; + WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 16, 16); + } + break; + case MB_TYPE_16x8: + iMVs[0] = 
pCurDqLayer->pDec->pMv[0][iMBXY][0][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][0][1]; + iRefIndex = pCurDqLayer->pDec->pRefIndex[0][iMBXY][0]; + WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, LIST_0)); + BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 8, iMVs); + + if (pCurDqLayer->bUseWeightPredictionFlag) { + WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 16, 8); + } + + iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][8][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][8][1]; + iRefIndex = pCurDqLayer->pDec->pRefIndex[0][iMBXY][8]; + WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, LIST_0)); + pMCRefMem.pDstY = pPredY + (iDstLineLuma << 3); + pMCRefMem.pDstU = pPredCb + (iDstLineChroma << 2); + pMCRefMem.pDstV = pPredCr + (iDstLineChroma << 2); + BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iMBOffsetX, iMBOffsetY + 8, pMCFunc, 16, 8, iMVs); + + if (pCurDqLayer->bUseWeightPredictionFlag) { + WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 16, 8); + } + break; + case MB_TYPE_8x16: + iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][0][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][0][1]; + iRefIndex = pCurDqLayer->pDec->pRefIndex[0][iMBXY][0]; + WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, LIST_0)); + BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iMBOffsetX, iMBOffsetY, pMCFunc, 8, 16, iMVs); + if (pCurDqLayer->bUseWeightPredictionFlag) { + WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 8, 16); + } + + iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][2][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][2][1]; + iRefIndex = pCurDqLayer->pDec->pRefIndex[0][iMBXY][2]; + WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, LIST_0)); + pMCRefMem.pDstY = pPredY + 8; + pMCRefMem.pDstU = pPredCb + 4; + pMCRefMem.pDstV = pPredCr + 4; + BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iMBOffsetX + 8, iMBOffsetY, pMCFunc, 8, 16, iMVs); + + if 
(pCurDqLayer->bUseWeightPredictionFlag) { + WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 8, 16); + } + break; + case MB_TYPE_8x8: + case MB_TYPE_8x8_REF0: { + uint32_t iSubMBType; + int32_t iXOffset, iYOffset; + uint8_t* pDstY, *pDstU, *pDstV; + for (i = 0; i < 4; i++) { + iSubMBType = pCurDqLayer->pSubMbType[iMBXY][i]; + iBlk8X = (i & 1) << 3; + iBlk8Y = (i >> 1) << 3; + iXOffset = iMBOffsetX + iBlk8X; + iYOffset = iMBOffsetY + iBlk8Y; + + iIIdx = ((i >> 1) << 3) + ((i & 1) << 1); + iRefIndex = pCurDqLayer->pDec->pRefIndex[0][iMBXY][iIIdx]; + WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, LIST_0)); + pDstY = pPredY + iBlk8X + iBlk8Y * iDstLineLuma; + pDstU = pPredCb + (iBlk8X >> 1) + (iBlk8Y >> 1) * iDstLineChroma; + pDstV = pPredCr + (iBlk8X >> 1) + (iBlk8Y >> 1) * iDstLineChroma; + pMCRefMem.pDstY = pDstY; + pMCRefMem.pDstU = pDstU; + pMCRefMem.pDstV = pDstV; + switch (iSubMBType) { + case SUB_MB_TYPE_8x8: + iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx][1]; + BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iXOffset, iYOffset, pMCFunc, 8, 8, iMVs); + if (pCurDqLayer->bUseWeightPredictionFlag) { + + WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 8, 8); + } + + break; + case SUB_MB_TYPE_8x4: + iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx][1]; + BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iXOffset, iYOffset, pMCFunc, 8, 4, iMVs); + if (pCurDqLayer->bUseWeightPredictionFlag) { + + WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 8, 4); + } + + + iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx + 4][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx + 4][1]; + pMCRefMem.pDstY += (iDstLineLuma << 2); + pMCRefMem.pDstU += (iDstLineChroma << 1); + pMCRefMem.pDstV += (iDstLineChroma << 1); + BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iXOffset, iYOffset + 4, pMCFunc, 8, 4, iMVs); + if 
(pCurDqLayer->bUseWeightPredictionFlag) { + + WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 8, 4); + } + + break; + case SUB_MB_TYPE_4x8: + iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx][1]; + BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iXOffset, iYOffset, pMCFunc, 4, 8, iMVs); + if (pCurDqLayer->bUseWeightPredictionFlag) { + + WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 4, 8); + } + + + iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx + 1][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx + 1][1]; + pMCRefMem.pDstY += 4; + pMCRefMem.pDstU += 2; + pMCRefMem.pDstV += 2; + BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iXOffset + 4, iYOffset, pMCFunc, 4, 8, iMVs); + if (pCurDqLayer->bUseWeightPredictionFlag) { + + WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 4, 8); + } + + break; + case SUB_MB_TYPE_4x4: { + for (j = 0; j < 4; j++) { + int32_t iUVLineStride; + iJIdx = ((j >> 1) << 2) + (j & 1); + + iBlk4X = (j & 1) << 2; + iBlk4Y = (j >> 1) << 2; + + iUVLineStride = (iBlk4X >> 1) + (iBlk4Y >> 1) * iDstLineChroma; + pMCRefMem.pDstY = pDstY + iBlk4X + iBlk4Y * iDstLineLuma; + pMCRefMem.pDstU = pDstU + iUVLineStride; + pMCRefMem.pDstV = pDstV + iUVLineStride; + + iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx + iJIdx][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx + iJIdx][1]; + BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iXOffset + iBlk4X, iYOffset + iBlk4Y, pMCFunc, 4, 4, iMVs); + if (pCurDqLayer->bUseWeightPredictionFlag) { + + WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 4, 4); + } + + } + } + break; + default: + break; + } + } + } + break; + default: + break; + } + return ERR_NONE; +} + +int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWelsDecoderContext pCtx) { + sMCRefMember pMCRefMem; + sMCRefMember pTempMCRefMem; + + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + SMcFunc* pMCFunc = &pCtx->sMcFunc; + 
+ int32_t iMBXY = pCurDqLayer->iMbXyIndex; + + int16_t iMVs[2] = { 0 }; + + uint32_t iMBType = pCurDqLayer->pDec->pMbType[iMBXY]; + + int32_t iMBOffsetX = pCurDqLayer->iMbX << 4; + int32_t iMBOffsetY = pCurDqLayer->iMbY << 4; + + int32_t iDstLineLuma = pCtx->pDec->iLinesize[0]; + int32_t iDstLineChroma = pCtx->pDec->iLinesize[1]; + + + pMCRefMem.iPicWidth = (pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.iMbWidth << 4); + pMCRefMem.iPicHeight = (pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.iMbHeight << 4); + + pMCRefMem.pDstY = pPredYCbCr[0]; + pMCRefMem.pDstU = pPredYCbCr[1]; + pMCRefMem.pDstV = pPredYCbCr[2]; + + pMCRefMem.iDstLineLuma = iDstLineLuma; + pMCRefMem.iDstLineChroma = iDstLineChroma; + + pTempMCRefMem = pMCRefMem; + pTempMCRefMem.pDstY = pTempPredYCbCr[0]; + pTempMCRefMem.pDstU = pTempPredYCbCr[1]; + pTempMCRefMem.pDstV = pTempPredYCbCr[2]; + + + int8_t iRefIndex0 = 0; + int8_t iRefIndex1 = 0; + int8_t iRefIndex = 0; + + bool bWeightedBipredIdcIs1 = pCurDqLayer->sLayerInfo.pPps->uiWeightedBipredIdc == 1; + + if (IS_INTER_16x16 (iMBType)) { + if (IS_TYPE_L0 (iMBType) && IS_TYPE_L1 (iMBType)) { + iMVs[0] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][0][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][0][1]; + iRefIndex0 = pCurDqLayer->pDec->pRefIndex[LIST_0][iMBXY][0]; + WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex0, LIST_0)); + BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex0, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 16, iMVs); + + iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][0][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][0][1]; + iRefIndex1 = pCurDqLayer->pDec->pRefIndex[LIST_1][iMBXY][0]; + WELS_B_MB_REC_VERIFY (GetRefPic (&pTempMCRefMem, pCtx, iRefIndex1, LIST_1)); + BaseMC (pCtx, &pTempMCRefMem, LIST_1, iRefIndex1, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 16, iMVs); + if (pCurDqLayer->bUseWeightedBiPredIdc) { + BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex0, 
iRefIndex1, bWeightedBipredIdcIs1, 16, 16); + } else { + BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 16, 16); + } + } else { + int32_t listIdx = (iMBType & MB_TYPE_P0L0) ? LIST_0 : LIST_1; + iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][0][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][0][1]; + iRefIndex = pCurDqLayer->pDec->pRefIndex[listIdx][iMBXY][0]; + WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, listIdx)); + BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 16, iMVs); + if (bWeightedBipredIdcIs1) { + WeightPrediction (pCurDqLayer, &pMCRefMem, listIdx, iRefIndex, 16, 16); + } + } + } else if (IS_INTER_16x8 (iMBType)) { + for (int32_t i = 0; i < 2; ++i) { + int32_t iPartIdx = i << 3; + uint32_t listCount = 0; + int32_t lastListIdx = LIST_0; + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + if (IS_DIR (iMBType, i, listIdx)) { + lastListIdx = listIdx; + iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iPartIdx][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iPartIdx][1]; + iRefIndex = pCurDqLayer->pDec->pRefIndex[listIdx][iMBXY][iPartIdx]; + WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, listIdx)); + if (i) { + pMCRefMem.pDstY += (iDstLineLuma << 3); + pMCRefMem.pDstU += (iDstLineChroma << 2); + pMCRefMem.pDstV += (iDstLineChroma << 2); + } + BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iMBOffsetX, iMBOffsetY + iPartIdx, pMCFunc, 16, 8, iMVs); + if (++listCount == 2) { + iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iPartIdx][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iPartIdx][1]; + iRefIndex1 = pCurDqLayer->pDec->pRefIndex[LIST_1][iMBXY][iPartIdx]; + WELS_B_MB_REC_VERIFY (GetRefPic (&pTempMCRefMem, pCtx, iRefIndex1, LIST_1)); + if (i) { + pTempMCRefMem.pDstY += (iDstLineLuma << 3); + pTempMCRefMem.pDstU += (iDstLineChroma << 2); + pTempMCRefMem.pDstV += (iDstLineChroma << 2); + } + BaseMC (pCtx, &pTempMCRefMem, LIST_1, iRefIndex1, 
iMBOffsetX, iMBOffsetY + iPartIdx, pMCFunc, 16, 8, iMVs); + if (pCurDqLayer->bUseWeightedBiPredIdc) { + iRefIndex0 = pCurDqLayer->pDec->pRefIndex[LIST_0][iMBXY][iPartIdx]; + iRefIndex1 = pCurDqLayer->pDec->pRefIndex[LIST_1][iMBXY][iPartIdx]; + BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex0, iRefIndex1, bWeightedBipredIdcIs1, 16, 8); + } else { + BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 16, 8); + } + } + } + } + if (listCount == 1) { + if (bWeightedBipredIdcIs1) { + iRefIndex = pCurDqLayer->pDec->pRefIndex[lastListIdx][iMBXY][iPartIdx]; + WeightPrediction (pCurDqLayer, &pMCRefMem, lastListIdx, iRefIndex, 16, 8); + } + } + } + } else if (IS_INTER_8x16 (iMBType)) { + for (int32_t i = 0; i < 2; ++i) { + uint32_t listCount = 0; + int32_t lastListIdx = LIST_0; + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + if (IS_DIR (iMBType, i, listIdx)) { + lastListIdx = listIdx; + iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][i << 1][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][i << 1][1]; + iRefIndex = pCurDqLayer->pDec->pRefIndex[listIdx][iMBXY][i << 1]; + WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, listIdx)); + if (i) { + pMCRefMem.pDstY += 8; + pMCRefMem.pDstU += 4; + pMCRefMem.pDstV += 4; + } + BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iMBOffsetX + (i ? 8 : 0), iMBOffsetY, pMCFunc, 8, 16, iMVs); + if (++listCount == 2) { + iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][i << 1][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][i << 1][1]; + iRefIndex1 = pCurDqLayer->pDec->pRefIndex[LIST_1][iMBXY][i << 1]; + WELS_B_MB_REC_VERIFY (GetRefPic (&pTempMCRefMem, pCtx, iRefIndex1, LIST_1)); + if (i) { + pTempMCRefMem.pDstY += 8; + pTempMCRefMem.pDstU += 4; + pTempMCRefMem.pDstV += 4; + } + BaseMC (pCtx, &pTempMCRefMem, LIST_1, iRefIndex1, iMBOffsetX + (i ? 
8 : 0), iMBOffsetY, pMCFunc, 8, 16, iMVs); + if (pCurDqLayer->bUseWeightedBiPredIdc) { + iRefIndex0 = pCurDqLayer->pDec->pRefIndex[LIST_0][iMBXY][i << 1]; + iRefIndex1 = pCurDqLayer->pDec->pRefIndex[LIST_1][iMBXY][i << 1]; + BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex0, iRefIndex1, bWeightedBipredIdcIs1, 8, 16); + } else { + BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 8, 16); + } + } + } + } + if (listCount == 1) { + if (bWeightedBipredIdcIs1) { + iRefIndex = pCurDqLayer->pDec->pRefIndex[lastListIdx][iMBXY][i << 1]; + WeightPrediction (pCurDqLayer, &pMCRefMem, lastListIdx, iRefIndex, 8, 16); + } + } + } + } else if (IS_Inter_8x8 (iMBType)) { + int32_t iBlk8X, iBlk8Y, iBlk4X, iBlk4Y, iIIdx, iJIdx; + uint32_t iSubMBType; + int32_t iXOffset, iYOffset; + uint8_t* pDstY, *pDstU, *pDstV; + uint8_t* pDstY2, *pDstU2, *pDstV2; + for (int32_t i = 0; i < 4; i++) { + iSubMBType = pCurDqLayer->pSubMbType[iMBXY][i]; + iBlk8X = (i & 1) << 3; + iBlk8Y = (i >> 1) << 3; + iXOffset = iMBOffsetX + iBlk8X; + iYOffset = iMBOffsetY + iBlk8Y; + + iIIdx = ((i >> 1) << 3) + ((i & 1) << 1); + + pDstY = pPredYCbCr[0] + iBlk8X + iBlk8Y * iDstLineLuma; + pDstU = pPredYCbCr[1] + (iBlk8X >> 1) + (iBlk8Y >> 1) * iDstLineChroma; + pDstV = pPredYCbCr[2] + (iBlk8X >> 1) + (iBlk8Y >> 1) * iDstLineChroma; + pMCRefMem.pDstY = pDstY; + pMCRefMem.pDstU = pDstU; + pMCRefMem.pDstV = pDstV; + + pTempMCRefMem = pMCRefMem; + pDstY2 = pTempPredYCbCr[0] + iBlk8X + iBlk8Y * iDstLineLuma; + pDstU2 = pTempPredYCbCr[1] + (iBlk8X >> 1) + (iBlk8Y >> 1) * iDstLineChroma; + pDstV2 = pTempPredYCbCr[2] + (iBlk8X >> 1) + (iBlk8Y >> 1) * iDstLineChroma; + + pTempMCRefMem.pDstY = pDstY2; + pTempMCRefMem.pDstU = pDstU2; + pTempMCRefMem.pDstV = pDstV2; + + if ((IS_TYPE_L0 (iSubMBType) && IS_TYPE_L1 (iSubMBType))) { + iRefIndex0 = pCurDqLayer->pDec->pRefIndex[LIST_0][iMBXY][iIIdx]; + WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex0, LIST_0)); + + iRefIndex1 = 
pCurDqLayer->pDec->pRefIndex[LIST_1][iMBXY][iIIdx]; + WELS_B_MB_REC_VERIFY (GetRefPic (&pTempMCRefMem, pCtx, iRefIndex1, LIST_1)); + } else { + int32_t listIdx = IS_TYPE_L0 (iSubMBType) ? LIST_0 : LIST_1; + iRefIndex = pCurDqLayer->pDec->pRefIndex[listIdx][iMBXY][iIIdx]; + WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, listIdx)); + } + + if (IS_SUB_8x8 (iSubMBType)) { + if (IS_TYPE_L0 (iSubMBType) && IS_TYPE_L1 (iSubMBType)) { + iMVs[0] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx][1]; + BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex0, iXOffset, iYOffset, pMCFunc, 8, 8, iMVs); + + iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx][1]; + BaseMC (pCtx, &pTempMCRefMem, LIST_1, iRefIndex1, iXOffset, iYOffset, pMCFunc, 8, 8, iMVs); + + if (pCurDqLayer->bUseWeightedBiPredIdc) { + BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex0, iRefIndex1, bWeightedBipredIdcIs1, 8, 8); + } else { + BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 8, 8); + } + } else { + int32_t listIdx = IS_TYPE_L0 (iSubMBType) ? 
LIST_0 : LIST_1; + iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx][1]; + iRefIndex = pCurDqLayer->pDec->pRefIndex[listIdx][iMBXY][iIIdx]; + BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iXOffset, iYOffset, pMCFunc, 8, 8, iMVs); + if (bWeightedBipredIdcIs1) { + WeightPrediction (pCurDqLayer, &pMCRefMem, listIdx, iRefIndex, 8, 8); + } + } + } else if (IS_SUB_8x4 (iSubMBType)) { + if (IS_TYPE_L0 (iSubMBType) && IS_TYPE_L1 (iSubMBType)) { //B_Bi_8x4 + iMVs[0] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx][1]; + BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex0, iXOffset, iYOffset, pMCFunc, 8, 4, iMVs); + iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx][1]; + BaseMC (pCtx, &pTempMCRefMem, LIST_1, iRefIndex1, iXOffset, iYOffset, pMCFunc, 8, 4, iMVs); + + if (pCurDqLayer->bUseWeightedBiPredIdc) { + BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex0, iRefIndex1, bWeightedBipredIdcIs1, 8, 4); + } else { + BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 8, 4); + } + + pMCRefMem.pDstY += (iDstLineLuma << 2); + pMCRefMem.pDstU += (iDstLineChroma << 1); + pMCRefMem.pDstV += (iDstLineChroma << 1); + iMVs[0] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx + 4][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx + 4][1]; + BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex0, iXOffset, iYOffset + 4, pMCFunc, 8, 4, iMVs); + + pTempMCRefMem.pDstY += (iDstLineLuma << 2); + pTempMCRefMem.pDstU += (iDstLineChroma << 1); + pTempMCRefMem.pDstV += (iDstLineChroma << 1); + iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx + 4][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx + 4][1]; + BaseMC (pCtx, &pTempMCRefMem, LIST_1, iRefIndex1, iXOffset, iYOffset + 4, pMCFunc, 8, 4, iMVs); + + if (pCurDqLayer->bUseWeightedBiPredIdc) { + BiWeightPrediction (pCurDqLayer, 
&pMCRefMem, &pTempMCRefMem, iRefIndex0, iRefIndex1, bWeightedBipredIdcIs1, 8, 4); + } else { + BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 8, 4); + } + } else { //B_L0_8x4 B_L1_8x4 + int32_t listIdx = IS_TYPE_L0 (iSubMBType) ? LIST_0 : LIST_1; + iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx][1]; + iRefIndex = pCurDqLayer->pDec->pRefIndex[listIdx][iMBXY][iIIdx]; + BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iXOffset, iYOffset, pMCFunc, 8, 4, iMVs); + // Weight the upper 8x4 partition before the destination pointers advance; otherwise only the lower half is weighted. + if (bWeightedBipredIdcIs1) { + WeightPrediction (pCurDqLayer, &pMCRefMem, listIdx, iRefIndex, 8, 4); + } + pMCRefMem.pDstY += (iDstLineLuma << 2); + pMCRefMem.pDstU += (iDstLineChroma << 1); + pMCRefMem.pDstV += (iDstLineChroma << 1); + iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx + 4][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx + 4][1]; + BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iXOffset, iYOffset + 4, pMCFunc, 8, 4, iMVs); + if (bWeightedBipredIdcIs1) { + WeightPrediction (pCurDqLayer, &pMCRefMem, listIdx, iRefIndex, 8, 4); + } + } + } else if (IS_SUB_4x8 (iSubMBType)) { + if (IS_TYPE_L0 (iSubMBType) && IS_TYPE_L1 (iSubMBType)) { //B_Bi_4x8 + iMVs[0] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx][1]; + BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex0, iXOffset, iYOffset, pMCFunc, 4, 8, iMVs); + iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx][1]; + BaseMC (pCtx, &pTempMCRefMem, LIST_1, iRefIndex1, iXOffset, iYOffset, pMCFunc, 4, 8, iMVs); + + if (pCurDqLayer->bUseWeightedBiPredIdc) { + BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex0, iRefIndex1, bWeightedBipredIdcIs1, 4, 8); + } else { + BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 4, 8); + } + + pMCRefMem.pDstY += 4; + pMCRefMem.pDstU += 2; + pMCRefMem.pDstV += 2; + iMVs[0] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx + 1][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx + 1][1]; + BaseMC
(pCtx, &pMCRefMem, LIST_0, iRefIndex0, iXOffset + 4, iYOffset, pMCFunc, 4, 8, iMVs); + + pTempMCRefMem.pDstY += 4; + pTempMCRefMem.pDstU += 2; + pTempMCRefMem.pDstV += 2; + iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx + 1][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx + 1][1]; + BaseMC (pCtx, &pTempMCRefMem, LIST_1, iRefIndex1, iXOffset + 4, iYOffset, pMCFunc, 4, 8, iMVs); + + if (pCurDqLayer->bUseWeightedBiPredIdc) { + BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex0, iRefIndex1, bWeightedBipredIdcIs1, 4, 8); + } else { + BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 4, 8); + } + } else { //B_L0_4x8 B_L1_4x8 + int32_t listIdx = IS_TYPE_L0 (iSubMBType) ? LIST_0 : LIST_1; + iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx][1]; + iRefIndex = pCurDqLayer->pDec->pRefIndex[listIdx][iMBXY][iIIdx]; + BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iXOffset, iYOffset, pMCFunc, 4, 8, iMVs); + // Weight the left 4x8 partition before the destination pointers advance; otherwise only the right half is weighted. + if (bWeightedBipredIdcIs1) { + WeightPrediction (pCurDqLayer, &pMCRefMem, listIdx, iRefIndex, 4, 8); + } + pMCRefMem.pDstY += 4; + pMCRefMem.pDstU += 2; + pMCRefMem.pDstV += 2; + iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx + 1][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx + 1][1]; + BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iXOffset + 4, iYOffset, pMCFunc, 4, 8, iMVs); + if (bWeightedBipredIdcIs1) { + WeightPrediction (pCurDqLayer, &pMCRefMem, listIdx, iRefIndex, 4, 8); + } + } + } else if (IS_SUB_4x4 (iSubMBType)) { + if (IS_TYPE_L0 (iSubMBType) && IS_TYPE_L1 (iSubMBType)) { + for (int32_t j = 0; j < 4; j++) { + int32_t iUVLineStride; + iJIdx = ((j >> 1) << 2) + (j & 1); + + iBlk4X = (j & 1) << 2; + iBlk4Y = (j >> 1) << 2; + + iUVLineStride = (iBlk4X >> 1) + (iBlk4Y >> 1) * iDstLineChroma; + pMCRefMem.pDstY = pDstY + iBlk4X + iBlk4Y * iDstLineLuma; + pMCRefMem.pDstU = pDstU + iUVLineStride; + pMCRefMem.pDstV = pDstV + iUVLineStride; + + iMVs[0] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx + iJIdx][0]; + iMVs[1] =
pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx + iJIdx][1]; + BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex0, iXOffset + iBlk4X, iYOffset + iBlk4Y, pMCFunc, 4, 4, iMVs); + + // pDstY2 already carries the 8x8 offset; add only the 4x4 offset so the luma scratch block mirrors pMCRefMem and the chroma scratch pointers. + pTempMCRefMem.pDstY = pDstY2 + iBlk4X + iBlk4Y * iDstLineLuma; + pTempMCRefMem.pDstU = pDstU2 + iUVLineStride; + pTempMCRefMem.pDstV = pDstV2 + iUVLineStride; + + iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx + iJIdx][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx + iJIdx][1]; + BaseMC (pCtx, &pTempMCRefMem, LIST_1, iRefIndex1, iXOffset + iBlk4X, iYOffset + iBlk4Y, pMCFunc, 4, 4, iMVs); + + if (pCurDqLayer->bUseWeightedBiPredIdc) { + BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex0, iRefIndex1, bWeightedBipredIdcIs1, 4, 4); + } else { + BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 4, 4); + } + } + } else { + int32_t listIdx = IS_TYPE_L0 (iSubMBType) ? LIST_0 : LIST_1; + iRefIndex = pCurDqLayer->pDec->pRefIndex[listIdx][iMBXY][iIIdx]; + for (int32_t j = 0; j < 4; j++) { + int32_t iUVLineStride; + iJIdx = ((j >> 1) << 2) + (j & 1); + + iBlk4X = (j & 1) << 2; + iBlk4Y = (j >> 1) << 2; + + iUVLineStride = (iBlk4X >> 1) + (iBlk4Y >> 1) * iDstLineChroma; + pMCRefMem.pDstY = pDstY + iBlk4X + iBlk4Y * iDstLineLuma; + pMCRefMem.pDstU = pDstU + iUVLineStride; + pMCRefMem.pDstV = pDstV + iUVLineStride; + + iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx + iJIdx][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx + iJIdx][1]; + BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iXOffset + iBlk4X, iYOffset + iBlk4Y, pMCFunc, 4, 4, iMVs); + if (bWeightedBipredIdcIs1) { + WeightPrediction (pCurDqLayer, &pMCRefMem, listIdx, iRefIndex, 4, 4); + } + } + } + } + } + } + return ERR_NONE; +} + +int32_t RecChroma (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer) { + int32_t iChromaStride = pCtx->pCurDqLayer->pDec->iLinesize[1]; + PIdctFourResAddPredFunc pIdctFourResAddPredFunc = pCtx->pIdctFourResAddPredFunc; + + uint8_t
i = 0; + uint8_t uiCbpC = pDqLayer->pCbp[iMBXY] >> 4; + + if (1 == uiCbpC || 2 == uiCbpC) { + for (i = 0; i < 2; i++) { + int16_t* pRS = pScoeffLevel + 256 + (i << 6); + uint8_t* pPred = pDqLayer->pPred[i + 1]; + const int8_t* pNzc = pDqLayer->pNzc[iMBXY] + 16 + 2 * i; + + /*1 chroma is divided 4 4x4_block to idct*/ + pIdctFourResAddPredFunc (pPred, iChromaStride, pRS, pNzc); + } + } + + return ERR_NONE; +} + +} // namespace WelsDec diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/wels_decoder_thread.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/wels_decoder_thread.cpp new file mode 100644 index 000000000..d05aa4515 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/src/wels_decoder_thread.cpp @@ -0,0 +1,311 @@ +/*! + * \copy + * Copyright (c) 2009-2019, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file wels_decoder_thread.cpp + * + * \brief Interfaces introduced in thread programming + * + * \date 08/06/2018 Created + * + ************************************************************************************* + */ + + +#ifdef __linux__ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include +#elif !defined(_WIN32) && !defined(__CYGWIN__) +#include +#include +#include +#ifndef __Fuchsia__ +#include +#endif +#ifdef __APPLE__ +#define HW_NCPU_NAME "hw.logicalcpu" +#else +#define HW_NCPU_NAME "hw.ncpu" +#endif +#endif + +#include "wels_decoder_thread.h" +#include +#include + +int32_t GetCPUCount() { + WelsLogicalProcessInfo pInfo; + pInfo.ProcessorCount = 1; + WelsQueryLogicalProcessInfo (&pInfo); + return pInfo.ProcessorCount; +} + +int ThreadCreate (SWelsDecThread* t, LPWELS_THREAD_ROUTINE tf, void* ta) { + WELS_THREAD_ATTR attr = 0; + return WelsThreadCreate (& (t->h), tf, ta, attr); +} + +int ThreadWait (SWelsDecThread* t) { + return WelsThreadJoin (t->h); +} + +#if defined(_WIN32) || defined(__CYGWIN__) + +int EventCreate (SWelsDecEvent* e, int manualReset, int initialState) { + e->h = CreateEvent (NULL, manualReset, initialState, NULL); + e->isSignaled = initialState; + return (e->h != NULL) ? 
0 : 1; +} + +void EventReset (SWelsDecEvent* e) { + ResetEvent (e->h); + e->isSignaled = 0; +} + +void EventPost (SWelsDecEvent* e) { + SetEvent (e->h); + e->isSignaled = 1; +} + +int EventWait (SWelsDecEvent* e, int32_t timeout) { + DWORD result; + if ((uint32_t)timeout == WELS_DEC_THREAD_WAIT_INFINITE || timeout < 0) + result = WaitForSingleObject (e->h, INFINITE); + else + result = WaitForSingleObject (e->h, timeout); + + if (result == WAIT_OBJECT_0) + return WELS_DEC_THREAD_WAIT_SIGNALED; + else + return WELS_DEC_THREAD_WAIT_TIMEDOUT; // was the raw Win32 WAIT_TIMEOUT; SemWait and the pthread EventWait return this code +} + +void EventDestroy (SWelsDecEvent* e) { + CloseHandle (e->h); + e->h = NULL; +} + +int SemCreate (SWelsDecSemphore* s, long value, long max) { + s->h = CreateSemaphore (NULL, value, max, NULL); + return (s->h != NULL) ? 0 : 1; +} + +int SemWait (SWelsDecSemphore* s, int32_t timeout) { + DWORD result; + if ((uint32_t)timeout == WELS_DEC_THREAD_WAIT_INFINITE || timeout < 0) + result = WaitForSingleObject (s->h, INFINITE); + else + result = WaitForSingleObject (s->h, timeout); + + if (result == WAIT_OBJECT_0) { + return WELS_DEC_THREAD_WAIT_SIGNALED; + } else { + return WELS_DEC_THREAD_WAIT_TIMEDOUT; + } +} + +void SemRelease (SWelsDecSemphore* s, long* prevcount) { + ReleaseSemaphore (s->h, 1, prevcount); +} + +void SemDestroy (SWelsDecSemphore* s) { + CloseHandle (s->h); + s->h = NULL; +} + +#else /* _WIN32 */ + +// Fills *ts with the gettimeofday() time plus timeout milliseconds, normalised so tv_nsec stays below one second. +static void getTimespecFromTimeout (struct timespec* ts, int32_t timeout) { + struct timeval tv; + gettimeofday (&tv, 0); + // 64-bit arithmetic: timeout * 1000000 overflows a 32-bit int once timeout exceeds about 2147 ms. + const int64_t iNanos = (int64_t) tv.tv_usec * 1000 + (int64_t) timeout * 1000000; + ts->tv_sec = tv.tv_sec + (time_t) (iNanos / 1000000000); + ts->tv_nsec = (long) (iNanos % 1000000000); +} +int EventCreate (SWelsDecEvent* e, int manualReset, int initialState) { + if (pthread_mutex_init (& (e->m), NULL)) + return 1; + if (pthread_cond_init (& (e->c), NULL)) + return 2; + + e->isSignaled = initialState; + e->manualReset = manualReset; + + return 0; +} + +void EventReset (SWelsDecEvent* e) { + pthread_mutex_lock (& (e->m)); + e->isSignaled = 0; +
pthread_mutex_unlock (& (e->m)); +} + +void EventPost (SWelsDecEvent* e) { + pthread_mutex_lock (& (e->m)); + pthread_cond_broadcast (& (e->c)); + e->isSignaled = 1; + pthread_mutex_unlock (& (e->m)); +} + +int EventWait (SWelsDecEvent* e, int32_t timeout) { + pthread_mutex_lock (& (e->m)); + int signaled = e->isSignaled; + if (timeout == 0) { + pthread_mutex_unlock (& (e->m)); + if (signaled) + return WELS_DEC_THREAD_WAIT_SIGNALED; + else + return WELS_DEC_THREAD_WAIT_TIMEDOUT; + } + if (signaled) { + if (!e->manualReset) { + e->isSignaled = 0; + } + pthread_mutex_unlock (& (e->m)); + return WELS_DEC_THREAD_WAIT_SIGNALED; + } + int rc = 0; + if (timeout == WELS_DEC_THREAD_WAIT_INFINITE || timeout < 0) { + rc = pthread_cond_wait (& (e->c), & (e->m)); + } else { + struct timespec ts; + getTimespecFromTimeout (&ts, timeout); + rc = pthread_cond_timedwait (& (e->c), & (e->m), &ts); + } + if (!e->manualReset) { + e->isSignaled = 0; + } + pthread_mutex_unlock (& (e->m)); + if (rc == 0) + return WELS_DEC_THREAD_WAIT_SIGNALED; + else + return WELS_DEC_THREAD_WAIT_TIMEDOUT; +} + +void EventDestroy (SWelsDecEvent* e) { + pthread_mutex_destroy (& (e->m)); + pthread_cond_destroy (& (e->c)); +} + +int SemCreate (SWelsDecSemphore* s, long value, long max) { + s->v = value; + s->max = max; + if (pthread_mutex_init (& (s->m), NULL)) + return 1; + const char* event_name = ""; + if (WelsEventOpen (& (s->e), event_name)) { + return 2; + } + return 0; +} + +int SemWait (SWelsDecSemphore* s, int32_t timeout) { +#if defined(__APPLE__) + pthread_mutex_lock (& (s->m)); +#endif + int rc = 0; + if (timeout != 0) { + while ((s->v) == 0) { + if (timeout == WELS_DEC_THREAD_WAIT_INFINITE || timeout < 0) { + // infinite wait until released +#if defined(__APPLE__) + rc = pthread_cond_wait (& (s->e), & (s->m)); +#else + rc = sem_wait (s->e); + if (rc != 0) rc = errno; +#endif + } else { + struct timespec ts; + getTimespecFromTimeout (&ts, timeout); +#if defined(__APPLE__) + rc = 
pthread_cond_timedwait (& (s->e), & (s->m), &ts); +#else + rc = sem_timedwait (s->e, &ts); + if (rc != 0) rc = errno; +#endif + if (rc != EINTR) { + // if timed out we return to the caller + break; + } + } + } + // only decrement counter if semaphore was signaled + if (rc == 0) + s->v -= 1; + + } else { + // Special handling for timeout of 0 + if (s->v > 0) { + s->v -= 1; + rc = 0; + } else { + rc = 1; + } + } +#if defined(__APPLE__) + pthread_mutex_unlock (& (s->m)); +#endif + // set return value + if (rc == 0) + return WELS_DEC_THREAD_WAIT_SIGNALED; + else + return WELS_DEC_THREAD_WAIT_TIMEDOUT; +} + +void SemRelease (SWelsDecSemphore* s, long* o_pPrevCount) { + long prevcount; +#ifdef __APPLE__ + pthread_mutex_lock (& (s->m)); + prevcount = s->v; + if (s->v < s->max) + s->v += 1; + pthread_cond_signal (& (s->e)); + pthread_mutex_unlock (& (s->m)); +#else + prevcount = s->v; + if (s->v < s->max) + s->v += 1; + sem_post (s->e); +#endif + if (o_pPrevCount != NULL) { + *o_pPrevCount = prevcount; + } +} + +void SemDestroy (SWelsDecSemphore* s) { + pthread_mutex_destroy (& (s->m)); + const char* event_name = ""; + WelsEventClose (& (s->e), event_name); +} + +#endif /* !_WIN32 */ + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/x86/dct.asm b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/x86/dct.asm new file mode 100644 index 000000000..40e230c64 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/x86/dct.asm @@ -0,0 +1,72 @@ +;*! +;* \copy +;* Copyright (c) 2009-2013, Cisco Systems +;* All rights reserved. +;* +;* Redistribution and use in source and binary forms, with or without +;* modification, are permitted provided that the following conditions +;* are met: +;* +;* ?Redistributions of source code must retain the above copyright +;* notice, this list of conditions and the following disclaimer. 
+;* +;* * Redistributions in binary form must reproduce the above copyright +;* notice, this list of conditions and the following disclaimer in +;* the documentation and/or other materials provided with the +;* distribution. +;* +;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +;* POSSIBILITY OF SUCH DAMAGE. 
+;*
+;*
+;* dct.asm
+;*
+;* Abstract
+;* WelsDctFourT4_sse2
+;*
+;* History
+;* 8/4/2009 Created
+;*
+;*
+;*************************************************************************/
+
+%include "asm_inc.asm"
+
+SECTION .text
+
+; Zero-fills 16 rows of 16 int16_t coefficients (two 16-byte stores per row).
+; `stride` is counted in int16_t elements: it is doubled below to obtain the byte step.
+; movdqa is an aligned store, so every row address must be 16-byte aligned.
+; NOTE(review): LOAD_2_PARA / SIGN_EXTENSION come from asm_inc.asm (not visible here);
+; presumably they leave `block` in r0 and the sign-extended `stride` in r1.
+;void WelsBlockZero16x16_sse2(int16_t * block, int32_t stride);
+WELS_EXTERN WelsBlockZero16x16_sse2
+    %assign push_num 0
+    LOAD_2_PARA
+    SIGN_EXTENSION r1, r1d
+    shl r1, 1                   ; element stride -> byte stride
+    pxor xmm0, xmm0             ; xmm0 = 0
+%rep 16
+    movdqa [r0], xmm0           ; coefficients 0..7 of the row
+    movdqa [r0+16], xmm0        ; coefficients 8..15 of the row
+    add r0, r1                  ; next row
+%endrep
+    ret
+
+; Zero-fills 8 rows of 8 int16_t coefficients (one 16-byte store per row).
+; Same conventions as WelsBlockZero16x16_sse2: `stride` is in int16_t elements and
+; every row address must be 16-byte aligned for movdqa.
+;void WelsBlockZero8x8_sse2(int16_t * block, int32_t stride);
+WELS_EXTERN WelsBlockZero8x8_sse2
+    %assign push_num 0
+    LOAD_2_PARA
+    SIGN_EXTENSION r1, r1d
+    shl r1, 1                   ; element stride -> byte stride
+    pxor xmm0, xmm0             ; xmm0 = 0
+%rep 8
+    movdqa [r0], xmm0           ; whole 8-coefficient row
+    add r0, r1                  ; next row
+%endrep
+    ret
diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/x86/intra_pred.asm b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/x86/intra_pred.asm
new file mode 100644
index 000000000..4ea1afc32
--- /dev/null
+++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/core/x86/intra_pred.asm
@@ -0,0 +1,1456 @@
+;*!
+;* \copy
+;* Copyright (c) 2009-2013, Cisco Systems
+;* All rights reserved.
+;*
+;* Redistribution and use in source and binary forms, with or without
+;* modification, are permitted provided that the following conditions
+;* are met:
+;*
+;* * Redistributions of source code must retain the above copyright
+;* notice, this list of conditions and the following disclaimer.
+;*
+;* * Redistributions in binary form must reproduce the above copyright
+;* notice, this list of conditions and the following disclaimer in
+;* the documentation and/or other materials provided with the
+;* distribution.
+;*
+;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE
+;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+;* POSSIBILITY OF SUCH DAMAGE.
+;*
+;*
+;* intra_pred.asm
+;*
+;* Abstract
+;* sse2 and mmx function for intra predict operations(decoder)
+;*
+;* History
+;* 18/09/2009 Created
+;* 19/11/2010 Added
+;* WelsDecoderI16x16LumaPredDcTop_sse2, WelsDecoderI16x16LumaPredDcNA_sse2,
+;* WelsDecoderIChromaPredDcLeft_mmx, WelsDecoderIChromaPredDcTop_sse2
+;* and WelsDecoderIChromaPredDcNA_mmx
+;*
+;*
+;*************************************************************************/
+
+%include "asm_inc.asm"
+;*******************************************************************************
+; Local Data (Read Only)
+;*******************************************************************************
+
+; The tables are emitted into .text when X86_32_PICASM is defined, otherwise into .rodata.
+%ifdef X86_32_PICASM
+SECTION .text align=16
+%else
+SECTION .rodata align=16
+%endif
+
+; Per-column / per-row word weights multiplied in by the plane-prediction routines.
+align 16
+sse2_plane_inc_minus dw -7, -6, -5, -4, -3, -2, -1, 0
+align 16
+sse2_plane_inc dw 1, 2, 3, 4, 5, 6, 7, 8
+align 16
+sse2_plane_dec dw 8, 7, 6, 5, 4, 3, 2, 1
+
+; for chroma plane mode
+sse2_plane_inc_c dw 1, 2, 3, 4
+sse2_plane_dec_c dw 4, 3, 2, 1
+align 16
+sse2_plane_mul_b_c dw -3, -2, -1, 0, 1, 2, 3, 4
+
+; Sixteen bytes of 0x01: multiplying a byte value by a dword of this table replicates
+; it into all four bytes; it is also used as a per-byte lowest-bit mask (pand).
+align 16
+mmx_01bytes: times 16 db 1
+
+; The constant 2 in the low word (rounding term for the DC averages).
+align 16
+mmx_0x02: dw 0x02, 0x00, 0x00, 0x00
+
+align 16
+sse2_dc_0x80: times 16 db 0x80
+align 16
+sse2_wd_0x02: times 8 dw 0x02
+
+;*******************************************************************************
+; macros
+;*******************************************************************************
+; SSE2_PRED_H_4X4_TWO_LINE dst, tmp1, tmp2, addr, stride
+; Replicates the byte at [addr-1] four times and the byte at [addr+stride-1] four
+; times, and packs both 4-byte runs into the low 64 bits of dst.
+;xmm0, xmm1, xmm2, eax, ecx
+;lower 64 bits of xmm0 save the result
+%macro SSE2_PRED_H_4X4_TWO_LINE 5
+    movd %1, [%4-1]
+    movdqa %3, %1
+    punpcklbw %1, %3
+    movdqa %3, %1
+    punpcklbw %1, %3
+
+    ;add %4, %5
+    movd %2, [%4+%5-1]
+    movdqa %3, %2
+    punpcklbw %2, %3
+    movdqa %3, %2
+    punpcklbw %2, %3
+    punpckldq %1, %2
+%endmacro
+
+
+; LOAD_COLUMN dst, tmp1, tmp2, tmp3, addr, stride
+; Reads a dword from each of 8 consecutive rows starting at addr and transposes them
+; with byte/word/dword unpacks; the upper 8 bytes of dst end up holding byte 3 of
+; rows 0..7 (i.e. the column at addr+3). addr is advanced by 8*stride.
+%macro LOAD_COLUMN 6
+    movd %1, [%5]
+    movd %2, [%5+%6]
+    punpcklbw %1, %2
+    lea %5, [%5+2*%6]
+    movd %3, [%5]
+    movd %2, [%5+%6]
+    punpcklbw %3, %2
+    punpcklwd %1, %3
+    lea %5, [%5+2*%6]
+    movd %4, [%5]
+    movd %2, [%5+%6]
+    punpcklbw %4, %2
+    lea %5, [%5+2*%6]
+    movd %3, [%5]
+    movd %2, [%5+%6]
+    lea %5, [%5+2*%6]
+    punpcklbw %3, %2
+    punpcklwd %4, %3
+    punpckhdq %1, %4
+%endmacro
+
+; SUMW_HORIZON acc, tmp, pad
+; Horizontal add of the eight words in acc; the low word of acc receives the total
+; (callers read it back with movd + movsx on the low 16 bits).
+%macro SUMW_HORIZON 3
+    movhlps %2, %1 ; x2 = xx xx xx xx d7 d6 d5 d4
+    paddw %1, %2 ; x1 = xx xx xx xx d37 d26 d15 d04
+    punpcklwd %1, %3 ; x1 = d37 d26 d15 d04
+    movhlps %2, %1 ; x2 = xxxx xxxx d37 d26
+    paddd %1, %2 ; x1 = xxxx xxxx d1357 d0246
+    pshuflw %2, %1, 0x4e ; x2 = xxxx xxxx d0246 d1357
+    paddd %1, %2 ; x1 = xxxx xxxx xxxx d01234567
+%endmacro
+
+; COPY_16_TIMES addr, dst
+; Broadcasts the byte at [addr-1] into all 16 bytes of dst. The 16-byte load at
+; [addr-16] is a movdqa, so addr must be 16-byte aligned.
+%macro COPY_16_TIMES 2
+    movdqa %2, [%1-16]
+    psrldq %2, 15
+    pmuludq %2, [pic(mmx_01bytes)]
+    pshufd %2, %2, 0
+%endmacro
+
+; COPY_16_TIMESS addr, dst, offset
+; Same as COPY_16_TIMES but for the byte at [addr+offset-1].
+%macro COPY_16_TIMESS 3
+    movdqa %2, [%1+%3-16]
+    psrldq %2, 15
+    pmuludq %2, [pic(mmx_01bytes)]
+    pshufd %2, %2, 0
+%endmacro
+
+; LOAD_COLUMN_C dst, tmp1, tmp2, (unused), addr, stride
+; 4-row counterpart of LOAD_COLUMN: with 64-bit registers the upper 4 bytes of dst
+; end up holding byte 3 of rows 0..3. addr is advanced by 4*stride.
+%macro LOAD_COLUMN_C 6
+    movd %1, [%5]
+    movd %2, [%5+%6]
+    punpcklbw %1,%2
+    lea %5, [%5+2*%6]
+    movd %3, [%5]
+    movd %2, [%5+%6]
+    punpcklbw %3, %2
+    punpckhwd %1, %3
+    lea %5, [%5+2*%6]
+%endmacro
+
+; LOAD_2_LEFT_AND_ADD
+; Advances r0 by two rows and adds the two bytes just left of those rows
+; ([r0-1] and [r0+r1-1]) into r2; r3 is clobbered.
+%macro LOAD_2_LEFT_AND_ADD 0
+    lea r0, [r0+2*r1]
+    movzx r3, byte [r0-0x01]
+    add r2, r3
+    movzx r3, byte [r0+r1-0x01]
+    add r2, r3
+%endmacro
+
+;*******************************************************************************
+; Code
+;*******************************************************************************
+
+SECTION .text
+
+
+;*******************************************************************************
+; void WelsDecoderI4x4LumaPredH_sse2(uint8_t *pPred, const int32_t
kiStride) +; +; pPred must align to 16 +;******************************************************************************* +WELS_EXTERN WelsDecoderI4x4LumaPredH_sse2 + %assign push_num 0 + INIT_X86_32_PIC r3 + LOAD_2_PARA + SIGN_EXTENSION r1, r1d + + movzx r2, byte [r0-1] + movd xmm0, r2d + pmuludq xmm0, [pic(mmx_01bytes)] + + movzx r2, byte [r0+r1-1] + movd xmm1, r2d + pmuludq xmm1, [pic(mmx_01bytes)] + + lea r0, [r0+r1] + movzx r2, byte [r0+r1-1] + movd xmm2, r2d + pmuludq xmm2, [pic(mmx_01bytes)] + + movzx r2, byte [r0+2*r1-1] + movd xmm3, r2d + pmuludq xmm3, [pic(mmx_01bytes)] + + sub r0, r1 + movd [r0], xmm0 + movd [r0+r1], xmm1 + lea r0, [r0+2*r1] + movd [r0], xmm2 + movd [r0+r1], xmm3 + + DEINIT_X86_32_PIC + ret + +;******************************************************************************* +; void WelsDecoderI16x16LumaPredPlane_sse2(uint8_t *pPred, const int32_t kiStride); +;******************************************************************************* +WELS_EXTERN WelsDecoderI16x16LumaPredPlane_sse2 + push r3 + push r4 + %assign push_num 2 + INIT_X86_32_PIC r5 + LOAD_2_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + mov r4, r0 ; save r0 in r4 + sub r0, 1 + sub r0, r1 + + ;for H + pxor xmm7, xmm7 + movq xmm0, [r0] + movdqa xmm5, [pic(sse2_plane_dec)] + punpcklbw xmm0, xmm7 + pmullw xmm0, xmm5 + movq xmm1, [r0 + 9] + movdqa xmm6, [pic(sse2_plane_inc)] + punpcklbw xmm1, xmm7 + pmullw xmm1, xmm6 + psubw xmm1, xmm0 + + SUMW_HORIZON xmm1,xmm0,xmm2 + movd r2d, xmm1 ; H += (i + 1) * (top[8 + i] - top[6 - i]); + movsx r2, r2w + imul r2, 5 + add r2, 32 + sar r2, 6 ; b = (5 * H + 32) >> 6; + SSE2_Copy8Times xmm1, r2d ; xmm1 = b,b,b,b,b,b,b,b + + movzx r3, BYTE [r0+16] + sub r0, 3 + LOAD_COLUMN xmm0, xmm2, xmm3, xmm4, r0, r1 + + add r0, 3 + movzx r2, BYTE [r0+8*r1] + add r3, r2 + shl r3, 4 ; a = (left[15*kiStride] + top[15]) << 4; + + sub r0, 3 + add r0, r1 + LOAD_COLUMN xmm7, xmm2, xmm3, xmm4, r0, r1 + pxor xmm4, xmm4 + punpckhbw xmm0, xmm4 + pmullw xmm0, xmm5 + 
punpckhbw xmm7, xmm4 + pmullw xmm7, xmm6 + psubw xmm7, xmm0 + + ; Indicate that xmm2 is fully initialized. Its actual value doesn't + ; matter in SUMW_HORIZON below, but after being used in LOAD_COLUMN above, + ; valgrind thinks that xmm2 contains uninitalized data (if the columns outside + ; of the left are uninitialized, such as in DecUT_IntraPrediction), which taints + ; r2d below, even if actually isn't based on the uninitialized data. + pxor xmm2, xmm2 + + SUMW_HORIZON xmm7,xmm0,xmm2 + movd r2d, xmm7 ; V + movsx r2, r2w + + imul r2, 5 + add r2, 32 + sar r2, 6 ; c = (5 * V + 32) >> 6; + SSE2_Copy8Times xmm4, r2d ; xmm4 = c,c,c,c,c,c,c,c + + mov r0, r4 + add r3, 16 + imul r2, -7 + add r3, r2 ; s = a + 16 + (-7)*c + SSE2_Copy8Times xmm0, r3d ; xmm0 = s,s,s,s,s,s,s,s + + xor r2, r2 + movdqa xmm5, [pic(sse2_plane_inc_minus)] + +get_i16x16_luma_pred_plane_sse2_1: + movdqa xmm2, xmm1 + pmullw xmm2, xmm5 + paddw xmm2, xmm0 + psraw xmm2, 5 + movdqa xmm3, xmm1 + pmullw xmm3, xmm6 + paddw xmm3, xmm0 + psraw xmm3, 5 + packuswb xmm2, xmm3 + movdqa [r0], xmm2 + paddw xmm0, xmm4 + add r0, r1 + inc r2 + cmp r2, 16 + jnz get_i16x16_luma_pred_plane_sse2_1 + + POP_XMM + DEINIT_X86_32_PIC + pop r4 + pop r3 + ret + + + +;******************************************************************************* +; void WelsDecoderI16x16LumaPredH_sse2(uint8_t *pPred, const int32_t kiStride); +;******************************************************************************* + +%macro SSE2_PRED_H_16X16_TWO_LINE_DEC 2 + lea %1, [%1+%2*2] + + COPY_16_TIMES %1, xmm0 + movdqa [%1], xmm0 + COPY_16_TIMESS %1, xmm0, %2 + movdqa [%1+%2], xmm0 +%endmacro + +WELS_EXTERN WelsDecoderI16x16LumaPredH_sse2 + %assign push_num 0 + INIT_X86_32_PIC_NOPRESERVE r2 + LOAD_2_PARA + SIGN_EXTENSION r1, r1d + + COPY_16_TIMES r0, xmm0 + movdqa [r0], xmm0 + COPY_16_TIMESS r0, xmm0, r1 + movdqa [r0+r1], xmm0 + + SSE2_PRED_H_16X16_TWO_LINE_DEC r0, r1 + SSE2_PRED_H_16X16_TWO_LINE_DEC r0, r1 + SSE2_PRED_H_16X16_TWO_LINE_DEC r0, 
r1 + SSE2_PRED_H_16X16_TWO_LINE_DEC r0, r1 + SSE2_PRED_H_16X16_TWO_LINE_DEC r0, r1 + SSE2_PRED_H_16X16_TWO_LINE_DEC r0, r1 + SSE2_PRED_H_16X16_TWO_LINE_DEC r0, r1 + + DEINIT_X86_32_PIC + ret + +;******************************************************************************* +; void WelsDecoderI16x16LumaPredV_sse2(uint8_t *pPred, const int32_t kiStride); +;******************************************************************************* +WELS_EXTERN WelsDecoderI16x16LumaPredV_sse2 + %assign push_num 0 + LOAD_2_PARA + SIGN_EXTENSION r1, r1d + + sub r0, r1 + movdqa xmm0, [r0] + + movdqa [r0+r1], xmm0 + lea r0, [r0+2*r1] + movdqa [r0], xmm0 + movdqa [r0+r1], xmm0 + lea r0, [r0+2*r1] + movdqa [r0], xmm0 + movdqa [r0+r1], xmm0 + lea r0, [r0+2*r1] + movdqa [r0], xmm0 + movdqa [r0+r1], xmm0 + lea r0, [r0+2*r1] + movdqa [r0], xmm0 + movdqa [r0+r1], xmm0 + lea r0, [r0+2*r1] + movdqa [r0], xmm0 + movdqa [r0+r1], xmm0 + lea r0, [r0+2*r1] + movdqa [r0], xmm0 + movdqa [r0+r1], xmm0 + lea r0, [r0+2*r1] + movdqa [r0], xmm0 + movdqa [r0+r1], xmm0 + lea r0, [r0+2*r1] + movdqa [r0], xmm0 + + ret + +;******************************************************************************* +; void WelsDecoderIChromaPredPlane_sse2(uint8_t *pPred, const int32_t kiStride); +;******************************************************************************* +WELS_EXTERN WelsDecoderIChromaPredPlane_sse2 + push r3 + push r4 + %assign push_num 2 + INIT_X86_32_PIC r5 + LOAD_2_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + mov r4, r0 + sub r0, 1 + sub r0, r1 + + pxor mm7, mm7 + movq mm0, [r0] + movq mm5, [pic(sse2_plane_dec_c)] + punpcklbw mm0, mm7 + pmullw mm0, mm5 + movq mm1, [r0 + 5] + movq mm6, [pic(sse2_plane_inc_c)] + punpcklbw mm1, mm7 + pmullw mm1, mm6 + psubw mm1, mm0 + + movq2dq xmm1, mm1 + pxor xmm2, xmm2 + SUMW_HORIZON xmm1,xmm0,xmm2 + movd r2d, xmm1 + movsx r2, r2w + imul r2, 17 + add r2, 16 + sar r2, 5 ; b = (17 * H + 16) >> 5; + SSE2_Copy8Times xmm1, r2d ; mm1 = b,b,b,b,b,b,b,b + + movzx r3, BYTE 
[r0+8] + sub r0, 3 + LOAD_COLUMN_C mm0, mm2, mm3, mm4, r0, r1 + + add r0, 3 + movzx r2, BYTE [r0+4*r1] + add r3, r2 + shl r3, 4 ; a = (left[7*kiStride] + top[7]) << 4; + + sub r0, 3 + add r0, r1 + LOAD_COLUMN_C mm7, mm2, mm3, mm4, r0, r1 + pxor mm4, mm4 + punpckhbw mm0, mm4 + pmullw mm0, mm5 + punpckhbw mm7, mm4 + pmullw mm7, mm6 + psubw mm7, mm0 + + movq2dq xmm7, mm7 + pxor xmm2, xmm2 + SUMW_HORIZON xmm7,xmm0,xmm2 + movd r2d, xmm7 ; V + movsx r2, r2w + + imul r2, 17 + add r2, 16 + sar r2, 5 ; c = (17 * V + 16) >> 5; + SSE2_Copy8Times xmm4, r2d ; mm4 = c,c,c,c,c,c,c,c + + mov r0, r4 + add r3, 16 + imul r2, -3 + add r3, r2 ; s = a + 16 + (-3)*c + SSE2_Copy8Times xmm0, r3d ; xmm0 = s,s,s,s,s,s,s,s + + xor r2, r2 + movdqa xmm5, [pic(sse2_plane_mul_b_c)] + +get_i_chroma_pred_plane_sse2_1: + movdqa xmm2, xmm1 + pmullw xmm2, xmm5 + paddw xmm2, xmm0 + psraw xmm2, 5 + packuswb xmm2, xmm2 + movq [r0], xmm2 + paddw xmm0, xmm4 + add r0, r1 + inc r2 + cmp r2, 8 + jnz get_i_chroma_pred_plane_sse2_1 + + POP_XMM + DEINIT_X86_32_PIC + pop r4 + pop r3 + WELSEMMS + ret + +;******************************************************************************* +; 0 |1 |2 |3 |4 | +; 6 |7 |8 |9 |10| +; 11|12|13|14|15| +; 16|17|18|19|20| +; 21|22|23|24|25| +; 7 is the start pixel of current 4x4 block +; pPred[7] = ([6]+[0]*2+[1]+2)/4 +; +; void WelsDecoderI4x4LumaPredDDR_mmx(uint8_t *pPred, const int32_t kiStride) +; +;******************************************************************************* +WELS_EXTERN WelsDecoderI4x4LumaPredDDR_mmx + %assign push_num 0 + INIT_X86_32_PIC r3 + LOAD_2_PARA + SIGN_EXTENSION r1, r1d + mov r2, r0 + + movq mm1,[r2+r1-8] ;get value of 11,decreasing 8 is trying to improve the performance of movq mm1[8] = 11 + movq mm2,[r2-8] ;get value of 6 mm2[8] = 6 + sub r2, r1 ;mov eax to above line of current block(postion of 1) + punpckhbw mm2,[r2-8] ;mm2[8](high 8th byte of mm2) = [0](value of 0), mm2[7]= [6] + movd mm3,[r2] ;get value 1, mm3[1] = 
[1],mm3[2]=[2],mm3[3]=[3] + punpckhwd mm1,mm2 ;mm1[8]=[0],mm1[7]=[6],mm1[6]=[11] + psllq mm3,18h ;mm3[5]=[1] + psrlq mm1,28h ;mm1[3]=[0],mm1[2]=[6],mm1[1]=[11] + por mm3,mm1 ;mm3[6]=[3],mm3[5]=[2],mm3[4]=[1],mm3[3]=[0],mm3[2]=[6],mm3[1]=[11] + movq mm1,mm3 ;mm1[6]=[3],mm1[5]=[2],mm1[4]=[1],mm1[3]=[0],mm1[2]=[6],mm1[1]=[11] + lea r2,[r2+r1*2-8h] ;set eax point to 12 + movq mm4,[r2+r1] ;get value of 16, mm4[8]=[16] + psllq mm3,8 ;mm3[7]=[3],mm3[6]=[2],mm3[5]=[1],mm3[4]=[0],mm3[3]=[6],mm3[2]=[11],mm3[1]=0 + psrlq mm4,38h ;mm4[1]=[16] + por mm3,mm4 ;mm3[7]=[3],mm3[6]=[2],mm3[5]=[1],mm3[4]=[0],mm3[3]=[6],mm3[2]=[11],mm3[1]=[16] + movq mm2,mm3 ;mm2[7]=[3],mm2[6]=[2],mm2[5]=[1],mm2[4]=[0],mm2[3]=[6],mm2[2]=[11],mm2[1]=[16] + movq mm4,[r2+r1*2] ;mm4[8]=[21] + psllq mm3,8 ;mm3[8]=[3],mm3[7]=[2],mm3[6]=[1],mm3[5]=[0],mm3[4]=[6],mm3[3]=[11],mm3[2]=[16],mm3[1]=0 + psrlq mm4,38h ;mm4[1]=[21] + por mm3,mm4 ;mm3[8]=[3],mm3[7]=[2],mm3[6]=[1],mm3[5]=[0],mm3[4]=[6],mm3[3]=[11],mm3[2]=[16],mm3[1]=[21] + movq mm4,mm3 ;mm4[8]=[3],mm4[7]=[2],mm4[6]=[1],mm4[5]=[0],mm4[4]=[6],mm4[3]=[11],mm4[2]=[16],mm4[1]=[21] + pavgb mm3,mm1 ;mm3=([11]+[21]+1)/2 + pxor mm1,mm4 ;find odd value in the lowest bit of each byte + pand mm1,[pic(mmx_01bytes)] ;set the odd bit + psubusb mm3,mm1 ;decrease 1 from odd bytes + pavgb mm2,mm3 ;mm2=(([11]+[21]+1)/2+1+[16])/2 + + lea r0,[r0+r1] + movd [r0+2*r1],mm2 + sub r0,r1 + psrlq mm2,8 + movd [r0+2*r1],mm2 + psrlq mm2,8 + movd [r0+r1],mm2 + psrlq mm2,8 + movd [r0],mm2 + DEINIT_X86_32_PIC + WELSEMMS + ret + + +;******************************************************************************* +; void WelsDecoderIChromaPredH_mmx(uint8_t *pPred, const int32_t kiStride) +; copy 8 pixel of 8 line from left +;******************************************************************************* +%macro MMX_PRED_H_8X8_ONE_LINE 4 + movq %1, [%3-8] + psrlq %1, 38h + + pmullw %1, [pic(mmx_01bytes)] + pshufw %1, %1, 0 + movq [%4], %1 +%endmacro + +%macro MMX_PRED_H_8X8_ONE_LINEE 4 + 
movq %1, [%3+r1-8] + psrlq %1, 38h + + pmullw %1, [pic(mmx_01bytes)] + pshufw %1, %1, 0 + movq [%4], %1 +%endmacro + +WELS_EXTERN WelsDecoderIChromaPredH_mmx + %assign push_num 0 + INIT_X86_32_PIC r3 + LOAD_2_PARA + SIGN_EXTENSION r1, r1d + mov r2, r0 + + movq mm0, [r2-8] + psrlq mm0, 38h + + pmullw mm0, [pic(mmx_01bytes)] + pshufw mm0, mm0, 0 + movq [r0], mm0 + + MMX_PRED_H_8X8_ONE_LINEE mm0, mm1, r2, r0+r1 + + lea r2, [r2+r1*2] + MMX_PRED_H_8X8_ONE_LINE mm0, mm1, r2, r0+2*r1 + + lea r0, [r0+2*r1] + MMX_PRED_H_8X8_ONE_LINEE mm0, mm1, r2, r0+r1 + + lea r2, [r2+r1*2] + MMX_PRED_H_8X8_ONE_LINE mm0, mm1, r2, r0+2*r1 + + lea r0, [r0+2*r1] + MMX_PRED_H_8X8_ONE_LINEE mm0, mm1, r2, r0+r1 + + lea r2, [r2+r1*2] + MMX_PRED_H_8X8_ONE_LINE mm0, mm1, r2, r0+2*r1 + + lea r0, [r0+2*r1] + MMX_PRED_H_8X8_ONE_LINEE mm0, mm1, r2, r0+r1 + + DEINIT_X86_32_PIC + WELSEMMS + ret + + +;******************************************************************************* +; void WelsDecoderIChromaPredV_mmx(uint8_t *pPred, const int32_t kiStride) +; copy 8 pixels from top 8 pixels +;******************************************************************************* +WELS_EXTERN WelsDecoderIChromaPredV_mmx + %assign push_num 0 + LOAD_2_PARA + SIGN_EXTENSION r1, r1d + + sub r0, r1 + movq mm0, [r0] + + movq [r0+r1], mm0 + movq [r0+2*r1], mm0 + lea r0, [r0+2*r1] + movq [r0+r1], mm0 + movq [r0+2*r1], mm0 + lea r0, [r0+2*r1] + movq [r0+r1], mm0 + movq [r0+2*r1], mm0 + lea r0, [r0+2*r1] + movq [r0+r1], mm0 + movq [r0+2*r1], mm0 + + WELSEMMS + ret + + +;******************************************************************************* +; lt|t0|t1|t2|t3| +; l0| +; l1| +; l2| +; l3| +; t3 will never been used +; destination: +; |a |b |c |d | +; |e |f |a |b | +; |g |h |e |f | +; |i |j |g |h | + +; a = (1 + lt + l0)>>1 +; e = (1 + l0 + l1)>>1 +; g = (1 + l1 + l2)>>1 +; i = (1 + l2 + l3)>>1 + +; d = (2 + t0 + (t1<<1) + t2)>>2 +; c = (2 + lt + (t0<<1) + t1)>>2 +; b = (2 + l0 + (lt<<1) + t0)>>2 + +; f = (2 + l1 + 
(l0<<1) + lt)>>2 +; h = (2 + l2 + (l1<<1) + l0)>>2 +; j = (2 + l3 + (l2<<1) + l1)>>2 +; [b a f e h g j i] + [d c b a] --> mov to memory +; +; void WelsDecoderI4x4LumaPredHD_mmx(uint8_t *pPred, const int32_t kiStride) +;******************************************************************************* +WELS_EXTERN WelsDecoderI4x4LumaPredHD_mmx + %assign push_num 0 + INIT_X86_32_PIC r3 + LOAD_2_PARA + SIGN_EXTENSION r1, r1d + mov r2, r0 + sub r2, r1 + movd mm0, [r2-1] ; mm0 = [xx xx xx xx t2 t1 t0 lt] + psllq mm0, 20h ; mm0 = [t2 t1 t0 lt xx xx xx xx] + + movd mm1, [r2+2*r1-4] + punpcklbw mm1, [r2+r1-4] ; mm1[7] = l0, mm1[6] = l1 + lea r2, [r2+2*r1] + movd mm2, [r2+2*r1-4] + punpcklbw mm2, [r2+r1-4] ; mm2[7] = l2, mm2[6] = l3 + punpckhwd mm2, mm1 ; mm2 = [l0 l1 l2 l3 xx xx xx xx] + psrlq mm2, 20h + pxor mm0, mm2 ; mm0 = [t2 t1 t0 lt l0 l1 l2 l3] + + movq mm1, mm0 + psrlq mm1, 10h ; mm1 = [xx xx t2 t1 t0 lt l0 l1] + movq mm2, mm0 + psrlq mm2, 8h ; mm2 = [xx t2 t1 t0 lt l0 l1 l2] + movq mm3, mm2 + movq mm4, mm1 + pavgb mm1, mm0 + + pxor mm4, mm0 ; find odd value in the lowest bit of each byte + pand mm4, [pic(mmx_01bytes)] ; set the odd bit + psubusb mm1, mm4 ; decrease 1 from odd bytes + + pavgb mm2, mm1 ; mm2 = [xx xx d c b f h j] + + movq mm4, mm0 + pavgb mm3, mm4 ; mm3 = [xx xx xx xx a e g i] + punpcklbw mm3, mm2 ; mm3 = [b a f e h g j i] + + psrlq mm2, 20h + psllq mm2, 30h ; mm2 = [d c 0 0 0 0 0 0] + movq mm4, mm3 + psrlq mm4, 10h ; mm4 = [0 0 b a f e h j] + pxor mm2, mm4 ; mm2 = [d c b a xx xx xx xx] + psrlq mm2, 20h ; mm2 = [xx xx xx xx d c b a] + + movd [r0], mm2 + lea r0, [r0+r1] + movd [r0+2*r1], mm3 + sub r0, r1 + psrlq mm3, 10h + movd [r0+2*r1], mm3 + psrlq mm3, 10h + movd [r0+r1], mm3 + DEINIT_X86_32_PIC + WELSEMMS + ret + + + +;******************************************************************************* +; lt|t0|t1|t2|t3| +; l0| +; l1| +; l2| +; l3| +; t3 will never been used +; destination: +; |a |b |c |d | +; |c |d |e |f | +; |e |f |g |g | +; |g |g |g 
|g | + +; a = (1 + l0 + l1)>>1 +; c = (1 + l1 + l2)>>1 +; e = (1 + l2 + l3)>>1 +; g = l3 + +; b = (2 + l0 + (l1<<1) + l2)>>2 +; d = (2 + l1 + (l2<<1) + l3)>>2 +; f = (2 + l2 + (l3<<1) + l3)>>2 + +; [g g f e d c b a] + [g g g g] --> mov to memory +; +; void WelsDecoderI4x4LumaPredHU_mmx(uint8_t *pPred, const int32_t kiStride) +;******************************************************************************* +WELS_EXTERN WelsDecoderI4x4LumaPredHU_mmx + %assign push_num 0 + INIT_X86_32_PIC r3 + LOAD_2_PARA + SIGN_EXTENSION r1, r1d + mov r2, r0 + + movd mm0, [r2-4] ; mm0[3] = l0 + punpcklbw mm0, [r2+r1-4] ; mm0[7] = l1, mm0[6] = l0 + lea r2, [r2+2*r1] + movd mm2, [r2-4] ; mm2[3] = l2 + movd mm4, [r2+r1-4] ; mm4[3] = l3 + punpcklbw mm2, mm4 + punpckhwd mm0, mm2 ; mm0 = [l3 l2 l1 l0 xx xx xx xx] + + psrlq mm4, 18h + psllq mm4, 38h ; mm4 = [l3 xx xx xx xx xx xx xx] + psrlq mm0, 8h + pxor mm0, mm4 ; mm0 = [l3 l3 l2 l1 l0 xx xx xx] + + movq mm1, mm0 + psllq mm1, 8h ; mm1 = [l3 l2 l1 l0 xx xx xx xx] + movq mm3, mm1 ; mm3 = [l3 l2 l1 l0 xx xx xx xx] + pavgb mm1, mm0 ; mm1 = [g e c a xx xx xx xx] + + movq mm2, mm0 + psllq mm2, 10h ; mm2 = [l2 l1 l0 xx xx xx xx xx] + movq mm5, mm2 + pavgb mm2, mm0 + + pxor mm5, mm0 ; find odd value in the lowest bit of each byte + pand mm5, [pic(mmx_01bytes)] ; set the odd bit + psubusb mm2, mm5 ; decrease 1 from odd bytes + + pavgb mm2, mm3 ; mm2 = [f d b xx xx xx xx xx] + + psrlq mm2, 8h + pxor mm2, mm4 ; mm2 = [g f d b xx xx xx xx] + + punpckhbw mm1, mm2 ; mm1 = [g g f e d c b a] + punpckhbw mm4, mm4 ; mm4 = [g g xx xx xx xx xx xx] + punpckhbw mm4, mm4 ; mm4 = [g g g g xx xx xx xx] + + psrlq mm4, 20h + lea r0, [r0+r1] + movd [r0+2*r1], mm4 + + sub r0, r1 + movd [r0], mm1 + psrlq mm1, 10h + movd [r0+r1], mm1 + psrlq mm1, 10h + movd [r0+2*r1], mm1 + DEINIT_X86_32_PIC + WELSEMMS + ret + + + +;******************************************************************************* +; lt|t0|t1|t2|t3| +; l0| +; l1| +; l2| +; l3| +; l3 will never been used 
+; destination: +; |a |b |c |d | +; |e |f |g |h | +; |i |a |b |c | +; |j |e |f |g | + +; a = (1 + lt + t0)>>1 +; b = (1 + t0 + t1)>>1 +; c = (1 + t1 + t2)>>1 +; d = (1 + t2 + t3)>>1 + +; e = (2 + l0 + (lt<<1) + t0)>>2 +; f = (2 + lt + (t0<<1) + t1)>>2 +; g = (2 + t0 + (t1<<1) + t2)>>2 + +; h = (2 + t1 + (t2<<1) + t3)>>2 +; i = (2 + lt + (l0<<1) + l1)>>2 +; j = (2 + l0 + (l1<<1) + l2)>>2 +; +; void WelsDecoderI4x4LumaPredVR_mmx(uint8_t *pPred, const int32_t kiStride) +;******************************************************************************* +WELS_EXTERN WelsDecoderI4x4LumaPredVR_mmx + %assign push_num 0 + INIT_X86_32_PIC r3 + LOAD_2_PARA + SIGN_EXTENSION r1, r1d + mov r2, r0 + sub r2, r1 + movq mm0, [r2-1] ; mm0 = [xx xx xx t3 t2 t1 t0 lt] + psllq mm0, 18h ; mm0 = [t3 t2 t1 t0 lt xx xx xx] + + movd mm1, [r2+2*r1-4] + punpcklbw mm1, [r2+r1-4] ; mm1[7] = l0, mm1[6] = l1 + lea r2, [r2+2*r1] + movq mm2, [r2+r1-8] ; mm2[7] = l2 + punpckhwd mm2, mm1 ; mm2 = [l0 l1 l2 xx xx xx xx xx] + psrlq mm2, 28h + pxor mm0, mm2 ; mm0 = [t3 t2 t1 t0 lt l0 l1 l2] + + movq mm1, mm0 + psllq mm1, 8h ; mm1 = [t2 t1 t0 lt l0 l1 l2 xx] + pavgb mm1, mm0 ; mm1 = [d c b a xx xx xx xx] + + movq mm2, mm0 + psllq mm2, 10h ; mm2 = [t1 t0 lt l0 l1 l2 xx xx] + movq mm3, mm2 + pavgb mm2, mm0 + + pxor mm3, mm0 ; find odd value in the lowest bit of each byte + pand mm3, [pic(mmx_01bytes)] ; set the odd bit + psubusb mm2, mm3 ; decrease 1 from odd bytes + + movq mm3, mm0 + psllq mm3, 8h ; mm3 = [t2 t1 t0 lt l0 l1 l2 xx] + pavgb mm3, mm2 ; mm3 = [h g f e i j xx xx] + movq mm2, mm3 + + psrlq mm1, 20h ; mm1 = [xx xx xx xx d c b a] + movd [r0], mm1 + + psrlq mm2, 20h ; mm2 = [xx xx xx xx h g f e] + movd [r0+r1], mm2 + + movq mm4, mm3 + psllq mm4, 20h + psrlq mm4, 38h ; mm4 = [xx xx xx xx xx xx xx i] + + movq mm5, mm3 + psllq mm5, 28h + psrlq mm5, 38h ; mm5 = [xx xx xx xx xx xx xx j] + + psllq mm1, 8h + pxor mm4, mm1 ; mm4 = [xx xx xx xx c b a i] + movd [r0+2*r1], mm4 + + psllq mm2, 8h + pxor mm5, mm2 ; 
mm5 = [xx xx xx xx g f e j] + lea r0, [r0+2*r1] + movd [r0+r1], mm5 + DEINIT_X86_32_PIC + WELSEMMS + ret + +;******************************************************************************* +; lt|t0|t1|t2|t3|t4|t5|t6|t7 +; l0| +; l1| +; l2| +; l3| +; lt,t0,t1,t2,t3 will never been used +; destination: +; |a |b |c |d | +; |b |c |d |e | +; |c |d |e |f | +; |d |e |f |g | + +; a = (2 + t0 + t2 + (t1<<1))>>2 +; b = (2 + t1 + t3 + (t2<<1))>>2 +; c = (2 + t2 + t4 + (t3<<1))>>2 +; d = (2 + t3 + t5 + (t4<<1))>>2 + +; e = (2 + t4 + t6 + (t5<<1))>>2 +; f = (2 + t5 + t7 + (t6<<1))>>2 +; g = (2 + t6 + t7 + (t7<<1))>>2 + +; [g f e d c b a] --> mov to memory +; +; void WelsDecoderI4x4LumaPredDDL_mmx(uint8_t *pPred, const int32_t kiStride) +;******************************************************************************* +WELS_EXTERN WelsDecoderI4x4LumaPredDDL_mmx + %assign push_num 0 + INIT_X86_32_PIC r3 + LOAD_2_PARA + SIGN_EXTENSION r1, r1d + mov r2, r0 + sub r2, r1 + movq mm0, [r2] ; mm0 = [t7 t6 t5 t4 t3 t2 t1 t0] + movq mm1, mm0 + movq mm2, mm0 + + movq mm3, mm0 + psrlq mm3, 38h + psllq mm3, 38h ; mm3 = [t7 xx xx xx xx xx xx xx] + + psllq mm1, 8h ; mm1 = [t6 t5 t4 t3 t2 t1 t0 xx] + psrlq mm2, 8h + pxor mm2, mm3 ; mm2 = [t7 t7 t6 t5 t4 t3 t2 t1] + + movq mm3, mm1 + pavgb mm1, mm2 + pxor mm3, mm2 ; find odd value in the lowest bit of each byte + pand mm3, [pic(mmx_01bytes)] ; set the odd bit + psubusb mm1, mm3 ; decrease 1 from odd bytes + + pavgb mm0, mm1 ; mm0 = [g f e d c b a xx] + + psrlq mm0, 8h + movd [r0], mm0 + psrlq mm0, 8h + movd [r0+r1], mm0 + psrlq mm0, 8h + movd [r0+2*r1], mm0 + psrlq mm0, 8h + lea r0, [r0+2*r1] + movd [r0+r1], mm0 + DEINIT_X86_32_PIC + WELSEMMS + ret + + +;******************************************************************************* +; lt|t0|t1|t2|t3|t4|t5|t6|t7 +; l0| +; l1| +; l2| +; l3| +; lt,t0,t1,t2,t3 will never been used +; destination: +; |a |b |c |d | +; |e |f |g |h | +; |b |c |d |i | +; |f |g |h |j | + +; a = (1 + t0 + t1)>>1 +; b = (1 
+ t1 + t2)>>1 +; c = (1 + t2 + t3)>>1 +; d = (1 + t3 + t4)>>1 +; i = (1 + t4 + t5)>>1 + +; e = (2 + t0 + (t1<<1) + t2)>>2 +; f = (2 + t1 + (t2<<1) + t3)>>2 +; g = (2 + t2 + (t3<<1) + t4)>>2 +; h = (2 + t3 + (t4<<1) + t5)>>2 +; j = (2 + t4 + (t5<<1) + t6)>>2 + +; [i d c b a] + [j h g f e] --> mov to memory +; +; void WelsDecoderI4x4LumaPredVL_mmx(uint8_t *pPred, const int32_t kiStride) +;******************************************************************************* +WELS_EXTERN WelsDecoderI4x4LumaPredVL_mmx + %assign push_num 0 + INIT_X86_32_PIC r3 + LOAD_2_PARA + SIGN_EXTENSION r1, r1d + mov r2, r0 + + sub r2, r1 + movq mm0, [r2] ; mm0 = [t7 t6 t5 t4 t3 t2 t1 t0] + movq mm1, mm0 + movq mm2, mm0 + + psrlq mm1, 8h ; mm1 = [xx t7 t6 t5 t4 t3 t2 t1] + psrlq mm2, 10h ; mm2 = [xx xx t7 t6 t5 t4 t3 t2] + + movq mm3, mm1 + pavgb mm3, mm0 ; mm3 = [xx xx xx i d c b a] + + movq mm4, mm2 + pavgb mm2, mm0 + pxor mm4, mm0 ; find odd value in the lowest bit of each byte + pand mm4, [pic(mmx_01bytes)] ; set the odd bit + psubusb mm2, mm4 ; decrease 1 from odd bytes + + pavgb mm2, mm1 ; mm2 = [xx xx xx j h g f e] + + movd [r0], mm3 + psrlq mm3, 8h + movd [r0+2*r1], mm3 + + movd [r0+r1], mm2 + psrlq mm2, 8h + lea r0, [r0+2*r1] + movd [r0+r1], mm2 + DEINIT_X86_32_PIC + WELSEMMS + ret + +;******************************************************************************* +; +; void WelsDecoderIChromaPredDc_sse2(uint8_t *pPred, const int32_t kiStride) +;******************************************************************************* +WELS_EXTERN WelsDecoderIChromaPredDc_sse2 + push r3 + push r4 + %assign push_num 2 + INIT_X86_32_PIC r5 + LOAD_2_PARA + SIGN_EXTENSION r1, r1d + mov r4, r0 + + sub r0, r1 + movq mm0, [r0] + + movzx r2, byte [r0+r1-0x01] ; l1 + lea r0, [r0+2*r1] + movzx r3, byte [r0-0x01] ; l2 + add r2, r3 + movzx r3, byte [r0+r1-0x01] ; l3 + add r2, r3 + lea r0, [r0+2*r1] + movzx r3, byte [r0-0x01] ; l4 + add r2, r3 + movd mm1, r2d ; mm1 = l1+l2+l3+l4 + + movzx r2, byte 
[r0+r1-0x01] ; l5 + lea r0, [r0+2*r1] + movzx r3, byte [r0-0x01] ; l6 + add r2, r3 + movzx r3, byte [r0+r1-0x01] ; l7 + add r2, r3 + lea r0, [r0+2*r1] + movzx r3, byte [r0-0x01] ; l8 + add r2, r3 + movd mm2, r2d ; mm2 = l5+l6+l7+l8 + + movq mm3, mm0 + psrlq mm0, 0x20 + psllq mm3, 0x20 + psrlq mm3, 0x20 + pxor mm4, mm4 + psadbw mm0, mm4 + psadbw mm3, mm4 ; sum1 = mm3+mm1, sum2 = mm0, sum3 = mm2 + + paddq mm3, mm1 + movq mm1, mm2 + paddq mm1, mm0; ; sum1 = mm3, sum2 = mm0, sum3 = mm2, sum4 = mm1 + + movq mm4, [pic(mmx_0x02)] + + paddq mm0, mm4 + psrlq mm0, 0x02 + + paddq mm2, mm4 + psrlq mm2, 0x02 + + paddq mm3, mm4 + paddq mm3, mm4 + psrlq mm3, 0x03 + + paddq mm1, mm4 + paddq mm1, mm4 + psrlq mm1, 0x03 + + pmuludq mm0, [pic(mmx_01bytes)] + pmuludq mm3, [pic(mmx_01bytes)] + psllq mm0, 0x20 + pxor mm0, mm3 ; mm0 = m_up + + pmuludq mm2, [pic(mmx_01bytes)] + pmuludq mm1, [pic(mmx_01bytes)] + psllq mm1, 0x20 + pxor mm1, mm2 ; mm2 = m_down + + movq [r4], mm0 + movq [r4+r1], mm0 + movq [r4+2*r1], mm0 + lea r4, [r4+2*r1] + movq [r4+r1], mm0 + + movq [r4+2*r1], mm1 + lea r4, [r4+2*r1] + movq [r4+r1], mm1 + movq [r4+2*r1], mm1 + lea r4, [r4+2*r1] + movq [r4+r1], mm1 + + DEINIT_X86_32_PIC + pop r4 + pop r3 + WELSEMMS + ret + + + +;******************************************************************************* +; +; void WelsDecoderI16x16LumaPredDc_sse2(uint8_t *pPred, const int32_t kiStride) +;******************************************************************************* +WELS_EXTERN WelsDecoderI16x16LumaPredDc_sse2 + push r3 + push r4 + %assign push_num 2 + INIT_X86_32_PIC r5 + LOAD_2_PARA + SIGN_EXTENSION r1, r1d + mov r4, r0 + sub r0, r1 + movdqa xmm0, [r0] ; read one row + pxor xmm1, xmm1 + psadbw xmm0, xmm1 + movdqa xmm1, xmm0 + psrldq xmm1, 0x08 + pslldq xmm0, 0x08 + psrldq xmm0, 0x08 + paddw xmm0, xmm1 + + movzx r2, byte [r0+r1-0x01] + movzx r3, byte [r0+2*r1-0x01] + add r2, r3 + lea r0, [r0+r1] + LOAD_2_LEFT_AND_ADD + LOAD_2_LEFT_AND_ADD + LOAD_2_LEFT_AND_ADD + 
LOAD_2_LEFT_AND_ADD + LOAD_2_LEFT_AND_ADD + LOAD_2_LEFT_AND_ADD + LOAD_2_LEFT_AND_ADD + add r2, 0x10 + movd xmm1, r2d + paddw xmm0, xmm1 + psrld xmm0, 0x05 + pmuludq xmm0, [pic(mmx_01bytes)] + pshufd xmm0, xmm0, 0 + + movdqa [r4], xmm0 + movdqa [r4+r1], xmm0 + movdqa [r4+2*r1], xmm0 + lea r4, [r4+2*r1] + + movdqa [r4+r1], xmm0 + movdqa [r4+2*r1], xmm0 + lea r4, [r4+2*r1] + + movdqa [r4+r1], xmm0 + movdqa [r4+2*r1], xmm0 + lea r4, [r4+2*r1] + + movdqa [r4+r1], xmm0 + movdqa [r4+2*r1], xmm0 + lea r4, [r4+2*r1] + + movdqa [r4+r1], xmm0 + movdqa [r4+2*r1], xmm0 + lea r4, [r4+2*r1] + + movdqa [r4+r1], xmm0 + movdqa [r4+2*r1], xmm0 + lea r4, [r4+2*r1] + + movdqa [r4+r1], xmm0 + movdqa [r4+2*r1], xmm0 + lea r4, [r4+2*r1] + + movdqa [r4+r1], xmm0 + + DEINIT_X86_32_PIC + pop r4 + pop r3 + + ret + +;******************************************************************************* +; for intra prediction as follows, 11/19/2010 +;******************************************************************************* + +;******************************************************************************* +; void WelsDecoderI16x16LumaPredDcTop_sse2(uint8_t *pPred, const int32_t kiStride) +;******************************************************************************* +WELS_EXTERN WelsDecoderI16x16LumaPredDcTop_sse2 + %assign push_num 0 + LOAD_2_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + mov r2, r0 + sub r2, r1 + movdqa xmm0, [r2] ; pPred-kiStride, top line + pxor xmm7, xmm7 + psadbw xmm0, xmm7 + movdqa xmm1, xmm0 + psrldq xmm1, 8 + paddw xmm0, xmm1 + xor r2, r2 + movd r2d, xmm0 + ;movdqa xmm1, xmm0 + ;punpcklbw xmm0, xmm7 + ;punpckhbw xmm1, xmm7 + + ;paddw xmm0, xmm1 ; (ub.max(ff) << 4) will not excceed of uw, so can perform it in unit of unsigned word scope + ;pshufd xmm1, xmm0, 04eh ; 01001110, w3w2w1w0,w7w6w5w4 + ;paddw xmm0, xmm1 ; w3+7 w2+6 w1+5 w0+4 w3+7 w2+6 w1+5 w0+4 + ;pshufd xmm1, xmm0, 0b1h ; 10110001, w1+5 w0+4 w3+7 w2+6 w1+5 w0+4 w3+7 w2+6 + ;paddw xmm0, xmm1 ; w_o w_e w_o w_e 
w_o w_e w_o w_e (w_o=1+3+5+7, w_e=0+2+4+6) + ;pshuflw xmm1, xmm0, 0b1h ; 10110001 + ;paddw xmm0, xmm1 ; sum in word unit (x8) + ;xor r3, r3 + ;movd r3d, xmm0 + ;and edx, 0ffffh + + add r2, 8 + sar r2, 4 + SSE2_Copy16Times xmm1, r2d + ;mov dh, dl + ;mov r2, edx + ;shl r2, 010h + ;or edx, r2 + ;movd xmm1, edx + ;pshufd xmm0, xmm1, 00h + ;movdqa xmm1, xmm0 + movdqa xmm0, xmm1 + lea r2, [2*r1+r1] ; 3*kiStride + + movdqa [r0], xmm0 + movdqa [r0+r1], xmm1 + movdqa [r0+2*r1], xmm0 + movdqa [r0+r2], xmm1 + + lea r0, [r0+4*r1] + movdqa [r0], xmm0 + movdqa [r0+r1], xmm1 + movdqa [r0+2*r1], xmm0 + movdqa [r0+r2], xmm1 + + lea r0, [r0+4*r1] + movdqa [r0], xmm0 + movdqa [r0+r1], xmm1 + movdqa [r0+2*r1], xmm0 + movdqa [r0+r2], xmm1 + + lea r0, [r0+4*r1] + movdqa [r0], xmm0 + movdqa [r0+r1], xmm1 + movdqa [r0+2*r1], xmm0 + movdqa [r0+r2], xmm1 + + POP_XMM + ret + +;******************************************************************************* +; void WelsDecoderI16x16LumaPredDcNA_sse2(uint8_t *pPred, const int32_t kiStride) +;******************************************************************************* +WELS_EXTERN WelsDecoderI16x16LumaPredDcNA_sse2 + %assign push_num 0 + INIT_X86_32_PIC r3 + LOAD_2_PARA + SIGN_EXTENSION r1, r1d + lea r2, [2*r1+r1] ; 3*kiStride + + movdqa xmm0, [pic(sse2_dc_0x80)] + movdqa xmm1, xmm0 + movdqa [r0], xmm0 + movdqa [r0+r1], xmm1 + movdqa [r0+2*r1], xmm0 + movdqa [r0+r2], xmm1 + lea r0, [r0+4*r1] + movdqa [r0], xmm0 + movdqa [r0+r1], xmm1 + movdqa [r0+2*r1], xmm0 + movdqa [r0+r2], xmm1 + lea r0, [r0+4*r1] + movdqa [r0], xmm0 + movdqa [r0+r1], xmm1 + movdqa [r0+2*r1], xmm0 + movdqa [r0+r2], xmm1 + lea r0, [r0+4*r1] + movdqa [r0], xmm0 + movdqa [r0+r1], xmm1 + movdqa [r0+2*r1], xmm0 + movdqa [r0+r2], xmm1 + + DEINIT_X86_32_PIC + ret + +;******************************************************************************* +; void WelsDecoderIChromaPredDcLeft_mmx(uint8_t *pPred, const int32_t kiStride) 
+;******************************************************************************* +WELS_EXTERN WelsDecoderIChromaPredDcLeft_mmx + push r3 + push r4 + %assign push_num 2 + LOAD_2_PARA + SIGN_EXTENSION r1, r1d + mov r4, r0 + ; for left + dec r0 + xor r2, r2 + xor r3, r3 + movzx r2, byte [r0] + movzx r3, byte [r0+r1] + add r2, r3 + lea r0, [r0+2*r1] + movzx r3, byte [r0] + add r2, r3 + movzx r3, byte [r0+r1] + add r2, r3 + add r2, 02h + sar r2, 02h + ;SSE2_Copy16Times mm0, r2d + mov r3, r2 + sal r3, 8 + or r2, r3 + movd mm1, r2d + pshufw mm0, mm1, 00h + ;mov bh, bl + ;movd mm1, ebx + ;pshufw mm0, mm1, 00h ; up64 + movq mm1, mm0 + xor r2, r2 + lea r0, [r0+2*r1] + movzx r2, byte [r0] + movzx r3, byte [r0+r1] + add r2, r3 + lea r0, [r0+2*r1] + movzx r3, byte [r0] + add r2, r3 + movzx r3, byte [r0+r1] + add r2, r3 + add r2, 02h + sar r2, 02h + mov r3, r2 + sal r3, 8 + or r2, r3 + movd mm3, r2d + pshufw mm2, mm3, 00h + ;mov bh, bl + ;movd mm3, ebx + ;pshufw mm2, mm3, 00h ; down64 + ;SSE2_Copy16Times mm2, r2d + movq mm3, mm2 + lea r2, [2*r1+r1] + movq [r4], mm0 + movq [r4+r1], mm1 + movq [r4+2*r1], mm0 + movq [r4+r2], mm1 + lea r4, [r4+4*r1] + movq [r4], mm2 + movq [r4+r1], mm3 + movq [r4+2*r1], mm2 + movq [r4+r2], mm3 + pop r4 + pop r3 + emms + ret + +;******************************************************************************* +; void WelsDecoderIChromaPredDcTop_sse2(uint8_t *pPred, const int32_t kiStride) +;******************************************************************************* +WELS_EXTERN WelsDecoderIChromaPredDcTop_sse2 + %assign push_num 0 + LOAD_2_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + mov r2, r0 + sub r2, r1 + movq xmm0, [r2] ; top: 8x1 pixels + pxor xmm7, xmm7 + punpcklbw xmm0, xmm7 ; ext 8x2 words + pshufd xmm1, xmm0, 0B1h ; 10110001 B, w5 w4 w7 w6 w1 w0 w3 w2 + paddw xmm0, xmm1 ; w5+7 w4+6 w5+7 w4+6 w1+3 w0+2 w1+3 w0+2 + movdqa xmm1, xmm0 + pshuflw xmm2, xmm0, 0B1h ; 10110001 B, .. 
w0+2 w1+3 w0+2 w1+3 + pshufhw xmm3, xmm1, 0B1h ; 10110001 B, w4+6 w5+7 w4+6 w5+7 .. + paddw xmm0, xmm2 ; .. w0+..+3 w0+..+3 w0+..+3 w0+..+3 + paddw xmm1, xmm3 ; w4+..+7 w4+..+7 w4+..+7 w4+..+7 .. + punpckhqdq xmm1, xmm7 + punpcklqdq xmm0, xmm1 ; sum1 sum1 sum1 sum1 sum0 sum0 sum0 sum0 +%ifdef X86_32_PICASM + pcmpeqw xmm6, xmm6 + psrlw xmm6, 15 + psllw xmm6, 1 +%else + movdqa xmm6, [sse2_wd_0x02] +%endif + paddw xmm0, xmm6 + psraw xmm0, 02h + packuswb xmm0, xmm7 + lea r2, [2*r1+r1] + movq [r0], xmm0 + movq [r0+r1], xmm0 + movq [r0+2*r1], xmm0 + movq [r0+r2], xmm0 + lea r0, [r0+4*r1] + movq [r0], xmm0 + movq [r0+r1], xmm0 + movq [r0+2*r1], xmm0 + movq [r0+r2], xmm0 + POP_XMM + ret + +;******************************************************************************* +; void WelsDecoderIChromaPredDcNA_mmx(uint8_t *pPred, const int32_t kiStride) +;******************************************************************************* +WELS_EXTERN WelsDecoderIChromaPredDcNA_mmx + %assign push_num 0 + INIT_X86_32_PIC r3 + LOAD_2_PARA + SIGN_EXTENSION r1, r1d + lea r2, [2*r1+r1] + movq mm0, [pic(sse2_dc_0x80)] + movq mm1, mm0 + movq [r0], mm0 + movq [r0+r1], mm1 + movq [r0+2*r1], mm0 + movq [r0+r2], mm1 + lea r0, [r0+4*r1] + movq [r0], mm0 + movq [r0+r1], mm1 + movq [r0+2*r1], mm0 + movq [r0+r2], mm1 + DEINIT_X86_32_PIC + emms + ret + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/plus/inc/welsDecoderExt.h b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/plus/inc/welsDecoderExt.h new file mode 100644 index 000000000..cfacbc83f --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/plus/inc/welsDecoderExt.h @@ -0,0 +1,168 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * WelsDecoderExt.h + * + * Abstract + * Cisco OpenH264 decoder extension utilization interface + * + * History + * 3/12/2009 Created + * + * + *************************************************************************/ +#if !defined(WELS_PLUS_WELSDECODEREXT_H) +#define WELS_PLUS_WELSDECODEREXT_H + +#include "codec_api.h" +#include "codec_app_def.h" +#include "decoder_context.h" +#include "welsCodecTrace.h" +#include "cpu.h" + +class ISVCDecoder; + +namespace WelsDec { + +//#define OUTPUT_BIT_STREAM ////for test to output bitstream + +class CWelsDecoder : public ISVCDecoder { + public: + CWelsDecoder (void); + virtual ~CWelsDecoder(); + + virtual long EXTAPI Initialize (const SDecodingParam* pParam); + virtual long EXTAPI Uninitialize(); + + /*************************************************************************** + * Description: + * Decompress one frame, and output I420 or RGB24(in the future) decoded stream and its length. + * Input parameters (names as declared on the decode methods below): + * Parameter TYPE Description + * kpSrc const unsigned char* the h264 stream to decode + * kiSrcLen const int the length of the h264 stream + * ppDst unsigned char** buffer pointer of decoded data + * pDstInfo SBufferInfo* information provided to API including width, height, SW/HW option, etc (DecodeFrameNoDelay / DecodeFrame2 / FlushFrame) + * + * return: if decode frame success return 0, otherwise corresponding error returned.
+ ***************************************************************************/ + virtual DECODING_STATE EXTAPI DecodeFrame (const unsigned char* kpSrc, + const int kiSrcLen, + unsigned char** ppDst, + int* pStride, + int& iWidth, + int& iHeight); + + virtual DECODING_STATE EXTAPI DecodeFrameNoDelay (const unsigned char* kpSrc, + const int kiSrcLen, + unsigned char** ppDst, + SBufferInfo* pDstInfo); + + virtual DECODING_STATE EXTAPI DecodeFrame2 (const unsigned char* kpSrc, + const int kiSrcLen, + unsigned char** ppDst, + SBufferInfo* pDstInfo); + + virtual DECODING_STATE EXTAPI FlushFrame (unsigned char** ppDst, + SBufferInfo* pDstInfo); + + virtual DECODING_STATE EXTAPI DecodeParser (const unsigned char* kpSrc, + const int kiSrcLen, + SParserBsInfo* pDstInfo); + virtual DECODING_STATE EXTAPI DecodeFrameEx (const unsigned char* kpSrc, + const int kiSrcLen, + unsigned char* pDst, + int iDstStride, + int& iDstLen, + int& iWidth, + int& iHeight, + int& color_format); + + virtual long EXTAPI SetOption (DECODER_OPTION eOptID, void* pOption); + virtual long EXTAPI GetOption (DECODER_OPTION eOptID, void* pOption); + + public: + DECODING_STATE DecodeFrame2WithCtx (PWelsDecoderContext pCtx, const unsigned char* kpSrc, const int kiSrcLen, + unsigned char** ppDst, SBufferInfo* pDstInfo); + DECODING_STATE ParseAccessUnit (SWelsDecoderThreadCTX& sThreadCtx); + + private: + welsCodecTrace* m_pWelsTrace; + uint32_t m_uiDecodeTimeStamp; + bool m_bIsBaseline; + int32_t m_iCpuCount; + int32_t m_iThreadCount; + int32_t m_iCtxCount; + PPicBuff m_pPicBuff; + bool m_bParamSetsLostFlag; + bool m_bFreezeOutput; + int32_t m_DecCtxActiveCount; + PWelsDecoderThreadCTX m_pDecThrCtx; + PWelsDecoderThreadCTX m_pLastDecThrCtx; + int32_t m_iLastBufferedIdx; + WELS_MUTEX m_csDecoder; + SWelsDecEvent m_sBufferingEvent; + SWelsDecEvent m_sReleaseBufferEvent; + SWelsDecSemphore m_sIsBusy; + SPictInfo m_sPictInfoList[16]; + SPictReoderingStatus m_sReoderingStatus; + PWelsDecoderThreadCTX 
m_pDecThrCtxActive[WELS_DEC_MAX_NUM_CPU]; + SVlcTable m_sVlcTable; + SWelsLastDecPicInfo m_sLastDecPicInfo; + SDecoderStatistics m_sDecoderStatistics;// For real time debugging + + private: + int32_t InitDecoder (const SDecodingParam* pParam); + void UninitDecoder (void); + int32_t InitDecoderCtx (PWelsDecoderContext& pCtx, const SDecodingParam* pParam); + void UninitDecoderCtx (PWelsDecoderContext& pCtx); + int32_t ResetDecoder (PWelsDecoderContext& pCtx); + int32_t ThreadResetDecoder (PWelsDecoderContext& pCtx); + + void OutputStatisticsLog (SDecoderStatistics& sDecoderStatistics); + DECODING_STATE ReorderPicturesInDisplay (PWelsDecoderContext pCtx, unsigned char** ppDst, SBufferInfo* pDstInfo); + int ThreadDecodeFrameInternal (const unsigned char* kpSrc, const int kiSrcLen, unsigned char** ppDst, + SBufferInfo* pDstInfo); + void BufferingReadyPicture (PWelsDecoderContext pCtx, unsigned char** ppDst, SBufferInfo* pDstInfo); + void ReleaseBufferedReadyPicture (PWelsDecoderContext pCtx, unsigned char** ppDst, SBufferInfo* pDstInfo); + + void OpenDecoderThreads(); + void CloseDecoderThreads(); +#ifdef OUTPUT_BIT_STREAM + WelsFileHandle* m_pFBS; + WelsFileHandle* m_pFBSSize; +#endif//OUTPUT_BIT_STREAM + +}; + +} // namespace WelsDec + +#endif // !defined(WELS_PLUS_WELSDECODEREXT_H) diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/plus/src/welsDecoderExt.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/plus/src/welsDecoderExt.cpp new file mode 100644 index 000000000..083bd1395 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/plus/src/welsDecoderExt.cpp @@ -0,0 +1,1504 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * welsDecoderExt.cpp + * + * Abstract + * Cisco OpenH264 decoder extension utilization + * + * History + * 3/12/2009 Created + * + * + ************************************************************************/ +//#include +#include "welsDecoderExt.h" +#include "welsCodecTrace.h" +#include "codec_def.h" +#include "typedefs.h" +#include "memory_align.h" +#include "utils.h" +#include "version.h" + +//#include "macros.h" +#include "decoder.h" +#include "decoder_core.h" +#include "manage_dec_ref.h" +#include "error_concealment.h" + +#include "measure_time.h" +extern "C" { +#include "decoder_core.h" +#include "manage_dec_ref.h" +} +#include "error_code.h" +#include "crt_util_safe_x.h" // Safe CRT routines like util for cross platforms +#include +#if defined(_WIN32) /*&& defined(_DEBUG)*/ + +#include +#include +#include +#include +#include +#else +#include +#endif + +namespace WelsDec { + +////////////////////////////////////////////////////////////////////// +// Construction/Destruction +////////////////////////////////////////////////////////////////////// + +/*************************************************************************** +* Description: +* class CWelsDecoder constructor function, do initialization and +* alloc memory required +* +* Input parameters: none +* +* return: none +***************************************************************************/ +DECLARE_PROCTHREAD (pThrProcInit, p) { + SWelsDecThreadInfo* sThreadInfo = (SWelsDecThreadInfo*)p; +#if defined(WIN32) + _alloca (WELS_DEC_MAX_THREAD_STACK_SIZE * (sThreadInfo->uiThrNum + 1)); +#endif + return sThreadInfo->pThrProcMain (p); +} + +static DECODING_STATE ConstructAccessUnit (CWelsDecoder* pWelsDecoder, PWelsDecoderThreadCTX pThrCtx) { + int iRet = dsErrorFree; + //WelsMutexLock (&pWelsDecoder->m_csDecoder); + if (pThrCtx->pCtx->pLastThreadCtx != NULL) { + PWelsDecoderThreadCTX pLastThreadCtx = (PWelsDecoderThreadCTX) (pThrCtx->pCtx->pLastThreadCtx); + WAIT_EVENT 
(&pLastThreadCtx->sSliceDecodeStart, WELS_DEC_THREAD_WAIT_INFINITE); + RESET_EVENT (&pLastThreadCtx->sSliceDecodeStart); + } + pThrCtx->pDec = NULL; + if (GetThreadCount (pThrCtx->pCtx) > 1) { + RESET_EVENT (&pThrCtx->sSliceDecodeFinish); + } + iRet |= pWelsDecoder->DecodeFrame2WithCtx (pThrCtx->pCtx, NULL, 0, pThrCtx->ppDst, &pThrCtx->sDstInfo); + + //WelsMutexUnlock (&pWelsDecoder->m_csDecoder); + return (DECODING_STATE)iRet; +} + +DECLARE_PROCTHREAD (pThrProcFrame, p) { + SWelsDecoderThreadCTX* pThrCtx = (SWelsDecoderThreadCTX*)p; + while (1) { + RELEASE_SEMAPHORE (pThrCtx->sThreadInfo.sIsBusy); + RELEASE_SEMAPHORE (&pThrCtx->sThreadInfo.sIsIdle); + WAIT_SEMAPHORE (&pThrCtx->sThreadInfo.sIsActivated, WELS_DEC_THREAD_WAIT_INFINITE); + if (pThrCtx->sThreadInfo.uiCommand == WELS_DEC_THREAD_COMMAND_RUN) { + CWelsDecoder* pWelsDecoder = (CWelsDecoder*)pThrCtx->threadCtxOwner; + ConstructAccessUnit (pWelsDecoder, pThrCtx); + } else if (pThrCtx->sThreadInfo.uiCommand == WELS_DEC_THREAD_COMMAND_ABORT) { + break; + } + } + return 0; +} + +CWelsDecoder::CWelsDecoder (void) + : m_pWelsTrace (NULL), + m_uiDecodeTimeStamp (0), + m_bIsBaseline (false), + m_iCpuCount (1), + m_iThreadCount (0), + m_iCtxCount (1), + m_pPicBuff (NULL), + m_bParamSetsLostFlag (false), + m_bFreezeOutput (false), + m_DecCtxActiveCount (0), + m_pDecThrCtx (NULL), + m_pLastDecThrCtx (NULL), + m_iLastBufferedIdx (0) { +#ifdef OUTPUT_BIT_STREAM + char chFileName[1024] = { 0 }; //for .264 + int iBufUsed = 0; + int iBufLeft = 1023; + int iCurUsed; + + char chFileNameSize[1024] = { 0 }; //for .len + int iBufUsedSize = 0; + int iBufLeftSize = 1023; + int iCurUsedSize; +#endif//OUTPUT_BIT_STREAM + + + m_pWelsTrace = new welsCodecTrace(); + if (m_pWelsTrace != NULL) { + m_pWelsTrace->SetCodecInstance (this); + m_pWelsTrace->SetTraceLevel (WELS_LOG_ERROR); + + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "CWelsDecoder::CWelsDecoder() entry"); + } + + ResetReorderingPictureBuffers (&m_sReoderingStatus, 
m_sPictInfoList, true); + + m_iCpuCount = GetCPUCount(); + if (m_iCpuCount > WELS_DEC_MAX_NUM_CPU) { + m_iCpuCount = WELS_DEC_MAX_NUM_CPU; + } + + m_pDecThrCtx = new SWelsDecoderThreadCTX[m_iCtxCount]; + memset (m_pDecThrCtx, 0, sizeof (SWelsDecoderThreadCTX)*m_iCtxCount); + for (int32_t i = 0; i < WELS_DEC_MAX_NUM_CPU; ++i) { + m_pDecThrCtxActive[i] = NULL; + } +#ifdef OUTPUT_BIT_STREAM + SWelsTime sCurTime; + + WelsGetTimeOfDay (&sCurTime); + + iCurUsed = WelsSnprintf (chFileName, iBufLeft, "bs_0x%p_", (void*)this); + iCurUsedSize = WelsSnprintf (chFileNameSize, iBufLeftSize, "size_0x%p_", (void*)this); + + iBufUsed += iCurUsed; + iBufLeft -= iCurUsed; + if (iBufLeft > 0) { + iCurUsed = WelsStrftime (&chFileName[iBufUsed], iBufLeft, "%y%m%d%H%M%S", &sCurTime); + iBufUsed += iCurUsed; + iBufLeft -= iCurUsed; + } + + iBufUsedSize += iCurUsedSize; + iBufLeftSize -= iCurUsedSize; + if (iBufLeftSize > 0) { + iCurUsedSize = WelsStrftime (&chFileNameSize[iBufUsedSize], iBufLeftSize, "%y%m%d%H%M%S", &sCurTime); + iBufUsedSize += iCurUsedSize; + iBufLeftSize -= iCurUsedSize; + } + + if (iBufLeft > 0) { + iCurUsed = WelsSnprintf (&chFileName[iBufUsed], iBufLeft, ".%03.3u.264", WelsGetMillisecond (&sCurTime)); + iBufUsed += iCurUsed; + iBufLeft -= iCurUsed; + } + + if (iBufLeftSize > 0) { + iCurUsedSize = WelsSnprintf (&chFileNameSize[iBufUsedSize], iBufLeftSize, ".%03.3u.len", + WelsGetMillisecond (&sCurTime)); + iBufUsedSize += iCurUsedSize; + iBufLeftSize -= iCurUsedSize; + } + + + m_pFBS = WelsFopen (chFileName, "wb"); + m_pFBSSize = WelsFopen (chFileNameSize, "wb"); +#endif//OUTPUT_BIT_STREAM +} + +/*************************************************************************** +* Description: +* class CWelsDecoder destructor function, destroy allocced memory +* +* Input parameters: none +* +* return: none +***************************************************************************/ +CWelsDecoder::~CWelsDecoder() { + if (m_pWelsTrace != NULL) { + WelsLog 
(&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "CWelsDecoder::~CWelsDecoder()"); + } + CloseDecoderThreads(); + UninitDecoder(); + +#ifdef OUTPUT_BIT_STREAM + if (m_pFBS) { + WelsFclose (m_pFBS); + m_pFBS = NULL; + } + if (m_pFBSSize) { + WelsFclose (m_pFBSSize); + m_pFBSSize = NULL; + } +#endif//OUTPUT_BIT_STREAM + + if (m_pWelsTrace != NULL) { + delete m_pWelsTrace; + m_pWelsTrace = NULL; + } + if (m_pDecThrCtx != NULL) { + delete[] m_pDecThrCtx; + m_pDecThrCtx = NULL; + } +} + +long CWelsDecoder::Initialize (const SDecodingParam* pParam) { + int iRet = ERR_NONE; + if (m_pWelsTrace == NULL) { + return cmMallocMemeError; + } + + if (pParam == NULL) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "CWelsDecoder::Initialize(), invalid input argument."); + return cmInitParaError; + } + + // H.264 decoder initialization,including memory allocation,then open it ready to decode + iRet = InitDecoder (pParam); + if (iRet) + return iRet; + + return cmResultSuccess; +} + +long CWelsDecoder::Uninitialize() { + UninitDecoder(); + + return ERR_NONE; +} + +void CWelsDecoder::UninitDecoder (void) { + for (int32_t i = 0; i < m_iCtxCount; ++i) { + if (m_pDecThrCtx[i].pCtx != NULL) { + if (i > 0) { + WelsResetRefPicWithoutUnRef (m_pDecThrCtx[i].pCtx); + } + UninitDecoderCtx (m_pDecThrCtx[i].pCtx); + } + } +} + +void CWelsDecoder::OpenDecoderThreads() { + if (m_iThreadCount >= 1) { + m_uiDecodeTimeStamp = 0; + CREATE_SEMAPHORE (&m_sIsBusy, m_iThreadCount, m_iThreadCount, NULL); + WelsMutexInit (&m_csDecoder); + CREATE_EVENT (&m_sBufferingEvent, 1, 0, NULL); + SET_EVENT (&m_sBufferingEvent); + CREATE_EVENT (&m_sReleaseBufferEvent, 1, 0, NULL); + SET_EVENT (&m_sReleaseBufferEvent); + for (int32_t i = 0; i < m_iThreadCount; ++i) { + m_pDecThrCtx[i].sThreadInfo.uiThrMaxNum = m_iThreadCount; + m_pDecThrCtx[i].sThreadInfo.uiThrNum = i; + m_pDecThrCtx[i].sThreadInfo.uiThrStackSize = WELS_DEC_MAX_THREAD_STACK_SIZE; + m_pDecThrCtx[i].sThreadInfo.pThrProcMain = pThrProcFrame; + 
m_pDecThrCtx[i].sThreadInfo.sIsBusy = &m_sIsBusy; + m_pDecThrCtx[i].sThreadInfo.uiCommand = WELS_DEC_THREAD_COMMAND_RUN; + m_pDecThrCtx[i].threadCtxOwner = this; + m_pDecThrCtx[i].kpSrc = NULL; + m_pDecThrCtx[i].kiSrcLen = 0; + m_pDecThrCtx[i].ppDst = NULL; + m_pDecThrCtx[i].pDec = NULL; + CREATE_EVENT (&m_pDecThrCtx[i].sImageReady, 1, 0, NULL); + CREATE_EVENT (&m_pDecThrCtx[i].sSliceDecodeStart, 1, 0, NULL); + CREATE_EVENT (&m_pDecThrCtx[i].sSliceDecodeFinish, 1, 0, NULL); + CREATE_SEMAPHORE (&m_pDecThrCtx[i].sThreadInfo.sIsIdle, 0, 1, NULL); + CREATE_SEMAPHORE (&m_pDecThrCtx[i].sThreadInfo.sIsActivated, 0, 1, NULL); + CREATE_THREAD (&m_pDecThrCtx[i].sThreadInfo.sThrHandle, pThrProcInit, (void*) (& (m_pDecThrCtx[i]))); + } + } +} +void CWelsDecoder::CloseDecoderThreads() { + if (m_iThreadCount >= 1) { + for (int32_t i = 0; i < m_iThreadCount; i++) { //waiting the completion begun slices + WAIT_SEMAPHORE (&m_pDecThrCtx[i].sThreadInfo.sIsIdle, WELS_DEC_THREAD_WAIT_INFINITE); + m_pDecThrCtx[i].sThreadInfo.uiCommand = WELS_DEC_THREAD_COMMAND_ABORT; + RELEASE_SEMAPHORE (&m_pDecThrCtx[i].sThreadInfo.sIsActivated); + WAIT_THREAD (&m_pDecThrCtx[i].sThreadInfo.sThrHandle); + CLOSE_EVENT (&m_pDecThrCtx[i].sImageReady); + CLOSE_EVENT (&m_pDecThrCtx[i].sSliceDecodeStart); + CLOSE_EVENT (&m_pDecThrCtx[i].sSliceDecodeFinish); + CLOSE_SEMAPHORE (&m_pDecThrCtx[i].sThreadInfo.sIsIdle); + CLOSE_SEMAPHORE (&m_pDecThrCtx[i].sThreadInfo.sIsActivated); + } + WelsMutexDestroy (&m_csDecoder); + CLOSE_EVENT (&m_sBufferingEvent); + CLOSE_EVENT (&m_sReleaseBufferEvent); + CLOSE_SEMAPHORE (&m_sIsBusy); + } +} + +void CWelsDecoder::UninitDecoderCtx (PWelsDecoderContext& pCtx) { + if (pCtx != NULL) { + + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "CWelsDecoder::UninitDecoderCtx(), openh264 codec version = %s.", + VERSION_NUMBER); + + WelsEndDecoder (pCtx); + + if (pCtx->pMemAlign != NULL) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsDecoder::UninitDecoder(), verify 
memory usage (%d bytes) after free..", + pCtx->pMemAlign->WelsGetMemoryUsage()); + delete pCtx->pMemAlign; + pCtx->pMemAlign = NULL; + } + + if (NULL != pCtx) { + WelsFree (pCtx, "m_pDecContext"); + + pCtx = NULL; + } + if (m_iCtxCount <= 1) m_pDecThrCtx[0].pCtx = NULL; + } +} + +// the return value of this function is not suitable, it need report failure info to upper layer. +int32_t CWelsDecoder::InitDecoder (const SDecodingParam* pParam) { + + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsDecoder::init_decoder(), openh264 codec version = %s, ParseOnly = %d", + VERSION_NUMBER, (int32_t)pParam->bParseOnly); + if (m_iThreadCount >= 1 && pParam->bParseOnly) { + m_iThreadCount = 0; + } + OpenDecoderThreads(); + //reset decoder context + memset (&m_sDecoderStatistics, 0, sizeof (SDecoderStatistics)); + memset (&m_sLastDecPicInfo, 0, sizeof (SWelsLastDecPicInfo)); + memset (&m_sVlcTable, 0, sizeof (SVlcTable)); + UninitDecoder(); + WelsDecoderLastDecPicInfoDefaults (m_sLastDecPicInfo); + for (int32_t i = 0; i < m_iCtxCount; ++i) { + InitDecoderCtx (m_pDecThrCtx[i].pCtx, pParam); + if (m_iThreadCount >= 1) { + m_pDecThrCtx[i].pCtx->pThreadCtx = &m_pDecThrCtx[i]; + } + } + m_bParamSetsLostFlag = false; + m_bFreezeOutput = false; + return cmResultSuccess; +} + +// the return value of this function is not suitable, it need report failure info to upper layer. 
+int32_t CWelsDecoder::InitDecoderCtx (PWelsDecoderContext& pCtx, const SDecodingParam* pParam) { + + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsDecoder::init_decoder(), openh264 codec version = %s, ParseOnly = %d", + VERSION_NUMBER, (int32_t)pParam->bParseOnly); + + //reset decoder context + UninitDecoderCtx (pCtx); + pCtx = (PWelsDecoderContext)WelsMallocz (sizeof (SWelsDecoderContext), "m_pDecContext"); + if (NULL == pCtx) + return cmMallocMemeError; + int32_t iCacheLineSize = 16; // on chip cache line size in byte + pCtx->pMemAlign = new CMemoryAlign (iCacheLineSize); + WELS_VERIFY_RETURN_PROC_IF (cmMallocMemeError, (NULL == pCtx->pMemAlign), UninitDecoderCtx (pCtx)) + if (m_iCtxCount <= 1) m_pDecThrCtx[0].pCtx = pCtx; + //fill in default value into context + pCtx->pLastDecPicInfo = &m_sLastDecPicInfo; + pCtx->pDecoderStatistics = &m_sDecoderStatistics; + pCtx->pVlcTable = &m_sVlcTable; + pCtx->pPictInfoList = m_sPictInfoList; + pCtx->pPictReoderingStatus = &m_sReoderingStatus; + pCtx->pCsDecoder = &m_csDecoder; + WelsDecoderDefaults (pCtx, &m_pWelsTrace->m_sLogCtx); + WelsDecoderSpsPpsDefaults (pCtx->sSpsPpsCtx); + //check param and update decoder context + pCtx->pParam = (SDecodingParam*)pCtx->pMemAlign->WelsMallocz (sizeof (SDecodingParam), + "SDecodingParam"); + WELS_VERIFY_RETURN_PROC_IF (cmMallocMemeError, (NULL == pCtx->pParam), UninitDecoderCtx (pCtx)); + int32_t iRet = DecoderConfigParam (pCtx, pParam); + WELS_VERIFY_RETURN_IFNEQ (iRet, cmResultSuccess); + + //init decoder + WELS_VERIFY_RETURN_PROC_IF (cmMallocMemeError, WelsInitDecoder (pCtx, &m_pWelsTrace->m_sLogCtx), + UninitDecoderCtx (pCtx)) + pCtx->pPicBuff = NULL; + return cmResultSuccess; +} + +int32_t CWelsDecoder::ResetDecoder (PWelsDecoderContext& pCtx) { + // TBC: need to be modified when context and trace point are null + if (m_iThreadCount >= 1) { + ThreadResetDecoder (pCtx); + } else { + if (pCtx != NULL && m_pWelsTrace != NULL) { + WelsLog (&m_pWelsTrace->m_sLogCtx, 
WELS_LOG_INFO, "ResetDecoder(), context error code is %d", + pCtx->iErrorCode); + SDecodingParam sPrevParam; + memcpy (&sPrevParam, pCtx->pParam, sizeof (SDecodingParam)); + + WELS_VERIFY_RETURN_PROC_IF (cmInitParaError, InitDecoderCtx (pCtx, &sPrevParam), + UninitDecoderCtx (pCtx)); + } else if (m_pWelsTrace != NULL) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "ResetDecoder() failed as decoder context null"); + } + ResetReorderingPictureBuffers (&m_sReoderingStatus, m_sPictInfoList, false); + if (pCtx->pDstInfo) pCtx->pDstInfo->iBufferStatus = 0; + } + return ERR_INFO_UNINIT; +} + +int32_t CWelsDecoder::ThreadResetDecoder (PWelsDecoderContext& pCtx) { + // TBC: need to be modified when context and trace point are null + SDecodingParam sPrevParam; + if (pCtx != NULL && m_pWelsTrace != NULL) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "ResetDecoder(), context error code is %d", pCtx->iErrorCode); + memcpy (&sPrevParam, pCtx->pParam, sizeof (SDecodingParam)); + ResetReorderingPictureBuffers (&m_sReoderingStatus, m_sPictInfoList, true); + if (pCtx->pDstInfo) pCtx->pDstInfo->iBufferStatus = 0; + CloseDecoderThreads(); + UninitDecoder(); + InitDecoder (&sPrevParam); + } else if (m_pWelsTrace != NULL) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "ResetDecoder() failed as decoder context null"); + } + return ERR_INFO_UNINIT; +} + +/* + * Set Option + */ +long CWelsDecoder::SetOption (DECODER_OPTION eOptID, void* pOption) { + int iVal = 0; + if (eOptID == DECODER_OPTION_NUM_OF_THREADS) { + if (pOption != NULL) { + int32_t threadCount = * ((int32_t*)pOption); + if (threadCount < 0) threadCount = 0; + if (threadCount > m_iCpuCount) { + threadCount = m_iCpuCount; + } + if (threadCount > 3) { + threadCount = 3; + } + if (threadCount != m_iThreadCount) { + m_iThreadCount = threadCount; + if (m_pDecThrCtx != NULL) { + delete [] m_pDecThrCtx; + m_iCtxCount = m_iThreadCount == 0 ? 
1 : m_iThreadCount; + m_pDecThrCtx = new SWelsDecoderThreadCTX[m_iCtxCount]; + memset (m_pDecThrCtx, 0, sizeof (SWelsDecoderThreadCTX)*m_iCtxCount); + } + } + } + return cmResultSuccess; + } + for (int32_t i = 0; i < m_iCtxCount; ++i) { + PWelsDecoderContext pDecContext = m_pDecThrCtx[i].pCtx; + if (pDecContext == NULL && eOptID != DECODER_OPTION_TRACE_LEVEL && + eOptID != DECODER_OPTION_TRACE_CALLBACK && eOptID != DECODER_OPTION_TRACE_CALLBACK_CONTEXT) + return dsInitialOptExpected; + if (eOptID == DECODER_OPTION_END_OF_STREAM) { // Indicate bit-stream of the final frame to be decoded + if (pOption == NULL) + return cmInitParaError; + + iVal = * ((int*)pOption); // boolean value for whether enabled End Of Stream flag + + if (pDecContext == NULL) return dsInitialOptExpected; + + pDecContext->bEndOfStreamFlag = iVal ? true : false; + + return cmResultSuccess; + } else if (eOptID == DECODER_OPTION_ERROR_CON_IDC) { // Indicate error concealment status + if (pOption == NULL) + return cmInitParaError; + + if (pDecContext == NULL) return dsInitialOptExpected; + + iVal = * ((int*)pOption); // int value for error concealment idc + iVal = WELS_CLIP3 (iVal, (int32_t)ERROR_CON_DISABLE, (int32_t)ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE); + if ((pDecContext->pParam->bParseOnly) && (iVal != (int32_t)ERROR_CON_DISABLE)) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsDecoder::SetOption for ERROR_CON_IDC = %d not allowd for parse only!.", iVal); + return cmInitParaError; + } + + pDecContext->pParam->eEcActiveIdc = (ERROR_CON_IDC)iVal; + InitErrorCon (pDecContext); + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsDecoder::SetOption for ERROR_CON_IDC = %d.", iVal); + + return cmResultSuccess; + } else if (eOptID == DECODER_OPTION_TRACE_LEVEL) { + if (m_pWelsTrace) { + uint32_t level = * ((uint32_t*)pOption); + m_pWelsTrace->SetTraceLevel (level); + } + return cmResultSuccess; + } else if (eOptID == DECODER_OPTION_TRACE_CALLBACK) { + if 
(m_pWelsTrace) { + WelsTraceCallback callback = * ((WelsTraceCallback*)pOption); + m_pWelsTrace->SetTraceCallback (callback); + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsDecoder::SetOption():DECODER_OPTION_TRACE_CALLBACK callback = %p.", + callback); + } + return cmResultSuccess; + } else if (eOptID == DECODER_OPTION_TRACE_CALLBACK_CONTEXT) { + if (m_pWelsTrace) { + void* ctx = * ((void**)pOption); + m_pWelsTrace->SetTraceCallbackContext (ctx); + } + return cmResultSuccess; + } else if (eOptID == DECODER_OPTION_GET_STATISTICS) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_WARNING, + "CWelsDecoder::SetOption():DECODER_OPTION_GET_STATISTICS: this option is get-only!"); + return cmInitParaError; + } else if (eOptID == DECODER_OPTION_STATISTICS_LOG_INTERVAL) { + if (pOption) { + if (pDecContext == NULL) return dsInitialOptExpected; + pDecContext->pDecoderStatistics->iStatisticsLogInterval = (* ((unsigned int*)pOption)); + return cmResultSuccess; + } + } else if (eOptID == DECODER_OPTION_GET_SAR_INFO) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_WARNING, + "CWelsDecoder::SetOption():DECODER_OPTION_GET_SAR_INFO: this option is get-only!"); + return cmInitParaError; + } + } + return cmInitParaError; +} + +/* + * Get Option + */ +long CWelsDecoder::GetOption (DECODER_OPTION eOptID, void* pOption) { + int iVal = 0; + if (DECODER_OPTION_NUM_OF_THREADS == eOptID) { + * ((int*)pOption) = m_iThreadCount; + return cmResultSuccess; + } + PWelsDecoderContext pDecContext = m_pDecThrCtx[0].pCtx; + if (pDecContext == NULL) + return cmInitExpected; + + if (pOption == NULL) + return cmInitParaError; + + if (DECODER_OPTION_END_OF_STREAM == eOptID) { + iVal = pDecContext->bEndOfStreamFlag; + * ((int*)pOption) = iVal; + return cmResultSuccess; + } +#ifdef LONG_TERM_REF + else if (DECODER_OPTION_IDR_PIC_ID == eOptID) { + iVal = pDecContext->uiCurIdrPicId; + * ((int*)pOption) = iVal; + return cmResultSuccess; + } else if (DECODER_OPTION_FRAME_NUM == eOptID) { + iVal = 
pDecContext->iFrameNum; + * ((int*)pOption) = iVal; + return cmResultSuccess; + } else if (DECODER_OPTION_LTR_MARKING_FLAG == eOptID) { + iVal = pDecContext->bCurAuContainLtrMarkSeFlag; + * ((int*)pOption) = iVal; + return cmResultSuccess; + } else if (DECODER_OPTION_LTR_MARKED_FRAME_NUM == eOptID) { + iVal = pDecContext->iFrameNumOfAuMarkedLtr; + * ((int*)pOption) = iVal; + return cmResultSuccess; + } +#endif + else if (DECODER_OPTION_VCL_NAL == eOptID) { //feedback whether or not have VCL NAL in current AU + iVal = pDecContext->iFeedbackVclNalInAu; + * ((int*)pOption) = iVal; + return cmResultSuccess; + } else if (DECODER_OPTION_TEMPORAL_ID == eOptID) { //if have VCL NAL in current AU, then feedback the temporal ID + iVal = pDecContext->iFeedbackTidInAu; + * ((int*)pOption) = iVal; + return cmResultSuccess; + } else if (DECODER_OPTION_IS_REF_PIC == eOptID) { + iVal = pDecContext->iFeedbackNalRefIdc; + if (iVal > 0) + iVal = 1; + * ((int*)pOption) = iVal; + return cmResultSuccess; + } else if (DECODER_OPTION_ERROR_CON_IDC == eOptID) { + iVal = (int)pDecContext->pParam->eEcActiveIdc; + * ((int*)pOption) = iVal; + return cmResultSuccess; + } else if (DECODER_OPTION_GET_STATISTICS == eOptID) { // get decoder statistics info for real time debugging + SDecoderStatistics* pDecoderStatistics = (static_cast (pOption)); + + memcpy (pDecoderStatistics, pDecContext->pDecoderStatistics, sizeof (SDecoderStatistics)); + + if (pDecContext->pDecoderStatistics->uiDecodedFrameCount != 0) { //not original status + pDecoderStatistics->fAverageFrameSpeedInMs = (float) (pDecContext->dDecTime) / + (pDecContext->pDecoderStatistics->uiDecodedFrameCount); + pDecoderStatistics->fActualAverageFrameSpeedInMs = (float) (pDecContext->dDecTime) / + (pDecContext->pDecoderStatistics->uiDecodedFrameCount + pDecContext->pDecoderStatistics->uiFreezingIDRNum + + pDecContext->pDecoderStatistics->uiFreezingNonIDRNum); + } + return cmResultSuccess; + } else if (eOptID == 
DECODER_OPTION_STATISTICS_LOG_INTERVAL) { + if (pOption) { + iVal = pDecContext->pDecoderStatistics->iStatisticsLogInterval; + * ((unsigned int*)pOption) = iVal; + return cmResultSuccess; + } + } else if (DECODER_OPTION_GET_SAR_INFO == eOptID) { //get decoder SAR info in VUI + PVuiSarInfo pVuiSarInfo = (static_cast (pOption)); + memset (pVuiSarInfo, 0, sizeof (SVuiSarInfo)); + if (!pDecContext->pSps) { + return cmInitExpected; + } else { + pVuiSarInfo->uiSarWidth = pDecContext->pSps->sVui.uiSarWidth; + pVuiSarInfo->uiSarHeight = pDecContext->pSps->sVui.uiSarHeight; + pVuiSarInfo->bOverscanAppropriateFlag = pDecContext->pSps->sVui.bOverscanAppropriateFlag; + return cmResultSuccess; + } + } else if (DECODER_OPTION_PROFILE == eOptID) { + if (!pDecContext->pSps) { + return cmInitExpected; + } + iVal = (int)pDecContext->pSps->uiProfileIdc; + * ((int*)pOption) = iVal; + return cmResultSuccess; + } else if (DECODER_OPTION_LEVEL == eOptID) { + if (!pDecContext->pSps) { + return cmInitExpected; + } + iVal = (int)pDecContext->pSps->uiLevelIdc; + * ((int*)pOption) = iVal; + return cmResultSuccess; + } else if (DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER == eOptID) { + for (int32_t activeThread = 0; activeThread < m_DecCtxActiveCount; ++activeThread) { + WAIT_SEMAPHORE (&m_pDecThrCtxActive[activeThread]->sThreadInfo.sIsIdle, WELS_DEC_THREAD_WAIT_INFINITE); + RELEASE_SEMAPHORE (&m_pDecThrCtxActive[activeThread]->sThreadInfo.sIsIdle); + } + * ((int*)pOption) = m_sReoderingStatus.iNumOfPicts; + return cmResultSuccess; + } + + return cmInitParaError; +} + +DECODING_STATE CWelsDecoder::DecodeFrameNoDelay (const unsigned char* kpSrc, + const int kiSrcLen, + unsigned char** ppDst, + SBufferInfo* pDstInfo) { + int iRet = dsErrorFree; + if (m_iThreadCount >= 1) { + iRet = ThreadDecodeFrameInternal (kpSrc, kiSrcLen, ppDst, pDstInfo); + if (m_sReoderingStatus.iNumOfPicts) { + WAIT_EVENT (&m_sBufferingEvent, WELS_DEC_THREAD_WAIT_INFINITE); + RESET_EVENT (&m_sReleaseBufferEvent); + 
ReleaseBufferedReadyPicture (NULL, ppDst, pDstInfo); + SET_EVENT (&m_sReleaseBufferEvent); + } + return (DECODING_STATE)iRet; + } + //SBufferInfo sTmpBufferInfo; + //unsigned char* ppTmpDst[3] = {NULL, NULL, NULL}; + iRet = (int)DecodeFrame2 (kpSrc, kiSrcLen, ppDst, pDstInfo); + //memcpy (&sTmpBufferInfo, pDstInfo, sizeof (SBufferInfo)); + //ppTmpDst[0] = ppDst[0]; + //ppTmpDst[1] = ppDst[1]; + //ppTmpDst[2] = ppDst[2]; + iRet |= DecodeFrame2 (NULL, 0, ppDst, pDstInfo); + //if ((pDstInfo->iBufferStatus == 0) && (sTmpBufferInfo.iBufferStatus == 1)) { + //memcpy (pDstInfo, &sTmpBufferInfo, sizeof (SBufferInfo)); + //ppDst[0] = ppTmpDst[0]; + //ppDst[1] = ppTmpDst[1]; + //ppDst[2] = ppTmpDst[2]; + //} + return (DECODING_STATE)iRet; +} + +DECODING_STATE CWelsDecoder::DecodeFrame2WithCtx (PWelsDecoderContext pDecContext, const unsigned char* kpSrc, + const int kiSrcLen, + unsigned char** ppDst, + SBufferInfo* pDstInfo) { + if (pDecContext == NULL || pDecContext->pParam == NULL) { + if (m_pWelsTrace != NULL) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "Call DecodeFrame2 without Initialize.\n"); + } + return dsInitialOptExpected; + } + + if (pDecContext->pParam->bParseOnly) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "bParseOnly should be false for this API calling! 
\n"); + pDecContext->iErrorCode |= dsInvalidArgument; + return dsInvalidArgument; + } + if (CheckBsBuffer (pDecContext, kiSrcLen)) { + if (ResetDecoder (pDecContext)) + return dsOutOfMemory; + + return dsErrorFree; + } + if (kiSrcLen > 0 && kpSrc != NULL) { +#ifdef OUTPUT_BIT_STREAM + if (m_pFBS) { + WelsFwrite (kpSrc, sizeof (unsigned char), kiSrcLen, m_pFBS); + WelsFflush (m_pFBS); + } + if (m_pFBSSize) { + WelsFwrite (&kiSrcLen, sizeof (int), 1, m_pFBSSize); + WelsFflush (m_pFBSSize); + } +#endif//OUTPUT_BIT_STREAM + pDecContext->bEndOfStreamFlag = false; + if (GetThreadCount (pDecContext) <= 0) { + pDecContext->uiDecodingTimeStamp = ++m_uiDecodeTimeStamp; + } + } else { + //For application MODE, the error detection should be added for safe. + //But for CONSOLE MODE, when decoding LAST AU, kiSrcLen==0 && kpSrc==NULL. + pDecContext->bEndOfStreamFlag = true; + pDecContext->bInstantDecFlag = true; + } + + int64_t iStart, iEnd; + iStart = WelsTime(); + + if (GetThreadCount (pDecContext) <= 1) { + ppDst[0] = ppDst[1] = ppDst[2] = NULL; + } + pDecContext->iErrorCode = dsErrorFree; //initialize at the starting of AU decoding. 
+ pDecContext->iFeedbackVclNalInAu = FEEDBACK_UNKNOWN_NAL; //initialize + unsigned long long uiInBsTimeStamp = pDstInfo->uiInBsTimeStamp; + if (GetThreadCount (pDecContext) <= 1) { + memset (pDstInfo, 0, sizeof (SBufferInfo)); + } + pDstInfo->uiInBsTimeStamp = uiInBsTimeStamp; +#ifdef LONG_TERM_REF + pDecContext->bReferenceLostAtT0Flag = false; //initialize for LTR + pDecContext->bCurAuContainLtrMarkSeFlag = false; + pDecContext->iFrameNumOfAuMarkedLtr = 0; + pDecContext->iFrameNum = -1; //initialize +#endif + + pDecContext->iFeedbackTidInAu = -1; //initialize + pDecContext->iFeedbackNalRefIdc = -1; //initialize + if (pDstInfo) { + pDstInfo->uiOutYuvTimeStamp = 0; + pDecContext->uiTimeStamp = pDstInfo->uiInBsTimeStamp; + } else { + pDecContext->uiTimeStamp = 0; + } + WelsDecodeBs (pDecContext, kpSrc, kiSrcLen, ppDst, + pDstInfo, NULL); //iErrorCode has been modified in this function + pDecContext->bInstantDecFlag = false; //reset no-delay flag + if (pDecContext->iErrorCode) { + EWelsNalUnitType eNalType = + NAL_UNIT_UNSPEC_0; //for NBR, IDR frames are expected to decode as followed if error decoding an IDR currently + + eNalType = pDecContext->sCurNalHead.eNalUnitType; + if (pDecContext->iErrorCode & dsOutOfMemory) { + if (ResetDecoder (pDecContext)) { + return dsOutOfMemory; + } + return dsErrorFree; + } + if (pDecContext->iErrorCode & dsRefListNullPtrs) { + if (ResetDecoder (pDecContext)) { + return dsRefListNullPtrs; + } + return dsErrorFree; + } + //for AVC bitstream (excluding AVC with temporal scalability, including TP), as long as error occur, SHOULD notify upper layer key frame loss. 
+ if ((IS_PARAM_SETS_NALS (eNalType) || NAL_UNIT_CODED_SLICE_IDR == eNalType) || + (VIDEO_BITSTREAM_AVC == pDecContext->eVideoType)) { + if (pDecContext->pParam->eEcActiveIdc == ERROR_CON_DISABLE) { +#ifdef LONG_TERM_REF + pDecContext->bParamSetsLostFlag = true; +#else + pDecContext->bReferenceLostAtT0Flag = true; +#endif + } + } + + if (pDecContext->bPrintFrameErrorTraceFlag) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "decode failed, failure type:%d \n", + pDecContext->iErrorCode); + pDecContext->bPrintFrameErrorTraceFlag = false; + } else { + pDecContext->iIgnoredErrorInfoPacketCount++; + if (pDecContext->iIgnoredErrorInfoPacketCount == INT_MAX) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_WARNING, "continuous error reached INT_MAX! Restart as 0."); + pDecContext->iIgnoredErrorInfoPacketCount = 0; + } + } + if ((pDecContext->pParam->eEcActiveIdc != ERROR_CON_DISABLE) && (pDstInfo->iBufferStatus == 1)) { + //TODO after dec status updated + pDecContext->iErrorCode |= dsDataErrorConcealed; + + pDecContext->pDecoderStatistics->uiDecodedFrameCount++; + if (pDecContext->pDecoderStatistics->uiDecodedFrameCount == 0) { //exceed max value of uint32_t + ResetDecStatNums (pDecContext->pDecoderStatistics); + pDecContext->pDecoderStatistics->uiDecodedFrameCount++; + } + int32_t iMbConcealedNum = pDecContext->iMbEcedNum + pDecContext->iMbEcedPropNum; + pDecContext->pDecoderStatistics->uiAvgEcRatio = pDecContext->iMbNum == 0 ? + (pDecContext->pDecoderStatistics->uiAvgEcRatio * pDecContext->pDecoderStatistics->uiEcFrameNum) : (( + pDecContext->pDecoderStatistics->uiAvgEcRatio * pDecContext->pDecoderStatistics->uiEcFrameNum) + (( + iMbConcealedNum * 100) / pDecContext->iMbNum)); + pDecContext->pDecoderStatistics->uiAvgEcPropRatio = pDecContext->iMbNum == 0 ? 
+ (pDecContext->pDecoderStatistics->uiAvgEcPropRatio * pDecContext->pDecoderStatistics->uiEcFrameNum) : (( + pDecContext->pDecoderStatistics->uiAvgEcPropRatio * pDecContext->pDecoderStatistics->uiEcFrameNum) + (( + pDecContext->iMbEcedPropNum * 100) / pDecContext->iMbNum)); + pDecContext->pDecoderStatistics->uiEcFrameNum += (iMbConcealedNum == 0 ? 0 : 1); + pDecContext->pDecoderStatistics->uiAvgEcRatio = pDecContext->pDecoderStatistics->uiEcFrameNum == 0 ? 0 : + pDecContext->pDecoderStatistics->uiAvgEcRatio / pDecContext->pDecoderStatistics->uiEcFrameNum; + pDecContext->pDecoderStatistics->uiAvgEcPropRatio = pDecContext->pDecoderStatistics->uiEcFrameNum == 0 ? 0 : + pDecContext->pDecoderStatistics->uiAvgEcPropRatio / pDecContext->pDecoderStatistics->uiEcFrameNum; + } + iEnd = WelsTime(); + pDecContext->dDecTime += (iEnd - iStart) / 1e3; + + OutputStatisticsLog (*pDecContext->pDecoderStatistics); + + if (GetThreadCount (pDecContext) >= 1) { + WAIT_EVENT (&m_sReleaseBufferEvent, WELS_DEC_THREAD_WAIT_INFINITE); + RESET_EVENT (&m_sBufferingEvent); + BufferingReadyPicture (pDecContext, ppDst, pDstInfo); + SET_EVENT (&m_sBufferingEvent); + } else { + ReorderPicturesInDisplay (pDecContext, ppDst, pDstInfo); + } + + return (DECODING_STATE)pDecContext->iErrorCode; + } + // else Error free, the current codec works well + + if (pDstInfo->iBufferStatus == 1) { + + pDecContext->pDecoderStatistics->uiDecodedFrameCount++; + if (pDecContext->pDecoderStatistics->uiDecodedFrameCount == 0) { //exceed max value of uint32_t + ResetDecStatNums (pDecContext->pDecoderStatistics); + pDecContext->pDecoderStatistics->uiDecodedFrameCount++; + } + + OutputStatisticsLog (*pDecContext->pDecoderStatistics); + } + iEnd = WelsTime(); + pDecContext->dDecTime += (iEnd - iStart) / 1e3; + + if (GetThreadCount (pDecContext) >= 1) { + WAIT_EVENT (&m_sReleaseBufferEvent, WELS_DEC_THREAD_WAIT_INFINITE); + RESET_EVENT (&m_sBufferingEvent); + BufferingReadyPicture (pDecContext, ppDst, pDstInfo); + SET_EVENT 
(&m_sBufferingEvent); + } else { + ReorderPicturesInDisplay (pDecContext, ppDst, pDstInfo); + } + return dsErrorFree; +} + +DECODING_STATE CWelsDecoder::DecodeFrame2 (const unsigned char* kpSrc, + const int kiSrcLen, + unsigned char** ppDst, + SBufferInfo* pDstInfo) { + PWelsDecoderContext pDecContext = m_pDecThrCtx[0].pCtx; + pDecContext->pDstInfo = pDstInfo; + return DecodeFrame2WithCtx (pDecContext, kpSrc, kiSrcLen, ppDst, pDstInfo); +} + +DECODING_STATE CWelsDecoder::FlushFrame (unsigned char** ppDst, + SBufferInfo* pDstInfo) { + bool bEndOfStreamFlag = true; + if (m_iThreadCount <= 1) { + for (int32_t j = 0; j < m_iCtxCount; ++j) { + if (!m_pDecThrCtx[j].pCtx->bEndOfStreamFlag) { + bEndOfStreamFlag = false; + } + } + } + if (bEndOfStreamFlag && m_sReoderingStatus.iNumOfPicts > 0) { + m_sReoderingStatus.iMinPOC = IMinInt32; + if (m_bIsBaseline) { + uint32_t uiDecodingTimeStamp = 0; + int32_t firstValidIdx = -1; + for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) { + if (m_sPictInfoList[i].iPOC > IMinInt32) { + uiDecodingTimeStamp = m_sPictInfoList[i].uiDecodingTimeStamp; + m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC; + m_sReoderingStatus.iPictInfoIndex = i; + firstValidIdx = i; + break; + } + } + for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) { + if (i == firstValidIdx) continue; + if (m_sPictInfoList[i].iPOC > IMinInt32 && m_sPictInfoList[i].uiDecodingTimeStamp < uiDecodingTimeStamp) { + uiDecodingTimeStamp = m_sPictInfoList[i].uiDecodingTimeStamp; + m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC; + m_sReoderingStatus.iPictInfoIndex = i; + } + } + } else { + int32_t firstValidIdx = -1; + for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) { + if (m_sReoderingStatus.iMinPOC == IMinInt32 && m_sPictInfoList[i].iPOC > IMinInt32) { + m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC; + m_sReoderingStatus.iPictInfoIndex = i; + firstValidIdx = i; + break; + } + } + for 
(int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) { + if (i == firstValidIdx) continue; + if (m_sPictInfoList[i].iPOC > IMinInt32 && m_sPictInfoList[i].iPOC < m_sReoderingStatus.iMinPOC) { + m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC; + m_sReoderingStatus.iPictInfoIndex = i; + } + } + } + } + if (m_sReoderingStatus.iMinPOC > IMinInt32) { + m_sReoderingStatus.iLastWrittenPOC = m_sReoderingStatus.iMinPOC; +#if defined (_DEBUG) +#ifdef _MOTION_VECTOR_DUMP_ + fprintf (stderr, "Output POC: #%d uiDecodingTimeStamp=%d\n", m_sReoderingStatus.iLastWrittenPOC, + m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].uiDecodingTimeStamp); +#endif +#endif + memcpy (pDstInfo, &m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].sBufferInfo, sizeof (SBufferInfo)); + ppDst[0] = pDstInfo->pDst[0]; + ppDst[1] = pDstInfo->pDst[1]; + ppDst[2] = pDstInfo->pDst[2]; + m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPOC = IMinInt32; + PPicBuff pPicBuff = m_iThreadCount <= 1 ? m_pDecThrCtx[0].pCtx->pPicBuff : m_pPicBuff; + if (m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx < pPicBuff->iCapacity) { + PPicture pPic = pPicBuff->ppPic[m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx]; + --pPic->iRefCount; + } + m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].bLastGOP = false; + m_sReoderingStatus.iMinPOC = IMinInt32; + --m_sReoderingStatus.iNumOfPicts; + } + + return dsErrorFree; +} + +void CWelsDecoder::OutputStatisticsLog (SDecoderStatistics& sDecoderStatistics) { + if ((sDecoderStatistics.uiDecodedFrameCount > 0) && (sDecoderStatistics.iStatisticsLogInterval > 0) + && ((sDecoderStatistics.uiDecodedFrameCount % sDecoderStatistics.iStatisticsLogInterval) == 0)) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "DecoderStatistics: uiWidth=%d, uiHeight=%d, fAverageFrameSpeedInMs=%.1f, fActualAverageFrameSpeedInMs=%.1f, \ + uiDecodedFrameCount=%d, uiResolutionChangeTimes=%d, uiIDRCorrectNum=%d, \ + uiAvgEcRatio=%d, 
uiAvgEcPropRatio=%d, uiEcIDRNum=%d, uiEcFrameNum=%d, \ + uiIDRLostNum=%d, uiFreezingIDRNum=%d, uiFreezingNonIDRNum=%d, iAvgLumaQp=%d, \ + iSpsReportErrorNum=%d, iSubSpsReportErrorNum=%d, iPpsReportErrorNum=%d, iSpsNoExistNalNum=%d, iSubSpsNoExistNalNum=%d, iPpsNoExistNalNum=%d, \ + uiProfile=%d, uiLevel=%d, \ + iCurrentActiveSpsId=%d, iCurrentActivePpsId=%d,", + sDecoderStatistics.uiWidth, + sDecoderStatistics.uiHeight, + sDecoderStatistics.fAverageFrameSpeedInMs, + sDecoderStatistics.fActualAverageFrameSpeedInMs, + + sDecoderStatistics.uiDecodedFrameCount, + sDecoderStatistics.uiResolutionChangeTimes, + sDecoderStatistics.uiIDRCorrectNum, + + sDecoderStatistics.uiAvgEcRatio, + sDecoderStatistics.uiAvgEcPropRatio, + sDecoderStatistics.uiEcIDRNum, + sDecoderStatistics.uiEcFrameNum, + + sDecoderStatistics.uiIDRLostNum, + sDecoderStatistics.uiFreezingIDRNum, + sDecoderStatistics.uiFreezingNonIDRNum, + sDecoderStatistics.iAvgLumaQp, + + sDecoderStatistics.iSpsReportErrorNum, + sDecoderStatistics.iSubSpsReportErrorNum, + sDecoderStatistics.iPpsReportErrorNum, + sDecoderStatistics.iSpsNoExistNalNum, + sDecoderStatistics.iSubSpsNoExistNalNum, + sDecoderStatistics.iPpsNoExistNalNum, + + sDecoderStatistics.uiProfile, + sDecoderStatistics.uiLevel, + + sDecoderStatistics.iCurrentActiveSpsId, + sDecoderStatistics.iCurrentActivePpsId); + } +} + +void CWelsDecoder::BufferingReadyPicture (PWelsDecoderContext pCtx, unsigned char** ppDst, + SBufferInfo* pDstInfo) { + if (pDstInfo->iBufferStatus == 0) { + return; + } + m_bIsBaseline = pCtx->pSps->uiProfileIdc == 66 || pCtx->pSps->uiProfileIdc == 83; + if (!m_bIsBaseline) { + if (m_sReoderingStatus.iNumOfPicts && pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb + && pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->bNewSeqBegin) { + m_sReoderingStatus.iLastGOPRemainPicts = m_sReoderingStatus.iNumOfPicts; + + for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) { + if (m_sPictInfoList[i].iPOC > IMinInt32) { 
+ m_sPictInfoList[i].bLastGOP = true; + } + } + } else { + if (m_sReoderingStatus.iNumOfPicts > 0) { + //This can happen when decoder moves to next GOP without being able to decoder first picture PicOrderCntLsb = 0 + bool hasGOPChanged = false; + for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) { + if (m_sPictInfoList[i].iPOC == pCtx->pSliceHeader->iPicOrderCntLsb) { + hasGOPChanged = true; + break; + } + } + if (hasGOPChanged) { + m_sReoderingStatus.iLastGOPRemainPicts = m_sReoderingStatus.iNumOfPicts; + for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) { + if (m_sPictInfoList[i].iPOC > IMinInt32) { + m_sPictInfoList[i].bLastGOP = true; + } + } + } + } + } + } + for (int32_t i = 0; i < 16; ++i) { + if (m_sPictInfoList[i].iPOC == IMinInt32) { + memcpy (&m_sPictInfoList[i].sBufferInfo, pDstInfo, sizeof (SBufferInfo)); + m_sPictInfoList[i].iPOC = pCtx->pSliceHeader->iPicOrderCntLsb; + m_sPictInfoList[i].uiDecodingTimeStamp = pCtx->uiDecodingTimeStamp; + m_sPictInfoList[i].iPicBuffIdx = pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->iPicBuffIdx; + if (GetThreadCount (pCtx) <= 1) ++pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->iRefCount; + m_sPictInfoList[i].bLastGOP = false; + m_iLastBufferedIdx = i; + pDstInfo->iBufferStatus = 0; + ++m_sReoderingStatus.iNumOfPicts; + if (i > m_sReoderingStatus.iLargestBufferedPicIndex) { + m_sReoderingStatus.iLargestBufferedPicIndex = i; + } + break; + } + } +} + +void CWelsDecoder::ReleaseBufferedReadyPicture (PWelsDecoderContext pCtx, unsigned char** ppDst, + SBufferInfo* pDstInfo) { + PPicBuff pPicBuff = pCtx ? 
pCtx->pPicBuff : m_pPicBuff; + if (pCtx == NULL && m_iThreadCount <= 1) { + pCtx = m_pDecThrCtx[0].pCtx; + } + if (!m_bIsBaseline && m_sReoderingStatus.iLastGOPRemainPicts > 0) { + m_sReoderingStatus.iMinPOC = IMinInt32; + int32_t firstValidIdx = -1; + for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) { + if (m_sReoderingStatus.iMinPOC == IMinInt32 && m_sPictInfoList[i].iPOC > IMinInt32 && m_sPictInfoList[i].bLastGOP) { + m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC; + m_sReoderingStatus.iPictInfoIndex = i; + firstValidIdx = i; + break; + } + } + for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) { + if (i == firstValidIdx) continue; + if (m_sPictInfoList[i].iPOC > IMinInt32 && m_sPictInfoList[i].iPOC < m_sReoderingStatus.iMinPOC + && m_sPictInfoList[i].bLastGOP) { + m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC; + m_sReoderingStatus.iPictInfoIndex = i; + } + } + m_sReoderingStatus.iLastWrittenPOC = m_sReoderingStatus.iMinPOC; +#if defined (_DEBUG) +#ifdef _MOTION_VECTOR_DUMP_ + fprintf (stderr, "Output POC: #%d uiDecodingTimeStamp=%d\n", m_sReoderingStatus.iLastWrittenPOC, + m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].uiDecodingTimeStamp); +#endif +#endif + memcpy (pDstInfo, &m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].sBufferInfo, sizeof (SBufferInfo)); + ppDst[0] = pDstInfo->pDst[0]; + ppDst[1] = pDstInfo->pDst[1]; + ppDst[2] = pDstInfo->pDst[2]; + m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPOC = IMinInt32; + PPicture pPic = pPicBuff->ppPic[m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx]; + --pPic->iRefCount; + m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].bLastGOP = false; + m_sReoderingStatus.iMinPOC = IMinInt32; + --m_sReoderingStatus.iNumOfPicts; + --m_sReoderingStatus.iLastGOPRemainPicts; + if (m_sReoderingStatus.iLastGOPRemainPicts == 0) { + m_sReoderingStatus.iLastWrittenPOC = IMinInt32; + } + return; + } + if (m_sReoderingStatus.iNumOfPicts && 
m_bIsBaseline) { + uint32_t uiDecodingTimeStamp = 0; + int32_t firstValidIdx = -1; + for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) { + if (m_sPictInfoList[i].iPOC > IMinInt32) { + uiDecodingTimeStamp = m_sPictInfoList[i].uiDecodingTimeStamp; + m_sReoderingStatus.iPictInfoIndex = i; + firstValidIdx = i; + break; + } + } + for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) { + if (i == firstValidIdx) continue; + if (m_sPictInfoList[i].iPOC > IMinInt32 && m_sPictInfoList[i].uiDecodingTimeStamp < uiDecodingTimeStamp) { + uiDecodingTimeStamp = m_sPictInfoList[i].uiDecodingTimeStamp; + m_sReoderingStatus.iPictInfoIndex = i; + } + } + if (uiDecodingTimeStamp > 0) { +#if defined (_DEBUG) +#ifdef _MOTION_VECTOR_DUMP_ + fprintf (stderr, "Output POC: #%d uiDecodingTimeStamp=%d\n", m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPOC, + uiDecodingTimeStamp); +#endif +#endif + memcpy (pDstInfo, &m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].sBufferInfo, sizeof (SBufferInfo)); + ppDst[0] = pDstInfo->pDst[0]; + ppDst[1] = pDstInfo->pDst[1]; + ppDst[2] = pDstInfo->pDst[2]; + m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPOC = IMinInt32; + PPicture pPic = pPicBuff->ppPic[m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx]; + --pPic->iRefCount; + --m_sReoderingStatus.iNumOfPicts; + } + return; + } + if (m_sReoderingStatus.iNumOfPicts > 0) { + m_sReoderingStatus.iMinPOC = IMinInt32; + int32_t firstValidIdx = -1; + for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) { + if (m_sReoderingStatus.iMinPOC == IMinInt32 && m_sPictInfoList[i].iPOC > IMinInt32) { + m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC; + m_sReoderingStatus.iPictInfoIndex = i; + firstValidIdx = i; + break; + } + } + for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) { + if (i == firstValidIdx) continue; + if (m_sPictInfoList[i].iPOC > IMinInt32 && m_sPictInfoList[i].iPOC < 
m_sReoderingStatus.iMinPOC) { + m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC; + m_sReoderingStatus.iPictInfoIndex = i; + } + } + } + if (m_sReoderingStatus.iMinPOC > IMinInt32) { + int32_t iLastPOC = pCtx != NULL ? pCtx->pSliceHeader->iPicOrderCntLsb : m_sPictInfoList[m_iLastBufferedIdx].iPOC; + bool isReady = (m_sReoderingStatus.iLastWrittenPOC > IMinInt32 + && m_sReoderingStatus.iMinPOC - m_sReoderingStatus.iLastWrittenPOC <= 1) + || m_sReoderingStatus.iMinPOC < iLastPOC; + if (isReady) { + m_sReoderingStatus.iLastWrittenPOC = m_sReoderingStatus.iMinPOC; +#if defined (_DEBUG) +#ifdef _MOTION_VECTOR_DUMP_ + fprintf (stderr, "Output POC: #%d uiDecodingTimeStamp=%d\n", m_sReoderingStatus.iLastWrittenPOC, + m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].uiDecodingTimeStamp); +#endif +#endif + memcpy (pDstInfo, &m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].sBufferInfo, sizeof (SBufferInfo)); + ppDst[0] = pDstInfo->pDst[0]; + ppDst[1] = pDstInfo->pDst[1]; + ppDst[2] = pDstInfo->pDst[2]; + m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPOC = IMinInt32; + PPicture pPic = pPicBuff->ppPic[m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx]; + --pPic->iRefCount; + m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].bLastGOP = false; + m_sReoderingStatus.iMinPOC = IMinInt32; + --m_sReoderingStatus.iNumOfPicts; + } + } +} + +DECODING_STATE CWelsDecoder::ReorderPicturesInDisplay (PWelsDecoderContext pDecContext, unsigned char** ppDst, + SBufferInfo* pDstInfo) { + DECODING_STATE iRet = dsErrorFree; + if ((pDstInfo->iBufferStatus == 1) && (pDecContext->pPps->bEntropyCodingModeFlag)) { + m_bIsBaseline = pDecContext->pSps->uiProfileIdc == 66 || pDecContext->pSps->uiProfileIdc == 83; + if (!m_bIsBaseline) { + BufferingReadyPicture (pDecContext, ppDst, pDstInfo); + ReleaseBufferedReadyPicture (pDecContext, ppDst, pDstInfo); + } + } + return iRet; +} + +DECODING_STATE CWelsDecoder::DecodeParser (const unsigned char* kpSrc, const int kiSrcLen, 
SParserBsInfo* pDstInfo) { + PWelsDecoderContext pDecContext = m_pDecThrCtx[0].pCtx; + + if (pDecContext == NULL || pDecContext->pParam == NULL) { + if (m_pWelsTrace != NULL) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "Call DecodeParser without Initialize.\n"); + } + return dsInitialOptExpected; + } + + if (!pDecContext->pParam->bParseOnly) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "bParseOnly should be true for this API calling! \n"); + pDecContext->iErrorCode |= dsInvalidArgument; + return dsInvalidArgument; + } + int64_t iEnd, iStart = WelsTime(); + if (CheckBsBuffer (pDecContext, kiSrcLen)) { + if (ResetDecoder (pDecContext)) + return dsOutOfMemory; + + return dsErrorFree; + } + if (kiSrcLen > 0 && kpSrc != NULL) { +#ifdef OUTPUT_BITSTREAM + if (m_pFBS) { + WelsFwrite (kpSrc, sizeof (unsigned char), kiSrcLen, m_pFBS); + WelsFflush (m_pFBS); + } +#endif//OUTPUT_BIT_STREAM + pDecContext->bEndOfStreamFlag = false; + } else { + //For application MODE, the error detection should be added for safe. + //But for CONSOLE MODE, when decoding LAST AU, kiSrcLen==0 && kpSrc==NULL. + pDecContext->bEndOfStreamFlag = true; + pDecContext->bInstantDecFlag = true; + } + + pDecContext->iErrorCode = dsErrorFree; //initialize at the starting of AU decoding. + pDecContext->pParam->eEcActiveIdc = ERROR_CON_DISABLE; //add protection to disable EC here. 
+ pDecContext->iFeedbackNalRefIdc = -1; //initialize + if (!pDecContext->bFramePending) { //frame complete + pDecContext->pParserBsInfo->iNalNum = 0; + memset (pDecContext->pParserBsInfo->pNalLenInByte, 0, MAX_NAL_UNITS_IN_LAYER); + } + pDstInfo->iNalNum = 0; + pDstInfo->iSpsWidthInPixel = pDstInfo->iSpsHeightInPixel = 0; + if (pDstInfo) { + pDecContext->uiTimeStamp = pDstInfo->uiInBsTimeStamp; + pDstInfo->uiOutBsTimeStamp = 0; + } else { + pDecContext->uiTimeStamp = 0; + } + WelsDecodeBs (pDecContext, kpSrc, kiSrcLen, NULL, NULL, pDstInfo); + if (pDecContext->iErrorCode & dsOutOfMemory) { + if (ResetDecoder (pDecContext)) + return dsOutOfMemory; + return dsErrorFree; + } + + if (!pDecContext->bFramePending && pDecContext->pParserBsInfo->iNalNum) { + memcpy (pDstInfo, pDecContext->pParserBsInfo, sizeof (SParserBsInfo)); + + if (pDecContext->iErrorCode == ERR_NONE) { //update statistics: decoding frame count + pDecContext->pDecoderStatistics->uiDecodedFrameCount++; + if (pDecContext->pDecoderStatistics->uiDecodedFrameCount == 0) { //exceed max value of uint32_t + ResetDecStatNums (pDecContext->pDecoderStatistics); + pDecContext->pDecoderStatistics->uiDecodedFrameCount++; + } + } + } + + pDecContext->bInstantDecFlag = false; //reset no-delay flag + + if (pDecContext->iErrorCode && pDecContext->bPrintFrameErrorTraceFlag) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "decode failed, failure type:%d \n", pDecContext->iErrorCode); + pDecContext->bPrintFrameErrorTraceFlag = false; + } + iEnd = WelsTime(); + pDecContext->dDecTime += (iEnd - iStart) / 1e3; + return (DECODING_STATE)pDecContext->iErrorCode; +} + +DECODING_STATE CWelsDecoder::DecodeFrame (const unsigned char* kpSrc, + const int kiSrcLen, + unsigned char** ppDst, + int* pStride, + int& iWidth, + int& iHeight) { + DECODING_STATE eDecState = dsErrorFree; + SBufferInfo DstInfo; + + memset (&DstInfo, 0, sizeof (SBufferInfo)); + DstInfo.UsrData.sSystemBuffer.iStride[0] = pStride[0]; + 
DstInfo.UsrData.sSystemBuffer.iStride[1] = pStride[1]; + DstInfo.UsrData.sSystemBuffer.iWidth = iWidth; + DstInfo.UsrData.sSystemBuffer.iHeight = iHeight; + + eDecState = DecodeFrame2 (kpSrc, kiSrcLen, ppDst, &DstInfo); + if (eDecState == dsErrorFree) { + pStride[0] = DstInfo.UsrData.sSystemBuffer.iStride[0]; + pStride[1] = DstInfo.UsrData.sSystemBuffer.iStride[1]; + iWidth = DstInfo.UsrData.sSystemBuffer.iWidth; + iHeight = DstInfo.UsrData.sSystemBuffer.iHeight; + } + + return eDecState; +} + +DECODING_STATE CWelsDecoder::DecodeFrameEx (const unsigned char* kpSrc, + const int kiSrcLen, + unsigned char* pDst, + int iDstStride, + int& iDstLen, + int& iWidth, + int& iHeight, + int& iColorFormat) { + DECODING_STATE state = dsErrorFree; + + return state; +} + +DECODING_STATE CWelsDecoder::ParseAccessUnit (SWelsDecoderThreadCTX& sThreadCtx) { + sThreadCtx.pCtx->bHasNewSps = false; + sThreadCtx.pCtx->bParamSetsLostFlag = m_bParamSetsLostFlag; + sThreadCtx.pCtx->bFreezeOutput = m_bFreezeOutput; + sThreadCtx.pCtx->uiDecodingTimeStamp = ++m_uiDecodeTimeStamp; + bool bPicBuffChanged = false; + if (m_pLastDecThrCtx != NULL && sThreadCtx.pCtx->sSpsPpsCtx.iSeqId < m_pLastDecThrCtx->pCtx->sSpsPpsCtx.iSeqId) { + CopySpsPps (m_pLastDecThrCtx->pCtx, sThreadCtx.pCtx); + sThreadCtx.pCtx->iPicQueueNumber = m_pLastDecThrCtx->pCtx->iPicQueueNumber; + if (sThreadCtx.pCtx->pPicBuff != m_pPicBuff) { + bPicBuffChanged = true; + sThreadCtx.pCtx->pPicBuff = m_pPicBuff; + sThreadCtx.pCtx->bHaveGotMemory = m_pPicBuff != NULL; + sThreadCtx.pCtx->iImgWidthInPixel = m_pLastDecThrCtx->pCtx->iImgWidthInPixel; + sThreadCtx.pCtx->iImgHeightInPixel = m_pLastDecThrCtx->pCtx->iImgHeightInPixel; + } + } + + //if threadCount > 1, then each thread must contain exact one complete frame. 
+ if (GetThreadCount (sThreadCtx.pCtx) > 1) { + sThreadCtx.pCtx->pAccessUnitList->uiAvailUnitsNum = 0; + sThreadCtx.pCtx->pAccessUnitList->uiActualUnitsNum = 0; + } + + int32_t iRet = DecodeFrame2WithCtx (sThreadCtx.pCtx, sThreadCtx.kpSrc, sThreadCtx.kiSrcLen, sThreadCtx.ppDst, + &sThreadCtx.sDstInfo); + + int32_t iErr = InitConstructAccessUnit (sThreadCtx.pCtx, &sThreadCtx.sDstInfo); + if (ERR_NONE != iErr) { + return (DECODING_STATE) (iRet | iErr); + } + if (sThreadCtx.pCtx->bNewSeqBegin) { + m_pPicBuff = sThreadCtx.pCtx->pPicBuff; + } else if (bPicBuffChanged) { + InitialDqLayersContext (sThreadCtx.pCtx, sThreadCtx.pCtx->pSps->iMbWidth << 4, sThreadCtx.pCtx->pSps->iMbHeight << 4); + } + if (!sThreadCtx.pCtx->bNewSeqBegin && m_pLastDecThrCtx != NULL) { + sThreadCtx.pCtx->sFrameCrop = m_pLastDecThrCtx->pCtx->pSps->sFrameCrop; + } + m_bParamSetsLostFlag = sThreadCtx.pCtx->bNewSeqBegin ? false : sThreadCtx.pCtx->bParamSetsLostFlag; + m_bFreezeOutput = sThreadCtx.pCtx->bNewSeqBegin ? false : sThreadCtx.pCtx->bFreezeOutput; + return (DECODING_STATE)iErr; +} +/* +* Run decoding picture in separate thread. 
+*/ + +int CWelsDecoder::ThreadDecodeFrameInternal (const unsigned char* kpSrc, const int kiSrcLen, unsigned char** ppDst, + SBufferInfo* pDstInfo) { + int state = dsErrorFree; + int32_t i, j; + int32_t signal = 0; + + //serial using of threads + if (m_DecCtxActiveCount < m_iThreadCount) { + signal = m_DecCtxActiveCount; + } else { + signal = m_pDecThrCtxActive[0]->sThreadInfo.uiThrNum; + } + + WAIT_SEMAPHORE (&m_pDecThrCtx[signal].sThreadInfo.sIsIdle, WELS_DEC_THREAD_WAIT_INFINITE); + + for (i = 0; i < m_DecCtxActiveCount; ++i) { + if (m_pDecThrCtxActive[i] == &m_pDecThrCtx[signal]) { + m_pDecThrCtxActive[i] = NULL; + for (j = i; j < m_DecCtxActiveCount - 1; j++) { + m_pDecThrCtxActive[j] = m_pDecThrCtxActive[j + 1]; + m_pDecThrCtxActive[j + 1] = NULL; + } + --m_DecCtxActiveCount; + break; + } + } + + m_pDecThrCtxActive[m_DecCtxActiveCount++] = &m_pDecThrCtx[signal]; + if (m_pLastDecThrCtx != NULL) { + m_pDecThrCtx[signal].pCtx->pLastThreadCtx = m_pLastDecThrCtx; + } + m_pDecThrCtx[signal].kpSrc = const_cast (kpSrc); + m_pDecThrCtx[signal].kiSrcLen = kiSrcLen; + m_pDecThrCtx[signal].ppDst = ppDst; + memcpy (&m_pDecThrCtx[signal].sDstInfo, pDstInfo, sizeof (SBufferInfo)); + + ParseAccessUnit (m_pDecThrCtx[signal]); + if (m_iThreadCount > 1) { + m_pLastDecThrCtx = &m_pDecThrCtx[signal]; + } + m_pDecThrCtx[signal].sThreadInfo.uiCommand = WELS_DEC_THREAD_COMMAND_RUN; + RELEASE_SEMAPHORE (&m_pDecThrCtx[signal].sThreadInfo.sIsActivated); + + // wait early picture + if (m_DecCtxActiveCount >= m_iThreadCount) { + WAIT_SEMAPHORE (&m_pDecThrCtxActive[0]->sThreadInfo.sIsIdle, WELS_DEC_THREAD_WAIT_INFINITE); + RELEASE_SEMAPHORE (&m_pDecThrCtxActive[0]->sThreadInfo.sIsIdle); + } + return state; +} + +} // namespace WelsDec + + +using namespace WelsDec; +/* +* WelsGetDecoderCapability +* @return: DecCapability information +*/ +int WelsGetDecoderCapability (SDecoderCapability* pDecCapability) { + memset (pDecCapability, 0, sizeof (SDecoderCapability)); + 
pDecCapability->iProfileIdc = 66; //Baseline + pDecCapability->iProfileIop = 0xE0; //11100000b + pDecCapability->iLevelIdc = 32; //level_idc = 3.2 + pDecCapability->iMaxMbps = 216000; //from level_idc = 3.2 + pDecCapability->iMaxFs = 5120; //from level_idc = 3.2 + pDecCapability->iMaxCpb = 20000; //from level_idc = 3.2 + pDecCapability->iMaxDpb = 20480; //from level_idc = 3.2 + pDecCapability->iMaxBr = 20000; //from level_idc = 3.2 + pDecCapability->bRedPicCap = 0; //not support redundant pic + + return ERR_NONE; +} +/* WINAPI is indeed in prefix due to sync to application layer callings!! */ + +/* +* WelsCreateDecoder +* @return: success in return 0, otherwise failed. +*/ +long WelsCreateDecoder (ISVCDecoder** ppDecoder) { + + if (NULL == ppDecoder) { + return ERR_INVALID_PARAMETERS; + } + + *ppDecoder = new CWelsDecoder(); + + if (NULL == *ppDecoder) { + return ERR_MALLOC_FAILED; + } + + return ERR_NONE; +} + +/* +* WelsDestroyDecoder +*/ +void WelsDestroyDecoder (ISVCDecoder* pDecoder) { + if (NULL != pDecoder) { + delete (CWelsDecoder*)pDecoder; + } +} diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/decoder/plus/src/wels_dec_export.def b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/plus/src/wels_dec_export.def new file mode 100644 index 000000000..29a37e3e2 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/decoder/plus/src/wels_dec_export.def @@ -0,0 +1,4 @@ +EXPORTS + WelsGetDecoderCapability + WelsCreateDecoder + WelsDestroyDecoder diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm/intra_pred_neon.S b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm/intra_pred_neon.S new file mode 100644 index 000000000..b06fa3660 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm/intra_pred_neon.S @@ -0,0 +1,589 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifdef HAVE_NEON + +#include "arm_arch_common_macro.S" + +//Global macro +.macro GET_8BYTE_DATA arg0, arg1, arg2 + vld1.8 {\arg0[0]}, [\arg1], \arg2 + vld1.8 {\arg0[1]}, [\arg1], \arg2 + vld1.8 {\arg0[2]}, [\arg1], \arg2 + vld1.8 {\arg0[3]}, [\arg1], \arg2 + vld1.8 {\arg0[4]}, [\arg1], \arg2 + vld1.8 {\arg0[5]}, [\arg1], \arg2 + vld1.8 {\arg0[6]}, [\arg1], \arg2 + vld1.8 {\arg0[7]}, [\arg1], \arg2 +.endm + + +WELS_ASM_FUNC_BEGIN WelsI16x16LumaPredDc_neon + //stmdb sp!, { r2-r5, lr} + //Get the left vertical line data + sub r3, r1, #1 + GET_8BYTE_DATA d0, r3, r2 + GET_8BYTE_DATA d1, r3, r2 + + //Get the top horizontal line data + sub r3, r1, r2 + vldm r3, {d2, d3} + + //Calculate the sum of top horizontal line data and vertical line data + vpaddl.u8 q0, q0 + vpaddl.u8 q1, q1 + vadd.u16 q0, q0, q1 + vadd.u16 d0, d0, d1 + vpaddl.u16 d0, d0 + vpaddl.u32 d0, d0 + + //Calculate the mean value + vrshr.u16 d0, d0, #5 + vdup.8 q0, d0[0] + + //Set the mean value to the all of member of MB + mov r3, #4 +loop_0_get_i16x16_luma_pred_dc_both: + vst1.8 {d0,d1}, [r0]! + vst1.8 {d0,d1}, [r0]! + vst1.8 {d0,d1}, [r0]! + vst1.8 {d0,d1}, [r0]! 
+ subs r3, #1 + bne loop_0_get_i16x16_luma_pred_dc_both + +WELS_ASM_FUNC_END + + +.align 3 +//The table for SIMD instruction {(8,7,6,5,4,3,2,1) * 5} +CONST0_GET_I16X16_LUMA_PRED_PLANE: .long 0x191e2328, 0x050a0f14 + +//The table for SIMD instruction {-7,-6,-5,-4,-3,-2,-1,0} +CONST1_GET_I16X16_LUMA_PRED_PLANE: .long 0xfcfbfaf9, 0x00fffefd + + +WELS_ASM_FUNC_BEGIN WelsI16x16LumaPredPlane_neon + //stmdb sp!, { r4, lr} + + //Load the table {(8,7,6,5,4,3,2,1) * 5} + adr r3, CONST0_GET_I16X16_LUMA_PRED_PLANE + vldr d0, [r3] + + //Pack the top[-1] ~ top[6] to d1 + sub r3, r1, r2 + sub r1, r3, #1 + vld1.8 d1, [r1] + + //Pack the top[8] ~ top[15] to d2 + add r1, #9 + vld1.8 d2, [r1] + + //Save the top[15] to d6 for next step + vdup.u8 d6, d2[7] + + //Get and pack left[-1] ~ left[6] to d4 + sub r1, r3, #1 + GET_8BYTE_DATA d4, r1, r2 + + //Get and pack left[8] ~ left[15] to d3 + add r1, r2 + GET_8BYTE_DATA d3, r1, r2 + + //Save the left[15] to d7 for next step + vdup.u8 d7, d3[7] + + //revert the sequence of d2,d3 + vrev64.8 q1, q1 + + vsubl.u8 q2, d3, d4 //q2={left[8]-left[6],left[9]-left[5],left[10]-left[4], ...} + vsubl.u8 q1, d2, d1 //q1={top[8]-top[6],top[9]-top[5],top[10]-top[4], ...} + + + vmovl.u8 q0, d0 + vmul.s16 q1, q0, q1 //q1 = q1*{(8,7,6,5,4,3,2,1) * 5} + vmul.s16 q2, q0, q2 //q2 = q2*{(8,7,6,5,4,3,2,1) * 5} + + //Calculate the sum of items of q1, q2 + vpadd.s16 d0, d2, d3 + vpadd.s16 d1, d4, d5 + vpaddl.s16 q0, q0 + vpaddl.s32 q0, q0 + + //Get the value of 'b', 'c' and extend to q1, q2. 
+ vrshr.s64 q0, #6 + vdup.s16 q1, d0[0] + vdup.s16 q2, d1[0] + + //Load the table {-7,-6,-5,-4,-3,-2,-1,0} to d0 + adr r3, CONST1_GET_I16X16_LUMA_PRED_PLANE + vld1.32 {d0}, [r3] + + //Get the value of 'a' and save to q3 + vaddl.u8 q3, d6, d7 + vshl.u16 q3, #4 + + //calculate a+'b'*{-7,-6,-5,-4,-3,-2,-1,0} + c*{-7} + vmovl.s8 q0, d0 + vmla.s16 q3, q0, q1 + vmla.s16 q3, q2, d0[0] + + //Calculate a+'b'*{1,2,3,4,5,6,7,8} + c*{-7} + vshl.s16 q8, q1, #3 + vadd.s16 q8, q3 + + //right shift 5 bits and rounding + vqrshrun.s16 d0, q3, #5 + vqrshrun.s16 d1, q8, #5 + + //Set the line of MB + vst1.u32 {d0,d1}, [r0]! + + + //Do the same processing for setting other lines + mov r3, #15 +loop_0_get_i16x16_luma_pred_plane: + vadd.s16 q3, q2 + vadd.s16 q8, q2 + vqrshrun.s16 d0, q3, #5 + vqrshrun.s16 d1, q8, #5 + vst1.u32 {d0,d1}, [r0]! + subs r3, #1 + bne loop_0_get_i16x16_luma_pred_plane + +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsI4x4LumaPredV_neon + //stmdb sp!, { r2-r5, lr} + //Load the top row (4 bytes) + sub r3, r1, r2 + ldr r3, [r3] + + //Set the luma MB using top line + str r3, [r0], #4 + str r3, [r0], #4 + str r3, [r0], #4 + str r3, [r0] + +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsI4x4LumaPredH_neon + //stmdb sp!, { r2-r5, lr} + //Load the left column (4 bytes) + sub r3, r1, #1 + vld1.8 {d0[]}, [r3], r2 + vld1.8 {d1[]}, [r3], r2 + vld1.8 {d2[]}, [r3], r2 + vld1.8 {d3[]}, [r3] + + //Set the luma MB using the left side byte + vst1.32 {d0[0]}, [r0]! + vst1.32 {d1[0]}, [r0]! + vst1.32 {d2[0]}, [r0]! 
+ vst1.32 {d3[0]}, [r0] + +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsI4x4LumaPredDDL_neon + //stmdb sp!, { r2-r5, lr} + //Load the top row data(8 bytes) + sub r3, r1, r2 + vld1.32 {d0}, [r3] + + //For "t7 + (t7<<1)" + vdup.8 d1, d0[7] + + //calculate "t0+t1,t1+t2,t2+t3...t6+t7,t7+t7" + vext.8 d1, d0, d1, #1 + vaddl.u8 q1, d1, d0 + + //calculate "x,t0+t1+t1+t2,t1+t2+t2+t3,...t5+t6+t6+t7,t6+t7+t7+t7" + vext.8 q2, q1, q1, #14 + vadd.u16 q0, q1, q2 + + //right shift 2 bits and rounding + vqrshrn.u16 d0, q0, #2 + + //Save "ddl0, ddl1, ddl2, ddl3" + vext.8 d1, d0, d0, #1 + vst1.32 d1[0], [r0]! + + //Save "ddl1, ddl2, ddl3, ddl4" + vext.8 d1, d0, d0, #2 + vst1.32 d1[0], [r0]! + + //Save "ddl2, ddl3, ddl4, ddl5" + vext.8 d1, d0, d0, #3 + vst1.32 d1[0], [r0]! + + //Save "ddl3, ddl4, ddl5, ddl6" + vst1.32 d0[1], [r0] + +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsI4x4LumaPredDDR_neon + //stmdb sp!, { r2-r5, lr} + //Load the top row (4 bytes) + sub r3, r1, r2 + vld1.32 {d0[1]}, [r3] + + //Load the left column (5 bytes) + sub r3, #1 + vld1.8 {d0[3]}, [r3], r2 + vld1.8 {d0[2]}, [r3], r2 + vld1.8 {d0[1]}, [r3], r2 + vld1.8 {d0[0]}, [r3], r2 + vld1.8 {d1[7]}, [r3] //For packing the right sequence to do SIMD processing + + + vext.8 d2, d1, d0, #7 //d0:{L2,L1,L0,LT,T0,T1,T2,T3} + //d2:{L3,L2,L1,L0,LT,T0,T1,T2} + + //q2:{L2+L3,L1+L2,L0+L1...T1+T2,T2+T3} + vaddl.u8 q2, d2, d0 + + //q1:{TL0+LT0,LT0+T01,...L12+L23} + vext.8 q3, q3, q2, #14 + vadd.u16 q1, q2, q3 + + //right shift 2 bits and rounding + vqrshrn.u16 d0, q1, #2 + + //Adjust the data sequence for setting luma MB of 'pred' + vst1.32 d0[1], [r0]! + vext.8 d0, d0, d0, #7 + vst1.32 d0[1], [r0]! + vext.8 d0, d0, d0, #7 + vst1.32 d0[1], [r0]! 
+ vext.8 d0, d0, d0, #7 + vst1.32 d0[1], [r0] + +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsI4x4LumaPredVL_neon + //stmdb sp!, { r2-r5, lr} + //Load the top row (8 bytes) + sub r3, r1, r2 + vld1.32 {d0}, [r3] + + + vext.8 d1, d0, d0, #1 + vaddl.u8 q1, d1, d0 //q1:{t0+t1,t1+t2,t2+t3...t5+t6,x,x} + + vext.8 q2, q1, q1, #2 + vadd.u16 q2, q1, q2 //q2:{t0+t1+t1+t2,t1+t2+t2+t3,...t4+t5+t5+t6,x,x} + + //calculate the "vl0,vl1,vl2,vl3,vl4" + vqrshrn.u16 d0, q1, #1 + + //calculate the "vl5,vl6,vl7,vl8,vl9" + vqrshrn.u16 d1, q2, #2 + + //Adjust the data sequence for setting the luma MB + vst1.32 d0[0], [r0]! + vst1.32 d1[0], [r0]! + vext.8 d0, d0, d0, #1 + vext.8 d1, d1, d1, #1 + vst1.32 d0[0], [r0]! + vst1.32 d1[0], [r0] + +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsI4x4LumaPredVR_neon + //stmdb sp!, { r2-r5, lr} + //Load the top row (4 bytes) + sub r3, r1, r2 + vld1.32 {d0[1]}, [r3] + + //Load the left column (4 bytes) + sub r3, #1 + vld1.8 {d0[3]}, [r3], r2 + vld1.8 {d0[2]}, [r3], r2 + vld1.8 {d0[1]}, [r3], r2 + vld1.8 {d0[0]}, [r3] + + + vext.8 d1, d0, d0, #7 + vaddl.u8 q1, d0, d1 //q1:{X,L2+L1,L1+L0,L0+LT,LT+T0,T0+T1,T1+T2,T2+T3} + + vext.u8 q2, q1, q1, #14 + vadd.u16 q2, q2, q1 //q2:{X,L2+L1+L1+L0,L1+L0+L0+LT,...T1+T2+T2+T3} + + //Calculate the vr0 ~ vr9 + vqrshrn.u16 d1, q2, #2 + vqrshrn.u16 d0, q1, #1 + + //Adjust the data sequence for setting the luma MB + vst1.32 d0[1], [r0]! + vst1.32 d1[1], [r0]! + //add r2, r0, r1 + vst1.8 d1[3], [r0]! + vst1.16 d0[2], [r0]! + vst1.8 d0[6], [r0]! + vst1.8 d1[2], [r0]! + vst1.16 d1[2], [r0]! 
+ vst1.8 d1[6], [r0] +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsI4x4LumaPredHU_neon + //stmdb sp!, { r4, lr} + //Load the left column data + sub r3, r1, #1 + mov r1, #3 + mul r1, r2 + add r1, r3 + vld1.8 {d0[]}, [r1] + vld1.8 {d0[4]}, [r3], r2 + vld1.8 {d0[5]}, [r3], r2 + vld1.8 {d0[6]}, [r3], r2 //d0:{L3,L3,L3,L3,L0,L1,L2,L3} + + vext.8 d1, d0, d0, #1 + vaddl.u8 q2, d0, d1 //q2:{L3+L3,L3+L3,L3+L3,L3+L0,L0+L1,L1+L2,L2+L3,L3+L3} + + vext.u8 d2, d5, d4, #2 + vadd.u16 d3, d2, d5 //d3:{L0+L1+L1+L2,L1+L2+L2+L3,L2+L3+L3+L3,L3+L3+L3+L3} + + //Calculate the hu0 ~ hu5 + vqrshrn.u16 d2, q2, #1 + vqrshrn.u16 d1, q1, #2 + + //Adjust the data sequence for setting the luma MB + vzip.8 d2, d1 + vst1.32 d1[0], [r0]! + vext.8 d2, d1, d1, #2 + vst1.32 d2[0], [r0]! + vst1.32 d1[1], [r0]! + vst1.32 d0[0], [r0] + +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsI4x4LumaPredHD_neon + //stmdb sp!, { r2-r5, lr} + //Load the data + sub r3, r1, r2 + sub r3, #1 + vld1.32 {d0[1]}, [r3], r2 + vld1.8 {d0[3]}, [r3], r2 + vld1.8 {d0[2]}, [r3], r2 + vld1.8 {d0[1]}, [r3], r2 + vld1.8 {d0[0]}, [r3] //d0:{L3,L2,L1,L0,LT,T0,T1,T2} + + + vext.8 d1, d0, d0, #7 + vaddl.u8 q1, d0, d1 //q1:{x,L3+L2,L2+L1,L1+L0,L0+LT,LT+T0,T0+T1,T1+T2} + + vext.u8 q2, q1, q1, #14 //q2:{x,x, L3+L2,L2+L1,L1+L0,L0+LT,LT+T0,T0+T1} + vadd.u16 q3, q2, q1 //q3:{x,x,L3+L2+L2+L1,L2+L1+L1+L0,L1+L0+L0+LT,L0+LT+LT+T0,LT+T0+T0+T1,T0+T1+T1+T2} + + //Calculate the hd0~hd9 + vqrshrn.u16 d1, q3, #2 + vqrshrn.u16 d0, q2, #1 + + //Adjust the data sequence for setting the luma MB + vmov d3, d1 + vtrn.8 d0, d1 + vext.u8 d2, d1, d1, #6 + vst2.16 {d2[3], d3[3]}, [r0]! + vst2.16 {d0[2], d1[2]}, [r0]! + vmov d3, d0 + vst2.16 {d2[2], d3[2]}, [r0]! + vst2.16 {d0[1], d1[1]}, [r0] + +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsIChromaPredV_neon + //stmdb sp!, { r2-r5, lr} + //Get the top row (8 byte) + sub r3, r1, r2 + vldr d0, [r3] + + //Set the chroma MB using top row data + vst1.8 {d0}, [r0]! + vst1.8 {d0}, [r0]! + vst1.8 {d0}, [r0]! 
+ vst1.8 {d0}, [r0]! + vst1.8 {d0}, [r0]! + vst1.8 {d0}, [r0]! + vst1.8 {d0}, [r0]! + vst1.8 {d0}, [r0] + +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsIChromaPredH_neon + //stmdb sp!, { r2-r5, lr} + ////Get the left column (8 byte) + sub r3, r1, #1 + vld1.8 {d0[]}, [r3], r2 + vld1.8 {d1[]}, [r3], r2 + vld1.8 {d2[]}, [r3], r2 + vld1.8 {d3[]}, [r3], r2 + vld1.8 {d4[]}, [r3], r2 + vld1.8 {d5[]}, [r3], r2 + vld1.8 {d6[]}, [r3], r2 + vld1.8 {d7[]}, [r3] + + //Set the chroma MB using left column data + vst1.8 {d0}, [r0]! + vst1.8 {d1}, [r0]! + vst1.8 {d2}, [r0]! + vst1.8 {d3}, [r0]! + vst1.8 {d4}, [r0]! + vst1.8 {d5}, [r0]! + vst1.8 {d6}, [r0]! + vst1.8 {d7}, [r0] + +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsIChromaPredDc_neon + //stmdb sp!, { r2-r5, lr} + //Load the left column data (8 bytes) + sub r3, r1, #1 + GET_8BYTE_DATA d0, r3, r2 + + //Load the top row data (8 bytes) + sub r3, r1, r2 + vldr d1, [r3] + + //Calculate the sum of left column and top row + vpaddl.u8 q0, q0 + vpaddl.u16 q0, q0 + vadd.u32 d2, d0, d1 //'m1' save to d2 + + vrshr.u32 q0, q0, #2 //calculate 'm2','m3' + vrshr.u32 d2, d2, #3 //calculate 'm4' + + //duplicate the 'mx' to a vector line + vdup.8 d4, d2[0] + vdup.8 d5, d1[4] + vdup.8 d6, d0[4] + vdup.8 d7, d2[4] + + //Set the chroma MB + vst2.32 {d4[0],d5[0]}, [r0]! + vst2.32 {d4[0],d5[0]}, [r0]! + vst2.32 {d4[0],d5[0]}, [r0]! + vst2.32 {d4[0],d5[0]}, [r0]! + vst2.32 {d6[0],d7[0]}, [r0]! + vst2.32 {d6[0],d7[0]}, [r0]! + vst2.32 {d6[0],d7[0]}, [r0]! 
+ vst2.32 {d6[0],d7[0]}, [r0] + +WELS_ASM_FUNC_END + + +//Table {{1,2,3,4,1,2,3,4}*17} +CONST0_GET_I_CHROMA_PRED_PLANE: .long 0x44332211, 0x44332211//0x140f0a05, 0x28231e19 +//Table {-3,-2,-1,0,1,2,3,4} +CONST1_GET_I_CHROMA_PRED_PLANE: .long 0xfffefffd, 0x0000ffff,0x00020001,0x00040003 + +WELS_ASM_FUNC_BEGIN WelsIChromaPredPlane_neon + //stmdb sp!, { r2-r5, lr} + //Load the top row data + sub r3, r1, #1 + sub r3, r2 + vld1.32 {d1[0]}, [r3] + add r3, #5 + vld1.32 {d0[0]}, [r3] + + //Load the left column data + sub r3, #5 + vld1.8 {d1[4]}, [r3], r2 + vld1.8 {d1[5]}, [r3], r2 + vld1.8 {d1[6]}, [r3], r2 + vld1.8 {d1[7]}, [r3], r2 //d1:{LT,T0,T1,T2,LT,L0,L1,L2} + add r3, r2 + vld1.8 {d0[4]}, [r3], r2 + vld1.8 {d0[5]}, [r3], r2 + vld1.8 {d0[6]}, [r3], r2 + vld1.8 {d0[7]}, [r3] //d0:{T4,T5,T6,T7,L4,L5,L6.L7} + + + //Save T7 to d3 for next step + vdup.u8 d3, d0[3] + //Save L7 to d4 for next step + vdup.u8 d4, d0[7] + + //Calculate the value of 'a' and save to q2 + vaddl.u8 q2, d3, d4 + vshl.u16 q2, #4 + + //Load the table {{1,2,3,4,1,2,3,4}*17} + adr r3, CONST0_GET_I_CHROMA_PRED_PLANE + vld1.32 {d2}, [r3] + + //Calculate the 'b','c', and save to q0 + vrev32.8 d1, d1 + vsubl.u8 q0, d0, d1 + vmovl.u8 q1, d2 + vmul.s16 q0, q1 + vpaddl.s16 q0, q0 + vpaddl.s32 q0, q0 + vrshr.s64 q0, #5 + + //Load the table {-3,-2,-1,0,1,2,3,4} to q3 + adr r3, CONST1_GET_I_CHROMA_PRED_PLANE + vld1.32 {d6, d7}, [r3] + + //Duplicate the 'b','c' to q0, q1 for SIMD instruction + vdup.s16 q1, d1[0] + vdup.s16 q0, d0[0] + + //Calculate the "(a + b * (j - 3) + c * (- 3) + 16) >> 5;" + vmla.s16 q2, q0, q3 + vmla.s16 q2, q1, d6[0] + vqrshrun.s16 d0, q2, #5 + + //Set a line of chroma MB + vst1.u32 {d0}, [r0]! + + //Do the same processing for each line. + mov r3, #7 +loop_0_get_i_chroma_pred_plane: + vadd.s16 q2, q1 + vqrshrun.s16 d0, q2, #5 + vst1.u32 {d0}, [r0]! 
+ subs r3, #1 + bne loop_0_get_i_chroma_pred_plane + +WELS_ASM_FUNC_END + +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm/intra_pred_sad_3_opt_neon.S b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm/intra_pred_sad_3_opt_neon.S new file mode 100644 index 000000000..98498d4fb --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm/intra_pred_sad_3_opt_neon.S @@ -0,0 +1,735 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifdef HAVE_NEON +#include "arm_arch_common_macro.S" + + + //The data sequence will be used +.macro GET_8BYTE_DATA_L0 arg0, arg1, arg2 + vld1.8 {\arg0[0]}, [\arg1], \arg2 + vld1.8 {\arg0[1]}, [\arg1], \arg2 + vld1.8 {\arg0[2]}, [\arg1], \arg2 + vld1.8 {\arg0[3]}, [\arg1], \arg2 + vld1.8 {\arg0[4]}, [\arg1], \arg2 + vld1.8 {\arg0[5]}, [\arg1], \arg2 + vld1.8 {\arg0[6]}, [\arg1], \arg2 + vld1.8 {\arg0[7]}, [\arg1], \arg2 +.endm + +.macro HDM_TRANSFORM_4X4_L0 arg0, arg1, arg2,arg3, arg4, arg5, arg6, arg7, arg8 + + //Do the vertical transform + vaddl.u8 q0, \arg0, \arg1 //{0,4,8,12,1,5,9,13} + vsubl.u8 q1, \arg0, \arg1 //{2,6,10,14,3,7,11,15} + vswp d1, d2 + vadd.s16 q2, q0, q1 //{0,1,2,3,4,5,6,7} + vsub.s16 q1, q0, q1 //{12,13,14,15,8,9,10,11} + + //Do the horizontal transform + vtrn.32 q2, q1 + vadd.s16 q0, q2, q1 + vsub.s16 q1, q2, q1 + + vtrn.16 q0, q1 + vadd.s16 q2, q0, q1 + vsub.s16 q1, q0, q1 + + vmov.s16 d0, d4 + vmov.s16 d1, d2 + + vabs.s16 d3, d3 + + //16x16_v + vtrn.32 d0, d1 //{0,1,3,2} + vaba.s16 \arg5, d0, \arg2 //16x16_v + vaba.s16 \arg5, d1, \arg8 + vaba.s16 \arg5, d5, \arg8 + vadd.u16 \arg5, d3 + + //16x16_h + vtrn.16 d4, d5 //{0,4,12,8} + vaba.s16 \arg6, d4, \arg3 //16x16_h + vabs.s16 d2, d2 + vabs.s16 d5, d5 + vadd.u16 d2, d3 + vadd.u16 d2, d5 + vadd.u16 \arg6, d2 + + //16x16_dc_both + vaba.s16 \arg7, d4, \arg4 //16x16_dc_both + vadd.u16 \arg7, d2 +.endm + +WELS_ASM_FUNC_BEGIN WelsIntra16x16Combined3Satd_neon + stmdb sp!, {r4-r7, lr} + vpush {q4-q7} + + //Get the top line data to 'q15'(16 bytes) + sub r7, r0, r1 + vld1.8 {q15}, [r7] + + //Get the left colume data to 'q14' (16 bytes) + sub r7, r0, #1 + GET_8BYTE_DATA_L0 d28, r7, r1 + GET_8BYTE_DATA_L0 d29, r7, r1 + + //Calculate the mean value and save to 'q13->d27(reserve the d26)' (2 bytes) + //Calculate the 16x16_dc_both mode SATD + vaddl.u8 q0, d30, d31 + vaddl.u8 q1, d28, d29 + vadd.u16 q0, q1 + vadd.u16 d0, d1 + vpaddl.u16 d0, d0 + vpaddl.u32 d0, d0 + + //Calculate the mean value + 
vrshr.u16 d0, #5 + vshl.u16 d27, d0, #4 + + + //Calculate the 16x16_v mode SATD and save to "q11, 12" + vshll.u8 q0, d30, #2 + vshll.u8 q1, d31, #2 + vtrn.32 q0, q1 + vadd.s16 q2, q0, q1 + vsub.s16 q1, q0, q1 + vtrn.16 q2, q1 + vadd.s16 q12, q2, q1 + vsub.s16 q11, q2, q1 + vtrn.32 q12, q11 //{0,1,3,2, 4,5,7,6} q12 + //{8,9,11,10, 12,13,15,14} q11 + //Calculate the 16x16_h mode SATD and save to "q9, q10" + vshll.u8 q0, d28, #2 + vshll.u8 q1, d29, #2 + vtrn.32 q0, q1 + vadd.s16 q2, q0, q1 + vsub.s16 q1, q0, q1 + vtrn.16 q2, q1 + vadd.s16 q10, q2, q1 + vsub.s16 q9, q2, q1 + vtrn.32 q10, q9 //{0,1,3,2, 4,5,7,6} q10 + //{8,9,11,10, 12,13,15,14} q9 + + vmov.i32 d17, #0//Save the SATD of DC_BOTH + vmov.i32 d16, #0//Save the SATD of H + vmov.i32 d15, #0//Save the SATD of V + vmov.i32 d14, #0//For zero D register + //Load the p_enc data and save to "q3 ~ q6"--- 16X4 bytes + vld1.32 {q3}, [r2], r3 + vld1.32 {q4}, [r2], r3 + vld1.32 {q5}, [r2], r3 + vld1.32 {q6}, [r2], r3 + vtrn.32 q3, q4 + vtrn.32 q5, q6 + + HDM_TRANSFORM_4X4_L0 d6, d10, d24, d20, d27, d15, d16, d17, d14 + HDM_TRANSFORM_4X4_L0 d7, d11, d22, d20, d27, d15, d16, d17, d14 + HDM_TRANSFORM_4X4_L0 d8, d12, d25, d20, d27, d15, d16, d17, d14 + HDM_TRANSFORM_4X4_L0 d9, d13, d23, d20, d27, d15, d16, d17, d14 + + //Load the p_enc data and save to "q3 ~ q6"--- 16X4 bytes + vld1.32 {q3}, [r2], r3 + vld1.32 {q4}, [r2], r3 + vld1.32 {q5}, [r2], r3 + vld1.32 {q6}, [r2], r3 + vtrn.32 q3, q4 + vtrn.32 q5, q6 + + HDM_TRANSFORM_4X4_L0 d6, d10, d24, d21, d27, d15, d16, d17, d14 + HDM_TRANSFORM_4X4_L0 d7, d11, d22, d21, d27, d15, d16, d17, d14 + HDM_TRANSFORM_4X4_L0 d8, d12, d25, d21, d27, d15, d16, d17, d14 + HDM_TRANSFORM_4X4_L0 d9, d13, d23, d21, d27, d15, d16, d17, d14 + + //Load the p_enc data and save to "q3 ~ q6"--- 16X4 bytes + vld1.32 {q3}, [r2], r3 + vld1.32 {q4}, [r2], r3 + vld1.32 {q5}, [r2], r3 + vld1.32 {q6}, [r2], r3 + vtrn.32 q3, q4 + vtrn.32 q5, q6 + + HDM_TRANSFORM_4X4_L0 d6, d10, d24, d18, d27, d15, d16, d17, 
d14 + HDM_TRANSFORM_4X4_L0 d7, d11, d22, d18, d27, d15, d16, d17, d14 + HDM_TRANSFORM_4X4_L0 d8, d12, d25, d18, d27, d15, d16, d17, d14 + HDM_TRANSFORM_4X4_L0 d9, d13, d23, d18, d27, d15, d16, d17, d14 + + //Load the p_enc data and save to "q3 ~ q6"--- 16X4 bytes + vld1.32 {q3}, [r2], r3 + vld1.32 {q4}, [r2], r3 + vld1.32 {q5}, [r2], r3 + vld1.32 {q6}, [r2], r3 + vtrn.32 q3, q4 + vtrn.32 q5, q6 + + HDM_TRANSFORM_4X4_L0 d6, d10, d24, d19, d27, d15, d16, d17, d14 + HDM_TRANSFORM_4X4_L0 d7, d11, d22, d19, d27, d15, d16, d17, d14 + HDM_TRANSFORM_4X4_L0 d8, d12, d25, d19, d27, d15, d16, d17, d14 + HDM_TRANSFORM_4X4_L0 d9, d13, d23, d19, d27, d15, d16, d17, d14 + + //Get the data from stack + ldr r5, [sp, #84] //the addr of Best_mode + ldr r6, [sp, #88] //the value of i_lambda + + //vadd.u16 d24, d25 + vrshr.u16 d15, #1 + vpaddl.u16 d15, d15 + vpaddl.u32 d15, d15 + vmov.u32 r0, d15[0] + + //vadd.u16 d22, d23 + vrshr.u16 d16, #1 + vpaddl.u16 d16, d16 + vpaddl.u32 d16, d16 + vmov.u32 r1, d16[0] + add r1, r1, r6, lsl #1 + + //vadd.u16 d20, d21 + vrshr.u16 d17, #1 + vpaddl.u16 d17, d17 + vpaddl.u32 d17, d17 + vmov.u32 r2, d17[0] + add r2, r2, r6, lsl #1 + + mov r4, #0 + cmp r1, r0 + movcc r0, r1 + movcc r4, #1 + cmp r2, r0 + movcc r0, r2 + movcc r4, #2 + + str r4, [r5] + + vpop {q4-q7} + ldmia sp!, {r4-r7, lr} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsIntra16x16Combined3Sad_neon + stmdb sp!, {r4-r7, lr} + + //Get the top line data to 'q15'(16 bytes) + sub r4, r0, r1 + vld1.8 {q15}, [r4] + + //Get the left colume data to 'q14' (16 bytes) + sub r4, r0, #1 + GET_8BYTE_DATA_L0 d28, r4, r1 + GET_8BYTE_DATA_L0 d29, r4, r1 + + //Calculate the mean value and save to 'q13' (8 bytes) + //Calculate the 16x16_dc_both mode SATD + vaddl.u8 q0, d30, d31 + vaddl.u8 q1, d28, d29 + vadd.u16 q0, q1 + vadd.u16 d0, d1 + vpaddl.u16 d0, d0 + vpaddl.u32 d0, d0 + + //Calculate the mean value + vrshr.u16 d0, d0, #5 + vdup.8 q13, d0[0] + + sub r4, r0, #1 + + vmov.i32 q12, #0//Save the SATD of 
DC_BOTH + vmov.i32 q11, #0//Save the SATD of H + vmov.i32 q10, #0//Save the SATD of V + + mov lr, #16 +sad_intra_16x16_x3_opt_loop0: + //Get the left colume data to 'd0' (16 bytes) + vld1.8 {d0[]}, [r4], r1 + + //Load the p_enc data and save to "q1 ~ q2"--- 16X4 bytes + vld1.8 {q1}, [r2], r3 + + subs lr, #1 + //Do the SAD for top colume + vabal.u8 q12, d30, d2 + vabal.u8 q12, d31, d3 + + //Do the SAD for left colume + vabal.u8 q11, d0, d2 + vabal.u8 q11, d0, d3 + + //Do the SAD for mean value + vabal.u8 q10, d26, d2 + vabal.u8 q10, d26, d3 + + bne sad_intra_16x16_x3_opt_loop0 + + //Get the data from stack + ldr r5, [sp, #20] //the addr of Best_mode + ldr r6, [sp, #24] //the value of i_lambda + + vadd.u16 d24, d25 + vpaddl.u16 d24, d24 + vpaddl.u32 d24, d24 + vmov.u32 r0, d24[0] + + vadd.u16 d22, d23 + vpaddl.u16 d22, d22 + vpaddl.u32 d22, d22 + vmov.u32 r1, d22[0] + add r1, r1, r6, lsl #1 + + vadd.u16 d20, d21 + vpaddl.u16 d20, d20 + vpaddl.u32 d20, d20 + vmov.u32 r2, d20[0] + add r2, r2, r6, lsl #1 + + mov r4, #0 + cmp r1, r0 + movcc r0, r1 + movcc r4, #1 + cmp r2, r0 + movcc r0, r2 + movcc r4, #2 + + str r4, [r5] + + ldmia sp!, {r4-r7, lr} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsIntra8x8Combined3Sad_neon + stmdb sp!, {r4-r7, lr} + + //Get the data from stack + ldr r4, [sp, #32] //p_dec_cr + ldr r5, [sp, #36] //p_enc_cr + + //Get the left colume data to 'd28(cb), d30(cr)' (16 bytes) + sub r6, r0, #1 + GET_8BYTE_DATA_L0 d28, r6, r1 + sub r6, r4, #1 + GET_8BYTE_DATA_L0 d30, r6, r1 + + //Get the top line data to 'd29(cb), d31(cr)'(16 bytes) + sub r6, r0, r1 + vld1.8 {d29}, [r6] + sub r6, r4, r1 + vld1.8 {d31}, [r6] + + //Calculate the sum of left column and top row + vmov.i32 q0, q14 + vpaddl.u8 q0, q0 + vpaddl.u16 q0, q0 + vadd.u32 d2, d0, d1 //'m1' save to d2 + vrshr.u32 q0, q0, #2 //calculate 'm2','m3' + vrshr.u32 d2, d2, #3 //calculate 'm4' + + //duplicate the 'mx' to a vector line + vdup.8 d27, d2[0] + vdup.8 d26, d1[4] + vtrn.32 d27, d26 + + vdup.8 d26, 
d0[4] + vdup.8 d25, d2[4] + vtrn.32 d26, d25 //Save to "d27, d26" + + vmov.i32 q0, q15 + vpaddl.u8 q0, q0 + vpaddl.u16 q0, q0 + vadd.u32 d2, d0, d1 //'m1' save to d2 + vrshr.u32 q0, q0, #2 //calculate 'm2','m3' + vrshr.u32 d2, d2, #3 //calculate 'm4' + + //duplicate the 'mx' to a vector line + vdup.8 d25, d2[0] + vdup.8 d24, d1[4] + vtrn.32 d25, d24 + + vdup.8 d24, d0[4] + vdup.8 d23, d2[4] + vtrn.32 d24, d23 //Save to "d25, d24" + + vmov.i32 q11, #0//Save the SATD of DC_BOTH + vmov.i32 q10, #0//Save the SATD of H + vmov.i32 q9 , #0//Save the SATD of V + sub r6, r0, #1 + sub r7, r4, #1 + mov lr, #4 +sad_intra_8x8_x3_opt_loop0: + + //Load the p_enc data and save to "q1 ~ q2"--- 16X4 bytes + vld1.8 {d0}, [r2], r3 + vld1.8 {d1}, [r5], r3 + + //Get the left colume data to 'd0' (16 bytes) + vld1.8 {d2[]}, [r6], r1 + vld1.8 {d3[]}, [r7], r1 + + subs lr, #1 + + + //Do the SAD for top colume + vabal.u8 q11, d29, d0 + vabal.u8 q11, d31, d1 + + //Do the SAD for left colume + vabal.u8 q10, d2, d0 + vabal.u8 q10, d3, d1 + + //Do the SAD for mean value + vabal.u8 q9, d27, d0 + vabal.u8 q9, d25, d1 + + + bne sad_intra_8x8_x3_opt_loop0 + + mov lr, #4 +sad_intra_8x8_x3_opt_loop1: + + //Load the p_enc data and save to "q1 ~ q2"--- 16X4 bytes + vld1.8 {d0}, [r2], r3 + vld1.8 {d1}, [r5], r3 + + //Get the left colume data to 'd0' (16 bytes) + vld1.8 {d2[]}, [r6], r1 + vld1.8 {d3[]}, [r7], r1 + + subs lr, #1 + + + //Do the SAD for top colume + vabal.u8 q11, d29, d0 + vabal.u8 q11, d31, d1 + + //Do the SAD for left colume + vabal.u8 q10, d2, d0 + vabal.u8 q10, d3, d1 + + //Do the SAD for mean value + vabal.u8 q9, d26, d0 + vabal.u8 q9, d24, d1 + + + bne sad_intra_8x8_x3_opt_loop1 + //Get the data from stack + ldr r5, [sp, #20] //the addr of Best_mode + ldr r6, [sp, #24] //the value of i_lambda + + vadd.u16 d22, d23 + vpaddl.u16 d22, d22 + vpaddl.u32 d22, d22 + vmov.u32 r0, d22[0] + add r0, r0, r6, lsl #1 + + vadd.u16 d20, d21 + vpaddl.u16 d20, d20 + vpaddl.u32 d20, d20 + vmov.u32 r1, 
d20[0] + add r1, r1, r6, lsl #1 + + vadd.u16 d18, d19 + vpaddl.u16 d18, d18 + vpaddl.u32 d18, d18 + vmov.u32 r2, d18[0] + + mov r4, #2 + cmp r1, r0 + movcc r0, r1 + movcc r4, #1 + cmp r2, r0 + movcc r0, r2 + movcc r4, #0 + + str r4, [r5] + + ldmia sp!, {r4-r7, lr} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsIntra8x8Combined3Satd_neon + stmdb sp!, {r4-r7, lr} + vpush {q4-q7} + + //Get the data from stack + ldr r4, [sp, #96] //p_dec_cr + ldr r5, [sp, #100] //p_enc_cr + + //Get the top line data to 'd29(cb), d31(cr)'(16 bytes) + sub r6, r0, r1 + vld1.8 {d29}, [r6] + sub r6, r4, r1 + vld1.8 {d31}, [r6] + + //Get the left colume data to 'd28(cb), d30(cr)' (16 bytes) + sub r6, r0, #1 + GET_8BYTE_DATA_L0 d28, r6, r1 + sub r6, r4, #1 + GET_8BYTE_DATA_L0 d30, r6, r1 + + //Calculate the 16x16_v mode SATD and save to "q12, 13" + vshll.u8 q0, d29, #2 + vshll.u8 q1, d31, #2 + vtrn.32 q0, q1 + vadd.s16 q2, q0, q1 + vsub.s16 q1, q0, q1 + vtrn.16 q2, q1 + vadd.s16 q13, q2, q1 + vsub.s16 q12, q2, q1 + vtrn.32 q13, q12 //{0,1,3,2, 4,5,7,6} q13 + //{8,9,11,10, 12,13,15,14} q12 + //Calculate the 16x16_h mode SATD and save to "q10, q11" + vshll.u8 q0, d28, #2 + vshll.u8 q1, d30, #2 + vtrn.32 q0, q1 + vadd.s16 q2, q0, q1 + vsub.s16 q1, q0, q1 + vtrn.16 q2, q1 + vadd.s16 q11, q2, q1 + vsub.s16 q10, q2, q1 + vtrn.32 q11, q10 //{0,1,3,2, 4,5,7,6} q11 + //{8,9,11,10, 12,13,15,14} q10 + + //Calculate the sum of left column and top row + //vmov.i32 q0, q14 + vpaddl.u8 q0, q14 + vpaddl.u16 q0, q0 + vadd.u32 d2, d0, d1 + + vpaddl.u8 q2, q15 + vpaddl.u16 q2, q2 + vadd.u32 d3, d4, d5 + + vtrn.32 q0, q2 + vrshr.u32 q1, #3 + vrshr.u32 q2, #2 + vshll.u32 q9, d4, #4 // {2cb, 2cr} q9 + vshll.u32 q8, d5, #4 // {1cb, 1cr} q8 + vshll.u32 q7, d2, #4 // {0cb, 3cb} q7 + vshll.u32 q6, d3, #4 // {0cr, 3cr} q6 + + + vmov.i32 d28, #0//Save the SATD of DC_BOTH + vmov.i32 d10, #0//Save the SATD of H + vmov.i32 d11, #0//Save the SATD of V + vmov.i32 d30, #0//For zero D register + //Load the p_enc data and save 
to "q3 ~ q6"--- 16X4 bytes + vld1.32 {d6}, [r2], r3 + vld1.32 {d7}, [r2], r3 + vld1.32 {d8}, [r2], r3 + vld1.32 {d9}, [r2], r3 + vtrn.32 d6, d7 + vtrn.32 d8, d9 + HDM_TRANSFORM_4X4_L0 d6, d8, d26, d22, d14, d11, d10, d28, d30 + HDM_TRANSFORM_4X4_L0 d7, d9, d27, d22, d16, d11, d10, d28, d30 + + vld1.32 {d6}, [r5], r3 + vld1.32 {d7}, [r5], r3 + vld1.32 {d8}, [r5], r3 + vld1.32 {d9}, [r5], r3 + vtrn.32 d6, d7 + vtrn.32 d8, d9 + HDM_TRANSFORM_4X4_L0 d6, d8, d24, d20, d12, d11, d10, d28, d30 + HDM_TRANSFORM_4X4_L0 d7, d9, d25, d20, d17, d11, d10, d28, d30 + + //Load the p_enc data and save to "q3 ~ q6"--- 16X4 bytes + vld1.32 {d6}, [r2], r3 + vld1.32 {d7}, [r2], r3 + vld1.32 {d8}, [r2], r3 + vld1.32 {d9}, [r2], r3 + vtrn.32 d6, d7 + vtrn.32 d8, d9 + HDM_TRANSFORM_4X4_L0 d6, d8, d26, d23, d18, d11, d10, d28, d30 + HDM_TRANSFORM_4X4_L0 d7, d9, d27, d23, d15, d11, d10, d28, d30 + + vld1.32 {d6}, [r5], r3 + vld1.32 {d7}, [r5], r3 + vld1.32 {d8}, [r5], r3 + vld1.32 {d9}, [r5], r3 + vtrn.32 d6, d7 + vtrn.32 d8, d9 + HDM_TRANSFORM_4X4_L0 d6, d8, d24, d21, d19, d11, d10, d28, d30 + HDM_TRANSFORM_4X4_L0 d7, d9, d25, d21, d13, d11, d10, d28, d30 + + //Get the data from stack + ldr r5, [sp, #84] //the addr of Best_mode + ldr r6, [sp, #88] //the value of i_lambda + + vrshr.u16 d11, #1 + vpaddl.u16 d11, d11 + vpaddl.u32 d11, d11 + vmov.u32 lr, d11[0] + add lr, lr, r6, lsl #1 + + vrshr.u16 d10, #1 + vpaddl.u16 d10, d10 + vpaddl.u32 d10, d10 + vmov.u32 r3, d10[0] + add r3, r3, r6, lsl #1 + + vrshr.u16 d28, #1 + vpaddl.u16 d28, d28 + vpaddl.u32 d28, d28 + vmov.u32 r2, d28[0] + + mov r6, #2 + cmp r3, lr + movcc lr, r3 + movcc r6, #1 + cmp r2, lr + movcc lr, r2 + movcc r6, #0 + + str r6, [r5] + mov r0, lr + + vpop {q4-q7} + ldmia sp!, {r4-r7, lr} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsIntra4x4Combined3Satd_neon + stmdb sp!, {r4-r7, lr} + + //Get the top line data to 'd31[0~3]'(4 bytes) + sub r7, r0, r1 + vld1.32 {d31[0]}, [r7] + + //Get the left colume data to 'd31[4~7]' (4 
bytes) + sub r7, r0, #1 + vld1.8 {d31[4]}, [r7], r1 + vld1.8 {d31[5]}, [r7], r1 + vld1.8 {d31[6]}, [r7], r1 + vld1.8 {d31[7]}, [r7], r1 + + //Calculate the mean value and save to 'd30' (2 bytes) + vpaddl.u8 d0, d31 + vpaddl.u16 d0, d0 + vpaddl.u32 d0, d0 + //Calculate the mean value + vrshr.u16 d0, #3 + vshl.u16 d30, d0, #4 + + //Calculate the 16x16_v mode SATD and save to "d29" + //Calculate the 16x16_h mode SATD and save to "d28" + vshll.u8 q0, d31, #2 + vtrn.32 d0, d1 + vadd.s16 d2, d0, d1 + vsub.s16 d1, d0, d1 + vtrn.16 d2, d1 + vadd.s16 d29, d2, d1 + vsub.s16 d28, d2, d1 + vtrn.32 d29, d28 //{0,1,3,2 top} d29 + //{0,1,3,2 left} d28 + + vmov.i32 d27, #0//Save the SATD of DC_BOTH + vmov.i32 d26, #0//Save the SATD of H + vmov.i32 d25, #0//Save the SATD of V + vmov.i32 d24, #0//For zero D register + + //Load the p_enc data and save to "d22,d23"--- 4X4 bytes + vld1.32 {d23[0]}, [r2], r3 + vld1.32 {d23[1]}, [r2], r3 + vld1.32 {d22[0]}, [r2], r3 + vld1.32 {d22[1]}, [r2], r3 + + HDM_TRANSFORM_4X4_L0 d23, d22, d29, d28, d30, d25, d26, d27, d24 + + //Get the data from stack + ldr r5, [sp, #28] //the value of lambda2 + ldr r6, [sp, #32] //the value of lambda1 + ldr r7, [sp, #36] //the value of lambda0 + + vrshr.u16 d25, #1 + vpaddl.u16 d25, d25 + vpaddl.u32 d25, d25 + vmov.u32 r0, d25[0] + add r0, r7 + + vrshr.u16 d26, #1 + vpaddl.u16 d26, d26 + vpaddl.u32 d26, d26 + vmov.u32 r1, d26[0] + add r1, r6 + + vrshr.u16 d27, #1 + vpaddl.u16 d27, d27 + vpaddl.u32 d27, d27 + vmov.u32 r2, d27[0] + add r2, r5 + + ldr r5, [sp, #20] //p_dst + ldr r6, [sp, #24] //the addr of Best_mode + + mov r4, r0 + cmp r1, r4 + movcc r4, r1 + cmp r2, r4 + movcc r4, r2 + + //The compare sequence affect the resule + cmp r4, r2 + bne satd_intra_4x4_x3_opt_jump0 + mov r0, #2 + str r0, [r6] + vshr.u32 d0, d30, #4 // {2cb, 2cr} q9 + vdup.8 q1, d0[0] + vst1.8 {q1}, [r5] + //... 
+ bl satd_intra_4x4_x3_opt_end +satd_intra_4x4_x3_opt_jump0: + + cmp r4, r1 + bne satd_intra_4x4_x3_opt_jump1 + mov r0, #1 + str r0, [r6] + vdup.8 d0, d31[4] + vdup.8 d1, d31[5] + vdup.8 d2, d31[6] + vdup.8 d3, d31[7] + vst4.32 {d0[0],d1[0],d2[0],d3[0]}, [r5] + + bl satd_intra_4x4_x3_opt_end +satd_intra_4x4_x3_opt_jump1: + + mov r0, #0 + str r0, [r6] + vst1.32 {d31[0]}, [r5]! + vst1.32 {d31[0]}, [r5]! + vst1.32 {d31[0]}, [r5]! + vst1.32 {d31[0]}, [r5]! + + +satd_intra_4x4_x3_opt_end: + mov r0, r4 + + ldmia sp!, {r4-r7, lr} +WELS_ASM_FUNC_END + +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm/memory_neon.S b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm/memory_neon.S new file mode 100644 index 000000000..0088727c8 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm/memory_neon.S @@ -0,0 +1,62 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifdef HAVE_NEON +#include "arm_arch_common_macro.S" + + +WELS_ASM_FUNC_BEGIN WelsSetMemZero_neon + veor q0, q0 + cmp r1, #32 + beq mem_zero_32_neon_start + blt mem_zero_24_neon_start + +mem_zero_loop: + subs r1, r1, #64 + vst1.64 {q0}, [r0]! + vst1.64 {q0}, [r0]! + vst1.64 {q0}, [r0]! + vst1.64 {q0}, [r0]! + bne mem_zero_loop + bx lr + +mem_zero_32_neon_start: + vst1.64 {q0}, [r0]! + vst1.64 {q0}, [r0]! + bx lr + +mem_zero_24_neon_start: + vst1.64 {q0}, [r0]! + vst1.64 {d0}, [r0]! +WELS_ASM_FUNC_END + +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm/pixel_neon.S b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm/pixel_neon.S new file mode 100644 index 000000000..1a93d6c2c --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm/pixel_neon.S @@ -0,0 +1,882 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifdef HAVE_NEON +#include "arm_arch_common_macro.S" + +.macro SATD_16x4 + vld1.64 {q0}, [r0,:128], r1 + vld1.64 {q1}, [r2], r3 + + vsubl.u8 q8, d0, d2 + vld1.64 {q2}, [r0,:128], r1 + + vsubl.u8 q10, d1, d3 + vld1.64 {q3}, [r2], r3 + + vsubl.u8 q9, d4, d6 + vld1.64 {q0}, [r0,:128], r1 + + vsubl.u8 q11, d5, d7 + vld1.64 {q1}, [r2], r3 + + vsubl.u8 q12, d0, d2 + vld1.64 {q2}, [r0,:128], r1 + + vsubl.u8 q14, d1, d3 + vadd.s16 q0, q8, q9 + + vld1.64 {q3}, [r2], r3 + vsub.s16 q1, q8, q9 + + vsubl.u8 q13, d4, d6 + vsubl.u8 q15, d5, d7 + + vadd.s16 q2, q12, q13 + vsub.s16 q3, q12, q13 + + vadd.s16 q8, q10, q11 + vsub.s16 q9, q10, q11 + + vadd.s16 q10, q14, q15 + vsub.s16 q11, q14, q15 + + vadd.s16 q12, q0, q2 + vsub.s16 q14, q0, q2 + + vadd.s16 q13, q8, q10 + vsub.s16 q15, q8, q10 + + vsub.s16 q0, q1, q3 + vadd.s16 q2, q1, q3 + + vsub.s16 q1, q9, q11 + vadd.s16 q3, q9, q11 + + vtrn.16 q12, q14 + vtrn.16 q13, q15 + + vadd.s16 q8, q12, q14 + vabd.s16 q10, q12, q14 + + vadd.s16 q9, q13, q15 + vabd.s16 q11, q13, q15 + + vabs.s16 q8, q8 + vabs.s16 q9, q9 + + vtrn.16 q0, q2 + vtrn.16 q1, q3 + + vadd.s16 q12, q0, q2 + vabd.s16 q14, q0, q2 + + vadd.s16 q13, q1, q3 + vabd.s16 q15, q1, q3 + + vabs.s16 q12, q12 + vabs.s16 q13, q13 + + vtrn.32 q8, q10 + vtrn.32 q9, q11 + + vtrn.32 q12, q14 + vtrn.32 q13, q15 + + vmax.s16 q0, q8, q10 + vmax.s16 q1, q9, q11 + vmax.s16 q2, q12, q14 + vmax.s16 q3, q13, q15 + + vadd.u16 q0, q0, q1 + vadd.u16 q2, q2, q3 +.endm + +.macro SATD_8x4 + + vld1.64 {d0}, [r0,:64], r1 + vld1.64 {d1}, [r2], r3 + + vld1.64 {d2}, [r0,:64], r1 + vsubl.u8 q8, d0, d1 + + vld1.64 {d3}, [r2], r3 + vsubl.u8 q9, d2, d3 + + vld1.64 {d4}, [r0,:64], r1 + vld1.64 {d5}, [r2], r3 + + vadd.s16 q12, q8, q9 + vsubl.u8 q10, d4, d5 + + vld1.64 {d6}, [r0,:64], r1 + vld1.64 {d7}, [r2], r3 + + vsubl.u8 q11, d6, d7 + vsub.s16 q13, q8, q9 + + vadd.s16 q14, q10, q11 + vsub.s16 q15, q10, q11 + + vadd.s16 q0, q12, q14 + vsub.s16 q1, q12, q14 + + vsub.s16 q2, q13, q15 + vadd.s16 q3, 
q13, q15 + + vtrn.16 q0, q1 + vtrn.16 q2, q3 + + vadd.s16 q8, q0, q1 + vabd.s16 q9, q0, q1 + + vabs.s16 q8, q8 + vadd.s16 q10, q2, q3 + + vabd.s16 q11, q2, q3 + vabs.s16 q10, q10 + + vtrn.32 q8, q9 + vtrn.32 q10, q11 + + vmax.s16 q0, q8, q9 + vmax.s16 q1, q10, q11 +.endm + +.macro SAD_16x4 + vld1.64 {q6}, [r0, :128], r1 + vabal.u8 q10, d8, d10 + + vld1.64 {q7}, [r2], r3 + vabal.u8 q11, d9, d11 + + vld1.64 {q0}, [r0, :128], r1 + vabal.u8 q12, d12, d14 + + vld1.64 {q1}, [r2], r3 + vabal.u8 q13, d13, d15 + + vld1.64 {q2}, [r0, :128], r1 + vabal.u8 q10, d0, d2 + + vld1.64 {q3}, [r2], r3 + vabal.u8 q11, d1, d3 + + vld1.64 {q4}, [r0, :128], r1 + vabal.u8 q12, d4, d6 + + vld1.64 {q5}, [r2], r3 + vabal.u8 q13, d5, d7 +.endm + +.macro SAD_8x4 + vld1.64 {d0}, [r0, :64], r1 + vld1.64 {d1}, [r2], r3 + + vabal.u8 q10, d0, d1 + vld1.64 {d2}, [r0, :64], r1 + + vld1.64 {d3}, [r2], r3 + vabal.u8 q11, d2, d3 + + vld1.64 {d4}, [r0, :64], r1 + vld1.64 {d5}, [r2], r3 + + vabal.u8 q12, d4, d5 + vld1.64 {d6}, [r0, :64], r1 + + vld1.64 {d7}, [r2], r3 + vabal.u8 q13, d6, d7 +.endm + + +WELS_ASM_FUNC_BEGIN WelsSampleSad16x16_neon + vpush {q4-q7} + + vld1.64 {q0}, [r0, :128], r1 + vld1.64 {q1}, [r2], r3 + + vabdl.u8 q10, d0, d2 + vld1.64 {q2}, [r0, :128], r1 + + vabdl.u8 q11, d1, d3 + vld1.64 {q3}, [r2], r3 + + vld1.64 {q4}, [r0, :128], r1 + vabdl.u8 q12, d4, d6 + vld1.64 {q5}, [r2], r3 + vabdl.u8 q13, d5, d7 + + SAD_16x4 + SAD_16x4 + SAD_16x4 + + vld1.64 {q6}, [r0, :128], r1 + vabal.u8 q10, d8, d10 + + vld1.64 {q7}, [r2], r3 + vabal.u8 q11, d9, d11 + + vabal.u8 q12, d12, d14 + vabal.u8 q13, d13, d15 + + vadd.u16 q14, q10, q11 + vadd.u16 q15, q12, q13 + + vadd.u16 q15, q14, q15 + vadd.u16 d0, d30, d31 + vpaddl.u16 d0, d0 + vpaddl.u32 d0, d0 + vmov.u32 r0, d0[0] + + vpop {q4-q7} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsSampleSad16x8_neon + vpush {q4-q7} + + vld1.64 {q0}, [r0, :128], r1 + vld1.64 {q1}, [r2], r3 + + vabdl.u8 q10, d0, d2 + vld1.64 {q2}, [r0, :128], r1 + + vabdl.u8 q11, 
d1, d3 + vld1.64 {q3}, [r2], r3 + + vld1.64 {q4}, [r0, :128], r1 + vabdl.u8 q12, d4, d6 + vld1.64 {q5}, [r2], r3 + vabdl.u8 q13, d5, d7 + + SAD_16x4 + + vld1.64 {q6}, [r0, :128], r1 + vabal.u8 q10, d8, d10 + + vld1.64 {q7}, [r2], r3 + vabal.u8 q11, d9, d11 + + vabal.u8 q12, d12, d14 + vabal.u8 q13, d13, d15 + + vadd.u16 q14, q10, q11 + vadd.u16 q15, q12, q13 + + vadd.u16 q15, q14, q15 + vadd.u16 d0, d30, d31 + vpaddl.u16 d0, d0 + vpaddl.u32 d0, d0 + vmov.u32 r0, d0[0] + vpop {q4-q7} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsSampleSad8x16_neon + + vld1.64 {d0}, [r0, :64], r1 + vld1.64 {d1}, [r2], r3 + + vabdl.u8 q10, d0, d1 + vld1.64 {d2}, [r0, :64], r1 + + vld1.64 {d3}, [r2], r3 + vabdl.u8 q11, d2, d3 + + vld1.64 {d4}, [r0, :64], r1 + vld1.64 {d5}, [r2], r3 + + vabdl.u8 q12, d4, d5 + vld1.64 {d6}, [r0, :64], r1 + + vld1.64 {d7}, [r2], r3 + vabdl.u8 q13, d6, d7 + + SAD_8x4 + SAD_8x4 + SAD_8x4 + + vadd.u16 q14, q10, q11 + vadd.u16 q15, q12, q13 + vadd.u16 q15, q15, q14 + vadd.u16 d0, d30, d31 + vpaddl.u16 d0, d0 + vpaddl.u32 d0, d0 + vmov.u32 r0, d0[0] +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsSampleSad8x8_neon + + vld1.64 {d0}, [r0, :64], r1 + vld1.64 {d1}, [r2], r3 + + vabdl.u8 q10, d0, d1 + vld1.64 {d2}, [r0, :64], r1 + + vld1.64 {d3}, [r2], r3 + vabdl.u8 q11, d2, d3 + + vld1.64 {d4}, [r0, :64], r1 + vld1.64 {d5}, [r2], r3 + + vabdl.u8 q12, d4, d5 + vld1.64 {d6}, [r0, :64], r1 + + vld1.64 {d7}, [r2], r3 + vabdl.u8 q13, d6, d7 + + SAD_8x4 + + vadd.u16 q14, q10, q11 + vadd.u16 q15, q12, q13 + vadd.u16 q15, q15, q14 + vadd.u16 d0, d30, d31 + vpaddl.u16 d0, d0 + vpaddl.u32 d0, d0 + vmov.u32 r0, d0[0] +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsSampleSad4x4_neon + stmdb sp!, {r4-r5, lr} + + //Loading a horizontal line data (4 bytes) + //line 0 + ldr r4, [r0], r1 + ldr r5, [r2], r3 + usad8 lr, r4, r5 + + //line 1 + ldr r4, [r0], r1 + ldr r5, [r2], r3 + usada8 lr, r4, r5, lr + + //line 2 + ldr r4, [r0], r1 + ldr r5, [r2], r3 + usada8 lr, r4, r5, lr + + 
//line 3 + ldr r4, [r0] + ldr r5, [r2] + usada8 r0, r4, r5, lr + + ldmia sp!, {r4-r5, lr} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsSampleSadFour16x16_neon + + stmdb sp!, {r4-r5, lr} + + //Generate the pix2 start addr + sub r4, r2, #1 + add r5, r2, #1 + sub r2, r3 + + //Loading a horizontal line data (16 bytes) + vld1.8 {q0}, [r0], r1 //save pix1 + + vld1.8 {q1}, [r2], r3 //save pix2 - stride + vld1.8 {q10}, [r2], r3 //save pix2 + vld1.8 {q2}, [r2], r3 //save pix2 + stride + + vld1.8 {q3}, [r4], r3 //save pix2 - 1 + vld1.8 {q8}, [r5], r3 //save pix2 + 1 + + //Do the SAD for 16 bytes + vabdl.u8 q15, d0, d2 + vabal.u8 q15, d1, d3 + + vabdl.u8 q13, d0, d4 + vabal.u8 q13, d1, d5 + + vabdl.u8 q11, d0, d6 + vabal.u8 q11, d1, d7 + + vabdl.u8 q9, d0, d16 + vabal.u8 q9, d1, d17 + + mov lr, #15 +pixel_sad_4_16x16_loop_0: + + //Loading a horizontal line data (16 bytes) + vld1.8 {q0}, [r0], r1 //save pix1 + vmov.8 q1, q10 //save pix2 - stride + vmov.8 q10, q2 + vabal.u8 q15, d0, d2 + vld1.8 {q2}, [r2], r3 //save pix2 + stride + vabal.u8 q15, d1, d3 + vld1.8 {q3}, [r4], r3 //save pix2 - 1 + vabal.u8 q13, d0, d4 + vld1.8 {q8}, [r5], r3 //save pix2 + 1 + vabal.u8 q13, d1, d5 + subs lr, #1 + + vabal.u8 q11, d0, d6 + vabal.u8 q11, d1, d7 + + vabal.u8 q9, d0, d16 + vabal.u8 q9, d1, d17 + + bne pixel_sad_4_16x16_loop_0 + + + //Save SAD to 'r0' + ldr r0, [sp, #12] + + vadd.u16 d0, d30, d31 + vadd.u16 d1, d26, d27 + vadd.u16 d2, d22, d23 + vadd.u16 d3, d18, d19 + + vpaddl.u16 q0, q0 + vpaddl.u16 q1, q1 + + vpaddl.u32 q0, q0 + vpaddl.u32 q1, q1 + + vst4.32 {d0[0],d1[0],d2[0],d3[0]}, [r0] + + ldmia sp!, {r4-r5, lr} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsSampleSadFour16x8_neon + stmdb sp!, {r4-r5, lr} + + //Generate the pix2 start addr + sub r4, r2, #1 + add r5, r2, #1 + sub r2, r3 + + //Loading a horizontal line data (16 bytes) + vld1.8 {q0}, [r0], r1 //save pix1 + + vld1.8 {q1}, [r2], r3 //save pix2 - stride + vld1.8 {q10}, [r2], r3 //save pix2 + vld1.8 {q2}, [r2], r3 
//save pix2 + stride + + vld1.8 {q3}, [r4], r3 //save pix2 - 1 + vld1.8 {q8}, [r5], r3 //save pix2 + 1 + + //Do the SAD for 16 bytes + vabdl.u8 q15, d0, d2 + vabal.u8 q15, d1, d3 + + vabdl.u8 q13, d0, d4 + vabal.u8 q13, d1, d5 + + vabdl.u8 q11, d0, d6 + vabal.u8 q11, d1, d7 + + vabdl.u8 q9, d0, d16 + vabal.u8 q9, d1, d17 + + mov lr, #7 +pixel_sad_4_16x8_loop_0: + + //Loading a horizontal line data (16 bytes) + vld1.8 {q0}, [r0], r1 //save pix1 + vmov.8 q1, q10 //save pix2 - stride + vmov.8 q10, q2 + vabal.u8 q15, d0, d2 + vld1.8 {q2}, [r2], r3 //save pix2 + stride + vabal.u8 q15, d1, d3 + vld1.8 {q3}, [r4], r3 //save pix2 - 1 + vabal.u8 q13, d0, d4 + vld1.8 {q8}, [r5], r3 //save pix2 + 1 + vabal.u8 q13, d1, d5 + subs lr, #1 + + vabal.u8 q11, d0, d6 + vabal.u8 q11, d1, d7 + + vabal.u8 q9, d0, d16 + vabal.u8 q9, d1, d17 + + bne pixel_sad_4_16x8_loop_0 + + //Save SAD to 'r0' + ldr r0, [sp, #12] + + vadd.u16 d0, d30, d31 + vadd.u16 d1, d26, d27 + vadd.u16 d2, d22, d23 + vadd.u16 d3, d18, d19 + + vpaddl.u16 q0, q0 + vpaddl.u16 q1, q1 + + vpaddl.u32 q0, q0 + vpaddl.u32 q1, q1 + + vst4.32 {d0[0],d1[0],d2[0],d3[0]}, [r0] + + ldmia sp!, {r4-r5, lr} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsSampleSadFour8x16_neon + stmdb sp!, {r4-r5, lr} + + //Generate the pix2 start addr + sub r4, r2, #1 + add r5, r2, #1 + sub r2, r3 + + //Loading a horizontal line data (8 bytes) + vld1.8 {d0}, [r0], r1 //save pix1 + + vld1.8 {d1}, [r2], r3 //save pix2 - stride + vld1.8 {d6}, [r2], r3 //save pix2 + vld1.8 {d2}, [r2], r3 //save pix2 + stride + + vld1.8 {d3}, [r4], r3 //save pix2 - 1 + vld1.8 {d4}, [r5], r3 //save pix2 + 1 + + //Do the SAD for 8 bytes + vabdl.u8 q15, d0, d1 + vabdl.u8 q14, d0, d2 + vabdl.u8 q13, d0, d3 + vabdl.u8 q12, d0, d4 + + mov lr, #15 +pixel_sad_4_8x16_loop_0: + + //Loading a horizontal line data (8 bytes) + vld1.8 {d0}, [r0], r1 //save pix1 + vmov.8 d1, d6 //save pix2 - stride + vmov.8 d6, d2 + vld1.8 {d2}, [r2], r3 //save pix2 + stride + vld1.8 {d3}, [r4], r3 
//save pix2 - 1 + vabal.u8 q15, d0, d1 + + vld1.8 {d4}, [r5], r3 //save pix2 + 1 + //Do the SAD for 8 bytes + vabal.u8 q14, d0, d2 + vabal.u8 q13, d0, d3 + vabal.u8 q12, d0, d4 + subs lr, #1 + + bne pixel_sad_4_8x16_loop_0 + + //Save SAD to 'r0' + ldr r0, [sp, #12] + + vadd.u16 d0, d30, d31 + vadd.u16 d1, d28, d29 + vadd.u16 d2, d26, d27 + vadd.u16 d3, d24, d25 + + vpaddl.u16 q0, q0 + vpaddl.u16 q1, q1 + + vpaddl.u32 q0, q0 + vpaddl.u32 q1, q1 + + vst4.32 {d0[0],d1[0],d2[0],d3[0]}, [r0] + + ldmia sp!, {r4-r5, lr} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsSampleSadFour8x8_neon + stmdb sp!, {r4-r5, lr} + + //Generate the pix2 start addr + sub r4, r2, #1 + add r5, r2, #1 + sub r2, r3 + + //Loading a horizontal line data (8 bytes) + vld1.8 {d0}, [r0], r1 //save pix1 + + vld1.8 {d1}, [r2], r3 //save pix2 - stride + vld1.8 {d6}, [r2], r3 //save pix2 + vld1.8 {d2}, [r2], r3 //save pix2 + stride + + vld1.8 {d3}, [r4], r3 //save pix2 - 1 + vld1.8 {d4}, [r5], r3 //save pix2 + 1 + + //Do the SAD for 8 bytes + vabdl.u8 q15, d0, d1 + vabdl.u8 q14, d0, d2 + vabdl.u8 q13, d0, d3 + vabdl.u8 q12, d0, d4 + + mov lr, #7 +pixel_sad_4_8x8_loop_0: + + //Loading a horizontal line data (8 bytes) + vld1.8 {d0}, [r0], r1 //save pix1 + vmov.8 d1, d6 //save pix2 - stride + vmov.8 d6, d2 + vld1.8 {d2}, [r2], r3 //save pix2 + stride + vld1.8 {d3}, [r4], r3 //save pix2 - 1 + vabal.u8 q15, d0, d1 + + vld1.8 {d4}, [r5], r3 //save pix2 + 1 + //Do the SAD for 8 bytes + vabal.u8 q14, d0, d2 + vabal.u8 q13, d0, d3 + vabal.u8 q12, d0, d4 + subs lr, #1 + bne pixel_sad_4_8x8_loop_0 + + //Save SAD to 'r0' + ldr r0, [sp, #12] + + vadd.u16 d0, d30, d31 + vadd.u16 d1, d28, d29 + vadd.u16 d2, d26, d27 + vadd.u16 d3, d24, d25 + + vpaddl.u16 q0, q0 + vpaddl.u16 q1, q1 + + vpaddl.u32 q0, q0 + vpaddl.u32 q1, q1 + + vst4.32 {d0[0],d1[0],d2[0],d3[0]}, [r0] + + ldmia sp!, {r4-r5, lr} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsSampleSadFour4x4_neon + + vld1.32 {d0[0]}, [r0], r1 + vld1.32 {d0[1]}, [r0], r1 
+ vld1.32 {d1[0]}, [r0], r1 + vld1.32 {d1[1]}, [r0] + + + sub r0, r2, r3 + vld1.32 {d2[0]}, [r0], r3 + vld1.32 {d2[1]}, [r0], r3 + vld1.32 {d3[0]}, [r0], r3 + vld1.32 {d3[1]}, [r0], r3 + vld1.32 {d4[0]}, [r0], r3 + vld1.32 {d4[1]}, [r0] + + sub r0, r2, #1 + vld1.32 {d5[0]}, [r0], r3 + vld1.32 {d5[1]}, [r0], r3 + vld1.32 {d6[0]}, [r0], r3 + vld1.32 {d6[1]}, [r0] + + add r0, r2, #1 + vld1.32 {d7[0]}, [r0], r3 + vld1.32 {d7[1]}, [r0], r3 + vld1.32 {d8[0]}, [r0], r3 + vld1.32 {d8[1]}, [r0] + + vabdl.u8 q15, d0, d2 + vabdl.u8 q14, d1, d3 + + vabdl.u8 q13, d0, d3 + vabdl.u8 q12, d1, d4 + + vabdl.u8 q11, d0, d5 + vabdl.u8 q10, d1, d6 + + vabdl.u8 q9, d0, d7 + vabdl.u8 q8, d1, d8 + + //Save SAD to 'r4' + ldr r0, [sp] + vadd.u16 q0, q14, q15 + vadd.u16 q1, q12, q13 + vadd.u16 q2, q10, q11 + vadd.u16 q3, q8 , q9 + + vadd.u16 d0, d1 + vadd.u16 d1, d2, d3 + vadd.u16 d2, d4, d5 + vadd.u16 d3, d6, d7 + + vpaddl.u16 q0, q0 + vpaddl.u16 q1, q1 + + vpaddl.u32 q0, q0 + vpaddl.u32 q1, q1 + + vst4.32 {d0[0],d1[0],d2[0],d3[0]}, [r0] + +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsSampleSatd16x16_neon + vpush {q7} + + SATD_16x4 + vadd.u16 q7, q0, q2 + + SATD_16x4 + vadd.u16 q7, q7, q0 + vadd.u16 q7, q7, q2 + + SATD_16x4 + vadd.u16 q7, q7, q0 + vadd.u16 q7, q7, q2 + + SATD_16x4 + vadd.u16 q7, q7, q0 + vadd.u16 q7, q7, q2 + + vadd.u16 d0, d14, d15 + vpaddl.u16 d0, d0 + vpaddl.u32 d0, d0 + + vmov.32 r0, d0[0] + vpop {q7} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsSampleSatd16x8_neon + vpush {q7} + + SATD_16x4 + vadd.u16 q7, q0, q2 + + SATD_16x4 + vadd.u16 q7, q7, q0 + vadd.u16 q7, q7, q2 + + vadd.u16 d0, d14, d15 + vpaddl.u16 d0, d0 + vpaddl.u32 d0, d0 + + vmov.32 r0, d0[0] + vpop {q7} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsSampleSatd8x16_neon + vpush {q7} + + SATD_8x4 + vadd.u16 q7, q0, q1 + + SATD_8x4 + vadd.u16 q7, q7, q0 + vadd.u16 q7, q7, q1 + + SATD_8x4 + vadd.u16 q7, q7, q0 + vadd.u16 q7, q7, q1 + + SATD_8x4 + vadd.u16 q7, q7, q0 + vadd.u16 q7, q7, q1 + + vadd.u16 
d0, d14, d15 + vpaddl.u16 d0, d0 + vpaddl.u32 d0, d0 + + vmov.32 r0, d0[0] + vpop {q7} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsSampleSatd8x8_neon + vpush {q7} + + SATD_8x4 + vadd.u16 q7, q0, q1 + + SATD_8x4 + vadd.u16 q7, q7, q0 + vadd.u16 q7, q7, q1 + + vadd.u16 d0, d14, d15 + vpaddl.u16 d0, d0 + vpaddl.u32 d0, d0 + + vmov.32 r0, d0[0] + vpop {q7} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsSampleSatd4x4_neon + + //Load the pix1 data --- 16 bytes + vld1.32 {d0[0]}, [r0], r1 + vld1.32 {d0[1]}, [r0], r1 + vld1.32 {d1[0]}, [r0], r1 + vld1.32 {d1[1]}, [r0] + + //Load the pix2 data --- 16 bytes + vld1.32 {d2[0]}, [r2], r3 + vld1.32 {d2[1]}, [r2], r3 + vld1.32 {d3[0]}, [r2], r3 + vld1.32 {d3[1]}, [r2] + + //Get the difference + vsubl.u8 q15, d0, d2 //{0,1,2,3,4,5,6,7} + vsubl.u8 q14, d1, d3 //{8,9,10,11,12,13,14,15} + + //Do the vertical transform + vadd.s16 q13, q15, q14 //{0,4,8,12,1,5,9,13} + vsub.s16 q12, q15, q14 //{2,6,10,14,3,7,11,15} + vswp d27, d24 + vadd.s16 q15, q13, q12 //{0,1,2,3,4,5,6,7} + vsub.s16 q14, q13, q12 //{12,13,14,15,8,9,10,11} + + //Do the horizontal transform + vtrn.32 q15, q14 + vadd.s16 q13, q15, q14 + vsub.s16 q12, q15, q14 + + vtrn.16 q13, q12 + vadd.s16 q15, q13, q12 + + //Do the SAD + vabs.s16 q15, q15 + vabd.s16 q14, q13, q12 + + vadd.u16 q0, q15, q14 + + vrhadd.u16 d0, d1 + vpaddl.u16 d0, d0 + vpaddl.u32 d0, d0 + + vmov.u32 r0, d0[0] + +WELS_ASM_FUNC_END + +#endif + + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm/reconstruct_neon.S b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm/reconstruct_neon.S new file mode 100644 index 000000000..9fdf9d9ae --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm/reconstruct_neon.S @@ -0,0 +1,903 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifdef HAVE_NEON +#include "arm_arch_common_macro.S" + +.macro LOAD_4x4_DATA_FOR_DCT arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 +// { // input: \arg0~\arg3, src1*, src1_stride, src2*, src2_stride + vld2.16 {\arg0[0],\arg1[0]}, [\arg4], \arg5 + vld2.16 {\arg2[0],\arg3[0]}, [\arg6], \arg7 + vld2.16 {\arg0[1],\arg1[1]}, [\arg4], \arg5 + vld2.16 {\arg2[1],\arg3[1]}, [\arg6], \arg7 + + vld2.16 {\arg0[2],\arg1[2]}, [\arg4], \arg5 + vld2.16 {\arg2[2],\arg3[2]}, [\arg6], \arg7 + vld2.16 {\arg0[3],\arg1[3]}, [\arg4], \arg5 + vld2.16 {\arg2[3],\arg3[3]}, [\arg6], \arg7 +// } +.endm + +.macro LOAD_8x8_DATA_FOR_DCT arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9 +// { // input: \arg0~\arg3, src1*, src2*; untouched r2:src1_stride &r4:src2_stride + vld1.64 {\arg0}, [\arg8], r2 + vld1.64 {\arg4}, [\arg9], r4 + vld1.64 {\arg1}, [\arg8], r2 + vld1.64 {\arg5}, [\arg9], r4 + + vld1.64 {\arg2}, [\arg8], r2 + vld1.64 {\arg6}, [\arg9], r4 + vld1.64 {\arg3}, [\arg8], r2 + vld1.64 {\arg7}, [\arg9], r4 +// } +.endm + +.macro DCT_ROW_TRANSFORM_TOTAL_16BITS arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 +// { // input: src_d[0]~[3], working: [4]~[7] + vadd.s16 \arg4, \arg0, \arg3 //int16 s[0] = data[i] + data[i3]; + vsub.s16 \arg7, \arg0, \arg3 //int16 s[3] = data[i] - data[i3]; + vadd.s16 \arg5, \arg1, \arg2 //int16 s[1] = data[i1] + data[i2]; + vsub.s16 \arg6, \arg1, \arg2 //int16 s[2] = data[i1] - data[i2]; + + vadd.s16 \arg0, \arg4, \arg5 //int16 dct[i ] = s[0] + s[1]; + vsub.s16 \arg2, \arg4, \arg5 //int16 dct[i2] = s[0] - s[1]; + vshl.s16 \arg1, \arg7, #1 + vshl.s16 \arg3, \arg6, #1 + vadd.s16 \arg1, \arg1, \arg6 //int16 dct[i1] = (s[3] << 1) + s[2]; + vsub.s16 \arg3, \arg7, \arg3 //int16 dct[i3] = s[3] - (s[2] << 1); +// } +.endm + +.macro MATRIX_TRANSFORM_EACH_16BITS arg0, arg1, arg2, arg3 +// { // input & output: src_d[0]~[3];[0 1 2 3]+[4 5 6 7]+[8 9 10 11]+[12 13 14 15] + vtrn.s16 \arg0, \arg1 //[0 1 2 3]+[4 5 6 7]-->[0 4 2 6]+[1 5 3 7] + vtrn.s16 \arg2, 
\arg3 //[8 9 10 11]+[12 13 14 15]-->[8 12 10 14]+[9 13 11 15] + vtrn.32 \arg0, \arg2 //[0 4 2 6]+[8 12 10 14]-->[0 4 8 12]+[2 6 10 14] + vtrn.32 \arg1, \arg3 //[1 5 3 7]+[9 13 11 15]-->[1 5 9 13]+[3 7 11 15] +// } +.endm + +.macro NEWQUANT_COEF_EACH_16BITS arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8 +// { // input: coef, ff (dst), ff_d0, ff_d1, mf_d0, md_d1 + veor.s16 \arg6, \arg6 // init 0 , and keep 0; + vaba.s16 \arg1, \arg0, \arg6 // f + abs(coef - 0) + vmull.s16 \arg7, \arg2, \arg4 + vmull.s16 \arg8, \arg3, \arg5 + vshr.s32 \arg7, #16 + vshr.s32 \arg8, #16 + vmovn.s32 \arg2, \arg7 + vmovn.s32 \arg3, \arg8 + + vcgt.s16 \arg7, \arg0, #0 // if true, location of coef == 11111111 + vbif.s16 \arg6, \arg1, \arg7 // if (x<0) reserved part; else keep 0 untouched + vshl.s16 \arg6, #1 + vsub.s16 \arg1, \arg1, \arg6 // if x > 0, -= 0; else x-= 2x +// } +.endm + +.macro NEWQUANT_COEF_EACH_16BITS_MAX arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9 +// { // input: coef, ff (dst), ff_d0, ff_d1, mf_d0(max), md_d1 + veor.s16 \arg6, \arg6 // init 0 , and keep 0; + vaba.s16 \arg1, \arg0, \arg6 // f + abs(coef - 0) + vmull.s16 \arg7, \arg2, \arg4 + vmull.s16 \arg8, \arg3, \arg5 + vshr.s32 \arg7, #16 + vshr.s32 \arg8, #16 + vmovn.s32 \arg2, \arg7 + vmovn.s32 \arg3, \arg8 + + vcgt.s16 \arg7, \arg0, #0 // if true, location of coef == 11111111 + vbif.s16 \arg6, \arg1, \arg7 // if (x<0) reserved part; else keep 0 untouched + vshl.s16 \arg6, #1 + vmax.s16 \arg9, \arg2, \arg3 + vsub.s16 \arg1, \arg1, \arg6 // if x > 0, -= 0; else x-= 2x +// } +.endm + +.macro QUANT_DUALWORD_COEF_EACH_16BITS arg0, arg1, arg2, arg3, arg4 +// { // input: coef, ff (dst), mf , working_d (all 0), working_q + vaba.s16 \arg1, \arg0, \arg3 // f + abs(coef - 0) + vmull.s16 \arg4, \arg1, \arg2 // *= mf + vshr.s32 \arg4, #16 + vmovn.s32 \arg1, \arg4 // >> 16 + + vcgt.s16 \arg2, \arg0, #0 // if true, location of coef == 11111111 + vbif.s16 \arg3, \arg1, \arg2 // if (x<0) reserved part; else keep 0 
untouched + vshl.s16 \arg3, #1 + vsub.s16 \arg1, \arg1, \arg3 // if x > 0, -= 0; else x-= 2x +// } +.endm + +.macro DC_ZERO_COUNT_IN_DUALWORD arg0, arg1, arg2 +// { // input: coef, dst_d, working_d (all 0x01) + vceq.s16 \arg1, \arg0, #0 + vand.s16 \arg1, \arg2 + vpadd.s16 \arg1, \arg1, \arg1 + vpadd.s16 \arg1, \arg1, \arg1 +// } +.endm + +.macro SELECT_MAX_IN_ABS_COEF arg0, arg1, arg2, arg3, arg4 +// { // input: coef_0, coef_1, max_q (identy to follow two), output: max_d0, max_d1 + vmax.s16 \arg2, \arg0, \arg1 // max 1st in \arg3 & max 2nd in \arg4 + vpmax.s16 \arg3, \arg3, \arg4 // max 1st in \arg3[0][1] & max 2nd in \arg3[2][3] + vpmax.s16 \arg3, \arg3, \arg4 // max 1st in \arg3[0][1] +// } +.endm + +.macro ZERO_COUNT_IN_2_QUARWORD arg0, arg1, arg2, arg3, arg4, arg5, arg6 +// { // input: coef_0 (identy to \arg3 \arg4), coef_1(identy to \arg5 \arg6), mask_q + vceq.s16 \arg0, #0 + vceq.s16 \arg1, #0 + vand.s16 \arg0, \arg2 + vand.s16 \arg1, \arg2 + + vpadd.s16 \arg3, \arg3, \arg5 + vpadd.s16 \arg4, \arg4, \arg6 + vpadd.s16 \arg3, \arg3, \arg4 // 8-->4 + vpadd.s16 \arg3, \arg3, \arg3 + vpadd.s16 \arg3, \arg3, \arg3 +// } +.endm + +.macro HDM_QUANT_2x2_TOTAL_16BITS arg0, arg1, arg2 +// { // input: src_d[0]~[3], working_d, dst_d + vshr.s64 \arg1, \arg0, #32 + vadd.s16 \arg2, \arg0, \arg1 // [0] = rs[0] + rs[32];[1] = rs[16] + rs[48]; + vsub.s16 \arg1, \arg0, \arg1 // [0] = rs[0] - rs[32];[1] = rs[16] - rs[48]; + vtrn.s16 \arg2, \arg1 + vtrn.s32 \arg2, \arg1 +// } +.endm + +.macro IHDM_4x4_TOTAL_16BITS arg0, arg1, arg2 +// { // input: each src_d[0]~[3](dst), working_q0, working_q1, working_q2 + vshr.s64 \arg1, \arg0, #32 + vadd.s16 \arg2, \arg0, \arg1 // [0] = rs[0] + rs[2];[1] = rs[1] + rs[3]; + vsub.s16 \arg1, \arg0, \arg1 // [0] = rs[0] - rs[2];[1] = rs[1] - rs[3]; + vtrn.s16 \arg2, \arg1 + vrev32.16 \arg1, \arg1 + vtrn.s32 \arg2, \arg1 // [0] = rs[0] + rs[2];[1] = rs[0] - rs[2];[2] = rs[1] - rs[3];[3] = rs[1] + rs[3]; + + vrev64.16 \arg1, \arg2 + vadd.s16 \arg0, 
\arg2, \arg1 // [0] = rs[0] + rs[3];[1] = rs[1] + rs[2]; + vsub.s16 \arg1, \arg2, \arg1 + vrev32.16 \arg1, \arg1 // [0] = rs[1] - rs[2];[1] = rs[0] - rs[3]; + vtrn.s32 \arg0, \arg1 // [0] = rs[0] + rs[3];[1] = rs[1] + rs[2];[2] = rs[1] - rs[2];[3] = rs[0] - rs[3]; +// } +.endm + +.macro MB_PRED_8BITS_ADD_DCT_16BITS_CLIP arg0, arg1, arg2, arg3, arg4, arg5 +// { // input: pred_d[0]/[1](output), dct_q0/1, working_q0/1; + vmovl.u8 \arg4,\arg0 + vmovl.u8 \arg5,\arg1 + vadd.s16 \arg4,\arg2 + vadd.s16 \arg5,\arg3 + vqmovun.s16 \arg0,\arg4 + vqmovun.s16 \arg1,\arg5 +// } +.endm + +.macro ROW_TRANSFORM_1_STEP_TOTAL_16BITS arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 +// { // input: src_d[0]~[3], output: e_d[0]~[3]; + vadd.s16 \arg4, \arg0, \arg2 //int16 e[i][0] = src[0] + src[2]; + vsub.s16 \arg5, \arg0, \arg2 //int16 e[i][1] = src[0] - src[2]; + vshr.s16 \arg6, \arg1, #1 + vshr.s16 \arg7, \arg3, #1 + vsub.s16 \arg6, \arg6, \arg3 //int16 e[i][2] = (src[1]>>1)-src[3]; + vadd.s16 \arg7, \arg1, \arg7 //int16 e[i][3] = src[1] + (src[3]>>1); +// } +.endm + +.macro TRANSFORM_TOTAL_16BITS arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 // both row & col transform used +// { // output: f_q[0]~[3], input: e_q[0]~[3]; + vadd.s16 \arg0, \arg4, \arg7 //int16 f[i][0] = e[i][0] + e[i][3]; + vadd.s16 \arg1, \arg5, \arg6 //int16 f[i][1] = e[i][1] + e[i][2]; + vsub.s16 \arg2, \arg5, \arg6 //int16 f[i][2] = e[i][1] - e[i][2]; + vsub.s16 \arg3, \arg4, \arg7 //int16 f[i][3] = e[i][0] - e[i][3]; +// } +.endm + + +.macro ROW_TRANSFORM_0_STEP arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 +// { // input: src_d[0]~[3], output: e_q[0]~[3]; + vaddl.s16 \arg4, \arg0, \arg2 //int32 e[i][0] = src[0] + src[2]; + vsubl.s16 \arg5, \arg0, \arg2 //int32 e[i][1] = src[0] - src[2]; + vsubl.s16 \arg6, \arg1, \arg3 //int32 e[i][2] = src[1] - src[3]; + vaddl.s16 \arg7, \arg1, \arg3 //int32 e[i][3] = src[1] + src[3]; +// } +.endm + +.macro ROW_TRANSFORM_1_STEP arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, 
arg8, arg9 +// { // input: src_d[0]~[3], output: e_q[0]~[3]; working: \arg8 \arg9 + vaddl.s16 \arg4, \arg0, \arg2 //int32 e[i][0] = src[0] + src[2]; + vsubl.s16 \arg5, \arg0, \arg2 //int32 e[i][1] = src[0] - src[2]; + vshr.s16 \arg8, \arg1, #1 + vshr.s16 \arg9, \arg3, #1 + vsubl.s16 \arg6, \arg8, \arg3 //int32 e[i][2] = (src[1]>>1)-src[3]; + vaddl.s16 \arg7, \arg1, \arg9 //int32 e[i][3] = src[1] + (src[3]>>1); +// } +.endm + +.macro TRANSFORM_4BYTES arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 // both row & col transform used +// { // output: f_q[0]~[3], input: e_q[0]~[3]; + vadd.s32 \arg0, \arg4, \arg7 //int16 f[i][0] = e[i][0] + e[i][3]; + vadd.s32 \arg1, \arg5, \arg6 //int16 f[i][1] = e[i][1] + e[i][2]; + vsub.s32 \arg2, \arg5, \arg6 //int16 f[i][2] = e[i][1] - e[i][2]; + vsub.s32 \arg3, \arg4, \arg7 //int16 f[i][3] = e[i][0] - e[i][3]; +// } +.endm + +.macro COL_TRANSFORM_0_STEP arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 +// { // input: src_q[0]~[3], output: e_q[0]~[3]; + vadd.s32 \arg4, \arg0, \arg2 //int32 e[0][j] = f[0][j] + f[2][j]; + vsub.s32 \arg5, \arg0, \arg2 //int32 e[1][j] = f[0][j] - f[2][j]; + vsub.s32 \arg6, \arg1, \arg3 //int32 e[2][j] = (f[1][j]>>1) - f[3][j]; + vadd.s32 \arg7, \arg1, \arg3 //int32 e[3][j] = f[1][j] + (f[3][j]>>1); +// } +.endm + +.macro COL_TRANSFORM_1_STEP arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 +// { // input: src_q[0]~[3], output: e_q[0]~[3]; + vadd.s32 \arg4, \arg0, \arg2 //int32 e[0][j] = f[0][j] + f[2][j]; + vsub.s32 \arg5, \arg0, \arg2 //int32 e[1][j] = f[0][j] - f[2][j]; + vshr.s32 \arg6, \arg1, #1 + vshr.s32 \arg7, \arg3, #1 + vsub.s32 \arg6, \arg6, \arg3 //int32 e[2][j] = (f[1][j]>>1) - f[3][j]; + vadd.s32 \arg7, \arg1, \arg7 //int32 e[3][j] = f[1][j] + (f[3][j]>>1); +// } +.endm + + +WELS_ASM_FUNC_BEGIN WelsDctT4_neon + push {r4} + ldr r4, [sp, #4] + + LOAD_4x4_DATA_FOR_DCT d4, d5, d6, d7, r1, r2, r3, r4 + + vsubl.u8 q0, d4, d6 + vsubl.u8 q1, d5, d7 + vtrn.s32 q0, q1 + vswp d1, d2 + + // horizontal 
transform + DCT_ROW_TRANSFORM_TOTAL_16BITS d0, d1, d2, d3, d4, d5, d6, d7 + + // transform element + MATRIX_TRANSFORM_EACH_16BITS d0, d1, d2, d3 + + // vertical transform + DCT_ROW_TRANSFORM_TOTAL_16BITS d0, d1, d2, d3, d4, d5, d6, d7 + + // transform element + MATRIX_TRANSFORM_EACH_16BITS d0, d1, d2, d3 + + vst1.s16 {q0, q1}, [r0]! + + pop {r4} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsDctFourT4_neon + push {r4} + ldr r4, [sp, #4] + + LOAD_8x8_DATA_FOR_DCT d16, d17, d18, d19, d20, d21, d22, d23, r1, r3 + + vsubl.u8 q0, d16, d20 + vsubl.u8 q1, d17, d21 + vsubl.u8 q2, d18, d22 + vsubl.u8 q3, d19, d23 + MATRIX_TRANSFORM_EACH_16BITS q0, q1, q2, q3 + + // horizontal transform + DCT_ROW_TRANSFORM_TOTAL_16BITS q0, q1, q2, q3, q8, q9, q10, q11 + + // transform element + MATRIX_TRANSFORM_EACH_16BITS q0, q1, q2, q3 + + // vertical transform + DCT_ROW_TRANSFORM_TOTAL_16BITS q0, q1, q2, q3, q8, q9, q10, q11 + + vswp d1, d2 + vswp d5, d6 + vswp q1, q2 + vst1.s16 {q0, q1}, [r0]! + vst1.s16 {q2, q3}, [r0]! + + //////////////// + LOAD_8x8_DATA_FOR_DCT d16, d17, d18, d19, d20, d21, d22, d23, r1, r3 + + vsubl.u8 q0, d16, d20 + vsubl.u8 q1, d17, d21 + vsubl.u8 q2, d18, d22 + vsubl.u8 q3, d19, d23 + MATRIX_TRANSFORM_EACH_16BITS q0, q1, q2, q3 + + // horizontal transform + DCT_ROW_TRANSFORM_TOTAL_16BITS q0, q1, q2, q3, q8, q9, q10, q11 + + // transform element + MATRIX_TRANSFORM_EACH_16BITS q0, q1, q2, q3 + + // vertical transform + DCT_ROW_TRANSFORM_TOTAL_16BITS q0, q1, q2, q3, q8, q9, q10, q11 + + vswp d1, d2 + vswp d5, d6 + vswp q1, q2 + vst1.s16 {q0, q1}, [r0]! + vst1.s16 {q2, q3}, [r0]! + + pop {r4} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsQuant4x4_neon + vld1.s16 {q2}, [r1] + vld1.s16 {q0, q1}, [r0] + vld1.s16 {q3}, [r2] + + vmov q8, q2 + + NEWQUANT_COEF_EACH_16BITS q0, q2, d4, d5, d6, d7, q9, q10, q11 + vst1.s16 {q2}, [r0]! + + NEWQUANT_COEF_EACH_16BITS q1, q8, d16, d17, d6, d7, q9, q10, q11 + vst1.s16 {q8}, [r0]! 
+ +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsQuant4x4Dc_neon + + vld1.s16 {q0, q1}, [r0] + vdup.s16 q2, r1 // even ff range [0, 768] + vdup.s16 q3, r2 + + vmov q8, q2 + + NEWQUANT_COEF_EACH_16BITS q0, q2, d4, d5, d6, d7, q9, q10, q11 + vst1.s16 {q2}, [r0]! + + NEWQUANT_COEF_EACH_16BITS q1, q8, d16, d17, d6, d7, q9, q10, q11 + vst1.s16 {q8}, [r0]! + +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsQuantFour4x4_neon + vld1.s16 {q2}, [r1] + vld1.s16 {q3}, [r2] + mov r1, r0 + + vld1.s16 {q0, q1}, [r0]! + vmov q8, q2 + NEWQUANT_COEF_EACH_16BITS q0, q8, d16, d17, d6, d7, q9, q10, q11 + vst1.s16 {q8}, [r1]! + vmov q8, q2 + NEWQUANT_COEF_EACH_16BITS q1, q8, d16, d17, d6, d7, q9, q10, q11 + vst1.s16 {q8}, [r1]! + + vld1.s16 {q0, q1}, [r0]! + vmov q8, q2 + NEWQUANT_COEF_EACH_16BITS q0, q8, d16, d17, d6, d7, q9, q10, q11 + vst1.s16 {q8}, [r1]! + vmov q8, q2 + NEWQUANT_COEF_EACH_16BITS q1, q8, d16, d17, d6, d7, q9, q10, q11 + vst1.s16 {q8}, [r1]! + + vld1.s16 {q0, q1}, [r0]! + vmov q8, q2 + NEWQUANT_COEF_EACH_16BITS q0, q8, d16, d17, d6, d7, q9, q10, q11 + vst1.s16 {q8}, [r1]! + vmov q8, q2 + NEWQUANT_COEF_EACH_16BITS q1, q8, d16, d17, d6, d7, q9, q10, q11 + vst1.s16 {q8}, [r1]! + + vld1.s16 {q0, q1}, [r0]! + vmov q8, q2 + NEWQUANT_COEF_EACH_16BITS q0, q8, d16, d17, d6, d7, q9, q10, q11 + vst1.s16 {q8}, [r1]! + vmov q8, q2 + NEWQUANT_COEF_EACH_16BITS q1, q8, d16, d17, d6, d7, q9, q10, q11 + vst1.s16 {q8}, [r1]! + +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsQuantFour4x4Max_neon + vld1.s16 {q2}, [r1] + vld1.s16 {q3}, [r2] + mov r1, r0 + + vld1.s16 {q0, q1}, [r0]! + vmov q8, q2 + NEWQUANT_COEF_EACH_16BITS_MAX q0, q8, d16, d17, d6, d7, q9, q10, q11, d26 + vst1.s16 {q8}, [r1]! + vmov q12, q2 + NEWQUANT_COEF_EACH_16BITS_MAX q1, q12, d24, d25, d6, d7, q9, q10, q11, d28 + vst1.s16 {q12}, [r1]! // then 1st 16 elem in d26 & d28 + + vld1.s16 {q0, q1}, [r0]! + vmov q8, q2 + NEWQUANT_COEF_EACH_16BITS_MAX q0, q8, d16, d17, d6, d7, q9, q10, q11, d27 + vst1.s16 {q8}, [r1]! 
+ vmov q12, q2 + NEWQUANT_COEF_EACH_16BITS_MAX q1, q12, d24, d25, d6, d7, q9, q10, q11, d29 + vst1.s16 {q12}, [r1]! // then 2nd 16 elem in d27 & d29 + + SELECT_MAX_IN_ABS_COEF q13, q14, q0, d0, d1 + vst1.s32 {d0[0]}, [r3]! + + /////////// + vld1.s16 {q0, q1}, [r0]! + vmov q8, q2 + NEWQUANT_COEF_EACH_16BITS_MAX q0, q8, d16, d17, d6, d7, q9, q10, q11, d26 + vst1.s16 {q8}, [r1]! + vmov q12, q2 + NEWQUANT_COEF_EACH_16BITS_MAX q1, q12, d24, d25, d6, d7, q9, q10, q11, d28 + vst1.s16 {q12}, [r1]! // then 3rd 16 elem in d26 & d28 + + vld1.s16 {q0, q1}, [r0]! + vmov q8, q2 + NEWQUANT_COEF_EACH_16BITS_MAX q0, q8, d16, d17, d6, d7, q9, q10, q11, d27 + vst1.s16 {q8}, [r1]! + vmov q12, q2 + NEWQUANT_COEF_EACH_16BITS_MAX q1, q12, d24, d25, d6, d7, q9, q10, q11, d29 + vst1.s16 {q12}, [r1]! // then 4th 16 elem in d27 & d29 + + SELECT_MAX_IN_ABS_COEF q13, q14, q0, d0, d1 + vst1.s32 {d0[0]}, [r3]! + +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsHadamardT4Dc_neon + push {r2,r3} + mov r2, #64 // 2*16*sizeof(int16_t) + add r3, r1, #32 + + vld1.s16 {d0}, [r1], r2 + vld1.s16 {d1}, [r3], r2 + vld1.s16 {d4}, [r1], r2 + vld1.s16 {d5}, [r3], r2 + vld1.s16 {d2}, [r1], r2 + vld1.s16 {d3}, [r3], r2 + vld1.s16 {d6}, [r1], r2 + vld1.s16 {d7}, [r3], r2 + vtrn.16 q0, q2 // d0[0 4], d1[1 5] + vtrn.16 q1, q3 // d2[2 6], d3[3 7] + + vld1.s16 {d16}, [r1], r2 + vld1.s16 {d17}, [r3], r2 + vld1.s16 {d20}, [r1], r2 + vld1.s16 {d21}, [r3], r2 + vld1.s16 {d18}, [r1], r2 + vld1.s16 {d19}, [r3], r2 + vld1.s16 {d22}, [r1], r2 + vld1.s16 {d23}, [r3], r2 + vtrn.16 q8, q10 //d16[08 12],d17[09 13] + vtrn.16 q9, q11 //d18[10 14],d19[11 15] + + vtrn.32 q0, q8 // d0 [0 4 08 12] = dct[idx], d1[1 5 09 13] = dct[idx+16] + vtrn.32 q1, q9 // d2 [2 6 10 14] = dct[idx+64], d3[3 7 11 15] = dct[idx+80] + + ROW_TRANSFORM_0_STEP d0, d1, d3, d2, q8, q11, q10, q9 + + TRANSFORM_4BYTES q0, q1, q3, q2, q8, q11, q10, q9 + + // transform element 32bits + vtrn.s32 q0, q1 //[0 1 2 3]+[4 5 6 7]-->[0 4 2 6]+[1 5 3 7] + vtrn.s32 q2, q3 
//[8 9 10 11]+[12 13 14 15]-->[8 12 10 14]+[9 13 11 15] + vswp d1, d4 //[0 4 2 6]+[8 12 10 14]-->[0 4 8 12]+[2 6 10 14] + vswp d3, d6 //[1 5 3 7]+[9 13 11 15]-->[1 5 9 13]+[3 7 11 15] + + COL_TRANSFORM_0_STEP q0, q1, q3, q2, q8, q11, q10, q9 + + TRANSFORM_4BYTES q0, q1, q3, q2, q8, q11, q10, q9 + + vrshrn.s32 d16, q0, #1 + vrshrn.s32 d17, q1, #1 + vrshrn.s32 d18, q2, #1 + vrshrn.s32 d19, q3, #1 + vst1.16 {q8, q9}, [r0] //store + + pop {r2,r3} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsHadamardQuant2x2_neon + + vdup.s16 d1, r1 //ff + vdup.s16 d2, r2 //mf + veor d3, d3 + + mov r1, #32 + mov r2, r0 + + vld1.s16 {d0[0]}, [r0], r1 //rs[00] + vst1.s16 {d3[0]}, [r2], r1 //rs[00]=0 + vld1.s16 {d0[1]}, [r0], r1 //rs[16] + vst1.s16 {d3[0]}, [r2], r1 //rs[16]=0 + vld1.s16 {d0[2]}, [r0], r1 //rs[32] + vst1.s16 {d3[0]}, [r2], r1 //rs[32]=0 + vld1.s16 {d0[3]}, [r0], r1 //rs[48] + vst1.s16 {d3[0]}, [r2], r1 //rs[48]=0 + + HDM_QUANT_2x2_TOTAL_16BITS d0, d4, d5 // output d5 + + HDM_QUANT_2x2_TOTAL_16BITS d5, d4, d0 // output d0 + + QUANT_DUALWORD_COEF_EACH_16BITS d0, d1, d2, d3, q2 + + vst1.s16 d1, [r3] // store to dct + ldr r2, [sp, #0] + vst1.s16 d1, [r2] // store to block + + mov r1, #1 + vdup.s16 d3, r1 + DC_ZERO_COUNT_IN_DUALWORD d1, d0, d3 + + vmov r0, r1, d0 + and r0, #0x07 // range [0~4] + rsb r0, #4 +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsHadamardQuant2x2SkipKernel_neon + + vdup.s16 d3, r1 + mov r1, #32 + vld1.s16 {d0[0]}, [r0], r1 //rs[00] + vld1.s16 {d0[1]}, [r0], r1 //rs[16] + vld1.s16 {d0[2]}, [r0], r1 //rs[32] + vld1.s16 {d0[3]}, [r0], r1 //rs[48] + + HDM_QUANT_2x2_TOTAL_16BITS d0, d1, d2 // output d2 + + HDM_QUANT_2x2_TOTAL_16BITS d2, d1, d0 // output d0 + + vabs.s16 d1, d0 + vcgt.s16 d1, d1, d3 // abs(dct[i])>threshold; + vmov r0, r1, d1 + orr r0, r1 +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsGetNoneZeroCount_neon + push {r1} + vld1.s16 {q0, q1}, [r0] + vmov.s16 q8, #1 + + ZERO_COUNT_IN_2_QUARWORD q0, q1, q8, d0, d1, d2, d3 + vmov r0, r1, d0 + and 
r0, #0x1F // range [0~16] + rsb r0, #16 + pop {r1} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsDequant4x4_neon + vld1.s16 {q0, q1}, [r0] + vld1.u16 {q2}, [r1] + + vmul.s16 q8, q0, q2 + vmul.s16 q9, q1, q2 + + vst1.s16 {q8, q9}, [r0] +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsDequantFour4x4_neon + vld1.u16 {q12}, [r1] + mov r1, r0 + vld1.s16 {q0, q1}, [r0]! + vld1.s16 {q2, q3}, [r0]! + vmul.s16 q0, q0, q12 + vld1.s16 {q8, q9}, [r0]! + vmul.s16 q1, q1, q12 + vld1.s16 {q10, q11}, [r0]! + + vst1.s16 {q0, q1}, [r1]! + + vmul.s16 q2, q2, q12 + vmul.s16 q3, q3, q12 + vmul.s16 q8, q8, q12 + vst1.s16 {q2, q3}, [r1]! + + vmul.s16 q9, q9, q12 + vmul.s16 q10, q10, q12 + vmul.s16 q11, q11, q12 + vst1.s16 {q8, q9}, [r1]! + vst1.s16 {q10, q11}, [r1]! + +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsDequantIHadamard4x4_neon + + vld1.s16 {q0, q1}, [r0] + vdup.s16 q8, r1 + + IHDM_4x4_TOTAL_16BITS q0, q2, q3 + IHDM_4x4_TOTAL_16BITS q1, q2, q3 + + MATRIX_TRANSFORM_EACH_16BITS d0, d1, d2, d3 + + IHDM_4x4_TOTAL_16BITS q0, q2, q3 + vmul.s16 q0, q8 + + IHDM_4x4_TOTAL_16BITS q1, q2, q3 + vmul.s16 q1, q8 + + MATRIX_TRANSFORM_EACH_16BITS d0, d1, d2, d3 + vst1.s16 {q0, q1}, [r0] +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsIDctT4Rec_neon + vld1.u32 {d16[0]}, [r2], r3 + push {r4} + ldr r4, [sp, #4] + vld1.u32 {d16[1]}, [r2], r3 + + vld4.s16 {d0, d1, d2, d3}, [r4] // cost 3 cycles! 
+ vld1.u32 {d17[0]}, [r2], r3 + vld1.u32 {d17[1]}, [r2], r3 // q8 (d16, d17) is pred + + ROW_TRANSFORM_1_STEP_TOTAL_16BITS d0, d1, d2, d3, d4, d5, d6, d7 + + TRANSFORM_TOTAL_16BITS d0, d1, d2, d3, d4, d5, d6, d7 + + MATRIX_TRANSFORM_EACH_16BITS d0, d1, d2, d3 + + ROW_TRANSFORM_1_STEP_TOTAL_16BITS d0, d1, d2, d3, d4, d5, d6, d7 + + TRANSFORM_TOTAL_16BITS d0, d1, d2, d3, d4, d5, d6, d7 + vrshr.s16 d0, d0, #6 + vrshr.s16 d1, d1, #6 + vrshr.s16 d2, d2, #6 + vrshr.s16 d3, d3, #6 + + //after the rounding shift by 6, add pred and saturate into [0, 255] + vmovl.u8 q2,d16 + vadd.s16 q0,q2 + vqmovun.s16 d16,q0 + vst1.32 {d16[0]},[r0],r1 + vst1.32 {d16[1]},[r0],r1 + + vmovl.u8 q2,d17 + vadd.s16 q1,q2 + vqmovun.s16 d17,q1 + vst1.32 {d17[0]},[r0],r1 + vst1.32 {d17[1]},[r0] + + pop {r4} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsIDctFourT4Rec_neon + + vld1.u64 {d24}, [r2], r3 + push {r4} + ldr r4, [sp, #4] + vld1.u64 {d25}, [r2], r3 + + vld4.s16 {d0, d1, d2, d3}, [r4]! // cost 3 cycles! + vld1.u64 {d26}, [r2], r3 + vld1.u64 {d27}, [r2], r3 + vld4.s16 {d4, d5, d6, d7}, [r4]! // cost 3 cycles! + vswp d1, d4 + vswp d3, d6 + vswp q1, q2 // q0~q3 + + ROW_TRANSFORM_1_STEP_TOTAL_16BITS q0, q1, q2, q3, q8, q9, q10, q11 + + TRANSFORM_TOTAL_16BITS q0, q1, q2, q3, q8, q9, q10, q11 + + MATRIX_TRANSFORM_EACH_16BITS q0, q1, q2, q3 + + ROW_TRANSFORM_1_STEP_TOTAL_16BITS q0, q1, q2, q3, q8, q9, q10, q11 + + TRANSFORM_TOTAL_16BITS q0, q1, q2, q3, q8, q9, q10, q11 + vrshr.s16 q0, q0, #6 + vrshr.s16 q1, q1, #6 + vrshr.s16 q2, q2, #6 + vrshr.s16 q3, q3, #6 + + //after the rounding shift by 6, add pred (d24-d27) and saturate into [0, 255] + vmovl.u8 q8,d24 + vadd.s16 q0,q8 + vqmovun.s16 d24,q0 + vst1.u8 {d24},[r0],r1 + + vmovl.u8 q8,d25 + vadd.s16 q1,q8 + vqmovun.s16 d25,q1 + vst1.u8 {d25},[r0],r1 + + vmovl.u8 q8,d26 + vadd.s16 q2,q8 + vqmovun.s16 d26,q2 + vst1.u8 {d26},[r0],r1 + + vmovl.u8 q8,d27 + vadd.s16 q3,q8 + vqmovun.s16 d27,q3 + vst1.u8 {d27},[r0],r1 + + vld1.u64 {d24}, [r2], r3 + vld1.u64 {d25}, [r2], r3 + + vld4.s16 {d0, d1, d2, d3}, [r4]! // cost 3 cycles! 
+ vld1.u64 {d26}, [r2], r3 + vld1.u64 {d27}, [r2], r3 + vld4.s16 {d4, d5, d6, d7}, [r4]! // cost 3 cycles! + vswp d1, d4 + vswp d3, d6 + vswp q1, q2 // q0~q3 + + ROW_TRANSFORM_1_STEP_TOTAL_16BITS q0, q1, q2, q3, q8, q9, q10, q11 + + TRANSFORM_TOTAL_16BITS q0, q1, q2, q3, q8, q9, q10, q11 + + MATRIX_TRANSFORM_EACH_16BITS q0, q1, q2, q3 + + ROW_TRANSFORM_1_STEP_TOTAL_16BITS q0, q1, q2, q3, q8, q9, q10, q11 + + TRANSFORM_TOTAL_16BITS q0, q1, q2, q3, q8, q9, q10, q11 + vrshr.s16 q0, q0, #6 + vrshr.s16 q1, q1, #6 + vrshr.s16 q2, q2, #6 + vrshr.s16 q3, q3, #6 + + //after rounding 6, clip into [0, 255] + vmovl.u8 q8,d24 + vadd.s16 q0,q8 + vqmovun.s16 d24,q0 + vst1.u8 {d24},[r0],r1 + + vmovl.u8 q8,d25 + vadd.s16 q1,q8 + vqmovun.s16 d25,q1 + vst1.u8 {d25},[r0],r1 + + vmovl.u8 q8,d26 + vadd.s16 q2,q8 + vqmovun.s16 d26,q2 + vst1.u8 {d26},[r0],r1 + + vmovl.u8 q8,d27 + vadd.s16 q3,q8 + vqmovun.s16 d27,q3 + vst1.u8 {d27},[r0],r1 + + pop {r4} +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsIDctRecI16x16Dc_neon + push {r4} + ldr r4, [sp, #4] + + vld1.s16 {q8,q9}, [r4] + vrshr.s16 q8, q8, #6 + vrshr.s16 q9, q9, #6 + + vdup.s16 d20, d16[0] + vdup.s16 d21, d16[1] + vdup.s16 d22, d16[2] + vdup.s16 d23, d16[3] + + vld1.u8 {q0}, [r2], r3 + MB_PRED_8BITS_ADD_DCT_16BITS_CLIP d0, d1, q10, q11, q12, q13 + vst1.u8 {q0}, [r0], r1 + + vld1.u8 {q0}, [r2], r3 + MB_PRED_8BITS_ADD_DCT_16BITS_CLIP d0, d1, q10, q11, q12, q13 + vst1.u8 {q0}, [r0], r1 + + vld1.u8 {q0}, [r2], r3 + MB_PRED_8BITS_ADD_DCT_16BITS_CLIP d0, d1, q10, q11, q12, q13 + vst1.u8 {q0}, [r0], r1 + + vld1.u8 {q0}, [r2], r3 + MB_PRED_8BITS_ADD_DCT_16BITS_CLIP d0, d1, q10, q11, q12, q13 + vst1.u8 {q0}, [r0], r1 + + vdup.s16 d20, d17[0] + vdup.s16 d21, d17[1] + vdup.s16 d22, d17[2] + vdup.s16 d23, d17[3] + + vld1.u8 {q0}, [r2], r3 + MB_PRED_8BITS_ADD_DCT_16BITS_CLIP d0, d1, q10, q11, q12, q13 + vst1.u8 {q0}, [r0], r1 + + vld1.u8 {q0}, [r2], r3 + MB_PRED_8BITS_ADD_DCT_16BITS_CLIP d0, d1, q10, q11, q12, q13 + vst1.u8 {q0}, [r0], r1 + + 
vld1.u8 {q0}, [r2], r3 + MB_PRED_8BITS_ADD_DCT_16BITS_CLIP d0, d1, q10, q11, q12, q13 + vst1.u8 {q0}, [r0], r1 + + vld1.u8 {q0}, [r2], r3 + MB_PRED_8BITS_ADD_DCT_16BITS_CLIP d0, d1, q10, q11, q12, q13 + vst1.u8 {q0}, [r0], r1 + + vdup.s16 d20, d18[0] + vdup.s16 d21, d18[1] + vdup.s16 d22, d18[2] + vdup.s16 d23, d18[3] + + vld1.u8 {q0}, [r2], r3 + MB_PRED_8BITS_ADD_DCT_16BITS_CLIP d0, d1, q10, q11, q12, q13 + vst1.u8 {q0}, [r0], r1 + + vld1.u8 {q0}, [r2], r3 + MB_PRED_8BITS_ADD_DCT_16BITS_CLIP d0, d1, q10, q11, q12, q13 + vst1.u8 {q0}, [r0], r1 + + vld1.u8 {q0}, [r2], r3 + MB_PRED_8BITS_ADD_DCT_16BITS_CLIP d0, d1, q10, q11, q12, q13 + vst1.u8 {q0}, [r0], r1 + + vld1.u8 {q0}, [r2], r3 + MB_PRED_8BITS_ADD_DCT_16BITS_CLIP d0, d1, q10, q11, q12, q13 + vst1.u8 {q0}, [r0], r1 + + vdup.s16 d20, d19[0] + vdup.s16 d21, d19[1] + vdup.s16 d22, d19[2] + vdup.s16 d23, d19[3] + + vld1.u8 {q0}, [r2], r3 + MB_PRED_8BITS_ADD_DCT_16BITS_CLIP d0, d1, q10, q11, q12, q13 + vst1.u8 {q0}, [r0], r1 + + vld1.u8 {q0}, [r2], r3 + MB_PRED_8BITS_ADD_DCT_16BITS_CLIP d0, d1, q10, q11, q12, q13 + vst1.u8 {q0}, [r0], r1 + + vld1.u8 {q0}, [r2], r3 + MB_PRED_8BITS_ADD_DCT_16BITS_CLIP d0, d1, q10, q11, q12, q13 + vst1.u8 {q0}, [r0], r1 + + vld1.u8 {q0}, [r2], r3 + MB_PRED_8BITS_ADD_DCT_16BITS_CLIP d0, d1, q10, q11, q12, q13 + vst1.u8 {q0}, [r0], r1 + + pop {r4} +WELS_ASM_FUNC_END +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm/svc_motion_estimation.S b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm/svc_motion_estimation.S new file mode 100644 index 000000000..1f47a7c43 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm/svc_motion_estimation.S @@ -0,0 +1,366 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifdef HAVE_NEON +#include "arm_arch_common_macro.S" + + +WELS_ASM_FUNC_BEGIN SumOf8x8SingleBlock_neon + vld1.64 {d0}, [r0], r1 + vld1.64 {d1}, [r0], r1 + vld1.64 {d2}, [r0], r1 + vld1.64 {d3}, [r0], r1 + vld1.64 {d4}, [r0], r1 + vld1.64 {d5}, [r0], r1 + vld1.64 {d6}, [r0], r1 + vld1.64 {d7}, [r0] + vpaddl.u8 q0, q0 + vpadal.u8 q0, q1 + vpadal.u8 q0, q2 + vpadal.u8 q0, q3 + + vpaddl.u16 q0, q0 + vpadd.i32 d0, d1 + vpadd.i32 d0, d0 + vmov r0, r1, d0 +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN SumOf16x16SingleBlock_neon + vld1.64 {q0}, [r0], r1 + vpaddl.u8 q0, q0 +.rept 15 + vld1.64 {q1}, [r0], r1 + vpadal.u8 q0, q1 +.endr + vpaddl.u16 q0, q0 + vpadd.i32 d0, d1 + vpadd.i32 d0, d0 + vmov r0, r1, d0 +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN SumOf8x8BlockOfFrame_neon +//(uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,const int32_t kiRefStride,uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]) + stmdb sp!, {r4-r12} + ldr r5, [sp, #40] //pTimesOfFeatureValue + ldr r4, [sp, #36] //pFeatureOfBlock + + mov r8, r0 + mov r6, r1 + add r8, r6 + add r4, r4, r6, lsl #1 + + mov r7, r6 +_width_loop8x8_1: + subs r0, r8, r7 + vld1.64 {d0}, [r0], r3 + vld1.64 {d1}, [r0], r3 + vld1.64 {d2}, [r0], r3 + vld1.64 {d3}, [r0], r3 + vld1.64 {d4}, [r0], r3 + vld1.64 {d5}, [r0], r3 + vld1.64 {d6}, [r0], r3 + vld1.64 {d7}, [r0] + + vpaddl.u8 q0, q0 + vpadal.u8 q0, q1 + vpadal.u8 q0, q2 + vpadal.u8 q0, q3 + vpaddl.u16 q0, q0 + vpadd.i32 d0, d1 + vpadd.i32 d0, d0 + + subs r1, r4, r7, lsl #1 + vst1.16 {d0[0]}, [r1] // sum -> pFeatureOfBlock[i] + vmov r0, r1, d0 + add r1, r5, r0, lsl #2 + ldr r0, [r1] + add r0, #1 + str r0, [r1] + + subs r7, #1 + bne _width_loop8x8_1 + + add r8, r3 + add r4, r4, r6, lsl #1 + subs r2, #1 + beq _SumOf8x8BlockOfFrame_end + + +_height_loop8x8: + mov r7, r6 +_width_loop8x8_2: + subs r0, r8, r7 + subs r1, r4, r7, lsl #1 + + subs r9, r1, r6, lsl #1 // last line of pFeatureOfBlock[i] + ldrh r10, [r9] // sum of last line of 
pFeatureOfBlock[i] + + subs r11, r0, r3 + vld1.64 {d1}, [r11] + add r0, r11, r3, lsl #3 + vld1.64 {d0}, [r0] // + + vpaddl.u8 q0, q0 + vpadd.u16 d0, d0, d1 + vpaddl.u16 d0, d0 + vmov r11, r12, d0 + subs r10, r12 + add r0, r10, r11 + + strh r0, [r1] // sum -> pFeatureOfBlock[i] + + add r1, r5, r0, lsl #2 + ldr r0, [r1] + add r0, #1 + str r0, [r1] + subs r7, #1 + bne _width_loop8x8_2 + + add r8, r3 + add r4, r4, r6, lsl #1 + subs r2, #1 + bne _height_loop8x8 +_SumOf8x8BlockOfFrame_end: + ldmia sp!, {r4-r12} +WELS_ASM_FUNC_END + +WELS_ASM_FUNC_BEGIN SumOf16x16BlockOfFrame_neon +//(uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,const int32_t kiRefStride,uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]) + stmdb sp!, {r4-r12} + ldr r5, [sp, #40] //pTimesOfFeatureValue + ldr r4, [sp, #36] //pFeatureOfBlock + + mov r8, r0 + mov r6, r1 + add r8, r6 + add r4, r4, r6, lsl #1 + + mov r7, r6 +_width_loop16x16_1: + subs r0, r8, r7 + vld1.64 {q0}, [r0], r3 + vpaddl.u8 q0, q0 +.rept 15 + vld1.64 {q1}, [r0], r3 + vpadal.u8 q0, q1 +.endr + vpaddl.u16 q0, q0 + vpadd.i32 d0, d1 + vpadd.i32 d0, d0 + + subs r1, r4, r7, lsl #1 + vst1.16 {d0[0]}, [r1] // sum -> pFeatureOfBlock[i] + vmov r0, r1, d0 + add r1, r5, r0, lsl #2 + ldr r0, [r1] + add r0, #1 + str r0, [r1] + + subs r7, #1 + bne _width_loop16x16_1 + add r8, r3 + add r4, r4, r6, lsl #1 + subs r2, #1 + beq _SumOf16x16BlockOfFrame_neon_end + +_height_loop16x16: + mov r7, r6 +_width_loop16x16_2: + subs r0, r8, r7 + subs r1, r4, r7, lsl #1 + subs r9, r1, r6, lsl #1 // last line of pFeatureOfBlock[i] + ldrh r10, [r9] // sum of last line of pFeatureOfBlock[i] + + subs r11, r0, r3 + vld1.64 {q1}, [r11] + add r0, r11, r3, lsl #4 + vld1.64 {q0}, [r0] // + + vpaddl.u8 q0, q0 + vpaddl.u8 q1, q1 + vpadd.u16 d0, d0, d1 + vpadd.u16 d1, d2, d3 + vpadd.u16 d0, d0, d1 + vpaddl.u16 d0, d0 + + vmov r11, r12, d0 + subs r10, r12 + add r0, r10, r11 + + strh r0, [r1] // sum -> pFeatureOfBlock[i] + add r1, r5, r0, lsl #2 + 
ldr r0, [r1] + add r0, #1 + str r0, [r1] + + subs r7, #1 + bne _width_loop16x16_2 + + add r8, r3 + add r4, r4, r6, lsl #1 + subs r2, #1 + bne _height_loop16x16 +_SumOf16x16BlockOfFrame_neon_end: + ldmia sp!, {r4-r12} +WELS_ASM_FUNC_END + +WELS_ASM_FUNC_BEGIN InitializeHashforFeature_neon +// (uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize, uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList); for each count, stores the running pBuf position into both pointer lists, then advances it by count*4 + stmdb sp!, {r4-r7} + ldr r4, [sp, #16] //pFeatureValuePointerList + bic r5, r2, #3 // r5 = kiListSize rounded down to a multiple of 4 +_hash_assign_loop_x4: + vld1.64 {q0}, [r0]! // load 4 counts + vshl.u32 q0, q0, #2 // scale each count by 4 + vceq.u32 q1, q0, #0 + vand.i32 d2, d2, d3 + vmov r6, r7, d2 + and r6, r6, r7 + cmp r6, #0xffffffff // all four counts zero? + beq _hash_assign_with_copy_x4 + + veor q1, q1 + vext.32 q2, q1, q0, #3 + vext.32 q3, q1, q0, #2 + vext.32 q8, q1, q0, #1 // q8 (caller-saved) replaces q4: d8-d15 must be preserved under AAPCS and are not saved here + vadd.u32 q0, q0, q2 + vadd.u32 q0, q0, q3 + vadd.u32 q0, q0, q8 // q0 = inclusive prefix sums of the 4 scaled counts + vext.32 q2, q1, q0, #3 // q2 = exclusive prefix sums + vdup.32 q3, r1 + vadd.u32 q2, q2, q3 // offset by the current pBuf position + vst1.64 {q2}, [r3]! + vst1.64 {q2}, [r4]! + vmov.32 r6, d1[1] // total of the 4 scaled counts + add r1, r1, r6 + b _assign_next + +_hash_assign_with_copy_x4: + vdup.32 q2, r1 + vst1.64 {q2}, [r3]! + vst1.64 {q2}, [r4]! 
+ +_assign_next: + subs r5, r5, #4 + bne _hash_assign_loop_x4 + + and r5, r2, #3 // remaining kiListSize % 4 entries, handled one at a time + cmp r5, #0 + beq _hash_assign_end +_hash_assign_loop_x4_rem: + str r1, [r3], #4 + str r1, [r4], #4 + ldr r7, [r0], #4 + lsl r7, r7, #2 + add r1, r1, r7 + subs r5, r5, #1 + bne _hash_assign_loop_x4_rem +_hash_assign_end: + + ldmia sp!, {r4-r7} +WELS_ASM_FUNC_END + +.align 4 +mv_x_inc_x4: .short 0x10, 0x10, 0x10, 0x10, 0x00, 0x00, 0x00, 0x00 +mv_y_inc_x4: .short 0x04, 0x04, 0x04, 0x04, 0x00, 0x00, 0x00, 0x00 +mx_x_offset_x4: .short 0x00, 0x04, 0x08, 0x0c, 0x00, 0x00, 0x00, 0x00 + +WELS_ASM_FUNC_BEGIN FillQpelLocationByFeatureValue_neon +// void (uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight, uint16_t** pFeatureValuePointerList) + stmdb sp!, {r4-r8} + vpush {q4-q7} + adr r7, mv_x_inc_x4 + vld1.64 {q7}, [r7] + adr r7, mv_y_inc_x4 + vld1.64 {q6}, [r7] + adr r7, mx_x_offset_x4 + vld1.64 {q5}, [r7] + veor q4, q4 + veor q3, q3 + vdup.32 q8, r3 +_hash_height_loop: + mov r7, r1 + vmov q2, q5 //mx_x_offset_x4 +_hash_width_loop: + vld1.64 {d0}, [r0]! + vshll.u16 q0, d0, #2 + vadd.u32 q0, q8 + vmov q1, q2 + vmov q4, q3 + vzip.16 q1, q4 + + vmov.32 r4, d0[0] + ldr r5, [r4] + vmov.32 r6, d2[0] + str r6, [r5] + add r5, r5, #4 + pld [r5] // cache miss? + str r5, [r4] + + vmov.32 r4, d0[1] + ldr r5, [r4] + vmov.32 r6, d2[1] + str r6, [r5] + add r5, r5, #4 + pld [r5] // cache miss? + str r5, [r4] + + vmov.32 r4, d1[0] + ldr r5, [r4] + vmov.32 r6, d3[0] + str r6, [r5] + add r5, r5, #4 + pld [r5] // cache miss? + str r5, [r4] + + vmov.32 r4, d1[1] + ldr r5, [r4] + vmov.32 r6, d3[1] + str r6, [r5] + add r5, r5, #4 + pld [r5] // cache miss? 
+ str r5, [r4] + + vadd.u16 q2, q2, q7 + subs r7, #4 + bne _hash_width_loop + + vadd.u16 q3, q3, q6 + subs r2, #1 + bne _hash_height_loop + + vpop {q4-q7} + ldmia sp!, {r4-r8} +WELS_ASM_FUNC_END +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm64/intra_pred_aarch64_neon.S b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm64/intra_pred_aarch64_neon.S new file mode 100644 index 000000000..ef50027d6 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm64/intra_pred_aarch64_neon.S @@ -0,0 +1,505 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifdef HAVE_NEON_AARCH64 +#include "arm_arch64_common_macro.S" + +// for Luma 4x4 +WELS_ASM_AARCH64_FUNC_BEGIN WelsI4x4LumaPredH_AArch64_neon + SIGN_EXTENSION x2,w2 + sub x3, x1, #1 +.rept 4 + ld1r {v0.8b}, [x3], x2 + st1 {v0.S}[0], [x0], 4 +.endr +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsI4x4LumaPredDc_AArch64_neon + SIGN_EXTENSION x2,w2 + sub x3, x1, x2 + sub x4, x1, #1 + ldr s0, [x3] + ld1 {v0.b}[4], [x4], x2 + ld1 {v0.b}[5], [x4], x2 + ld1 {v0.b}[6], [x4], x2 + ld1 {v0.b}[7], [x4] + uaddlv h0, v0.8b + uqrshrn b0, h0, #3 + dup v0.8b, v0.b[0] +.rept 4 + st1 {v0.S}[0], [x0], 4 +.endr +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsI4x4LumaPredDcTop_AArch64_neon + SIGN_EXTENSION x2,w2 + sub x3, x1, x2 + sub v0.8b, v0.8b, v0.8b + ldr s0, [x3] + uaddlv h0, v0.8b + uqrshrn v0.8b, v0.8h, #2 + dup v0.8b, v0.b[0] +.rept 4 + st1 {v0.S}[0], [x0], 4 +.endr +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsI4x4LumaPredDDL_AArch64_neon + SIGN_EXTENSION x2,w2 + sub x3, x1, x2 + ld1 {v0.8b}, [x3] + dup v1.8b, v0.b[7] + ext v2.8b, v0.8b, v1.8b, #1 + ext v3.8b, v0.8b, v1.8b, #2 + ushll v2.8h, v2.8b, #1 + uaddl v1.8h, v3.8b, v0.8b + add v1.8h, v1.8h, v2.8h + uqrshrn v1.8b, v1.8h, #2 + st1 {v1.S}[0], [x0], 4 + ext v0.8b, v1.8b, v2.8b, #1 + st1 {v0.S}[0], [x0], 4 + ext v0.8b, v1.8b, v2.8b, #2 + st1 {v0.S}[0], [x0], 4 + ext v0.8b, v1.8b, v2.8b, #3 + st1 {v0.S}[0], [x0] +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsI4x4LumaPredDDLTop_AArch64_neon + SIGN_EXTENSION x2,w2 + sub x3, x1, x2 + ld1 {v0.8b}, [x3] + dup v1.8b, v0.b[3] + mov v0.S[1], v1.S[0] + ext v2.8b, v0.8b, v1.8b, #1 + ext v3.8b, v0.8b, v1.8b, #2 + ushll v2.8h, v2.8b, #1 + uaddl v1.8h, v3.8b, v0.8b + add v1.8h, v1.8h, v2.8h + uqrshrn v1.8b, v1.8h, #2 + st1 {v1.S}[0], [x0], 4 + ext v0.8b, v1.8b, v2.8b, #1 + st1 {v0.S}[0], [x0], 4 + ext v0.8b, v1.8b, v2.8b, #2 + st1 {v0.S}[0], [x0], 4 + ext v0.8b, v1.8b, v2.8b, #3 + st1 {v0.S}[0], [x0] 
+WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsI4x4LumaPredVL_AArch64_neon + SIGN_EXTENSION x2,w2 + sub x3, x1, x2 + ld1 {v0.8b}, [x3] + ext v1.8b, v0.8b, v0.8b, #1 + uaddl v1.8h, v1.8b, v0.8b + uqrshrn v0.8b, v1.8h, #1 // v0.8b is VL0, VL1, VL2, VL3, VL4, ... + ext v2.16b, v1.16b, v1.16b, #2 + add v1.8h, v2.8h, v1.8h + uqrshrn v1.8b, v1.8h, #2 // v1.8b is VL5, VL6, VL7, VL8, VL9 + st1 {v0.s}[0], [x0], 4 // write the first row + st1 {v1.s}[0], [x0], 4 // write the second row + ext v3.8b, v0.8b, v0.8b, #1 + ext v2.8b, v1.8b, v1.8b, #1 + st1 {v3.s}[0], [x0], 4 // write the third row + st1 {v2.s}[0], [x0] // write the fourth row +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsI4x4LumaPredVLTop_AArch64_neon + SIGN_EXTENSION x2,w2 + sub x3, x1, x2 + ld1 {v0.8b}, [x3] + dup v1.8b, v0.b[3] + mov v0.s[1], v1.s[0] + ext v1.8b, v0.8b, v0.8b, #1 + uaddl v1.8h, v1.8b, v0.8b + uqrshrn v0.8b, v1.8h, #1 // v0.8b is VL0, VL1, VL2, VL3, VL4, ... + ext v2.16b, v1.16b, v1.16b, #2 + add v1.8h, v2.8h, v1.8h + uqrshrn v1.8b, v1.8h, #2 // v1.8b is VL5, VL6, VL7, VL8, VL9 + st1 {v0.s}[0], [x0], 4 // write the first row + st1 {v1.s}[0], [x0], 4 // write the second row + ext v3.8b, v0.8b, v0.8b, #1 + ext v2.8b, v1.8b, v1.8b, #1 + st1 {v3.s}[0], [x0], 4 // write the third row + st1 {v2.s}[0], [x0] // write the fourth row +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsI4x4LumaPredVR_AArch64_neon + SIGN_EXTENSION x2,w2 + sub x3, x1, x2 + ld1 {v0.s}[1], [x3] + sub x3, x3, #1 + ld1 {v0.b}[3], [x3], x2 + ld1 {v0.b}[2], [x3], x2 + ld1 {v0.b}[1], [x3], x2 + ld1 {v0.b}[0], [x3] // v0.8b l2, l1, l0, lt, t0, t1, t2, t3 + + ext v1.8b, v0.8b, v0.8b, #7 + uaddl v2.8h, v1.8b, v0.8b //v2:{X,L2+L1,L1+L0,L0+LT,LT+T0,T0+T1,T1+T2,T2+T3} + ext v1.16b, v2.16b, v2.16b, #14 + add v3.8h, v2.8h, v1.8h //v3:{X,L2+L1+L1+L0,L1+L0+L0+LT,...T1+T2+T2+T3} + + uqrshrn v3.8b, v3.8h, #2 + uqrshrn v2.8b, v2.8h, #1 + + st1 {v2.s}[1], [x0], 4 + st1 {v3.s}[1], [x0], 4 + + ext v2.8b, 
v2.8b, v2.8b, #7 + ins v2.b[4], v3.b[3] + st1 {v2.s}[1], [x0], 4 + + ext v3.8b, v3.8b, v3.8b, #7 + ins v3.b[4], v3.b[3] + st1 {v3.s}[1], [x0] + +WELS_ASM_AARCH64_FUNC_END + + +WELS_ASM_AARCH64_FUNC_BEGIN WelsI4x4LumaPredHU_AArch64_neon + SIGN_EXTENSION x2,w2 + sub x3, x1, #1 + mov x4, #3 + mul x4, x4, x2 + add x4, x4, x3 + ld1r {v0.8b}, [x4] + ld1 {v0.b}[4], [x3], x2 + ld1 {v0.b}[5], [x3], x2 + ld1 {v0.b}[6], [x3], x2 //v0:{L3,L3,L3,L3,L0,L1,L2,L3} + + ext v1.8b, v0.8b, v0.8b, #1 + uaddl v2.8h, v0.8b, v1.8b //v2:{L3+L3,L3+L3,L3+L3,L3+L0,L0+L1,L1+L2,L2+L3,L3+L3} + ext v3.16b, v2.16b, v2.16b, #2 + add v3.8h, v3.8h, v2.8h //v3:{x, HU1, HU3, HU5, x} + + uqrshrn v2.8b, v2.8h, #1 // HU0, HU2, HU4 + uqrshrn v3.8b, v3.8h, #2 // HU1, HU3, HU5 + zip2 v3.8b, v2.8b, v3.8b // HU0, HU1, HU2, HU3, HU4, HU5 + mov v3.h[3], v0.h[0] // v3.8b is hu0, hu1, hu2, hu3, hu4, hu5, l3, l3 + ext v2.8b, v3.8b, v0.8b, #2 + st1 {v3.s}[0], [x0], 4 + st1 {v2.s}[0], [x0], 4 + st1 {v3.s}[1], [x0], 4 + st1 {v0.s}[0], [x0] +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsI4x4LumaPredHD_AArch64_neon + SIGN_EXTENSION x2,w2 + sub x3, x1, #1 + sub x3, x3, x2 // x3 points to top left + ld1 {v0.s}[1], [x3], x2 + ld1 {v0.b}[3], [x3], x2 + ld1 {v0.b}[2], [x3], x2 + ld1 {v0.b}[1], [x3], x2 + ld1 {v0.b}[0], [x3] // v0.8b: l3, l2, l1, l0, lt, t0, t1, t2 + ext v1.8b, v0.8b, v0.8b, #1 // v1.8b: l2, l1, l0, lt, t0, t1, t2, l3 + uaddl v2.8h, v0.8b, v1.8b + ext v3.16b, v2.16b, v2.16b, #2 + add v3.8h, v3.8h, v2.8h + uqrshrn v2.8b, v2.8h, #1 // hd8, hd6, hd4, hd0, xxx + uqrshrn v3.8b, v3.8h, #2 // hd9, hd7, hd5, hd1, hd2, hd3 + zip1 v2.8b, v2.8b, v3.8b // hd8, hd9, hd6, hd7, hd4, hd5, hd0, hd1 + mov v1.h[0], v3.h[2] + ext v3.8b, v2.8b, v1.8b, #6 + st1 {v3.s}[0], [x0], 4 + st1 {v2.s}[1], [x0], 4 + ext v3.8b, v2.8b, v1.8b, #2 + st1 {v3.s}[0], [x0], 4 + st1 {v2.s}[0], [x0] +WELS_ASM_AARCH64_FUNC_END + +// for Chroma 8x8 +WELS_ASM_AARCH64_FUNC_BEGIN WelsIChromaPredV_AArch64_neon + SIGN_EXTENSION x2,w2 + sub 
x3, x1, x2 + ld1 {v0.8b}, [x3] +.rept 8 + st1 {v0.8b}, [x0], 8 +.endr +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsIChromaPredH_AArch64_neon + SIGN_EXTENSION x2,w2 + sub x3, x1, #1 +.rept 8 + ld1r {v0.8b}, [x3], x2 + st1 {v0.8b}, [x0], 8 +.endr +WELS_ASM_AARCH64_FUNC_END + + +WELS_ASM_AARCH64_FUNC_BEGIN WelsIChromaPredDc_AArch64_neon + SIGN_EXTENSION x2,w2 + sub x3, x1, x2 + sub x4, x1, #1 + ld1 {v0.8b}, [x3] + ld1 {v0.b}[8], [x4], x2 + ld1 {v0.b}[9], [x4], x2 + ld1 {v0.b}[10], [x4], x2 + ld1 {v0.b}[11], [x4], x2 + ld1 {v0.b}[12], [x4], x2 + ld1 {v0.b}[13], [x4], x2 + ld1 {v0.b}[14], [x4], x2 + ld1 {v0.b}[15], [x4] + + uaddlp v1.8h, v0.16b + uaddlp v2.4s, v1.8h + ins v3.d[0], v2.d[1] + add v3.2s, v2.2s, v3.2s + urshr v2.4s, v2.4s, #2 + urshr v3.2s, v3.2s, #3 + + dup v0.8b, v3.b[0] + dup v1.8b, v2.b[4] + dup v2.8b, v2.b[12] + dup v3.8b, v3.b[4] + ins v0.s[1], v1.s[0] + ins v2.s[1], v3.s[0] +.rept 4 + st1 {v0.8b}, [x0], 8 +.endr +.rept 4 + st1 {v2.8b}, [x0], 8 +.endr + +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsIChromaPredDcTop_AArch64_neon + SIGN_EXTENSION x2,w2 + sub x3, x1, x2 + ld1 {v0.8b}, [x3] + uaddlp v0.4h, v0.8b + addp v0.8h, v0.8h, v0.8h + dup v1.8h, v0.h[0] + dup v2.8h, v0.h[1] + mov v1.D[1], v2.D[0] + uqrshrn v1.8b, v1.8h, #2 +.rept 8 + st1 {v1.8b}, [x0], 8 +.endr +WELS_ASM_AARCH64_FUNC_END + +.align 4 +intra_1_to_4: .short 17*1, 17*2, 17*3, 17*4, 17*1, 17*2, 17*3, 17*4 +intra_m3_to_p4: .short -3, -2, -1, 0, 1, 2, 3, 4 + +WELS_ASM_AARCH64_FUNC_BEGIN WelsIChromaPredPlane_AArch64_neon + SIGN_EXTENSION x2,w2 + sub x3, x1, x2 + sub x3, x3, #1 + mov x4, x3 + // load pTop[2-i] and pLeft[(2-i)*kiStride] + ld1 {v1.b}[3], [x3], #1 + ld1 {v1.b}[2], [x3], #1 + ld1 {v1.b}[1], [x3], #1 + ld1 {v1.b}[0], [x3], #1 + ld1 {v1.b}[7], [x4], x2 + ld1 {v1.b}[6], [x4], x2 + ld1 {v1.b}[5], [x4], x2 + ld1 {v1.b}[4], [x4], x2 + add x3, x3, #1 + add x4, x4, x2 + // load pTop[4+i] and pLeft[(4+i)*kiStride] + ld1 {v0.b}[0], [x3], #1 + ld1 
{v0.b}[1], [x3], #1 + ld1 {v0.b}[2], [x3], #1 + ld1 {v0.b}[3], [x3], #1 + ld1 {v0.b}[4], [x4], x2 + ld1 {v0.b}[5], [x4], x2 + ld1 {v0.b}[6], [x4], x2 + ld1 {v0.b}[7], [x4], x2 + + uxtl v1.8h, v1.8b + uxtl v0.8h, v0.8b + ldr q2, intra_1_to_4 + ldr q3, intra_m3_to_p4 + dup v4.8h, v0.h[3] + dup v5.8h, v0.h[7] + add v4.8h, v4.8h, v5.8h + sub v0.8h, v0.8h, v1.8h + shl v4.8h, v4.8h, #4 // v4.8h is a + mul v0.8h, v0.8h, v2.8h // v0.h[0-3] is H, v0.h[4-7] is V + saddlp v0.4s, v0.8h + addp v0.4s, v0.4s, v0.4s // v0.s[0] is H, v0.s[1] is V + sqrshrn v0.4h, v0.4s, #5 + dup v1.8h, v0.h[0] // v1.8h is b + dup v0.8h, v0.h[1] // v0.8h is c + mla v4.8h, v1.8h, v3.8h + mla v4.8h, v0.8h, v3.h[0] + sqrshrun v1.8b, v4.8h, #5 + st1 {v1.8b}, [x0], 8 +.rept 7 + add v4.8h, v4.8h, v0.8h + sqrshrun v1.8b, v4.8h, #5 + st1 {v1.8b}, [x0], 8 +.endr +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsI16x16LumaPredDc_AArch64_neon + SIGN_EXTENSION x2,w2 + sub x3, x1, x2 + sub x4, x1, #1 + ld1 {v0.16b}, [x3] + ld1 {v1.b}[0], [x4], x2 + ld1 {v1.b}[1], [x4], x2 + ld1 {v1.b}[2], [x4], x2 + ld1 {v1.b}[3], [x4], x2 + ld1 {v1.b}[4], [x4], x2 + ld1 {v1.b}[5], [x4], x2 + ld1 {v1.b}[6], [x4], x2 + ld1 {v1.b}[7], [x4], x2 + ld1 {v1.b}[8], [x4], x2 + ld1 {v1.b}[9], [x4], x2 + ld1 {v1.b}[10], [x4], x2 + ld1 {v1.b}[11], [x4], x2 + ld1 {v1.b}[12], [x4], x2 + ld1 {v1.b}[13], [x4], x2 + ld1 {v1.b}[14], [x4], x2 + ld1 {v1.b}[15], [x4] + // reduce instruction + uaddlv h0, v0.16b + uaddlv h1, v1.16b + add v0.8h, v0.8h, v1.8h + uqrshrn b0, h0, #5 + dup v0.16b, v0.b[0] +.rept 16 + st1 {v0.16b}, [x0], 16 +.endr +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsI16x16LumaPredDcTop_AArch64_neon + SIGN_EXTENSION x2,w2 + sub x3, x1, x2 + ld1 {v0.16b}, [x3] + // reduce instruction + uaddlv h0, v0.16b + uqrshrn v0.8b, v0.8h, 4 + dup v0.16b, v0.b[0] +.rept 16 + st1 {v0.16b}, [x0], 16 +.endr +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsI16x16LumaPredDcLeft_AArch64_neon + SIGN_EXTENSION 
x2,w2 + sub x3, x1, #1 + ld1 {v1.b}[0], [x3], x2 + ld1 {v1.b}[1], [x3], x2 + ld1 {v1.b}[2], [x3], x2 + ld1 {v1.b}[3], [x3], x2 + ld1 {v1.b}[4], [x3], x2 + ld1 {v1.b}[5], [x3], x2 + ld1 {v1.b}[6], [x3], x2 + ld1 {v1.b}[7], [x3], x2 + ld1 {v1.b}[8], [x3], x2 + ld1 {v1.b}[9], [x3], x2 + ld1 {v1.b}[10], [x3], x2 + ld1 {v1.b}[11], [x3], x2 + ld1 {v1.b}[12], [x3], x2 + ld1 {v1.b}[13], [x3], x2 + ld1 {v1.b}[14], [x3], x2 + ld1 {v1.b}[15], [x3] + // reduce instruction + uaddlv h1, v1.16b + uqrshrn v0.8b, v1.8h, #4 + dup v0.16b, v0.b[0] +.rept 16 + st1 {v0.16b}, [x0], 16 +.endr +WELS_ASM_AARCH64_FUNC_END + + +.align 4 +intra_1_to_8: .short 5, 10, 15, 20, 25, 30, 35, 40 +intra_m7_to_p8: .short -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8 +//void WelsI16x16LumaPredPlane_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +WELS_ASM_AARCH64_FUNC_BEGIN WelsI16x16LumaPredPlane_AArch64_neon + SIGN_EXTENSION x2,w2 + sub x3, x1, x2 + sub x3, x3, #1 + mov x4, x3 + ld1 {v0.8b}, [x4] // v0 low 8 bit in top(reverse order) + add x4, x4, #9 + rev64 v0.8b, v0.8b // reverse v0 + ld1 {v1.8b}, [x4] // v1 high 8 bit in top + uxtl v0.8h, v0.8b // extend to 16 bit integer + uxtl v1.8h, v1.8b // extend to 16 bit integer + ld1 {v2.b}[7], [x3], x2 + ld1 {v2.b}[6], [x3], x2 + ld1 {v2.b}[5], [x3], x2 + ld1 {v2.b}[4], [x3], x2 + ld1 {v2.b}[3], [x3], x2 + ld1 {v2.b}[2], [x3], x2 + ld1 {v2.b}[1], [x3], x2 + ld1 {v2.b}[0], [x3], x2 // v2.8b low 8 bit in left + add x3, x3, x2 + ld1 {v3.b}[0], [x3], x2 + ld1 {v3.b}[1], [x3], x2 + ld1 {v3.b}[2], [x3], x2 + ld1 {v3.b}[3], [x3], x2 + ld1 {v3.b}[4], [x3], x2 + ld1 {v3.b}[5], [x3], x2 + ld1 {v3.b}[6], [x3], x2 + ld1 {v3.b}[7], [x3] // v3.8b high 8bit in left + uxtl v2.8h, v2.8b + uxtl v3.8h, v3.8b + sub v0.8h, v1.8h, v0.8h + sub v2.8h, v3.8h, v2.8h + ldr q4, intra_1_to_8 + mul v0.8h, v0.8h, v4.8h + mul v2.8h, v2.8h, v4.8h + saddlv s0, v0.8h + saddlv s2, v2.8h + add v1.8h, v1.8h, v3.8h + sqrshrn v0.4h, v0.4S, #6 // b is in v0.h[0] + 
sqrshrn v2.4h, v2.4S, #6 // c is in v2.h[0] + shl v1.8h, v1.8h, #4 // a is in v1.h[7] + ldr q4, intra_m7_to_p8 + ldr q5, intra_m7_to_p8 + 16 + dup v1.8h, v1.h[7] + dup v3.8h, v1.h[7] + mla v1.8h, v4.8h, v0.h[0] + mla v3.8h, v5.8h, v0.h[0] + dup v2.8h, v2.h[0] // v2.8h is [cccccccc] + mla v1.8h, v2.8h, v4.h[0] + mla v3.8h, v2.8h, v4.h[0] + sqrshrun v4.8b, v1.8h, #5 + sqrshrun2 v4.16b, v3.8h, #5 + st1 {v4.16b}, [x0], 16 +.rept 15 + add v1.8h, v1.8h, v2.8h + add v3.8h, v3.8h, v2.8h + sqrshrun v4.8b, v1.8h, #5 + sqrshrun2 v4.16b, v3.8h, #5 + st1 {v4.16b}, [x0], 16 +.endr +WELS_ASM_AARCH64_FUNC_END +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm64/intra_pred_sad_3_opt_aarch64_neon.S b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm64/intra_pred_sad_3_opt_aarch64_neon.S new file mode 100644 index 000000000..a41621370 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm64/intra_pred_sad_3_opt_aarch64_neon.S @@ -0,0 +1,611 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifdef HAVE_NEON_AARCH64 +#include "arm_arch64_common_macro.S" + +.macro LOAD_LUMA_DATA + sub x7, x0, x1 + ld1 {v0.16b}, [x7] //top + sub x7, x0, #1 + ld1 {v1.b}[0], [x7], x1 + ld1 {v1.b}[1], [x7], x1 + ld1 {v1.b}[2], [x7], x1 + ld1 {v1.b}[3], [x7], x1 + ld1 {v1.b}[4], [x7], x1 + ld1 {v1.b}[5], [x7], x1 + ld1 {v1.b}[6], [x7], x1 + ld1 {v1.b}[7], [x7], x1 + ld1 {v1.b}[8], [x7], x1 + ld1 {v1.b}[9], [x7], x1 + ld1 {v1.b}[10], [x7], x1 + ld1 {v1.b}[11], [x7], x1 + ld1 {v1.b}[12], [x7], x1 + ld1 {v1.b}[13], [x7], x1 + ld1 {v1.b}[14], [x7], x1 + ld1 {v1.b}[15], [x7] //left +.endm + +.macro LOAD_16X4_DATA + //Load the p_enc data and save to "v22 ~ v25"--- 16X4 bytes + ld1 {v0.16b}, [x2], x3 + ld1 {v1.16b}, [x2], x3 + ld1 {v20.16b}, [x2], x3 + ld1 {v21.16b}, [x2], x3 + trn1 v22.4s, v0.4s, v1.4s + trn2 v23.4s, v0.4s, v1.4s + trn1 v24.4s, v20.4s, v21.4s + trn2 v25.4s, v20.4s, v21.4s +.endm + +.macro GET_16X16_V_SATD + trn1 v6.4s, v4.4s, v5.4s + trn2 v7.4s, v4.4s, v5.4s + add v4.8h, v6.8h, v7.8h + sub v5.8h, v6.8h, v7.8h + trn1 v6.8h, v4.8h, v5.8h + trn2 v7.8h, v4.8h, v5.8h + add v4.8h, v6.8h, v7.8h + sub v5.8h, v6.8h, v7.8h + trn1 v6.4s, v4.4s, v5.4s + trn2 v7.4s, v4.4s, v5.4s //{0,1,3,2, 4,5,7,6} v6 {8,9,11,10, 12,13,15,14} v7 +.endm + +.macro GET_16X16_H_SATD + trn1 v16.4s, v4.4s, v5.4s + trn2 v17.4s, v4.4s, v5.4s + add v4.8h, v16.8h, v17.8h + sub v5.8h, v16.8h, v17.8h + trn1 v16.8h, v4.8h, v5.8h + trn2 v17.8h, v4.8h, v5.8h 
+ add v4.8h, v16.8h, v17.8h + sub v5.8h, v16.8h, v17.8h + trn1 v16.4s, v4.4s, v5.4s + trn2 v17.4s, v4.4s, v5.4s //{0,1,3,2, 4,5,7,6} v16 {8,9,11,10, 12,13,15,14} v17 +.endm + +.macro SELECT_BEST_COST arg0, arg1, arg2 + cmp w1, \arg0 + csel \arg0, \arg0, w1, \arg2 + cset w7, \arg1 + cmp w2, \arg0 + mov w6, #2 + csel \arg0, \arg0, w2, \arg2 + csel w7, w7, w6, \arg2 +.endm + +.macro SELECT_BEST_COST_PREFER_HIGHER arg0 + SELECT_BEST_COST \arg0, ls, hi +.endm + +.macro SELECT_BEST_COST_PREFER_LOWER arg0 + SELECT_BEST_COST \arg0, lo, hs +.endm + +.macro LOAD_CHROMA_DATA arg0, arg1, arg2 + sub x9, \arg0, x1 + ld1 {\arg1}, [x9] //top_cb + sub x9, \arg0, #1 + ld1 {\arg2}[8], [x9], x1 + ld1 {\arg2}[9], [x9], x1 + ld1 {\arg2}[10], [x9], x1 + ld1 {\arg2}[11], [x9], x1 + ld1 {\arg2}[12], [x9], x1 + ld1 {\arg2}[13], [x9], x1 + ld1 {\arg2}[14], [x9], x1 + ld1 {\arg2}[15], [x9], x1 //left_cb +.endm + +.macro LOAD_8X4_DATA arg0 + //Load the p_enc data and save to "v20 ~ v21"--- 8X4 bytes + ld1 {v0.8b}, [\arg0], x3 + ld1 {v1.8b}, [\arg0], x3 + ld1 {v0.d}[1], [\arg0], x3 + ld1 {v1.d}[1], [\arg0], x3 + trn1 v2.4s, v0.4s, v1.4s + trn2 v1.4s, v0.4s, v1.4s + trn1 v20.2d, v2.2d, v1.2d + trn2 v21.2d, v2.2d, v1.2d +.endm + +.macro HDM_TRANSFORM_4X4_L0 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9 + //Do the vertical transform + uadd\arg9\() v0.8h, \arg0, \arg1 + usub\arg9\() v1.8h, \arg0, \arg1 + trn1 v3.2d, v0.2d, v1.2d + trn2 v1.2d, v0.2d, v1.2d + add v4.8h, v3.8h, v1.8h //{0,1,2,3,4,5,6,7} + sub v5.8h, v3.8h, v1.8h //{12,13,14,15,8,9,10,11} + + //Do the horizontal transform + trn1 v0.4s, v4.4s, v5.4s + trn2 v1.4s, v4.4s, v5.4s + add v4.8h, v0.8h, v1.8h + sub v5.8h, v0.8h, v1.8h + trn1 v0.8h, v4.8h, v5.8h + trn2 v1.8h, v4.8h, v5.8h + add v4.8h, v0.8h, v1.8h + sub v5.8h, v0.8h, v1.8h + + //16x16_v + trn1 v0.2s, v4.2s, v5.2s + trn2 v1.2s, v4.2s, v5.2s + sabal \arg5, v0.4h, \arg2 + sabal \arg5, v1.4h, \arg8\().4h + sabal2 \arg5, v4.8h, \arg8\().8h + sabal2 \arg5, v5.8h, 
\arg8\().8h + + //16x16_h + ins v3.d[0], v4.d[1] + trn1 v0.4h, v4.4h, v3.4h + trn2 v1.4h, v4.4h, v3.4h + sabal \arg6, v0.4h, \arg3 + sabdl v4.4s, v1.4h, \arg8\().4h + sabal v4.4s, v5.4h, \arg8\().4h + sabal2 v4.4s, v5.8h, \arg8\().8h + add \arg6, \arg6, v4.4s + + //16x16_dc_both + sabal \arg7, v0.4h, \arg4 + add \arg7, \arg7, v4.4s +.endm + +//int32_t WelsIntra8x8Combined3Sad_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*, int32_t, uint8_t*, uint8_t*,uint8_t*); +WELS_ASM_AARCH64_FUNC_BEGIN WelsIntra8x8Combined3Sad_AArch64_neon + ldr x11, [sp, #0] + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x5,w5 + LOAD_CHROMA_DATA x0, v0.8b, v0.b + + uaddlp v1.8h, v0.16b + uaddlp v2.4s, v1.8h + ins v3.d[0], v2.d[1] + add v3.2s, v2.2s, v3.2s + urshr v2.4s, v2.4s, #2 + urshr v3.2s, v3.2s, #3 + + dup v20.8b, v3.b[0] + dup v21.8b, v2.b[4] + dup v22.8b, v2.b[12] + dup v23.8b, v3.b[4] + ins v20.s[1], v21.s[0] + ins v22.s[1], v23.s[0] + + LOAD_CHROMA_DATA x7, v4.8b, v4.b + + uaddlp v5.8h, v4.16b + uaddlp v6.4s, v5.8h + ins v7.d[0], v6.d[1] + add v7.2s, v6.2s, v7.2s + urshr v6.4s, v6.4s, #2 + urshr v7.2s, v7.2s, #3 + + dup v24.8b, v7.b[0] + dup v25.8b, v6.b[4] + dup v26.8b, v6.b[12] + dup v27.8b, v7.b[4] + ins v24.s[1], v25.s[0] + ins v26.s[1], v27.s[0] + + sub x9, x0, #1 + sub x10, x7, #1 + + ld1 {v3.8b}, [x2], x3 + ld1 {v5.8b}, [x11], x3 + + ld1r {v6.8b}, [x9], x1 + ld1r {v7.8b}, [x10], x1 + + uabdl v29.8h, v0.8b, v3.8b + uabal v29.8h, v4.8b, v5.8b //top + + uabdl v30.8h, v6.8b, v3.8b + uabal v30.8h, v7.8b, v5.8b //left + + uabdl v31.8h, v20.8b, v3.8b + uabal v31.8h, v24.8b, v5.8b //Dc +.rept 3 + ld1 {v3.8b}, [x2], x3 + ld1 {v5.8b}, [x11], x3 + + ld1r {v6.8b}, [x9], x1 + ld1r {v7.8b}, [x10], x1 + + uabal v29.8h, v0.8b, v3.8b + uabal v29.8h, v4.8b, v5.8b //top + + uabal v30.8h, v6.8b, v3.8b + uabal v30.8h, v7.8b, v5.8b //left + + uabal v31.8h, v20.8b, v3.8b + uabal v31.8h, v24.8b, v5.8b //Dc +.endr + +.rept 4 + ld1 {v3.8b}, [x2], x3 + ld1 {v5.8b}, [x11], 
x3 + + ld1r {v6.8b}, [x9], x1 + ld1r {v7.8b}, [x10], x1 + + uabal v29.8h, v0.8b, v3.8b + uabal v29.8h, v4.8b, v5.8b //top + + uabal v30.8h, v6.8b, v3.8b + uabal v30.8h, v7.8b, v5.8b //left + + uabal v31.8h, v22.8b, v3.8b + uabal v31.8h, v26.8b, v5.8b //Dc +.endr + + saddlv s29, v29.8h + fmov w2, s29 + add w2, w2, w5, lsl #1 + saddlv s30, v30.8h + fmov w1, s30 + add w1, w1, w5, lsl #1 + saddlv s31, v31.8h + fmov w0, s31 + + SELECT_BEST_COST_PREFER_HIGHER w0 + + str w7, [x4] +WELS_ASM_AARCH64_FUNC_END + +//int32_t WelsIntra16x16Combined3Sad_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*, int32_t, uint8_t*); +WELS_ASM_AARCH64_FUNC_BEGIN WelsIntra16x16Combined3Sad_AArch64_neon + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x5,w5 + LOAD_LUMA_DATA + + uaddlv h2, v0.16b + uaddlv h3, v1.16b + add v2.8h, v2.8h, v3.8h + uqrshrn b2, h2, #5 + dup v2.16b, v2.b[0] //Dc + + sub x7, x0, #1 + ld1 {v3.16b}, [x2], x3 + ld1r {v4.16b}, [x7], x1 + + uabdl v29.8h, v0.8b, v3.8b + uabal2 v29.8h, v0.16b,v3.16b //top + + uabdl v30.8h, v4.8b, v3.8b + uabal2 v30.8h, v4.16b,v3.16b //left + + uabdl v31.8h, v2.8b, v3.8b + uabal2 v31.8h, v2.16b,v3.16b //Dc + mov x6, #15 +sad_intra_16x16_x3_opt_loop0: + ld1 {v3.16b}, [x2], x3 + ld1r {v4.16b}, [x7], x1 + + uabal v29.8h, v0.8b, v3.8b + uabal2 v29.8h, v0.16b,v3.16b //top + + uabal v30.8h, v4.8b, v3.8b + uabal2 v30.8h, v4.16b,v3.16b //left + + uabal v31.8h, v2.8b, v3.8b + uabal2 v31.8h, v2.16b,v3.16b //Dc + sub x6, x6, #1 + cbnz x6, sad_intra_16x16_x3_opt_loop0 + + saddlv s29, v29.8h + fmov w0, s29 + saddlv s30, v30.8h + fmov w1, s30 + add w1, w1, w5, lsl #1 + saddlv s31, v31.8h + fmov w2, s31 + add w2, w2, w5, lsl #1 + + SELECT_BEST_COST_PREFER_LOWER w0 + + str w7, [x4] +WELS_ASM_AARCH64_FUNC_END + +//int32_t WelsIntra4x4Combined3Satd_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t, uint8_t*, int32_t*, int32_t, int32_t,int32_t); +WELS_ASM_AARCH64_FUNC_BEGIN WelsIntra4x4Combined3Satd_AArch64_neon + SIGN_EXTENSION x1,w1 
+ SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x6,w6 + SIGN_EXTENSION x7,w7 + + sub x9, x0, x1 + ld1 {v16.s}[0], [x9] //top + sub x9, x0, #1 + ld1 {v16.b}[4], [x9], x1 + ld1 {v16.b}[5], [x9], x1 + ld1 {v16.b}[6], [x9], x1 + ld1 {v16.b}[7], [x9], x1 + + + uaddlv h2, v16.8b + uqrshrn b17, h2, #3 + urshr v2.4h, v2.4h, #3 + shl v2.4h, v2.4h, #4 + + //Calculate the 4x4_v 4x4_h mode SATD and save to "v6, v7" + ushll v4.8h, v16.8b, #2 + ins v5.d[0], v4.d[1] + trn1 v6.2s, v4.2s, v5.2s + trn2 v7.2s, v4.2s, v5.2s + + add v4.4h, v6.4h, v7.4h + sub v5.4h, v6.4h, v7.4h + trn1 v6.4h, v4.4h, v5.4h + trn2 v7.4h, v4.4h, v5.4h + add v4.4h, v6.4h, v7.4h + sub v5.4h, v6.4h, v7.4h + trn1 v6.2s, v4.2s, v5.2s + trn2 v7.2s, v4.2s, v5.2s //{0,1,3,2,top} v6 {0,1,3,2,left} v7 + + eor v31.16b, v31.16b, v31.16b //Save the SATD of DC_BOTH + eor v30.16b, v30.16b, v30.16b //Save the SATD of H + eor v29.16b, v29.16b, v29.16b //Save the SATD of V + eor v28.16b, v28.16b, v28.16b //For zero register + + //Load the p_enc data and save to "v22 ~ v23"--- 4X4 bytes (four rows of 4 bytes, row stride x3) + ld1 {v22.s}[0], [x2], x3 + ld1 {v22.s}[1], [x2], x3 + ld1 {v23.s}[0], [x2], x3 + ld1 {v23.s}[1], [x2], x3 + + HDM_TRANSFORM_4X4_L0 v22.8b, v23.8b, v6.4h, v7.4h, v2.4h, v29.4s, v30.4s, v31.4s, v28, l + + ldr x11, [sp, #0] + urshr v29.4s, v29.4s, #1 + addv s29, v29.4s + fmov w0, s29 + add w0, w0, w11 + + urshr v30.4s, v30.4s, #1 + addv s30, v30.4s + fmov w1, s30 + add w1, w1, w7 + + urshr v31.4s, v31.4s, #1 + addv s31, v31.4s + fmov w2, s31 + add w2, w2, w6 + + mov w10, w0 + SELECT_BEST_COST_PREFER_HIGHER w10 + + str w7, [x5] // w7 = index (0, 1 or 2) picked by SELECT_BEST_COST; w10 = the winning cost + + sub w9, w10, w2 // zero when the winning cost is the DC_BOTH cost held in w2 + cbnz w9, satd_intra_4x4_x3_opt_jump0 + dup v0.16b, v17.b[0] // v17.b[0] = rounded mean of the 8 neighbour bytes in v16 + st1 {v0.16b}, [x4] + b satd_intra_4x4_x3_opt_end + +satd_intra_4x4_x3_opt_jump0: + sub w8, w10, w1 // zero when the winning cost is the H cost held in w1 + cbnz w8, satd_intra_4x4_x3_opt_jump1 + dup v0.16b, v16.b[4] + dup v1.16b, v16.b[5] + dup v2.16b, v16.b[6] + dup v3.16b, v16.b[7] + st4 {v0.s,v1.s,v2.s,v3.s}[0], [x4] // interleaved store: each 4-byte row is one left neighbour repeated + b satd_intra_4x4_x3_opt_end + +satd_intra_4x4_x3_opt_jump1: + st1 {v16.S}[0],
[x4], #4 + st1 {v16.S}[0], [x4], #4 + st1 {v16.S}[0], [x4], #4 + st1 {v16.S}[0], [x4] +satd_intra_4x4_x3_opt_end: + mov w0, w10 + +WELS_ASM_AARCH64_FUNC_END + +//int32_t WelsIntra8x8Combined3Satd_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*, int32_t, uint8_t*, uint8_t*,uint8_t*); +WELS_ASM_AARCH64_FUNC_BEGIN WelsIntra8x8Combined3Satd_AArch64_neon + ldr x11, [sp, #0] + + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x5,w5 + LOAD_CHROMA_DATA x0, v0.8b, v0.b + + LOAD_CHROMA_DATA x7, v1.8b, v1.b + + //Calculate the 16x16_v mode SATD and save to "v6, v7" + ushll v4.8h, v0.8b, #2 + ushll v5.8h, v1.8b, #2 + GET_16X16_V_SATD + + //Calculate the 16x16_h mode SATD and save to "v16, v17" + ushll2 v4.8h, v0.16b, #2 + ushll2 v5.8h, v1.16b, #2 + GET_16X16_H_SATD + + uaddlp v0.8h, v0.16b + uaddlp v2.4s, v0.8h + ins v3.d[0], v2.d[1] + add v3.2s, v2.2s, v3.2s + + uaddlp v1.8h, v1.16b + uaddlp v4.4s, v1.8h + ins v5.d[0], v4.d[1] + add v5.2s, v4.2s, v5.2s + + trn2 v0.4s, v2.4s, v4.4s + urshr v0.4s, v0.4s, #2 + urshr v3.2s, v3.2s, #3 + urshr v5.2s, v5.2s, #3 + + ushll v22.2d, v0.2s, #4 //{1cb, 1cr} + ushll2 v23.2d, v0.4s, #4 //{2cb, 2cr} + ushll v24.2d, v3.2s, #4 //{0cb, 3cb} + ushll v25.2d, v5.2s, #4 //{0cr, 3cr} + + eor v31.16b, v31.16b, v31.16b //Save the SATD of DC_BOTH + eor v30.16b, v30.16b, v30.16b //Save the SATD of H + eor v29.16b, v29.16b, v29.16b //Save the SATD of V + eor v28.16b, v28.16b, v28.16b //For zero register + + ins v18.d[0], v6.d[1] + ins v19.d[0], v7.d[1] + ins v26.d[0], v16.d[1] + ins v27.d[0], v17.d[1] + + LOAD_8X4_DATA x2 + + HDM_TRANSFORM_4X4_L0 v20.8b, v21.8b, v6.4h, v16.4h, v24.4h, v29.4s, v30.4s, v31.4s, v28, l + HDM_TRANSFORM_4X4_L0 v20.16b, v21.16b, v18.4h, v16.4h, v22.4h, v29.4s, v30.4s, v31.4s, v28, l2 + + LOAD_8X4_DATA x11 + + ins v22.d[0], v22.d[1] + HDM_TRANSFORM_4X4_L0 v20.8b, v21.8b, v7.4h, v17.4h, v25.4h, v29.4s, v30.4s, v31.4s, v28, l + HDM_TRANSFORM_4X4_L0 v20.16b, v21.16b, v19.4h, v17.4h, v22.4h, v29.4s, v30.4s, 
v31.4s, v28, l2 + + LOAD_8X4_DATA x2 + + ins v24.d[0], v24.d[1] + HDM_TRANSFORM_4X4_L0 v20.8b, v21.8b, v6.4h, v26.4h, v23.4h, v29.4s, v30.4s, v31.4s, v28, l + HDM_TRANSFORM_4X4_L0 v20.16b, v21.16b, v18.4h, v26.4h, v24.4h, v29.4s, v30.4s, v31.4s, v28, l2 + + LOAD_8X4_DATA x11 + + ins v23.d[0], v23.d[1] + ins v25.d[0], v25.d[1] + HDM_TRANSFORM_4X4_L0 v20.8b, v21.8b, v7.4h, v27.4h, v23.4h, v29.4s, v30.4s, v31.4s, v28, l + HDM_TRANSFORM_4X4_L0 v20.16b, v21.16b, v19.4h, v27.4h, v25.4h, v29.4s, v30.4s, v31.4s, v28, l2 + + urshr v29.4s, v29.4s, #1 + addv s29, v29.4s + fmov w2, s29 + add w2, w2, w5, lsl #1 + + urshr v30.4s, v30.4s, #1 + addv s30, v30.4s + fmov w1, s30 + add w1, w1, w5, lsl #1 + + urshr v31.4s, v31.4s, #1 + addv s31, v31.4s + fmov w0, s31 + + SELECT_BEST_COST_PREFER_HIGHER w0 + + str w7, [x4] +WELS_ASM_AARCH64_FUNC_END + +//int32_t WelsIntra16x16Combined3Satd_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*, int32_t, uint8_t*); +WELS_ASM_AARCH64_FUNC_BEGIN WelsIntra16x16Combined3Satd_AArch64_neon + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + SIGN_EXTENSION x5,w5 + LOAD_LUMA_DATA + + uaddlv h2, v0.16b + uaddlv h3, v1.16b + add v2.8h, v2.8h, v3.8h + urshr v2.4h, v2.4h, #5 + shl v2.4h, v2.4h, #4 + + //Calculate the 16x16_v mode SATD and save to "v6, v7" + ushll v4.8h, v0.8b, #2 + ushll2 v5.8h, v0.16b, #2 + GET_16X16_V_SATD + + //Calculate the 16x16_h mode SATD and save to "v16, v17" + ushll v4.8h, v1.8b, #2 + ushll2 v5.8h, v1.16b, #2 + GET_16X16_H_SATD + + eor v31.16b, v31.16b, v31.16b //Save the SATD of DC_BOTH + eor v30.16b, v30.16b, v30.16b //Save the SATD of H + eor v29.16b, v29.16b, v29.16b //Save the SATD of V + eor v28.16b, v28.16b, v28.16b //For zero register + + ins v18.d[0], v6.d[1] + ins v19.d[0], v7.d[1] + ins v26.d[0], v16.d[1] + ins v27.d[0], v17.d[1] + + LOAD_16X4_DATA + + HDM_TRANSFORM_4X4_L0 v22.8b, v24.8b, v6.4h, v16.4h, v2.4h, v29.4s, v30.4s, v31.4s, v28, l + HDM_TRANSFORM_4X4_L0 v22.16b, v24.16b, v7.4h, v16.4h, v2.4h, v29.4s, 
v30.4s, v31.4s, v28, l2 + HDM_TRANSFORM_4X4_L0 v23.8b, v25.8b, v18.4h, v16.4h, v2.4h, v29.4s, v30.4s, v31.4s, v28, l + HDM_TRANSFORM_4X4_L0 v23.16b, v25.16b, v19.4h, v16.4h, v2.4h, v29.4s, v30.4s, v31.4s, v28, l2 + + LOAD_16X4_DATA + + HDM_TRANSFORM_4X4_L0 v22.8b, v24.8b, v6.4h, v26.4h, v2.4h, v29.4s, v30.4s, v31.4s, v28, l + HDM_TRANSFORM_4X4_L0 v22.16b, v24.16b, v7.4h, v26.4h, v2.4h, v29.4s, v30.4s, v31.4s, v28, l2 + HDM_TRANSFORM_4X4_L0 v23.8b, v25.8b, v18.4h, v26.4h, v2.4h, v29.4s, v30.4s, v31.4s, v28, l + HDM_TRANSFORM_4X4_L0 v23.16b, v25.16b, v19.4h, v26.4h, v2.4h, v29.4s, v30.4s, v31.4s, v28, l2 + + LOAD_16X4_DATA + + HDM_TRANSFORM_4X4_L0 v22.8b, v24.8b, v6.4h, v17.4h, v2.4h, v29.4s, v30.4s, v31.4s, v28, l + HDM_TRANSFORM_4X4_L0 v22.16b, v24.16b, v7.4h, v17.4h, v2.4h, v29.4s, v30.4s, v31.4s, v28, l2 + HDM_TRANSFORM_4X4_L0 v23.8b, v25.8b, v18.4h, v17.4h, v2.4h, v29.4s, v30.4s, v31.4s, v28, l + HDM_TRANSFORM_4X4_L0 v23.16b, v25.16b, v19.4h, v17.4h, v2.4h, v29.4s, v30.4s, v31.4s, v28, l2 + + LOAD_16X4_DATA + + HDM_TRANSFORM_4X4_L0 v22.8b, v24.8b, v6.4h, v27.4h, v2.4h, v29.4s, v30.4s, v31.4s, v28, l + HDM_TRANSFORM_4X4_L0 v22.16b, v24.16b, v7.4h, v27.4h, v2.4h, v29.4s, v30.4s, v31.4s, v28, l2 + HDM_TRANSFORM_4X4_L0 v23.8b, v25.8b, v18.4h, v27.4h, v2.4h, v29.4s, v30.4s, v31.4s, v28, l + HDM_TRANSFORM_4X4_L0 v23.16b, v25.16b, v19.4h, v27.4h, v2.4h, v29.4s, v30.4s, v31.4s, v28, l2 + + urshr v29.4s, v29.4s, #1 + addv s29, v29.4s + fmov w0, s29 + + urshr v30.4s, v30.4s, #1 + addv s30, v30.4s + fmov w1, s30 + add w1, w1, w5, lsl #1 + + urshr v31.4s, v31.4s, #1 + addv s31, v31.4s + fmov w2, s31 + add w2, w2, w5, lsl #1 + + SELECT_BEST_COST_PREFER_LOWER w0 + + str w7, [x4] + +WELS_ASM_AARCH64_FUNC_END + +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm64/memory_aarch64_neon.S b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm64/memory_aarch64_neon.S new file mode 100644 index 000000000..e20500df6 --- /dev/null +++ 
b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm64/memory_aarch64_neon.S @@ -0,0 +1,63 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifdef HAVE_NEON_AARCH64 +#include "arm_arch64_common_macro.S" + +//void WelsSetMemZero_AArch64_neon (void* pDst, int32_t iSize); +WELS_ASM_AARCH64_FUNC_BEGIN WelsSetMemZero_AArch64_neon + eor v0.16b, v0.16b, v0.16b + SIGN_EXTENSION x1,w1 + cmp x1, #32 // three size classes only: exactly 32, below 32, above 32 + b.eq mem_zero_32_neon_start + b.lt mem_zero_24_neon_start // any size below 32 is zeroed as exactly 24 bytes +mem_zero_loop: + subs x1, x1, #64 // NOTE(review): the loop ends only when the count hits exactly 0, so sizes above 32 presumably must be multiples of 64 - confirm against callers + st1 {v0.16b}, [x0], #16 + st1 {v0.16b}, [x0], #16 + st1 {v0.16b}, [x0], #16 + st1 {v0.16b}, [x0], #16 + b.ne mem_zero_loop // flags still come from the subs above; st1 does not modify them + b mem_zero_end + +mem_zero_32_neon_start: + st1 {v0.16b}, [x0], #16 + st1 {v0.16b}, [x0], #16 + b mem_zero_end +mem_zero_24_neon_start: + st1 {v0.16b}, [x0], #16 + st1 {v0.8b}, [x0], #8 // 16 bytes then 8 bytes: 24 in total +mem_zero_end: + +WELS_ASM_AARCH64_FUNC_END + +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm64/pixel_aarch64_neon.S b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm64/pixel_aarch64_neon.S new file mode 100644 index 000000000..4840c90f3 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm64/pixel_aarch64_neon.S @@ -0,0 +1,621 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifdef HAVE_NEON_AARCH64 +#include "arm_arch64_common_macro.S" + +.macro CALC_AND_STORE_SAD + saddlv s2, v2.8h + fmov w0, s2 +.endm + +.macro CALC_AND_STORE_SAD_FOUR + saddlv s28, v28.8h + saddlv s29, v29.8h + saddlv s30, v30.8h + saddlv s31, v31.8h + st4 {v28.s, v29.s, v30.s, v31.s}[0], [x4] +.endm + +.macro LOAD_8X8_1 + ld1 {v0.8b}, [x0], x1 + ld1 {v1.8b}, [x0], x1 + ld1 {v2.8b}, [x0], x1 + ld1 {v3.8b}, [x0], x1 + ld1 {v4.8b}, [x0], x1 + ld1 {v5.8b}, [x0], x1 + ld1 {v6.8b}, [x0], x1 + ld1 {v7.8b}, [x0], x1 +.endm + +.macro LOAD_16X8_1 + ld1 {v0.16b}, [x0], x1 + ld1 {v1.16b}, [x0], x1 + ld1 {v2.16b}, [x0], x1 + ld1 {v3.16b}, [x0], x1 + ld1 {v4.16b}, [x0], x1 + ld1 {v5.16b}, [x0], x1 + ld1 {v6.16b}, [x0], x1 + ld1 {v7.16b}, [x0], x1 +.endm + +.macro LOAD_8X8_2 arg0 + ld1 {v16.8b}, [\arg0], x3 + ld1 {v17.8b}, [\arg0], x3 + ld1 {v18.8b}, [\arg0], x3 + ld1 {v19.8b}, [\arg0], x3 + ld1 {v20.8b}, [\arg0], x3 + ld1 {v21.8b}, [\arg0], x3 + ld1 {v22.8b}, [\arg0], x3 + ld1 {v23.8b}, [\arg0], x3 +.endm + +.macro CALC_ABS_8X8_1 arg0, arg1 + uab\arg1\()l \arg0, v0.8b, v16.8b + uabal \arg0, v1.8b, v17.8b + uabal \arg0, v2.8b, v18.8b + uabal \arg0, v3.8b, v19.8b + uabal \arg0, v4.8b, v20.8b + uabal \arg0, v5.8b, v21.8b + uabal \arg0, v6.8b, v22.8b + uabal \arg0, v7.8b, v23.8b +.endm + +.macro CALC_ABS_8X8_2 arg0 + uab\arg0\()l v29.8h, v0.8b, v18.8b + uabal v29.8h, v1.8b, v19.8b + uabal v29.8h, v2.8b, v20.8b + uabal v29.8h, v3.8b, 
v21.8b + uabal v29.8h, v4.8b, v22.8b + uabal v29.8h, v5.8b, v23.8b + uabal v29.8h, v6.8b, v24.8b + uabal v29.8h, v7.8b, v25.8b +.endm + +.macro LOAD_16X8_2 arg0 + ld1 {v16.16b}, [\arg0], x3 + ld1 {v17.16b}, [\arg0], x3 + ld1 {v18.16b}, [\arg0], x3 + ld1 {v19.16b}, [\arg0], x3 + ld1 {v20.16b}, [\arg0], x3 + ld1 {v21.16b}, [\arg0], x3 + ld1 {v22.16b}, [\arg0], x3 + ld1 {v23.16b}, [\arg0], x3 +.endm + +.macro CALC_ABS_16X8_1 arg0, arg1 + uab\arg1\()l \arg0, v0.8b, v16.8b + uabal2 \arg0, v0.16b,v16.16b + uabal \arg0, v1.8b, v17.8b + uabal2 \arg0, v1.16b,v17.16b + uabal \arg0, v2.8b, v18.8b + uabal2 \arg0, v2.16b,v18.16b + uabal \arg0, v3.8b, v19.8b + uabal2 \arg0, v3.16b,v19.16b + uabal \arg0, v4.8b, v20.8b + uabal2 \arg0, v4.16b,v20.16b + uabal \arg0, v5.8b, v21.8b + uabal2 \arg0, v5.16b,v21.16b + uabal \arg0, v6.8b, v22.8b + uabal2 \arg0, v6.16b,v22.16b + uabal \arg0, v7.8b, v23.8b + uabal2 \arg0, v7.16b,v23.16b +.endm + +.macro CALC_ABS_16X8_2 arg0 + uab\arg0\()l v29.8h, v0.8b, v18.8b + uabal2 v29.8h, v0.16b,v18.16b + uabal v29.8h, v1.8b, v19.8b + uabal2 v29.8h, v1.16b,v19.16b + uabal v29.8h, v2.8b, v20.8b + uabal2 v29.8h, v2.16b,v20.16b + uabal v29.8h, v3.8b, v21.8b + uabal2 v29.8h, v3.16b,v21.16b + uabal v29.8h, v4.8b, v22.8b + uabal2 v29.8h, v4.16b,v22.16b + uabal v29.8h, v5.8b, v23.8b + uabal2 v29.8h, v5.16b,v23.16b + uabal v29.8h, v6.8b, v24.8b + uabal2 v29.8h, v6.16b,v24.16b + uabal v29.8h, v7.8b, v25.8b + uabal2 v29.8h, v7.16b,v25.16b +.endm + +WELS_ASM_AARCH64_FUNC_BEGIN WelsSampleSad4x4_AArch64_neon + sxtw x1, w1 + sxtw x3, w3 + ld1 {v0.s}[0], [x0], x1 + ld1 {v1.s}[0], [x2], x3 + uabdl v2.8h, v0.8b, v1.8b +.rept 3 + ld1 {v0.s}[0], [x0], x1 + ld1 {v1.s}[0], [x2], x3 + uabal v2.8h, v0.8b, v1.8b +.endr + saddlv s2, v2.4h + fmov w0, s2 +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsSampleSad8x8_AArch64_neon + sxtw x1, w1 + sxtw x3, w3 + ld1 {v0.8b}, [x0], x1 + ld1 {v1.8b}, [x2], x3 + uabdl v2.8h, v0.8b, v1.8b +.rept 7 + ld1 {v0.8b}, [x0], x1 + 
ld1 {v1.8b}, [x2], x3 + uabal v2.8h, v0.8b, v1.8b +.endr + CALC_AND_STORE_SAD +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsSampleSad8x16_AArch64_neon + sxtw x1, w1 + sxtw x3, w3 + ld1 {v0.8b}, [x0], x1 + ld1 {v1.8b}, [x2], x3 + uabdl v2.8h, v0.8b, v1.8b +.rept 15 + ld1 {v0.8b}, [x0], x1 + ld1 {v1.8b}, [x2], x3 + uabal v2.8h, v0.8b, v1.8b +.endr + CALC_AND_STORE_SAD +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsSampleSad16x8_AArch64_neon + sxtw x1, w1 + sxtw x3, w3 + ld1 {v0.16b}, [x0], x1 + ld1 {v1.16b}, [x2], x3 + uabdl v2.8h, v0.8b, v1.8b + uabal2 v2.8h, v0.16b, v1.16b +.rept 7 + ld1 {v0.16b}, [x0], x1 + ld1 {v1.16b}, [x2], x3 + uabal v2.8h, v0.8b, v1.8b + uabal2 v2.8h, v0.16b, v1.16b +.endr + CALC_AND_STORE_SAD +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsSampleSad16x16_AArch64_neon + sxtw x1, w1 + sxtw x3, w3 + ld1 {v0.16b}, [x0], x1 + ld1 {v1.16b}, [x2], x3 + uabdl v2.8h, v0.8b, v1.8b + uabal2 v2.8h, v0.16b, v1.16b +.rept 15 + ld1 {v0.16b}, [x0], x1 + ld1 {v1.16b}, [x2], x3 + uabal v2.8h, v0.8b, v1.8b + uabal2 v2.8h, v0.16b, v1.16b +.endr + CALC_AND_STORE_SAD +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsSampleSadFour4x4_AArch64_neon + sxtw x1, w1 + sxtw x3, w3 + ld1 {v0.s}[0], [x0], x1 + ld1 {v0.s}[1], [x0], x1 + ld1 {v1.s}[0], [x0], x1 + ld1 {v1.s}[1], [x0] + sub x0, x2, x3 + ld1 {v2.s}[0], [x0], x3 + ld1 {v2.s}[1], [x0], x3 + ld1 {v3.s}[0], [x0], x3 + ld1 {v3.s}[1], [x0], x3 + ld1 {v4.s}[0], [x0], x3 + ld1 {v4.s}[1], [x0], x3 + + uabdl v28.8h, v0.8b, v2.8b + uabal v28.8h, v1.8b, v3.8b + + uabdl v29.8h, v0.8b, v3.8b + uabal v29.8h, v1.8b, v4.8b + + sub x0, x2, #1 + ld1 {v2.s}[0], [x0], x3 + ld1 {v2.s}[1], [x0], x3 + ld1 {v3.s}[0], [x0], x3 + ld1 {v3.s}[1], [x0] + uabdl v30.8h, v0.8b, v2.8b + uabal v30.8h, v1.8b, v3.8b + + add x0, x2, #1 + ld1 {v2.s}[0], [x0], x3 + ld1 {v2.s}[1], [x0], x3 + ld1 {v3.s}[0], [x0], x3 + ld1 {v3.s}[1], [x0] + uabdl v31.8h, v0.8b, v2.8b + uabal v31.8h, v1.8b, v3.8b + 
+ CALC_AND_STORE_SAD_FOUR +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsSampleSadFour8x8_AArch64_neon + sxtw x1, w1 + sxtw x3, w3 + LOAD_8X8_1 + sub x0, x2, x3 + LOAD_8X8_2 x0 + ld1 {v24.8b}, [x0], x3 + ld1 {v25.8b}, [x0] + + CALC_ABS_8X8_1 v28.8h, d + CALC_ABS_8X8_2 d + + sub x0, x2, #1 + LOAD_8X8_2 x0 + CALC_ABS_8X8_1 v30.8h, d + + add x0, x2, #1 + LOAD_8X8_2 x0 + CALC_ABS_8X8_1 v31.8h, d + + CALC_AND_STORE_SAD_FOUR +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsSampleSadFour8x16_AArch64_neon + sxtw x1, w1 + sxtw x3, w3 + LOAD_8X8_1 + sub x5, x2, x3 + LOAD_8X8_2 x5 + ld1 {v24.8b}, [x5], x3 + ld1 {v25.8b}, [x5], x3 + + CALC_ABS_8X8_1 v28.8h, d + CALC_ABS_8X8_2 d + + sub x6, x2, #1 + LOAD_8X8_2 x6 + CALC_ABS_8X8_1 v30.8h, d + + add x7, x2, #1 + LOAD_8X8_2 x7 + CALC_ABS_8X8_1 v31.8h, d + + LOAD_8X8_1 + sub x5, x5, x3 + sub x5, x5, x3 + LOAD_8X8_2 x5 + ld1 {v24.8b}, [x5], x3 + ld1 {v25.8b}, [x5] + + CALC_ABS_8X8_1 v28.8h, a + CALC_ABS_8X8_2 a + + LOAD_8X8_2 x6 + CALC_ABS_8X8_1 v30.8h, a + + LOAD_8X8_2 x7 + CALC_ABS_8X8_1 v31.8h, a + + CALC_AND_STORE_SAD_FOUR +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsSampleSadFour16x8_AArch64_neon + sxtw x1, w1 + sxtw x3, w3 + LOAD_16X8_1 + sub x0, x2, x3 + LOAD_16X8_2 x0 + ld1 {v24.16b}, [x0], x3 + ld1 {v25.16b}, [x0] + + CALC_ABS_16X8_1 v28.8h, d + CALC_ABS_16X8_2 d + + sub x0, x2, #1 + LOAD_16X8_2 x0 + CALC_ABS_16X8_1 v30.8h, d + + add x0, x2, #1 + LOAD_16X8_2 x0 + CALC_ABS_16X8_1 v31.8h, d + + CALC_AND_STORE_SAD_FOUR +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsSampleSadFour16x16_AArch64_neon + sxtw x1, w1 + sxtw x3, w3 + + LOAD_16X8_1 + sub x5, x2, x3 + LOAD_16X8_2 x5 + ld1 {v24.16b}, [x5], x3 + ld1 {v25.16b}, [x5], x3 + + CALC_ABS_16X8_1 v28.8h, d + CALC_ABS_16X8_2 d + + sub x6, x2, #1 + LOAD_16X8_2 x6 + CALC_ABS_16X8_1 v30.8h, d + + add x7, x2, #1 + LOAD_16X8_2 x7 + CALC_ABS_16X8_1 v31.8h, d + + LOAD_16X8_1 + sub x5, x5, x3 + sub x5, x5, x3 + LOAD_16X8_2 x5 + 
ld1 {v24.16b}, [x5], x3 + ld1 {v25.16b}, [x5] + + CALC_ABS_16X8_1 v28.8h, a + CALC_ABS_16X8_2 a + + LOAD_16X8_2 x6 + CALC_ABS_16X8_1 v30.8h, a + + LOAD_16X8_2 x7 + CALC_ABS_16X8_1 v31.8h, a + + CALC_AND_STORE_SAD_FOUR +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsSampleSatd4x4_AArch64_neon + sxtw x1, w1 + sxtw x3, w3 + ld1 {v0.s}[0], [x0], x1 + ld1 {v0.s}[1], [x0], x1 + ld1 {v1.s}[0], [x0], x1 + ld1 {v1.s}[1], [x0] + + ld1 {v2.s}[0], [x2], x3 + ld1 {v2.s}[1], [x2], x3 + ld1 {v3.s}[0], [x2], x3 + ld1 {v3.s}[1], [x2] + usubl v4.8h, v0.8b, v2.8b //{0,1,2,3,4,5,6,7} + usubl v5.8h, v1.8b, v3.8b //{8,9,10,11,12,13,14,15} + + //Do the vertical transform + add v6.8h, v4.8h, v5.8h //{0,4,8,12,1,5,9,13} + sub v7.8h, v4.8h, v5.8h //{2,6,10,14,3,7,11,15} + mov x4, v6.d[1] + mov v6.d[1], v7.d[0] + ins v7.d[0], x4 + add v4.8h, v6.8h, v7.8h + sub v5.8h, v6.8h, v7.8h + + //Do the horizontal transform + trn1 v6.4s, v4.4s, v5.4s + trn2 v7.4s, v4.4s, v5.4s + add v4.8h, v6.8h, v7.8h + sub v5.8h, v6.8h, v7.8h + trn1 v6.8h, v4.8h, v5.8h + trn2 v7.8h, v4.8h, v5.8h + add v4.8h, v6.8h, v7.8h + abs v4.8h, v4.8h + saba v4.8h, v6.8h, v7.8h + uaddlv s4, v4.8h + fmov w0, s4 + add w0, w0, #1 + lsr w0, w0, #1 + +WELS_ASM_AARCH64_FUNC_END + +.macro SATD_8x4 + ld1 {v0.8b}, [x0], x1 + ld1 {v1.8b}, [x2], x3 + ld1 {v2.8b}, [x0], x1 + usubl v16.8h, v0.8b, v1.8b + + ld1 {v3.8b}, [x2], x3 + usubl v17.8h, v2.8b, v3.8b + ld1 {v4.8b}, [x0], x1 + ld1 {v5.8b}, [x2], x3 + + add v25.8h, v16.8h, v17.8h + usubl v18.8h, v4.8b, v5.8b + + ld1 {v6.8b}, [x0], x1 + ld1 {v7.8b}, [x2], x3 + + usubl v19.8h, v6.8b, v7.8b + sub v26.8h, v16.8h, v17.8h + + add v27.8h, v18.8h, v19.8h + sub v28.8h, v18.8h, v19.8h + + add v0.8h, v25.8h, v27.8h + sub v1.8h, v25.8h, v27.8h + + add v2.8h, v26.8h, v28.8h + sub v3.8h, v26.8h, v28.8h + + trn1 v4.8h, v0.8h, v1.8h + trn2 v5.8h, v0.8h, v1.8h + trn1 v6.8h, v2.8h, v3.8h + trn2 v7.8h, v2.8h, v3.8h + + add v16.8h, v4.8h, v5.8h + sabd v17.8h, v4.8h, v5.8h + abs v16.8h, v16.8h 
+ add v18.8h, v6.8h, v7.8h + sabd v19.8h, v6.8h, v7.8h + abs v18.8h, v18.8h + + trn1 v4.4s, v16.4s, v17.4s + trn2 v5.4s, v16.4s, v17.4s + trn1 v6.4s, v18.4s, v19.4s + trn2 v7.4s, v18.4s, v19.4s + + smax v0.8h, v4.8h, v5.8h + smax v1.8h, v6.8h, v7.8h +.endm + +.macro SATD_16x4 + ld1 {v0.16b}, [x0], x1 + ld1 {v1.16b}, [x2], x3 + ld1 {v2.16b}, [x0], x1 + usubl v16.8h, v0.8b, v1.8b + usubl2 v24.8h, v0.16b, v1.16b + + ld1 {v3.16b}, [x2], x3 + usubl v17.8h, v2.8b, v3.8b + usubl2 v25.8h, v2.16b, v3.16b + + ld1 {v4.16b}, [x0], x1 + ld1 {v5.16b}, [x2], x3 + usubl v18.8h, v4.8b, v5.8b + usubl2 v26.8h, v4.16b, v5.16b + + ld1 {v6.16b}, [x0], x1 + ld1 {v7.16b}, [x2], x3 + usubl v19.8h, v6.8b, v7.8b + usubl2 v27.8h, v6.16b, v7.16b + + add v0.8h, v16.8h, v17.8h + sub v1.8h, v16.8h, v17.8h + add v2.8h, v18.8h, v19.8h + sub v3.8h, v18.8h, v19.8h + + add v4.8h, v24.8h, v25.8h + sub v5.8h, v24.8h, v25.8h + add v6.8h, v26.8h, v27.8h + sub v7.8h, v26.8h, v27.8h + + add v16.8h, v0.8h, v2.8h + sub v18.8h, v0.8h, v2.8h + add v17.8h, v4.8h, v6.8h + sub v19.8h, v4.8h, v6.8h + + add v0.8h, v1.8h, v3.8h + sub v2.8h, v1.8h, v3.8h + add v1.8h, v5.8h, v7.8h + sub v3.8h, v5.8h, v7.8h + + trn1 v4.8h, v16.8h, v18.8h + trn2 v6.8h, v16.8h, v18.8h + trn1 v5.8h, v17.8h, v19.8h + trn2 v7.8h, v17.8h, v19.8h + + add v16.8h, v4.8h, v6.8h + sabd v18.8h, v4.8h, v6.8h + add v17.8h, v5.8h, v7.8h + sabd v19.8h, v5.8h, v7.8h + abs v16.8h, v16.8h + abs v17.8h, v17.8h + + trn1 v4.8h, v0.8h, v2.8h + trn2 v6.8h, v0.8h, v2.8h + trn1 v5.8h, v1.8h, v3.8h + trn2 v7.8h, v1.8h, v3.8h + + add v0.8h, v4.8h, v6.8h + sabd v2.8h, v4.8h, v6.8h + add v1.8h, v5.8h, v7.8h + sabd v3.8h, v5.8h, v7.8h + abs v0.8h, v0.8h + abs v1.8h, v1.8h + + trn1 v4.4s, v16.4s, v18.4s + trn2 v6.4s, v16.4s, v18.4s + trn1 v5.4s, v17.4s, v19.4s + trn2 v7.4s, v17.4s, v19.4s + + trn1 v16.4s, v0.4s, v2.4s + trn2 v18.4s, v0.4s, v2.4s + trn1 v17.4s, v1.4s, v3.4s + trn2 v19.4s, v1.4s, v3.4s + + smax v0.8h, v4.8h, v6.8h + smax v1.8h, v5.8h, v7.8h + smax 
v2.8h, v16.8h, v18.8h + smax v3.8h, v17.8h, v19.8h + add v0.8h, v0.8h, v1.8h + add v2.8h, v2.8h, v3.8h +.endm + +WELS_ASM_AARCH64_FUNC_BEGIN WelsSampleSatd16x16_AArch64_neon + sxtw x1, w1 + sxtw x3, w3 + SATD_16x4 + add v31.8h, v0.8h, v2.8h +.rept 3 + SATD_16x4 + add v31.8h, v31.8h, v0.8h + add v31.8h, v31.8h, v2.8h +.endr + uaddlv s4, v31.8h + fmov w0, s4 +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsSampleSatd16x8_AArch64_neon + sxtw x1, w1 + sxtw x3, w3 + SATD_16x4 + add v31.8h, v0.8h, v2.8h + + SATD_16x4 + add v31.8h, v31.8h, v0.8h + add v31.8h, v31.8h, v2.8h + + uaddlv s4, v31.8h + fmov w0, s4 +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsSampleSatd8x16_AArch64_neon + sxtw x1, w1 + sxtw x3, w3 + SATD_8x4 + add v31.8h, v0.8h, v1.8h +.rept 3 + SATD_8x4 + add v31.8h, v31.8h, v0.8h + add v31.8h, v31.8h, v1.8h +.endr + uaddlv s4, v31.8h + fmov w0, s4 +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsSampleSatd8x8_AArch64_neon + sxtw x1, w1 + sxtw x3, w3 + SATD_8x4 + add v31.8h, v0.8h, v1.8h + + SATD_8x4 + add v31.8h, v31.8h, v0.8h + add v31.8h, v31.8h, v1.8h + uaddlv s4, v31.8h + fmov w0, s4 +WELS_ASM_AARCH64_FUNC_END +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm64/reconstruct_aarch64_neon.S b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm64/reconstruct_aarch64_neon.S new file mode 100644 index 000000000..667606840 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm64/reconstruct_aarch64_neon.S @@ -0,0 +1,717 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifdef HAVE_NEON_AARCH64 +#include "arm_arch64_common_macro.S" + +.macro ZERO_COUNT_IN_2_QUARWORD arg0, arg1, arg2 +// { // input: coef_0 (\arg0\(), clobbered), coef_1 (\arg1\(), clobbered); output: number of zero 16-bit coefficients summed into \arg2\() + cmeq \arg0\().8h, \arg0\().8h, #0 + cmeq \arg1\().8h, \arg1\().8h, #0 + uzp1 \arg0\().16b, \arg0\().16b, \arg1\().16b + ushr \arg0\().16b, \arg0\().16b, 7 + addv \arg2\(), \arg0\().16b +// } +.endm + +.macro NEWQUANT_COEF_EACH_16BITS arg0, arg1, arg2, arg3, arg4, arg5 +// if coef <= 0, - coef; else , coef; +// { // input: coef, ff (dst), mf; working: \arg3\() (zeroed here), \arg4\(), \arg5\() + eor \arg3\().16b, \arg3\().16b, \arg3\().16b // init 0 , and keep 0; + saba \arg1\().8h, \arg0\().8h, \arg3\().8h // f + abs(coef - 0) + smull \arg4\().4s, \arg1\().4h, \arg2\().4h + smull2 \arg5\().4s, \arg1\().8h, \arg2\().8h + shrn \arg1\().4h, \arg4\().4s, #16 + shrn2 \arg1\().8h, \arg5\().4s, #16 + + cmgt \arg4\().8h, \arg0\().8h, #0 // if true, location of coef == 11111111 + bif \arg3\().16b, \arg1\().16b, \arg4\().16b // if (coef<=0) copy quantized value; else keep 0 untouched + shl \arg3\().8h, \arg3\().8h, #1 + sub \arg1\().8h, \arg1\().8h, \arg3\().8h // if x > 0, -= 0; else x-= 2x +// } +.endm + +.macro NEWQUANT_COEF_EACH_16BITS_MAX arg0, arg1, arg2, arg3, arg4, arg5, arg6 +// if coef <= 0, - coef; else , coef; +// { // input: coef, ff (dst), mf; working: \arg3\() (zeroed here), \arg4\(), \arg5\(); \arg6\() gets a copy of the quantized value taken before the sign is restored + eor \arg3\().16b, \arg3\().16b, \arg3\().16b // init 0 , and keep 0; + saba \arg1\().8h, \arg0\().8h, \arg3\().8h // f + abs(coef - 0) + smull \arg4\().4s, \arg1\().4h, \arg2\().4h + smull2 \arg5\().4s, \arg1\().8h, \arg2\().8h + shrn \arg1\().4h, \arg4\().4s, #16 + shrn2 \arg1\().8h, \arg5\().4s, #16 + + cmgt \arg4\().8h, \arg0\().8h, #0 // if true, location of coef == 11111111 + bif \arg3\().16b, \arg1\().16b, \arg4\().16b // if (coef<=0) copy quantized value; else keep 0 untouched + shl \arg3\().8h, \arg3\().8h, #1 + mov \arg6\().16b, \arg1\().16b + sub \arg1\().8h, \arg1\().8h, \arg3\().8h // if x > 0, -= 0; else x-= 2x +// } +.endm + +.macro
QUANT_DUALWORD_COEF_EACH_16BITS arg0, arg1, arg2, arg3, arg4 +// if coef <= 0, - coef; else , coef; +// { // input: coef, ff (dst), mf, zero (\arg3\() is used as the zero operand and is NOT cleared here, so the caller must zero it first), working + saba \arg1\().8h, \arg0\().8h, \arg3\().8h // f + abs(coef - 0) + smull \arg4\().4s, \arg1\().4h, \arg2\().4h + shrn \arg1\().4h, \arg4\().4s, #16 + + cmgt \arg4\().8h, \arg0\().8h, #0 // if true, location of coef == 11111111 + bif \arg3\().16b, \arg1\().16b, \arg4\().16b // if (coef<=0) copy quantized value; else keep 0 untouched + shl \arg3\().8h, \arg3\().8h, #1 + sub \arg1\().8h, \arg1\().8h, \arg3\().8h // if x > 0, -= 0; else x-= 2x +// } +.endm + +.macro SELECT_MAX_IN_ABS_COEF arg0, arg1, arg2, arg3, arg4, arg5 +// { // input: coef_0, coef_1, coef_2, coef_3 (coef_0 and coef_2 are clobbered); output: max of coef_0/coef_1 in \arg4\(), max of coef_2/coef_3 in \arg5\() + umax \arg0\().8h, \arg0\().8h, \arg1\().8h + umaxv \arg4\(), \arg0\().8h + umax \arg2\().8h, \arg2\().8h, \arg3\().8h + umaxv \arg5\(), \arg2\().8h +// } +.endm + +.macro HDM_QUANT_2x2_TOTAL_16BITS arg0, arg1, arg2 +// { // input: src_d[0][16][32][48], dst_d[0][16][32][48], working + sshr \arg1\().2d, \arg0\().2d, #32 + add \arg2\().4h, \arg0\().4h, \arg1\().4h // [0] = rs[0] + rs[32];[1] = rs[16] + rs[48]; + sub \arg1\().4h, \arg0\().4h, \arg1\().4h // [0] = rs[0] - rs[32];[1] = rs[16] - rs[48]; + zip1 \arg1\().4h, \arg2\().4h, \arg1\().4h +// } +.endm + + +.macro DC_ZERO_COUNT_IN_DUALWORD arg0, arg1, arg2 +// { // input: coef (clobbered), dst (scalar register receiving the count of zero coefficients), working_d (caller preloads 0x0001 in every 16-bit lane) + cmeq \arg0\().4h, \arg0\().4h, #0 + and \arg0\().8b, \arg0\().8b, \arg2\().8b + addv \arg1\(), \arg0\().4h +// } +.endm + +.macro IHDM_4x4_TOTAL_16BITS arg0, arg1, arg2 +// { // input: each src_d[0]~[3](dst), working_q0, working_q1 + uzp2 \arg1\().4s, \arg0\().4s, \arg0\().4s + uzp1 \arg0\().4s, \arg0\().4s, \arg0\().4s + add \arg2\().8h, \arg0\().8h, \arg1\().8h // [0] = rs[0] + rs[2];[1] = rs[1] + rs[3];[2] = rs[4] + rs[6];[3] = rs[5] + rs[7]; + sub \arg1\().8h, \arg0\().8h, \arg1\().8h // [0] = rs[0] - rs[2];[1] = rs[1] - rs[3];[2] = rs[4] - rs[6];[3] = rs[5] - rs[7]; + zip1 \arg2\().8h,
\arg2\().8h, \arg1\().8h // [0] = rs[0] + rs[2]; [1] = rs[0] - rs[2]; ... [2]; [3] + + uzp2 \arg1\().4s, \arg2\().4s, \arg2\().4s + uzp1 \arg0\().4s, \arg2\().4s, \arg2\().4s + add \arg2\().8h, \arg0\().8h, \arg1\().8h // [0] = rs[0] + rs[2];[1] = rs[1] + rs[3];[2] = rs[4] + rs[6];[3] = rs[5] + rs[7]; + sub \arg1\().8h, \arg0\().8h, \arg1\().8h // [0] = rs[0] - rs[2];[1] = rs[1] - rs[3];[2] = rs[4] - rs[6];[3] = rs[5] - rs[7]; + rev32 \arg1\().4h, \arg1\().4h // [0] = rs[1] - rs[3];[1] = rs[0] - rs[2];[2] = rs[5] - rs[7];[3] = rs[4] - rs[6]; + zip1 \arg0\().4s, \arg2\().4s, \arg1\().4s + // } +.endm + +.macro MATRIX_TRANSFORM_EACH_16BITS_2x8_OUT2 arg0, arg1, arg2, arg3 +// { // input & output: src_d[0]~[3];[0 1 2 3]+[4 5 6 7]+[8 9 10 11]+[12 13 14 15]; \arg2\() and \arg3\() are working registers + uzp1 \arg2\().4s, \arg0\().4s, \arg1\().4s //[0 1 4 5]+[8 9 12 13] + uzp2 \arg3\().4s, \arg0\().4s, \arg1\().4s //[2 3 6 7]+[10 11 14 15] + + uzp1 \arg0\().8h, \arg2\().8h, \arg3\().8h //[0 4 8 12]+[2 6 10 14] + uzp2 \arg2\().8h, \arg2\().8h, \arg3\().8h //[1 5 9 13]+[3 7 11 15] + zip2 \arg1\().2d, \arg0\().2d, \arg2\().2d //[2 6 10 14]+[3 7 11 15] + zip1 \arg0\().2d, \arg0\().2d, \arg2\().2d //[0 4 8 12]+[1 5 9 13] +// } +.endm + +.macro MATRIX_TRANSFORM_EACH_16BITS_OUT4 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 +// { // input & output: src_d[0]~[3];[0 4 8 12],[1 5 9 13],[2 6 10 14],[3 7 11 15]; NOTE: the source operands below are hard-coded as v0-v3 (first stage) and v4-v7 (second stage) rather than taken from the arguments, so this only works when invoked with v0, v1, ..., v7 in that order + trn1 \arg4\().8h, v0.8h, v1.8h + trn2 \arg5\().8h, v0.8h, v1.8h + trn1 \arg6\().8h, v2.8h, v3.8h + trn2 \arg7\().8h, v2.8h, v3.8h + + trn1 \arg0\().4s, v4.4s, v6.4s + trn2 \arg2\().4s, v4.4s, v6.4s + trn1 \arg1\().4s, v5.4s, v7.4s + trn2 \arg3\().4s, v5.4s, v7.4s +// } +.endm + +.macro MATRIX_TRANSFORM_EACH_16BITS_4x4_OUT2 arg0, arg1, arg2, arg3 +// { // input & output: src_d[0]~[3];[0 1 2 3],[4 5 6 7],[8 9 10 11],[12 13 14 15] + mov \arg0\().d[1], \arg1\().d[0] //[0 1 2 3]+[4 5 6 7] + mov \arg2\().d[1], \arg3\().d[0] //[8 9 10 11]+[12 13 14 15] + uzp1 \arg1\().4s, \arg0\().4s, \arg2\().4s //[0 1 4 5]+[8 9 12 13] +
uzp2 \arg3\().4s, \arg0\().4s, \arg2\().4s //[2 3 6 7]+[10 11 14 15] + + uzp1 \arg0\().8h, \arg1\().8h, \arg3\().8h //[0 4 8 12]+[2 6 10 14] + uzp2 \arg2\().8h, \arg1\().8h, \arg3\().8h //[1 5 9 13]+[3 7 11 15] + zip2 \arg1\().2d, \arg0\().2d, \arg2\().2d //[2 6 10 14]+[3 7 11 15] + zip1 \arg0\().2d, \arg0\().2d, \arg2\().2d //[0 4 8 12]+[1 5 9 13] +// } +.endm + +.macro LOAD_4x4_DATA_FOR_DCT arg0, arg1, arg2, arg3, arg4, arg5 + ld1 {\arg0\().s}[0], [\arg2\()], \arg3\() + ld1 {\arg0\().s}[1], [\arg2\()], \arg3\() + ld1 {\arg0\().s}[2], [\arg2\()], \arg3\() + ld1 {\arg0\().s}[3], [\arg2\()] + + ld1 {\arg1\().s}[0], [\arg4\()], \arg5\() + ld1 {\arg1\().s}[1], [\arg4\()], \arg5\() + ld1 {\arg1\().s}[2], [\arg4\()], \arg5\() + ld1 {\arg1\().s}[3], [\arg4\()] +.endm + +.macro DCT_ROW_TRANSFORM_TOTAL_16BITS arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 +// { // input: src_d[0]~[3], working: [4]~[7] + add \arg4\().8h, \arg0\().8h, \arg3\().8h //int16 s[0] = data[i] + data[i3]; + sub \arg7\().8h, \arg0\().8h, \arg3\().8h //int16 s[3] = data[i] - data[i3]; + add \arg5\().8h, \arg1\().8h, \arg2\().8h //int16 s[1] = data[i1] + data[i2]; + sub \arg6\().8h, \arg1\().8h, \arg2\().8h //int16 s[2] = data[i1] - data[i2]; + + add \arg0\().8h, \arg4\().8h, \arg5\().8h //int16 dct[i ] = s[0] + s[1]; + sub \arg2\().8h, \arg4\().8h, \arg5\().8h //int16 dct[i2] = s[0] - s[1]; + shl \arg1\().8h, \arg7\().8h, #1 + shl \arg3\().8h, \arg6\().8h, #1 + add \arg1\().8h, \arg1\().8h, \arg6\().8h //int16 dct[i1] = (s[3] << 1) + s[2]; + sub \arg3\().8h, \arg7\().8h, \arg3\().8h //int16 dct[i3] = s[3] - (s[2] << 1); +// } +.endm + +.macro LOAD_8x4_DATA_FOR_DCT arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9 +// { // output: \arg0\()~\arg3\() (four 8-byte rows read via src1*), \arg4\()~\arg7\() (four 8-byte rows read via src2*); input: src1* (\arg8\()), src2* (\arg9\()), both post-incremented; strides are hard-coded and left untouched: x2 = src1_stride, x4 = src2_stride + ld1 {\arg0\().d}[0], [\arg8\()], x2 + ld1 {\arg1\().d}[0], [\arg8\()], x2 + ld1 {\arg2\().d}[0], [\arg8\()], x2 + ld1 {\arg3\().d}[0], [\arg8\()], x2 + + ld1 {\arg4\().d}[0], [\arg9\()], x4 +
ld1 {\arg5\().d}[0], [\arg9\()], x4 + ld1 {\arg6\().d}[0], [\arg9\()], x4 + ld1 {\arg7\().d}[0], [\arg9\()], x4 +// } +.endm + +.macro ROW_TRANSFORM_1_STEP_TOTAL_16BITS arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 +// { // input: src_d[0]~[3], output: e_d[0]~[3]; + add \arg4\().8h, \arg0\().8h, \arg2\().8h //int16 e[i][0] = src[0] + src[2]; + sub \arg5\().8h, \arg0\().8h, \arg2\().8h //int16 e[i][1] = src[0] - src[2]; + sshr \arg6\().8h, \arg1\().8h, #1 + sshr \arg7\().8h, \arg3\().8h, #1 + sub \arg6\().8h, \arg6\().8h, \arg3\().8h //int16 e[i][2] = (src[1]>>1)-src[3]; + add \arg7\().8h, \arg1\().8h, \arg7\().8h //int16 e[i][3] = src[1] + (src[3]>>1); +// } +.endm + +.macro TRANSFORM_TOTAL_16BITS arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 +// both row & col transform used +// { // output: f_q[0]~[3], input: e_q[0]~[3]; + add \arg0\().8h, \arg4\().8h, \arg7\().8h //int16 f[i][0] = e[i][0] + e[i][3]; + add \arg1\().8h, \arg5\().8h, \arg6\().8h //int16 f[i][1] = e[i][1] + e[i][2]; + sub \arg2\().8h, \arg5\().8h, \arg6\().8h //int16 f[i][2] = e[i][1] - e[i][2]; + sub \arg3\().8h, \arg4\().8h, \arg7\().8h //int16 f[i][3] = e[i][0] - e[i][3]; +// } +.endm + +.macro ROW_TRANSFORM_0_STEP arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 +// { // input: src_d[0]~[3], output: e_q[0]~[3]; + saddl \arg4\().4s, \arg0\().4h, \arg2\().4h //int32 e[i][0] = src[0] + src[2]; + ssubl \arg5\().4s, \arg0\().4h, \arg2\().4h //int32 e[i][1] = src[0] - src[2]; + ssubl \arg6\().4s, \arg1\().4h, \arg3\().4h //int32 e[i][2] = src[1] - src[3]; + saddl \arg7\().4s, \arg1\().4h, \arg3\().4h //int32 e[i][3] = src[1] + src[3]; +// } +.endm + +.macro COL_TRANSFORM_0_STEP arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 +// { // input: src_q[0]~[3], output: e_q[0]~[3]; + add \arg4\().4s, \arg0\().4s, \arg2\().4s //int32 e[0][j] = f[0][j] + f[2][j]; + sub \arg5\().4s, \arg0\().4s, \arg2\().4s //int32 e[1][j] = f[0][j] - f[2][j]; + sub \arg6\().4s, \arg1\().4s, \arg3\().4s //int32 e[2][j] = 
f[1][j] - f[3][j]; + add \arg7\().4s, \arg1\().4s, \arg3\().4s //int32 e[3][j] = f[1][j] + f[3][j]; +// } +.endm + +.macro TRANSFORM_4BYTES arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 +// both row & col transform used +// { // output: f_q[0]~[3], input: e_q[0]~[3]; + add \arg0\().4s, \arg4\().4s, \arg7\().4s //int32 f[i][0] = e[i][0] + e[i][3]; + add \arg1\().4s, \arg5\().4s, \arg6\().4s //int32 f[i][1] = e[i][1] + e[i][2]; + sub \arg2\().4s, \arg5\().4s, \arg6\().4s //int32 f[i][2] = e[i][1] - e[i][2]; + sub \arg3\().4s, \arg4\().4s, \arg7\().4s //int32 f[i][3] = e[i][0] - e[i][3]; +// } +.endm + +.macro MB_PRED_8BITS_ADD_DCT_16BITS_CLIP arg0, arg1, arg2, arg3, arg4 +// { // input: pred_d[0](output), dct_q0/1, working_q0/1; result is saturated to unsigned 8 bits by sqxtun/sqxtun2 + uxtl \arg3\().8h, \arg0\().8b + uxtl2 \arg4\().8h, \arg0\().16b + add \arg3\().8h, \arg3\().8h, \arg1\().8h + add \arg4\().8h, \arg4\().8h, \arg2\().8h + sqxtun \arg0\().8b, \arg3\().8h + sqxtun2 \arg0\().16b,\arg4\().8h +// } +.endm + +WELS_ASM_AARCH64_FUNC_BEGIN WelsGetNoneZeroCount_AArch64_neon + ld1 {v0.8h, v1.8h}, [x0] + ZERO_COUNT_IN_2_QUARWORD v0, v1, b0 + mov x0, v0.d[0] + mov x1, #16 + subs x0, x1, x0 +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsQuant4x4_AArch64_neon + ld1 {v2.8h}, [x1] + ld1 {v0.8h, v1.8h}, [x0] + ld1 {v3.8h}, [x2] + mov v4.16b, v2.16b + NEWQUANT_COEF_EACH_16BITS v0, v2, v3, v5, v6, v7 + st1 {v2.8h}, [x0], #16 + NEWQUANT_COEF_EACH_16BITS v1, v4, v3, v5, v6, v7 + st1 {v4.8h}, [x0], #16 +WELS_ASM_AARCH64_FUNC_END + + +WELS_ASM_AARCH64_FUNC_BEGIN WelsQuant4x4Dc_AArch64_neon + ld1 {v0.8h, v1.8h}, [x0] + dup v2.8h, w1 // even ff range [0, 768] + dup v3.8h, w2 + mov v4.16b, v2.16b + NEWQUANT_COEF_EACH_16BITS v0, v2, v3, v5, v6, v7 + st1 {v2.8h}, [x0], #16 + NEWQUANT_COEF_EACH_16BITS v1, v4, v3, v5, v6, v7 + st1 {v4.8h}, [x0], #16 +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsQuantFour4x4_AArch64_neon + ld1 {v2.8h}, [x1] + ld1 {v3.8h}, [x2] + mov x1, x0 + +.rept 4 + ld1 {v0.8h,
v1.8h}, [x0], #32 + mov v4.16b, v2.16b + NEWQUANT_COEF_EACH_16BITS v0, v4, v3, v5, v6, v7 + st1 {v4.8h}, [x1], #16 + mov v4.16b, v2.16b + NEWQUANT_COEF_EACH_16BITS v1, v4, v3, v5, v6, v7 + st1 {v4.8h}, [x1], #16 +.endr +WELS_ASM_AARCH64_FUNC_END + + +WELS_ASM_AARCH64_FUNC_BEGIN WelsQuantFour4x4Max_AArch64_neon + ld1 {v2.8h}, [x1] + ld1 {v3.8h}, [x2] + mov x1, x0 + + ld1 {v0.8h, v1.8h}, [x0], #32 + mov v4.16b, v2.16b + NEWQUANT_COEF_EACH_16BITS_MAX v0, v4, v3, v5, v6, v7, v16 + st1 {v4.8h}, [x1], #16 + mov v4.16b, v2.16b + NEWQUANT_COEF_EACH_16BITS_MAX v1, v4, v3, v5, v6, v7, v17 + st1 {v4.8h}, [x1], #16 // then 1st 16 elem in v16 & v17 + + ld1 {v0.8h, v1.8h}, [x0], #32 + mov v4.16b, v2.16b + NEWQUANT_COEF_EACH_16BITS_MAX v0, v4, v3, v5, v6, v7, v18 + st1 {v4.8h}, [x1], #16 + mov v4.16b, v2.16b + NEWQUANT_COEF_EACH_16BITS_MAX v1, v4, v3, v5, v6, v7, v19 + st1 {v4.8h}, [x1], #16 // then 2nd 16 elem in v18 & v19 + + SELECT_MAX_IN_ABS_COEF v16, v17, v18, v19, h20, h21 + + ld1 {v0.8h, v1.8h}, [x0], #32 + mov v4.16b, v2.16b + NEWQUANT_COEF_EACH_16BITS_MAX v0, v4, v3, v5, v6, v7, v16 + st1 {v4.8h}, [x1], #16 + mov v4.16b, v2.16b + NEWQUANT_COEF_EACH_16BITS_MAX v1, v4, v3, v5, v6, v7, v17 + st1 {v4.8h}, [x1], #16 // then 3rd 16 elem in v16 & v17 + + ld1 {v0.8h, v1.8h}, [x0], #32 + mov v4.16b, v2.16b + NEWQUANT_COEF_EACH_16BITS_MAX v0, v4, v3, v5, v6, v7, v18 + st1 {v4.8h}, [x1], #16 + mov v4.16b, v2.16b + NEWQUANT_COEF_EACH_16BITS_MAX v1, v4, v3, v5, v6, v7, v19 + st1 {v4.8h}, [x1], #16 // then 4th 16 elem in v18 & v19 + + SELECT_MAX_IN_ABS_COEF v16, v17, v18, v19, h22, h23 + + st4 {v20.h,v21.h,v22.h,v23.h}[0], [x3] +WELS_ASM_AARCH64_FUNC_END + + +WELS_ASM_AARCH64_FUNC_BEGIN WelsDequant4x4_AArch64_neon + ld1 {v0.8h, v1.8h}, [x0] + ld1 {v2.8h}, [x1] + mul v3.8h, v0.8h, v2.8h + mul v4.8h, v1.8h, v2.8h + st1 {v3.8h, v4.8h}, [x0] +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsDequantFour4x4_AArch64_neon + ld1 {v2.8h}, [x1] + mov x1, x0 +.rept 4 + ld1
{v0.8h,v1.8h}, [x0], #32 + mul v3.8h, v0.8h, v2.8h + mul v4.8h, v1.8h, v2.8h + st1 {v3.8h,v4.8h}, [x1], #32 +.endr +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsHadamardQuant2x2SkipKernel_AArch64_neon + dup v4.8h, w1 + mov x1, #32 + ld1 {v0.h}[0], [x0], x1 //rs[0] + ld1 {v0.h}[1], [x0], x1 //rs[16] + ld1 {v0.h}[2], [x0], x1 //rs[32] + ld1 {v0.h}[3], [x0], x1 //rs[48] + + HDM_QUANT_2x2_TOTAL_16BITS v0, v1, v2 // output v1 + + HDM_QUANT_2x2_TOTAL_16BITS v1, v0, v2 // output v0 + + abs v1.4h, v0.4h + cmhi v0.4h, v1.4h, v4.4h // abs(dct[i])>threshold; + mov w0, v0.s[0] + mov w1, v0.s[1] + orr w0, w0, w1 +WELS_ASM_AARCH64_FUNC_END + + +WELS_ASM_AARCH64_FUNC_BEGIN WelsHadamardQuant2x2_AArch64_neon + + dup v1.8h, w1 //ff + dup v2.8h, w2 //mf + eor v3.16b, v3.16b, v3.16b + + mov x1, #32 + mov x2, x0 + ld1 {v0.h}[0], [x0], x1 //rs[0] + st1 {v3.h}[0], [x2], x1 //rs[00]=0 + ld1 {v0.h}[1], [x0], x1 //rs[16] + st1 {v3.h}[1], [x2], x1 //rs[16]=0 + ld1 {v0.h}[2], [x0], x1 //rs[32] + st1 {v3.h}[2], [x2], x1 //rs[32]=0 + ld1 {v0.h}[3], [x0], x1 //rs[48] + st1 {v3.h}[3], [x2], x1 //rs[48]=0 + + + HDM_QUANT_2x2_TOTAL_16BITS v0, v4, v5 // output v4 + + HDM_QUANT_2x2_TOTAL_16BITS v4, v0, v5 // output v0 + + QUANT_DUALWORD_COEF_EACH_16BITS v0, v1, v2, v3, v4 + + st1 {v1.d}[0], [x3] // store to dct + st1 {v1.d}[0], [x4] // store to block + + movi v3.8h, #1, lsl #0 + + movi v0.16b, #255 + + DC_ZERO_COUNT_IN_DUALWORD v1, h0, v3 + + mov x0, v0.d[0] + mov x1, #4 + subs x0, x1, x0 +WELS_ASM_AARCH64_FUNC_END + + + +WELS_ASM_AARCH64_FUNC_BEGIN WelsDequantIHadamard4x4_AArch64_neon + ld1 {v0.8h, v1.8h}, [x0] + dup v4.8h, w1 + + IHDM_4x4_TOTAL_16BITS v0, v2, v3 + IHDM_4x4_TOTAL_16BITS v1, v2, v3 + + MATRIX_TRANSFORM_EACH_16BITS_2x8_OUT2 v0, v1, v2, v3 + + IHDM_4x4_TOTAL_16BITS v0, v2, v3 + mul v0.8h, v0.8h, v4.8h + + IHDM_4x4_TOTAL_16BITS v1, v2, v3 + mul v1.8h, v1.8h, v4.8h + + MATRIX_TRANSFORM_EACH_16BITS_2x8_OUT2 v0, v1, v2, v3 + st1 {v0.16b, v1.16b}, [x0] 
+WELS_ASM_AARCH64_FUNC_END + +//void WelsDctT4_AArch64_neon (int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, int32_t iStride2); +WELS_ASM_AARCH64_FUNC_BEGIN WelsDctT4_AArch64_neon + SIGN_EXTENSION x2, w2 + SIGN_EXTENSION x4, w4 + LOAD_4x4_DATA_FOR_DCT v0, v1, x1, x2, x3, x4 + usubl v2.8h, v0.8b, v1.8b + usubl2 v4.8h, v0.16b, v1.16b + uzp1 v3.8h, v2.8h, v4.8h + uzp2 v5.8h, v2.8h, v4.8h + uzp2 v2.8h, v3.8h, v5.8h // s[2, 6, 10, 14] [3, 7, 11, 15] + uzp1 v0.8h, v3.8h, v5.8h // s[0, 4, 8, 12] [1, 5, 9, 13] + mov v3.d[0], v2.d[1] // s[3, 7, 11, 15] + mov v1.d[0], v0.d[1] // s[1, 5, 9, 13] + + // horizontal transform + DCT_ROW_TRANSFORM_TOTAL_16BITS v0, v1, v2, v3, v4, v5, v6, v7 + // transform element + MATRIX_TRANSFORM_EACH_16BITS_OUT4 v0, v1, v2, v3, v4, v5, v6, v7 + // vertical transform + DCT_ROW_TRANSFORM_TOTAL_16BITS v0, v1, v2, v3, v4, v5, v6, v7 + + st4 {v0.d, v1.d, v2.d, v3.d}[0], [x0] +WELS_ASM_AARCH64_FUNC_END + +//void WelsDctFourT4_AArch64_neon (int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, int32_t iStride2); +WELS_ASM_AARCH64_FUNC_BEGIN WelsDctFourT4_AArch64_neon + SIGN_EXTENSION x2,w2 + SIGN_EXTENSION x4,w4 +.rept 2 + LOAD_8x4_DATA_FOR_DCT v0, v1, v2, v3, v4, v5, v6, v7, x1, x3 + usubl v0.8h, v0.8b, v4.8b + usubl v1.8h, v1.8b, v5.8b + usubl v2.8h, v2.8b, v6.8b + usubl v3.8h, v3.8b, v7.8b + + MATRIX_TRANSFORM_EACH_16BITS_OUT4 v0, v1, v2, v3, v4, v5, v6, v7 + + // horizontal transform + DCT_ROW_TRANSFORM_TOTAL_16BITS v0, v1, v2, v3, v4, v5, v6, v7 + + // transform element + MATRIX_TRANSFORM_EACH_16BITS_OUT4 v0, v1, v2, v3, v4, v5, v6, v7 + + // vertical transform + DCT_ROW_TRANSFORM_TOTAL_16BITS v0, v1, v2, v3, v4, v5, v6, v7 + + uzp1 v4.2d, v0.2d, v1.2d + uzp2 v6.2d, v0.2d, v1.2d + uzp1 v5.2d, v2.2d, v3.2d + uzp2 v7.2d, v2.2d, v3.2d + st1 {v4.16b, v5.16b}, [x0], #32 + st1 {v6.16b, v7.16b}, [x0], #32 +.endr +WELS_ASM_AARCH64_FUNC_END +//void WelsIDctT4Rec_AArch64_neon (uint8_t* pRec, int32_t iStride, uint8_t* 
pPrediction, int32_t iPredStride, int16_t* pDct) +WELS_ASM_AARCH64_FUNC_BEGIN WelsIDctT4Rec_AArch64_neon + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + ld1 {v16.s}[0], [x2], x3 + ld1 {v16.s}[1], [x2], x3 + ld1 {v16.s}[2], [x2], x3 + ld1 {v16.s}[3], [x2], x3 // Pred + ld4 {v0.4h, v1.4h, v2.4h, v3.4h}, [x4] // dct coeff + + ROW_TRANSFORM_1_STEP_TOTAL_16BITS v0, v1, v2, v3, v4, v5, v6, v7 + + TRANSFORM_TOTAL_16BITS v0, v1, v2, v3, v4, v5, v6, v7 + + MATRIX_TRANSFORM_EACH_16BITS_OUT4 v0, v1, v2, v3, v4, v5, v6, v7 + + ROW_TRANSFORM_1_STEP_TOTAL_16BITS v0, v1, v2, v3, v4, v5, v6, v7 + + TRANSFORM_TOTAL_16BITS v0, v1, v2, v3, v4, v5, v6, v7 + ins v0.d[1], v1.d[0] + ins v2.d[1], v3.d[0] + srshr v0.8h, v0.8h, #6 + srshr v2.8h, v2.8h, #6 + //after rounding 6, clip into [0, 255] + uxtl v1.8h, v16.8b + add v0.8h, v0.8h, v1.8h + sqxtun v1.8b, v0.8h + st1 {v1.s}[0],[x0],x1 + st1 {v1.s}[1],[x0],x1 + + uxtl2 v1.8h, v16.16b + add v2.8h, v2.8h, v1.8h + sqxtun v1.8b, v2.8h + st1 {v1.s}[0],[x0],x1 + st1 {v1.s}[1],[x0],x1 +WELS_ASM_AARCH64_FUNC_END +//void WelsIDctFourT4Rec_AArch64_neon (uint8_t* pRec, int32_t iStride, uint8_t* pPrediction, int32_t iPredStride, int16_t* pDct); +WELS_ASM_AARCH64_FUNC_BEGIN WelsIDctFourT4Rec_AArch64_neon + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 +.rept 2 + ld1 {v16.d}[0], [x2], x3 + ld1 {v16.d}[1], [x2], x3 + ld1 {v17.d}[0], [x2], x3 + ld1 {v17.d}[1], [x2], x3 // Pred + ld4 {v0.8h, v1.8h, v2.8h, v3.8h}, [x4], #64 // dct coeff + + ROW_TRANSFORM_1_STEP_TOTAL_16BITS v0, v1, v2, v3, v4, v5, v6, v7 + + TRANSFORM_TOTAL_16BITS v0, v1, v2, v3, v4, v5, v6, v7 + + MATRIX_TRANSFORM_EACH_16BITS_OUT4 v0, v1, v2, v3, v4, v5, v6, v7 + + ROW_TRANSFORM_1_STEP_TOTAL_16BITS v0, v1, v2, v3, v4, v5, v6, v7 + + TRANSFORM_TOTAL_16BITS v0, v1, v2, v3, v4, v5, v6, v7 + srshr v0.8h, v0.8h, #6 + srshr v1.8h, v1.8h, #6 + srshr v2.8h, v2.8h, #6 + srshr v3.8h, v3.8h, #6 + + //after rounding 6, clip into [0, 255] + uxtl v4.8h, v16.8b + add v0.8h, v0.8h, v4.8h + sqxtun v0.8b, 
v0.8h + st1 {v0.d}[0],[x0],x1 + + uxtl2 v5.8h, v16.16b + add v1.8h, v1.8h, v5.8h + sqxtun v1.8b, v1.8h + st1 {v1.d}[0],[x0],x1 + + uxtl v6.8h, v17.8b + add v2.8h, v2.8h, v6.8h + sqxtun v2.8b, v2.8h + st1 {v2.d}[0],[x0],x1 + + uxtl2 v7.8h, v17.16b + add v3.8h, v3.8h, v7.8h + sqxtun v3.8b, v3.8h + st1 {v3.d}[0],[x0],x1 + .endr +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsHadamardT4Dc_AArch64_neon + + mov x2, #32 + ld1 {v0.h}[0], [x1], x2 + ld1 {v1.h}[0], [x1], x2 + ld1 {v0.h}[1], [x1], x2 + ld1 {v1.h}[1], [x1], x2 + + ld1 {v2.h}[0], [x1], x2 + ld1 {v3.h}[0], [x1], x2 + ld1 {v2.h}[1], [x1], x2 + ld1 {v3.h}[1], [x1], x2 + + ld1 {v0.h}[2], [x1], x2 + ld1 {v1.h}[2], [x1], x2 + ld1 {v0.h}[3], [x1], x2 + ld1 {v1.h}[3], [x1], x2 + + ld1 {v2.h}[2], [x1], x2 + ld1 {v3.h}[2], [x1], x2 + ld1 {v2.h}[3], [x1], x2 + ld1 {v3.h}[3], [x1], x2 // v0[0 4 08 12],v1[1 5 09 13],v2[2 6 10 14],v3[3 7 11 15] + + ROW_TRANSFORM_0_STEP v0, v1, v3, v2, v4, v7, v6, v5 + TRANSFORM_4BYTES v0, v1, v3, v2, v4, v7, v6, v5 + + // transform element 32bits + uzp1 v4.4s, v0.4s, v1.4s // 0 2 4 6 + uzp2 v5.4s, v0.4s, v1.4s // 1 3 5 7 + uzp1 v6.4s, v2.4s, v3.4s // 8 10 12 14 + uzp2 v7.4s, v2.4s, v3.4s // 9 11 13 15 + + uzp1 v0.4s, v4.4s, v6.4s // 0 4 8 12 + uzp2 v2.4s, v4.4s, v6.4s // 2 6 10 14 + uzp1 v1.4s, v5.4s, v7.4s // 1 5 9 13 + uzp2 v3.4s, v5.4s, v7.4s // 3 7 11 15 + + COL_TRANSFORM_0_STEP v0, v1, v3, v2, v4, v7, v6, v5 + TRANSFORM_4BYTES v0, v1, v3, v2, v4, v7, v6, v5 + sqrshrn v4.4h, v0.4s, #1 + sqrshrn2 v4.8h, v1.4s, #1 + sqrshrn v5.4h, v2.4s, #1 + sqrshrn2 v5.8h, v3.4s, #1 + st1 {v4.16b, v5.16b}, [x0] //store +WELS_ASM_AARCH64_FUNC_END + +//void WelsIDctRecI16x16Dc_AArch64_neon (uint8_t* pRec, int32_t iStride, uint8_t* pPrediction, int32_t iPredStride, +// int16_t* pDctDc); +WELS_ASM_AARCH64_FUNC_BEGIN WelsIDctRecI16x16Dc_AArch64_neon + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x3,w3 + ld1 {v16.16b,v17.16b}, [x4] + srshr v16.8h, v16.8h, #6 + srshr v17.8h, v17.8h, #6 + + dup v0.8h, 
v16.h[0] + dup v1.8h, v16.h[1] + ins v0.d[1], v1.d[0] + dup v1.8h, v16.h[2] + dup v2.8h, v16.h[3] + ins v1.d[1], v2.d[0] + +.rept 4 + ld1 {v3.16b}, [x2], x3 + MB_PRED_8BITS_ADD_DCT_16BITS_CLIP v3, v0, v1, v4, v5 + st1 {v3.16b}, [x0], x1 +.endr + + dup v0.8h, v16.h[4] + dup v1.8h, v16.h[5] + ins v0.d[1], v1.d[0] + dup v1.8h, v16.h[6] + dup v2.8h, v16.h[7] + ins v1.d[1], v2.d[0] + +.rept 4 + ld1 {v3.16b}, [x2], x3 + MB_PRED_8BITS_ADD_DCT_16BITS_CLIP v3, v0, v1, v4, v5 + st1 {v3.16b}, [x0], x1 +.endr + + dup v0.8h, v17.h[0] + dup v1.8h, v17.h[1] + ins v0.d[1], v1.d[0] + dup v1.8h, v17.h[2] + dup v2.8h, v17.h[3] + ins v1.d[1], v2.d[0] + +.rept 4 + ld1 {v3.16b}, [x2], x3 + MB_PRED_8BITS_ADD_DCT_16BITS_CLIP v3, v0, v1, v4, v5 + st1 {v3.16b}, [x0], x1 +.endr + + dup v0.8h, v17.h[4] + dup v1.8h, v17.h[5] + ins v0.d[1], v1.d[0] + dup v1.8h, v17.h[6] + dup v2.8h, v17.h[7] + ins v1.d[1], v2.d[0] + +.rept 4 + ld1 {v3.16b}, [x2], x3 + MB_PRED_8BITS_ADD_DCT_16BITS_CLIP v3, v0, v1, v4, v5 + st1 {v3.16b}, [x0], x1 +.endr +WELS_ASM_AARCH64_FUNC_END +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm64/svc_motion_estimation_aarch64_neon.S b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm64/svc_motion_estimation_aarch64_neon.S new file mode 100644 index 000000000..422a5f844 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/arm64/svc_motion_estimation_aarch64_neon.S @@ -0,0 +1,350 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifdef HAVE_NEON_AARCH64 +#include "arm_arch64_common_macro.S" +//int32_t SumOf8x8SingleBlock_AArch64_neon (uint8_t* pRef, const int32_t kiRefStride); +WELS_ASM_AARCH64_FUNC_BEGIN SumOf8x8SingleBlock_AArch64_neon + SIGN_EXTENSION x1,w1 + ld1 {v0.d}[0], [x0], x1 + ld1 {v0.d}[1], [x0], x1 + ld1 {v1.d}[0], [x0], x1 + ld1 {v1.d}[1], [x0], x1 + ld1 {v2.d}[0], [x0], x1 + ld1 {v2.d}[1], [x0], x1 + ld1 {v3.d}[0], [x0], x1 + ld1 {v3.d}[1], [x0] + uaddlp v0.8h, v0.16b + uadalp v0.8h, v1.16b + uadalp v0.8h, v2.16b + uadalp v0.8h, v3.16b + uaddlv s0, v0.8h + mov x0, v0.d[0] +WELS_ASM_AARCH64_FUNC_END + +//int32_t SumOf16x16SingleBlock_AArch64_neon (uint8_t* pRef, const int32_t kiRefStride); +WELS_ASM_AARCH64_FUNC_BEGIN SumOf16x16SingleBlock_AArch64_neon + SIGN_EXTENSION x1,w1 + ld1 {v0.16b}, [x0], x1 + uaddlp v0.8h, v0.16b +.rept 15 + ld1 {v1.16b}, [x0], x1 + uadalp v0.8h, v1.16b +.endr + uaddlv s0, v0.8h + mov x0, v0.d[0] +WELS_ASM_AARCH64_FUNC_END + +//void SumOf8x8BlockOfFrame_AArch64_neon (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight, +// const int32_t kiRefStride, +// uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); +WELS_ASM_AARCH64_FUNC_BEGIN SumOf8x8BlockOfFrame_AArch64_neon +//(uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,const int32_t kiRefStride,uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]) + //x5: pTimesOfFeatureValue + //x4: pFeatureOfBlock + + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x2,w2 + SIGN_EXTENSION x3,w3 + mov x8, x0 + mov x6, x1 + add x8, x8, x6 + add x4, x4, x6, lsl #1 + + mov x7, x6 +_width_loop8x8_1: + subs x0, x8, x7 + ld1 {v0.d}[0], [x0], x3 + ld1 {v0.d}[1], [x0], x3 + ld1 {v1.d}[0], [x0], x3 + ld1 {v1.d}[1], [x0], x3 + ld1 {v2.d}[0], [x0], x3 + ld1 {v2.d}[1], [x0], x3 + ld1 {v3.d}[0], [x0], x3 + ld1 {v3.d}[1], [x0] + uaddlp v0.8h, v0.16b + uadalp v0.8h, v1.16b + uadalp v0.8h, v2.16b + uadalp v0.8h, v3.16b + uaddlv s0, v0.8h + + subs x1, x4, x7, lsl #1 + st1 {v0.h}[0], 
[x1] // sum -> pFeatureOfBlock[i] + mov w0, #0 + ins v0.s[1], w0 + mov x0, v0.d[0] + add x1, x5, x0, lsl #2 + ldr w0, [x1] + add w0, w0, #1 + str w0, [x1] + subs x7, x7, #1 + cbnz x7, _width_loop8x8_1 + + add x8, x8, x3 + add x4, x4, x6, lsl #1 + subs x2, x2, #1 + cbz x2, _SumOf8x8BlockOfFrame_AArch64_neon_end + +_height_loop8x8: + mov x7, x6 +_width_loop8x8_2: + subs x0, x8, x7 + subs x1, x4, x7, lsl #1 + subs x9, x1, x6, lsl #1 // last line of pFeatureOfBlock[i] + ldrh w10, [x9] // sum of last line of pFeatureOfBlock[i] + + subs x11, x0, x3 + ld1 {v0.d}[1], [x11] + add x0, x11, x3, lsl #3 + ld1 {v0.d}[0], [x0] // + + uaddlp v0.8h, v0.16b + addp v0.8h, v0.8h, v1.8h + uaddlp v0.4s, v0.8h + umov w11, v0.s[0] + umov w12, v0.s[1] + + subs w10, w10, w12 + mov x0, #0 + add w0, w10, w11 + strh w0, [x1] // sum -> pFeatureOfBlock[i] + add x1, x5, x0, lsl #2 + ldr w0, [x1] + add w0, w0, #1 + str w0, [x1] + subs x7, x7, #1 + cbnz x7, _width_loop8x8_2 + + add x8, x8, x3 + add x4, x4, x6, lsl #1 + subs x2, x2, #1 + cbnz x2, _height_loop8x8 +_SumOf8x8BlockOfFrame_AArch64_neon_end: +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN SumOf16x16BlockOfFrame_AArch64_neon +//(uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,const int32_t kiRefStride,uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]) + //x5: pTimesOfFeatureValue + //x4: pFeatureOfBlock + + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x2,w2 + SIGN_EXTENSION x3,w3 + mov x8, x0 + mov x6, x1 + add x8, x8, x6 + add x4, x4, x6, lsl #1 + + mov x7, x6 +_width_loop16x16_1: + subs x0, x8, x7 + ld1 {v0.16b}, [x0], x3 + uaddlp v0.8h, v0.16b +.rept 15 + ld1 {v1.16b}, [x0], x3 + uadalp v0.8h, v1.16b +.endr + uaddlv s0, v0.8h + + subs x1, x4, x7, lsl #1 + st1 {v0.h}[0], [x1] // sum -> pFeatureOfBlock[i] + mov w0, #0 + ins v0.s[1], w0 + mov x0, v0.d[0] + add x1, x5, x0, lsl #2 + ldr w0, [x1] + add w0, w0, #1 + str w0, [x1] + subs x7, x7, #1 + cbnz x7, _width_loop16x16_1 + + add x8, x8, x3 + add x4, x4, x6, 
lsl #1 + subs x2, x2, #1 + cbz x2, _SumOf16x16BlockOfFrame_AArch64_neon_end + +_height_loop16x16: + mov x7, x6 +_width_loop16x16_2: + subs x0, x8, x7 + + subs x1, x4, x7, lsl #1 + subs x9, x1, x6, lsl #1 // last line of pFeatureOfBlock[i] + ldrh w10, [x9] // sum of last line of pFeatureOfBlock[i] + + subs x11, x0, x3 + ld1 {v1.16b}, [x11] + add x0, x11, x3, lsl #4 + ld1 {v0.16b}, [x0] // + + uaddlv h0, v0.16b + uaddlv h1, v1.16b + umov w11, v0.h[0] + umov w12, v1.h[0] + + subs w10, w10, w12 + mov x0, #0 + add w0, w10, w11 + strh w0, [x1] // sum -> pFeatureOfBlock[i] + add x1, x5, x0, lsl #2 + ldr w0, [x1] + add w0, w0, #1 + str w0, [x1] + subs x7, x7, #1 + cbnz x7, _width_loop16x16_2 + + add x8, x8, x3 + add x4, x4, x6, lsl #1 + subs x2, x2, #1 + cbnz x2, _height_loop16x16 +_SumOf16x16BlockOfFrame_AArch64_neon_end: +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN InitializeHashforFeature_AArch64_neon +// (uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize, uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList); + SIGN_EXTENSION x2,w2 + mov x9, #3 + bic x5, x2, x9 + mov x8, #0 +_hash_assign_loop_x4: + ld1 {v0.16b}, [x0], #16 + shl v0.4s, v0.4s, #2 + addv s1, v0.4s + umov w7, v1.s[0] + cbz w7, _hash_assign_with_copy_x4 + + ins v2.d[0], x1 + umov w8, v0.s[0] + add x1, x1, x8 + ins v2.d[1], x1 + umov w8, v0.s[1] + add x1, x1, x8 + ins v3.d[0], x1 + umov w8, v0.s[2] + add x1, x1, x8 + ins v3.d[1], x1 + umov w8, v0.s[3] + add x1, x1, x8 + st1 {v2.16b, v3.16b}, [x3], #32 + st1 {v2.16b, v3.16b}, [x4], #32 + b _assign_next +_hash_assign_with_copy_x4: + dup v2.2d, x1 + dup v3.2d, x1 + st1 {v2.16b, v3.16b}, [x3], #32 + st1 {v2.16b, v3.16b}, [x4], #32 + +_assign_next: + subs x5, x5, #4 + cbnz x5, _hash_assign_loop_x4 + + and x5, x2, x9 + cbz x5, _hash_assign_end + + +_hash_assign_loop_x4_rem: + str x1, [x3], #8 + str x1, [x4], #8 + ldr w8, [x0], #4 + lsl w8, w8, #2 + add x1, x1, x8 + subs x5, x5, #1 + cbnz x5, 
_hash_assign_loop_x4_rem + +_hash_assign_end: +WELS_ASM_AARCH64_FUNC_END + +.align 4 +mv_x_inc_x4: .short 0x10, 0x10, 0x10, 0x10, 0x00, 0x00, 0x00, 0x00 +mv_y_inc_x4: .short 0x04, 0x04, 0x04, 0x04, 0x00, 0x00, 0x00, 0x00 +mx_x_offset_x4: .short 0x00, 0x04, 0x08, 0x0c, 0x00, 0x00, 0x00, 0x00 + +WELS_ASM_AARCH64_FUNC_BEGIN FillQpelLocationByFeatureValue_AArch64_neon +// void (uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight, uint16_t** pFeatureValuePointerList) + ldr q7, mv_x_inc_x4 + ldr q6, mv_y_inc_x4 + ldr q5, mx_x_offset_x4 + SIGN_EXTENSION x1,w1 + SIGN_EXTENSION x2,w2 + eor v4.16b, v4.16b, v4.16b + eor v3.16b, v3.16b, v3.16b + dup v16.2d, x3 // v8->v16 + +_hash_height_loop: + mov x7, x1 + mov v2.16b, v5.16b //mx_x_offset_x4 + +_hash_width_loop: + ld1 {v0.d}[0], [x0], #8 + + ushll v0.4s, v0.4h, #3 + uaddw v17.2d, v16.2d, v0.2s + uaddw2 v18.2d, v16.2d, v0.4s + zip1 v1.8h, v2.8h, v3.8h + + umov x4, v17.d[0] + ldr x5, [x4] + umov w6, v1.s[0] + str w6, [x5] + add x5, x5, #4 + str x5, [x4] + + umov x4, v17.d[1] + ldr x5, [x4] + umov w6, v1.s[1] + str w6, [x5] + add x5, x5, #4 + str x5, [x4] + + umov x4, v18.d[0] + ldr x5, [x4] + umov w6, v1.s[2] + str w6, [x5] + add x5, x5, #4 + str x5, [x4] + + umov x4, v18.d[1] + ldr x5, [x4] + umov w6, v1.s[3] + str w6, [x5] + add x5, x5, #4 + str x5, [x4] + + add v2.8h, v2.8h, v7.8h + subs x7, x7, #4 + cbnz x7, _hash_width_loop + + add v3.8h, v3.8h, v6.8h + subs x2, x2, #1 + cbnz x2, _hash_height_loop +WELS_ASM_AARCH64_FUNC_END +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/as264_common.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/as264_common.h new file mode 100644 index 000000000..30b21a3d8 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/as264_common.h @@ -0,0 +1,143 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file common.h + * + * \brief common flag definitions + * + * \date 7/6/2009 Created + * + ************************************************************************************* + */ + +#ifndef AS264_COMMON_H_ +#define AS264_COMMON_H_ + +/* debug setting for console +$(TargetPath) +-iper 60 -lqp 26 -frin 2 -rc 1 -cf 4 -org desktop.bgra -sw 800 -sh 592 -bf desktop.h264 +.\..\..\..\..\bin +*/ + +/**************************************************************************** + * Options for algorithm, usually change bitrate + ****************************************************************************/ +#define DISABLE_FMO_FEATURE // + +/**************************************************************************** + * Options for optimization, not change bitrate + ****************************************************************************/ +//#undef X86_ASM // X86_ASM is included in project preprocessor definitions, undef it when need to disable asm code +#define SINGLE_REF_FRAME // need to disable it when use multi-reference + + +#if defined(WELS_TESTBED) // for SGE testing +#define ENABLE_FRAME_DUMP + +#ifdef FRAME_INFO_OUTPUT +#undef FRAME_INFO_OUTPUT +#endif//FRAME_INFO_OUTPUT +#endif//WELS_TESTBED + + +#if defined(__UNITTEST__) // for unittest +#ifndef ENABLE_FRAME_DUMP +#define ENABLE_FRAME_DUMP +#endif//ENABLE_FRAME_DUMP +#endif//__UNITTEST__ + +//#define ENABLE_PSNR_CALC +//#define STAT_OUTPUT +//#define MB_TYPES_CHECK +// +//#define FRAME_INFO_OUTPUT +//#define LAYER_INFO_OUTPUT +//#define SLICE_INFO_OUTPUT // useful in multiple slice coding track +//#define MB_TYPES_INFO_OUTPUT + + +/* macros dependencies check */ +//@if !FRAME_INFO_OUTPUT +#if !defined(FRAME_INFO_OUTPUT) + +#if defined(ENABLE_PSNR_CALC) +#undef ENABLE_PSNR_CALC +#endif//ENABLE_PSNR_CALC + +//#if defined(STAT_OUTPUT) +//#undef STAT_OUTPUT +//#endif//STAT_OUTPUT + +#if defined(LAYER_INFO_OUTPUT) +#undef LAYER_INFO_OUTPUT +#endif//LAYER_INFO_OUTPUT + +#if defined(SLICE_INFO_OUTPUT) 
+#undef SLICE_INFO_OUTPUT +#endif//SLICE_INFO_OUTPUT + +#if defined(MB_TYPES_INFO_OUTPUT) +#undef MB_TYPES_INFO_OUTPUT +#endif//MB_TYPES_INFO_OUTPUT + +#endif//FRAME_INFO_OUTPUT + +//@if SLICE_INFO_OUTPUT +#if defined(SLICE_INFO_OUTPUT) + +#if !defined(FRAME_INFO_OUTPUT) +#define FRAME_INFO_OUTPUT +#endif//FRAME_INFO_OUTPUT + +#if !defined(LAYER_INFO_OUTPUT) +#define LAYER_INFO_OUTPUT +#endif//LAYER_INFO_OUTPUT + +#endif//SLICE_INFO_OUTPUT + +#if defined(LAYER_INFO_OUTPUT) + +#if !defined(FRAME_INFO_OUTPUT) +#define FRAME_INFO_OUTPUT +#endif//!FRAME_INFO_OUTPUT + +#endif//LAYER_INFO_OUTPUT + +//@if MB_TYPES_INFO_OUTPUT +#if defined(MB_TYPES_INFO_OUTPUT) + +#if !defined(MB_TYPES_CHECK) +#define MB_TYPES_CHECK +#endif//MB_TYPES_CHECK +#endif//MB_TYPES_INFO_OUTPUT + +#endif // AS264_COMMON_H_ + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/au_set.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/au_set.h new file mode 100644 index 000000000..a12106e9c --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/au_set.h @@ -0,0 +1,152 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file au_set.h + * + * \brief Interfaces introduced in Access Unit level based writer + * + * \date 05/18/2009 Created + * 05/21/2009 Added init_sps and init_pps + * + ************************************************************************************* + */ + +#ifndef WELS_ACCESS_UNIT_WRITER_H__ +#define WELS_ACCESS_UNIT_WRITER_H__ + +#include "parameter_sets.h" +#include "paraset_strategy.h" +#include "param_svc.h" +#include "utils.h" +namespace WelsEnc { +/*! + ************************************************************************************* + * \brief to write Sequence Parameter Set (SPS) + * + * \param pSps SWelsSPS to be wrote + * \param bs_aux bitstream writer auxiliary + * + * \return 0 - successed + * 1 - failed + * + * \note Call it in case EWelsNalUnitType is SPS. + ************************************************************************************* + */ + +int32_t WelsWriteSpsNal (SWelsSPS* pSps, SBitStringAux* pBitStringAux, int32_t* pSpsIdDelta); + + +/*! + ************************************************************************************* + * \brief to write SubSet Sequence Parameter Set + * + * \param sub_sps subset pSps parsed + * \param bs_aux bitstream writer auxiliary + * + * \return 0 - successed + * 1 - failed + * + * \note Call it in case EWelsNalUnitType is SubSet SPS. 
+ ************************************************************************************* + */ +int32_t WelsWriteSubsetSpsSyntax (SSubsetSps* pSubsetSps, SBitStringAux* pBitStringAux , int32_t* pSpsIdDelta); + + +/*! + ************************************************************************************* + * \brief to write Picture Parameter Set (PPS) + * + * \param pPps pPps + * \param bs_aux bitstream writer auxiliary + * + * \return 0 - successed + * 1 - failed + * + * \note Call it in case EWelsNalUnitType is PPS. + ************************************************************************************* + */ +int32_t WelsWritePpsSyntax (SWelsPPS* pPps, SBitStringAux* pBitStringAux, IWelsParametersetStrategy* pParametersetStrategy); + +/*! + * \brief initialize pSps based on configurable parameters in svc + * \param pSps SWelsSPS* + * \param pLayerParam SSpatialLayerConfig dependency layer parameter + * \param pLayerParamInternal SSpatialLayerInternal*, internal dependency layer parameter + * \param iSpsId SPS Id + * \return 0 - successful + * 1 - failed + */ +int32_t WelsInitSps (SWelsSPS* pSps, SSpatialLayerConfig* pLayerParam, SSpatialLayerInternal* pLayerParamInternal, + const uint32_t kuiIntraPeriod, const int32_t kiNumRefFrame, + const uint32_t kiSpsId, const bool kbEnableFrameCropping, bool bEnableRc, + const int32_t kiDlayerCount,bool bSVCBaselayer); + +/*! 
+ * \brief initialize subset pSps based on configurable parameters in svc + * \param pSubsetSps SSubsetSps* + * \param pLayerParam SSpatialLayerConfig dependency layer parameter + * \param pLayerParamInternal SSpatialLayerInternal*, internal dependency layer parameter + * \param kiSpsId SPS Id + * \return 0 - successful + * 1 - failed + */ +int32_t WelsInitSubsetSps (SSubsetSps* pSubsetSps, SSpatialLayerConfig* pLayerParam, + SSpatialLayerInternal* pLayerParamInternal, + const uint32_t kuiIntraPeriod, const int32_t kiNumRefFrame, + const uint32_t kiSpsId, const bool kbEnableFrameCropping, bool bEnableRc, + const int32_t kiDlayerCount); + +/*! + * \brief initialize pPps based on configurable parameters and pSps(subset pSps) in svc + * \param pPps SWelsPPS* + * \param pSps SWelsSPS* + * \param pSubsetSps SSubsetSps* + * \param kbDeblockingFilterPresentFlag bool + * \param kiPpsId PPS Id + * \param kbUsingSubsetSps bool + * \return 0 - successful + * 1 - failed + */ +int32_t WelsInitPps (SWelsPPS* pPps, + SWelsSPS* pSps, + SSubsetSps* pSubsetSps, + const uint32_t kuiPpsId, + const bool kbDeblockingFilterPresentFlag, + const bool kbUsingSubsetSps, + const bool kbEntropyCodingModeFlag); + +int32_t WelsCheckRefFrameLimitationNumRefFirst (SLogContext* pLogCtx, SWelsSvcCodingParam* pParam); +int32_t WelsCheckRefFrameLimitationLevelIdcFirst (SLogContext* pLogCtx, SWelsSvcCodingParam* pParam); + +int32_t WelsAdjustLevel (SSpatialLayerConfig* pSpatialLayer,const SLevelLimits *pCurLevel); + +} +#endif//WELS_ACCESS_UNIT_PARSER_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/deblocking.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/deblocking.h new file mode 100644 index 000000000..55f64b5d1 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/deblocking.h @@ -0,0 +1,92 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file deblocking.h + * + * \brief Interfaces introduced in frame deblocking filtering + * + * \date 08/03/2009 Created + * + ************************************************************************************* + */ + +#ifndef WELS_DEBLOCKING_H_ +#define WELS_DEBLOCKING_H_ + +#include "encoder_context.h" +#include "wels_func_ptr_def.h" +#include "deblocking_common.h" +namespace WelsEnc { + + +//struct tagDeblockingFunc; + +typedef struct TagDeblockingFilter { + uint8_t* pCsData[3]; // pointer to reconstructed picture pData + int32_t iCsStride[3]; // Cs iStride + int16_t iMbStride; + int8_t iSliceAlphaC0Offset; + int8_t iSliceBetaOffset; + uint8_t uiLumaQP; + uint8_t uiChromaQP; + uint8_t uiFilterIdc; + uint8_t uiReserved; +} SDeblockingFilter; + +#if defined(__cplusplus) +extern "C" { +#endif//__cplusplus +#if defined(HAVE_NEON) +void DeblockingBSCalcEnc_neon (int8_t* pNzc, SMVUnitXY* pMv, int32_t iBoundryFlag, int32_t iMbStride, + uint8_t (*pBS)[4][4]); +#endif +#if defined(HAVE_NEON_AARCH64) +void DeblockingBSCalcEnc_AArch64_neon (int8_t* pNzc, SMVUnitXY* pMv, int32_t iBoundryFlag, int32_t iMbStride, + uint8_t (*pBS)[4][4]); +#endif +#if defined(__cplusplus) +} +#endif//__cplusplus +void DeblockingInit (DeblockingFunc* pFunc, int32_t iCpu); + +void WelsBlockFuncInit (PSetNoneZeroCountZeroFunc* pfSetNZCZero, int32_t iCpu); + +void PerformDeblockingFilter (sWelsEncCtx* pEnc); + +void DeblockingFilterFrameAvcbase (SDqLayer* pCurDq, SWelsFuncPtrList* pFunc); + +void DeblockingFilterSliceAvcbase (SDqLayer* pCurDq, SWelsFuncPtrList* pFunc, SSlice* pSlice); +void DeblockingFilterSliceAvcbaseNull (SDqLayer* pCurDq, SWelsFuncPtrList* pFunc, SSlice* pSlice); +} + +#endif + + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/decode_mb_aux.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/decode_mb_aux.h new file mode 100644 index 000000000..893c4a3ba --- /dev/null +++ 
b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/decode_mb_aux.h @@ -0,0 +1,107 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifndef DECODE_MB_AUX_H +#define DECODE_MB_AUX_H + +#include "typedefs.h" +#include "macros.h" +#include "wels_func_ptr_def.h" + +namespace WelsEnc { +void WelsDequantLumaDc4x4 (int16_t* pRes, const int32_t kiQp); +void WelsIHadamard4x4Dc (int16_t* pRes); + +void WelsInitReconstructionFuncs (SWelsFuncPtrList* pList, uint32_t iCpuFlags); +void WelsGetEncBlockStrideOffset (int32_t* pBlock, const int32_t kiStrideY, const int32_t kiStrideUV); + +void WelsDequantFour4x4_c (int16_t* pRes, const uint16_t* kpQpTable); +void WelsDequant4x4_c (int16_t* pRes, const uint16_t* kpQpTable); +void WelsDequantIHadamard4x4_c (int16_t* pRes, const uint16_t kuiMF); +void WelsDequantIHadamard2x2Dc (int16_t* pDct, const uint16_t kuiMF); + +void WelsIDctT4RecOnMb (uint8_t* pDst, int32_t iDstStride, uint8_t* pPred, int32_t iPredStride, int16_t* pDct, + PIDctFunc pfIDctFourT4); +void WelsIDctT4Rec_c (uint8_t* pRec, int32_t iStride, uint8_t* pPred, int32_t iPredStride, int16_t* pDct); +void WelsIDctFourT4Rec_c (uint8_t* pRec, int32_t iStride, uint8_t* pPred, int32_t iPredStride, int16_t* pDct); +void WelsIDctRecI16x16Dc_c (uint8_t* pRec, int32_t iStride, uint8_t* pPred, int32_t iPredStride, int16_t* pDctDc); + +#if defined(__cplusplus) +extern "C" { +#endif//__cplusplus + +#if defined(X86_ASM) +void WelsDequant4x4_sse2 (int16_t* pDct, const uint16_t* kpMF); +void WelsDequantFour4x4_sse2 (int16_t* pDct, const uint16_t* kpMF); +void WelsDequantIHadamard4x4_sse2 (int16_t* pRes, const uint16_t kuiMF); + +void WelsIDctT4Rec_mmx (uint8_t* pRec, int32_t iStride, uint8_t* pPrediction, int32_t iPredStride, int16_t* pDct); +void WelsIDctT4Rec_sse2 (uint8_t* pRec, int32_t iStride, uint8_t* pPrediction, int32_t iPredStride, int16_t* pDct); +void WelsIDctFourT4Rec_sse2 (uint8_t* pRec, int32_t iStride, uint8_t* pPrediction, int32_t iPredStride, int16_t* pDct); +void WelsIDctRecI16x16Dc_sse2 (uint8_t* pRec, int32_t iStride, uint8_t* pPrediction, int32_t iPredStride, + int16_t* pDctDc); +void 
WelsIDctT4Rec_avx2 (uint8_t* pRec, int32_t iStride, uint8_t* pPrediction, int32_t iPredStride, int16_t* pDct); +void WelsIDctFourT4Rec_avx2 (uint8_t* pRec, int32_t iStride, uint8_t* pPrediction, int32_t iPredStride, int16_t* pDct); +#endif//X86_ASM + +#ifdef HAVE_NEON +void WelsDequantFour4x4_neon (int16_t* pDct, const uint16_t* kpMF); +void WelsDequant4x4_neon (int16_t* pDct, const uint16_t* kpMF); +void WelsDequantIHadamard4x4_neon (int16_t* pRes, const uint16_t kuiMF); + +void WelsIDctT4Rec_neon (uint8_t* pRec, int32_t iStride, uint8_t* pPrediction, int32_t iPredStride, int16_t* pDct); +void WelsIDctFourT4Rec_neon (uint8_t* pRec, int32_t iStride, uint8_t* pPrediction, int32_t iPredStride, int16_t* pDct); +void WelsIDctRecI16x16Dc_neon (uint8_t* pRec, int32_t iStride, uint8_t* pPrediction, int32_t iPredStride, + int16_t* pDctDc); +#endif + +#ifdef HAVE_NEON_AARCH64 +void WelsDequantFour4x4_AArch64_neon (int16_t* pDct, const uint16_t* kpMF); +void WelsDequant4x4_AArch64_neon (int16_t* pDct, const uint16_t* kpMF); +void WelsDequantIHadamard4x4_AArch64_neon (int16_t* pRes, const uint16_t kuiMF); + +void WelsIDctT4Rec_AArch64_neon (uint8_t* pRec, int32_t iStride, uint8_t* pPrediction, int32_t iPredStride, int16_t* pDct); +void WelsIDctFourT4Rec_AArch64_neon (uint8_t* pRec, int32_t iStride, uint8_t* pPrediction, int32_t iPredStride, int16_t* pDct); +void WelsIDctRecI16x16Dc_AArch64_neon (uint8_t* pRec, int32_t iStride, uint8_t* pPrediction, int32_t iPredStride, + int16_t* pDctDc); +#endif + +#if defined(HAVE_MMI) +void WelsIDctT4Rec_mmi (uint8_t* pRec, int32_t iStride, uint8_t* pPrediction, int32_t iPredStride, int16_t* pDct); +void WelsIDctFourT4Rec_mmi (uint8_t* pRec, int32_t iStride, uint8_t* pPrediction, int32_t iPredStride, int16_t* pDct); +void WelsIDctRecI16x16Dc_mmi (uint8_t* pRec, int32_t iStride, uint8_t* pPrediction, int32_t iPredStride, int16_t* pDctDc); +#endif//HAVE_MMI +#if defined(__cplusplus) +} +#endif//__cplusplus +} +#endif diff --git 
a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/dq_map.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/dq_map.h new file mode 100644 index 000000000..ba83fab24 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/dq_map.h @@ -0,0 +1,58 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file dq_map.h + * + * \brief Dependency Quality layer IDC mapping for cross layer selection and jumpping. 
+ * DQ layer idc map for svc encoding, might be a better scheme than that of design before, + * can aware idc of referencing layer and that idc of successive layer to be coded + * + * \date 4/22/2009 Created + * + ************************************************************************************* + */ +#if !defined(WELS_ENCODER_DEPENDENCY_QUAILITY_IDC_MAP_H__) +#define WELS_ENCODER_DEPENDENCY_QUAILITY_IDC_MAP_H__ + +namespace WelsEnc { +/* + * Dependency Quality IDC + */ + +typedef struct TagDqIdc { +uint16_t iPpsId; // pPps id +uint8_t iSpsId; // pSps id +int8_t uiSpatialId; // spatial id +} SDqIdc; + +} + +#endif//WELS_ENCODER_DEPENDENCY_QUAILITY_IDC_MAP_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/encode_mb_aux.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/encode_mb_aux.h new file mode 100644 index 000000000..296d2e380 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/encode_mb_aux.h @@ -0,0 +1,185 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef ENCODE_MB_AUX_H +#define ENCODE_MB_AUX_H + +#include "typedefs.h" +#include "wels_func_ptr_def.h" +#include "copy_mb.h" + +namespace WelsEnc { +void WelsInitEncodingFuncs (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag); +int32_t WelsGetNoneZeroCount_c (int16_t* pLevel); + +/**************************************************************************** + * Scan and Score functions + ****************************************************************************/ +void WelsScan4x4Ac_c (int16_t* pZigValue, int16_t* pDct); +void WelsScan4x4Dc (int16_t* pLevel, int16_t* pDct); +void WelsScan4x4DcAc_c (int16_t* pLevel, int16_t* pDct); +int32_t WelsCalculateSingleCtr4x4_c (int16_t* pDct); + +/**************************************************************************** + * HDM and Quant functions + ****************************************************************************/ +void WelsHadamardT4Dc_c (int16_t* pLumaDc, int16_t* pDct); +int32_t WelsHadamardQuant2x2_c (int16_t* pRes, const int16_t kiFF, int16_t iMF, int16_t* pDct, int16_t* pBlock); +int32_t WelsHadamardQuant2x2Skip_c (int16_t* pRes, int16_t iFF, int16_t iMF); + +void WelsQuant4x4_c (int16_t* pDct, const int16_t* pFF, const int16_t* pMF); +void WelsQuant4x4Dc_c (int16_t* pDct, int16_t iFF, int16_t iMF); +void WelsQuantFour4x4_c (int16_t* pDct, const int16_t* pFF, const int16_t* pQpTable); +void WelsQuantFour4x4Max_c (int16_t* pDct, const int16_t* pF, const 
int16_t* pQpTable, int16_t* pMax); + + +/**************************************************************************** + * DCT functions + ****************************************************************************/ +void WelsDctT4_c (int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, int32_t iStride2); +// dct_data is no-use here, just for the same interface with dct_save functions +void WelsDctFourT4_c (int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, int32_t iStride2); + +#if defined(__cplusplus) +extern "C" { +#endif//__cplusplus + +#ifdef X86_ASM + +int32_t WelsGetNoneZeroCount_sse2 (int16_t* pLevel); +int32_t WelsGetNoneZeroCount_sse42 (int16_t* pLevel); + +/**************************************************************************** + * Scan and Score functions + ****************************************************************************/ +void WelsScan4x4Ac_sse2 (int16_t* zig_value, int16_t* pDct); +void WelsScan4x4DcAc_ssse3 (int16_t* pLevel, int16_t* pDct); +void WelsScan4x4DcAc_sse2 (int16_t* pLevel, int16_t* pDct); +int32_t WelsCalculateSingleCtr4x4_sse2 (int16_t* pDct); + +/**************************************************************************** + * DCT functions + ****************************************************************************/ +void WelsDctT4_mmx (int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, int32_t iStride2); +void WelsDctT4_sse2 (int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, int32_t iStride2); +void WelsDctFourT4_sse2 (int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, int32_t iStride2); +void WelsDctT4_avx2 (int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, int32_t iStride2); +void WelsDctFourT4_avx2 (int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, int32_t iStride2); + +/**************************************************************************** + * HDM and Quant functions + 
****************************************************************************/ +int32_t WelsHadamardQuant2x2_mmx (int16_t* pRes, const int16_t kiFF, int16_t iMF, int16_t* pDct, int16_t* pBlock); +void WelsHadamardT4Dc_sse2 (int16_t* pLumaDc, int16_t* pDct); +int32_t WelsHadamardQuant2x2Skip_mmx (int16_t* pRes, int16_t iFF, int16_t iMF); + +void WelsQuant4x4_sse2 (int16_t* pDct, const int16_t* pFF, const int16_t* pMF); +void WelsQuant4x4Dc_sse2 (int16_t* pDct, int16_t iFF, int16_t iMF); +void WelsQuantFour4x4_sse2 (int16_t* pDct, const int16_t* pFF, const int16_t* pMF); +void WelsQuantFour4x4Max_sse2 (int16_t* pDct, const int16_t* pFF, const int16_t* pMF, int16_t* pMax); + +void WelsQuant4x4_avx2 (int16_t* pDct, const int16_t* pFF, const int16_t* pMF); +void WelsQuant4x4Dc_avx2 (int16_t* pDct, int16_t iFF, int16_t iMF); +void WelsQuantFour4x4_avx2 (int16_t* pDct, const int16_t* pFF, const int16_t* pMF); +void WelsQuantFour4x4Max_avx2 (int16_t* pDct, const int16_t* pFF, const int16_t* pMF, int16_t* pMax); + +#endif + +#ifdef HAVE_NEON +void WelsHadamardT4Dc_neon (int16_t* pLumaDc, int16_t* pDct); +int32_t WelsHadamardQuant2x2_neon (int16_t* pRes, const int16_t kiFF, int16_t iMF, int16_t* pDct, int16_t* pBlock); +int32_t WelsHadamardQuant2x2Skip_neon (int16_t* pRes, int16_t iFF, int16_t iMF); +int32_t WelsHadamardQuant2x2SkipKernel_neon (int16_t* pRes, int16_t iThreshold); // avoid divide operator + +void WelsDctT4_neon (int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, int32_t iStride2); +void WelsDctFourT4_neon (int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, int32_t iStride2); + +int32_t WelsGetNoneZeroCount_neon (int16_t* pLevel); + +void WelsQuant4x4_neon (int16_t* pDct, const int16_t* pFF, const int16_t* pMF); +void WelsQuant4x4Dc_neon (int16_t* pDct, int16_t iFF, int16_t iMF); +void WelsQuantFour4x4_neon (int16_t* pDct, const int16_t* pFF, const int16_t* pMF); +void WelsQuantFour4x4Max_neon (int16_t* pDct, const int16_t* 
pFF, const int16_t* pMF, int16_t* pMax); +#endif + +#ifdef HAVE_NEON_AARCH64 +void WelsHadamardT4Dc_AArch64_neon (int16_t* pLumaDc, int16_t* pDct); +int32_t WelsHadamardQuant2x2_AArch64_neon (int16_t* pRes, const int16_t kiFF, int16_t iMF, int16_t* pDct, int16_t* pBlock); +int32_t WelsHadamardQuant2x2Skip_AArch64_neon (int16_t* pRes, int16_t iFF, int16_t iMF); +int32_t WelsHadamardQuant2x2SkipKernel_AArch64_neon (int16_t* pRes, int16_t iThreshold); // avoid divide operator + +void WelsDctT4_AArch64_neon (int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, int32_t iStride2); +void WelsDctFourT4_AArch64_neon (int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, int32_t iStride2); + +int32_t WelsGetNoneZeroCount_AArch64_neon (int16_t* pLevel); + +void WelsQuant4x4_AArch64_neon (int16_t* pDct, const int16_t* pFF, const int16_t* pMF); +void WelsQuant4x4Dc_AArch64_neon (int16_t* pDct, int16_t iFF, int16_t iMF); +void WelsQuantFour4x4_AArch64_neon (int16_t* pDct, const int16_t* pFF, const int16_t* pMF); +void WelsQuantFour4x4Max_AArch64_neon (int16_t* pDct, const int16_t* pFF, const int16_t* pMF, int16_t* pMax); +#endif + +#ifdef HAVE_MMI +int32_t WelsGetNoneZeroCount_mmi (int16_t* pLevel); + +/**************************************************************************** + * * Scan and Score functions + * ****************************************************************************/ +void WelsScan4x4Ac_mmi (int16_t* zig_value, int16_t* pDct); +void WelsScan4x4DcAc_mmi (int16_t* pLevel, int16_t* pDct); +int32_t WelsCalculateSingleCtr4x4_mmi (int16_t* pDct); + +/**************************************************************************** + * * DCT functions + * ****************************************************************************/ +void WelsDctT4_mmi (int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, int32_t iStride2); +void WelsDctFourT4_mmi (int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, 
int32_t iStride2); + +/**************************************************************************** + * * HDM and Quant functions + * ****************************************************************************/ +void WelsHadamardT4Dc_mmi (int16_t* pLumaDc, int16_t* pDct); + +void WelsQuant4x4_mmi (int16_t* pDct, const int16_t* pFF, const int16_t* pMF); +void WelsQuant4x4Dc_mmi (int16_t* pDct, int16_t iFF, int16_t iMF); +void WelsQuantFour4x4_mmi (int16_t* pDct, const int16_t* pFF, const int16_t* pMF); +void WelsQuantFour4x4Max_mmi (int16_t* pDct, const int16_t* pFF, const int16_t* pMF, int16_t* pMax); +#endif//HAVE_MMI +#if defined(__cplusplus) +} +#endif//__cplusplus + +ALIGNED_DECLARE (extern const int16_t, g_kiQuantInterFF[58][8], 16); +#define g_iQuantIntraFF (g_kiQuantInterFF +6 ) +ALIGNED_DECLARE (extern const int16_t, g_kiQuantMF[52][8], 16); +} +#endif//ENCODE_MB_AUX_H diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/encoder.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/encoder.h new file mode 100644 index 000000000..893b5389d --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/encoder.h @@ -0,0 +1,148 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file encoder.h + * + * \brief core encoder + * + * \date 5/14/2009 + * + ************************************************************************************* + */ +#if !defined(WELS_CORE_ENCODER_H__) +#define WELS_CORE_ENCODER_H__ + +#include "encoder_context.h" + +namespace WelsEnc { +/*! + * \brief request specific memory for SVC + * \param pEncCtx sWelsEncCtx* + * \return successful - 0; otherwise none 0 for failed + */ +int32_t RequestMemorySvc (sWelsEncCtx** ppCtx, SExistingParasetList* pExistingParasetList); + +/*! + * \brief free memory in SVC core encoder + * \param pEncCtx sWelsEncCtx** + * \return none + */ +void FreeMemorySvc (sWelsEncCtx** ppCtx); + +/*! + * \brief allocate or reallocate the output bs buffer + * \return: successful - 0; otherwise none 0 for failed + */ +int32_t AllocateBsOutputBuffer (CMemoryAlign* pMa, const int32_t iNeededLen, int32_t iOrigLen, const char* kpTag, + uint8_t*& pOutputBuffer); +//TODO: to finish this function and call it + +/*! 
+ * \brief initialize function pointers that potentially used in Wels encoding + * \param pEncCtx sWelsEncCtx* + * \return successful - 0; otherwise none 0 for failed + */ +int32_t InitFunctionPointers (sWelsEncCtx* pEncCtx, SWelsSvcCodingParam* _param, uint32_t uiCpuFlag); + +///*! +// * \brief decide frame type (IDR/P frame) +// * \param uiFrameType frame type output +// * \param frame_idx frame index elapsed currently +// * \param idr IDR interval +// * \return successful - 0; otherwise none 0 for failed +// */ +/*! + * \brief initialize frame coding + */ +void InitFrameCoding (sWelsEncCtx* pEncCtx, const EVideoFrameType keFrameType, const int32_t kiDidx); +void LoadBackFrameNum (sWelsEncCtx* pEncCtx, const int32_t kiDidx); + +EVideoFrameType DecideFrameType (sWelsEncCtx* pEncCtx, const int8_t kiSpatialNum, const int32_t kiDidx, + bool bSkipFrameFlag); +void InitBitStream (sWelsEncCtx* pEncCtx); +int32_t GetTemporalLevel (SSpatialLayerInternal* fDlp, const int32_t kiFrameNum, const int32_t kiGopSize); +/*! + * \brief Dump reconstruction for dependency layer + */ + +extern "C" void DumpDependencyRec (SPicture* pSrcPic, const char* kpFileName, const int8_t kiDid, bool bAppend, + SDqLayer* pDqLayer, bool bSimulCastAVC); + +/*! + * \brief Dump the reconstruction pictures + */ +void DumpRecFrame (SPicture* pSrcPic, const char* kpFileName, const int8_t kiDid, bool bAppend, SDqLayer* pDqLayer); + + +/*! 
+ * \brief encode overall slices pData in a frame + * \param pEncCtx sWelsEncCtx*, encoder context + * \param count_slice_num count number of slices in a frame + * \param eNalType EWelsNalUnitType for a frame + * \param nal_idc EWelsNalRefIdc for a frame + * \return successful - 0; otherwise none 0 for failed + */ +int32_t EncodeFrame (sWelsEncCtx* pEncCtx, + const int32_t kiSliceNumCount, + const EWelsNalUnitType keNalType, + const EWelsNalRefIdc keNalIdc); + + +/********************************************************************************** + * memzero Function +***********************************************************************************/ +void WelsSetMemZero_c (void* pDst, int32_t iSize); // confirmed_safe_unsafe_usage + +#if defined(__cplusplus) +extern "C" { +#endif//__cplusplus + +#ifdef X86_ASM +void WelsSetMemZeroAligned64_sse2 (void* pDst, int32_t iSize); +void WelsSetMemZeroSize64_mmx (void* pDst, int32_t iSize); +void WelsSetMemZeroSize8_mmx (void* pDst, int32_t iSize); +void WelsPrefetchZero_mmx (int8_t const* kpDst); +#elif defined(HAVE_NEON) +void WelsSetMemZero_neon (void* pDst, int32_t iSize); +#elif defined(HAVE_NEON_AARCH64) +void WelsSetMemZero_AArch64_neon (void* pDst, int32_t iSize); +#endif + +#if defined(__cplusplus) +} +#endif//__cplusplus + +/********************************************************************************** + * Function points type +***********************************************************************************/ +} + +#endif//WELS_CORE_ENCODER_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/encoder_context.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/encoder_context.h new file mode 100644 index 000000000..90a909e76 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/encoder_context.h @@ -0,0 +1,240 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file encoder_context.h + * + * \brief Main pData to be operated over Wels encoder all modules + * + * \date 2/4/2009 Created + * + ************************************************************************************* + */ +#ifndef WELS_ENCODER_CONTEXT_H__ +#define WELS_ENCODER_CONTEXT_H__ + +#include <stdio.h> +#include "typedefs.h" +#include "param_svc.h" +#include "nal_encap.h" +#include "picture.h" +#include "paraset_strategy.h" +#include "dq_map.h" +#include "stat.h" +#include "macros.h" +#include "rc.h" +#include "as264_common.h" +#include "wels_preprocess.h" +#include "wels_func_ptr_def.h" +#include "crt_util_safe_x.h" +#include "utils.h" + +#include "mt_defs.h" // for multiple threading +#include "WelsThreadLib.h" +#include "wels_task_management.h" + +namespace WelsEnc { + +class IWelsTaskManage; +class IWelsReferenceStrategy; + +/* + * reference list for each quality layer in SVC + */ +typedef struct TagRefList { + SPicture* pShortRefList[1 + MAX_SHORT_REF_COUNT]; // reference list 0 - short term + SPicture* pLongRefList[1 + MAX_REF_PIC_COUNT]; // reference list 1 - long term + SPicture* pNextBuffer; + SPicture* pRef[1 + MAX_REF_PIC_COUNT]; // plus 1 for swap intend + uint8_t uiShortRefCount; + uint8_t uiLongRefCount; // dependent on pRef pic module +} SRefList; + +typedef struct TagLTRState { +// LTR mark feedback + uint32_t uiLtrMarkState; // LTR mark state, indicate whether there is a LTR mark feedback unsolved + int32_t iLtrMarkFbFrameNum;// the unsolved LTR mark feedback, the marked iFrameNum feedback from decoder + +// LTR used as recovery reference + int32_t iLastRecoverFrameNum; // reserve the last LTR or IDR recover iFrameNum + int32_t iLastCorFrameNumDec; // reserved the last correct position in decoder side, use to select valid LTR to recover or to decide the LTR mark validation + int32_t iCurFrameNumInDec; // current iFrameNum in decoder side, use to select valid LTR to recover or to decide the LTR mark validation + +// LTR mark + int32_t
iLTRMarkMode; // direct mark or delay mark + int32_t iLTRMarkSuccessNum; //successful marked num, for mark mode switch + int32_t iCurLtrIdx;// current long term reference index to mark + int32_t iLastLtrIdx[MAX_TEMPORAL_LAYER_NUM]; + int32_t iSceneLtrIdx;// related to Scene LTR, used by screen content + + uint32_t uiLtrMarkInterval;// the interval from the last long term pRef mark + + bool bLTRMarkingFlag; //decide whether current frame marked as LTR + bool bLTRMarkEnable; //when LTR is confirmed and the interval is no smaller than the marking period + bool bReceivedT0LostFlag; // indicate whether a t0 lost feedback is received, for LTR recovery +} SLTRState; + +typedef struct TagSpatialPicIndex { + SPicture* pSrc; // I420 based and after color space converted + int32_t iDid; // dependency id +} SSpatialPicIndex; + +typedef struct TagStrideTables { + int32_t* pStrideDecBlockOffset[MAX_DEPENDENCY_LAYER][2]; // [iDid][tid==0][24 x 4]: luma+chroma= 24 x 4 + int32_t* pStrideEncBlockOffset[MAX_DEPENDENCY_LAYER]; // [iDid][24 x 4]: luma+chroma= 24 x 4 + int16_t* pMbIndexX[MAX_DEPENDENCY_LAYER]; // [iDid][iMbX]: map for iMbX in each spatial layer coding + int16_t* pMbIndexY[MAX_DEPENDENCY_LAYER]; // [iDid][iMbY]: map for iMbY in each spatial layer coding +} SStrideTables; + +typedef struct TagWelsEncCtx { + SLogContext sLogCtx; +// Input + SWelsSvcCodingParam* pSvcParam; // SVC parameter, WelsSVCParamConfig in svc_param_settings.h + + int32_t* pSadCostMb; + /* MVD cost tables for Inter MB */ + int32_t iMvRange; + uint16_t* pMvdCostTable; //[52]; // adaptive to spatial layers + int32_t iMvdCostTableSize; //the size of above table + int32_t iMvdCostTableStride; //the stride of above table + SMVUnitXY* + pMvUnitBlock4x4; // (*pMvUnitBlock4x4[2])[MB_BLOCK4x4_NUM]; // for store each 4x4 blocks' mv unit, the two swap after different d layer + int8_t* + pRefIndexBlock4x4; // (*pRefIndexBlock4x4[2])[MB_BLOCK8x8_NUM]; // for store each 4x4 blocks' pRef index, the two swap 
after different d layer + int8_t* pNonZeroCountBlocks; // (*pNonZeroCountBlocks)[MB_LUMA_CHROMA_BLOCK4x4_NUM]; + int8_t* + pIntra4x4PredModeBlocks; // (*pIntra4x4PredModeBlocks)[INTRA_4x4_MODE_NUM]; //last byte is not used; the first 4 byte is for the bottom 12,13,14,15 4x4 block intra mode, and 3 byte for (3,7,11) + + SMB** ppMbListD; // [MAX_DEPENDENCY_LAYER]; + SStrideTables* pStrideTab; // stride tables for internal coding used + SWelsFuncPtrList* pFuncList; + + SSliceThreading* pSliceThreading; + IWelsTaskManage* pTaskManage; //was planning to put it under CWelsH264SVCEncoder but it may be updated (lock/no lock) when param is changed + IWelsReferenceStrategy* pReferenceStrategy; + + // pointers + SPicture* pEncPic; // pointer to current picture to be encoded + SPicture* pDecPic; // pointer to current picture being reconstructed + SPicture* pRefPic; // pointer to current reference picture + + SDqLayer* pCurDqLayer; // DQ layer context used to being encoded currently, for reference base layer to refer: pCurDqLayer->pRefLayer if applicable + SDqLayer** ppDqLayerList; // overall DQ layers encoded for storage + + SRefList** ppRefPicListExt; // reference picture list for SVC + SPicture* pRefList0[16]; + SLTRState* pLtr;//[MAX_DEPENDENCY_LAYER]; + bool bCurFrameMarkedAsSceneLtr; +// Derived + + EWelsSliceType eSliceType; // currently coding slice type + EWelsNalUnitType eNalType; // NAL type + EWelsNalRefIdc eNalPriority; // NAL_Reference_Idc currently + EWelsNalRefIdc eLastNalPriority[MAX_DEPENDENCY_LAYER]; // NAL_Reference_Idc in last frame + uint8_t iNumRef0; + + uint8_t uiDependencyId; // Idc of dependecy layer to be coded + uint8_t uiTemporalId; // Idc of temporal layer to be coded + bool bNeedPrefixNalFlag; // whether add prefix nal + +// Rate control routine + SWelsSvcRc* pWelsSvcRc; + bool bCheckWindowStatusRefreshFlag; + int64_t iCheckWindowStartTs; + int64_t iCheckWindowCurrentTs; + int32_t iCheckWindowInterval; + int32_t iCheckWindowIntervalShift; + bool 
bCheckWindowShiftResetFlag; + int32_t iGlobalQp; // global qp + +// VAA + SVAAFrameInfo* pVaa; // VAA information of reference + CWelsPreProcess* pVpp; + + SWelsSPS* pSpsArray; // MAX_SPS_COUNT by standard compatible + SWelsSPS* pSps; + SWelsPPS* pPPSArray; // MAX_PPS_COUNT by standard compatible + SWelsPPS* pPps; + /* SVC only */ + SSubsetSps* pSubsetArray; // MAX_SPS_COUNT by standard compatible + SSubsetSps* pSubsetSps; + int32_t iSpsNum; // number of pSps used + int32_t iSubsetSpsNum; // number of pSps used + int32_t iPpsNum; // number of pPps used + +// Output + SWelsEncoderOutput* pOut; // for NAL raw pData (need allocating memory for sNalList internal) + uint8_t* pFrameBs; // restoring bitstream pBuffer of all NALs in a frame + int32_t iFrameBsSize; // count size of frame bs in bytes allocated + int32_t iPosBsBuffer; // current writing position of frame bs pBuffer + + SSpatialPicIndex sSpatialIndexMap[MAX_DEPENDENCY_LAYER]; + int32_t iSliceBufferSize[MAX_DEPENDENCY_LAYER]; + + bool bRefOfCurTidIsLtr[MAX_DEPENDENCY_LAYER][MAX_TEMPORAL_LEVEL]; + // uint16_t uiIdrPicId; // IDR picture id: [0, 65535], this one is used for LTR + int32_t iMaxSliceCount;// maximal count number of slices for all layers observation + int16_t iActiveThreadsNum; // number of threads active so far + + /* + * DQ layer idc map for svc encoding, might be a better scheme than that of design before, + * can aware idc of referencing layer and that idc of successive layer to be coded + */ + /* SVC only */ + SDqIdc* pDqIdcMap; // overall DQ map of full scalability in specific frame (All full D/T/Q layers involved) // pDqIdcMap[dq_index] for each SDqIdc pData + + SParaSetOffset sPSOVector; + SParaSetOffset* pPSOVector; + CMemoryAlign* pMemAlign; + +#if defined(STAT_OUTPUT) +// overall stat pData, refer to SStatData in stat.h, in case avc to use stat[0][0] + SStatData sStatData [ MAX_DEPENDENCY_LAYER ] [ MAX_QUALITY_LEVEL ]; + SStatSliceInfo sPerInfo; +#endif//STAT_OUTPUT + + //related to 
Statistics + int64_t uiStartTimestamp; + SEncoderStatistics sEncoderStatistics[MAX_DEPENDENCY_LAYER]; + int32_t iStatisticsLogInterval; + int64_t iLastStatisticsLogTs; + + int32_t iEncoderError; + WELS_MUTEX mutexEncoderError; + bool bDeliveryFlag; + SStateCtx sWelsCabacContexts[4][WELS_QP_MAX + 1][WELS_CONTEXT_COUNT]; +#ifdef ENABLE_FRAME_DUMP + bool bDependencyRecFlag[MAX_DEPENDENCY_LAYER]; +#endif + int64_t uiLastTimestamp; + uint8_t* pDynamicBsBuffer[MAX_THREADS_NUM]; +} sWelsEncCtx/*, *PWelsEncCtx*/; +} +#endif//WELS_ENCODER_CONTEXT_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/extern.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/extern.h new file mode 100644 index 000000000..8790a48f8 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/extern.h @@ -0,0 +1,124 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file extern.h + * + * \brief extern interfaces between core and plus of wels encoder + * + * \date 4/21/2009 Created + * + ************************************************************************************* + */ +#if !defined(WELS_ENCODER_EXTERN_H__) +#define WELS_ENCODER_EXTERN_H__ + +#include "typedefs.h" +#include "encoder_context.h" + +namespace WelsEnc { + +/*! + * \brief initialize source picture body + * \param kpSrc SSourcePicture* + * \param kiCsp internal csp format + * \param kiWidth width of picture in pixels + * \param kiHeight height of picture in pixels + * \return successful - 0; otherwise none 0 for failed + */ +int32_t InitPic (const void* kpSrc, const int32_t kiCsp, const int32_t kiWidth, const int32_t kiHeight); + +/* + * SVC core encoder external interfaces + */ + +/*! + * \brief validate checking in parameter configuration + * \param pParam SWelsSvcCodingParam* + * \return successful - 0; otherwise none 0 for failed + */ +int32_t ParamValidationExt (SLogContext* pCtx, SWelsSvcCodingParam* pParam); + +// GOM based RC related for uiSliceNum decision +void GomValidCheck (const int32_t kiMbWidth, const int32_t kiMbHeight, int32_t* pSliceNum); + +/*! 
+ * \brief initialize Wels avc encoder core library + * \param ppCtx sWelsEncCtx** + * \param para SWelsSvcCodingParam* + * \return successful - 0; otherwise none 0 for failed + */ +int32_t WelsInitEncoderExt (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pPara, SLogContext* pLogCtx, + SExistingParasetList* pExistingParasetList); + +/*! + * \brief uninitialize Wels encoder core library + * \param pEncCtx sWelsEncCtx* + * \return none + */ +void WelsUninitEncoderExt (sWelsEncCtx** ppCtx); + +/*! + * \brief core svc encoding process + * + * \param h sWelsEncCtx*, encoder context + * \param pFbi FrameBSInfo* + * \param kpSrcPic Source picture + * \return EFrameType (videoFrameTypeIDR/videoFrameTypeI/videoFrameTypeP) + */ +int32_t WelsEncoderEncodeExt (sWelsEncCtx*, SFrameBSInfo* pFbi, const SSourcePicture* kpSrcPic); + +int32_t WelsEncoderEncodeParameterSets (sWelsEncCtx* pCtx, void* pDst); + +/* + * Force coding IDR as follows + */ +int32_t ForceCodingIDR (sWelsEncCtx* pCtx,int32_t iLayerId); + +/*! 
+ * \brief Wels SVC encoder parameters adjustment + * SVC adjustment results in new requirement in memory blocks adjustment + */ +int32_t WelsBitRateVerification(SLogContext* pLogCtx,SSpatialLayerConfig* pLayerParam,int32_t iLayerId); +int32_t WelsEncoderParamAdjust (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pNew); +void WelsEncoderApplyFrameRate (SWelsSvcCodingParam* pParam); +int32_t WelsEncoderApplyBitRate (SLogContext* pLogCtx, SWelsSvcCodingParam* pParam, int32_t iLayer); +int32_t WelsEncoderApplyBitVaryRang(SLogContext* pLogCtx, SWelsSvcCodingParam* pParam, int32_t iRang); +int32_t WelsEncoderApplyLTR (SLogContext* pLogCtx, sWelsEncCtx** ppCtx, SLTRConfig* pLTRValue); +int32_t DynSliceRealloc(sWelsEncCtx* pCtx,SFrameBSInfo* pFrameBsInfo,SLayerBSInfo* pLayerBsInfo); +int32_t FilterLTRRecoveryRequest (sWelsEncCtx* pCtx, SLTRRecoverRequest* pLTRRecoverRequest); +void CheckProfileSetting (SLogContext* pLogCtx,SWelsSvcCodingParam* pParam,int32_t iLayer, EProfileIdc uiProfileIdc); +void CheckLevelSetting (SLogContext* pLogCtx,SWelsSvcCodingParam* pParam,int32_t iLayer, ELevelIdc uiLevelIdc); +void CheckReferenceNumSetting (SLogContext* pLogCtx, SWelsSvcCodingParam* pParam,int32_t iNumRef); +void FilterLTRMarkingFeedback (sWelsEncCtx* pCtx, SLTRMarkingFeedback* pLTRMarkingFeedback); +} + +#endif//WELS_ENCODER_EXTERN_H__ + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/get_intra_predictor.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/get_intra_predictor.h new file mode 100644 index 000000000..ae9dc3989 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/get_intra_predictor.h @@ -0,0 +1,175 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file get_intra_predictor.h + * + * \brief interfaces for get intra predictor about 16x16, 4x4, chroma. 
+ * + * \date 4/2/2009 Created + * + ************************************************************************************* + */ + +#ifndef GET_INTRA_PREDICTOR_H +#define GET_INTRA_PREDICTOR_H + +#include "typedefs.h" +#include "wels_func_ptr_def.h" + +namespace WelsEnc { +void WelsI4x4LumaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI4x4LumaPredH_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI4x4LumaPredDc_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI4x4LumaPredDcLeft_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI4x4LumaPredDcTop_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI4x4LumaPredDcNA_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); + +void WelsI4x4LumaPredDDL_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI4x4LumaPredDDLTop_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI4x4LumaPredDDR_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); + +void WelsI4x4LumaPredVR_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI4x4LumaPredHD_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI4x4LumaPredVL_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI4x4LumaPredVLTop_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI4x4LumaPredHU_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); + + +void WelsIChromaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsIChromaPredH_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsIChromaPredPlane_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsIChromaPredDc_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsIChromaPredDcLeft_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsIChromaPredDcTop_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void 
WelsIChromaPredDcNA_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); + +void WelsI16x16ChromaPredVer (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI16x16ChromaPredHor (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); + +void WelsI16x16LumaPredPlane_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI16x16LumaPredDc_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI16x16LumaPredDcLeft_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI16x16LumaPredDcTop_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI16x16LumaPredDcNA_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); + +#if defined(__cplusplus) +extern "C" { +#endif//__cplusplus + +#if defined(X86_ASM) +void WelsFillingPred8to16_mmx (uint8_t* pPred, uint8_t* pValue); +void WelsFillingPred8x2to16_mmx (uint8_t* pPred, uint8_t* pValue); +void WelsFillingPred1to16_mmx (uint8_t* pPred, const uint8_t kuiValue); +void WelsFillingPred8x2to16_sse2 (uint8_t* pPred, uint8_t* pValue); +void WelsFillingPred1to16_sse2 (uint8_t* pPred, const uint8_t kuiValue); + +//for intra-prediction ASM functions +void WelsI16x16LumaPredDc_sse2 (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI16x16LumaPredPlane_sse2 (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); + +void WelsIChromaPredH_mmx (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsIChromaPredV_sse2 (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsIChromaPredDc_sse2 (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsIChromaPredPlane_sse2 (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); + +void WelsI4x4LumaPredV_sse2 (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI4x4LumaPredH_sse2 (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI4x4LumaPredDc_sse2 (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI4x4LumaPredDDL_mmx 
(uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI4x4LumaPredDDR_mmx (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI4x4LumaPredVR_mmx (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI4x4LumaPredHD_mmx (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI4x4LumaPredVL_mmx (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI4x4LumaPredHU_mmx (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +#endif//X86_ASM + +#if defined(HAVE_NEON) +void WelsI16x16LumaPredDc_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI16x16LumaPredPlane_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); + +void WelsI4x4LumaPredV_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI4x4LumaPredH_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI4x4LumaPredDDL_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI4x4LumaPredDDR_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI4x4LumaPredVL_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI4x4LumaPredVR_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI4x4LumaPredHU_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI4x4LumaPredHD_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); + +void WelsIChromaPredV_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsIChromaPredH_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsIChromaPredDc_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsIChromaPredPlane_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +#endif//HAVE_NEON + +#if defined(HAVE_NEON_AARCH64) +void WelsI16x16LumaPredDc_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI16x16LumaPredPlane_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t 
kiStride); +void WelsI16x16LumaPredDcTop_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI16x16LumaPredDcLeft_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); + +void WelsI4x4LumaPredH_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI4x4LumaPredDDL_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI4x4LumaPredDDLTop_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI4x4LumaPredVL_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI4x4LumaPredVLTop_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI4x4LumaPredVR_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI4x4LumaPredHU_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI4x4LumaPredHD_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI4x4LumaPredDc_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI4x4LumaPredDcTop_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); + +void WelsIChromaPredV_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsIChromaPredH_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsIChromaPredDc_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsIChromaPredPlane_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsIChromaPredDcTop_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +#endif//HAVE_NEON_AARCH64 + +#if defined(HAVE_MMI) +void WelsI16x16LumaPredDc_mmi (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI16x16LumaPredPlane_mmi (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); + +void WelsIChromaPredH_mmi (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsIChromaPredV_mmi 
(uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsIChromaPredDc_mmi (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsIChromaPredPlane_mmi (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +#endif//HAVE_MMI +#if defined(__cplusplus) +} +#endif//__cplusplus + +void WelsInitIntraPredFuncs (SWelsFuncPtrList* pFuncList, const uint32_t kuiCpuFlag); + +} +#endif //GET_INTRA_PREDICTOR_H + + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/mb_cache.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/mb_cache.h new file mode 100644 index 000000000..d7c0494f5 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/mb_cache.h @@ -0,0 +1,141 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +//mb_cache.h +#ifndef WELS_MACROBLOCK_CACHE_H__ +#define WELS_MACROBLOCK_CACHE_H__ + +#include "typedefs.h" +#include "wels_const.h" +#include "macros.h" + +namespace WelsEnc { + +/* + * MB cache information; one such cache should be defined within a slice + */ +/* + * Cache for Luma Cache for Chroma(Cb, Cr) + * + * TL T T T T TL T T + * L - - - - L - - + * L - - - - L - - TR + * L - - - - + * L - - - - TR + * + */ + +////////////////////////mapping scan index//////////////////////// + +extern const uint8_t g_kuiSmb4AddrIn256[16]; +extern const uint8_t g_kuiCache12_8x8RefIdx[4]; + +typedef struct TagDCTCoeff { // coefficient storage (the type SMbCache::pDct points to) +//ALIGNED_DECLARE( int16_t, residual_ac[16], 16 ); //I_16x16 +int16_t iLumaBlock[16][16]; //based on block4x4 luma DC/AC +//ALIGNED_DECLARE( int16_t, iLumaI16x16Dc[16], 16 ); //I_16x16 DC +int16_t iLumaI16x16Dc[16]; +//ALIGNED_DECLARE( int16_t, iChromaDc[2][4], 16 ); //chroma DC +int16_t iChromaBlock[8][16]; //based on block4x4 chroma DC/AC +int16_t iChromaDc[2][4]; +} SDCTCoeff ; + +typedef struct TagMbCache { +//the following data is declared with 16-byte alignment +ALIGNED_DECLARE (SMVComponentUnit, sMvComponents, 16); + +ALIGNED_DECLARE (int8_t, iNonZeroCoeffCount[48], 16); // Cache line size +// int8_t iNonZeroCoeffCount[6 * 8]; // Right luma, Chroma(Left Top Cb, Left btm Cr); must be followed by iIntraPredMode! +ALIGNED_DECLARE (int8_t, iIntraPredMode[48], 16); +// must follow iNonZeroCoeffCount!
+int32_t iSadCost[4]; //avail 1; unavail 0 (NOTE(review): this wording does not fit an int32_t cost array - confirm intended meaning) +SMVUnitXY sMbMvp[MB_BLOCK4x4_NUM];// for write bs + +//for residual decoding (recovery) at the side of Encoder +int16_t* pCoeffLevel; // temp +//malloc memory for prediction +uint8_t* pSkipMb; + +//ALIGNED_DECLARE(uint8_t, pMemPredMb[2][256], 16);//One: Best I_16x16 Luma and refine frac_pixel pBuffer; another: PingPong I_8x8&&Inter Cb + Cr +uint8_t* pMemPredMb; +uint8_t* pMemPredLuma;// inter && intra share same pointer; +//ALIGNED_DECLARE(uint8_t, pMemPredChroma[2][64*2], 16); //another PingPong pBuffer: Best Cb + Cr; +uint8_t* pMemPredChroma;// inter && intra share same pointer; +uint8_t* pBestPredIntraChroma; //Cb:0~63; Cr:64~127 + +//ALIGNED_DECLARE(uint8_t, pMemPredBlk4[2][16], 16); //I_4x4 +uint8_t* pMemPredBlk4; + +uint8_t* pBestPredI4x4Blk4;//I_4x4 + +//ALIGNED_DECLARE(uint8_t, pBufferInterPredMe[4][400], 16);//inter type pBuffer for ME h & v & hv +uint8_t* pBufferInterPredMe; // [4][400] is enough because only h&v, v&hv or h&hv are needed at once; if h, v and hv are all needed (8 quarter-pixel positions), [5][400] will be required in the future. + +//not in scan4[] order; stored in plain memory order +//ALIGNED_DECLARE(bool, pPrevIntra4x4PredModeFlag[16], 16);//if 1, means no rem_intra4x4_pred_mode; if 0, means rem_intra4x4_pred_mode != 0 +bool* pPrevIntra4x4PredModeFlag; +//ALIGNED_DECLARE(int8_t, pRemIntra4x4PredModeFlag[16], 16);//-1 as default; if pPrevIntra4x4PredModeFlag==0, +//pRemIntra4x4PredModeFlag or added by 1 is the best pred_mode +int8_t* pRemIntra4x4PredModeFlag; + +int32_t iSadCostSkip[4]; //avail 1; unavail 0 (NOTE(review): same mismatch as iSadCost - confirm) +bool bMbTypeSkip[4]; //1: skip; 0: non-skip +int32_t* pEncSad; + +//for residual encoding at the side of Encoder +SDCTCoeff* pDct; + +uint8_t uiNeighborIntra; // LEFT_MB_POS:0x01, TOP_MB_POS:0x02, TOPLEFT_MB_POS = 0x04 ,TOPRIGHT_MB_POS = 0x08; +uint8_t uiLumaI16x16Mode; +uint8_t uiChmaI8x8Mode; + +bool bCollocatedPredFlag;//denotes whether the current MB is collocated predicted (MV==0).
+uint32_t uiRefMbType; + +struct { + /* pointers to current mb location in original frame (3 entries, presumably one per Y/Cb/Cr plane - TODO confirm) */ + uint8_t* pEncMb[3]; + /* pointers to current mb location in recovery frame */ + uint8_t* pDecMb[3]; + /* pointers to co-located mb location in reference frame */ + uint8_t* pRefMb[3]; + //for SVC + uint8_t* pCsMb[3];//locating current mb's CS in whole frame +// int16_t *p_rs[3];//locating current mb's RS in whole frame + +} SPicData; +} SMbCache; + +}//end of namespace + +#endif//WELS_MACROBLOCK_CACHE_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/md.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/md.h new file mode 100644 index 000000000..7e2771069 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/md.h @@ -0,0 +1,181 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file md.h + * + * \brief mode decision + * + * \date 2009.5.14 Created + * + ************************************************************************************* + */ +#ifndef WELS_MACROBLOCK_MODE_DECISION_H__ +#define WELS_MACROBLOCK_MODE_DECISION_H__ + +#include "svc_motion_estimate.h" +#include "svc_enc_macroblock.h" +#include "encode_mb_aux.h" +#include "wels_func_ptr_def.h" + +namespace WelsEnc { +#define ME_REFINE_BUF_STRIDE 32 +#define ME_REFINE_BUF_WIDTH_BLK4 8 +#define ME_REFINE_BUF_WIDTH_BLK8 16 +#define ME_REFINE_BUF_STRIDE_BLK4 160 +#define ME_REFINE_BUF_STRIDE_BLK8 320 + +#define REFINE_ME_NO_BEST_HALF_PIXEL 0 //( 0, 0) +#define REFINE_ME_HALF_PIXEL_LEFT 3 //(-2, 0) +#define REFINE_ME_HALF_PIXEL_RIGHT 4 //( 2, 0) +#define REFINE_ME_HALF_PIXEL_TOP 1 //( 0, -2) +#define REFINE_ME_HALF_PIXEL_BOTTOM 2 //( 0, 2) + +#define ME_NO_BEST_QUAR_PIXEL 1 //( 0, 0) or best half pixel +#define ME_QUAR_PIXEL_LEFT 2 //(-1, 0) +#define ME_QUAR_PIXEL_RIGHT 3 //( 1, 0) +#define ME_QUAR_PIXEL_TOP 4 //( 0, -1) +#define ME_QUAR_PIXEL_BOTTOM 5 //( 0, 1) + +#define NO_BEST_FRAC_PIX 1 // REFINE_ME_NO_BEST_HALF_PIXEL + ME_NO_BEST_QUAR_PIXEL + +//for vaa constants +#define MBVAASIGN_FLAT 15 +#define MBVAASIGN_HOR1 3 +#define MBVAASIGN_HOR2 12 +#define MBVAASIGN_VER1 5 +#define MBVAASIGN_VER2 10 +#define MBVAASIGN_CMPX1 6 +#define MBVAASIGN_CMPX2 9 + +extern const int32_t g_kiQpCostTable[52]; +extern const int8_t 
g_kiMapModeI16x16[7]; +//extern const int8_t g_kiMapModeI4x4[14]; +extern const int8_t g_kiMapModeIntraChroma[7]; + +///////////////////////////// + +// if we want to keep total sizeof(SWelsMD) <= 256, we may need to separate three members of SWelsME. +typedef struct TagWelsMD { +int32_t iLambda; +uint16_t* pMvdCost; + +int32_t iCostLuma; +int32_t iCostChroma;//satd+lambda(best_pred_mode) //i_sad_chroma; +int32_t iSadPredMb; + +uint8_t uiRef; //uiRefIndex appointed by Encoder, used for MC +bool bMdUsingSad; +uint16_t uiReserved; + +int32_t iCostSkipMb; +int32_t iSadPredSkip; + +int32_t iMbPixX; // pixel position of MB in horizontal axis +int32_t iMbPixY; // pixel position of MB in vertical axis +int32_t iBlock8x8StaticIdc[4]; + +//no B frames in Wels, so list1 can be ignored + +struct { // SWelsME entries grouped by block size (see member names) + SWelsME sMe16x16; //adjust each SWelsME for 8 D-word! + SWelsME sMe8x8[4]; + SWelsME sMe16x8[2]; + SWelsME sMe8x16[2]; + SWelsME sMe4x4[4][4]; + SWelsME sMe8x4[4][2]; + SWelsME sMe4x8[4][2]; +// SMVUnitXY i_mvbs[MB_BLOCK8x8_NUM]; //scaled MVB +} sMe; + +} SWelsMD; + +typedef struct TagMeRefinePointer { // buffer pointers for half-/quarter-pixel ME refinement plus a block copy function +uint8_t* pHalfPixH; +uint8_t* pHalfPixV; +uint8_t* pHalfPixHV; + +uint8_t* pQuarPixBest; +uint8_t* pQuarPixTmp; + +PCopyFunc pfCopyBlockByMode; +} SMeRefinePointer; + +void FillNeighborCacheIntra (SMbCache* pMbCache, SMB* pCurMb, int32_t iMbWidth/*, bool constrained_intra_pred_flag*/); +void FillNeighborCacheInterWithoutBGD (SMbCache* pMbCache, SMB* pCurMb, int32_t iMbWidth, + int8_t* pVaaBgMbFlag); //BGD spatial func +void FillNeighborCacheInterWithBGD (SMbCache* pMbCache, SMB* pCurMb, int32_t iMbWidth, int8_t* pVaaBgMbFlag); +void InitFillNeighborCacheInterFunc (SWelsFuncPtrList* pFuncList, const int32_t kiFlag); + +void MvdCostInit (uint16_t* pMvdCostInter, const int32_t kiMvdSz); + +void PredictSad (int8_t* pRefIndexCache, int32_t* pSadCostCache, int32_t uiRef, int32_t* pSadPred); + + +void PredictSadSkip (int8_t* pRefIndexCache, bool* pMbSkipCache, int32_t* pSadCostCache, int32_t
uiRef, + int32_t* iSadPredSkip); + +// for pfGetVarianceFromIntraVaa function ptr adaptive by CPU features, 6/7/2010 +void InitIntraAnalysisVaaInfo (SWelsFuncPtrList* pFuncList, const uint32_t kuiCpuFlag); +bool MdIntraAnalysisVaaInfo (sWelsEncCtx* pEncCtx, uint8_t* pEncMb); + +uint8_t MdInterAnalysisVaaInfo_c (int32_t* pSad8x8); + + +void InitMeRefinePointer (SMeRefinePointer* pMeRefine, SMbCache* pMbCache, int32_t iStride); +void MeRefineFracPixel (sWelsEncCtx* pEncCtx, uint8_t* pMemPredInterMb, SWelsME* pMe, + SMeRefinePointer* pMeRefine, int32_t iWidth, int32_t iHeight); + +void InitBlkStrideWithRef (int32_t* pBlkStride, const int32_t kiStrideRef); + +void UpdateMbMv_c (SMVUnitXY* pMvBuffer, const SMVUnitXY ksMv); + +#if defined(__cplusplus) +extern "C" { +#endif//__cplusplus + +#if defined(X86_ASM) + +// for pfGetVarianceFromIntraVaa SIMD optimization, 6/7/2010 +int32_t AnalysisVaaInfoIntra_sse2 (uint8_t* pDataY, const int32_t kiLineSize); +int32_t AnalysisVaaInfoIntra_ssse3 (uint8_t* pDataY, const int32_t kiLineSize); +uint8_t MdInterAnalysisVaaInfo_sse2 (int32_t* pSad8x8); +uint8_t MdInterAnalysisVaaInfo_sse41 (int32_t* pSad8x8); +void UpdateMbMv_sse2 (SMVUnitXY* pMvBuffer, const SMVUnitXY ksMv); + +#endif//X86_ASM + +#if defined(__cplusplus) +} +#endif//__cplusplus + +} +#endif//WELS_MACROBLOCK_MODE_DECISION_H__ + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/mt_defs.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/mt_defs.h new file mode 100644 index 000000000..a6ed27e51 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/mt_defs.h @@ -0,0 +1,91 @@ +/*! + * \copy + * Copyright (c) 2010-2013, Cisco Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * \file mt_defs.h + * + * \brief Main macros for multiple threading implementation + * + * \date 2/26/2010 Created + * + ************************************************************************************* + */ +#if !defined(MULTIPLE_THREADING_DEFINES_H__) +#define MULTIPLE_THREADING_DEFINES_H__ + +#include "typedefs.h" +#include "codec_app_def.h" +#include "wels_const.h" +#include "WelsThreadLib.h" +#include "slice.h" + +using namespace WelsEnc; /* NOTE(review): using-directive at header scope exposes all WelsEnc names to every file that includes this header; left unchanged because includers may rely on it */ +/* + * MT_DEBUG: write multi-threading trace output to a log file + */ +//#define MT_DEBUG +//#define ENABLE_TRACE_MT + +#define THRESHOLD_RMSE_CORE8 0.0320f // v1.1: 0.0320f; v1.0: 0.02f +#define THRESHOLD_RMSE_CORE4 0.0215f // v1.1: 0.0215f; v1.0: 0.03f +#define THRESHOLD_RMSE_CORE2 0.0200f // v1.1: 0.0200f; v1.0: 0.04f + +typedef struct TagSliceThreadPrivateData { +void* pWelsPEncCtx; // untyped encoder context pointer (presumably sWelsEncCtx* - TODO confirm) +SFrameBSInfo* pFrameBsInfo; +int32_t iSliceIndex; // slice index, zero based +int32_t iThreadIndex; // thread index, zero based + +} SSliceThreadPrivateData; + +typedef struct TagSliceThreading { +SSliceThreadPrivateData* pThreadPEncCtx;// thread context, [iThreadIdx] +char eventNamespace[100]; +WELS_THREAD_HANDLE pThreadHandles[MAX_THREADS_NUM];// thread handles, [iThreadIdx] +WELS_EVENT pSliceCodedEvent[MAX_THREADS_NUM];// events for slice coded state, [iThreadIdx] +WELS_EVENT pSliceCodedMasterEvent; // event for signalling that some event in pSliceCodedEvent has been signalled +WELS_EVENT pReadySliceCodingEvent[MAX_THREADS_NUM]; // events for slice coding ready, [iThreadIdx] +WELS_EVENT pUpdateMbListEvent[MAX_THREADS_NUM]; // signal to update mb list neighbor for various slices +WELS_EVENT pFinUpdateMbListEvent[MAX_THREADS_NUM]; // signal that updating the mb list has finished + +WELS_MUTEX mutexSliceNumUpdate; // for dynamic slicing mode MT + +#ifdef MT_DEBUG +FILE* pFSliceDiff; // file handle for debug +#endif//MT_DEBUG + +uint8_t* pThreadBsBuffer[MAX_THREADS_NUM]; //actual memory for slice buffer +bool
bThreadBsBufferUsage[MAX_THREADS_NUM]; /* one flag per pThreadBsBuffer entry (same MAX_THREADS_NUM size); presumably marks a buffer as in use - TODO confirm */ +WELS_MUTEX mutexThreadBsBufferUsage; +WELS_MUTEX mutexEvent; +WELS_MUTEX mutexThreadSlcBuffReallocate; +} SSliceThreading; + +#endif//MULTIPLE_THREADING_DEFINES_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/mv_pred.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/mv_pred.h new file mode 100644 index 000000000..e0bc18a30 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/mv_pred.h @@ -0,0 +1,191 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE.
+ * + * + * \file mv_pred.h + * + * \brief Get MV predictor and update motion vector of mb cache + * + * \date 05/22/2009 Created + * + ************************************************************************************* + */ + +#ifndef WELS_MV_PRED_H__ +#define WELS_MV_PRED_H__ + + +#include "svc_enc_macroblock.h" +#include "mb_cache.h" + +namespace WelsEnc { +/*! + * \brief update pMv and uiRefIndex cache for current MB, only for P_16x16 (SKIP inclusive) + * \param + * \param + */ + +/*! + * \brief update pMv and uiRefIndex cache for current MB and pMbCache, only for P_16x16 (SKIP inclusive) + * \param + * \param + */ +void UpdateP16x16MotionInfo (SMbCache* pMbCache, SMB* pCurMb, const int8_t kiRef, SMVUnitXY* pMv); //for encoder + +/*! + * \brief update pMv and uiRefIndex cache for current MB and pMbCache, only for P_16x8 + * \param + * \param + */ +void UpdateP16x8MotionInfo (SMbCache* pMbCache, SMB* pCurMb, const int32_t kiPartIdx, const int8_t kiRef, + SMVUnitXY* pMv); + +/*! + * \brief update pMv and uiRefIndex cache for current MB and pMbCache, only for P_8x16 + * \param + * \param + */ +void update_P8x16_motion_info (SMbCache* pMbCache, SMB* pCurMb, const int32_t kiPartIdx, const int8_t kiRef, + SMVUnitXY* pMv); + +/*! + * \brief update pMv and uiRefIndex cache for current MB and pMbCache, only for P_8x8 + * \param + * \param + */ +void UpdateP8x8MotionInfo (SMbCache* pMbCache, SMB* pCurMb, const int32_t kiPartIdx, const int8_t kiRef, + SMVUnitXY* pMv); + +/*! + * \brief update pMv and uiRefIndex cache for current MB and pMbCache, only for P_4x4 + * \param + * \param + */ +void UpdateP4x4MotionInfo (SMbCache* pMbCache, SMB* pCurMb, const int32_t kiPartIdx, const int8_t kiRef, + SMVUnitXY* pMv); + +/*! + * \brief update pMv and uiRefIndex cache for current MB and pMbCache, only for P_8x4 + * \param + * \param + */ +void UpdateP8x4MotionInfo (SMbCache* pMbCache, SMB* pCurMb, const int32_t kiPartIdx, const int8_t kiRef, + SMVUnitXY* pMv); + +/*! 
+ * \brief update pMv and uiRefIndex cache for current MB and pMbCache, only for P_4x8 + * \param + * \param + */ +void UpdateP4x8MotionInfo (SMbCache* pMbCache, SMB* pCurMb, const int32_t kiPartIdx, const int8_t kiRef, + SMVUnitXY* pMv); + +/*! + * \brief get the motion predictor for 4*4 or 8*8 or 16*16 block + * \param + * \param output mvp_x and mvp_y + */ +void PredMv (const SMVComponentUnit* kpMvComp, int8_t iPartIdx, int8_t iPartW, int32_t iRef, SMVUnitXY* sMvp); + + +/*! + * \brief get the motion predictor for SKIP MB + * \param + * \param output mvp_x and mvp_y + */ +void PredSkipMv (SMbCache* pMbCache, SMVUnitXY* sMvp); + + +/*! + * \brief get the motion predictor for inter16x8 MB + * \param + * \param output mvp_x and mvp_y + */ +void PredInter16x8Mv (SMbCache* pMbCache, int32_t iPartIdx, int8_t iRef, SMVUnitXY* sMvp); + + +/*! + * \brief get the motion predictor for inter8x16 MB + * \param + * \param output mvp_x and mvp_y + */ +void PredInter8x16Mv (SMbCache* pMbCache, int32_t iPartIdx, int8_t iRef, SMVUnitXY* sMvp); + +//=========================update motion info(MV and ref_idx) into Mb_cache========================== +/*! + * \brief only update pMv cache for current MB, only for P_16x16 + * \param + * \param + */ +//void update_p16x16_motion2cache(SMbCache* pMbCache, int8_t pRef, SMVUnitXY* pMv); + +/*! + * \brief only update pMv cache for current MB, only for P_16x8 + * \param + * \param + */ +void UpdateP16x8Motion2Cache (SMbCache* pMbCache, int32_t iPartIdx, int8_t iRef, SMVUnitXY* pMv); + +/*! + * \brief only update pMv cache for current MB, only for P_8x16 + * \param + * \param + */ +void UpdateP8x16Motion2Cache (SMbCache* pMbCache, int32_t iPartIdx, int8_t iRef, SMVUnitXY* pMv); + +/*! + * \brief only update pMv cache for current MB, only for P_8x8 + * \param + * \param + */ +void UpdateP8x8Motion2Cache (SMbCache* pMbCache, int32_t iPartIdx, int8_t iRef, SMVUnitXY* pMv); + +/*! 
+ * \brief only update pMv cache for current MB, only for P_4x4 + * \param + * \param + */ +void UpdateP4x4Motion2Cache (SMbCache* pMbCache, int32_t iPartIdx, int8_t iRef, SMVUnitXY* pMv); + +/*! + * \brief only update pMv cache for current MB, only for P_8x4 + * \param + * \param + */ +void UpdateP8x4Motion2Cache (SMbCache* pMbCache, int32_t iPartIdx, int8_t iRef, SMVUnitXY* pMv); + +/*! + * \brief only update pMv cache for current MB, only for P_4x8 + * \param + * \param + */ +void UpdateP4x8Motion2Cache (SMbCache* pMbCache, int32_t iPartIdx, int8_t iRef, SMVUnitXY* pMv); +} +#endif//WELS_MV_PRED_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/nal_encap.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/nal_encap.h new file mode 100644 index 000000000..70a9cad23 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/nal_encap.h @@ -0,0 +1,143 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file nal_encap.h + * + * \brief NAL pRawNal pData encapsulation + * + * \date 2/4/2009 Created + * + ************************************************************************************* + */ +#ifndef WELS_NAL_UNIT_ENCAPSULATION_H__ +#define WELS_NAL_UNIT_ENCAPSULATION_H__ + +#include "typedefs.h" +#include "wels_common_defs.h" +#include "wels_const.h" + +using namespace WelsCommon; /* NOTE(review): using-directive at header scope exposes all WelsCommon names to every file that includes this header; left unchanged because includers may rely on it */ + +//SBitStringAux +namespace WelsEnc { + +#define NAL_HEADER_SIZE (4) +/* + * Raw payload data for NAL unit, AVC/SVC compatible + */ +typedef struct TagWelsNalRaw { +uint8_t* pRawData; // raw NAL payload for slice data +int32_t iPayloadSize; // size of raw NAL data + +SNalUnitHeaderExt sNalExt; // NAL header information + +int32_t iStartPos; //NAL start position in buffer +} SWelsNalRaw; + +/* + * Encoder major output data + */ +typedef struct TagWelsEncoderOutput { +uint8_t* pBsBuffer; // overall bitstream buffer allocated for a coded picture; intended to be reused.
+uint32_t uiSize; // size of the buffer allocated above + +SBitStringAux sBsWrite; + +// SWelsNalRaw raw_nals[MAX_DEPENDENCY_LAYER*2+MAX_DEPENDENCY_LAYER*MAX_QUALITY_LEVEL]; // AVC: max up to SPS+PPS+max_slice_idc (2 + 8) for FMO; +SWelsNalRaw* sNalList; // nal list, adaptive for AVC/SVC in case single slice, multiple slices or fmo +int32_t* pNalLen; +int32_t iCountNals; // count number of NAL in list +// SVC: num_sps (MAX_D) + num_pps (MAX_D) + num_vcl (MAX_D * MAX_Q) +int32_t iNalIndex; // index of the NAL currently being coded, 0 based +int32_t iLayerBsIndex; // layer index of bit stream for SFrameBSInfo +// bool bAnnexBFlag; // Annex B flag: tells whether the packetization mode needs the 4-byte (0 0 0 1) start code prefix +} SWelsEncoderOutput; + +//#define MT_DEBUG_BS_WR 0 // for MT debugging if needed + +typedef struct TagWelsSliceBs { +uint8_t* pBs; // output bitstream, pBitStringAux not needed for slice 0 due to no dependency of pFrameBs available +uint32_t uiBsPos; // position of output bitstream +uint8_t* pBsBuffer; // overall bitstream buffer allocated for a coded slice; intended to be reused. +uint32_t uiSize; // size of the buffer allocated above + +SBitStringAux sBsWrite; + +SWelsNalRaw sNalList[2]; // nal list, PREFIX NAL(if applicable) + SLICE NAL +// int32_t iCountNals; // count number of NAL in list +int32_t iNalLen[2]; +int32_t iNalIndex; // index of the NAL currently being coded, 0 based + +// bool bAnnexBFlag; // Annex B flag: tells whether the packetization mode needs the 4-byte (0 0 0 1) start code prefix +#if MT_DEBUG_BS_WR +bool bSliceCodedFlag; +#endif//MT_DEBUG_BS_WR +} SWelsSliceBs; + +/*! + * \brief load and initialize raw NAL data + */ +void WelsLoadNal (SWelsEncoderOutput* pEncoderOuput, const int32_t/*EWelsNalUnitType*/ kiType, + const int32_t/*EWelsNalRefIdc*/ kiNalRefIdc); + +/*! + * \brief unload raw NAL + */ +void WelsUnloadNal (SWelsEncoderOutput* pEncoderOuput); + +/*!
+ * \brief load an initialize NAL pRawNal pData + */ +void WelsLoadNalForSlice (SWelsSliceBs* pSliceBs, const int32_t/*EWelsNalUnitType*/ kiType, + const int32_t/*EWelsNalRefIdc*/ kiNalRefIdc); + +/*! + * \brief unload pRawNal NAL + */ +void WelsUnloadNalForSlice (SWelsSliceBs* pSliceBs); + +/*! + * \brief encode NAL with emulation forbidden three bytes checking + * \param pDst pDst NAL pData + * \param pDstLen length of pDst NAL output + * \param annexeb annexeb flag + * \param pRawNal pRawNal NAL pData + * \return ERR_CODE + */ +int32_t WelsEncodeNal (SWelsNalRaw* pRawNal, void* pNalHeaderExt, const int32_t kiDstBufferLen, void* pDst, + int32_t* pDstLen); + +/*! + * \brief write prefix nal + */ +int32_t WelsWriteSVCPrefixNal (SBitStringAux* pBitStringAux, const int32_t keNalRefIdc, const bool kbIdrFlag); +} +#endif//WELS_NAL_UNIT_ENCAPSULATION_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/param_svc.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/param_svc.h new file mode 100644 index 000000000..780b4df41 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/param_svc.h @@ -0,0 +1,569 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file param_svc.h + * + * \brief Configurable parameters in H.264/SVC Encoder + * + * \date 4/20/2009 Created + * + ************************************************************************************* + */ +#if !defined(WELS_ENCODER_PARAMETER_SVC_H__) +#define WELS_ENCODER_PARAMETER_SVC_H__ + +#include <string.h> +#include <math.h> +#include "typedefs.h" +#include "codec_def.h" +#include "macros.h" +#include "wels_const.h" +#include "rc.h" +#include "svc_enc_slice_segment.h" +#include "as264_common.h" + +namespace WelsEnc { + +#define INVALID_TEMPORAL_ID ((uint8_t)0xff) + +extern const uint8_t g_kuiTemporalIdListTable[MAX_TEMPORAL_LEVEL][MAX_GOP_SIZE + 1]; + +/*!
+* \brief get Logarithms base 2 of (upper/base) +* \param base based scaler +* \param upper input upper value +* \return 2 based scaling factor +*/ +static inline uint32_t GetLogFactor (float base, float upper) { +#if defined(_M_X64) && _MSC_VER == 1800 + _set_FMA3_enable(0); +#endif + const double dLog2factor = log10 (1.0 * upper / base) / log10 (2.0); + const double dEpsilon = 0.0001; + const double dRound = floor (dLog2factor + 0.5); + + if (dLog2factor < dRound + dEpsilon && dRound < dLog2factor + dEpsilon) { + return (uint32_t) (dRound); + } + return UINT_MAX; +} + +/* + * Dependency Layer Parameter + */ +typedef struct TagDLayerParam { + int32_t iActualWidth; // input source picture actual width + int32_t iActualHeight; // input source picture actual height + int32_t iTemporalResolution; + int32_t iDecompositionStages; + uint8_t uiCodingIdx2TemporalId[ (1 << MAX_TEMPORAL_LEVEL) + 1]; + + int8_t iHighestTemporalId; + float fInputFrameRate; // input frame rate + float fOutputFrameRate; // output frame rate + uint16_t uiIdrPicId; // IDR picture id: [0, 65535], this one is used for LTR + int32_t iCodingIndex; + int32_t iFrameIndex; // count how many frames elapsed during coding context currently + bool bEncCurFrmAsIdrFlag; + int32_t iFrameNum; // current frame number coding + int32_t iPOC; // frame iPOC +#ifdef ENABLE_FRAME_DUMP + char sRecFileName[MAX_FNAME_LEN]; // file to be constructed +#endif//ENABLE_FRAME_DUMP +} SSpatialLayerInternal; + +/* + * Cisco OpenH264 Encoder Parameter Configuration + */ +typedef struct TagWelsSvcCodingParam: SEncParamExt { + SSpatialLayerInternal sDependencyLayers[MAX_DEPENDENCY_LAYER]; + + /* General */ + uint32_t uiGopSize; // GOP size (at maximal frame rate: 16) + struct { + int32_t iLeft; + int32_t iTop; + int32_t iWidth; + int32_t iHeight; + } SUsedPicRect; // the rect in input picture that encoder actually used + + char* pCurPath; // record current lib path such as:/pData/pData/com.wels.enc/lib/ + + bool 
bDeblockingParallelFlag; // deblocking filter parallelization control flag + int32_t iBitsVaryPercentage; + + int8_t iDecompStages; // GOP size dependency + int32_t iMaxNumRefFrame; + + public: + TagWelsSvcCodingParam() { + FillDefault(); + } + ~TagWelsSvcCodingParam() {} + + static void FillDefault (SEncParamExt& param) { /* zero the whole struct first, then set each default explicitly */ + memset (&param, 0, sizeof (param)); + param.uiIntraPeriod = 0; // intra period (multiple of GOP size as desired) + param.iNumRefFrame = AUTO_REF_PIC_COUNT;// number of reference frame used + + param.iPicWidth = 0; // actual input picture width + param.iPicHeight = 0; // actual input picture height + + param.fMaxFrameRate = MAX_FRAME_RATE; // maximal frame rate [Hz / fps] + + param.iComplexityMode = LOW_COMPLEXITY; + param.iTargetBitrate = UNSPECIFIED_BIT_RATE; // overall target bitrate introduced in RC module + param.iMaxBitrate = UNSPECIFIED_BIT_RATE; + param.iMultipleThreadIdc = 1; + param.bUseLoadBalancing = true; + + param.iLTRRefNum = 0; + param.iLtrMarkPeriod = 30; //the min distance of two long-term references + + param.bEnableSSEI = false; + param.bSimulcastAVC = false; + param.bEnableFrameCroppingFlag = true; // enable frame cropping flag: true always in application + // false: Streaming Video Sharing; true: Video Conferencing Meeting; + + /* Deblocking loop filter */ + param.iLoopFilterDisableIdc = 0; // 0: on, 1: off, 2: on except for slice boundaries + param.iLoopFilterAlphaC0Offset = 0; // AlphaOffset: valid range [-6, 6], default 0 + param.iLoopFilterBetaOffset = 0; // BetaOffset: valid range [-6, 6], default 0 + + /* Rate Control */ + param.iRCMode = RC_QUALITY_MODE; + param.iPaddingFlag = 0; + param.iEntropyCodingModeFlag = 0; + param.bEnableDenoise = false; // denoise control + param.bEnableSceneChangeDetect = true; // scene change detection control + param.bEnableBackgroundDetection = true; // background detection control + param.bEnableAdaptiveQuant = true; // adaptive quantization control + param.bEnableFrameSkip = true; // frame
skipping + param.bEnableLongTermReference = false; // long term reference control + param.eSpsPpsIdStrategy = INCREASING_ID;// pSps pPps id addition control + param.bPrefixNalAddingCtrl = false; // prefix NAL adding control + param.iSpatialLayerNum = 1; // number of dependency(Spatial/CGS) layers used to be encoded + param.iTemporalLayerNum = 1; // number of temporal layer specified + + param.iMaxQp = QP_MAX_VALUE; + param.iMinQp = QP_MIN_VALUE; + param.iUsageType = CAMERA_VIDEO_REAL_TIME; + param.uiMaxNalSize = 0; + param.bIsLosslessLink = false; + for (int32_t iLayer = 0; iLayer < MAX_SPATIAL_LAYER_NUM; iLayer++) { + param.sSpatialLayers[iLayer].uiProfileIdc = PRO_UNKNOWN; + param.sSpatialLayers[iLayer].uiLevelIdc = LEVEL_UNKNOWN; + param.sSpatialLayers[iLayer].iDLayerQp = SVC_QUALITY_BASE_QP; + param.sSpatialLayers[iLayer].fFrameRate = param.fMaxFrameRate; + + param.sSpatialLayers[iLayer].iMaxSpatialBitrate = UNSPECIFIED_BIT_RATE; + + param.sSpatialLayers[iLayer].sSliceArgument.uiSliceMode = SM_SINGLE_SLICE; + param.sSpatialLayers[iLayer].sSliceArgument.uiSliceNum = 0; //AUTO, using number of CPU cores + param.sSpatialLayers[iLayer].sSliceArgument.uiSliceSizeConstraint = 1500; + + param.sSpatialLayers[iLayer].bAspectRatioPresent = false; // do not write any of the following information to the header + param.sSpatialLayers[iLayer].eAspectRatio = ASP_UNSPECIFIED; + param.sSpatialLayers[iLayer].sAspectRatioExtWidth = 0; + param.sSpatialLayers[iLayer].sAspectRatioExtHeight = 0; + + const int32_t kiLesserSliceNum = ((MAX_SLICES_NUM < MAX_SLICES_NUM_TMP) ? MAX_SLICES_NUM : MAX_SLICES_NUM_TMP); + for (int32_t idx = 0; idx < kiLesserSliceNum; idx++) + param.sSpatialLayers[iLayer].sSliceArgument.uiSliceMbNum[idx] = 0; //default, using one row a slice if uiSliceMode is SM_RASTER_MODE + + // See codec_app_def.h for more info about members bVideoSignalTypePresent through uiColorMatrix. 
The default values + // used below preserve the previous behavior; i.e., no additional information will be written to the output file. + param.sSpatialLayers[iLayer].bVideoSignalTypePresent = false; // do not write any of the following information to the header + param.sSpatialLayers[iLayer].uiVideoFormat = VF_UNDEF; // undefined + param.sSpatialLayers[iLayer].bFullRange = false; // analog video data range [16, 235] + param.sSpatialLayers[iLayer].bColorDescriptionPresent = false; // do not write any of the following three items to the header + param.sSpatialLayers[iLayer].uiColorPrimaries = CP_UNDEF; // undefined + param.sSpatialLayers[iLayer].uiTransferCharacteristics = TRC_UNDEF; // undefined + param.sSpatialLayers[iLayer].uiColorMatrix = CM_UNDEF; // undefined + } + } + + void FillDefault() { + FillDefault (*this); + uiGopSize = 1; // GOP size (at maximal frame rate: 16) + iMaxNumRefFrame = AUTO_REF_PIC_COUNT; + SUsedPicRect.iLeft = + SUsedPicRect.iTop = + SUsedPicRect.iWidth = + SUsedPicRect.iHeight = 0; // the rect in input picture that encoder actually used + + pCurPath = NULL; // record current lib path such as:/pData/pData/com.wels.enc/lib/ + + bDeblockingParallelFlag = false;// deblocking filter parallelization control flag + + iDecompStages = 0; // GOP size dependency, unknown here and be revised later + iBitsVaryPercentage = 10; + } + + int32_t ParamBaseTranscode (const SEncParamBase& pCodingParam) { + + fMaxFrameRate = WELS_CLIP3 (pCodingParam.fMaxFrameRate, MIN_FRAME_RATE, MAX_FRAME_RATE); + iTargetBitrate = pCodingParam.iTargetBitrate; + iUsageType = pCodingParam.iUsageType; + iPicWidth = pCodingParam.iPicWidth; + iPicHeight = pCodingParam.iPicHeight; + + SUsedPicRect.iLeft = 0; + SUsedPicRect.iTop = 0; + SUsedPicRect.iWidth = ((iPicWidth >> 1) * (1 << 1)); + SUsedPicRect.iHeight = ((iPicHeight >> 1) * (1 << 1)); + + iRCMode = pCodingParam.iRCMode; // rc mode + + int8_t iIdxSpatial = 0; + EProfileIdc uiProfileIdc = PRO_UNKNOWN; + if 
(iEntropyCodingModeFlag) + uiProfileIdc = PRO_MAIN; + SSpatialLayerInternal* pDlp = &sDependencyLayers[0]; + + while (iIdxSpatial < iSpatialLayerNum) { + /* every write below is indexed by iIdxSpatial so each layer gets its own settings and layer 0 is not overwritten */ + sSpatialLayers[iIdxSpatial].uiProfileIdc = uiProfileIdc; + sSpatialLayers[iIdxSpatial].uiLevelIdc = LEVEL_UNKNOWN; + sSpatialLayers[iIdxSpatial].fFrameRate = WELS_CLIP3 (pCodingParam.fMaxFrameRate, + MIN_FRAME_RATE, MAX_FRAME_RATE); + pDlp->fInputFrameRate = + pDlp->fOutputFrameRate = WELS_CLIP3 (sSpatialLayers[iIdxSpatial].fFrameRate, MIN_FRAME_RATE, + MAX_FRAME_RATE); +#ifdef ENABLE_FRAME_DUMP + pDlp->sRecFileName[0] = '\0'; // file to be constructed +#endif//ENABLE_FRAME_DUMP + pDlp->iActualWidth = sSpatialLayers[iIdxSpatial].iVideoWidth = iPicWidth; + pDlp->iActualHeight = sSpatialLayers[iIdxSpatial].iVideoHeight = iPicHeight; + + /* target bitrate for current spatial layer */ + sSpatialLayers[iIdxSpatial].iSpatialBitrate = pCodingParam.iTargetBitrate; + + sSpatialLayers[iIdxSpatial].iMaxSpatialBitrate = UNSPECIFIED_BIT_RATE; + sSpatialLayers[iIdxSpatial].iDLayerQp = SVC_QUALITY_BASE_QP; + + uiProfileIdc = (!bSimulcastAVC) ?
PRO_SCALABLE_BASELINE : uiProfileIdc; + ++ pDlp; + ++ iIdxSpatial; + } + SetActualPicResolution(); + + return 0; + } + void GetBaseParams (SEncParamBase* pCodingParam) { + pCodingParam->iUsageType = iUsageType; + pCodingParam->iPicWidth = iPicWidth; + pCodingParam->iPicHeight = iPicHeight; + pCodingParam->iTargetBitrate = iTargetBitrate; + pCodingParam->iRCMode = iRCMode; + pCodingParam->fMaxFrameRate = fMaxFrameRate; + } + int32_t ParamTranscode (const SEncParamExt& pCodingParam) { + float fParamMaxFrameRate = WELS_CLIP3 (pCodingParam.fMaxFrameRate, MIN_FRAME_RATE, MAX_FRAME_RATE); + + iUsageType = pCodingParam.iUsageType; + iPicWidth = pCodingParam.iPicWidth; + iPicHeight = pCodingParam.iPicHeight; + fMaxFrameRate = fParamMaxFrameRate; + iComplexityMode = pCodingParam.iComplexityMode; + + SUsedPicRect.iLeft = 0; + SUsedPicRect.iTop = 0; + SUsedPicRect.iWidth = ((iPicWidth >> 1) << 1); + SUsedPicRect.iHeight = ((iPicHeight >> 1) << 1); + + iMultipleThreadIdc = pCodingParam.iMultipleThreadIdc; + bUseLoadBalancing = pCodingParam.bUseLoadBalancing; + + /* Deblocking loop filter */ + iLoopFilterDisableIdc = pCodingParam.iLoopFilterDisableIdc; // 0: on, 1: off, 2: on except for slice boundaries, + iLoopFilterAlphaC0Offset = pCodingParam.iLoopFilterAlphaC0Offset; // AlphaOffset: valid range [-6, 6], default 0 + iLoopFilterBetaOffset = pCodingParam.iLoopFilterBetaOffset; // BetaOffset: valid range [-6, 6], default 0 + iEntropyCodingModeFlag = pCodingParam.iEntropyCodingModeFlag; + bEnableFrameCroppingFlag = pCodingParam.bEnableFrameCroppingFlag; + + /* Rate Control */ + iRCMode = pCodingParam.iRCMode; // rc mode + bSimulcastAVC = pCodingParam.bSimulcastAVC; + iPaddingFlag = pCodingParam.iPaddingFlag; + + iTargetBitrate = pCodingParam.iTargetBitrate; // target bitrate + iMaxBitrate = pCodingParam.iMaxBitrate; + if ((iMaxBitrate != UNSPECIFIED_BIT_RATE) && (iMaxBitrate < iTargetBitrate)) { + iMaxBitrate = iTargetBitrate; + } + iMaxQp = pCodingParam.iMaxQp; + iMinQp = 
pCodingParam.iMinQp; + uiMaxNalSize = pCodingParam.uiMaxNalSize; + /* Denoise Control */ + bEnableDenoise = pCodingParam.bEnableDenoise ? true : false; // Denoise Control // only support 0 or 1 now + + /* Scene change detection control */ + bEnableSceneChangeDetect = pCodingParam.bEnableSceneChangeDetect; + + /* Background detection Control */ + bEnableBackgroundDetection = pCodingParam.bEnableBackgroundDetection ? true : false; + + /* Adaptive quantization control */ + bEnableAdaptiveQuant = pCodingParam.bEnableAdaptiveQuant ? true : false; + + /* Frame skipping */ + bEnableFrameSkip = pCodingParam.bEnableFrameSkip ? true : false; + + /* Enable int32_t term reference */ + bEnableLongTermReference = pCodingParam.bEnableLongTermReference ? true : false; + iLtrMarkPeriod = pCodingParam.iLtrMarkPeriod; + bIsLosslessLink = pCodingParam.bIsLosslessLink; + if (iUsageType == SCREEN_CONTENT_REAL_TIME && !bIsLosslessLink && bEnableLongTermReference) { + bEnableLongTermReference = false; + } + + /* For ssei information */ + bEnableSSEI = pCodingParam.bEnableSSEI; + bSimulcastAVC = pCodingParam.bSimulcastAVC; + + /* Layer definition */ + iSpatialLayerNum = (int8_t)WELS_CLIP3 (pCodingParam.iSpatialLayerNum, 1, + MAX_DEPENDENCY_LAYER); // number of dependency(Spatial/CGS) layers used to be encoded + iTemporalLayerNum = (int8_t)WELS_CLIP3 (pCodingParam.iTemporalLayerNum, 1, + MAX_TEMPORAL_LEVEL); // number of temporal layer specified + + uiGopSize = 1 << (iTemporalLayerNum - 1); // Override GOP size based temporal layer + iDecompStages = iTemporalLayerNum - 1; // WELS_LOG2( uiGopSize );// GOP size dependency + uiIntraPeriod = pCodingParam.uiIntraPeriod;// intra period (multiple of GOP size as desired) + if (uiIntraPeriod == (uint32_t) (-1)) + uiIntraPeriod = 0; + else if (uiIntraPeriod & (uiGopSize - 1)) // none multiple of GOP size + uiIntraPeriod = ((uiIntraPeriod + uiGopSize - 1) / uiGopSize) * uiGopSize; + + if (((pCodingParam.iNumRefFrame != AUTO_REF_PIC_COUNT) + && 
!((pCodingParam.iNumRefFrame > MAX_REF_PIC_COUNT) || (pCodingParam.iNumRefFrame < MIN_REF_PIC_COUNT))) + || ((iNumRefFrame != AUTO_REF_PIC_COUNT) && (pCodingParam.iNumRefFrame == AUTO_REF_PIC_COUNT))) { + iNumRefFrame = pCodingParam.iNumRefFrame; + } + if ((iNumRefFrame != AUTO_REF_PIC_COUNT) && (iNumRefFrame > iMaxNumRefFrame)) { + iMaxNumRefFrame = iNumRefFrame; + } + iLTRRefNum = (pCodingParam.bEnableLongTermReference ? pCodingParam.iLTRRefNum : 0); + iLtrMarkPeriod = pCodingParam.iLtrMarkPeriod; + + bPrefixNalAddingCtrl = pCodingParam.bPrefixNalAddingCtrl; + + if ( (CONSTANT_ID == pCodingParam.eSpsPpsIdStrategy) + || (INCREASING_ID == pCodingParam.eSpsPpsIdStrategy) + || (SPS_LISTING == pCodingParam.eSpsPpsIdStrategy) + || (SPS_LISTING_AND_PPS_INCREASING == pCodingParam.eSpsPpsIdStrategy) + || (SPS_PPS_LISTING == pCodingParam.eSpsPpsIdStrategy)) { + eSpsPpsIdStrategy = + pCodingParam.eSpsPpsIdStrategy;//For SVC meeting application, to avoid mosaic issue caused by cross-IDR reference. + //SHOULD enable this feature. + } else { + // keep the default value + } + + SSpatialLayerInternal* pDlp = &sDependencyLayers[0]; + SSpatialLayerConfig* pSpatialLayer = &sSpatialLayers[0]; + EProfileIdc uiProfileIdc = iEntropyCodingModeFlag ? PRO_HIGH : PRO_BASELINE; + int8_t iIdxSpatial = 0; + while (iIdxSpatial < iSpatialLayerNum) { + pSpatialLayer->uiProfileIdc = (pCodingParam.sSpatialLayers[iIdxSpatial].uiProfileIdc == PRO_UNKNOWN) ? 
uiProfileIdc : + pCodingParam.sSpatialLayers[iIdxSpatial].uiProfileIdc; + pSpatialLayer->uiLevelIdc = pCodingParam.sSpatialLayers[iIdxSpatial].uiLevelIdc; + + float fLayerFrameRate = WELS_CLIP3 (pCodingParam.sSpatialLayers[iIdxSpatial].fFrameRate, + MIN_FRAME_RATE, fParamMaxFrameRate); + pDlp->fInputFrameRate = fParamMaxFrameRate; + pSpatialLayer->fFrameRate = + pDlp->fOutputFrameRate = WELS_CLIP3 (fLayerFrameRate, MIN_FRAME_RATE, fParamMaxFrameRate); + +#ifdef ENABLE_FRAME_DUMP + pDlp->sRecFileName[0] = '\0'; // file to be constructed +#endif//ENABLE_FRAME_DUMP + pSpatialLayer->iVideoWidth = WELS_CLIP3 (pCodingParam.sSpatialLayers[iIdxSpatial].iVideoWidth, 0, + iPicWidth); // frame width + pSpatialLayer->iVideoHeight = WELS_CLIP3 (pCodingParam.sSpatialLayers[iIdxSpatial].iVideoHeight, 0, + iPicHeight);// frame height + + pSpatialLayer->iSpatialBitrate = + pCodingParam.sSpatialLayers[iIdxSpatial].iSpatialBitrate; // target bitrate for current spatial layer + pSpatialLayer->iMaxSpatialBitrate = + pCodingParam.sSpatialLayers[iIdxSpatial].iMaxSpatialBitrate; + + if ((iSpatialLayerNum==1) && (iIdxSpatial==0)) { + if (pSpatialLayer->iVideoWidth == 0) { + pSpatialLayer->iVideoWidth = iPicWidth; + } + if (pSpatialLayer->iVideoHeight == 0) { + pSpatialLayer->iVideoHeight = iPicHeight; + } + if (pSpatialLayer->iSpatialBitrate == 0) { + pSpatialLayer->iSpatialBitrate = iTargetBitrate; + } + if (pSpatialLayer->iMaxSpatialBitrate == 0) { + pSpatialLayer->iMaxSpatialBitrate = iMaxBitrate; + } + } + + //multi slice + pSpatialLayer->sSliceArgument = pCodingParam.sSpatialLayers[iIdxSpatial].sSliceArgument; + + memcpy (&(pSpatialLayer->sSliceArgument), + &(pCodingParam.sSpatialLayers[iIdxSpatial].sSliceArgument), // confirmed_safe_unsafe_usage + sizeof (SSliceArgument)) ; + + pSpatialLayer->iDLayerQp = pCodingParam.sSpatialLayers[iIdxSpatial].iDLayerQp; + + // See codec_app_def.h and parameter_sets.h for more info about members bVideoSignalTypePresent through uiColorMatrix. 
+ pSpatialLayer->bVideoSignalTypePresent = pCodingParam.sSpatialLayers[iIdxSpatial].bVideoSignalTypePresent; + pSpatialLayer->uiVideoFormat = pCodingParam.sSpatialLayers[iIdxSpatial].uiVideoFormat; + pSpatialLayer->bFullRange = pCodingParam.sSpatialLayers[iIdxSpatial].bFullRange; + pSpatialLayer->bColorDescriptionPresent = pCodingParam.sSpatialLayers[iIdxSpatial].bColorDescriptionPresent; + pSpatialLayer->uiColorPrimaries = pCodingParam.sSpatialLayers[iIdxSpatial].uiColorPrimaries; + pSpatialLayer->uiTransferCharacteristics = pCodingParam.sSpatialLayers[iIdxSpatial].uiTransferCharacteristics; + pSpatialLayer->uiColorMatrix = pCodingParam.sSpatialLayers[iIdxSpatial].uiColorMatrix; + + pSpatialLayer->bAspectRatioPresent = pCodingParam.sSpatialLayers[iIdxSpatial].bAspectRatioPresent; + pSpatialLayer->eAspectRatio = pCodingParam.sSpatialLayers[iIdxSpatial].eAspectRatio; + pSpatialLayer->sAspectRatioExtWidth = pCodingParam.sSpatialLayers[iIdxSpatial].sAspectRatioExtWidth; + pSpatialLayer->sAspectRatioExtHeight = pCodingParam.sSpatialLayers[iIdxSpatial].sAspectRatioExtHeight; + + uiProfileIdc = (!bSimulcastAVC) ? PRO_SCALABLE_BASELINE : uiProfileIdc; //it is used in the D>0 layer if SVC is applied, so set to PRO_SCALABLE_BASELINE + ++ pDlp; + ++ pSpatialLayer; + ++ iIdxSpatial; + } + + SetActualPicResolution(); + + return 0; + } + +// assuming that the width/height ratio of all spatial layers are the same + + void SetActualPicResolution() { + int32_t iSpatialIdx = iSpatialLayerNum - 1; + for (; iSpatialIdx >= 0; iSpatialIdx --) { + SSpatialLayerInternal* pDlayerInternal = &sDependencyLayers[iSpatialIdx]; + SSpatialLayerConfig* pDlayer = &sSpatialLayers[iSpatialIdx]; + + pDlayerInternal->iActualWidth = pDlayer->iVideoWidth; + pDlayerInternal->iActualHeight = pDlayer->iVideoHeight; + pDlayer->iVideoWidth = WELS_ALIGN (pDlayerInternal->iActualWidth, MB_WIDTH_LUMA); + pDlayer->iVideoHeight = WELS_ALIGN (pDlayerInternal->iActualHeight, MB_HEIGHT_LUMA); + } + } + + /*! 
+ * \brief determined key coding tables for temporal scalability, uiProfileIdc etc for each spatial layer settings + * \param SWelsSvcCodingParam, and carried with known GOP size, max, input and output frame rate of each spatial + * \return NONE (should ensure valid parameter before this procedure) + */ + int32_t DetermineTemporalSettings() { + const int32_t iDecStages = WELS_LOG2 (uiGopSize); // (int8_t)GetLogFactor(1.0f, 1.0f * pcfg->uiGopSize); //log2(uiGopSize) + const uint8_t* pTemporalIdList = &g_kuiTemporalIdListTable[iDecStages][0]; + SSpatialLayerInternal* pDlp = &sDependencyLayers[0]; + SSpatialLayerConfig* pSpatialLayer = &sSpatialLayers[0]; + int8_t i = 0; + + while (i < iSpatialLayerNum) { + const uint32_t kuiLogFactorInOutRate = GetLogFactor (pDlp->fOutputFrameRate, pDlp->fInputFrameRate); + const uint32_t kuiLogFactorMaxInRate = GetLogFactor (pDlp->fInputFrameRate, fMaxFrameRate); + if (UINT_MAX == kuiLogFactorInOutRate || UINT_MAX == kuiLogFactorMaxInRate) { + return ENC_RETURN_INVALIDINPUT; + } + int32_t iNotCodedMask = 0; + int8_t iMaxTemporalId = 0; + + memset (pDlp->uiCodingIdx2TemporalId, INVALID_TEMPORAL_ID, sizeof (pDlp->uiCodingIdx2TemporalId)); + iNotCodedMask = (1 << (kuiLogFactorInOutRate + kuiLogFactorMaxInRate)) - 1; + for (uint32_t uiFrameIdx = 0; uiFrameIdx <= uiGopSize; ++ uiFrameIdx) { + if (0 == (uiFrameIdx & iNotCodedMask)) { + const int8_t kiTemporalId = pTemporalIdList[uiFrameIdx]; + pDlp->uiCodingIdx2TemporalId[uiFrameIdx] = kiTemporalId; + if (kiTemporalId > iMaxTemporalId) { + iMaxTemporalId = kiTemporalId; + } + } + } + + pDlp->iHighestTemporalId = iMaxTemporalId; + pDlp->iTemporalResolution = kuiLogFactorMaxInRate + kuiLogFactorInOutRate; + pDlp->iDecompositionStages = iDecStages - kuiLogFactorMaxInRate - kuiLogFactorInOutRate; + if (pDlp->iDecompositionStages < 0) { + return ENC_RETURN_INVALIDINPUT; + } + ++ pDlp; + ++ pSpatialLayer; + ++ i; + } + iDecompStages = (int8_t)iDecStages; + return ENC_RETURN_SUCCESS; + } + +} 
SWelsSvcCodingParam; + + +typedef struct TagExistingParasetList { + SWelsSPS sSps[MAX_SPS_COUNT]; + SSubsetSps sSubsetSps[MAX_SPS_COUNT]; + SWelsPPS sPps[MAX_PPS_COUNT]; + + uint32_t uiInUseSpsNum; + uint32_t uiInUseSubsetSpsNum; + uint32_t uiInUsePpsNum; +} SExistingParasetList; + + +static inline int32_t FreeCodingParam (SWelsSvcCodingParam** pParam, CMemoryAlign* pMa) { + if (pParam == NULL || *pParam == NULL || pMa == NULL) + return 1; + pMa->WelsFree (*pParam, "SWelsSvcCodingParam"); + *pParam = NULL; + return 0; +} + +static inline int32_t AllocCodingParam (SWelsSvcCodingParam** pParam, CMemoryAlign* pMa) { + if (pParam == NULL || pMa == NULL) + return 1; + if (*pParam != NULL) { + FreeCodingParam (pParam, pMa); + } + SWelsSvcCodingParam* pCodingParam = (SWelsSvcCodingParam*)pMa->WelsMallocz (sizeof (SWelsSvcCodingParam), + "SWelsSvcCodingParam"); + if (NULL == pCodingParam) + return 1; + *pParam = pCodingParam; + return 0; +} + +}//end of namespace WelsEnc + +#endif//WELS_ENCODER_PARAMETER_SVC_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/parameter_sets.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/parameter_sets.h new file mode 100644 index 000000000..1f85da055 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/parameter_sets.h @@ -0,0 +1,181 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef WELS_PARAMETER_SETS_H__ +#define WELS_PARAMETER_SETS_H__ + +#include "typedefs.h" +#include "wels_const.h" +#include "wels_common_basis.h" + +namespace WelsEnc { + +/* Sequence Parameter Set, refer to Page 57 in JVT X201wcm */ +typedef struct TagWelsSPS { +uint32_t uiSpsId; +int16_t iMbWidth; +int16_t iMbHeight; +uint32_t uiLog2MaxFrameNum; +// uint32_t uiPocType; +/* POC type 0 */ +int32_t iLog2MaxPocLsb; +/* POC type 1 */ +// int32_t iOffsetForNonRefPic; + +// int32_t iOffsetForTopToBottomField; +// int32_t iNumRefFramesInPocCycle; +// int8_t iOffsetForRefFrame[256]; +SCropOffset sFrameCrop; +int16_t iNumRefFrames; +// uint32_t uiNumUnitsInTick; +// uint32_t uiTimeScale; + +uint8_t uiProfileIdc; +uint8_t iLevelIdc; +// uint8_t uiChromaFormatIdc; +// uint8_t uiChromaArrayType; //support =1 + +// uint8_t uiBitDepthLuma; //=8, only used in decoder, encoder in general_***; it can be removed when removed general up_sample +// uint8_t uiBitDepthChroma; //=8 +/* TO BE CONTINUE: POC type 1 */ +// bool bDeltaPicOrderAlwaysZeroFlag; +bool bGapsInFrameNumValueAllowedFlag; + +// bool bFrameMbsOnlyFlag; +// bool bMbaffFlag; // MB Adapative Frame Field +// bool 
bDirect8x8InferenceFlag; +bool bFrameCroppingFlag; + +bool bVuiParamPresentFlag; +// bool bTimingInfoPresentFlag; +// bool bFixedFrameRateFlag; + +// Note: members bVideoSignalTypePresent through uiColorMatrix below are also defined in SSpatialLayerConfig in codec_app_def.h, +// along with definitions for enumerators EVideoFormatSPS, EColorPrimaries, ETransferCharacteristics, and EColorMatrix. +bool bVideoSignalTypePresent; // false => do not write any of the following information to the header +uint8_t uiVideoFormat; // EVideoFormatSPS; 3 bits in header; 0-5 => component, kpal, ntsc, secam, mac, undef +bool bFullRange; // false => analog video data range [16, 235]; true => full data range [0,255] +bool bColorDescriptionPresent; // false => do not write any of the following three items to the header +uint8_t uiColorPrimaries; // EColorPrimaries; 8 bits in header; 0 - 9 => ???, bt709, undef, ???, bt470m, bt470bg, + // smpte170m, smpte240m, film, bt2020 +uint8_t uiTransferCharacteristics; // ETransferCharacteristics; 8 bits in header; 0 - 15 => ???, bt709, undef, ???, bt470m, bt470bg, smpte170m, + // smpte240m, linear, log100, log316, iec61966-2-4, bt1361e, iec61966-2-1, bt2020-10, bt2020-12 +uint8_t uiColorMatrix; // EColorMatrix; 8 bits in header (corresponds to FFmpeg "colorspace"); 0 - 10 => GBR, bt709, + // undef, ???, fcc, bt470bg, smpte170m, smpte240m, YCgCo, bt2020nc, bt2020c + +bool bConstraintSet0Flag; +bool bConstraintSet1Flag; +bool bConstraintSet2Flag; +bool bConstraintSet3Flag; +// bool bSeparateColorPlaneFlag; // =false,: only used in decoder, encoder in general_***; it can be removed when removed general up_sample + +// aspect ratio in VUI +bool bAspectRatioPresent; +ESampleAspectRatio eAspectRatio; +uint16_t sAspectRatioExtWidth; +uint16_t sAspectRatioExtHeight; + +} SWelsSPS, *PWelsSPS; + + +/* Sequence Parameter Set SVC extension syntax, refer to Page 391 in JVT X201wcm */ +typedef struct TagSpsSvcExt { +// SCropOffset sSeqScaledRefLayer; + 
+uint8_t iExtendedSpatialScalability; // ESS +// uint8_t uiChromaPhaseXPlus1Flag; +// uint8_t uiChromaPhaseYPlus1; +// uint8_t uiSeqRefLayerChromaPhaseXPlus1Flag; +// uint8_t uiSeqRefLayerChromaPhaseYPlus1; +// bool bInterLayerDeblockingFilterCtrlPresentFlag; +bool bSeqTcoeffLevelPredFlag; +bool bAdaptiveTcoeffLevelPredFlag; +bool bSliceHeaderRestrictionFlag; +} SSpsSvcExt, *PSpsSvcExt; + +/* Subset sequence parameter set syntax, refer to Page 391 in JVT X201wcm */ +typedef struct TagSubsetSps { +SWelsSPS pSps; +SSpsSvcExt sSpsSvcExt; + +// bool bSvcVuiParamPresentFlag; +// bool bAdditionalExtension2Flag; +// bool bAdditionalExtension2DataFlag; +} SSubsetSps, *PSubsetSps; + +/* Picture parameter set syntax, refer to Page 59 in JVT X201wcm */ +typedef struct TagWelsPPS { +uint32_t iSpsId; +uint32_t iPpsId; + +#if !defined(DISABLE_FMO_FEATURE) +uint32_t uiNumSliceGroups; +uint32_t uiSliceGroupMapType; +/* uiSliceGroupMapType = 0 */ +uint32_t uiRunLength[MAX_SLICEGROUP_IDS]; +/* uiSliceGroupMapType = 2 */ +uint32_t uiTopLeft[MAX_SLICEGROUP_IDS]; +uint32_t uiBottomRight[MAX_SLICEGROUP_IDS]; +/* uiSliceGroupMapType = 3, 4 or 5 */ +/* uiSliceGroupMapType = 3, 4 or 5 */ +bool bSliceGroupChangeDirectionFlag; +uint32_t uiSliceGroupChangeRate; +/* uiSliceGroupMapType = 6 */ +uint32_t uiPicSizeInMapUnits; +uint32_t uiSliceGroupId[MAX_SLICEGROUP_IDS]; +#endif//!DISABLE_FMO_FEATURE + +// uint32_t uiNumRefIdxL0Active; +// uint32_t uiNumRefIdxL1Active; + +int8_t iPicInitQp; +int8_t iPicInitQs; +uint8_t uiChromaQpIndexOffset; + +/* potential application for High profile */ +// int32_t iSecondChromaQpIndexOffset; +// /* potential application for High profile */ + +// bool bPicOrderPresentFlag; +bool bEntropyCodingModeFlag; +bool bDeblockingFilterControlPresentFlag; + +// bool bConstainedIntraPredFlag; +// bool bRedundantPicCntPresentFlag; +// bool bWeightedPredFlag; +// uint8_t uiWeightedBiPredIdc; + +} SWelsPPS, *PWelsPPPS; + +} + +#endif //WELS_PARAMETER_SETS_H__ diff --git 
a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/paraset_strategy.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/paraset_strategy.h new file mode 100644 index 000000000..77a83570d --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/paraset_strategy.h @@ -0,0 +1,310 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifndef WELS_PARASET_STRATEGY_H +#define WELS_PARASET_STRATEGY_H + +#include "param_svc.h" +#include "utils.h" + +namespace WelsEnc { + +class IWelsParametersetStrategy { + public: + virtual ~IWelsParametersetStrategy() { } + + static IWelsParametersetStrategy* CreateParametersetStrategy (EParameterSetStrategy eSpsPpsIdStrategy, + const bool bSimulcastAVC, const int32_t kiSpatialLayerNum); + + //virtual SParaSetOffset* GetParaSetOffset() = 0; + + virtual int32_t GetPpsIdOffset (const int32_t iPpsId) = 0; + virtual int32_t GetSpsIdOffset (const int32_t iPpsId, const int32_t iSpsId) = 0; + virtual int32_t* GetSpsIdOffsetList (const int iParasetType) = 0; + + virtual uint32_t GetAllNeededParasetNum() = 0; + + virtual uint32_t GetNeededSpsNum() = 0; + virtual uint32_t GetNeededSubsetSpsNum() = 0; + virtual uint32_t GetNeededPpsNum() = 0; + + virtual void LoadPrevious (SExistingParasetList* pExistingParasetList, SWelsSPS* pSpsArray, + SSubsetSps* pSubsetArray, + SWelsPPS* pPpsArray) = 0; + + virtual void Update (const uint32_t kuiId, const int iParasetType) = 0; + virtual void UpdatePpsList (sWelsEncCtx* pCtx) = 0; + + virtual bool CheckParamCompatibility (SWelsSvcCodingParam* pCodingParam, SLogContext* pLogCtx) = 0; + + virtual uint32_t GenerateNewSps (sWelsEncCtx* pCtx, const bool kbUseSubsetSps, const int32_t iDlayerIndex, + const int32_t iDlayerCount, + uint32_t kuiSpsId, + SWelsSPS*& pSps, SSubsetSps*& pSubsetSps, bool bSVCBaselayer) = 0; + + virtual uint32_t InitPps (sWelsEncCtx* pCtx, uint32_t kiSpsId, + SWelsSPS* pSps, + SSubsetSps* pSubsetSps, + uint32_t kuiPpsId, + const bool kbDeblockingFilterPresentFlag, + const bool kbUsingSubsetSps, + const bool kbEntropyCodingModeFlag) = 0; + + virtual void SetUseSubsetFlag (const uint32_t iPpsId, const bool bUseSubsetSps) = 0; + + virtual void UpdateParaSetNum (sWelsEncCtx* pCtx) = 0; + + virtual int32_t GetCurrentPpsId (const int32_t iPpsId, const int32_t iIdrLoop) = 0; + + virtual void 
OutputCurrentStructure (SParaSetOffsetVariable* pParaSetOffsetVariable, int32_t* pPpsIdList, + sWelsEncCtx* pCtx, SExistingParasetList* pExistingParasetList) = 0; + virtual void LoadPreviousStructure (SParaSetOffsetVariable* pParaSetOffsetVariable, int32_t* pPpsIdList) = 0; + + virtual int32_t GetSpsIdx (const int32_t iIdx) = 0; +}; + + +class CWelsParametersetIdConstant : public IWelsParametersetStrategy { + public: + + CWelsParametersetIdConstant (const bool bSimulcastAVC, const int32_t kiSpatialLayerNum); + virtual ~ CWelsParametersetIdConstant(); + + virtual int32_t GetPpsIdOffset (const int32_t iPpsId); + virtual int32_t GetSpsIdOffset (const int32_t iPpsId, const int32_t iSpsId); + int32_t* GetSpsIdOffsetList (const int iParasetType); + + uint32_t GetAllNeededParasetNum(); + + virtual uint32_t GetNeededSpsNum(); + virtual uint32_t GetNeededSubsetSpsNum(); + virtual uint32_t GetNeededPpsNum(); + + virtual void LoadPrevious (SExistingParasetList* pExistingParasetList, SWelsSPS* pSpsArray, + SSubsetSps* pSubsetArray, + SWelsPPS* pPpsArray); + + virtual void Update (const uint32_t kuiId, const int iParasetType); + virtual void UpdatePpsList (sWelsEncCtx* pCtx) {}; + + bool CheckParamCompatibility (SWelsSvcCodingParam* pCodingParam, SLogContext* pLogCtx) { + return true; + }; + + virtual uint32_t GenerateNewSps (sWelsEncCtx* pCtx, const bool kbUseSubsetSps, const int32_t iDlayerIndex, + const int32_t iDlayerCount, uint32_t kuiSpsId, + SWelsSPS*& pSps, SSubsetSps*& pSubsetSps, bool bSVCBaselayer); + + virtual uint32_t InitPps (sWelsEncCtx* pCtx, uint32_t kiSpsId, + SWelsSPS* pSps, + SSubsetSps* pSubsetSps, + uint32_t kuiPpsId, + const bool kbDeblockingFilterPresentFlag, + const bool kbUsingSubsetSps, + const bool kbEntropyCodingModeFlag); + + virtual void SetUseSubsetFlag (const uint32_t iPpsId, const bool bUseSubsetSps); + + virtual void UpdateParaSetNum (sWelsEncCtx* pCtx) {}; + + virtual int32_t GetCurrentPpsId (const int32_t iPpsId, const int32_t iIdrLoop) { + 
return iPpsId; + }; + + virtual void OutputCurrentStructure (SParaSetOffsetVariable* pParaSetOffsetVariable, int32_t* pPpsIdList, + sWelsEncCtx* pCtx, + SExistingParasetList* pExistingParasetList) {}; + virtual void LoadPreviousStructure (SParaSetOffsetVariable* pParaSetOffsetVariable, int32_t* pPpsIdList) {}; + + virtual int32_t GetSpsIdx (const int32_t iIdx) { + return 0; + }; + protected: + + virtual void LoadPreviousSps (SExistingParasetList* pExistingParasetList, SWelsSPS* pSpsArray, + SSubsetSps* pSubsetArray) {}; + virtual void LoadPreviousPps (SExistingParasetList* pExistingParasetList, SWelsPPS* pPpsArray) {}; + + protected: + SParaSetOffset m_sParaSetOffset; + bool m_bSimulcastAVC; + int32_t m_iSpatialLayerNum; + + uint32_t m_iBasicNeededSpsNum; + uint32_t m_iBasicNeededPpsNum; +}; + +/* + typedef struct TagParaSetOffsetVariable { + int32_t iParaSetIdDelta[MAX_DQ_LAYER_NUM+1];//mark delta between SPS_ID_in_bs and sps_id_in_encoder, can be minus, for each dq-layer + //need not extra +1 due no MGS and FMO case so far + bool bUsedParaSetIdInBs[MAX_PPS_COUNT]; //mark the used SPS_ID with 1 + uint32_t uiNextParaSetIdToUseInBs; //mark the next SPS_ID_in_bs, for all layers + } SParaSetOffsetVariable; + + typedef struct TagParaSetOffset { + //in PS0 design, "sParaSetOffsetVariable" record the previous paras before current IDR, AND NEED to be stacked and recover across IDR + SParaSetOffsetVariable + sParaSetOffsetVariable[PARA_SET_TYPE]; //PARA_SET_TYPE=3; paraset_type = 0: AVC_SPS; =1: Subset_SPS; =2: PPS + //in PSO design, "bPpsIdMappingIntoSubsetsps" uses the current para of current IDR period + bool + bPpsIdMappingIntoSubsetsps[MAX_DQ_LAYER_NUM+1]; // need not extra +1 due no MGS and FMO case so far + + int32_t iPpsIdList[MAX_DQ_LAYER_NUM][MAX_PPS_COUNT]; //index0: max pps types; index1: for differnt IDRs, if only index0=1, index1 can reach MAX_PPS_COUNT + + //#if _DEBUG + int32_t eSpsPpsIdStrategy; + //#endif + + uint32_t uiNeededSpsNum; + uint32_t 
uiNeededSubsetSpsNum; + uint32_t uiNeededPpsNum; + + uint32_t uiInUseSpsNum; + uint32_t uiInUseSubsetSpsNum; + uint32_t uiInUsePpsNum; + } SParaSetOffset; + */ + +class CWelsParametersetIdNonConstant : public CWelsParametersetIdConstant { + public: + CWelsParametersetIdNonConstant (const bool bSimulcastAVC, + const int32_t kiSpatialLayerNum): CWelsParametersetIdConstant (bSimulcastAVC, kiSpatialLayerNum) {}; + + virtual void OutputCurrentStructure (SParaSetOffsetVariable* pParaSetOffsetVariable, int32_t* pPpsIdList, + sWelsEncCtx* pCtx, + SExistingParasetList* pExistingParasetList); + virtual void LoadPreviousStructure (SParaSetOffsetVariable* pParaSetOffsetVariable, int32_t* pPpsIdList); +}; + +class CWelsParametersetIdIncreasing : public CWelsParametersetIdNonConstant { + public: + CWelsParametersetIdIncreasing (const bool bSimulcastAVC, + const int32_t kiSpatialLayerNum): CWelsParametersetIdNonConstant (bSimulcastAVC, kiSpatialLayerNum) {}; + + + virtual int32_t GetPpsIdOffset (const int32_t iPpsId); + virtual int32_t GetSpsIdOffset (const int32_t iPpsId, const int32_t iSpsId); + + virtual void Update (const uint32_t kuiId, const int iParasetType); + + protected: + + //void ParasetIdAdditionIdAdjust (SParaSetOffsetVariable* sParaSetOffsetVariable, const int32_t kiCurEncoderParaSetId, + // const uint32_t kuiMaxIdInBs); + + private: + void DebugPps (const int32_t kiPpsId); + void DebugSpsPps (const int32_t iPpsId, const int32_t iSpsId); +}; + + +class CWelsParametersetSpsListing : public CWelsParametersetIdNonConstant { + public: + CWelsParametersetSpsListing (const bool bSimulcastAVC, const int32_t kiSpatialLayerNum); + + virtual uint32_t GetNeededSubsetSpsNum(); + + virtual void LoadPrevious (SExistingParasetList* pExistingParasetList, SWelsSPS* pSpsArray, + SSubsetSps* pSubsetArray, + SWelsPPS* pPpsArray); + + bool CheckParamCompatibility (SWelsSvcCodingParam* pCodingParam, SLogContext* pLogCtx); + + virtual uint32_t GenerateNewSps (sWelsEncCtx* pCtx, const 
bool kbUseSubsetSps, const int32_t iDlayerIndex, + const int32_t iDlayerCount, uint32_t kuiSpsId, + SWelsSPS*& pSps, SSubsetSps*& pSubsetSps, bool bSVCBaselayer); + + virtual void UpdateParaSetNum (sWelsEncCtx* pCtx); + + int32_t GetSpsIdx (const int32_t iIdx) { + return iIdx; + }; + + virtual void OutputCurrentStructure (SParaSetOffsetVariable* pParaSetOffsetVariable, int32_t* pPpsIdList, + sWelsEncCtx* pCtx, + SExistingParasetList* pExistingParasetList); + protected: + virtual void LoadPreviousSps (SExistingParasetList* pExistingParasetList, SWelsSPS* pSpsArray, + SSubsetSps* pSubsetArray); + virtual bool CheckPpsGenerating(); + virtual int32_t SpsReset (sWelsEncCtx* pCtx, bool kbUseSubsetSps); +}; + +class CWelsParametersetSpsPpsListing : public CWelsParametersetSpsListing { + public: + CWelsParametersetSpsPpsListing (const bool bSimulcastAVC, const int32_t kiSpatialLayerNum); + + //uint32_t GetNeededPpsNum(); + + virtual void UpdatePpsList (sWelsEncCtx* pCtx); + + virtual uint32_t InitPps (sWelsEncCtx* pCtx, uint32_t kiSpsId, + SWelsSPS* pSps, + SSubsetSps* pSubsetSps, + uint32_t kuiPpsId, + const bool kbDeblockingFilterPresentFlag, + const bool kbUsingSubsetSps, + const bool kbEntropyCodingModeFlag); + + virtual void UpdateParaSetNum (sWelsEncCtx* pCtx); + + virtual int32_t GetCurrentPpsId (const int32_t iPpsId, const int32_t iIdrLoop); + + virtual void OutputCurrentStructure (SParaSetOffsetVariable* pParaSetOffsetVariable, int32_t* pPpsIdList, + sWelsEncCtx* pCtx, + SExistingParasetList* pExistingParasetList); + virtual void LoadPreviousStructure (SParaSetOffsetVariable* pParaSetOffsetVariable, int32_t* pPpsIdList); + protected: + virtual void LoadPreviousPps (SExistingParasetList* pExistingParasetList, SWelsPPS* pPpsArray); + + virtual bool CheckPpsGenerating(); + virtual int32_t SpsReset (sWelsEncCtx* pCtx, bool kbUseSubsetSps); +}; + +class CWelsParametersetSpsListingPpsIncreasing : public CWelsParametersetSpsListing { + public: + 
CWelsParametersetSpsListingPpsIncreasing (const bool bSimulcastAVC, + const int32_t kiSpatialLayerNum): CWelsParametersetSpsListing (bSimulcastAVC, kiSpatialLayerNum) {}; + + virtual int32_t GetPpsIdOffset (const int32_t kiPpsId); + virtual void Update (const uint32_t kuiId, const int iParasetType); +}; + +int32_t FindExistingSps (SWelsSvcCodingParam* pParam, const bool kbUseSubsetSps, const int32_t iDlayerIndex, + const int32_t iDlayerCount, const int32_t iSpsNumInUse, + SWelsSPS* pSpsArray, + SSubsetSps* pSubsetArray, bool bSVCBaseLayer); + +} + +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/picture.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/picture.h new file mode 100644 index 000000000..fdd4a9043 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/picture.h @@ -0,0 +1,137 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +//picture.h - reconstruction picture/ reference picture/ residual picture are declared here +#ifndef WELS_PICTURE_H__ +#define WELS_PICTURE_H__ + +#include "typedefs.h" +#include "as264_common.h" +#include "wels_common_basis.h" + +namespace WelsEnc { +#define LIST_SIZE 0x10000 //(256*256) +typedef struct TagScreenBlockFeatureStorage { +//Input +uint16_t* pFeatureOfBlockPointer; // Pointer to pFeatureOfBlock +int32_t iIs16x16; //Feature block size +uint8_t uiFeatureStrategyIndex;// index of hash strategy + +//Modify +uint32_t* pTimesOfFeatureValue; // times of every value in Feature +uint16_t** +pLocationOfFeature; // uint16_t *pLocationOfFeature[LIST_SIZE], pLocationOfFeature[i] saves all the location(x,y) whose Feature = i; +uint16_t* pLocationPointer; // buffer of position array +int32_t iActualListSize; // actual list size +uint32_t uiSadCostThreshold[BLOCK_SIZE_ALL]; +bool bRefBlockFeatureCalculated; // flag of whether pre-process is done +uint16_t **pFeatureValuePointerList;//uint16_t* pFeatureValuePointerList[WELS_MAX (LIST_SIZE_SUM_16x16, LIST_SIZE_MSE_16x16)] +} SScreenBlockFeatureStorage; //should be stored with RefPic, one for each frame + +/* + * Reconstructed Picture definition + * It is used to express reference picture, also consequent reconstruction picture for output + */ +typedef struct TagPicture { +/************************************payload pData*********************************/ +uint8_t* 
pBuffer; // pointer to the first allocated byte, basical offset of pBuffer, dimension: +uint8_t* pData[3]; // pointer to picture planes respectively +int32_t iLineSize[3]; // iLineSize of picture planes respectively + +// picture information +/*******************************from other standard syntax****************************/ +/*from pSps*/ +int32_t iWidthInPixel; // picture width in pixel +int32_t iHeightInPixel;// picture height in pixel +int32_t iPictureType; // got from sSliceHeader(): eSliceType +int32_t iFramePoc; // frame POC + +float fFrameRate; // MOVE +int32_t iFrameNum; // frame number //for pRef pic management + +uint32_t* uiRefMbType; // for iMbWidth*iMbHeight +uint8_t* pRefMbQp; // for iMbWidth*iMbHeight + +int32_t* pMbSkipSad; //for iMbWidth*iMbHeight + +SMVUnitXY* sMvList; + +/*******************************sef_definition for misc use****************************/ +int32_t iMarkFrameNum; +int32_t iLongTermPicNum; + +bool bUsedAsRef; //for pRef pic management +bool bIsLongRef; // long term reference frame flag //for pRef pic management +bool bIsSceneLTR; //long term reference & large scene change +uint8_t uiRecieveConfirmed; +uint8_t uiTemporalId; +uint8_t uiSpatialId; +int32_t iFrameAverageQp; + +/*******************************for screen reference frames****************************/ +SScreenBlockFeatureStorage* pScreenBlockFeatureStorage; + + /* + * set picture as unreferenced + */ + void SetUnref () { + iFramePoc = -1; + iFrameNum = -1; + uiTemporalId = + uiSpatialId = + iLongTermPicNum = -1; + bIsLongRef = false; + uiRecieveConfirmed = RECIEVE_FAILED; + iMarkFrameNum = -1; + bUsedAsRef = false; + + if (NULL != pScreenBlockFeatureStorage) + pScreenBlockFeatureStorage->bRefBlockFeatureCalculated = false; + } + +} SPicture; + +/* + * Residual Picture + */ +//typedef struct Rs_Picture_s{ +// int16_t *pBuffer[4]; // base pBuffer +// int16_t *pData[4]; // pData pBuffer +// int32_t real_linesize[4];// actual iLineSize of picture planes respectively 
+// int32_t used_linesize[4];// iLineSize of picture planes respectively used currently +// int32_t planes; // planes of YUV +//}Rs_Picture_t; + +} // end of namespace WelsEnc { + +#endif//WELS_PICTURE_H__ + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/picture_handle.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/picture_handle.h new file mode 100644 index 000000000..018ddbc24 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/picture_handle.h @@ -0,0 +1,66 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file picture_handle.h + * + * \brief picture pData handling + * + * \date 5/20/2009 Created + * + *************************************************************************************/ +#if !defined(WELS_ENCODER_PICTURE_HANDLE_H__) +#define WELS_ENCODER_PICTURE_HANDLE_H__ + +#include "picture.h" +#include "typedefs.h" +#include "memory_align.h" + +namespace WelsEnc { +/*! + * \brief alloc picture pData with borders for each plane based on width and height of picture + * \param kiWidth width of picture in pixels + * \param kiHeight height of picture in pixels + * \param bNeedMbInfo need pData allocation + * \param iNeedFeatureStorage need storage for FME + * \return successful if effective picture pointer returned, otherwise failed with NULL + */ +SPicture* AllocPicture (CMemoryAlign* pMa, const int32_t kiWidth, const int32_t kiHeight, bool bNeedMbInfo, + int32_t iNeedFeatureStorage); + +/*! + * \brief free picture pData planes + * \param ppPic pointer to the picture pointer to be destroyed + * \return none + */ +void FreePicture (CMemoryAlign* pMa, SPicture** ppPic); + +} +#endif//WELS_ENCODER_PICTURE_HANDLE_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/rc.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/rc.h new file mode 100644 index 000000000..00a6570bb --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/rc.h @@ -0,0 +1,283 @@ +/*! + * \copy + * Copyright (c) 2004-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * ratectl.c + * + * Abstract + * Include file for ratectl.c + * + * History + * 9/8/2004 Created + * 12/26/2011 Modified + * + * + *************************************************************************/ +#ifndef RC_H +#define RC_H + + +#include "codec_app_def.h" +#include "svc_enc_macroblock.h" +#include "slice.h" + +namespace WelsEnc { + +typedef struct TagWelsEncCtx sWelsEncCtx; + +//trace +#define GOM_TRACE_FLAG 0 +#define GOM_H_SCC 8 + +enum { + BITS_NORMAL, + BITS_LIMITED, + BITS_EXCEEDED +}; + +enum { +//virtual gop size + VGOP_SIZE = 8, + +//qp information + GOM_MIN_QP_MODE = 12, + GOM_MAX_QP_MODE = 36, + MAX_LOW_BR_QP = 42, + MIN_IDR_QP = 26, + MAX_IDR_QP = 32, + MIN_SCREEN_QP = 26, + MAX_SCREEN_QP = 35, + DELTA_QP = 2, + DELTA_QP_BGD_THD = 3, + QP_MIN_VALUE = 0, + QP_MAX_VALUE = 51, + +//frame skip constants + SKIP_QP_90P = 24, + SKIP_QP_180P = 24, + SKIP_QP_360P = 31, + SKIP_QP_720P = 31, + LAST_FRAME_QP_RANGE_UPPER_MODE0 = 3, + LAST_FRAME_QP_RANGE_LOWER_MODE0 = 2, + LAST_FRAME_QP_RANGE_UPPER_MODE1 = 5, + LAST_FRAME_QP_RANGE_LOWER_MODE1 = 3, + + MB_WIDTH_THRESHOLD_90P = 15, + MB_WIDTH_THRESHOLD_180P = 30, + MB_WIDTH_THRESHOLD_360P = 60, + +//Mode 0 parameter + GOM_ROW_MODE0_90P = 2, + GOM_ROW_MODE0_180P = 2, + GOM_ROW_MODE0_360P = 4, + GOM_ROW_MODE0_720P = 4, + QP_RANGE_MODE0 = 3, + +//Mode 1 parameter + GOM_ROW_MODE1_90P = 1, + GOM_ROW_MODE1_180P = 1, + GOM_ROW_MODE1_360P = 2, + GOM_ROW_MODE1_720P = 2, + QP_RANGE_UPPER_MODE1 = 9, + QP_RANGE_LOWER_MODE1 = 4, + QP_RANGE_INTRA_MODE1 = 3 +}; + +//bits allocation +#define MAX_BITS_VARY_PERCENTAGE 100 //bits vary range in percentage +#define MAX_BITS_VARY_PERCENTAGE_x3d2 150 //bits vary range in percentage * 3/2 +#define INT_MULTIPLY 100 // use to multiply in Double to Int Conversion, should be same as AQ_QSTEP_INT_MULTIPLY in WelsVP +#define WEIGHT_MULTIPLY 2000 +#define REMAIN_BITS_TH (1) +#define VGOP_BITS_PERCENTAGE_DIFF 5 +#define IDR_BITRATE_RATIO 4 +#define FRAME_iTargetBits_VARY_RANGE 50 
// *INT_MULTIPLY +//R-Q Model +#define LINEAR_MODEL_DECAY_FACTOR 80 // *INT_MULTIPLY +#define FRAME_CMPLX_RATIO_RANGE 20 // *INT_MULTIPLY +#define SMOOTH_FACTOR_MIN_VALUE 2 // *INT_MULTIPLY +//#define VGOP_BITS_MIN_RATIO 0.8 +//skip and padding +#define TIME_CHECK_WINDOW 5000 // ms +#define SKIP_RATIO 50 // *INT_MULTIPLY +#define LAST_FRAME_PREDICT_WEIGHT 0.5 +#define PADDING_BUFFER_RATIO 50 // *INT_MULTIPLY +#define PADDING_THRESHOLD 5 //*INT_MULTIPLY + +#define VIRTUAL_BUFFER_LOW_TH 120 //*INT_MULTIPLY +#define VIRTUAL_BUFFER_HIGH_TH 180 //*INT_MULTIPLY + +#define _BITS_RANGE 0 + +enum { + EVEN_TIME_WINDOW =0, + ODD_TIME_WINDOW =1, + TIME_WINDOW_TOTAL =2 +}; + +typedef struct TagRCTemporal { +int32_t iMinBitsTl; +int32_t iMaxBitsTl; +int32_t iTlayerWeight; +int32_t iGopBitsDq; +//P frame level R-Q Model +int64_t iLinearCmplx; // *INT_MULTIPLY +int32_t iPFrameNum; +int64_t iFrameCmplxMean; +int32_t iMaxQp; +int32_t iMinQp; +} SRCTemporal; + +typedef struct TagWelsRc { +int32_t iRcVaryPercentage; +int32_t iRcVaryRatio; + +int32_t iInitialQp; //initial qp +int64_t iBitRate; // Note: although the max bit rate is 240000*1200 which can be represented by int32, but there are many multipler of this iBitRate in the calculation of RC, so use int64 to avoid type conversion at all such places +int32_t iPreviousBitrate; +int32_t iPreviousGopSize; +double fFrameRate; +int32_t iBitsPerFrame; +int32_t iMaxBitsPerFrame; +double dPreviousFps; + +// bits allocation and status +int32_t iRemainingBits; +int32_t iBitsPerMb; +int32_t iTargetBits; +int32_t iCurrentBitsLevel;//0:normal; 1:limited; 2:exceeded. 
+ +int32_t iIdrNum; +int64_t iIntraComplexity; //255*255(MaxMbSAD)*36864(MaxFS) make the highest bit of 32-bit integer 1 +int32_t iIntraMbCount; +int64_t iIntraComplxMean; + +int8_t iTlOfFrames[VGOP_SIZE]; +int32_t iRemainingWeights; +int32_t iFrameDqBits; + +bool bGomRC; +double* pGomComplexity; +int32_t* pGomForegroundBlockNum; +int32_t* pCurrentFrameGomSad; +int32_t* pGomCost; + +int32_t bEnableGomQp; +int32_t iAverageFrameQp; +int32_t iMinFrameQp; +int32_t iMaxFrameQp; +int32_t iNumberMbFrame; +int32_t iNumberMbGom; +int32_t iGomSize; + +int32_t iSkipFrameNum; +int32_t iFrameCodedInVGop; +int32_t iSkipFrameInVGop; +int32_t iGopNumberInVGop; +int32_t iGopIndexInVGop; + +int32_t iSkipQpValue; +int32_t iQpRangeUpperInFrame; +int32_t iQpRangeLowerInFrame; +int32_t iMinQp; +int32_t iMaxQp; +//int32_t delta_adaptive_qp; +int32_t iSkipBufferRatio; + +int32_t iQStep; // *INT_MULTIPLY +int32_t iFrameDeltaQpUpper; +int32_t iFrameDeltaQpLower; +int32_t iLastCalculatedQScale; + +//for skip frame and padding +int32_t iBufferSizeSkip; +int64_t iBufferFullnessSkip; +int64_t iBufferMaxBRFullness[TIME_WINDOW_TOTAL];//0: EVEN_TIME_WINDOW; 1: ODD_TIME_WINDOW +int32_t iPredFrameBit; +bool bNeedShiftWindowCheck[TIME_WINDOW_TOTAL]; +int32_t iBufferSizePadding; +int32_t iBufferFullnessPadding; +int32_t iPaddingSize; +int32_t iPaddingBitrateStat; +bool bSkipFlag; +int32_t iContinualSkipFrames; +SRCTemporal* pTemporalOverRc; + +//for scc +int64_t iAvgCost2Bits; +int64_t iCost2BitsIntra; +int32_t iBaseQp; +long long uiLastTimeStamp; + +//for statistics and online adjustments +int32_t iActualBitRate; // TODO: to complete later +float fLatestFrameRate; // TODO: to complete later +} SWelsSvcRc; + +typedef void (*PWelsRCPictureInitFunc) (sWelsEncCtx* pCtx,long long uiTimeStamp); +typedef void (*PWelsRCPictureDelayJudgeFunc) (sWelsEncCtx* pCtx,long long uiTimeStamp,int32_t iDidIdx); +typedef void (*PWelsRCPictureInfoUpdateFunc) (sWelsEncCtx* pCtx, int32_t iLayerSize); +typedef void 
(*PWelsRCMBInfoUpdateFunc) (sWelsEncCtx* pCtx, SMB* pCurMb, int32_t iCostLuma, SSlice* pSlice); +typedef void (*PWelsRCMBInitFunc) (sWelsEncCtx* pCtx, SMB* pCurMb, SSlice* pSlice); +typedef void (*PWelsCheckFrameSkipBasedMaxbrFunc) (sWelsEncCtx* pCtx, const long long uiTimeStamp, int32_t iDidIdx); +typedef void (*PWelsUpdateBufferWhenFrameSkippedFunc)(sWelsEncCtx* pCtx, int32_t iSpatialNum); +typedef void (*PWelsUpdateMaxBrCheckWindowStatusFunc)(sWelsEncCtx* pCtx, int32_t iSpatialNum, const long long uiTimeStamp); +typedef bool (*PWelsRCPostFrameSkippingFunc)(sWelsEncCtx* pCtx, const int32_t iDid, const long long uiTimeStamp); + +typedef struct WelsRcFunc_s { +PWelsRCPictureInitFunc pfWelsRcPictureInit; +PWelsRCPictureDelayJudgeFunc pfWelsRcPicDelayJudge; +PWelsRCPictureInfoUpdateFunc pfWelsRcPictureInfoUpdate; +PWelsRCMBInitFunc pfWelsRcMbInit; +PWelsRCMBInfoUpdateFunc pfWelsRcMbInfoUpdate; +PWelsCheckFrameSkipBasedMaxbrFunc pfWelsCheckSkipBasedMaxbr; +PWelsUpdateBufferWhenFrameSkippedFunc pfWelsUpdateBufferWhenSkip; +PWelsUpdateMaxBrCheckWindowStatusFunc pfWelsUpdateMaxBrWindowStatus; + +PWelsRCPostFrameSkippingFunc pfWelsRcPostFrameSkipping; +} SWelsRcFunc; + +void GomRCInitForOneSlice(SSlice* pSlice, const int32_t kiBitsPerMb); +void CheckFrameSkipBasedMaxbr (sWelsEncCtx* pCtx,const long long uiTimeStamp, int32_t iDidIdx); +void UpdateBufferWhenFrameSkipped(sWelsEncCtx* pCtx, int32_t iSpatialNum); +void UpdateMaxBrCheckWindowStatus(sWelsEncCtx* pCtx, int32_t iSpatialNum, const long long uiTimeStamp); +bool WelsRcPostFrameSkipping(sWelsEncCtx* pCtx, const int32_t iDid, const long long uiTimeStamp); +void WelsRcPostFrameSkippedUpdate (sWelsEncCtx* pCtx, const int32_t iDid); + +void RcTraceFrameBits (sWelsEncCtx* pEncCtx, long long uiTimeStamp, int32_t iFrameSize); +void WelsRcInitModule (sWelsEncCtx* pCtx, RC_MODES iRcMode); +void WelsRcInitFuncPointers (sWelsEncCtx* pEncCtx, RC_MODES iRcMode); +void WelsRcFreeMemory (sWelsEncCtx* pCtx); +bool 
WelsRcCheckFrameStatus (sWelsEncCtx* pEncCtx,long long uiTimeStamp,int32_t iSpatialNum,int32_t iCurDid); +bool WelsUpdateSkipFrameStatus(); +long long GetTimestampForRc(const long long uiTimeStamp, const long long uiLastTimeStamp, const float fFrameRate); + +} +#endif //RC_H diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/ref_list_mgr_svc.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/ref_list_mgr_svc.h new file mode 100644 index 000000000..ad82e7e0d --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/ref_list_mgr_svc.h @@ -0,0 +1,148 @@ +/*! + * \copy + * Copyright (c) 2008-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * ref_list_mgr_svc.h + * + * Abstract + * Interface for managing reference picture in svc encoder side + * + * History + * 09/01/2008 Created + * 08/07/2009 Ported + * + *****************************************************************************/ +#if !defined(REFERENCE_PICTURE_LIST_MANAGEMENT_SVC_H__) +#define REFERENCE_PICTURE_LIST_MANAGEMENT_SVC_H__ + +#include "typedefs.h" +#include "encoder_context.h" +#include "codec_app_def.h" + +namespace WelsEnc { + +typedef enum { +LTR_DIRECT_MARK = 0, +LTR_DELAY_MARK = 1 +} LTR_MARKING_PROCESS_MODE; + +typedef enum { +FRAME_NUM_EQUAL = 0x01, +FRAME_NUM_BIGGER = 0x02, +FRAME_NUM_SMALLER = 0x04, +FRAME_NUM_OVER_MAX = 0x08 +} COMPARE_FRAME_NUM; + +/* +* reset LTR marking , recovery ,feedback state to default +*/ +void ResetLtrState (SLTRState* pLtr); +/* + * reset reference picture list + */ +void WelsResetRefList (sWelsEncCtx* pCtx); + +/* + * update reference picture list + */ +bool WelsUpdateRefList (sWelsEncCtx* pCtx); +/* + * build reference picture list + */ +bool WelsBuildRefList (sWelsEncCtx* pCtx, const int32_t kiPOC, int32_t iBestLtrRefIdx); + +/* + * update syntax for reference base related + */ +void WelsUpdateRefSyntax (sWelsEncCtx* pCtx, const int32_t kiPOC, const int32_t kiFrameType); + + +/* +* check current mark iFrameNum used in LTR list or not +*/ +bool CheckCurMarkFrameNumUsed (sWelsEncCtx* pCtx); +/* +* decide whether current frame include long term 
reference mark and update long term reference mark syntax +*/ +void WelsMarkPic (sWelsEncCtx* pCtx); + +#ifdef LONG_TERM_REF_DUMP +void DumpRef (sWelsEncCtx* ctx); +#endif + +class IWelsReferenceStrategy { + public: + IWelsReferenceStrategy() {}; + virtual ~IWelsReferenceStrategy() { }; + + static IWelsReferenceStrategy* CreateReferenceStrategy (sWelsEncCtx* pCtx, const EUsageType keUsageType, + const bool kbLtrEnabled); + virtual bool BuildRefList (const int32_t iPOC, int32_t iBestLtrRefIdx) = 0; + virtual void MarkPic() = 0; + virtual bool UpdateRefList() = 0; + virtual void EndofUpdateRefList() = 0; + virtual void AfterBuildRefList() = 0; + + protected: + virtual void Init (sWelsEncCtx* pCtx) = 0; +}; + +class CWelsReference_TemporalLayer : public IWelsReferenceStrategy { + public: + virtual bool BuildRefList (const int32_t iPOC, int32_t iBestLtrRefIdx); + virtual void MarkPic(); + virtual bool UpdateRefList(); + virtual void EndofUpdateRefList(); + virtual void AfterBuildRefList(); + + void Init (sWelsEncCtx* pCtx); + protected: + sWelsEncCtx* m_pEncoderCtx; + +}; + +class CWelsReference_Screen : public CWelsReference_TemporalLayer { + public: + virtual bool BuildRefList (const int32_t iPOC, int32_t iBestLtrRefIdx); + virtual void MarkPic(); + virtual bool UpdateRefList(); + virtual void EndofUpdateRefList(); + virtual void AfterBuildRefList(); +}; + +class CWelsReference_LosslessWithLtr : public CWelsReference_Screen { + public: + virtual bool BuildRefList (const int32_t iPOC, int32_t iBestLtrRefIdx); + virtual void MarkPic(); + virtual bool UpdateRefList(); + virtual void EndofUpdateRefList(); +}; +} +#endif//REFERENCE_PICTURE_LIST_MANAGEMENT_SVC_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/sample.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/sample.h new file mode 100644 index 000000000..c953a50b0 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/sample.h @@ 
-0,0 +1,143 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifndef SAMPLE_H_ +#define SAMPLE_H_ + +#include "typedefs.h" +#include "wels_func_ptr_def.h" + +namespace WelsEnc { + +//======================SATD======================// +int32_t WelsSampleSatd16x16_c (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSatd16x8_c (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSatd8x16_c (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSatd8x8_c (uint8_t*, int32_t, uint8_t*, int32_t); +//int32_t WelsSampleSatd8x4( uint8_t *, int32_t, uint8_t *, int32_t ); +//int32_t WelsSampleSatd4x8( uint8_t *, int32_t, uint8_t *, int32_t ); +int32_t WelsSampleSatd4x4_c (uint8_t*, int32_t, uint8_t*, int32_t); + +int32_t WelsSampleSatdIntra4x4Combined3_c (uint8_t*, int32_t, uint8_t*, int32_t, uint8_t*, int32_t*, int32_t, int32_t, + int32_t); +int32_t WelsSampleSatdIntra16x16Combined3_c (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*, int32_t, uint8_t*); +int32_t WelsSampleSadIntra16x16Combined3_c (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*, int32_t, uint8_t*); +int32_t WelsSampleSatdIntra8x8Combined3_c (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*, int32_t, uint8_t*, + uint8_t*, uint8_t*); +int32_t WelsSampleSadIntra8x8Combined3_c (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*, int32_t, uint8_t*, + uint8_t*, uint8_t*); + +#if defined(__cplusplus) +extern "C" { +#endif//__cplusplus + +#if defined (X86_ASM) + + +int32_t WelsSampleSatd8x8_sse2 (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSatd16x8_sse2 (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSatd8x16_sse2 (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSatd16x16_sse2 (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSatd4x4_sse2 (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSatdThree4x4_sse2 (uint8_t*, int32_t, uint8_t*, int32_t, uint8_t*, int32_t*, int32_t, int32_t, + int32_t); + +int32_t WelsSampleSatd8x8_sse41 (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSatd8x16_sse41 
(uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSatd16x8_sse41 (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSatd16x16_sse41 (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSatd4x4_sse41 (uint8_t*, int32_t, uint8_t*, int32_t); + +int32_t WelsIntra16x16Combined3Satd_sse41 (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*, int32_t, uint8_t*); +int32_t WelsIntra16x16Combined3Sad_ssse3 (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*, int32_t, uint8_t*); +int32_t WelsIntraChroma8x8Combined3Satd_sse41 (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*, int32_t, uint8_t*, + uint8_t*, uint8_t*); + +int32_t WelsSampleSatd8x8_avx2 (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSatd8x16_avx2 (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSatd16x8_avx2 (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSatd16x16_avx2 (uint8_t*, int32_t, uint8_t*, int32_t); + +#endif//X86_ASM + +#if defined (HAVE_NEON) + + +int32_t WelsSampleSatd8x8_neon (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSatd16x8_neon (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSatd8x16_neon (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSatd16x16_neon (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSatd4x4_neon (uint8_t*, int32_t, uint8_t*, int32_t); + +int32_t WelsIntra16x16Combined3Satd_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*, int32_t, uint8_t*); +int32_t WelsIntra16x16Combined3Sad_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*, int32_t, uint8_t*); +int32_t WelsIntra8x8Combined3Satd_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*, int32_t, uint8_t*, uint8_t*, + uint8_t*); +int32_t WelsIntra8x8Combined3Sad_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*, int32_t, uint8_t*, uint8_t*, + uint8_t*); +int32_t WelsIntra4x4Combined3Satd_neon (uint8_t*, int32_t, uint8_t*, int32_t, uint8_t*, int32_t*, int32_t, int32_t, + int32_t); + +#endif + +#if defined (HAVE_NEON_AARCH64) +int32_t 
WelsSampleSatd4x4_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSatd16x16_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSatd16x8_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSatd8x16_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSatd8x8_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsIntra16x16Combined3Satd_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*, int32_t, uint8_t*); +int32_t WelsIntra16x16Combined3Sad_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*, int32_t, uint8_t*); +int32_t WelsIntra8x8Combined3Satd_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*, int32_t, uint8_t*, uint8_t*, + uint8_t*); +int32_t WelsIntra8x8Combined3Sad_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*, int32_t, uint8_t*, uint8_t*, + uint8_t*); +int32_t WelsIntra4x4Combined3Satd_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t, uint8_t*, int32_t*, int32_t, int32_t, + int32_t); +#endif + +#if defined (HAVE_MMI) +int32_t WelsSampleSatd8x8_mmi (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSatd16x8_mmi (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSatd8x16_mmi (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSatd16x16_mmi (uint8_t*, int32_t, uint8_t*, int32_t); +int32_t WelsSampleSatd4x4_mmi (uint8_t*, int32_t, uint8_t*, int32_t); +#endif//HAVE_MMI +#if defined(__cplusplus) +} +#endif//__cplusplus + +void WelsInitSampleSadFunc (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag); + +} + +#endif //SAMPLE_H_ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/set_mb_syn_cabac.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/set_mb_syn_cabac.h new file mode 100644 index 000000000..61299d48f --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/set_mb_syn_cabac.h @@ -0,0 +1,127 @@ +/*! 
+ * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file set_mb_syn_cabac.h + * + * \brief Seting all syntax elements of mb and encoding residual with cabac + * + * \date 09/27/2014 Created + * + ************************************************************************************* + */ + +#ifndef SET_MB_SYN_CABAC_H_ +#define SET_MB_SYN_CABAC_H_ + +#include "typedefs.h" +#include "wels_common_defs.h" + +using namespace WelsCommon; + +namespace WelsEnc { + +#define WELS_QP_MAX 51 + +typedef uint64_t cabac_low_t; +enum { CABAC_LOW_WIDTH = sizeof (cabac_low_t) / sizeof (uint8_t) * 8 }; + +typedef struct TagStateCtx { + // Packed representation of state and MPS as state << 1 | MPS. + uint8_t m_uiStateMps; + + uint8_t Mps() const { return m_uiStateMps & 1; } + uint8_t State() const { return m_uiStateMps >> 1; } + void Set (uint8_t uiState, uint8_t uiMps) { m_uiStateMps = uiState * 2 + uiMps; } +} SStateCtx; +typedef struct TagCabacCtx { + cabac_low_t m_uiLow; + int32_t m_iLowBitCnt; + int32_t m_iRenormCnt; + uint32_t m_uiRange; + SStateCtx m_sStateCtx[WELS_CONTEXT_COUNT]; + uint8_t* m_pBufStart; + uint8_t* m_pBufEnd; + uint8_t* m_pBufCur; +} SCabacCtx; + + +void WelsCabacContextInit (void* pCtx, SCabacCtx* pCbCtx, int32_t iModel); +void WelsCabacEncodeInit (SCabacCtx* pCbCtx, uint8_t* pBuf, uint8_t* pEnd); +inline void WelsCabacEncodeDecision (SCabacCtx* pCbCtx, int32_t iCtx, uint32_t uiBin); +inline void WelsCabacEncodeBypassOne (SCabacCtx* pCbCtx, int32_t uiBin); +void WelsCabacEncodeTerminate (SCabacCtx* pCbCtx, uint32_t uiBin); +void WelsCabacEncodeUeBypass (SCabacCtx* pCbCtx, int32_t iExpBits, uint32_t uiVal); +void WelsCabacEncodeFlush (SCabacCtx* pCbCtx); +uint8_t* WelsCabacEncodeGetPtr (SCabacCtx* pCbCtx); +int32_t WriteBlockResidualCabac (void* pEncCtx, int16_t* pCoffLevel, int32_t iEndIdx, + int32_t iCalRunLevelFlag, + int32_t iResidualProperty, int8_t iNC, SBitStringAux* pBs); + + +// private functions used by public inline functions. 
+void WelsCabacEncodeDecisionLps_ (SCabacCtx* pCbCtx, int32_t iCtx); +void WelsCabacEncodeUpdateLowNontrivial_ (SCabacCtx* pCbCtx); +inline void WelsCabacEncodeUpdateLow_ (SCabacCtx* pCbCtx) { + if (pCbCtx->m_iLowBitCnt + pCbCtx->m_iRenormCnt < CABAC_LOW_WIDTH) { + pCbCtx->m_iLowBitCnt += pCbCtx->m_iRenormCnt; + pCbCtx->m_uiLow <<= pCbCtx->m_iRenormCnt; + } else { + WelsCabacEncodeUpdateLowNontrivial_ (pCbCtx); + } + pCbCtx->m_iRenormCnt = 0; +} + +// inline function definitions. +void WelsCabacEncodeDecision (SCabacCtx* pCbCtx, int32_t iCtx, uint32_t uiBin) { + if (uiBin == pCbCtx->m_sStateCtx[iCtx].Mps()) { + const int32_t kiState = pCbCtx->m_sStateCtx[iCtx].State(); + uint32_t uiRange = pCbCtx->m_uiRange; + uint32_t uiRangeLps = g_kuiCabacRangeLps[kiState][(uiRange & 0xff) >> 6]; + uiRange -= uiRangeLps; + + const int32_t kiRenormAmount = uiRange >> 8 ^ 1; + pCbCtx->m_uiRange = uiRange << kiRenormAmount; + pCbCtx->m_iRenormCnt += kiRenormAmount; + pCbCtx->m_sStateCtx[iCtx].Set (g_kuiStateTransTable[kiState][1], uiBin); + } else { + WelsCabacEncodeDecisionLps_ (pCbCtx, iCtx); + } +} + +void WelsCabacEncodeBypassOne (SCabacCtx* pCbCtx, int32_t uiBin) { + const uint32_t kuiBinBitmask = -uiBin; + pCbCtx->m_iRenormCnt++; + WelsCabacEncodeUpdateLow_ (pCbCtx); + pCbCtx->m_uiLow += kuiBinBitmask & pCbCtx->m_uiRange; +} + +} +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/set_mb_syn_cavlc.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/set_mb_syn_cavlc.h new file mode 100644 index 000000000..37735b9b5 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/set_mb_syn_cavlc.h @@ -0,0 +1,92 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file set_mb_syn_cavlc.h + * + * \brief Seting all syntax elements of mb and decoding residual with cavlc + * + * \date 05/19/2009 Created + * + ************************************************************************************* + */ + +#ifndef SET_MB_SYN_CAVLC_H_ +#define SET_MB_SYN_CAVLC_H_ + +#include "typedefs.h" +#include "wels_func_ptr_def.h" + +namespace WelsEnc { + + +enum ECtxBlockCat { + LUMA_DC = 0, + LUMA_AC = 1, + LUMA_4x4 = 2, + CHROMA_DC = 3, + CHROMA_AC = 4 +}; + + +#define LUMA_DC_AC 0x04 + +typedef struct TagCavlcTableItem { + uint16_t uiBits; + uint8_t uiLen; + uint8_t uiSuffixLength; +} SCavlcTableItem; + +void InitCoeffFunc (SWelsFuncPtrList* pFuncList, const uint32_t uiCpuFlag,int32_t iEntropyCodingModeFlag); + +int32_t WriteBlockResidualCavlc (SWelsFuncPtrList* pFuncList, int16_t* pCoffLevel, int32_t iEndIdx, + int32_t iCalRunLevelFlag, + int32_t iResidualProperty, int8_t iNC, SBitStringAux* pBs); + + +#if defined(__cplusplus) +extern "C" { +#endif//__cplusplus + +int32_t CavlcParamCal_c (int16_t* pCoffLevel, uint8_t* pRun, int16_t* pLevel, int32_t* pTotalCoeffs , + int32_t iEndIdx); +#ifdef X86_ASM +int32_t CavlcParamCal_sse2 (int16_t* pCoffLevel, uint8_t* pRun, int16_t* pLevel, int32_t* pTotalCoeffs , + int32_t iEndIdx); +int32_t CavlcParamCal_sse42 (int16_t* pCoffLevel, uint8_t* pRun, int16_t* pLevel, int32_t* pTotalCoeffs , + int32_t iEndIdx); +#endif + +#if defined(__cplusplus) +} +#endif//__cplusplus + +} +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/slice.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/slice.h new file mode 100644 index 000000000..755c80a3b --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/slice.h @@ -0,0 +1,212 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +//wels_slice.h +#ifndef WELS_SLICE_H__ +#define WELS_SLICE_H__ + +#include "typedefs.h" +#include "wels_const.h" +#include "wels_common_basis.h" +#include "mb_cache.h" +#include "picture.h" +#include "parameter_sets.h" +#include "svc_enc_slice_segment.h" +#include "set_mb_syn_cabac.h" +#include "nal_encap.h" + +namespace WelsEnc { + +/*******************************sub struct of slice header****************************/ + + +/* + * Reference picture list reordering syntax, refer to page 64 in JVT X201wcm + */ +typedef struct TagRefPicListReorderSyntax { +struct { + uint32_t uiAbsDiffPicNumMinus1; //uiAbsDiffPicNumMinus1 SHOULD be in the range of [4, (1<uiLog2MaxFrameNum)-1], {p104, JVT-X201wcm1} + //but int8_t can't cover the range, SHOULD modify it. + uint16_t iLongTermPicNum; + uint16_t uiReorderingOfPicNumsIdc; //in order to pack 2-uint16_t into 1-(u)int32_t, so modify the type into uint16_t. +} SReorderingSyntax[MAX_REFERENCE_REORDER_COUNT_NUM]; // MAX_REF_PIC_COUNT +} SRefPicListReorderSyntax; + + +/* Decoded reference picture marking syntax, refer to Page 66 in JVT X201wcm */ +typedef struct TagRefPicMarking { +struct { + int32_t iMmcoType; + int32_t iShortFrameNum; + int32_t iDiffOfPicNum; + int32_t iLongTermPicNum; + int32_t iLongTermFrameIdx; + int32_t iMaxLongTermFrameIdx; +} SMmcoRef[MAX_REFERENCE_MMCO_COUNT_NUM]; // MAX_MMCO_COUNT + +// int32_t mmco_index; +uint8_t uiMmcoCount; +bool bNoOutputOfPriorPicsFlag; +bool bLongTermRefFlag; +bool bAdaptiveRefPicMarkingModeFlag; +} SRefPicMarking; + +// slice level rc statistic info +typedef struct TagRCSlicing { + int32_t iComplexityIndexSlice; + int32_t iCalculatedQpSlice; + int32_t iStartMbSlice; + int32_t iEndMbSlice; + int32_t iTotalQpSlice; + int32_t iTotalMbSlice; + int32_t iTargetBitsSlice; + int32_t iBsPosSlice; + int32_t iFrameBitsSlice; + int32_t iGomBitsSlice; + int32_t iGomTargetBits; + //int32_t gom_coded_mb; +} SRCSlicing; + +/* Header of slice syntax elements, refer to Page 63 in JVT 
X201wcm */ +typedef struct TagSliceHeader { +/*****************************slice header syntax and generated****************************/ +int32_t iFirstMbInSlice; +// uint32_t pic_parameter_set_id; +int32_t iFrameNum; +int32_t iPicOrderCntLsb; + +// int32_t delta_pic_order_cnt_bottom; +// int32_t delta_pic_order_cnt[2]; +// int32_t redundant_pic_cnt; + +EWelsSliceType eSliceType; +uint8_t uiNumRefIdxL0Active; // +//int32_t num_ref_idx_l1_active_minus1 //B frame is not supported +uint8_t uiRefCount; +//Ref_Pic *ref_pic; +uint8_t uiRefIndex; // exact reference picture index for slice + +int8_t iSliceQpDelta; +// int32_t slice_qp; +// int32_t slice_qs_delta; // For SP/SI slices +uint8_t uiDisableDeblockingFilterIdc; +int8_t iSliceAlphaC0Offset; +int8_t iSliceBetaOffset; +#if !defined(DISABLE_FMO_FEATURE) +int32_t iSliceGroupChangeCycle; +#endif//!DISABLE_FMO_FEATURE + +SWelsSPS* pSps; +SWelsPPS* pPps; +int32_t iSpsId; +int32_t iPpsId; + +uint16_t uiIdrPicId; +// uint8_t color_plane_id;//from? 
+ +bool bNumRefIdxActiveOverrideFlag; +// bool field_pic_flag; //not supported in base profile +// bool bottom_field_flag; //not supported in base profile +uint8_t uiPadding1Bytes; + +SRefPicMarking sRefMarking; // Decoded reference picture marking syntaxs + +SRefPicListReorderSyntax sRefReordering; // Reference picture list reordering syntaxs +} SSliceHeader, *PSliceHeader; + + +/* SSlice header in scalable extension syntax, refer to Page 394 in JVT X201wcm */ +typedef struct TagSliceHeaderExt { +SSliceHeader sSliceHeader; + +SSubsetSps* pSubsetSps; + +uint32_t uiNumMbsInSlice; + +bool bStoreRefBasePicFlag; +bool bConstrainedIntraResamplingFlag; +bool bSliceSkipFlag; + +bool bAdaptiveBaseModeFlag; +bool bDefaultBaseModeFlag; +bool bAdaptiveMotionPredFlag; +bool bDefaultMotionPredFlag; + +bool bAdaptiveResidualPredFlag; +bool bDefaultResidualPredFlag; +bool bTcoeffLevelPredFlag; +uint8_t uiDisableInterLayerDeblockingFilterIdc; + +} SSliceHeaderExt, *PSliceHeaderExt; + + +typedef struct TagSlice { +// mainly for multiple threads imp. +SMbCache sMbCacheInfo; // MBCache is introduced within slice dependency +SBitStringAux* pSliceBsa; +SWelsSliceBs sSliceBs; + +/*******************************sSliceHeader****************************/ +SSliceHeaderExt sSliceHeaderExt; + +SMVUnitXY sMvStartMin; +SMVUnitXY sMvStartMax; +SMVUnitXY sMvc[5]; +uint8_t uiMvcNum; +uint8_t sScaleShift; + +int32_t iSliceIdx; +uint32_t uiBufferIdx; +bool bSliceHeaderExtFlag; // Indicate which slice header is used, avc or ext? +uint8_t uiLastMbQp; // stored qp for last mb coded, maybe more efficient for mb skip detection etc. + +bool bDynamicSlicingSliceSizeCtrlFlag; +uint8_t uiAssumeLog2BytePerMb; + +uint32_t uiSliceFMECostDown;//TODO: for FME switch under MT, to opt after ME final? 
+ +uint8_t uiReservedFillByte; // reserved to meet 4 bytes alignment + +SCabacCtx sCabacCtx; +int32_t iCabacInitIdc; +int32_t iMbSkipRun; + +int32_t iCountMbNumInSlice; +uint32_t uiSliceConsumeTime; +int32_t iSliceComplexRatio; + +SRCSlicing sSlicingOverRc; //slice level rc statistic info +} SSlice, *PSlice; + +} +#endif//WELS_SLICE_H__ + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/slice_multi_threading.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/slice_multi_threading.h new file mode 100644 index 000000000..9e5a3c192 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/slice_multi_threading.h @@ -0,0 +1,97 @@ +/*! + * \copy + * Copyright (c) 2010-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file slice_multi_threading.c + * + * \brief slice based multiple threading + * + * \date 04/16/2010 Created + * + ************************************************************************************* + */ +#ifndef SVC_SLICE_MULTIPLE_THREADING_H__ +#define SVC_SLICE_MULTIPLE_THREADING_H__ + + +#include "typedefs.h" +#include "codec_app_def.h" +#include "param_svc.h" +#include "encoder_context.h" +#include "svc_enc_frame.h" +#include "svc_enc_macroblock.h" +#include "svc_enc_slice_segment.h" +#include "WelsThreadLib.h" + +namespace WelsEnc { +void UpdateMbListNeighborParallel (SDqLayer* pCurDq, + SMB* pMbList, + const int32_t kiSliceIdc); + +void CalcSliceComplexRatio (SDqLayer* pCurDq); + +int32_t NeedDynamicAdjust (SSlice** ppSliceInLayer, const int32_t iSliceNum); + +void DynamicAdjustSlicing (sWelsEncCtx* pCtx, + SDqLayer* pCurDqLayer, + int32_t iCurDid); + +int32_t RequestMtResource (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pParam, const int32_t kiCountBsLen, + const int32_t kiTargetSpatialBsSize, bool bDynamicSlice); + +void ReleaseMtResource (sWelsEncCtx** ppCtx); + +int32_t AppendSliceToFrameBs (sWelsEncCtx* pCtx, SLayerBSInfo* pLbi, const int32_t kiSliceCount); + +#if !defined(_WIN32) +WELS_THREAD_ROUTINE_TYPE UpdateMbListThreadProc (void* arg); +#endif//!_WIN32 + +int32_t DynamicDetectCpuCores(); + + +int32_t AdjustBaseLayer (sWelsEncCtx* pCtx); +int32_t AdjustEnhanceLayer (sWelsEncCtx* pCtx, int32_t 
iCurDid); + + + +#if defined(MT_DEBUG) +void TrackSliceComplexities (sWelsEncCtx* pCtx, const int32_t kiCurDid); +#endif +#if defined(MT_DEBUG) +void TrackSliceConsumeTime (sWelsEncCtx* pCtx, int32_t* pDidList, const int32_t kiSpatialNum); +#endif//defined(MT_DEBUG) + +void SetOneSliceBsBufferUnderMultithread(sWelsEncCtx* pCtx, const int32_t kiThreadIdx, SSlice* pSlice); +int32_t WriteSliceBs (sWelsEncCtx* pCtx,SWelsSliceBs* pSliceBs,const int32_t iSliceIdx,int32_t& iSliceSize); +} + +#endif//SVC_SLICE_MULTIPLE_THREADING_H__ + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/stat.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/stat.h new file mode 100644 index 000000000..6b70026d2 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/stat.h @@ -0,0 +1,102 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file stat.h + * + * \brief statistical pData information + * + * \date 4/22/2009 Created + * + ************************************************************************************* + */ +#if !defined(WELS_ENCODER_STATISTICAL_DATA_H__) +#define WELS_ENCODER_STATISTICAL_DATA_H__ + +namespace WelsEnc { + +/* + * Stat quality + */ +typedef struct TagStatQuality { + +float rYPsnr[5]; +float rUPsnr[5]; +float rVPsnr[5]; + +} SStatQuality; + +/* + * Stat complexity pData + */ +typedef struct TagComplexityStat { + +#ifdef FME_TEST +int32_t cost_time; +int32_t me_time; +int32_t mvp_time; +int32_t mvb_time; +#endif + +// any else? 
+ +} SComplexityStat; + +/* + * Stat slice details information + */ +typedef struct TagStatSliceInfo { + +/* per slice info */ +int32_t iSliceCount[5]; +int32_t iSliceSize [5]; +int32_t iMbCount [5][18]; + +} SStatSliceInfo; + +/* + * For overall statistical pData + */ +typedef struct TagStatData { + +// Quality +SStatQuality sQualityStat; + +// Complexity +SComplexityStat sComplexityStat; + +// SSlice information output +SStatSliceInfo sSliceData; + +} SStatData; + +} + +#endif//WELS_ENCODER_STATISTICAL_DATA_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/svc_base_layer_md.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/svc_base_layer_md.h new file mode 100644 index 000000000..c600f779b --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/svc_base_layer_md.h @@ -0,0 +1,102 @@ +/*! + * \copy + * Copyright (c) 2010-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file svc_base_layer_md.h + * + * \brief mode decision + * + * \date 2009.08.10 Created + * + ************************************************************************************* + */ +#ifndef SVC_BASE_LAYER_MACROBLOCK_MODE_DECISION_H__ +#define SVC_BASE_LAYER_MACROBLOCK_MODE_DECISION_H__ + +#include "md.h" +#include "mb_cache.h" + +namespace WelsEnc { +void WelsMdIntraInit (sWelsEncCtx* pEncCtx, SMB* pCurMb, SMbCache* pMbCache, const int32_t kiSliceFirstMbXY); +int32_t WelsMdI16x16 (SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SMbCache* pMbCache, int32_t iLambda); +int32_t WelsMdIntraChroma (SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SMbCache* pMbCache, int32_t iLambda); + +int32_t WelsMdI4x4 (sWelsEncCtx* pEnc, SWelsMD* pMd, SMB* pCurMb, SMbCache* pMbCache); +int32_t WelsMdI4x4Fast (sWelsEncCtx* pEnc, SWelsMD* pMd, SMB* pCurMb, SMbCache* pMbCache); + +int32_t WelsMdIntraFinePartition (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurMb, SMbCache* pMbCache); +int32_t WelsMdIntraFinePartitionVaa (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurMb, SMbCache* pMbCache); + +void WelsMdIntraMb (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurMb, SMbCache* pMbCache); + +void WelsMdBackgroundMbEnc (sWelsEncCtx* pEnc, SWelsMD* pMd, SMB* pCurMb, SMbCache* pMbCache, SSlice* pSlice, bool bSkipMbFlag); +bool WelsMdPSkipEnc (sWelsEncCtx* pEnc, SWelsMD* pMd, SMB* pCurMb, SMbCache* pMbCache); +int32_t WelsMdP16x16 
(SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SWelsMD* pWelsMd, SSlice* pSlice, SMB* pCurMb); + +int32_t WelsMdP16x8 (SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SWelsMD* pWelsMd, SSlice* pSlice); +int32_t WelsMdP8x16 (SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SWelsMD* pWelsMd, SSlice* pSlice); +int32_t WelsMdP8x8 (SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SWelsMD* pWelsMd, SSlice* pSlice); +int32_t WelsMdP4x4 (SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SWelsMD* pWelsMd, SSlice* pSlice, const int32_t ki8x8Idx); +int32_t WelsMdP8x4 (SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SWelsMD* pWelsMd, SSlice* pSlice, const int32_t ki8x8Idx); +int32_t WelsMdP4x8 (SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SWelsMD* pWelsMd, SSlice* pSlice, const int32_t ki8x8Idx); +/*static*/ void WelsMdInterInit (sWelsEncCtx* pEncCtx, SSlice* pSlice, SMB* pCurMb, const int32_t kiSliceFirstMbXY); +/*static*/ void WelsMdInterFinePartition (sWelsEncCtx* pEnc, SWelsMD* pMd, SSlice* pSlice, SMB* pCurMb, int32_t bestCost); +/*static*/ void WelsMdInterFinePartitionVaa (sWelsEncCtx* pEnc, SWelsMD* pMd, SSlice* pSlice, SMB* pCurMb, int32_t bestCost); +/*static*/ void WelsMdInterFinePartitionVaaOnScreen (sWelsEncCtx* pEnc, SWelsMD* pMd, SSlice* pSlice, SMB* pCurMb, + int32_t bestCost); +void WelsMdInterMbRefinement (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurMb, SMbCache* pMbCache); +bool WelsMdFirstIntraMode (sWelsEncCtx* pEnc, SWelsMD* pMd, SMB* pCurMb, SMbCache* pMbCache); +//bool svc_md_first_intra_mode_constrained(sWelsEncCtx* pEnc, SWelsMD* pMd, SMB* pCurMb, SMbCache *pMbCache); +void WelsMdInterMb (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice* pSlice, SMB* pCurMb, SMbCache* pUnused); + +//both used in BL and EL +//void wels_md_inter_init ( SWelsMD* pMd, const uint8_t ref_idx, const bool is_highest_dlayer_flag ); + + + +bool WelsMdInterJudgePskip (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice* pSlice, SMB* pCurMb, SMbCache* pMbCache, + bool bTrySkip); 
+void WelsMdInterUpdatePskip (SDqLayer* pCurDqLayer, SSlice* pSlice, SMB* pCurMb, SMbCache* pMbCache); +void WelsMdInterDecidedPskip (sWelsEncCtx* pEncCtx, SSlice* pSlice, SMB* pCurMb, SMbCache* pMbCache); + +void WelsMdInterDoubleCheckPskip (SMB* pCurMb, SMbCache* pMbCache); +void WelsMdInterEncode (sWelsEncCtx* pEncCtx, SSlice* pSlice, SMB* pCurMb, SMbCache* pMbCache); + +void WelsMdInterSaveSadAndRefMbType (Mb_Type* pRefMbTypeList, SMbCache* pMbCache, const SMB* kpCurMb, + const SWelsMD* kpMd); + +void WelsMdInterSecondaryModesEnc (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice* pSlice, SMB* pCurMb, + SMbCache* pMbCache, const bool kbSkip); +void WelsMdIntraSecondaryModesEnc (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurMb, SMbCache* pMbCache); +//end of: both used in BL and EL + + +} +#endif//SVC_BASE_LAYER_MACROBLOCK_MODE_DECISION_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/svc_enc_frame.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/svc_enc_frame.h new file mode 100644 index 000000000..8c8db5bee --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/svc_enc_frame.h @@ -0,0 +1,136 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +//wels_svc_layer.h +#ifndef WELS_SVC_EXTENSION_LAYER_H__ +#define WELS_SVC_EXTENSION_LAYER_H__ + +#include "typedefs.h" +#include "wels_const.h" +#include "wels_common_basis.h" +#include "parameter_sets.h" +#include "slice.h" +#include "picture.h" +#include "svc_enc_macroblock.h" + + +#include "svc_enc_slice_segment.h" +namespace WelsEnc { + +/* + * Frame level in SVC DQLayer instead. 
+ * Dependency-Quality layer structure definition for SVC extension of H.264/AVC + */ + +///////////////////////////////////DQ Layer level/////////////////////////////////// + +typedef struct TagDqLayer SDqLayer; +typedef SDqLayer* pDqLayer; + +typedef struct TagFeatureSearchPreparation { +SScreenBlockFeatureStorage* pRefBlockFeature;//points to the ref frame storage + +uint16_t* pFeatureOfBlock; // Feature of every block (8x8), begin with the point +uint8_t uiFeatureStrategyIndex;// index of hash strategy + +/* for FME frame-level switch */ +bool bFMESwitchFlag; +uint8_t uiFMEGoodFrameCount; +int32_t iHighFreMbCount; +} SFeatureSearchPreparation; //maintain only one + +typedef struct TagSliceBufferInfo { +SSlice* pSliceBuffer; // slice buffer for multi thread, +int32_t iMaxSliceNum; +int32_t iCodedSliceNum; +}SSliceBufferInfo; + +typedef struct TagLayerInfo { +SNalUnitHeaderExt sNalHeaderExt; +SSubsetSps* pSubsetSpsP; // current pSubsetSps used, memory alloc in external +SWelsSPS* pSpsP; // current pSps based avc used, memory alloc in external +SWelsPPS* pPpsP; // current pPps used +} SLayerInfo; +/* Layer Representation */ +struct TagDqLayer { +SLayerInfo sLayerInfo; +SSliceBufferInfo sSliceBufferInfo[MAX_THREADS_NUM]; +SSlice** ppSliceInLayer; +SSliceCtx sSliceEncCtx; // current slice context +uint8_t* pCsData[3]; // pointer to reconstructed picture pData +int32_t iCsStride[3]; // Cs stride + +uint8_t* pEncData[3]; // pData picture to be encoded in current layer +int32_t iEncStride[3]; // pData picture stride + +SMB* sMbDataP; // pointer to mb of mbAddr equal to 0 in slice, mb_data_ptr = mb_base_ptr + (1+iMbStride). +int16_t iMbWidth; // MB width of this picture, equal to pSps.iMbWidth +int16_t iMbHeight; // MB height of this picture, equal to pSps.iMbHeight; + +bool bBaseLayerAvailableFlag; // whether base layer is available for prediction? 
+bool bSatdInMdFlag; // whether SATD is calculated in ME and integer-pel MD + +uint8_t iLoopFilterDisableIdc; // 0: on, 1: off, 2: on except for slice boundaries +int8_t iLoopFilterAlphaC0Offset;// AlphaOffset: valid range [-6, 6], default 0 +int8_t iLoopFilterBetaOffset; // BetaOffset: valid range [-6, 6], default 0 +uint8_t uiDisableInterLayerDeblockingFilterIdc; +int8_t iInterLayerSliceAlphaC0Offset; +int8_t iInterLayerSliceBetaOffset; +bool bDeblockingParallelFlag; //parallel_deblocking_flag + +SPicture* pRefPic; // reference picture pointer +SPicture* pDecPic; // reconstruction picture pointer for layer +SPicture* pRefOri[MAX_REF_PIC_COUNT]; + +bool bThreadSlcBufferFlag; +bool bSliceBsBufferFlag; +int32_t iMaxSliceNum; +int32_t NumSliceCodedOfPartition[MAX_THREADS_NUM]; // for dynamic slicing mode +int32_t LastCodedMbIdxOfPartition[MAX_THREADS_NUM]; // for dynamic slicing mode +int32_t FirstMbIdxOfPartition[MAX_THREADS_NUM]; // for dynamic slicing mode +int32_t EndMbIdxOfPartition[MAX_THREADS_NUM]; // for dynamic slicing mode +int32_t* pFirstMbIdxOfSlice; +int32_t* pCountMbNumInSlice; + +bool bNeedAdjustingSlicing; + +SFeatureSearchPreparation* pFeatureSearchPreparation; + +SDqLayer* pRefLayer; // pointer to referencing dq_layer of current layer to be decoded +}; + +/////////////////////////////////////////////////////////////////////// + +// frame structure for svc +typedef SDqLayer SWelsSvcFrame; +} +#endif//WELS_SVC_EXTENSION_LAYER_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/svc_enc_golomb.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/svc_enc_golomb.h new file mode 100644 index 000000000..431d2cc68 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/svc_enc_golomb.h @@ -0,0 +1,123 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * \file svc_enc_golomb.h + * + * \brief Exponential Golomb entropy coding routine + * + * \date 03/13/2009 Created + * + ************************************************************************************* + */ +#ifndef WELS_EXPONENTIAL_GOLOMB_ENTROPY_CODING_H__ +#define WELS_EXPONENTIAL_GOLOMB_ENTROPY_CODING_H__ + +#include "wels_common_defs.h" +#include "golomb_common.h" + +using namespace WelsCommon; + +namespace WelsEnc { + +/************************************************************************/ +/* GOLOMB CODING FOR WELS ENCODER ONLY */ +/************************************************************************/ + + +/* + * Get size of the unsigned exp golomb code of kiValue: values below 256 are read directly from g_kuiGolombUELength, larger values are first reduced by 16 and/or 8 bits (tracked in n) before the table lookup + */ +static inline uint32_t BsSizeUE (const uint32_t kiValue) { +if (256 > kiValue) { + return g_kuiGolombUELength[kiValue]; +} else { + uint32_t n = 0; + uint32_t iTmpValue = kiValue + 1; + + if (iTmpValue & 0xffff0000) { + iTmpValue >>= 16; + n += 16; + } + if (iTmpValue & 0xff00) { + iTmpValue >>= 8; + n += 8; + } + + //n += (g_kuiGolombUELength[iTmpValue] >> 1); + n += (g_kuiGolombUELength[iTmpValue - 1] >> 1); + return ((n << 1) + 1); + +} +} + +/* + * Get size of the signed exp golomb code of kiValue: 0 yields 1; otherwise kiValue is mapped to an unsigned value (2k-1 for k > 0, -2k for k < 0) and sized with BsSizeUE + */ +static inline uint32_t BsSizeSE (const int32_t kiValue) { +uint32_t iTmpValue; +if (0 == kiValue) { + return 1; +} else if (0 < kiValue) { + iTmpValue = (kiValue << 1) - 1; + return BsSizeUE (iTmpValue); +} else { + iTmpValue = ((-kiValue) << 1); + return BsSizeUE (iTmpValue); +} +} + +/* + * Write truncated exp golomb codes: when kiX is 1 a single bit (!kuiValue) is written, otherwise kuiValue is written with BsWriteUE + */ +static inline void BsWriteTE (SBitStringAux* pBs, const int32_t kiX, const uint32_t kuiValue) { +if (1 == kiX) { + BsWriteOneBit (pBs, !kuiValue); +} else { + BsWriteUE (pBs, kuiValue); +} +} +/* Bit offset relative to pStartBuf: 8 bits per byte up to pCurBuf plus (32 - iLeftBits); presumably the latter are bits still pending in uiCurBits - confirm against SBitStringAux */ +static inline int32_t BsGetBitsPos (SBitStringAux* pBs) { +return (int32_t) (((pBs->pCurBuf - pBs->pStartBuf) << 3) + 32 - pBs->iLeftBits); +} +/* If iLeftBits is not a multiple of 8, shift uiCurBits left by (iLeftBits & 7), fill the vacated low bits with 1s and round iLeftBits down to a multiple of 8; then call BsFlush */ +static inline void BsAlign( SBitStringAux* pBs ) +{ + if( pBs->iLeftBits&7 ) + { + pBs->uiCurBits <<= pBs->iLeftBits&7; + 
pBs->uiCurBits |= (1 << (pBs->iLeftBits&7)) - 1; + pBs->iLeftBits &= ~7; + } + BsFlush(pBs ); +} +} +#endif//WELS_EXPONENTIAL_GOLOMB_ENTROPY_CODING_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/svc_enc_macroblock.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/svc_enc_macroblock.h new file mode 100644 index 000000000..c8446f221 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/svc_enc_macroblock.h @@ -0,0 +1,82 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +//macroblock.h +#ifndef WELS_MACROBLOCK_H__ +#define WELS_MACROBLOCK_H__ + +#include "typedefs.h" +#include "wels_const.h" +#include "wels_common_basis.h" +#include "macros.h" + +namespace WelsEnc { + +//struct Mb_s; + +/* MB syntax and context, refer to Page 399 in JVT X201wcm */ +// keep the most essential level pData structure be 64 Bytes, which matches cache line size; if so, the order with structure maybe negligible. +// pls take care when modify MB structure size +typedef struct TagMB { +/*************************mb_layer() syntax and generated********************************/ +/*mb_layer():*/ +Mb_Type uiMbType; // including MB detailed partition type, number and type of reference list +uint8_t uiSubMbType[4]; // sub MB types +int32_t iMbXY; // offset position of MB top left point based +int16_t iMbX; // position of MB in horizontal axis [0..32767] +int16_t iMbY; // position of MB in vertical axis [0..32767] + +uint8_t uiNeighborAvail; // avail && same_slice: LEFT_MB_POS:0x01, TOP_MB_POS:0x02, TOPRIGHT_MB_POS = 0x04 ,TOPLEFT_MB_POS = 0x08; +uint8_t uiCbp; + +SMVUnitXY* sMv; +int8_t* pRefIndex; + +int32_t* pSadCost; // mb sad. set to 0 for intra mb +int8_t* pIntra4x4PredMode; // [MB_BLOCK4x4_NUM] +int8_t* pNonZeroCount; // [MB_LUMA_CHROMA_BLOCK4x4_NUM] + +SMVUnitXY sP16x16Mv; + +uint8_t uiLumaQp; // uiLumaQp: pPps->iInitialQp + sSliceHeader->delta_qp + mb->dquant. +uint8_t uiChromaQp; +uint16_t uiSliceIdc; // 2^16=65536 > MaxFS(36864) of level 5.1; AVC: iFirstMbInSlice?; SVC: (iFirstMbInSlice << 7) | ((uiDependencyId << 4) | uiQualityId); +uint32_t uiChromPredMode; +int32_t iLumaDQp; +SMVUnitXY sMvd[MB_BLOCK4x4_NUM]; //only for CABAC writing; storage structure the same as sMv, in 4x4 scan order. +int32_t iCbpDc; +//uint8_t reserved_filling_bytes[1]; // not deleting this line for further changes of this structure. 
filling bytes reserved to make structure aligned with 4 bytes, higher cache hit on less structure size by 2 cache lines( 2 * 64 bytes) once hit +} SMB, *PMb; + +} + +#endif//WELS_MACROBLOCK_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/svc_enc_slice_segment.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/svc_enc_slice_segment.h new file mode 100644 index 000000000..d237dc3ec --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/svc_enc_slice_segment.h @@ -0,0 +1,212 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file slice_segment.h + * + * \brief SSlice segment routine (Single slice/multiple slice/fmo arrangement exclusive) + * + * \date 2/4/2009 Created + * + ************************************************************************************* + */ +#ifndef WELS_SLICE_SEGMENT_H__ +#define WELS_SLICE_SEGMENT_H__ + +#include "typedefs.h" +#include "macros.h" +#include "as264_common.h" +#include "memory_align.h" + +#include "codec_app_def.h" +#include "set_mb_syn_cabac.h" + +using namespace WelsCommon; + +namespace WelsEnc { + + +// NOTE: +// if PREFIX_NALs are used in base layer(iDid=0, qid=0), MAX_SLICES_NUM will be half of MAX_NAL_UNITS_IN_LAYER in case ST or MT without PACKING_ONE_SLICE_PER_LAYER +// in case MT and PACKING_ONE_SLICE_PER_LAYER, MAX_SLICES_NUM should not be exceeding MAX_LAYER_NUM_OF_FRAME +// for AVC cases, maximal resolution we can support up to (?x1024) for SM_ROWMB_SLICE slice mode +// fine solution for MAX_SLICES_NUM, need us use the variable instead of MACRO for any resolution combining any multiple-slice mode adaptive +#define SAVED_NALUNIT_NUM ( (MAX_SPATIAL_LAYER_NUM*MAX_QUALITY_LAYER_NUM) + 1 + MAX_SPATIAL_LAYER_NUM ) // SPS/PPS + SEI/SSEI + PADDING_NAL +#define MAX_SLICES_NUM ( ( MAX_NAL_UNITS_IN_LAYER - SAVED_NALUNIT_NUM ) / 3 ) // Also MAX_SLICES_NUM need constrained by implementation: uiSliceIdc allocated in SSliceCtx.pOverallMbMap need a byte range as expected +#define AVERSLICENUM_CONSTRAINT (MAX_SLICES_NUM) // used in sNalList initialization, + +#define MIN_NUM_MB_PER_SLICE 48 // (128/16 * 96/16), addressing the lowest resolution for multiple slicing is 128x96 above + +#define DEFAULT_MAXPACKETSIZE_CONSTRAINT (1200) //in bytes +//#define MINPACKETSIZE_CONSTRAINT (1200) + +#define AVER_MARGIN_BYTES ( 100 ) //in bytes +#define JUMPPACKETSIZE_CONSTRAINT(max_byte) ( max_byte - AVER_MARGIN_BYTES ) //in bytes +#define JUMPPACKETSIZE_JUDGE(len,mb_idx,max_byte) ( (len) > JUMPPACKETSIZE_CONSTRAINT(max_byte) ) //( 
(mb_idx+1)%40/*16slice for compare*/ == 0 ) // +//cur_mb_idx is for early tests, can be omitted in optimization +typedef struct TagSlice SSlice; +typedef struct TagDqLayer SDqLayer; +typedef struct TagWelsEncCtx sWelsEncCtx; +/*! + * \brief SSlice context + */ +/* Single/multiple slices */ +typedef struct SlicepEncCtx_s { +SliceModeEnum uiSliceMode; /* 0: single slice in frame; 1: multiple slices in frame; */ +int16_t iMbWidth; /* width of picture size in mb */ +int16_t iMbHeight; /* height of picture size in mb */ +int32_t iSliceNumInFrame; /* count number of slices in frame; */ +int32_t iMbNumInFrame; /* count number of MBs in frame */ +uint16_t* pOverallMbMap; /* overall MB map in frame, store virtual slice idc; */ +uint32_t uiSliceSizeConstraint; /* in byte */ +int32_t iMaxSliceNumConstraint; /* maximal number of slices constraint */ + +} SSliceCtx; + + +typedef struct TagDynamicSlicingStack { +int32_t iStartPos; +int32_t iCurrentPos; + +uint8_t* pBsStackBufPtr; // current writing position +uint32_t uiBsStackCurBits; +int32_t iBsStackLeftBits; + +SCabacCtx sStoredCabac; +int32_t iMbSkipRunStack; +uint8_t uiLastMbQp; +uint8_t* pRestoreBuffer; +} SDynamicSlicingStack; + +/*! + * \brief Initialize Wels SSlice context (Single/multiple slices and FMO) + * + * \param pCurDq current layer whose SSlice context will be initialized + * \param pMa memory-alignment helper (CMemoryAlign) - presumably used for allocations; confirm in implementation + * \param bFmoUseFlag flag of using fmo + * \param iMbWidth MB width + * \param iMbHeight MB height + * \param pSliceArgument slice argument (SSliceArgument), e.g. for multiple slices if applicable + * \param pPpsArg argument for pPps parameter + * + * \return 0 - successful; non-zero - failed; + */ +int32_t InitSlicePEncCtx (SDqLayer* pCurDq, + CMemoryAlign* pMa, + bool bFmoUseFlag, + int32_t iMbWidth, + int32_t iMbHeight, + SSliceArgument* pSliceArgument, + void* pPpsArg); + + +/*! 
+ * \brief Uninitialize Wels SSlice context (Single/multiple slices and FMO) + * + * \param pCurDq current layer whose SSlice context will be uninitialized + * \param pMa memory-alignment helper (CMemoryAlign) - presumably releases what InitSlicePEncCtx allocated; confirm in implementation + * \return NONE; + */ +void UninitSlicePEncCtx (SDqLayer* pCurDq, CMemoryAlign* pMa); + +/*! + * \brief Get slice idc for given iMbXY (apply in Single/multiple slices and FMO) + * + * \param pCurDq current layer info + * \param kiMbXY MB xy index + * + * \return uiSliceIdc - successful; (uint8_t)(-1) - failed; NOTE(review): declared return type is uint16_t - confirm the failure sentinel against the implementation + */ +uint16_t WelsMbToSliceIdc (SDqLayer* pCurDq, const int32_t kiMbXY); + +/*! + * \brief Get first mb in slice/slice_group: uiSliceIdc (apply in Single/multiple slices and FMO) + * + * \param pCurLayer current layer + * \param kiSliceIdc slice idc + * + * \return first_mb - successful; -1 - failed; + */ +int32_t WelsGetFirstMbOfSlice (SDqLayer* pCurLayer, const int32_t kiSliceIdc); + +/*! + * \brief Get successive mb to be processed in slice/slice_group: uiSliceIdc (apply in Single/multiple slices and FMO) + * + * \param pCurDq current layer info + * \param kiMbXY MB xy index + * + * \return next_mb - successful; -1 - failed; + */ +int32_t WelsGetNextMbOfSlice (SDqLayer* pCurDq, const int32_t kiMbXY); + +/*! + * \brief Get previous mb to be processed in slice/slice_group: uiSliceIdc (apply in Single/multiple slices and FMO) + * + * \param pCurDq current layer info + * \param kiMbXY MB xy index + * + * \return prev_mb - successful; -1 - failed; + */ +int32_t WelsGetPrevMbOfSlice (SDqLayer* pCurDq, const int32_t kiMbXY); + +/*! + * \brief Get number of mb in slice/slice_group: uiSliceIdc (apply in Single/multiple slices and FMO) + * + * \param pCurDq current layer info + * \param pSlice slice which request slice num + * \param kuiSliceIdc slice/slice_group idc + * + * \return count_num_of_mb - successful; -1 - failed; + */ +int32_t WelsGetNumMbInSlice (SDqLayer* pCurDq, SSlice* pSlice, const int32_t kuiSliceIdc); + +/*! 
+ * Get slice count for multiple slice segment + * + */ +int32_t GetInitialSliceNum (SSliceArgument* pSliceArgument); +int32_t GetCurrentSliceNum (const SDqLayer* pCurDq); +SSlice* GetSliceByIndex(sWelsEncCtx* pCtx, const int32_t kiSliceIdc); + +//checking valid para +int32_t DynamicMaxSliceNumConstraint (uint32_t uiMaximumNum, int32_t uiConsumedNum, uint32_t uiDulplicateTimes); + +bool CheckFixedSliceNumMultiSliceSetting (const int32_t kiMbNumInFrame, SSliceArgument* pSliceArg); +bool CheckRasterMultiSliceSetting (const int32_t kiMbNumInFrame, SSliceArgument* pSliceArg); +bool CheckRowMbMultiSliceSetting (const int32_t kiMbWidth, SSliceArgument* pSliceArg); + +bool GomValidCheckSliceNum (const int32_t kiMbWidth, const int32_t kiMbHeight, uint32_t* pSliceNum); +bool GomValidCheckSliceMbNum (const int32_t kiMbWidth, const int32_t kiMbHeight, SSliceArgument* pSliceArg); +//end of checking valid para + +int32_t DynamicAdjustSlicePEncCtxAll (SDqLayer* pCurDq, + int32_t* pRunLength); +} +#endif//WELS_SLICE_SEGMENT_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/svc_encode_mb.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/svc_encode_mb.h new file mode 100644 index 000000000..6408e637e --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/svc_encode_mb.h @@ -0,0 +1,63 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file encode_mb.h + * + * \brief interface for mb encoding + * + * \date 5/21/2009 Created + * + ************************************************************************************* + */ +#if !defined(ENCODE_MB_H) +#define ENCODE_MB_H + + +#include "typedefs.h" +#include "wels_common_basis.h" +#include "slice.h" +#include "encoder_context.h" +#include "wels_func_ptr_def.h" + +namespace WelsEnc { +void WelsDctMb (int16_t* pRs, uint8_t* pEncMb, int32_t iEncStride, uint8_t* pBestPred, PDctFunc pfDctFourT4); + +void WelsEncRecI16x16Y (sWelsEncCtx* pEncCtx, SMB* pCurMb, SMbCache* pMbCache); +void WelsEncRecI4x4Y (sWelsEncCtx* pEncCtx, SMB* pCurMb, SMbCache* pMbCache, uint8_t uiI4x4Idx); +void WelsEncInterY (SWelsFuncPtrList* func, SMB* pCurMb, SMbCache* pMbCache); +void WelsEncRecUV (SWelsFuncPtrList* func, SMB* pCurMb, SMbCache* pMbCache, int16_t* pRs, int32_t iUV); +void WelsRecPskip (SDqLayer* pCurDq, SWelsFuncPtrList* pFunc, SMB* pCurMb, SMbCache* pMbCache); + +bool WelsTryPYskip (sWelsEncCtx* pEncCtx, SMB* pCurMb, SMbCache* pMbCache); +bool WelsTryPUVskip (sWelsEncCtx* pEncCtx, SMB* pCurMb, SMbCache* pMbCache, int32_t iUV); +} +#endif + diff --git 
a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/svc_encode_slice.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/svc_encode_slice.h new file mode 100644 index 000000000..a1de2c554 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/svc_encode_slice.h @@ -0,0 +1,184 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * \file svc_encode_slice.h + * + * \brief svc encoding slice + * + * \date 2009.07.27 Created + * + ************************************************************************************* + */ +#ifndef SVC_ENCODE_SLICE_H__ +#define SVC_ENCODE_SLICE_H__ + +#include "encoder_context.h" +#include "as264_common.h" +#include "svc_enc_macroblock.h" +#include "mb_cache.h" + +namespace WelsEnc { +#if defined(MB_TYPES_CHECK) +void WelsCountMbType (int32_t (*iMbCount)[18], const EWelsSliceType eSt, const SMB* pMb); +#endif + +void UpdateMbNeighbor(SDqLayer* pCurDq, SMB* pMb, const int32_t kiMbWidth, uint16_t uiSliceIdc); + +void UpdateNonZeroCountCache (SMB* pMb, SMbCache* pMbCache); + +//for P SSlice (intra part + inter part, MB level) +void OutputPMbWithoutConstructCsRsNoCopy (sWelsEncCtx* pEncCtx, SDqLayer* pDq, SSlice* pSlice, SMB* pMb); + +void WelsSliceHeaderScalExtInit (SDqLayer* pCurLayer, SSlice* pSlice); +void WelsSliceHeaderExtInit (sWelsEncCtx* pEncCtx, SDqLayer* pCurLayer, SSlice* pSlice); + +void WelsSliceHeaderWrite (SBitStringAux* pBs, SDqLayer* pCurLayer, SSlice* pSlice, uint32_t uiPpsIdBasis); +void WelsSliceHeaderExtWrite (SBitStringAux* pBs, SDqLayer* pCurLayer, SSlice* pSlice, uint32_t uiPpsIdBasis); + +//===================MB-level encode====================// +void WelsInterMbEncode (sWelsEncCtx* pEncCtx, SSlice* pSlice, SMB* pCurMb); //only for inter part +//for I SSlice (only intra part, MB level) +void WelsIMbChromaEncode (sWelsEncCtx* pEncCtx, SMB* pCurMb, SMbCache* pMbCache); +//for P SSlice (intra part + inter part, MB level) +void WelsPMbChromaEncode (sWelsEncCtx* pEncCtx, SSlice* pSlice, SMB* pCurMb); + + +//===================MB-level encode====================// +//encapsulation func: store base rec, highest Dependency Layer(only one quality) rec, single layer rec +int32_t WelsPSliceMdEnc (sWelsEncCtx* pEncCtx, SSlice* pSlice, const bool kbIsHighestDlayerFlag); +int32_t WelsPSliceMdEncDynamic (sWelsEncCtx* pEncCtx, SSlice* pSlice, 
const bool kbIsHighestDlayerFlag); + +//encapsulation func: store base rec, highest Dependency Layer(only one quality) rec, single layer rec +int32_t WelsISliceMdEnc (sWelsEncCtx* pEncCtx, SSlice* pSlice); // for intra non-dynamic slice +int32_t WelsISliceMdEncDynamic (sWelsEncCtx* pEncCtx, SSlice* pSlice); // for intra dynamic slice + +//slice buffer init, allocate/re-allocate and free process +int32_t AllocMbCacheAligned (SMbCache* pMbCache, CMemoryAlign* pMa); +void FreeMbCache (SMbCache* pMbCache, CMemoryAlign* pMa); + +int32_t InitSliceBoundaryInfo (SDqLayer* pCurLayer, + SSliceArgument* pSliceArgument, + const int32_t kiSliceNumInFrame); + +int32_t SetSliceBoundaryInfo(SDqLayer* pCurLayer, SSlice* pSlice, const int32_t kiSliceIdx); + +int32_t AllocateSliceMBBuffer (SSlice* pSlice, CMemoryAlign* pMa); + +int32_t InitSliceBsBuffer (SSlice* pSlice, + SBitStringAux* pBsWrite, + bool bIndependenceBsBuffer, + const int32_t iMaxSliceBufferSize, + CMemoryAlign* pMa); + +void FreeSliceBuffer (SSlice*& pSliceList, + const int32_t kiMaxSliceNum, + CMemoryAlign* pMa, + const char* kpTag); + +void InitSliceHeadWithBase (SSlice* pSlice, SSlice* pBaseSlice); + +void InitSliceRefInfoWithBase (SSlice* pSlice, SSlice* pBaseSlice, const uint8_t kuiRefCount); + +int32_t InitSliceList (SSlice*& pSliceList, + SBitStringAux* pBsWrite, + const int32_t kiMaxSliceNum, + const int32_t kiMaxSliceBufferSize, + const bool bIndependenceBsBuffer, + CMemoryAlign* pMa); + +int32_t InitAllSlicesInThread (sWelsEncCtx* pCtx); + +int32_t InitOneSliceInThread (sWelsEncCtx* pCtx, + SSlice*& pSlice, + const int32_t kiSlcBuffIdx, + const int32_t kiDlayerIdx, + const int32_t kiSliceIdx); + +int32_t InitSliceInLayer (sWelsEncCtx* pCtx, + SDqLayer* pDqLayer, + const int32_t kiDlayerIndex, + CMemoryAlign* pMa); + +int32_t ReallocateSliceList (sWelsEncCtx* pCtx, + SSliceArgument* pSliceArgument, + SSlice*& pSliceList, + const int32_t kiMaxSliceNumOld, + const int32_t kiMaxSliceNumNew); + +int32_t 
ReallocateSliceInThread (sWelsEncCtx* pCtx, + SDqLayer* pDqLayer, + const int32_t kiDlayerIdx, + const int32_t KiSlcBuffIdx); + +int32_t ReallocSliceBuffer (sWelsEncCtx* pCtx); + +int32_t GetCurLayerNalCount(const SDqLayer* pCurDq, const int32_t kiCodedSliceNum); +int32_t GetTotalCodedNalCount(SFrameBSInfo* pFbi); + +int32_t FrameBsRealloc (sWelsEncCtx* pCtx, + SFrameBSInfo* pFrameBsInfo, + SLayerBSInfo* pLayerBsInfo, + const int32_t kiMaxSliceNumOld); + +int32_t ReOrderSliceInLayer(sWelsEncCtx* pCtx, + const SliceModeEnum kuiSliceMode, + const int32_t kiThreadNum); + +int32_t SliceLayerInfoUpdate (sWelsEncCtx* pCtx, + SFrameBSInfo* pFrameBsInfo, + SLayerBSInfo* pLayerBsInfo, + const SliceModeEnum kuiSliceMode); + +//slice encoding process +int32_t WelsCodePSlice (sWelsEncCtx* pEncCtx, SSlice* pSlice); +int32_t WelsCodePOverDynamicSlice (sWelsEncCtx* pEncCtx, SSlice* pSlice); + +int32_t WelsCodeOneSlice (sWelsEncCtx* pEncCtx, SSlice* pCurSlice, + const int32_t keNalType); + +void WelsInitSliceEncodingFuncs (uint32_t uiCpuFlag); + +void UpdateMbNeighbourInfoForNextSlice (SDqLayer* pCurDq, + SMB* pMbList, + const int32_t kiNextSliceFirstMbIdx, + const int32_t kiLastMbIdxInPartition); +void AddSliceBoundary (sWelsEncCtx* pEncCtx, SSlice* pCurSlice, SSliceCtx* pSliceCtx, SMB* pCurMb, + int32_t iNextSliceFirstMbIdx, const int32_t kiLastMbIdxInPartition); +int32_t WelsMdInterMbLoop (sWelsEncCtx* pEncCtx, SSlice* pSlice, void* pMd, + const int32_t kiSliceFirstMbXY); // for inter non-dynamic slice +int32_t WelsMdInterMbLoopOverDynamicSlice (sWelsEncCtx* pEncCtx, SSlice* pSlice, void* pMd, + const int32_t kiSliceFirstMbXY); // for inter dynamic slice + + +bool DynSlcJudgeSliceBoundaryStepBack (void* pEncCtx, void* pSlice, SSliceCtx* pSliceCtx, SMB* pCurMb, + SDynamicSlicingStack* pDss); +} +#endif //SVC_ENCODE_SLICE_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/svc_mode_decision.h 
b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/svc_mode_decision.h new file mode 100644 index 000000000..c45c80151 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/svc_mode_decision.h @@ -0,0 +1,94 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file svc_mode_decision.h + * + * \brief SVC Spatial Enhancement Layer MD + * + * \date 2009.7.29 Created + * + ************************************************************************************* + */ + +#ifndef SVC_MODE_DECISION_H +#define SVC_MODE_DECISION_H +#include "encoder_context.h" +#include "svc_encode_mb.h" +#include "svc_encode_slice.h" +#include "svc_enc_macroblock.h" +#include "md.h" + + +namespace WelsEnc { +//////////////////////// +// INTERFACE, called by svc_encode_slice.c +/////////////////////// +#define DELTA_QP_SCD_THD 5 + +typedef enum { +STATIC, +SCROLLED +} ESkipModes; + +// NOILP ILFMD ENTRANCE +void WelsMdSpatialelInterMbIlfmdNoilp (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice* pSlice, SMB* pCurMb, + const Mb_Type kuiRefMbType); +void WelsMdInterMbEnhancelayer (sWelsEncCtx* pEnc, SWelsMD* pMd, SSlice* pSlice, SMB* pCurMb, SMbCache* pMbCache); +SMB* GetRefMb (SDqLayer* pCurLayer, SMB* pCurMb); +void SetMvBaseEnhancelayer (SWelsMD* pMd, SMB* pCurMb, const SMB* kpRefMb); + +////////////// +// MD from background detection +////////////// +bool WelsMdInterJudgeBGDPskip (sWelsEncCtx* pEnc, SWelsMD* pMd, SSlice* pSlice, SMB* pCurMb, SMbCache* pMbCache, + bool* bKeepSkip); +bool WelsMdInterJudgeBGDPskipFalse (sWelsEncCtx* pEnc, SWelsMD* pMd, SSlice* pSlice, SMB* pCurMb, SMbCache* pMbCache, + bool* bKeepSkip); + +void WelsMdUpdateBGDInfo (SDqLayer* pCurLayer, SMB* pCurMb, const bool kbCollocatedPredFlag, + const int32_t kiRefPictureType); +void WelsMdUpdateBGDInfoNULL (SDqLayer* pCurLayer, SMB* pCurMb, const bool kbCollocatedPredFlag, + const int32_t kiRefPictureType); + +////////////// +// MD for screen contents +////////////// +bool MdInterSCDPskipProcess (sWelsEncCtx* pEncCtx, SWelsMD* pMd, SSlice* pSlice, SMB* pCurMb, SMbCache* pMbCache, + ESkipModes eSkipMode); +typedef bool (*pJudgeSkipFun) (sWelsEncCtx* pEncCtx, SMB* pCurMb, SMbCache* pMbCache, SWelsMD* pWelsMd); +void SetBlockStaticIdcToMd (void* pVaa, void* pMd, SMB* 
pCurMb, void* pDqLay); +void WelsInitSCDPskipFunc (SWelsFuncPtrList* pFuncList, const bool bScrollingDetection); + +void SetScrollingMvToMd (SVAAFrameInfo* pVaa, SWelsMD* pWelsMd); +void SetScrollingMvToMdNull (SVAAFrameInfo* pVaa, SWelsMD* pWelsMd); +} +#endif //SVC_MODE_DECISION_H + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/svc_motion_estimate.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/svc_motion_estimate.h new file mode 100644 index 000000000..dbfbc3c28 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/svc_motion_estimate.h @@ -0,0 +1,359 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file svc_motion_estimate.h + * + * \brief Interfaces introduced in svc mb motion estimation + * + * \date 08/11/2009 Created + * + ************************************************************************************* + */ +#ifndef SVC_MOTION_ESTIMATE_ +#define SVC_MOTION_ESTIMATE_ + +#include "typedefs.h" +#include "encoder_context.h" +#include "wels_func_ptr_def.h" + +namespace WelsEnc { +#define CAMERA_STARTMV_RANGE (64) +#define ITERATIVE_TIMES (16) +#define CAMERA_MV_RANGE (CAMERA_STARTMV_RANGE+ITERATIVE_TIMES) +#define CAMERA_MVD_RANGE ((CAMERA_MV_RANGE+1)<<1) //mvd=mv_range*2; +#define BASE_MV_MB_NMB ((2*CAMERA_MV_RANGE/MB_WIDTH_LUMA)-1) +#define CAMERA_HIGHLAYER_MVD_RANGE (243)//mvd range; +#define EXPANDED_MV_RANGE (504) //=512-8 rather than 511 to sacrifice same edge point but save complexity in assembly +#define EXPANDED_MVD_RANGE ((504+1)<<1) + +enum { +ME_DIA = 0x01, // LITTLE DIAMOND= 0x01 +ME_CROSS = 0x02, // CROSS= 0x02 +ME_FME = 0x04, // FME = 0x04 +ME_FULL = 0x10, // FULL + +// derived ME methods combination +ME_DIA_CROSS = (ME_DIA | ME_CROSS), // DIA+CROSS +ME_DIA_CROSS_FME = (ME_DIA_CROSS | ME_FME) // DIA+CROSS+FME +}; + +union SadPredISatdUnit { +uint32_t uiSadPred; +uint32_t uiSatd; //reuse the sad_pred as a temp satd pData +}; +typedef struct TagWelsME { +/* input */ +uint16_t* pMvdCost; +union SadPredISatdUnit uSadPredISatd; //reuse the sad_pred as a temp pData +uint32_t +uiSadCost; //used by 
ME and RC //max SAD should be max_delta*size+lambda*mvdsize = 255*256+91*33*2 = 65280 + 6006 = 71286 > (2^16)-1 = 65535 +uint32_t uiSatdCost; /* satd + lm * nbits */ +uint32_t uiSadCostThreshold; +int32_t iCurMeBlockPixX; +int32_t iCurMeBlockPixY; +uint8_t uiBlockSize; /* BLOCK_WxH */ +uint8_t uiReserved; + +uint8_t* pEncMb; +uint8_t* pRefMb; +uint8_t* pColoRefMb; + +SMVUnitXY sMvp; +SMVUnitXY sMvBase; +SMVUnitXY sDirectionalMv; + +SScreenBlockFeatureStorage* pRefFeatureStorage; + +/* output */ +SMVUnitXY sMv; +} SWelsME; + +typedef struct TagFeatureSearchIn { +PSampleSadSatdCostFunc pSad; + +uint32_t* pTimesOfFeature; +uint16_t** pQpelLocationOfFeature; +uint16_t* pMvdCostX; +uint16_t* pMvdCostY; + +uint8_t* pEnc; +uint8_t* pColoRef; +int32_t iEncStride; +int32_t iRefStride; +uint16_t uiSadCostThresh; + +int32_t iFeatureOfCurrent; + +int32_t iCurPixX; +int32_t iCurPixY; +int32_t iCurPixXQpel; +int32_t iCurPixYQpel; + +int32_t iMinQpelX; +int32_t iMinQpelY; +int32_t iMaxQpelX; +int32_t iMaxQpelY; +} SFeatureSearchIn; + +typedef struct TagFeatureSearchOut { +SMVUnitXY sBestMv; +uint32_t uiBestSadCost; +uint8_t* pBestRef; +} SFeatureSearchOut; + +#define COST_MVD(table, mx, my) (table[mx] + table[my]) +extern const int32_t QStepx16ByQp[52]; + +// Function definitions below + +void WelsInitMeFunc (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag, bool bScreenContent); + +/*! + * \brief BL mb motion estimate search + * + * \param enc Wels encoder context + * \param m Wels me information + * + * \return NONE + */ +void WelsMotionEstimateSearch (SWelsFuncPtrList* pFuncList, SDqLayer* pLplayer, SWelsME* pLpme, SSlice* pLpslice); +void WelsMotionEstimateSearchStatic (SWelsFuncPtrList* pFuncList, SDqLayer* pLplayer, SWelsME* pLpme, SSlice* pLpslice); +void WelsMotionEstimateSearchScrolled (SWelsFuncPtrList* pFuncList, SDqLayer* pLplayer, SWelsME* pLpme, SSlice* pLpslice); +/*! 
+ * \brief BL mb motion estimate initial point testing + * + * \param enc Wels encoder context + * \param m Wels me information + * \param mv_range search range in motion estimate + * \param point the best match point in motion estimation + * + * \return NONE + */ + + +/*! + * \brief EL mb motion estimate initial point testing + * + * \param pix_func SSampleDealingFunc + * \param m Wels me information + * \param mv_range search range in motion estimate + * \param point the best match point in motion estimation + * + * \return NONE + */ + +bool WelsMotionEstimateInitialPoint (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice, + const int32_t kiStrideEnc, const int32_t kiStrideRef); + +/*! + * \brief mb iterative motion estimate search + * + * \param enc Wels encoder context + * \param m Wels me information + * \param point the best match point in motion estimation + * + * \return NONE + */ +void WelsDiamondSearch (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice, const int32_t kiEncStride, + const int32_t kiRefStride); + +bool WelsMeSadCostSelect (int32_t* pSadCost, const uint16_t* kpMvdCost, int32_t* pBestCost, const int32_t kiDx, + const int32_t kiDy, int32_t* pIx, int32_t* pIy); + +void CalculateSatdCost (PSampleSadSatdCostFunc pSatd, SWelsME* pMe, const int32_t kiEncStride, const int32_t kiRefStride); +void NotCalculateSatdCost (PSampleSadSatdCostFunc pSatd, SWelsME* pMe, const int32_t kiEncStride, + const int32_t kiRefStride); +bool CheckDirectionalMv (PSampleSadSatdCostFunc pSad, SWelsME* pMe, + const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv, const int32_t kiEncStride, const int32_t kiRefStride, + int32_t& iBestSadCost); +bool CheckDirectionalMvFalse (PSampleSadSatdCostFunc pSad, SWelsME* pMe, + const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv, const int32_t kiEncStride, const int32_t kiRefStride, + int32_t& iBestSadCost); + +// Cross Search Basics +void LineFullSearch_c (SWelsFuncPtrList* pFuncList, SWelsME* pMe, + uint16_t* pMvdTable, + 
const int32_t kiEncStride, const int32_t kiRefStride, + const int16_t kiMinMv, const int16_t kiMaxMv, + const bool bVerticalSearch); +#ifdef X86_ASM +extern "C" +{ +uint32_t SampleSad8x8Hor8_sse41 (uint8_t*, int32_t, uint8_t*, int32_t, uint16_t*, int32_t*); +uint32_t SampleSad16x16Hor8_sse41 (uint8_t*, int32_t, uint8_t*, int32_t, uint16_t*, int32_t*); +} + +void VerticalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe, + uint16_t* pMvdTable, + const int32_t kiEncStride, const int32_t kiRefStride, + const int16_t kiMinMv, const int16_t kiMaxMv, + const bool bVerticalSearch); +void HorizontalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe, + uint16_t* pMvdTable, + const int32_t kiEncStride, const int32_t kiRefStride, + const int16_t kiMinMv, const int16_t kiMaxMv, + const bool bVerticalSearch); +#endif +void WelsMotionCrossSearch (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice, + const int32_t kiEncStride, const int32_t kiRefStride); +void WelsDiamondCrossSearch (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice, + const int32_t kiEncStride, const int32_t kiRefStride); + +// Feature Search Basics +#define LIST_SIZE_SUM_16x16 0x0FF01 //(256*255+1) +#define LIST_SIZE_SUM_8x8 0x03FC1 //(64*255+1) +#define LIST_SIZE_MSE_16x16 0x00878 //(avg+mse)/2, max= (255+16*255)/2 + +#define FME_DEFAULT_FEATURE_INDEX (0) +#define FMESWITCH_DEFAULT_GOODFRAME_NUM (2) +#define FMESWITCH_MBSAD_THRESHOLD 30 // empirically set. 
+ +void InitializeHashforFeature_c (uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize, + uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList); +void FillQpelLocationByFeatureValue_c (uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight, + uint16_t** pFeatureValuePointerList); +int32_t SumOf8x8SingleBlock_c (uint8_t* pRef, const int32_t kiRefStride); +int32_t SumOf16x16SingleBlock_c (uint8_t* pRef, const int32_t kiRefStride); +void SumOf8x8BlockOfFrame_c (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight, + const int32_t kiRefStride, + uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); +void SumOf16x16BlockOfFrame_c (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight, + const int32_t kiRefStride, + uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); + +#ifdef X86_ASM +extern "C" +{ +void InitializeHashforFeature_sse2 (uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize, + uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList); +void FillQpelLocationByFeatureValue_sse2 (uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight, + uint16_t** pFeatureValuePointerList); +int32_t SumOf8x8SingleBlock_sse2 (uint8_t* pRef, const int32_t kiRefStride); +int32_t SumOf16x16SingleBlock_sse2 (uint8_t* pRef, const int32_t kiRefStride); +void SumOf8x8BlockOfFrame_sse2 (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight, + const int32_t kiRefStride, uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); +void SumOf16x16BlockOfFrame_sse2 (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight, + const int32_t kiRefStride, uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); +void SumOf8x8BlockOfFrame_sse4 (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight, + const int32_t kiRefStride, uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); +void 
SumOf16x16BlockOfFrame_sse4 (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight, + const int32_t kiRefStride, uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); +} +#endif +#ifdef HAVE_NEON +extern "C" +{ +void InitializeHashforFeature_neon (uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize, + uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList); +void FillQpelLocationByFeatureValue_neon (uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight, + uint16_t** pFeatureValuePointerList); +int32_t SumOf8x8SingleBlock_neon (uint8_t* pRef, const int32_t kiRefStride); +int32_t SumOf16x16SingleBlock_neon (uint8_t* pRef, const int32_t kiRefStride); +void SumOf8x8BlockOfFrame_neon (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight, + const int32_t kiRefStride, + uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); +void SumOf16x16BlockOfFrame_neon (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight, + const int32_t kiRefStride, + uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); +} +#endif + +#ifdef HAVE_NEON_AARCH64 +extern "C" +{ +void InitializeHashforFeature_AArch64_neon (uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize, + uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList); +void FillQpelLocationByFeatureValue_AArch64_neon (uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight, + uint16_t** pFeatureValuePointerList); +int32_t SumOf8x8SingleBlock_AArch64_neon (uint8_t* pRef, const int32_t kiRefStride); +int32_t SumOf16x16SingleBlock_AArch64_neon (uint8_t* pRef, const int32_t kiRefStride); +void SumOf8x8BlockOfFrame_AArch64_neon (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight, + const int32_t kiRefStride, + uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); +void SumOf16x16BlockOfFrame_AArch64_neon (uint8_t* pRefPicture, const int32_t kiWidth, const 
int32_t kiHeight, + const int32_t kiRefStride, + uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); +} +#endif +int32_t RequestScreenBlockFeatureStorage (CMemoryAlign* pMa, const int32_t kiFrameWidth, const int32_t kiFrameHeight, + const int32_t iNeedFeatureStorage, + SScreenBlockFeatureStorage* pScreenBlockFeatureStorage); +int32_t ReleaseScreenBlockFeatureStorage (CMemoryAlign* pMa, SScreenBlockFeatureStorage* pScreenBlockFeatureStorage); +int32_t RequestFeatureSearchPreparation (CMemoryAlign* pMa, const int32_t kiFrameWidth, const int32_t kiFrameHeight, + const int32_t iNeedFeatureStorage, + SFeatureSearchPreparation* pFeatureSearchPreparation); +int32_t ReleaseFeatureSearchPreparation (CMemoryAlign* pMa, uint16_t*& pFeatureOfBlock); + +#define FMESWITCH_DEFAULT_GOODFRAME_NUM (2) +#define FME_DEFAULT_FEATURE_INDEX (0) + + +void PerformFMEPreprocess (SWelsFuncPtrList* pFunc, SPicture* pRef, uint16_t* pFeatureOfBlock, + SScreenBlockFeatureStorage* pScreenBlockFeatureStorage); +bool SetFeatureSearchIn (SWelsFuncPtrList* pFunc, const SWelsME& sMe, + const SSlice* pSlice, SScreenBlockFeatureStorage* pRefFeatureStorage, + const int32_t kiEncStride, const int32_t kiRefStride, + SFeatureSearchIn* pFeatureSearchIn); +void MotionEstimateFeatureFullSearch (SFeatureSearchIn& sFeatureSearchIn, + const uint32_t kuiMaxSearchPoint, + SWelsME* pMe); +void UpdateFMESwitch (SDqLayer* pCurLayer); +void UpdateFMESwitchNull (SDqLayer* pCurLayer); + +void WelsDiamondCrossFeatureSearch (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice, + const int32_t kiEncStride, const int32_t kiRefStride); + +//inline functions +inline void SetMvWithinIntegerMvRange (const int32_t kiMbWidth, const int32_t kiMbHeight, const int32_t kiMbX, + const int32_t kiMbY, + const int32_t kiMaxMvRange, + SMVUnitXY* pMvMin, SMVUnitXY* pMvMax) { +pMvMin->iMvX = WELS_MAX (-1 * ((kiMbX + 1) * (1 << 4)) + INTPEL_NEEDED_MARGIN, -1 * kiMaxMvRange); +pMvMin->iMvY = WELS_MAX (-1 * ((kiMbY + 1) * (1 << 
4)) + INTPEL_NEEDED_MARGIN, -1 * kiMaxMvRange); +pMvMax->iMvX = WELS_MIN (((kiMbWidth - kiMbX) * (1 << 4)) - INTPEL_NEEDED_MARGIN, kiMaxMvRange); +pMvMax->iMvY = WELS_MIN (((kiMbHeight - kiMbY) * (1 << 4)) - INTPEL_NEEDED_MARGIN, kiMaxMvRange); +} + +inline bool CheckMvInRange (const SMVUnitXY ksCurrentMv, const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv) { +return (CheckInRangeCloseOpen (ksCurrentMv.iMvX, ksMinMv.iMvX, ksMaxMv.iMvX) + && CheckInRangeCloseOpen (ksCurrentMv.iMvY, ksMinMv.iMvY, ksMaxMv.iMvY)); +} +//FME switch related +inline bool CalcFMESwitchFlag (const uint8_t uiFMEGoodFrameCount, const int32_t iHighFreMbPrecentage, + const int32_t iAvgMbSAD, const bool bScrollingDetected) { +return (bScrollingDetected || (uiFMEGoodFrameCount > 0 && iAvgMbSAD > FMESWITCH_MBSAD_THRESHOLD)); +//TODO: add the logic of iHighFreMbPrecentage +//return ( iHighFreMbPrecentage > 2 +// && ( bScrollingDetected || iHighFreMbPrecentage >15 +// ||( uiFMEGoodFrameCount>0 && iFrameSAD > FMESWITCH_FRAMESAD_THRESHOLD ) ) ); +} +} +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/svc_set_mb_syn.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/svc_set_mb_syn.h new file mode 100644 index 000000000..6c7c87b60 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/svc_set_mb_syn.h @@ -0,0 +1,70 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file svc_set_mb_syn.h + * + * \brief Setting all syntax elements of mb and encoding residual with cavlc and cabac + * + * \date 2009.8.12 Created + * + ************************************************************************************* + */ + +#ifndef SVC_SET_MB_SYN_H_ +#define SVC_SET_MB_SYN_H_ + +#include "typedefs.h" +#include "wels_common_basis.h" +#include "encoder_context.h" +#include "md.h" +#include "slice.h" +#include "set_mb_syn_cavlc.h" +#include "set_mb_syn_cabac.h" + +namespace WelsEnc { + + + +int32_t WelsWriteMbResidual (SWelsFuncPtrList* pFuncList, SMbCache* sMbCacheInfo, SMB* pCurMb, SBitStringAux* pBs); + +void WelsSpatialWriteSubMbPred (sWelsEncCtx* pEncCtx, SSlice* pSlice, SMB* pCurMb); + +void WelsSpatialWriteMbPred (sWelsEncCtx* pEncCtx, SSlice* pSlice, SMB* pCurMb); +void WelsInitSliceCabac(sWelsEncCtx* pEncCtx,SSlice* pSlice); +void WelsCabacInit(void *pCtx); +void WelsWriteSliceEndSyn(SSlice *pSlice,bool bEntropyCodingModeFlag); +//for Base Layer CAVLC writing +int32_t WelsSpatialWriteMbSyn (sWelsEncCtx* Ctx, SSlice* pSlice, SMB* pCurMb); +int32_t WelsSpatialWriteMbSynCabac (sWelsEncCtx* pCtx, SSlice* pSlice, SMB* pCurMb); +int32_t 
GetBsPosCavlc(SSlice *pSlice); +int32_t GetBsPosCabac(SSlice *pSlice); +} +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/svc_set_mb_syn_cavlc.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/svc_set_mb_syn_cavlc.h new file mode 100644 index 000000000..076fdb477 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/svc_set_mb_syn_cavlc.h @@ -0,0 +1,67 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file svc_set_mb_syn_cavlc.h + * + * \brief Setting all syntax elements of mb and encoding residual with cavlc + * + * \date 2009.8.12 Created + * + ************************************************************************************* + */ +#ifndef SVC_SET_MB_SYN_CAVLC_H_ +#define SVC_SET_MB_SYN_CAVLC_H_ + +#include "typedefs.h" +#include "wels_common_basis.h" +#include "encoder_context.h" +#include "md.h" +#include "slice.h" +#include "set_mb_syn_cavlc.h" +#include "set_mb_syn_cabac.h" + +namespace WelsEnc { + + + +int32_t WelsWriteMbResidual (SWelsFuncPtrList* pFuncList, SMbCache* sMbCacheInfo, SMB* pCurMb, SBitStringAux* pBs); + +void WelsSpatialWriteSubMbPred (sWelsEncCtx* pEncCtx, SSlice* pSlice, SMB* pCurMb); + +void WelsSpatialWriteMbPred (sWelsEncCtx* pEncCtx, SSlice* pSlice, SMB* pCurMb); +void WelsInitSliceCabac(sWelsEncCtx* pEncCtx,SSlice* pSlice); +void WelsWriteSliceEndSyn(SSlice *pSlice,bool bEntropyCodingModeFlag); +//for Base Layer CAVLC writing +int32_t WelsSpatialWriteMbSyn (void* Ctx, SSlice* pSlice, SMB* pCurMb); +int32_t WelsSpatialWriteMbSynCabac (void* pCtx, SSlice* pSlice, SMB* pCurMb); + +} +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/vlc_encoder.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/vlc_encoder.h new file mode 100644 index 000000000..6b1a97b27 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/vlc_encoder.h @@ -0,0 +1,92 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifndef WELS_VLC_ENCODER_H__ +#define WELS_VLC_ENCODER_H__ + +#include "svc_enc_golomb.h" + +/************************************************************************/ +/* VLC FOR WELS ENCODER */ +/************************************************************************/ + +namespace WelsEnc { + +//g_kuiVlcCoeffToken[uiNc][total-coeff][trailing-ones][0--value, 1--bit count] +extern const uint8_t g_kuiVlcCoeffToken[5][17][4][2]; +extern const uint8_t g_kuiVlcLevelPrefix[15][2]; +//g_kuiVlcTotalZeros[tzVlcIndex][uiTotalZeros][0--value, 1--bit count] +extern const uint8_t g_kuiVlcTotalZeros[16][16][2]; +extern const uint8_t g_kuiVlcTotalZerosChromaDc[4][4][2]; +//add for mgs +extern const uint8_t g_kuiVlcTotalZerosChromaDc422[8][8][2]; +//g_kuiVlcRunBefore[zeros-left][run-before][0--value, 1--bit count] +extern const uint8_t g_kuiVlcRunBefore[8][15][2]; +extern const ALIGNED_DECLARE (uint8_t, g_kuiEncNcMapTable[18], 16); + +#define CHROMA_DC_NC_OFFSET 17 + +static inline int32_t WriteTotalCoeffTrailingones (SBitStringAux* pBs, uint8_t uiNc, uint8_t uiTotalCoeff, + uint8_t uiTrailingOnes) { +const uint8_t kuiNcIdx = g_kuiEncNcMapTable[uiNc]; +const uint8_t* kpCoeffToken = &g_kuiVlcCoeffToken[kuiNcIdx][uiTotalCoeff][uiTrailingOnes][0]; +return BsWriteBits (pBs, kpCoeffToken[1], kpCoeffToken[0]); +} + +static inline int32_t WriteTotalcoeffTrailingonesChroma (SBitStringAux* pBs, uint8_t uiTotalCoeff, + uint8_t uiTrailingOnes) { +const uint8_t* kpCoeffToken = &g_kuiVlcCoeffToken[4][uiTotalCoeff][uiTrailingOnes][0]; +return BsWriteBits (pBs, kpCoeffToken[1], kpCoeffToken[0]); +} + +//kuiZeroCount = level_prefix; +static inline int32_t WriteLevelPrefix (SBitStringAux* pBs, const uint32_t kuiZeroCount) { +BsWriteBits (pBs, kuiZeroCount + 1, 1); +return 0; +} + +static inline int32_t WriteTotalZeros (SBitStringAux* pBs, uint32_t uiTotalCoeff, uint32_t uiTotalZeros) { +const uint8_t* kpTotalZeros = &g_kuiVlcTotalZeros[uiTotalCoeff][uiTotalZeros][0]; +return 
BsWriteBits (pBs, kpTotalZeros[1], kpTotalZeros[0]); +} + +static inline int32_t WriteTotalZerosChromaDc (SBitStringAux* pBs, uint32_t uiTotalCoeff, uint32_t uiTotalZeros) { +const uint8_t* kpTotalZerosChromaDc = &g_kuiVlcTotalZerosChromaDc[uiTotalCoeff][uiTotalZeros][0]; +return BsWriteBits (pBs, kpTotalZerosChromaDc[1], kpTotalZerosChromaDc[0]); +} + +static inline int32_t WriteRunBefore (SBitStringAux* pBs, uint8_t uiZeroLeft, uint8_t uiRunBefore) { +const uint8_t* kpRunBefore = &g_kuiVlcRunBefore[uiZeroLeft][uiRunBefore][0]; +return BsWriteBits (pBs, kpRunBefore[1], kpRunBefore[0]); +} +} +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/wels_common_basis.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/wels_common_basis.h new file mode 100644 index 000000000..ec52169c8 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/wels_common_basis.h @@ -0,0 +1,160 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +//wels_common_basis.h +#ifndef WELS_COMMON_BASIS_H__ +#define WELS_COMMON_BASIS_H__ + +#include "typedefs.h" +#include "macros.h" + +#include "wels_const.h" + +#include "wels_common_defs.h" + +using namespace WelsCommon; + +namespace WelsEnc { + +typedef int32_t WelsErrorType; + +struct SMVUnitXY { // each 4 Bytes + int16_t iMvX; + int16_t iMvY; + public: + SMVUnitXY& sDeltaMv (const SMVUnitXY& _v0, const SMVUnitXY& _v1) { + iMvX = _v0.iMvX - _v1.iMvX; + iMvY = _v0.iMvY - _v1.iMvY; + return (*this); + }; + SMVUnitXY& sAssignMv (const SMVUnitXY& _v0) { + iMvX = _v0.iMvX; + iMvY = _v0.iMvY; + return (*this); + }; +}; + +typedef struct TagMVComponentUnit { // each LIST_0/LIST_1 + SMVUnitXY sMotionVectorCache[5 * 6 - 1]; // Luma only: 5 x 6 - 1 = 29 D-Words + int8_t iRefIndexCache[5 * 6]; // Luma only: 5 x 6 = 30 bytes +} SMVComponentUnit, *PMVComponentUnit; + + +typedef struct TagParaSetOffsetVariable { + int32_t iParaSetIdDelta[MAX_DQ_LAYER_NUM/*+1*/];//mark delta between SPS_ID_in_bs and sps_id_in_encoder, can be minus, for each dq-layer +//need not extra +1 due no MGS and FMO case so far + bool bUsedParaSetIdInBs[MAX_PPS_COUNT]; //mark the used SPS_ID with 1 + uint32_t uiNextParaSetIdToUseInBs; //mark the next SPS_ID_in_bs, for all layers +} SParaSetOffsetVariable; + +typedef struct TagParaSetOffset { +//in PS0 design, "sParaSetOffsetVariable" record the previous paras before current IDR, AND NEED to be stacked and 
recover across IDR + SParaSetOffsetVariable + sParaSetOffsetVariable[PARA_SET_TYPE]; //PARA_SET_TYPE=3; paraset_type = 0: AVC_SPS; =1: Subset_SPS; =2: PPS +//in PSO design, "bPpsIdMappingIntoSubsetsps" uses the current para of current IDR period + bool + bPpsIdMappingIntoSubsetsps[MAX_DQ_LAYER_NUM/*+1*/]; // need not extra +1 due no MGS and FMO case so far + + int32_t iPpsIdList[MAX_DQ_LAYER_NUM][MAX_PPS_COUNT]; //index0: max pps types; index1: for differnt IDRs, if only index0=1, index1 can reach MAX_PPS_COUNT + +#if _DEBUG + int32_t eSpsPpsIdStrategy; +#endif + + uint32_t uiNeededSpsNum; + uint32_t uiNeededSubsetSpsNum; + uint32_t uiNeededPpsNum; + + uint32_t uiInUseSpsNum; + uint32_t uiInUseSubsetSpsNum; + uint32_t uiInUsePpsNum; +} SParaSetOffset; + + + +/* Position Offset structure */ +typedef struct TagCropOffset { + int16_t iCropLeft; + int16_t iCropRight; + int16_t iCropTop; + int16_t iCropBottom; +} SCropOffset; + + +/* Transform Type */ + +enum ETransType { + T_4x4 = 0, + T_8x8 = 1, + T_16x16 = 2, + T_PCM = 3 +}; + +enum EMbPosition { + LEFT_MB_POS = 0x01, // A + TOP_MB_POS = 0x02, // B + TOPRIGHT_MB_POS = 0x04, // C + TOPLEFT_MB_POS = 0x08, // D, + RIGHT_MB_POS = 0x10, // add followed four case to reuse when intra up-sample + BOTTOM_MB_POS = 0x20, // + BOTTOMRIGHT_MB_POS = 0x40, // + BOTTOMLEFT_MB_POS = 0x80, // + MB_POS_A = 0x100 +}; + +/* MB Type & Sub-MB Type */ +typedef uint32_t Mb_Type; + +#define MB_LEFT_BIT 0// add to use in intra up-sample +#define MB_TOP_BIT 1 +#define MB_TOPRIGHT_BIT 2 +#define MB_TOPLEFT_BIT 3 +#define MB_RIGHT_BIT 4 +#define MB_BOTTOM_BIT 5 +#define MB_BTMRIGHT_BIT 6 +#define MB_BTMLEFT_BIT 7 + +#define MB_TYPE_BACKGROUND 0x00010000 // conditional BG skip_mb + +enum { + Intra4x4 = 0, + Intra16x16 = 1, + Inter16x16 = 2, + Inter16x8 = 3, + Inter8x16 = 4, + Inter8x8 = 5, + PSkip = 6 +}; + + +} +#endif//WELS_COMMON_BASIS_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/wels_const.h 
b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/wels_const.h new file mode 100644 index 000000000..741ac73e6 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/wels_const.h @@ -0,0 +1,174 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +//wels_const.h +#ifndef WELS_CONST_H__ +#define WELS_CONST_H__ + +#include "as264_common.h" // to communicate with specific macros there, 3/18/2010 +#include "codec_app_def.h" +#include "wels_const_common.h" + +/* To control number of spatial, quality and temporal layers constraint by application layer? 
*/ +#define NUM_SPATIAL_LAYERS_CONSTRAINT +#define NUM_QUALITY_LAYERS_CONSTRAINT + + +#define STATISTICS_LOG_INTERVAL_MS (5000) // output statistics log every 5s + +#define INTRA_4x4_MODE_NUM 8 +#define MB_LUMA_CHROMA_BLOCK4x4_NUM 24 + +#define MAX_PPS_COUNT_LIMITED 57// limit the max ID of PPS because of known limitation of receiver endpoints +#define MAX_PPS_COUNT (MAX_PPS_COUNT_LIMITED)//in Standard is 256 // Count number of PPS + +#define PARA_SET_TYPE 3 // SPS+PPS +#define PARA_SET_TYPE_AVCSPS 0 +#define PARA_SET_TYPE_SUBSETSPS 1 +#define PARA_SET_TYPE_PPS 2 + +#define MAX_VERTICAL_MV_RANGE 1024 //TODO, for allocate enough memory for transpose +#define MAX_FRAME_RATE 60 // maximal frame rate to support +#define MIN_FRAME_RATE 1 // minimal frame rate need support + +#define MAX_BIT_RATE INT_MAX // maximal bit rate to support +//TODO {Sijia}: 30fps*MaxCPB in level5.1 = 30*240000*1000bits = 7 200 000 000, larger than INT_MAX which is 2147483647, but this is also very big and abnormal number, should figure out a reasonable number after discussion +#define MIN_BIT_RATE 1 // minimal bit rate need support + +#define SVC_QUALITY_BASE_QP 26 +#define MAX_SLICEGROUP_IDS 8 // Count number of SSlice Groups +#define MAX_THREADS_NUM 4 // assume to support up to 4 logical cores(threads) + +#define INTPEL_NEEDED_MARGIN (3) // for safe sub-pel MC + +#define I420_PLANES 3 + +#define COMPRESS_RATIO_THR (1.0f) //set to size of the original data, which will be large enough considering MinCR + +#if !defined(SSEI_BUFFER_SIZE) +#define SSEI_BUFFER_SIZE 128 +#endif//SSEI_BUFFER_SIZE + +#if !defined(SPS_BUFFER_SIZE) +#define SPS_BUFFER_SIZE 32 +#endif//SPS_BUFFER_SIZE + +#if !defined(PPS_BUFFER_SIZE) +#define PPS_BUFFER_SIZE 16 +#endif//PPS_BUFFER_SIZE + +#if !defined(MAX_MACROBLOCK_SIZE_IN_BYTE) +#define MAX_MACROBLOCK_SIZE_IN_BYTE 400 //3200/8, 3200 is from Annex A.3.1.(n) +#endif + +#define MAX_MACROBLOCK_SIZE_IN_BYTE_x2 (MAX_MACROBLOCK_SIZE_IN_BYTE<<1) + +#if 
defined(NUM_SPATIAL_LAYERS_CONSTRAINT) +#define MAX_DEPENDENCY_LAYER MAX_SPATIAL_LAYER_NUM // Maximal dependency layer +#else +#define MAX_DEPENDENCY_LAYER 8 // Maximal dependency layer +#endif//NUM_SPATIAL_LAYERS_CONSTRAINT + +//The max temporal level support is equal or less than MAX_TEMPORAL_LAYER_NUM defined @ codec_app_def.h +#define MAX_TEMPORAL_LEVEL MAX_TEMPORAL_LAYER_NUM // Maximal temporal level + +#if defined(NUM_QUALITY_LAYERS_CONSTRAINT) +#define MAX_QUALITY_LEVEL MAX_QUALITY_LAYER_NUM // Maximal quality level +#else +#define MAX_QUALITY_LEVEL 16 // Maximal quality level +#endif//NUM_QUALITY_LAYERS_CONSTRAINT + +#if defined(MAX_GOP_SIZE) +#undef MAX_GOP_SIZE +#endif//MAX_GOP_SIZE +#define MAX_GOP_SIZE (1<<(MAX_TEMPORAL_LEVEL-1)) + +#define MAX_SHORT_REF_COUNT (MAX_GOP_SIZE>>1) // 16 in standard, maximal count number of short reference pictures +#define LONG_TERM_REF_NUM 2 +#define LONG_TERM_REF_NUM_SCREEN 4 +#define MAX_REF_PIC_COUNT 16 // 32 in standard, maximal Short + Long reference pictures +#define MIN_REF_PIC_COUNT 1 // minimal count number of reference pictures, 1 short + 2 key reference based? 
+#define MAX_MULTI_REF_PIC_COUNT 1 //maximum multi-reference number +//#define TOTAL_REF_MINUS_HALF_GOP 1 // last t0 in last gop +#define MAX_MMCO_COUNT 66 + +// adjusted numbers reference picture functionality related definition +#define MAX_REFERENCE_MMCO_COUNT_NUM 4 // adjusted MAX_MMCO_COUNT(66 in standard) definition per encoder design +#define MAX_REFERENCE_REORDER_COUNT_NUM 2 // adjusted MAX_REF_PIC_COUNT(32 in standard) for reference reordering definition per encoder design +#define MAX_REFERENCE_PICTURE_COUNT_NUM_CAMERA (MAX_SHORT_REF_COUNT+LONG_TERM_REF_NUM) // <= MAX_REF_PIC_COUNT, memory saved if < +#define MAX_REFERENCE_PICTURE_COUNT_NUM_SCREEN (MAX_SHORT_REF_COUNT+LONG_TERM_REF_NUM_SCREEN) // <= MAX_REF_PIC_COUNT, memory saved if < + +#define BASE_DEPENDENCY_ID 0 +#define MAX_DQ_LAYER_NUM (MAX_DEPENDENCY_LAYER/**MAX_QUALITY_LEVEL*/) + +#define INVALID_ID (-1) + +#define NAL_HEADER_ADD_0X30BYTES 20 + +#define SLICE_NUM_EXPAND_COEF 2 + +enum { +BLOCK_16x16 = 0, +BLOCK_16x8 = 1, +BLOCK_8x16 = 2, +BLOCK_8x8 = 3, +BLOCK_4x4 = 4, +BLOCK_8x4 = 5, +BLOCK_4x8 = 6, +BLOCK_SIZE_ALL = 7 +}; + +typedef enum { +RECIEVE_UNKOWN = 0, +RECIEVE_SUCCESS = 1, +RECIEVE_FAILED = 2 +} LTR_MARKING_RECEIVE_STATE; + +enum { + CUR_AU_IDX = 0, // index symbol for current access unit + SUC_AU_IDX = 1 // index symbol for successive access unit +}; + +enum { + ENC_RETURN_SUCCESS = 0, + ENC_RETURN_MEMALLOCERR = 0x01, //will free memory and uninit + ENC_RETURN_UNSUPPORTED_PARA = 0x02, //unsupported setting + ENC_RETURN_UNEXPECTED = 0x04, //unexpected value + ENC_RETURN_CORRECTED = 0x08, //unexpected value but corrected by encoder + ENC_RETURN_INVALIDINPUT = 0x10, //invalid input + ENC_RETURN_MEMOVERFLOWFOUND = 0x20, + ENC_RETURN_VLCOVERFLOWFOUND = 0x40, + ENC_RETURN_KNOWN_ISSUE = 0x80 +}; +//TODO: need to complete the return checking in encoder and fill in more types if needed + +#endif//WELS_CONST_H__ diff --git 
a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/wels_func_ptr_def.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/wels_func_ptr_def.h new file mode 100644 index 000000000..81d9a3723 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/wels_func_ptr_def.h @@ -0,0 +1,300 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +// wels_func_ptr_def.h +#ifndef WELS_ENCODER_FUNCTION_POINTERS_DEFINITION_H_ +#define WELS_ENCODER_FUNCTION_POINTERS_DEFINITION_H_ + +#include "typedefs.h" +#include "wels_common_basis.h" +#include "svc_enc_macroblock.h" +#include "mb_cache.h" +#include "slice.h" +#include "svc_enc_slice_segment.h" +#include "svc_enc_frame.h" +#include "expand_pic.h" +#include "rc.h" +#include "IWelsVP.h" +#include "mc.h" + +namespace WelsEnc { + +typedef struct TagWelsEncCtx sWelsEncCtx; +typedef struct TagWelsFuncPointerList SWelsFuncPtrList; +typedef struct TagVAAFrameInfo SVAAFrameInfo; + +typedef struct TagWelsME SWelsME; +typedef struct TagWelsMD SWelsMD; + +typedef void (*PSetMemoryZero) (void* pDst, int32_t iSize); +typedef void (*PDctFunc) (int16_t* pDct, uint8_t* pSample1, int32_t iStride1, uint8_t* pSample2, int32_t iStride2); + +typedef void (*PCopyFunc) (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); +typedef void (*PIDctFunc) (uint8_t* pRec, int32_t iStride, uint8_t* pPred, int32_t iPredStride, int16_t* pRes); +typedef void (*PDeQuantizationFunc) (int16_t* pRes, const uint16_t* kpQpTable); +typedef void (*PDeQuantizationHadamardFunc) (int16_t* pRes, const uint16_t kuiMF); +typedef int32_t (*PGetNoneZeroCountFunc) (int16_t* pLevel); + +typedef void (*PScanFunc) (int16_t* pLevel, int16_t* pDct); +typedef int32_t (*PCalculateSingleCtrFunc) (int16_t* pDct); + +typedef void (*PTransformHadamard4x4Func) (int16_t* pLumaDc, int16_t* pDct); +typedef void (*PQuantizationFunc) (int16_t* pDct, const int16_t* pFF, const int16_t* pMF); +typedef void (*PQuantizationMaxFunc) (int16_t* pDct, const int16_t* pFF, const int16_t* pMF, int16_t* pMax); +typedef void (*PQuantizationDcFunc) (int16_t* pDct, int16_t iFF, int16_t iMF); +typedef int32_t (*PQuantizationSkipFunc) (int16_t* pDct, int16_t iFF, int16_t iMF); +typedef int32_t (*PQuantizationHadamardFunc) (int16_t* pRes, const int16_t kiFF, int16_t iMF, int16_t* pDct, + int16_t* pBlock); + +typedef void 
(*PLumaDeblockingLT4Func) (uint8_t* iSampleY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* iTc); +typedef void (*PLumaDeblockingEQ4Func) (uint8_t* iSampleY, int32_t iStride, int32_t iAlpha, int32_t iBeta); +typedef void (*PChromaDeblockingLT4Func) (uint8_t* iSampleCb, uint8_t* iSampleCr, int32_t iStride, int32_t iAlpha, + int32_t iBeta, int8_t* iTc); +typedef void (*PChromaDeblockingEQ4Func) (uint8_t* iSampleCb, uint8_t* iSampleCr, int32_t iStride, int32_t iAlpha, + int32_t iBeta); +typedef void (*PDeblockingBSCalc) (SWelsFuncPtrList* pFunc, SMB* pCurMb, uint8_t uiBS[2][4][4], Mb_Type uiCurMbType, + int32_t iMbStride, int32_t iLeftFlag, int32_t iTopFlag); +typedef void (*PDeblockingFilterSlice) (SDqLayer* pCurDq, SWelsFuncPtrList* pFunc, SSlice* pSlice); + +typedef struct tagDeblockingFunc { + PLumaDeblockingLT4Func pfLumaDeblockingLT4Ver; + PLumaDeblockingEQ4Func pfLumaDeblockingEQ4Ver; + PLumaDeblockingLT4Func pfLumaDeblockingLT4Hor; + PLumaDeblockingEQ4Func pfLumaDeblockingEQ4Hor; + + PChromaDeblockingLT4Func pfChromaDeblockingLT4Ver; + PChromaDeblockingEQ4Func pfChromaDeblockingEQ4Ver; + PChromaDeblockingLT4Func pfChromaDeblockingLT4Hor; + PChromaDeblockingEQ4Func pfChromaDeblockingEQ4Hor; + + PDeblockingBSCalc pfDeblockingBSCalc; + + PDeblockingFilterSlice pfDeblockingFilterSlice; +} DeblockingFunc; + +typedef void (*PSetNoneZeroCountZeroFunc) (int8_t* pNonZeroCount); + +typedef int32_t (*PIntraFineMdFunc) (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurMb, SMbCache* pMbCache); +typedef void (*PInterFineMdFunc) (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice* slice, SMB* pCurMb, int32_t bestCost); +typedef bool (*PInterMdFirstIntraModeFunc) (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurMb, SMbCache* pMbCache); + +typedef void (*PFillInterNeighborCacheFunc) (SMbCache* pMbCache, SMB* pCurMb, int32_t iMbWidth, int8_t* pVaaBgMbFlag); +typedef void (*PAccumulateSadFunc) (uint32_t* pSumDiff, int32_t* pGomForegroundBlockNum, int32_t* iSad8x8, + 
int8_t* pVaaBgMbFlag);//for RC +typedef bool (*PDynamicSlicingStepBackFunc) (sWelsEncCtx* pEncCtx, SSlice* pSlice, SSliceCtx* pSliceCtx, SMB* pCurMb, + SDynamicSlicingStack* pDynamicSlicingStack); // 2010.8.17 + +typedef bool (*PInterMdBackgroundDecisionFunc) (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice* slice, SMB* pCurMb, + SMbCache* pMbCache, bool* pKeepPskip); +typedef void (*PMdBackgroundInfoUpdateFunc) (SDqLayer* pCurLayer, SMB* pCurMb, const bool bFlag, + const int32_t kiRefPictureType); + +typedef bool (*PInterMdScrollingPSkipDecisionFunc) (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice* slice, SMB* pCurMb, + SMbCache* pMbCache); +typedef void (*PSetScrollingMv) (SVAAFrameInfo* pVaa, SWelsMD* pMd); + +typedef void (*PInterMdFunc) (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice* slice, SMB* pCurMb, SMbCache* pMbCache); + +typedef int32_t (*PSampleSadSatdCostFunc) (uint8_t*, int32_t, uint8_t*, int32_t); +typedef void (*PSample4SadCostFunc) (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*); +typedef int32_t (*PIntraPred4x4Combined3Func) (uint8_t*, int32_t, uint8_t*, int32_t, uint8_t*, int32_t*, int32_t, + int32_t, int32_t); +typedef int32_t (*PIntraPred16x16Combined3Func) (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*, int32_t, uint8_t*); +typedef int32_t (*PIntraPred8x8Combined3Func) (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*, int32_t, uint8_t*, + uint8_t*, uint8_t*); + +typedef uint32_t (*PSampleSadHor8Func) (uint8_t*, int32_t, uint8_t*, int32_t, uint16_t*, int32_t*); +typedef void (*PMotionSearchFunc) (SWelsFuncPtrList* pFuncList, SDqLayer* pCurDqLayer, SWelsME* pMe, + SSlice* pSlice); +typedef void (*PSearchMethodFunc) (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice, const int32_t kiEncStride, + const int32_t kiRefStride); +typedef void (*PCalculateSatdFunc) (PSampleSadSatdCostFunc pSatd, SWelsME* pMe, const int32_t kiEncStride, + const int32_t kiRefStride); +typedef bool (*PCheckDirectionalMv) (PSampleSadSatdCostFunc pSad, SWelsME* pMe, 
+ const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv, const int32_t kiEncStride, const int32_t kiRefStride, + int32_t& iBestSadCost); +typedef void (*PLineFullSearchFunc) (SWelsFuncPtrList* pFuncList, SWelsME* pMe, + uint16_t* pMvdTable, + const int32_t kiEncStride, const int32_t kiRefStride, + const int16_t kiMinMv, const int16_t kiMaxMv, + const bool bVerticalSearch); +typedef void (*PInitializeHashforFeatureFunc) (uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize, + uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList); +typedef void (*PFillQpelLocationByFeatureValueFunc) (uint16_t* pFeatureOfBlock, const int32_t kiWidth, + const int32_t kiHeight, + uint16_t** pFeatureValuePointerList); +typedef void (*PCalculateBlockFeatureOfFrame) (uint8_t* pRef, const int32_t kiWidth, const int32_t kiHeight, + const int32_t kiRefStride, + uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); +typedef int32_t (*PCalculateSingleBlockFeature) (uint8_t* pRef, const int32_t kiRefStride); +typedef void (*PUpdateFMESwitch) (SDqLayer* pCurLayer); + +#define MAX_BLOCK_TYPE BLOCK_SIZE_ALL +typedef struct TagSampleDealingFunc { + PSampleSadSatdCostFunc pfSampleSad[MAX_BLOCK_TYPE]; + PSampleSadSatdCostFunc pfSampleSatd[MAX_BLOCK_TYPE]; + PSample4SadCostFunc pfSample4Sad[MAX_BLOCK_TYPE]; + PIntraPred4x4Combined3Func pfIntra4x4Combined3Satd; + PIntraPred16x16Combined3Func pfIntra16x16Combined3Satd; + PIntraPred16x16Combined3Func pfIntra16x16Combined3Sad; + PIntraPred8x8Combined3Func pfIntra8x8Combined3Satd; + PIntraPred8x8Combined3Func pfIntra8x8Combined3Sad; + + PSampleSadSatdCostFunc* pfMdCost; + PSampleSadSatdCostFunc* pfMeCost; + PIntraPred16x16Combined3Func pfIntra16x16Combined3; + PIntraPred8x8Combined3Func pfIntra8x8Combined3; + PIntraPred4x4Combined3Func pfIntra4x4Combined3; +} SSampleDealingFunc; +typedef void (*PGetIntraPredFunc) (uint8_t* pPrediction, uint8_t* pRef, const int32_t kiStride); + +typedef int32_t 
(*PGetVarianceFromIntraVaaFunc) (uint8_t* pSampelY, const int32_t kiStride); +typedef uint8_t (*PGetMbSignFromInterVaaFunc) (int32_t* pSad8x8); +typedef void (*PUpdateMbMvFunc) (SMVUnitXY* pMvUnit, const SMVUnitXY ksMv); + +typedef bool (*PBuildRefListFunc) (sWelsEncCtx* pCtx, const int32_t iPOC, int32_t iBestLtrRefIdx); +typedef void (*PMarkPicFunc) (sWelsEncCtx* pCtx); +typedef bool (*PUpdateRefListFunc) (sWelsEncCtx* pCtx); +typedef void (*PEndofUpdateRefListFunc) (sWelsEncCtx* pCtx); +typedef void (*PAfterBuildRefListFunc) (sWelsEncCtx* pCtx); + +typedef int32_t (*PCavlcParamCalFunc) (int16_t* pCoff, uint8_t* pRun, int16_t* pLevel, int32_t* pTotalCoeffs, + int32_t iEndIdx); +typedef int32_t (*PWelsSpatialWriteMbSyn) (sWelsEncCtx* pCtx, SSlice* pSlice, SMB* pCurMb); +typedef void (*PStashMBStatus) (SDynamicSlicingStack* pDss, SSlice* pSlice, int32_t iMbSkipRun); +typedef int32_t (*PStashPopMBStatus) (SDynamicSlicingStack* pDss, SSlice* pSlice); +typedef int32_t (*PGetBsPosition)(SSlice *pSlice); +class IWelsParametersetStrategy; + +struct TagWelsFuncPointerList { + SExpandPicFunc sExpandPicFunc; + PFillInterNeighborCacheFunc pfFillInterNeighborCache; + + PGetVarianceFromIntraVaaFunc pfGetVarianceFromIntraVaa; + PGetMbSignFromInterVaaFunc pfGetMbSignFromInterVaa; + PUpdateMbMvFunc pfUpdateMbMv; + PInterMdFirstIntraModeFunc pfFirstIntraMode; //svc_encode_slice.c svc_mode_decision.c svc_base_layer_md.c + PIntraFineMdFunc + pfIntraFineMd; //svc_encode_slice.c svc_mode_decision.c svc_base_layer_md.c + PInterFineMdFunc pfInterFineMd; //svc_encode_slice.c svc_base_layer_md.c + PInterMdFunc pfInterMd; + + PInterMdBackgroundDecisionFunc pfInterMdBackgroundDecision; + PMdBackgroundInfoUpdateFunc pfMdBackgroundInfoUpdate; + + PInterMdScrollingPSkipDecisionFunc pfSCDPSkipDecision; + PSetScrollingMv pfSetScrollingMv; + + SMcFunc sMcFuncs; + SSampleDealingFunc sSampleDealingFuncs; + PGetIntraPredFunc pfGetLumaI16x16Pred[I16_PRED_DC_A]; + PGetIntraPredFunc 
pfGetLumaI4x4Pred[I4_PRED_A]; + PGetIntraPredFunc pfGetChromaPred[C_PRED_A]; + + PSampleSadHor8Func pfSampleSadHor8[2]; // 1: for 16x16 square; 0: for 8x8 square + PMotionSearchFunc + pfMotionSearch[BLOCK_STATIC_IDC_ALL]; //svc_encode_slice.c svc_mode_decision.c svc_enhance_layer_md.c svc_base_layer_md.c + PSearchMethodFunc pfSearchMethod[BLOCK_SIZE_ALL]; + PCalculateSatdFunc pfCalculateSatd; + PCheckDirectionalMv pfCheckDirectionalMv; + + PInitializeHashforFeatureFunc pfInitializeHashforFeature; + PFillQpelLocationByFeatureValueFunc pfFillQpelLocationByFeatureValue; + PCalculateBlockFeatureOfFrame pfCalculateBlockFeatureOfFrame[2];//0 - for 8x8, 1 for 16x16 + PCalculateSingleBlockFeature pfCalculateSingleBlockFeature[2];//0 - for 8x8, 1 for 16x16 + PLineFullSearchFunc pfVerticalFullSearch; + PLineFullSearchFunc pfHorizontalFullSearch; + PUpdateFMESwitch pfUpdateFMESwitch; + + PCopyFunc pfCopy16x16Aligned; //svc_encode_slice.c svc_mode_decision.c svc_base_layer_md.c + PCopyFunc pfCopy16x16NotAligned; //md.c + PCopyFunc pfCopy8x8Aligned; //svc_encode_slice.c svc_mode_decision.c svc_base_layer_md.c md.c + PCopyFunc pfCopy16x8NotAligned; //for MeRefineFracPixel 16x8 based + PCopyFunc pfCopy8x16Aligned; //for MeRefineFracPixel 8x16 based + PCopyFunc pfCopy4x4; //not sure if aligned or not, need further tune + PCopyFunc pfCopy8x4; //not sure if aligned or not, need further tune + PCopyFunc pfCopy4x8; //not sure if aligned or not, need further tune + + PDctFunc pfDctT4; + PDctFunc pfDctFourT4; + + PCalculateSingleCtrFunc pfCalculateSingleCtr4x4; + PScanFunc pfScan4x4; //DC/AC + PScanFunc pfScan4x4Ac; + + PQuantizationFunc pfQuantization4x4; + PQuantizationFunc pfQuantizationFour4x4; + PQuantizationDcFunc pfQuantizationDc4x4; + PQuantizationMaxFunc pfQuantizationFour4x4Max; + PQuantizationHadamardFunc pfQuantizationHadamard2x2; + PQuantizationSkipFunc pfQuantizationHadamard2x2Skip; + + PTransformHadamard4x4Func pfTransformHadamard4x4Dc; + + PGetNoneZeroCountFunc 
pfGetNoneZeroCount; + + PDeQuantizationFunc pfDequantization4x4; + PDeQuantizationFunc pfDequantizationFour4x4; + PDeQuantizationHadamardFunc pfDequantizationIHadamard4x4; + PIDctFunc pfIDctFourT4; + PIDctFunc pfIDctT4; + PIDctFunc pfIDctI16x16Dc; + + + + // OPTI: if MT under diff uiSliceMode, need change here + //PDynamicSlicingStepBackFunc dynslc_funcpointer_stepback;//svc_encode_slice.c + //DYNSLC_LNGTH_CRTL dynslc_funcpointer_slcsize_ctrl; + + /* For Deblocking */ + DeblockingFunc pfDeblocking; + PSetNoneZeroCountZeroFunc pfSetNZCZero; + + SWelsRcFunc pfRc; + PAccumulateSadFunc pfAccumulateSadForRc; + + PSetMemoryZero pfSetMemZeroSize8; // for size is times to 8 + PSetMemoryZero pfSetMemZeroSize64Aligned16; // for size is times of 64, and address is align to 16 + PSetMemoryZero pfSetMemZeroSize64; // for size is times of 64, and don't know address is align to 16 or not + + PCavlcParamCalFunc pfCavlcParamCal; + PWelsSpatialWriteMbSyn pfWelsSpatialWriteMbSyn; + PGetBsPosition pfGetBsPosition; + PStashMBStatus pfStashMBStatus; + PStashPopMBStatus pfStashPopMBStatus; + + IWelsParametersetStrategy* pParametersetStrategy; +}; + +} //end of namespace WelsEnc { + +#endif//WELS_ENCODER_FUNCTION_POINTERS_DEFINITION_H_ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/wels_preprocess.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/wels_preprocess.h new file mode 100644 index 000000000..4d1cd8eff --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/wels_preprocess.h @@ -0,0 +1,251 @@ +/*! + * \copy + * Copyright (c) 2011-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file wels_preprocess.h + * + * \brief interface of video pre-process plugins + * + * \date 03/15/2011 + * + * \description : this class is designed as an interface to unify video pre-processing + * class implement sets such as denoise,colorspace conversion etc... 
+ * + ************************************************************************************* + */ + +#ifndef WELS_PREPROCESS_H +#define WELS_PREPROCESS_H + +#include "typedefs.h" +#include "picture.h" +#include "wels_const.h" +#include "IWelsVP.h" +#include "param_svc.h" + +namespace WelsEnc { + +typedef struct TagWelsEncCtx sWelsEncCtx; + +typedef struct { + SPicture* pScaledInputPicture; + int32_t iScaledWidth[MAX_DEPENDENCY_LAYER]; + int32_t iScaledHeight[MAX_DEPENDENCY_LAYER]; +} Scaled_Picture; + + +typedef struct { + int64_t iMinFrameComplexity; + int64_t iMinFrameComplexity08; + int64_t iMinFrameComplexity11; + + int32_t iMinFrameNumGap; + int32_t iMinFrameQp; +} SRefJudgement; + +typedef struct { + SPicture* pRefPicture; + int32_t iSrcListIdx; //idx in h->spatial_pic[base_did]; + bool bSceneLtrFlag; + unsigned char* pBestBlockStaticIdc; +} SRefInfoParam; + +typedef struct TagVAAFrameInfo { + SVAACalcResult sVaaCalcInfo; + SAdaptiveQuantizationParam sAdaptiveQuantParam; + SComplexityAnalysisParam sComplexityAnalysisParam; + + int32_t iPicWidth; // maximal iWidth of picture in samples for svc coding + int32_t iPicHeight; // maximal iHeight of picture in samples for svc coding + int32_t iPicStride; //luma + int32_t iPicStrideUV; + + uint8_t* pRefY; //pRef + uint8_t* pCurY; //cur + uint8_t* pRefU; //pRef + uint8_t* pCurU; //cur + uint8_t* pRefV; //pRef + uint8_t* pCurV; //cur + + int8_t* pVaaBackgroundMbFlag; + uint8_t uiValidLongTermPicIdx; + uint8_t uiMarkLongTermPicIdx; + + ESceneChangeIdc eSceneChangeIdc; + bool bSceneChangeFlag; + bool bIdrPeriodFlag; +} SVAAFrameInfo; + +typedef struct SVAAFrameInfoExt_t: public SVAAFrameInfo { + SComplexityAnalysisScreenParam sComplexityScreenParam; + SScrollDetectionParam sScrollDetectInfo; + SRefInfoParam sVaaStrBestRefCandidate[MAX_REF_PIC_COUNT]; + SRefInfoParam sVaaLtrBestRefCandidate[MAX_REF_PIC_COUNT]; + int32_t iNumOfAvailableRef; + + int32_t iVaaBestRefFrameNum; + uint8_t* pVaaBestBlockStaticIdc;//pointer + 
uint8_t* pVaaBlockStaticIdc[16];//real memory, +} SVAAFrameInfoExt; + +class CWelsPreProcess { + public: + CWelsPreProcess (sWelsEncCtx* pEncCtx); + virtual ~CWelsPreProcess(); + + static CWelsPreProcess* CreatePreProcess (sWelsEncCtx* pEncCtx); + + virtual SPicture* GetCurrentOrigFrame (int32_t iDIdx) = 0; + public: + int32_t WelsPreprocessReset (sWelsEncCtx* pEncCtx, int32_t iWidth, int32_t iHeight); + int32_t AllocSpatialPictures (sWelsEncCtx* pCtx, SWelsSvcCodingParam* pParam); + void FreeSpatialPictures (sWelsEncCtx* pCtx); + int32_t BuildSpatialPicList (sWelsEncCtx* pEncCtx, const SSourcePicture* kpSrcPic); + int32_t AnalyzeSpatialPic (sWelsEncCtx* pEncCtx, const int32_t kiDIdx); + int32_t UpdateSpatialPictures (sWelsEncCtx* pEncCtx, SWelsSvcCodingParam* pParam, const int8_t iCurTid, + const int32_t d_idx); + int32_t GetRefFrameInfo (int32_t iRefIdx, bool bCurrentFrameIsSceneLtr, SPicture*& pRefOri); + void AnalyzePictureComplexity (sWelsEncCtx* pCtx, SPicture* pCurPicture, SPicture* pRefPicture, + const int32_t kiDependencyId, const bool kbCalculateBGD); + int32_t UpdateBlockIdcForScreen (uint8_t* pCurBlockStaticPointer, const SPicture* kpRefPic, const SPicture* kpSrcPic); + + + void UpdateSrcList (SPicture* pCurPicture, const int32_t kiCurDid, SPicture** pShortRefList, + const uint32_t kuiShortRefCount); + void UpdateSrcListLosslessScreenRefSelectionWithLtr (SPicture* pCurPicture, const int32_t kiCurDid, + const int32_t kuiMarkLongTermPicIdx, SPicture** pLongRefList); + + + protected: + bool GetSceneChangeFlag (ESceneChangeIdc eSceneChangeIdc); + virtual ESceneChangeIdc DetectSceneChange (SPicture* pCurPicture, SPicture* pRefPicture = NULL) = 0; + + void InitPixMap (const SPicture* pPicture, SPixMap* pPixMap); + + int32_t GetCurPicPosition (const int32_t kiDidx); + + private: + int32_t WelsPreprocessCreate(); + int32_t WelsPreprocessDestroy(); + int32_t InitLastSpatialPictures (sWelsEncCtx* pEncCtx); + + private: + int32_t SingleLayerPreprocess 
(sWelsEncCtx* pEncCtx, const SSourcePicture* kpSrc, Scaled_Picture* m_sScaledPicture); + + void BilateralDenoising (SPicture* pSrc, const int32_t iWidth, const int32_t iHeight); + + int32_t DownsamplePadding (SPicture* pSrc, SPicture* pDstPic, int32_t iSrcWidth, int32_t iSrcHeight, + int32_t iShrinkWidth, int32_t iShrinkHeight, int32_t iTargetWidth, int32_t iTargetHeight, + bool bForceCopy); + + void VaaCalculation (SVAAFrameInfo* pVaaInfo, SPicture* pCurPicture, SPicture* pRefPicture, bool bCalculateSQDiff, + bool bCalculateVar, bool bCalculateBGD); + void BackgroundDetection (SVAAFrameInfo* pVaaInfo, SPicture* pCurPicture, SPicture* pRefPicture, bool bDetectFlag); + void AdaptiveQuantCalculation (SVAAFrameInfo* pVaaInfo, SPicture* pCurPicture, SPicture* pRefPicture); + void Padding (uint8_t* pSrcY, uint8_t* pSrcU, uint8_t* pSrcV, int32_t iStrideY, int32_t iStrideUV, + int32_t iActualWidth, int32_t iPaddingWidth, int32_t iActualHeight, int32_t iPaddingHeight); + void SetRefMbType (sWelsEncCtx* pCtx, uint32_t** pRefMbTypeArray, int32_t iRefPicType); + + int32_t ColorspaceConvert (SWelsSvcCodingParam* pSvcParam, SPicture* pDstPic, const SSourcePicture* kpSrc, + const int32_t kiWidth, const int32_t kiHeight); + void WelsMoveMemoryWrapper (SWelsSvcCodingParam* pSvcParam, SPicture* pDstPic, const SSourcePicture* kpSrc, + const int32_t kiWidth, const int32_t kiHeight); + + /*! 
+ * \brief exchange two picture pData planes + * \param ppPic1 picture pointer to picture 1 + * \param ppPic2 picture pointer to picture 2 + * \return none + */ + void WelsExchangeSpatialPictures (SPicture** ppPic1, SPicture** ppPic2); + + SPicture* GetBestRefPic (EUsageType iUsageType, bool bSceneLtr, EWelsSliceType eSliceType, int32_t kiDidx, + int32_t iRefTemporalIdx); + SPicture* GetBestRefPic (const int32_t kiDidx, const int32_t iRefTemporalIdx); + protected: + IWelsVP* m_pInterfaceVp; + sWelsEncCtx* m_pEncCtx; + uint8_t m_uiSpatialLayersInTemporal[MAX_DEPENDENCY_LAYER]; + + private: + Scaled_Picture m_sScaledPicture; + SPicture* m_pLastSpatialPicture[MAX_DEPENDENCY_LAYER][2]; + bool m_bInitDone; + uint8_t m_uiSpatialPicNum[MAX_DEPENDENCY_LAYER]; + protected: + /* For Downsampling & VAA I420 based source pictures */ + SPicture* m_pSpatialPic[MAX_DEPENDENCY_LAYER][MAX_REF_PIC_COUNT + 1]; + // need memory requirement with total number of num_of_ref + 1, "+1" is for current frame + int32_t m_iAvaliableRefInSpatialPicList; + +}; + +class CWelsPreProcessVideo : public CWelsPreProcess { + public: + CWelsPreProcessVideo (sWelsEncCtx* pEncCtx) : CWelsPreProcess (pEncCtx) {}; + + virtual SPicture* GetCurrentOrigFrame (int32_t iDIdx); + + virtual ESceneChangeIdc DetectSceneChange (SPicture* pCurPicture, SPicture* pRefPicture = NULL); +}; + + + +class CWelsPreProcessScreen : public CWelsPreProcess { + public: + CWelsPreProcessScreen (sWelsEncCtx* pEncCtx) : CWelsPreProcess (pEncCtx) {}; + + virtual SPicture* GetCurrentOrigFrame (int32_t iDIdx); + + virtual ESceneChangeIdc DetectSceneChange (SPicture* pCurPicture, SPicture* pRefPicture = NULL); + + private: + SPicture** GetReferenceSrcPicList(int32_t iTargetDid); + + void GetAvailableRefListLosslessScreenRefSelection (SPicture** pSrcPicList, uint8_t iCurTid, + const int32_t iClosestLtrFrameNum, + SRefInfoParam* pAvailableRefList, int32_t& iAvailableRefNum, int32_t& iAvailableSceneRefNum); + + void GetAvailableRefList 
(SPicture** pSrcPicList, uint8_t iCurTid, const int32_t iClosestLtrFrameNum, + SRefInfoParam* pAvailableRefList, int32_t& iAvailableRefNum, int32_t& iAvailableSceneRefNum); + void InitRefJudgement (SRefJudgement* pRefJudgement); + + bool JudgeBestRef (SPicture* pRefPic, const SRefJudgement& sRefJudgement, const int64_t iFrameComplexity, + const bool bIsClosestLtrFrame); + void SaveBestRefToJudgement (const int32_t iRefPictureAvQP, const int64_t iComplexity, SRefJudgement* pRefJudgement); + void SaveBestRefToLocal (SRefInfoParam* pRefPicInfo, const SSceneChangeResult& sSceneChangeResult, + SRefInfoParam* pRefSaved); + void SaveBestRefToVaa (SRefInfoParam& sRefSaved, SRefInfoParam* pVaaBestRef); +}; + + +} + +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/wels_task_base.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/wels_task_base.h new file mode 100644 index 000000000..e27af3118 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/wels_task_base.h @@ -0,0 +1,79 @@ +/*! + * \copy + * Copyright (c) 2009-2015, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file wels_task_base.h + * + * \brief interface for base task + * + * \date 5/09/2012 Created + * + ************************************************************************************* + */ + + + +#ifndef _WELS_BASE_TASK_H_ +#define _WELS_BASE_TASK_H_ + +#include "typedefs.h" +#include "WelsTask.h" + +namespace WelsEnc { + +class CWelsBaseTask : public WelsCommon::IWelsTask { + public: + enum ETaskType { + WELS_ENC_TASK_ENCODING = 0, + WELS_ENC_TASK_ENCODE_FIXED_SLICE = WELS_ENC_TASK_ENCODING, + WELS_ENC_TASK_ENCODE_SLICE_LOADBALANCING = WELS_ENC_TASK_ENCODING, + WELS_ENC_TASK_ENCODE_SLICE_SIZECONSTRAINED = WELS_ENC_TASK_ENCODING, + WELS_ENC_TASK_UPDATEMBMAP = 1, + WELS_ENC_TASK_PREPROCESS = 2, + WELS_ENC_TASK_ALL = 3, + }; + + CWelsBaseTask (WelsCommon::IWelsTaskSink* pSink): IWelsTask (pSink) {}; + virtual ~CWelsBaseTask(); + + virtual uint32_t GetTaskType() const = 0; + + private: + +}; + +} + + +#endif + + + + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/wels_task_encoder.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/wels_task_encoder.h new file mode 100644 index 000000000..3203bf495 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/wels_task_encoder.h @@ -0,0 +1,142 @@ +/*! + * \copy + * Copyright (c) 2009-2015, Cisco Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file wels_task_encoder.h + * + * \brief interface for encoder tasks + * + * \date 07/06/2015 Created + * + ************************************************************************************* + */ + +#ifndef _WELS_ENCODER_TASK_H_ +#define _WELS_ENCODER_TASK_H_ + +#include "wels_task_base.h" +#include "encoder_context.h" + +namespace WelsEnc { + +extern int32_t WriteSliceToFrameBs (sWelsEncCtx* pCtx, SLayerBSInfo* pLbi, uint8_t* pFrameBsBuffer, + const int32_t iSliceIdx, + int32_t& iSliceSize); +extern int32_t WriteSliceBs (sWelsEncCtx* pCtx,SWelsSliceBs* pSliceBs,const int32_t iSliceIdx, int32_t& iSliceSize); + +class CWelsSliceEncodingTask : public CWelsBaseTask { + public: + CWelsSliceEncodingTask (WelsCommon::IWelsTaskSink* pSink, sWelsEncCtx* pCtx, const int32_t iSliceIdx); + virtual ~CWelsSliceEncodingTask(); + + CWelsSliceEncodingTask* CreateSliceEncodingTask (sWelsEncCtx* pCtx, const int32_t iSliceIdx); + WelsErrorType SetBoundary (int32_t iStartMbIdx, int32_t iEndMbIdx); + + virtual WelsErrorType Execute(); + virtual WelsErrorType InitTask(); + virtual WelsErrorType ExecuteTask(); + virtual void FinishTask(); + + virtual uint32_t GetTaskType() const { + return WELS_ENC_TASK_ENCODE_FIXED_SLICE; + } + protected: + WelsErrorType m_eTaskResult; + + int32_t QueryEmptyThread (bool* pThreadBsBufferUsage); + + sWelsEncCtx* m_pCtx; + SSliceThreadPrivateData* m_pPrivateData; + SLayerBSInfo* m_pLbi; + int32_t m_iStartMbIdx; + int32_t m_iEndMbIdx; + + EWelsNalUnitType m_eNalType; + EWelsNalRefIdc m_eNalRefIdc; + bool m_bNeedPrefix; + uint32_t m_uiDependencyId; + + SSlice* m_pSlice; + SWelsSliceBs* m_pSliceBs; + int32_t m_iSliceIdx; + int32_t m_iSliceSize; + int32_t m_iThreadIdx; +}; + +class CWelsLoadBalancingSlicingEncodingTask : public CWelsSliceEncodingTask { + public: + CWelsLoadBalancingSlicingEncodingTask (WelsCommon::IWelsTaskSink* pSink, sWelsEncCtx* pCtx, const int32_t iSliceIdx) : CWelsSliceEncodingTask (pSink, pCtx, + iSliceIdx) { + }; + + 
virtual WelsErrorType InitTask(); + virtual void FinishTask(); + + virtual uint32_t GetTaskType() const { + return WELS_ENC_TASK_ENCODE_SLICE_LOADBALANCING; + } + private: + int64_t m_iSliceStart; +}; + + +class CWelsConstrainedSizeSlicingEncodingTask : public CWelsLoadBalancingSlicingEncodingTask { + public: + CWelsConstrainedSizeSlicingEncodingTask (WelsCommon::IWelsTaskSink* pSink, sWelsEncCtx* pCtx, + const int32_t iSliceIdx) : CWelsLoadBalancingSlicingEncodingTask (pSink, pCtx, iSliceIdx) { + }; + + virtual WelsErrorType ExecuteTask(); + + virtual uint32_t GetTaskType() const { + return WELS_ENC_TASK_ENCODE_SLICE_SIZECONSTRAINED; + } + +}; + + +class CWelsUpdateMbMapTask : public CWelsBaseTask { + public: + CWelsUpdateMbMapTask (WelsCommon::IWelsTaskSink* pSink, sWelsEncCtx* pCtx, const int32_t iSliceIdx); + virtual ~CWelsUpdateMbMapTask(); + + virtual WelsErrorType Execute(); + + virtual uint32_t GetTaskType() const { + return WELS_ENC_TASK_UPDATEMBMAP; + } + protected: + sWelsEncCtx* m_pCtx; + int32_t m_iSliceIdx; +}; + +} //namespace +#endif //header guard + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/wels_task_management.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/wels_task_management.h new file mode 100644 index 000000000..acd612633 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/wels_task_management.h @@ -0,0 +1,135 @@ +/*! + * \copy + * Copyright (c) 2009-2015, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file wels_task_management.h + * + * \brief interface for task management + * + * \date 5/14/2012 Created + * + ************************************************************************************* + */ + +#ifndef _WELS_ENCODER_TASK_MANAGE_H_ +#define _WELS_ENCODER_TASK_MANAGE_H_ + +#include "wels_common_basis.h" +#include "WelsLock.h" +#include "WelsThreadPool.h" +#include "wels_task_base.h" + +namespace WelsEnc { + +class IWelsTaskManage { + public: + virtual ~IWelsTaskManage() { } + + virtual WelsErrorType Init (sWelsEncCtx* pEncCtx) = 0; + virtual void Uninit() = 0; + + virtual void InitFrame (const int32_t kiCurDid) {} + virtual WelsErrorType ExecuteTasks (const CWelsBaseTask::ETaskType iTaskType = CWelsBaseTask::WELS_ENC_TASK_ENCODING) + = 0; + + static IWelsTaskManage* CreateTaskManage (sWelsEncCtx* pCtx, const int32_t iSpatialLayer, const bool bNeedLock); + + virtual int32_t GetThreadPoolThreadNum() = 0; +}; + + +class CWelsTaskManageBase : public IWelsTaskManage, public WelsCommon::IWelsTaskSink { + public: + typedef CWelsNonDuplicatedList TASKLIST_TYPE; + //typedef std::pair SLICE_BOUNDARY_PAIR; + //typedef CWelsList SLICE_PAIR_LIST; + + CWelsTaskManageBase(); + virtual ~ CWelsTaskManageBase(); + + virtual WelsErrorType Init (sWelsEncCtx* pEncCtx); + virtual void InitFrame (const int32_t kiCurDid = 0); + + virtual WelsErrorType ExecuteTasks (const CWelsBaseTask::ETaskType iTaskType = CWelsBaseTask::WELS_ENC_TASK_ENCODING); + + //IWelsTaskSink + virtual WelsErrorType OnTaskExecuted(); + virtual WelsErrorType OnTaskCancelled(); + + int32_t GetThreadPoolThreadNum(); + + protected: + virtual WelsErrorType CreateTasks (sWelsEncCtx* pEncCtx, const int32_t kiTaskCount); + + WelsErrorType ExecuteTaskList(TASKLIST_TYPE** pTaskList); + + protected: + sWelsEncCtx* m_pEncCtx; + WelsCommon::CWelsThreadPool* m_pThreadPool; + + TASKLIST_TYPE* m_pcAllTaskList[CWelsBaseTask::WELS_ENC_TASK_ALL][MAX_DEPENDENCY_LAYER]; + TASKLIST_TYPE* 
m_cEncodingTaskList[MAX_DEPENDENCY_LAYER]; + TASKLIST_TYPE* m_cPreEncodingTaskList[MAX_DEPENDENCY_LAYER]; + int32_t m_iTaskNum[MAX_DEPENDENCY_LAYER]; + + //SLICE_PAIR_LIST *m_cSliceList; + + int32_t m_iThreadNum; + + int32_t m_iWaitTaskNum; + WELS_EVENT m_hTaskEvent; + WELS_MUTEX m_hEventMutex; + WelsCommon::CWelsLock m_cWaitTaskNumLock; + + private: + DISALLOW_COPY_AND_ASSIGN (CWelsTaskManageBase); + void OnTaskMinusOne(); + + void Uninit(); + void DestroyTasks(); + void DestroyTaskList(TASKLIST_TYPE* pTargetTaskList); + + int32_t m_iCurDid; +}; + +class CWelsTaskManageOne : public CWelsTaskManageBase { + public: + CWelsTaskManageOne(); + virtual ~CWelsTaskManageOne(); + + WelsErrorType Init (sWelsEncCtx* pEncCtx); + virtual WelsErrorType ExecuteTasks(const CWelsBaseTask::ETaskType iTaskType = CWelsBaseTask::WELS_ENC_TASK_ENCODING); + + int32_t GetThreadPoolThreadNum() {return 1;}; +}; + +} //namespace +#endif //header guard + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/wels_transpose_matrix.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/wels_transpose_matrix.h new file mode 100644 index 000000000..38832e405 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/inc/wels_transpose_matrix.h @@ -0,0 +1,58 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ +#ifndef WELS_RUBY_ENCODER_TRANSPOSE_MATRIX_H__ +#define WELS_RUBY_ENCODER_TRANSPOSE_MATRIX_H__ + +#include "typedefs.h" + +namespace WelsEnc { + +#ifdef X86_ASM +extern "C" +{ + void TransposeMatrixBlocksx16_sse2 (void* pDst, const int32_t kiDstStride, void* pSrc, const int32_t kiSrcStride, + const int32_t kiBlocksNum); + void TransposeMatrixBlock16x16_sse2 (void* pDst, const int32_t kiDstStride, void* pSrc, const int32_t kiSrcStride); + void TransposeMatrixBlocksx8_mmx (void* pDst, const int32_t kiDstStride, void* pSrc, const int32_t kiSrcStride, + const int32_t kiBlocksNum); + void TransposeMatrixBlock8x8_mmx (void* pDst, const int32_t kiDstStride, void* pSrc, const int32_t kiSrcStride); +} +#endif + +typedef void (*PTransposeMatrixBlockFunc) (void* pDst, const int32_t kiDstStride, void* pSrc, + const int32_t kiSrcStride); +typedef void (*PTransposeMatrixBlocksFunc) (void* pDst, const int32_t kiDstStride, void* pSrc, + const int32_t kiSrcStride, const int32_t kiBlocksNum); + +}// end of namespace declaration + +#endif//WELS_RUBY_ENCODER_TRANSPOSE_MATRIX_H__ diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/au_set.cpp 
b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/au_set.cpp new file mode 100644 index 000000000..a49df4752 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/au_set.cpp @@ -0,0 +1,634 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file au_set.c + * + * \brief Interfaces introduced in Access Unit level based writer + * + * \date 05/18/2009 Created + * + ************************************************************************************* + */ + +#include "au_set.h" +#include "svc_enc_golomb.h" +#include "macros.h" + +#include "wels_common_defs.h" + +using namespace WelsCommon; + +namespace WelsEnc { + +static inline int32_t WelsCheckLevelLimitation (const SWelsSPS* kpSps, const SLevelLimits* kpLevelLimit, + float fFrameRate, int32_t iTargetBitRate) { + uint32_t uiPicWidthInMBs = kpSps->iMbWidth; + uint32_t uiPicHeightInMBs = kpSps->iMbHeight; + uint32_t uiPicInMBs = uiPicWidthInMBs * uiPicHeightInMBs; + uint32_t uiNumRefFrames = kpSps->iNumRefFrames; + + if (kpLevelLimit->uiMaxMBPS < (uint32_t) (uiPicInMBs * fFrameRate)) + return 0; + if (kpLevelLimit->uiMaxFS < uiPicInMBs) + return 0; + if ((kpLevelLimit->uiMaxFS << 3) < (uiPicWidthInMBs * uiPicWidthInMBs)) + return 0; + if ((kpLevelLimit->uiMaxFS << 3) < (uiPicHeightInMBs * uiPicHeightInMBs)) + return 0; + if (kpLevelLimit->uiMaxDPBMbs < uiNumRefFrames * uiPicInMBs) + return 0; + if ((iTargetBitRate != UNSPECIFIED_BIT_RATE) + && ((int32_t) kpLevelLimit->uiMaxBR * 1200) < iTargetBitRate) //RC enabled, considering bitrate constraint + return 0; + //add more checks here if needed in future + + return 1; + +} +int32_t WelsAdjustLevel (SSpatialLayerConfig* pSpatialLayer, const SLevelLimits* pCurLevel) { + int32_t iMaxBitrate = pSpatialLayer->iMaxSpatialBitrate; + do { + if (iMaxBitrate <= (int32_t) (pCurLevel->uiMaxBR * CpbBrNalFactor)) { + pSpatialLayer->uiLevelIdc = pCurLevel->uiLevelIdc; + return 0; + } + pCurLevel++; + } while (pCurLevel->uiLevelIdc != LEVEL_5_2); + return 1; +} + +static int32_t WelsCheckNumRefSetting (SLogContext* pLogCtx, SWelsSvcCodingParam* pParam, bool bStrictCheck) { + // validate LTR num + int32_t iCurrentSupportedLtrNum = (pParam->iUsageType == CAMERA_VIDEO_REAL_TIME) ? 
LONG_TERM_REF_NUM : + LONG_TERM_REF_NUM_SCREEN; + if ((pParam->bEnableLongTermReference) && (iCurrentSupportedLtrNum != pParam->iLTRRefNum)) { + WelsLog (pLogCtx, WELS_LOG_WARNING, "iLTRRefNum(%d) does not equal to currently supported %d, will be reset", + pParam->iLTRRefNum, iCurrentSupportedLtrNum); + pParam->iLTRRefNum = iCurrentSupportedLtrNum; + } else if (!pParam->bEnableLongTermReference) { + pParam->iLTRRefNum = 0; + } + + //TODO: here is a fix needed here, the most reasonable value should be: + // iCurrentStrNum = WELS_MAX (1, WELS_LOG2 (pParam->uiGopSize)); + // but reference list updating need to be changed + int32_t iCurrentStrNum = ((pParam->iUsageType == SCREEN_CONTENT_REAL_TIME && pParam->bEnableLongTermReference) + ? (WELS_MAX (1, WELS_LOG2 (pParam->uiGopSize))) + : (WELS_MAX (1, (pParam->uiGopSize >> 1)))); + int32_t iNeededRefNum = (pParam->uiIntraPeriod != 1) ? (iCurrentStrNum + pParam->iLTRRefNum) : 0; + + iNeededRefNum = WELS_CLIP3 (iNeededRefNum, + MIN_REF_PIC_COUNT, + (pParam->iUsageType == CAMERA_VIDEO_REAL_TIME) ? 
MAX_REFERENCE_PICTURE_COUNT_NUM_CAMERA : + MAX_REFERENCE_PICTURE_COUNT_NUM_SCREEN); + // to adjust default or invalid input, in case pParam->iNumRefFrame do not have a valid value for the next step + if (pParam->iNumRefFrame == AUTO_REF_PIC_COUNT) { + pParam->iNumRefFrame = iNeededRefNum; + } else if (pParam->iNumRefFrame < iNeededRefNum) { + WelsLog (pLogCtx, WELS_LOG_WARNING, + "iNumRefFrame(%d) setting does not support the temporal and LTR setting, will be reset to %d", + pParam->iNumRefFrame, iNeededRefNum); + if (bStrictCheck) { + return ENC_RETURN_UNSUPPORTED_PARA; + } + pParam->iNumRefFrame = iNeededRefNum; + } + + // after adjustment, do the following: + // if the setting is larger than needed, we will use the needed, and write the max into sps and for memory to wait for further expanding + if (pParam->iMaxNumRefFrame < pParam->iNumRefFrame) { + pParam->iMaxNumRefFrame = pParam->iNumRefFrame; + } + pParam->iNumRefFrame = iNeededRefNum; + + return ENC_RETURN_SUCCESS; +} + +int32_t WelsCheckRefFrameLimitationNumRefFirst (SLogContext* pLogCtx, SWelsSvcCodingParam* pParam) { + + if (WelsCheckNumRefSetting (pLogCtx, pParam, false)) { + // we take num-ref as the honored setting but it conflicts with temporal and LTR + return ENC_RETURN_UNSUPPORTED_PARA; + } + return ENC_RETURN_SUCCESS; +} +int32_t WelsCheckRefFrameLimitationLevelIdcFirst (SLogContext* pLogCtx, SWelsSvcCodingParam* pParam) { + if ((pParam->iNumRefFrame == AUTO_REF_PIC_COUNT) || (pParam->iMaxNumRefFrame == AUTO_REF_PIC_COUNT)) { + //no need to do the checking + return ENC_RETURN_SUCCESS; + } + + WelsCheckNumRefSetting (pLogCtx, pParam, false); + + int32_t i = 0; + int32_t iRefFrame; + //get the number of reference frame according to level limitation. 
+ for (i = 0; i < pParam->iSpatialLayerNum; ++ i) { + SSpatialLayerConfig* pSpatialLayer = &pParam->sSpatialLayers[i]; + if (pSpatialLayer->uiLevelIdc == LEVEL_UNKNOWN) { + continue; + } + + uint32_t uiPicInMBs = ((pSpatialLayer->iVideoHeight + 15) >> 4) * ((pSpatialLayer->iVideoWidth + 15) >> 4); + iRefFrame = g_ksLevelLimits[pSpatialLayer->uiLevelIdc - 1].uiMaxDPBMbs / uiPicInMBs; + + //check iMaxNumRefFrame + if (iRefFrame < pParam->iMaxNumRefFrame) { + WelsLog (pLogCtx, WELS_LOG_WARNING, "iMaxNumRefFrame(%d) adjusted to %d because of limitation from uiLevelIdc=%d", + pParam->iMaxNumRefFrame, iRefFrame, pSpatialLayer->uiLevelIdc); + pParam->iMaxNumRefFrame = iRefFrame; + + //check iNumRefFrame + if (iRefFrame < pParam->iNumRefFrame) { + WelsLog (pLogCtx, WELS_LOG_WARNING, "iNumRefFrame(%d) adjusted to %d because of limitation from uiLevelIdc=%d", + pParam->iNumRefFrame, iRefFrame, pSpatialLayer->uiLevelIdc); + pParam->iNumRefFrame = iRefFrame; + } + } else { + //because it is level first now, so adjust max-ref + WelsLog (pLogCtx, WELS_LOG_INFO, + "iMaxNumRefFrame(%d) adjusted to %d because of uiLevelIdc=%d -- under level-idc first strategy ", + pParam->iMaxNumRefFrame, iRefFrame, pSpatialLayer->uiLevelIdc); + pParam->iMaxNumRefFrame = iRefFrame; + } + } + + return ENC_RETURN_SUCCESS; +} + +static inline ELevelIdc WelsGetLevelIdc (const SWelsSPS* kpSps, float fFrameRate, int32_t iTargetBitRate) { + int32_t iOrder; + for (iOrder = 0; iOrder < LEVEL_NUMBER; iOrder++) { + if (WelsCheckLevelLimitation (kpSps, & (g_ksLevelLimits[iOrder]), fFrameRate, iTargetBitRate)) { + return (g_ksLevelLimits[iOrder].uiLevelIdc); + } + } + return LEVEL_5_1; //final decision: select the biggest level +} + +int32_t WelsWriteVUI (SWelsSPS* pSps, SBitStringAux* pBitStringAux) { + SBitStringAux* pLocalBitStringAux = pBitStringAux; + assert (pSps != NULL && pBitStringAux != NULL); + + BsWriteOneBit (pLocalBitStringAux, pSps->bAspectRatioPresent); //aspect_ratio_info_present_flag + if 
(pSps->bAspectRatioPresent) { + BsWriteBits (pLocalBitStringAux, 8, pSps->eAspectRatio); // aspect_ratio_idc + if (pSps->eAspectRatio == ASP_EXT_SAR) { + BsWriteBits (pLocalBitStringAux, 16, pSps->sAspectRatioExtWidth); // sar_width + BsWriteBits (pLocalBitStringAux, 16, pSps->sAspectRatioExtHeight); // sar_height + } + } + BsWriteOneBit (pLocalBitStringAux, false); //overscan_info_present_flag + + // See codec_app_def.h and parameter_sets.h for more info about members bVideoSignalTypePresent through uiColorMatrix. + BsWriteOneBit (pLocalBitStringAux, pSps->bVideoSignalTypePresent); //video_signal_type_present_flag + if (pSps->bVideoSignalTypePresent) { + //write video signal type info to header + + BsWriteBits (pLocalBitStringAux, 3, pSps->uiVideoFormat); + BsWriteOneBit (pLocalBitStringAux, pSps->bFullRange); + BsWriteOneBit (pLocalBitStringAux, pSps->bColorDescriptionPresent); + + if (pSps->bColorDescriptionPresent) { + //write color description info to header + + BsWriteBits (pLocalBitStringAux, 8, pSps->uiColorPrimaries); + BsWriteBits (pLocalBitStringAux, 8, pSps->uiTransferCharacteristics); + BsWriteBits (pLocalBitStringAux, 8, pSps->uiColorMatrix); + + }//write color description info to header + + }//write video signal type info to header + + BsWriteOneBit (pLocalBitStringAux, false); //chroma_loc_info_present_flag + BsWriteOneBit (pLocalBitStringAux, false); //timing_info_present_flag + BsWriteOneBit (pLocalBitStringAux, false); //nal_hrd_parameters_present_flag + BsWriteOneBit (pLocalBitStringAux, false); //vcl_hrd_parameters_present_flag + BsWriteOneBit (pLocalBitStringAux, false); //pic_struct_present_flag + BsWriteOneBit (pLocalBitStringAux, true); //bitstream_restriction_flag + + // + BsWriteOneBit (pLocalBitStringAux, true); //motion_vectors_over_pic_boundaries_flag + BsWriteUE (pLocalBitStringAux, 0); //max_bytes_per_pic_denom + BsWriteUE (pLocalBitStringAux, 0); //max_bits_per_mb_denom + BsWriteUE (pLocalBitStringAux, 16); 
//log2_max_mv_length_horizontal + BsWriteUE (pLocalBitStringAux, 16); //log2_max_mv_length_vertical + + BsWriteUE (pLocalBitStringAux, 0); //max_num_reorder_frames + BsWriteUE (pLocalBitStringAux, pSps->iNumRefFrames); //max_dec_frame_buffering + + return 0; +} + +/*! + ************************************************************************************* + * \brief to set Sequence Parameter Set (SPS) + * + * \param pSps SWelsSPS to be wrote, update iSpsId dependency + * \param pBitStringAux bitstream writer auxiliary + * + * \return 0 - successed + * 1 - failed + * + * \note Call it in case EWelsNalUnitType is SPS. + ************************************************************************************* + */ +int32_t WelsWriteSpsSyntax (SWelsSPS* pSps, SBitStringAux* pBitStringAux, int32_t* pSpsIdDelta, bool bBaseLayer) { + SBitStringAux* pLocalBitStringAux = pBitStringAux; + + assert (pSps != NULL && pBitStringAux != NULL); + + BsWriteBits (pLocalBitStringAux, 8, pSps->uiProfileIdc); + + BsWriteOneBit (pLocalBitStringAux, pSps->bConstraintSet0Flag); // bConstraintSet0Flag + BsWriteOneBit (pLocalBitStringAux, pSps->bConstraintSet1Flag); // bConstraintSet1Flag + BsWriteOneBit (pLocalBitStringAux, pSps->bConstraintSet2Flag); // bConstraintSet2Flag + BsWriteOneBit (pLocalBitStringAux, pSps->bConstraintSet3Flag); // bConstraintSet3Flag + if (PRO_HIGH == pSps->uiProfileIdc || PRO_EXTENDED == pSps->uiProfileIdc || + PRO_MAIN == pSps->uiProfileIdc) { + BsWriteOneBit (pLocalBitStringAux, 1); // bConstraintSet4Flag: If profile_idc is equal to 77, 88, or 100, constraint_set4_flag equal to 1 indicates that the value of frame_mbs_only_flag is equal to 1. constraint_set4_flag equal to 0 indicates that the value of frame_mbs_only_flag may or may not be equal to 1. + BsWriteOneBit (pLocalBitStringAux, 1); // bConstraintSet5Flag: If profile_idc is equal to 77, 88, or 100, constraint_set5_flag equal to 1 indicates that B slice types are not present in the coded video sequence. 
constraint_set5_flag equal to 0 indicates that B slice types may or may not be present in the coded video sequence. + BsWriteBits (pLocalBitStringAux, 2, 0); // reserved_zero_2bits, equal to 0 + } else { + BsWriteBits (pLocalBitStringAux, 4, 0); // reserved_zero_4bits, equal to 0 + } + BsWriteBits (pLocalBitStringAux, 8, pSps->iLevelIdc); // iLevelIdc + BsWriteUE (pLocalBitStringAux, pSps->uiSpsId + pSpsIdDelta[pSps->uiSpsId]); // seq_parameter_set_id + + if (PRO_SCALABLE_BASELINE == pSps->uiProfileIdc || PRO_SCALABLE_HIGH == pSps->uiProfileIdc || + PRO_HIGH == pSps->uiProfileIdc || PRO_HIGH10 == pSps->uiProfileIdc || + PRO_HIGH422 == pSps->uiProfileIdc || PRO_HIGH444 == pSps->uiProfileIdc || + PRO_CAVLC444 == pSps->uiProfileIdc || 44 == pSps->uiProfileIdc) { + BsWriteUE (pLocalBitStringAux, 1); //uiChromaFormatIdc, now should be 1 + BsWriteUE (pLocalBitStringAux, 0); //uiBitDepthLuma + BsWriteUE (pLocalBitStringAux, 0); //uiBitDepthChroma + BsWriteOneBit (pLocalBitStringAux, 0); //qpprime_y_zero_transform_bypass_flag + BsWriteOneBit (pLocalBitStringAux, 0); //seq_scaling_matrix_present_flag + } + + BsWriteUE (pLocalBitStringAux, pSps->uiLog2MaxFrameNum - 4); // log2_max_frame_num_minus4 + BsWriteUE (pLocalBitStringAux, 0/*pSps->uiPocType*/); // pic_order_cnt_type + BsWriteUE (pLocalBitStringAux, pSps->iLog2MaxPocLsb - 4); // log2_max_pic_order_cnt_lsb_minus4 + + BsWriteUE (pLocalBitStringAux, pSps->iNumRefFrames); // max_num_ref_frames + BsWriteOneBit (pLocalBitStringAux, pSps->bGapsInFrameNumValueAllowedFlag); //gaps_in_frame_numvalue_allowed_flag + BsWriteUE (pLocalBitStringAux, pSps->iMbWidth - 1); // pic_width_in_mbs_minus1 + BsWriteUE (pLocalBitStringAux, pSps->iMbHeight - 1); // pic_height_in_map_units_minus1 + BsWriteOneBit (pLocalBitStringAux, true/*pSps->bFrameMbsOnlyFlag*/); // bFrameMbsOnlyFlag + + BsWriteOneBit (pLocalBitStringAux, 0/*pSps->bDirect8x8InferenceFlag*/); // direct_8x8_inference_flag + BsWriteOneBit (pLocalBitStringAux, 
pSps->bFrameCroppingFlag); // bFrameCroppingFlag + if (pSps->bFrameCroppingFlag) { + BsWriteUE (pLocalBitStringAux, pSps->sFrameCrop.iCropLeft); // frame_crop_left_offset + BsWriteUE (pLocalBitStringAux, pSps->sFrameCrop.iCropRight); // frame_crop_right_offset + BsWriteUE (pLocalBitStringAux, pSps->sFrameCrop.iCropTop); // frame_crop_top_offset + BsWriteUE (pLocalBitStringAux, pSps->sFrameCrop.iCropBottom); // frame_crop_bottom_offset + } + if (bBaseLayer) { + BsWriteOneBit (pLocalBitStringAux, true); // vui_parameters_present_flag + WelsWriteVUI (pSps, pBitStringAux); + } else { + BsWriteOneBit (pLocalBitStringAux, false); + } + return 0; +} + + +int32_t WelsWriteSpsNal (SWelsSPS* pSps, SBitStringAux* pBitStringAux, int32_t* pSpsIdDelta) { + WelsWriteSpsSyntax (pSps, pBitStringAux, pSpsIdDelta, true); + + BsRbspTrailingBits (pBitStringAux); + + return 0; +} + +/*! + ************************************************************************************* + * \brief to write SubSet Sequence Parameter Set + * + * \param sub_sps subset pSps parsed + * \param pBitStringAux bitstream writer auxiliary + * + * \return 0 - successed + * 1 - failed + * + * \note Call it in case EWelsNalUnitType is SubSet SPS. 
+ ************************************************************************************* + */ + +int32_t WelsWriteSubsetSpsSyntax (SSubsetSps* pSubsetSps, SBitStringAux* pBitStringAux , int32_t* pSpsIdDelta) { + SWelsSPS* pSps = &pSubsetSps->pSps; + + WelsWriteSpsSyntax (pSps, pBitStringAux, pSpsIdDelta, false); + + if (pSps->uiProfileIdc == PRO_SCALABLE_BASELINE || pSps->uiProfileIdc == PRO_SCALABLE_HIGH) { + SSpsSvcExt* pSubsetSpsExt = &pSubsetSps->sSpsSvcExt; + + BsWriteOneBit (pBitStringAux, true/*pSubsetSpsExt->bInterLayerDeblockingFilterCtrlPresentFlag*/); + BsWriteBits (pBitStringAux, 2, pSubsetSpsExt->iExtendedSpatialScalability); + BsWriteOneBit (pBitStringAux, 0/*pSubsetSpsExt->uiChromaPhaseXPlus1Flag*/); + BsWriteBits (pBitStringAux, 2, 1/*pSubsetSpsExt->uiChromaPhaseYPlus1*/); + if (pSubsetSpsExt->iExtendedSpatialScalability == 1) { + BsWriteOneBit (pBitStringAux, 0/*pSubsetSpsExt->uiSeqRefLayerChromaPhaseXPlus1Flag*/); + BsWriteBits (pBitStringAux, 2, 1/*pSubsetSpsExt->uiSeqRefLayerChromaPhaseYPlus1*/); + BsWriteSE (pBitStringAux, 0/*pSubsetSpsExt->sSeqScaledRefLayer.left_offset*/); + BsWriteSE (pBitStringAux, 0/*pSubsetSpsExt->sSeqScaledRefLayer.top_offset*/); + BsWriteSE (pBitStringAux, 0/*pSubsetSpsExt->sSeqScaledRefLayer.right_offset*/); + BsWriteSE (pBitStringAux, 0/*pSubsetSpsExt->sSeqScaledRefLayer.bottom_offset*/); + } + BsWriteOneBit (pBitStringAux, pSubsetSpsExt->bSeqTcoeffLevelPredFlag); + if (pSubsetSpsExt->bSeqTcoeffLevelPredFlag) { + BsWriteOneBit (pBitStringAux, pSubsetSpsExt->bAdaptiveTcoeffLevelPredFlag); + } + BsWriteOneBit (pBitStringAux, pSubsetSpsExt->bSliceHeaderRestrictionFlag); + + BsWriteOneBit (pBitStringAux, false/*pSubsetSps->bSvcVuiParamPresentFlag*/); + } + BsWriteOneBit (pBitStringAux, false/*pSubsetSps->bAdditionalExtension2Flag*/); + + BsRbspTrailingBits (pBitStringAux); + + return 0; +} + +/*! 
+ ************************************************************************************* + * \brief to write Picture Parameter Set (PPS) + * + * \param pPps pPps + * \param pBitStringAux bitstream writer auxiliary + * + * \return 0 - successed + * 1 - failed + * + * \note Call it in case EWelsNalUnitType is PPS. + ************************************************************************************* + */ +int32_t WelsWritePpsSyntax (SWelsPPS* pPps, SBitStringAux* pBitStringAux, + IWelsParametersetStrategy* pParametersetStrategy) { + SBitStringAux* pLocalBitStringAux = pBitStringAux; + + BsWriteUE (pLocalBitStringAux, pPps->iPpsId + pParametersetStrategy->GetPpsIdOffset (pPps->iPpsId)); + BsWriteUE (pLocalBitStringAux, pPps->iSpsId + pParametersetStrategy->GetSpsIdOffset (pPps->iPpsId, pPps->iSpsId)); + + BsWriteOneBit (pLocalBitStringAux, pPps->bEntropyCodingModeFlag); + BsWriteOneBit (pLocalBitStringAux, false/*pPps->bPicOrderPresentFlag*/); + +#ifdef DISABLE_FMO_FEATURE + BsWriteUE (pLocalBitStringAux, 0/*pPps->uiNumSliceGroups - 1*/); +#else + BsWriteUE (pLocalBitStringAux, pPps->uiNumSliceGroups - 1); + if (pPps->uiNumSliceGroups > 1) { + uint32_t i, uiNumBits; + + BsWriteUE (pLocalBitStringAux, pPps->uiSliceGroupMapType); + + switch (pPps->uiSliceGroupMapType) { + case 0: + for (i = 0; i < pPps->uiNumSliceGroups; i ++) { + BsWriteUE (pLocalBitStringAux, pPps->uiRunLength[i] - 1); + } + break; + case 2: + for (i = 0; i < pPps->uiNumSliceGroups; i ++) { + BsWriteUE (pLocalBitStringAux, pPps->uiTopLeft[i]); + BsWriteUE (pLocalBitStringAux, pPps->uiBottomRight[i]); + } + break; + case 3: + case 4: + case 5: + BsWriteOneBit (pLocalBitStringAux, pPps->bSliceGroupChangeDirectionFlag); + BsWriteUE (pLocalBitStringAux, pPps->uiSliceGroupChangeRate - 1); + break; + case 6: + BsWriteUE (pLocalBitStringAux, pPps->uiPicSizeInMapUnits - 1); + uiNumBits = 0;///////////////////WELS_CEILLOG2(pPps->uiPicSizeInMapUnits); + for (i = 0; i < pPps->uiPicSizeInMapUnits; i ++) { + 
BsWriteBits (pLocalBitStringAux, uiNumBits, pPps->uiSliceGroupId[i]); + } + break; + default: + break; + } + } +#endif//!DISABLE_FMO_FEATURE + + BsWriteUE (pLocalBitStringAux, 0/*pPps->uiNumRefIdxL0Active - 1*/); + BsWriteUE (pLocalBitStringAux, 0/*pPps->uiNumRefIdxL1Active - 1*/); + + + BsWriteOneBit (pLocalBitStringAux, false/*pPps->bWeightedPredFlag*/); + BsWriteBits (pLocalBitStringAux, 2, 0/*pPps->uiWeightedBiPredIdc*/); + + BsWriteSE (pLocalBitStringAux, pPps->iPicInitQp - 26); + BsWriteSE (pLocalBitStringAux, pPps->iPicInitQs - 26); + + BsWriteSE (pLocalBitStringAux, pPps->uiChromaQpIndexOffset); + BsWriteOneBit (pLocalBitStringAux, pPps->bDeblockingFilterControlPresentFlag); + BsWriteOneBit (pLocalBitStringAux, false/*pPps->bConstainedIntraPredFlag*/); + BsWriteOneBit (pLocalBitStringAux, false/*pPps->bRedundantPicCntPresentFlag*/); + + BsRbspTrailingBits (pLocalBitStringAux); + + return 0; +} + +static inline bool WelsGetPaddingOffset (int32_t iActualWidth, int32_t iActualHeight, int32_t iWidth, + int32_t iHeight, SCropOffset& pOffset) { + if ((iWidth < iActualWidth) || (iHeight < iActualHeight)) + return false; + + // make actual size even + iActualWidth -= (iActualWidth & 1); + iActualHeight -= (iActualHeight & 1); + + pOffset.iCropLeft = 0; + pOffset.iCropRight = (iWidth - iActualWidth) / 2; + pOffset.iCropTop = 0; + pOffset.iCropBottom = (iHeight - iActualHeight) / 2; + + return (iWidth > iActualWidth) || (iHeight > iActualHeight); +} +int32_t WelsInitSps (SWelsSPS* pSps, SSpatialLayerConfig* pLayerParam, SSpatialLayerInternal* pLayerParamInternal, + const uint32_t kuiIntraPeriod, const int32_t kiNumRefFrame, + const uint32_t kuiSpsId, const bool kbEnableFrameCropping, bool bEnableRc, + const int32_t kiDlayerCount, bool bSVCBaselayer) { + memset (pSps, 0, sizeof (SWelsSPS)); + pSps->uiSpsId = kuiSpsId; + pSps->iMbWidth = (pLayerParam->iVideoWidth + 15) >> 4; + pSps->iMbHeight = (pLayerParam->iVideoHeight + 15) >> 4; + + //max value of both iFrameNum 
and POC are 2^16-1, in our encoder, iPOC=2*iFrameNum, so max of iFrameNum should be 2^15-1.-- + pSps->uiLog2MaxFrameNum = 15;//16; + pSps->iLog2MaxPocLsb = 1 + pSps->uiLog2MaxFrameNum; + + pSps->iNumRefFrames = kiNumRefFrame; /* min pRef size when fifo pRef operation*/ + + if (kbEnableFrameCropping) { + // TODO: get frame_crop_left_offset, frame_crop_right_offset, frame_crop_top_offset, frame_crop_bottom_offset + pSps->bFrameCroppingFlag = WelsGetPaddingOffset (pLayerParamInternal->iActualWidth, pLayerParamInternal->iActualHeight, + pLayerParam->iVideoWidth, pLayerParam->iVideoHeight, pSps->sFrameCrop); + } else { + pSps->bFrameCroppingFlag = false; + } + pSps->uiProfileIdc = pLayerParam->uiProfileIdc ? pLayerParam->uiProfileIdc : PRO_BASELINE; + if (pLayerParam->uiProfileIdc == PRO_BASELINE) { + pSps->bConstraintSet0Flag = true; + } + if (pLayerParam->uiProfileIdc <= PRO_MAIN) { + pSps->bConstraintSet1Flag = true; + } + if ((kiDlayerCount > 1) && bSVCBaselayer) { + pSps->bConstraintSet2Flag = true; + } + + ELevelIdc uiLevel = WelsGetLevelIdc (pSps, pLayerParamInternal->fOutputFrameRate, pLayerParam->iSpatialBitrate); + //update level + //for Scalable Baseline, Scalable High, and Scalable High Intra profiles.If level_idc is equal to 9, the indicated level is level 1b. + //for the Baseline, Constrained Baseline, Main, and Extended profiles,If level_idc is equal to 11 and constraint_set3_flag is equal to 1, the indicated level is level 1b. + if ((uiLevel == LEVEL_1_B) && + ((pSps->uiProfileIdc == PRO_BASELINE) || (pSps->uiProfileIdc == PRO_MAIN) || (pSps->uiProfileIdc == PRO_EXTENDED))) { + uiLevel = LEVEL_1_1; + pSps->bConstraintSet3Flag = true; + } + if ((pLayerParam->uiLevelIdc == LEVEL_UNKNOWN) || (pLayerParam->uiLevelIdc < uiLevel)) { + pLayerParam->uiLevelIdc = uiLevel; + } + pSps->iLevelIdc = pLayerParam->uiLevelIdc; + + //bGapsInFrameNumValueAllowedFlag is false when only spatial layer number and temporal layer number is 1, and ltr is 0. 
+ if ((kiDlayerCount == 1) && (pSps->iNumRefFrames == 1)) + pSps->bGapsInFrameNumValueAllowedFlag = false; + else + pSps->bGapsInFrameNumValueAllowedFlag = true; + + pSps->bVuiParamPresentFlag = true; + + pSps->bAspectRatioPresent = pLayerParam->bAspectRatioPresent; + pSps->eAspectRatio = pLayerParam->eAspectRatio; + pSps->sAspectRatioExtWidth = pLayerParam->sAspectRatioExtWidth; + pSps->sAspectRatioExtHeight = pLayerParam->sAspectRatioExtHeight; + + // See codec_app_def.h and parameter_sets.h for more info about members bVideoSignalTypePresent through uiColorMatrix. + pSps->bVideoSignalTypePresent = pLayerParam->bVideoSignalTypePresent; + pSps->uiVideoFormat = pLayerParam->uiVideoFormat; + pSps->bFullRange = pLayerParam->bFullRange; + pSps->bColorDescriptionPresent = pLayerParam->bColorDescriptionPresent; + pSps->uiColorPrimaries = pLayerParam->uiColorPrimaries; + pSps->uiTransferCharacteristics = pLayerParam->uiTransferCharacteristics; + pSps->uiColorMatrix = pLayerParam->uiColorMatrix; + + return 0; +} + + +int32_t WelsInitSubsetSps (SSubsetSps* pSubsetSps, SSpatialLayerConfig* pLayerParam, + SSpatialLayerInternal* pLayerParamInternal, + const uint32_t kuiIntraPeriod, const int32_t kiNumRefFrame, + const uint32_t kuiSpsId, const bool kbEnableFrameCropping, bool bEnableRc, + const int32_t kiDlayerCount) { + SWelsSPS* pSps = &pSubsetSps->pSps; + + memset (pSubsetSps, 0, sizeof (SSubsetSps)); + + WelsInitSps (pSps, pLayerParam, pLayerParamInternal, kuiIntraPeriod, kiNumRefFrame, kuiSpsId, kbEnableFrameCropping, + bEnableRc, kiDlayerCount, false); + + pSps->uiProfileIdc = pLayerParam->uiProfileIdc ; + + pSubsetSps->sSpsSvcExt.iExtendedSpatialScalability = 0; /* ESS is 0 in default */ + pSubsetSps->sSpsSvcExt.bAdaptiveTcoeffLevelPredFlag = false; + pSubsetSps->sSpsSvcExt.bSeqTcoeffLevelPredFlag = false; + pSubsetSps->sSpsSvcExt.bSliceHeaderRestrictionFlag = true; + + return 0; +} + +int32_t WelsInitPps (SWelsPPS* pPps, + SWelsSPS* pSps, + SSubsetSps* pSubsetSps, + 
const uint32_t kuiPpsId, + const bool kbDeblockingFilterPresentFlag, + const bool kbUsingSubsetSps, + const bool kbEntropyCodingModeFlag) { + SWelsSPS* pUsedSps = NULL; + if (pPps == NULL || (pSps == NULL && pSubsetSps == NULL)) + return 1; + if (!kbUsingSubsetSps) { + assert (pSps != NULL); + if (NULL == pSps) + return 1; + pUsedSps = pSps; + } else { + assert (pSubsetSps != NULL); + if (NULL == pSubsetSps) + return 1; + pUsedSps = &pSubsetSps->pSps; + } + + /* fill picture parameter set syntax */ + pPps->iPpsId = kuiPpsId; + pPps->iSpsId = pUsedSps->uiSpsId; + pPps->bEntropyCodingModeFlag = kbEntropyCodingModeFlag; +#if !defined(DISABLE_FMO_FEATURE) + pPps->uiNumSliceGroups = 1; //param->qos_param.sliceGroupCount; + if (pPps->uiNumSliceGroups > 1) { + pPps->uiSliceGroupMapType = 0; //param->qos_param.sliceGroupType; + if (pPps->uiSliceGroupMapType == 0) { + uint32_t uiGroup = 0; + while (uiGroup < pPps->uiNumSliceGroups) { + pPps->uiRunLength[uiGroup] = 25; + ++ uiGroup; + } + } else if (pPps->uiSliceGroupMapType == 2) { + memset (&pPps->uiTopLeft[0], 0, MAX_SLICEGROUP_IDS * sizeof (pPps->uiTopLeft[0])); + memset (&pPps->uiBottomRight[0], 0, MAX_SLICEGROUP_IDS * sizeof (pPps->uiBottomRight[0])); + } else if (pPps->uiSliceGroupMapType >= 3 && + pPps->uiSliceGroupMapType <= 5) { + pPps->bSliceGroupChangeDirectionFlag = false; + pPps->uiSliceGroupChangeRate = 0; + } else if (pPps->uiSliceGroupMapType == 6) { + pPps->uiPicSizeInMapUnits = 1; + memset (&pPps->uiSliceGroupId[0], 0, MAX_SLICEGROUP_IDS * sizeof (pPps->uiSliceGroupId[0])); + } + } +#endif//!DISABLE_FMO_FEATURE + + pPps->iPicInitQp = 26; + pPps->iPicInitQs = 26; + + pPps->uiChromaQpIndexOffset = 0; + pPps->bDeblockingFilterControlPresentFlag = kbDeblockingFilterPresentFlag; + + return 0; +} +} // namespace WelsEnc diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/deblocking.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/deblocking.cpp new file mode 
100644 index 000000000..8fd00ea61 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/deblocking.cpp @@ -0,0 +1,885 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file deblocking.c + * + * \brief Interfaces introduced in frame deblocking filtering + * + * \date 08/03/2009 Created + * + ************************************************************************************* + */ + +#include "deblocking.h" +#include "cpu_core.h" + +namespace WelsEnc { + +#define g_kuiAlphaTable(x) g_kuiAlphaTable[(x)] +#define g_kiBetaTable(x) g_kiBetaTable[(x)] +#define g_kiTc0Table(x) g_kiTc0Table[(x)] + +#define MB_BS_MV(sCurMv, sNeighMv, uiBIdx, uiBnIdx) \ + (\ + ( WELS_ABS( sCurMv[uiBIdx].iMvX - sNeighMv[uiBnIdx].iMvX ) >= 4 ) ||\ + ( WELS_ABS( sCurMv[uiBIdx].iMvY - sNeighMv[uiBnIdx].iMvY ) >= 4 )\ + ) + +#define SMB_EDGE_MV(uiRefIndex, sMotionVector, uiBIdx, uiBnIdx) \ + (\ + !!((WELS_ABS(sMotionVector[uiBIdx].iMvX - sMotionVector[uiBnIdx].iMvX) &(~3)) | (WELS_ABS(sMotionVector[uiBIdx].iMvY - sMotionVector[uiBnIdx].iMvY) &(~3)))\ + ) + +#define BS_EDGE(bsx1, uiRefIndex, sMotionVector, uiBIdx, uiBnIdx) \ + ( (bsx1|SMB_EDGE_MV(uiRefIndex, sMotionVector, uiBIdx, uiBnIdx))<<(bsx1?1:0)) + +#define GET_ALPHA_BETA_FROM_QP(QP, iAlphaOffset, iBetaOffset, iIdexA, iAlpha, iBeta) \ +{\ + iIdexA = (QP + iAlphaOffset);\ + iIdexA = CLIP3_QP_0_51(iIdexA);\ + iAlpha = g_kuiAlphaTable(iIdexA);\ + iBeta = g_kiBetaTable((CLIP3_QP_0_51(QP + iBetaOffset)));\ +} + +static const uint8_t g_kuiAlphaTable[52 + 12] = { //this table refers to Table 8-16 in H.264/AVC standard + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 4, 4, 5, 6, + 7, 8, 9, 10, 12, 13, 15, 17, 20, 22, + 25, 28, 32, 36, 40, 45, 50, 56, 63, 71, + 80, 90, 101, 113, 127, 144, 162, 182, 203, 226, + 255, 255 + , 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 +}; + +static const int8_t g_kiBetaTable[52 + 12] = { //this table refers to Table 8-16 in H.264/AVC standard + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 2, 2, 2, 3, + 3, 3, 3, 4, 4, 4, 6, 6, 7, 7, + 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, + 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, + 18, 18 + , 18, 18, 18, 18, 18, 18, 
18, 18, 18, 18, 18, 18 +}; + +static const int8_t g_kiTc0Table[52 + 12][4] = { //this table refers Table 8-17 in H.264/AVC standard + { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, + { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, + { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 1 }, + { -1, 0, 0, 1 }, { -1, 0, 0, 1 }, { -1, 0, 0, 1 }, { -1, 0, 1, 1 }, { -1, 0, 1, 1 }, { -1, 1, 1, 1 }, + { -1, 1, 1, 1 }, { -1, 1, 1, 1 }, { -1, 1, 1, 1 }, { -1, 1, 1, 2 }, { -1, 1, 1, 2 }, { -1, 1, 1, 2 }, + { -1, 1, 1, 2 }, { -1, 1, 2, 3 }, { -1, 1, 2, 3 }, { -1, 2, 2, 3 }, { -1, 2, 2, 4 }, { -1, 2, 3, 4 }, + { -1, 2, 3, 4 }, { -1, 3, 3, 5 }, { -1, 3, 4, 6 }, { -1, 3, 4, 6 }, { -1, 4, 5, 7 }, { -1, 4, 5, 8 }, + { -1, 4, 6, 9 }, { -1, 5, 7, 10 }, { -1, 6, 8, 11 }, { -1, 6, 8, 13 }, { -1, 7, 10, 14 }, { -1, 8, 11, 16 }, + { -1, 9, 12, 18 }, { -1, 10, 13, 20 }, { -1, 11, 15, 23 }, { -1, 13, 17, 25 } + , { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 } + , { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 } +}; + +static const uint8_t g_kuiTableBIdx[2][8] = { + { + 0, 4, 8, 12, // g_kuiTableBIdx + 3, 7, 11, 15 + }, // table_bn_idx + + { + 0, 1, 2, 3 , // g_kuiTableBIdx + 12, 13, 14, 15 + }, // table_bn_idx +}; + +#define TC0_TBL_LOOKUP(iTc, iIdexA, pBS, bchroma) \ +{\ + iTc[0] = g_kiTc0Table(iIdexA)[pBS[0]] + bchroma;\ + iTc[1] = g_kiTc0Table(iIdexA)[pBS[1]] + bchroma;\ + iTc[2] = g_kiTc0Table(iIdexA)[pBS[2]] + bchroma;\ + iTc[3] = g_kiTc0Table(iIdexA)[pBS[3]] + bchroma;\ +} + +void inline DeblockingBSInsideMBAvsbase (int8_t* pNnzTab, uint8_t uiBS[2][4][4], int32_t iLShiftFactor) { + uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3; + + uiNnz32b0 = * (uint32_t*) (pNnzTab + 0); + uiNnz32b1 = * 
(uint32_t*) (pNnzTab + 4); + uiNnz32b2 = * (uint32_t*) (pNnzTab + 8); + uiNnz32b3 = * (uint32_t*) (pNnzTab + 12); + + uiBS[0][1][0] = (pNnzTab[0] | pNnzTab[1]) << iLShiftFactor; + uiBS[0][2][0] = (pNnzTab[1] | pNnzTab[2]) << iLShiftFactor; + uiBS[0][3][0] = (pNnzTab[2] | pNnzTab[3]) << iLShiftFactor; + + uiBS[0][1][1] = (pNnzTab[4] | pNnzTab[5]) << iLShiftFactor; + uiBS[0][2][1] = (pNnzTab[5] | pNnzTab[6]) << iLShiftFactor; + uiBS[0][3][1] = (pNnzTab[6] | pNnzTab[7]) << iLShiftFactor; + * (uint32_t*)uiBS[1][1] = (uiNnz32b0 | uiNnz32b1) << iLShiftFactor; + + uiBS[0][1][2] = (pNnzTab[8] | pNnzTab[9]) << iLShiftFactor; + uiBS[0][2][2] = (pNnzTab[9] | pNnzTab[10]) << iLShiftFactor; + uiBS[0][3][2] = (pNnzTab[10] | pNnzTab[11]) << iLShiftFactor; + * (uint32_t*)uiBS[1][2] = (uiNnz32b1 | uiNnz32b2) << iLShiftFactor; + + uiBS[0][1][3] = (pNnzTab[12] | pNnzTab[13]) << iLShiftFactor; + uiBS[0][2][3] = (pNnzTab[13] | pNnzTab[14]) << iLShiftFactor; + uiBS[0][3][3] = (pNnzTab[14] | pNnzTab[15]) << iLShiftFactor; + * (uint32_t*)uiBS[1][3] = (uiNnz32b2 | uiNnz32b3) << iLShiftFactor; + +} + +void inline DeblockingBSInsideMBNormal (SMB* pCurMb, uint8_t uiBS[2][4][4], int8_t* pNnzTab) { + uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3; + ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4); + + uiNnz32b0 = * (uint32_t*) (pNnzTab + 0); + uiNnz32b1 = * (uint32_t*) (pNnzTab + 4); + uiNnz32b2 = * (uint32_t*) (pNnzTab + 8); + uiNnz32b3 = * (uint32_t*) (pNnzTab + 12); + + for (int i = 0; i < 3; i++) + uiBsx4[i] = pNnzTab[i] | pNnzTab[i + 1]; + uiBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 1, 0); + uiBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 2, 1); + uiBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 3, 2); + + for (int i = 0; i < 3; i++) + uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[4 + i + 1]; + uiBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 5, 4); + uiBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 6, 5); + uiBS[0][3][1] = BS_EDGE (uiBsx4[2], 
iRefIdx, pCurMb->sMv, 7, 6); + + for (int i = 0; i < 3; i++) + uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[8 + i + 1]; + uiBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 9, 8); + uiBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 10, 9); + uiBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 11, 10); + + for (int i = 0; i < 3; i++) + uiBsx4[i] = pNnzTab[12 + i] | pNnzTab[12 + i + 1]; + uiBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 13, 12); + uiBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 14, 13); + uiBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 15, 14); + + //horizontal + * (uint32_t*)uiBsx4 = (uiNnz32b0 | uiNnz32b1); + uiBS[1][1][0] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 4, 0); + uiBS[1][1][1] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 5, 1); + uiBS[1][1][2] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 6, 2); + uiBS[1][1][3] = BS_EDGE (uiBsx4[3], iRefIdx, pCurMb->sMv, 7, 3); + + * (uint32_t*)uiBsx4 = (uiNnz32b1 | uiNnz32b2); + uiBS[1][2][0] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 8, 4); + uiBS[1][2][1] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 9, 5); + uiBS[1][2][2] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 10, 6); + uiBS[1][2][3] = BS_EDGE (uiBsx4[3], iRefIdx, pCurMb->sMv, 11, 7); + + * (uint32_t*)uiBsx4 = (uiNnz32b2 | uiNnz32b3); + uiBS[1][3][0] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 12, 8); + uiBS[1][3][1] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 13, 9); + uiBS[1][3][2] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 14, 10); + uiBS[1][3][3] = BS_EDGE (uiBsx4[3], iRefIdx, pCurMb->sMv, 15, 11); +} + +uint32_t DeblockingBSMarginalMBAvcbase (SMB* pCurMb, SMB* pNeighMb, int32_t iEdge) { + int32_t i; + uint32_t uiBSx4; + uint8_t* pBS = (uint8_t*) (&uiBSx4); + const uint8_t* pBIdx = &g_kuiTableBIdx[iEdge][0]; + const uint8_t* pBnIdx = &g_kuiTableBIdx[iEdge][4]; + + + for (i = 0; i < 4; i++) { + if (pCurMb->pNonZeroCount[*pBIdx] | pNeighMb->pNonZeroCount[*pBnIdx]) { + pBS[i] = 2; + } else { + pBS[i] = 
+#ifndef SINGLE_REF_FRAME + (pCurMb->uiRefIndex[g_kiTableBlock8x8Idx[1][iEdge][i]] - pNeighMb->uiRefIndex[g_kiTableBlock8x8NIdx[1][iEdge][i]]) || +#endif + MB_BS_MV (pCurMb->sMv, pNeighMb->sMv, *pBIdx, *pBnIdx); + } + pBIdx++; + pBnIdx++; + } + return uiBSx4; +} + +void FilteringEdgeLumaH (DeblockingFunc* pfDeblocking, SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iStride, + uint8_t* pBS) { + int32_t iIdexA; + int32_t iAlpha; + int32_t iBeta; + ENFORCE_STACK_ALIGN_1D (int8_t, iTc, 4, 16); + + GET_ALPHA_BETA_FROM_QP (pFilter->uiLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIdexA, iAlpha, + iBeta); + + if (iAlpha | iBeta) { + TC0_TBL_LOOKUP (iTc, iIdexA, pBS, 0); + pfDeblocking->pfLumaDeblockingLT4Ver (pPix, iStride, iAlpha, iBeta, iTc); + } + return; +} +void FilteringEdgeLumaV (DeblockingFunc* pfDeblocking, SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iStride, + uint8_t* pBS) { + int32_t iIdexA; + int32_t iAlpha; + int32_t iBeta; + ENFORCE_STACK_ALIGN_1D (int8_t, iTc, 4, 16); + + GET_ALPHA_BETA_FROM_QP (pFilter->uiLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIdexA, iAlpha, + iBeta); + + if (iAlpha | iBeta) { + TC0_TBL_LOOKUP (iTc, iIdexA, pBS, 0); + pfDeblocking->pfLumaDeblockingLT4Hor (pPix, iStride, iAlpha, iBeta, iTc); + } + return; +} + +void FilteringEdgeLumaIntraH (DeblockingFunc* pfDeblocking, SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iStride, + uint8_t* pBS) { + int32_t iIdexA; + int32_t iAlpha; + int32_t iBeta; + + GET_ALPHA_BETA_FROM_QP (pFilter->uiLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIdexA, iAlpha, + iBeta); + + if (iAlpha | iBeta) { + pfDeblocking->pfLumaDeblockingEQ4Ver (pPix, iStride, iAlpha, iBeta); + } + return; +} + +void FilteringEdgeLumaIntraV (DeblockingFunc* pfDeblocking, SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iStride, + uint8_t* pBS) { + int32_t iIdexA; + int32_t iAlpha; + int32_t iBeta; + + GET_ALPHA_BETA_FROM_QP (pFilter->uiLumaQP, 
pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIdexA, iAlpha, + iBeta); + + if (iAlpha | iBeta) { + pfDeblocking->pfLumaDeblockingEQ4Hor (pPix, iStride, iAlpha, iBeta); + } + return; +} +void FilteringEdgeChromaH (DeblockingFunc* pfDeblocking, SDeblockingFilter* pFilter, uint8_t* pPixCb, uint8_t* pPixCr, + int32_t iStride, uint8_t* pBS) { + int32_t iIdexA; + int32_t iAlpha; + int32_t iBeta; + ENFORCE_STACK_ALIGN_1D (int8_t, iTc, 4, 16); + + GET_ALPHA_BETA_FROM_QP (pFilter->uiChromaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIdexA, iAlpha, + iBeta); + + if (iAlpha | iBeta) { + TC0_TBL_LOOKUP (iTc, iIdexA, pBS, 1); + pfDeblocking->pfChromaDeblockingLT4Ver (pPixCb, pPixCr, iStride, iAlpha, iBeta, iTc); + } + return; +} +void FilteringEdgeChromaV (DeblockingFunc* pfDeblocking, SDeblockingFilter* pFilter, uint8_t* pPixCb, uint8_t* pPixCr, + int32_t iStride, uint8_t* pBS) { + int32_t iIdexA; + int32_t iAlpha; + int32_t iBeta; + ENFORCE_STACK_ALIGN_1D (int8_t, iTc, 4, 16); + + GET_ALPHA_BETA_FROM_QP (pFilter->uiChromaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIdexA, iAlpha, + iBeta); + + if (iAlpha | iBeta) { + TC0_TBL_LOOKUP (iTc, iIdexA, pBS, 1); + pfDeblocking->pfChromaDeblockingLT4Hor (pPixCb, pPixCr, iStride, iAlpha, iBeta, iTc); + } + return; +} + +void FilteringEdgeChromaIntraH (DeblockingFunc* pfDeblocking, SDeblockingFilter* pFilter, uint8_t* pPixCb, + uint8_t* pPixCr, int32_t iStride, uint8_t* pBS) { + int32_t iIdexA; + int32_t iAlpha; + int32_t iBeta; + + GET_ALPHA_BETA_FROM_QP (pFilter->uiChromaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIdexA, iAlpha, + iBeta); + + if (iAlpha | iBeta) { + pfDeblocking->pfChromaDeblockingEQ4Ver (pPixCb, pPixCr, iStride, iAlpha, iBeta); + } + return; +} + +void FilteringEdgeChromaIntraV (DeblockingFunc* pfDeblocking, SDeblockingFilter* pFilter, uint8_t* pPixCb, + uint8_t* pPixCr, int32_t iStride, uint8_t* pBS) { + int32_t iIdexA; + int32_t iAlpha; + int32_t 
iBeta; + + GET_ALPHA_BETA_FROM_QP (pFilter->uiChromaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIdexA, iAlpha, + iBeta); + + if (iAlpha | iBeta) { + pfDeblocking->pfChromaDeblockingEQ4Hor (pPixCb, pPixCr, iStride, iAlpha, iBeta); + } + return; +} + +void DeblockingInterMb (DeblockingFunc* pfDeblocking, SMB* pCurMb, SDeblockingFilter* pFilter, uint8_t uiBS[2][4][4]) { + int8_t iCurLumaQp = pCurMb->uiLumaQp; + int8_t iCurChromaQp = pCurMb->uiChromaQp; + int32_t iLineSize = pFilter->iCsStride[0]; + int32_t iLineSizeUV = pFilter->iCsStride[1]; + int32_t iMbStride = pFilter->iMbStride; + + int32_t iMbX = pCurMb->iMbX; + int32_t iMbY = pCurMb->iMbY; + + bool bLeftBsValid[2] = { (iMbX > 0), ((iMbX > 0)&& (pCurMb->uiSliceIdc == (pCurMb - 1)->uiSliceIdc))}; + bool bTopBsValid[2] = { (iMbY > 0), ((iMbY > 0)&& (pCurMb->uiSliceIdc == (pCurMb - iMbStride)->uiSliceIdc))}; + + int32_t iLeftFlag = bLeftBsValid[pFilter->uiFilterIdc]; + int32_t iTopFlag = bTopBsValid[pFilter->uiFilterIdc]; + + uint8_t* pDestY, *pDestCb, *pDestCr; + pDestY = pFilter->pCsData[0]; + pDestCb = pFilter->pCsData[1]; + pDestCr = pFilter->pCsData[2]; + + if (iLeftFlag) { + pFilter->uiLumaQP = (iCurLumaQp + (pCurMb - 1)->uiLumaQp + 1) >> 1; + pFilter->uiChromaQP = (iCurChromaQp + (pCurMb - 1)->uiChromaQp + 1) >> 1; + + if (uiBS[0][0][0] == 0x04) { + FilteringEdgeLumaIntraV (pfDeblocking, pFilter, pDestY, iLineSize , NULL); + FilteringEdgeChromaIntraV (pfDeblocking, pFilter, pDestCb, pDestCr, iLineSizeUV, NULL); + } else { + if (* (uint32_t*)uiBS[0][0] != 0) { + FilteringEdgeLumaV (pfDeblocking, pFilter, pDestY, iLineSize, uiBS[0][0]); + FilteringEdgeChromaV (pfDeblocking, pFilter, pDestCb, pDestCr, iLineSizeUV, uiBS[0][0]); + } + } + } + + pFilter->uiLumaQP = iCurLumaQp; + pFilter->uiChromaQP = iCurChromaQp; + + if (* (uint32_t*)uiBS[0][1] != 0) { + FilteringEdgeLumaV (pfDeblocking, pFilter, &pDestY[1 << 2], iLineSize, uiBS[0][1]); + } + + if (* (uint32_t*)uiBS[0][2] != 0) { + 
FilteringEdgeLumaV (pfDeblocking, pFilter, &pDestY[2 << 2], iLineSize, uiBS[0][2]); + FilteringEdgeChromaV (pfDeblocking, pFilter, &pDestCb[2 << 1], &pDestCr[2 << 1], iLineSizeUV, uiBS[0][2]); + } + + if (* (uint32_t*)uiBS[0][3] != 0) { + FilteringEdgeLumaV (pfDeblocking, pFilter, &pDestY[3 << 2], iLineSize, uiBS[0][3]); + } + + if (iTopFlag) { + pFilter->uiLumaQP = (iCurLumaQp + (pCurMb - iMbStride)->uiLumaQp + 1) >> 1; + pFilter->uiChromaQP = (iCurChromaQp + (pCurMb - iMbStride)->uiChromaQp + 1) >> 1; + + if (uiBS[1][0][0] == 0x04) { + FilteringEdgeLumaIntraH (pfDeblocking, pFilter, pDestY, iLineSize , NULL); + FilteringEdgeChromaIntraH (pfDeblocking, pFilter, pDestCb, pDestCr, iLineSizeUV, NULL); + } else { + if (* (uint32_t*)uiBS[1][0] != 0) { + FilteringEdgeLumaH (pfDeblocking, pFilter, pDestY, iLineSize, uiBS[1][0]); + FilteringEdgeChromaH (pfDeblocking, pFilter, pDestCb, pDestCr, iLineSizeUV, uiBS[1][0]); + } + } + } + + pFilter->uiLumaQP = iCurLumaQp; + pFilter->uiChromaQP = iCurChromaQp; + + if (* (uint32_t*)uiBS[1][1] != 0) { + FilteringEdgeLumaH (pfDeblocking, pFilter, &pDestY[ (1 << 2)*iLineSize], iLineSize, uiBS[1][1]); + } + + if (* (uint32_t*)uiBS[1][2] != 0) { + FilteringEdgeLumaH (pfDeblocking, pFilter, &pDestY[ (2 << 2)*iLineSize], iLineSize, uiBS[1][2]); + FilteringEdgeChromaH (pfDeblocking, pFilter, &pDestCb[ (2 << 1)*iLineSizeUV], &pDestCr[ (2 << 1)*iLineSizeUV], + iLineSizeUV, uiBS[1][2]); + } + + if (* (uint32_t*)uiBS[1][3] != 0) { + FilteringEdgeLumaH (pfDeblocking, pFilter, &pDestY[ (3 << 2)*iLineSize], iLineSize, uiBS[1][3]); + } +} + +void FilteringEdgeLumaHV (DeblockingFunc* pfDeblocking, SMB* pCurMb, SDeblockingFilter* pFilter) { + int32_t iLineSize = pFilter->iCsStride[0]; + int32_t iMbStride = pFilter->iMbStride; + + uint8_t* pDestY; + int8_t iCurQp; + int32_t iIdexA, iAlpha, iBeta; + + int32_t iMbX = pCurMb->iMbX; + int32_t iMbY = pCurMb->iMbY; + + bool bLeftBsValid[2] = { (iMbX > 0), ((iMbX > 0)&& (pCurMb->uiSliceIdc == (pCurMb - 
1)->uiSliceIdc))}; + bool bTopBsValid[2] = { (iMbY > 0), ((iMbY > 0)&& (pCurMb->uiSliceIdc == (pCurMb - iMbStride)->uiSliceIdc))}; + + int32_t iLeftFlag = bLeftBsValid[pFilter->uiFilterIdc]; + int32_t iTopFlag = bTopBsValid[pFilter->uiFilterIdc]; + + ENFORCE_STACK_ALIGN_1D (int8_t, iTc, 4, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, uiBSx4, 4, 4); + + pDestY = pFilter->pCsData[0]; + iCurQp = pCurMb->uiLumaQp; + + * (uint32_t*)uiBSx4 = 0x03030303; + + // luma v + if (iLeftFlag) { + pFilter->uiLumaQP = (iCurQp + (pCurMb - 1)->uiLumaQp + 1) >> 1; + FilteringEdgeLumaIntraV (pfDeblocking, pFilter, pDestY, iLineSize, NULL); + } + + pFilter->uiLumaQP = iCurQp; + GET_ALPHA_BETA_FROM_QP (pFilter->uiLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIdexA, iAlpha, + iBeta); + if (iAlpha | iBeta) { + TC0_TBL_LOOKUP (iTc, iIdexA, uiBSx4, 0); + pfDeblocking->pfLumaDeblockingLT4Hor (&pDestY[1 << 2], iLineSize, iAlpha, iBeta, iTc); + pfDeblocking->pfLumaDeblockingLT4Hor (&pDestY[2 << 2], iLineSize, iAlpha, iBeta, iTc); + pfDeblocking->pfLumaDeblockingLT4Hor (&pDestY[3 << 2], iLineSize, iAlpha, iBeta, iTc); + + } + + // luma h + if (iTopFlag) { + pFilter->uiLumaQP = (iCurQp + (pCurMb - iMbStride)->uiLumaQp + 1) >> 1; + FilteringEdgeLumaIntraH (pfDeblocking, pFilter, pDestY, iLineSize, NULL); + } + + pFilter->uiLumaQP = iCurQp; + if (iAlpha | iBeta) { + pfDeblocking->pfLumaDeblockingLT4Ver (&pDestY[ (1 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc); + pfDeblocking->pfLumaDeblockingLT4Ver (&pDestY[ (2 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc); + pfDeblocking->pfLumaDeblockingLT4Ver (&pDestY[ (3 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc); + } +} +void FilteringEdgeChromaHV (DeblockingFunc* pfDeblocking, SMB* pCurMb, SDeblockingFilter* pFilter) { + int32_t iLineSize = pFilter->iCsStride[1]; + int32_t iMbStride = pFilter->iMbStride; + + uint8_t* pDestCb, *pDestCr; + int8_t iCurQp; + int32_t iIdexA, iAlpha, iBeta; + + int32_t iMbX = pCurMb->iMbX; + int32_t iMbY 
= pCurMb->iMbY; + + bool bLeftBsValid[2] = { (iMbX > 0), ((iMbX > 0)&& (pCurMb->uiSliceIdc == (pCurMb - 1)->uiSliceIdc))}; + bool bTopBsValid[2] = { (iMbY > 0), ((iMbY > 0)&& (pCurMb->uiSliceIdc == (pCurMb - iMbStride)->uiSliceIdc))}; + + int32_t iLeftFlag = bLeftBsValid[pFilter->uiFilterIdc]; + int32_t iTopFlag = bTopBsValid[pFilter->uiFilterIdc]; + + ENFORCE_STACK_ALIGN_1D (int8_t, iTc, 4, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, uiBSx4, 4, 4); + + pDestCb = pFilter->pCsData[1]; + pDestCr = pFilter->pCsData[2]; + iCurQp = pCurMb->uiChromaQp; + * (uint32_t*)uiBSx4 = 0x03030303; + + // chroma v + if (iLeftFlag) { + pFilter->uiChromaQP = (iCurQp + (pCurMb - 1)->uiChromaQp + 1) >> 1; + FilteringEdgeChromaIntraV (pfDeblocking, pFilter, pDestCb, pDestCr, iLineSize, NULL); + } + + pFilter->uiChromaQP = iCurQp; + GET_ALPHA_BETA_FROM_QP (pFilter->uiChromaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIdexA, iAlpha, + iBeta); + if (iAlpha | iBeta) { + TC0_TBL_LOOKUP (iTc, iIdexA, uiBSx4, 1); + pfDeblocking->pfChromaDeblockingLT4Hor (&pDestCb[2 << 1], &pDestCr[2 << 1], iLineSize, iAlpha, iBeta, iTc); + } + + // chroma h + if (iTopFlag) { + pFilter->uiChromaQP = (iCurQp + (pCurMb - iMbStride)->uiChromaQp + 1) >> 1; + FilteringEdgeChromaIntraH (pfDeblocking, pFilter, pDestCb, pDestCr, iLineSize, NULL); + } + + pFilter->uiChromaQP = iCurQp; + if (iAlpha | iBeta) { + pfDeblocking->pfChromaDeblockingLT4Ver (&pDestCb[ (2 << 1)*iLineSize], &pDestCr[ (2 << 1)*iLineSize], iLineSize, iAlpha, + iBeta, iTc); + } +} + +// merge h&v lookup table operation to save performance +void DeblockingIntraMb (DeblockingFunc* pfDeblocking, SMB* pCurMb, SDeblockingFilter* pFilter) { + FilteringEdgeLumaHV (pfDeblocking, pCurMb, pFilter); + FilteringEdgeChromaHV (pfDeblocking, pCurMb, pFilter); +} + +#if defined(HAVE_NEON) && defined(SINGLE_REF_FRAME) +void DeblockingBSCalc_neon (SWelsFuncPtrList* pFunc, SMB* pCurMb, uint8_t uiBS[2][4][4], Mb_Type uiCurMbType, + int32_t iMbStride, 
int32_t iLeftFlag, int32_t iTopFlag) { + DeblockingBSCalcEnc_neon (pCurMb->pNonZeroCount, pCurMb->sMv, + (iLeftFlag ? LEFT_MB_POS : 0) | (iTopFlag ? TOP_MB_POS : 0), iMbStride, uiBS); + if (iLeftFlag) { + if (IS_INTRA ((pCurMb - 1)->uiMbType)) { + * (uint32_t*)uiBS[0][0] = 0x04040404; + } + } else { + * (uint32_t*)uiBS[0][0] = 0; + } + if (iTopFlag) { + if (IS_INTRA ((pCurMb - iMbStride)->uiMbType)) { + * (uint32_t*)uiBS[1][0] = 0x04040404; + } + } else { + * (uint32_t*)uiBS[1][0] = 0; + } +} +#endif + +#if defined(HAVE_NEON_AARCH64) && defined(SINGLE_REF_FRAME) +void DeblockingBSCalc_AArch64_neon (SWelsFuncPtrList* pFunc, SMB* pCurMb, uint8_t uiBS[2][4][4], Mb_Type uiCurMbType, + int32_t iMbStride, int32_t iLeftFlag, int32_t iTopFlag) { + DeblockingBSCalcEnc_AArch64_neon (pCurMb->pNonZeroCount, pCurMb->sMv, + (iLeftFlag ? LEFT_MB_POS : 0) | (iTopFlag ? TOP_MB_POS : 0), iMbStride, uiBS); + if (iLeftFlag) { + if (IS_INTRA ((pCurMb - 1)->uiMbType)) { + * (uint32_t*)uiBS[0][0] = 0x04040404; + } + } else { + * (uint32_t*)uiBS[0][0] = 0; + } + if (iTopFlag) { + if (IS_INTRA ((pCurMb - iMbStride)->uiMbType)) { + * (uint32_t*)uiBS[1][0] = 0x04040404; + } + } else { + * (uint32_t*)uiBS[1][0] = 0; + } +} +#endif + +void DeblockingBSCalc_c (SWelsFuncPtrList* pFunc, SMB* pCurMb, uint8_t uiBS[2][4][4], Mb_Type uiCurMbType, + int32_t iMbStride, int32_t iLeftFlag, int32_t iTopFlag) { + if (iLeftFlag) { + * (uint32_t*)uiBS[0][0] = IS_INTRA ((pCurMb - 1)->uiMbType) ? 0x04040404 : DeblockingBSMarginalMBAvcbase (pCurMb, + pCurMb - 1, 0); + } else { + * (uint32_t*)uiBS[0][0] = 0; + } + if (iTopFlag) { + * (uint32_t*)uiBS[1][0] = IS_INTRA ((pCurMb - iMbStride)->uiMbType) ? 0x04040404 : DeblockingBSMarginalMBAvcbase ( + pCurMb, (pCurMb - iMbStride), 1); + } else { + * (uint32_t*)uiBS[1][0] = 0; + } + //SKIP MB_16x16 or others + if (uiCurMbType != MB_TYPE_SKIP) { + pFunc->pfSetNZCZero (pCurMb->pNonZeroCount); // set all none-zero nzc to 1; dbk can be opti! 
+ + if (uiCurMbType == MB_TYPE_16x16) { + DeblockingBSInsideMBAvsbase (pCurMb->pNonZeroCount, uiBS, 1); + } else { + DeblockingBSInsideMBNormal (pCurMb, uiBS, pCurMb->pNonZeroCount); + } + } else { + * (uint32_t*)uiBS[0][1] = * (uint32_t*)uiBS[0][2] = * (uint32_t*)uiBS[0][3] = + * (uint32_t*)uiBS[1][1] = * (uint32_t*)uiBS[1][2] = * (uint32_t*)uiBS[1][3] = 0; + } +} + +void DeblockingMbAvcbase (SWelsFuncPtrList* pFunc, SMB* pCurMb, SDeblockingFilter* pFilter) { + uint8_t uiBS[2][4][4] = {{{ 0 }}}; + + Mb_Type uiCurMbType = pCurMb->uiMbType; + int32_t iMbStride = pFilter->iMbStride; + + int32_t iMbX = pCurMb->iMbX; + int32_t iMbY = pCurMb->iMbY; + + bool bLeftBsValid[2] = { (iMbX > 0), ((iMbX > 0)&& (pCurMb->uiSliceIdc == (pCurMb - 1)->uiSliceIdc))}; + bool bTopBsValid[2] = { (iMbY > 0), ((iMbY > 0)&& (pCurMb->uiSliceIdc == (pCurMb - iMbStride)->uiSliceIdc))}; + + int32_t iLeftFlag = bLeftBsValid[pFilter->uiFilterIdc]; + int32_t iTopFlag = bTopBsValid[pFilter->uiFilterIdc]; + + switch (uiCurMbType) { + case MB_TYPE_INTRA4x4: + case MB_TYPE_INTRA16x16: + case MB_TYPE_INTRA_PCM: + DeblockingIntraMb (&pFunc->pfDeblocking, pCurMb, pFilter); + break; + default: + pFunc->pfDeblocking.pfDeblockingBSCalc (pFunc, pCurMb, uiBS, uiCurMbType, iMbStride, iLeftFlag, iTopFlag); + DeblockingInterMb (&pFunc->pfDeblocking, pCurMb, pFilter, uiBS); + break; + } +} + +void DeblockingFilterFrameAvcbase (SDqLayer* pCurDq, SWelsFuncPtrList* pFunc) { + int32_t i, j; + const int32_t kiMbWidth = pCurDq->iMbWidth; + const int32_t kiMbHeight = pCurDq->iMbHeight; + SMB* pCurrentMbBlock = pCurDq->sMbDataP; + SSliceHeaderExt* sSliceHeaderExt = &pCurDq->ppSliceInLayer[0]->sSliceHeaderExt; + SDeblockingFilter pFilter; + + /* Step1: parameters set */ + if (sSliceHeaderExt->sSliceHeader.uiDisableDeblockingFilterIdc == 1) + return; + + pFilter.uiFilterIdc = (sSliceHeaderExt->sSliceHeader.uiDisableDeblockingFilterIdc != 0); + + pFilter.iCsStride[0] = pCurDq->pDecPic->iLineSize[0]; + pFilter.iCsStride[1] 
= pCurDq->pDecPic->iLineSize[1]; + pFilter.iCsStride[2] = pCurDq->pDecPic->iLineSize[2]; + + pFilter.iSliceAlphaC0Offset = sSliceHeaderExt->sSliceHeader.iSliceAlphaC0Offset; + pFilter.iSliceBetaOffset = sSliceHeaderExt->sSliceHeader.iSliceBetaOffset; + + pFilter.iMbStride = kiMbWidth; + + for (j = 0; j < kiMbHeight; ++j) { + pFilter.pCsData[0] = pCurDq->pDecPic->pData[0] + ((j * pFilter.iCsStride[0]) << 4); + pFilter.pCsData[1] = pCurDq->pDecPic->pData[1] + ((j * pFilter.iCsStride[1]) << 3); + pFilter.pCsData[2] = pCurDq->pDecPic->pData[2] + ((j * pFilter.iCsStride[2]) << 3); + for (i = 0; i < kiMbWidth; i++) { + DeblockingMbAvcbase (pFunc, pCurrentMbBlock, &pFilter); + ++pCurrentMbBlock; + pFilter.pCsData[0] += MB_WIDTH_LUMA; + pFilter.pCsData[1] += MB_WIDTH_CHROMA; + pFilter.pCsData[2] += MB_WIDTH_CHROMA; + } + } +} + +void DeblockingFilterSliceAvcbase (SDqLayer* pCurDq, SWelsFuncPtrList* pFunc, SSlice* pSlice) { + SMB* pMbList = pCurDq->sMbDataP; + SSliceHeaderExt* sSliceHeaderExt = &pSlice->sSliceHeaderExt; + SMB* pCurrentMbBlock; + + const int32_t kiMbWidth = pCurDq->iMbWidth; + const int32_t kiMbHeight = pCurDq->iMbHeight; + const int32_t kiTotalNumMb = kiMbWidth * kiMbHeight; + int32_t iCurMbIdx = 0, iNextMbIdx = 0, iNumMbFiltered = 0; + + /* Step1: parameters set */ + if (sSliceHeaderExt->sSliceHeader.uiDisableDeblockingFilterIdc == 1) + return; + + SDeblockingFilter pFilter; + + pFilter.uiFilterIdc = (sSliceHeaderExt->sSliceHeader.uiDisableDeblockingFilterIdc != 0); + pFilter.iCsStride[0] = pCurDq->pDecPic->iLineSize[0]; + pFilter.iCsStride[1] = pCurDq->pDecPic->iLineSize[1]; + pFilter.iCsStride[2] = pCurDq->pDecPic->iLineSize[2]; + pFilter.iSliceAlphaC0Offset = sSliceHeaderExt->sSliceHeader.iSliceAlphaC0Offset; + pFilter.iSliceBetaOffset = sSliceHeaderExt->sSliceHeader.iSliceBetaOffset; + pFilter.iMbStride = kiMbWidth; + + iNextMbIdx = sSliceHeaderExt->sSliceHeader.iFirstMbInSlice; + + for (; ;) { + iCurMbIdx = iNextMbIdx; + pCurrentMbBlock = &pMbList[ 
iCurMbIdx ]; + + pFilter.pCsData[0] = pCurDq->pDecPic->pData[0] + ((pCurrentMbBlock->iMbX + pCurrentMbBlock->iMbY * pFilter.iCsStride[0]) + << 4); + pFilter.pCsData[1] = pCurDq->pDecPic->pData[1] + ((pCurrentMbBlock->iMbX + pCurrentMbBlock->iMbY * pFilter.iCsStride[1]) + << 3); + pFilter.pCsData[2] = pCurDq->pDecPic->pData[2] + ((pCurrentMbBlock->iMbX + pCurrentMbBlock->iMbY * pFilter.iCsStride[2]) + << 3); + + DeblockingMbAvcbase (pFunc, pCurrentMbBlock, &pFilter); + + ++iNumMbFiltered; + iNextMbIdx = WelsGetNextMbOfSlice (pCurDq, iCurMbIdx); + //whether all of MB in current slice filtered or not + if (iNextMbIdx == -1 || iNextMbIdx >= kiTotalNumMb || iNumMbFiltered >= kiTotalNumMb) { + break; + } + } +} + +void DeblockingFilterSliceAvcbaseNull (SDqLayer* pCurDq, SWelsFuncPtrList* pFunc, SSlice* pSlice) { +} + +void PerformDeblockingFilter (sWelsEncCtx* pEnc) { + SDqLayer* pCurLayer = pEnc->pCurDqLayer; + SSlice* pSlice = NULL; + + if (pCurLayer->iLoopFilterDisableIdc == 0) { + DeblockingFilterFrameAvcbase (pCurLayer, pEnc->pFuncList); + } else if (pCurLayer->iLoopFilterDisableIdc == 2) { + int32_t iSliceCount = 0; + int32_t iSliceIdx = 0; + + iSliceCount = GetCurrentSliceNum (pCurLayer); + do { + pSlice = pCurLayer->ppSliceInLayer[iSliceIdx]; + assert (NULL != pSlice); + DeblockingFilterSliceAvcbase (pCurLayer, pEnc->pFuncList, pSlice); + ++ iSliceIdx; + } while (iSliceIdx < iSliceCount); + } +} + +void WelsBlockFuncInit (PSetNoneZeroCountZeroFunc* pfSetNZCZero, int32_t iCpu) { + *pfSetNZCZero = WelsNonZeroCount_c; +#ifdef HAVE_NEON + if (iCpu & WELS_CPU_NEON) { + *pfSetNZCZero = WelsNonZeroCount_neon; + } +#endif +#ifdef HAVE_NEON_AARCH64 + if (iCpu & WELS_CPU_NEON) { + *pfSetNZCZero = WelsNonZeroCount_AArch64_neon; + } +#endif +#if defined(X86_ASM) + if (iCpu & WELS_CPU_SSE2) { + *pfSetNZCZero = WelsNonZeroCount_sse2; + } +#endif +#if defined(HAVE_MMI) + if (iCpu & WELS_CPU_MMI) { + *pfSetNZCZero = WelsNonZeroCount_mmi; + } +#endif +#if defined(HAVE_MSA) + if 
(iCpu & WELS_CPU_MSA) { + *pfSetNZCZero = WelsNonZeroCount_msa; + } +#endif +} + +void DeblockingInit (DeblockingFunc* pFunc, int32_t iCpu) { + pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_c; + pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_c; + pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_c; + pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_c; + + pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_c; + pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_c; + pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_c; + pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_c; + + pFunc->pfDeblockingBSCalc = DeblockingBSCalc_c; + + +#ifdef X86_ASM + if (iCpu & WELS_CPU_SSSE3) { + pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_ssse3; + pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_ssse3; + pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_ssse3; + pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_ssse3; + pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_ssse3; + pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_ssse3; + pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_ssse3; + pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_ssse3; + } +#endif + +#if defined(HAVE_NEON) + if (iCpu & WELS_CPU_NEON) { + pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_neon; + pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_neon; + pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_neon; + pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_neon; + + pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_neon; + pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_neon; + pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_neon; + pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_neon; + +#if defined(SINGLE_REF_FRAME) + pFunc->pfDeblockingBSCalc = DeblockingBSCalc_neon; +#endif + } +#endif + +#if defined(HAVE_NEON_AARCH64) + if (iCpu & WELS_CPU_NEON) { + pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_AArch64_neon; + pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_AArch64_neon; + 
pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_AArch64_neon; + pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_AArch64_neon; + + pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_AArch64_neon; + pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_AArch64_neon; + pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_AArch64_neon; + pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_AArch64_neon; + +#if defined(SINGLE_REF_FRAME) + pFunc->pfDeblockingBSCalc = DeblockingBSCalc_AArch64_neon; +#endif + } +#endif + +#if defined(HAVE_MMI) + if (iCpu & WELS_CPU_MMI) { + pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_mmi; + pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_mmi; + pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_mmi; + pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_mmi; + pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_mmi; + pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_mmi; + pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_mmi; + pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_mmi; + } +#endif//HAVE_MMI + +#if defined(HAVE_MSA) + if (iCpu & WELS_CPU_MSA) { + pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_msa; + pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_msa; + pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_msa; + pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_msa; + pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_msa; + pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_msa; + pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_msa; + pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_msa; + } +#endif//HAVE_MSA +} + + +} // namespace WelsEnc + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/decode_mb_aux.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/decode_mb_aux.cpp new file mode 100644 index 000000000..e139c76da --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/decode_mb_aux.cpp @@ -0,0 +1,314 @@ +/*! 
+ * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
/*
 * In-place 4x4 Hadamard transform of 16 coefficients stored row-major in pRes.
 *
 * Two passes of the same 4-point butterfly are applied: first along every row
 * (taps one element apart), then along every column (taps four elements
 * apart). No scaling is performed. Intermediate values are held in int16_t,
 * exactly like the results written back.
 */
void WelsIHadamard4x4Dc (int16_t* pRes) {
  for (int32_t iPass = 0; iPass < 2; ++iPass) {
    const int32_t kiTap  = iPass ? 4 : 1;  // distance between the four taps of one butterfly
    const int32_t kiLine = iPass ? 1 : 4;  // distance between two consecutive rows / columns

    for (int32_t iIdx = 0; iIdx < 4; ++iIdx) {
      int16_t* pLine = pRes + iIdx * kiLine;

      const int16_t kiSum02 = static_cast<int16_t> (pLine[0] + pLine[2 * kiTap]);
      const int16_t kiDif02 = static_cast<int16_t> (pLine[0] - pLine[2 * kiTap]);
      const int16_t kiDif13 = static_cast<int16_t> (pLine[kiTap] - pLine[3 * kiTap]);
      const int16_t kiSum13 = static_cast<int16_t> (pLine[kiTap] + pLine[3 * kiTap]);

      pLine[0]         = static_cast<int16_t> (kiSum02 + kiSum13);
      pLine[kiTap]     = static_cast<int16_t> (kiDif02 + kiDif13);
      pLine[2 * kiTap] = static_cast<int16_t> (kiDif02 - kiDif13);
      pLine[3 * kiTap] = static_cast<int16_t> (kiSum02 - kiSum13);
    }
  }
}
/*
 * In-place 2x2 Hadamard transform of pDct[0..3] combined with dequantisation:
 * every transformed value is multiplied by kuiMF and then shifted right by
 * one bit. The four butterfly terms are held in int16_t before the products
 * are formed in int precision.
 */
void WelsDequantIHadamard2x2Dc (int16_t* pDct, const uint16_t kuiMF) {
  const int16_t kiSum02 = static_cast<int16_t> (pDct[0] + pDct[2]);
  const int16_t kiDif02 = static_cast<int16_t> (pDct[0] - pDct[2]);
  const int16_t kiSum13 = static_cast<int16_t> (pDct[1] + pDct[3]);
  const int16_t kiDif13 = static_cast<int16_t> (pDct[1] - pDct[3]);

  const int32_t kiMixed[4] = {
    kiSum02 + kiSum13,
    kiSum02 - kiSum13,
    kiDif02 + kiDif13,
    kiDif02 - kiDif13
  };

  for (int32_t iPos = 0; iPos < 4; ++iPos) {
    pDct[iPos] = static_cast<int16_t> ((kiMixed[iPos] * kuiMF) >> 1);
  }
}

/*
 * Scales the 16 coefficients of one 4x4 block in place.
 * kpMF supplies 8 factors; coefficient n is multiplied by kpMF[n % 8], i.e.
 * the same 8 factors serve the first and the second half of the block.
 */
void WelsDequant4x4_c (int16_t* pRes, const uint16_t* kpMF) {
  for (int32_t iCol = 0; iCol < 8; ++iCol) {
    for (int32_t iPos = iCol; iPos < 16; iPos += 8) {
      pRes[iPos] *= kpMF[iCol];
    }
  }
}

/*
 * Scales 64 consecutive coefficients (four 4x4 blocks) in place with the same
 * rule as WelsDequant4x4_c: coefficient n is multiplied by kpMF[n % 8].
 */
void WelsDequantFour4x4_c (int16_t* pRes, const uint16_t* kpMF) {
  for (int32_t iCol = 0; iCol < 8; ++iCol) {
    for (int32_t iPos = iCol; iPos < 64; iPos += 8) {
      pRes[iPos] *= kpMF[iCol];
    }
  }
}
int16_t* pDct) { + int32_t i; + int16_t iTemp[16]; + + int32_t iDstStridex2 = iStride << 1; + int32_t iDstStridex3 = iStride + iDstStridex2; + int32_t iPredStridex2 = iPredStride << 1; + int32_t iPredStridex3 = iPredStride + iPredStridex2; + + for (i = 0; i < 4; i ++) { //horizon + int32_t iIdx = i << 2; + const int32_t kiHorSumU = pDct[iIdx] + pDct[iIdx + 2]; // add 0-2 + const int32_t kiHorDelU = pDct[iIdx] - pDct[iIdx + 2]; // sub 0-2 + const int32_t kiHorSumD = pDct[iIdx + 1] + (pDct[iIdx + 3] >> 1); + const int32_t kiHorDelD = (pDct[iIdx + 1] >> 1) - pDct[iIdx + 3]; + + iTemp[iIdx ] = kiHorSumU + kiHorSumD; + iTemp[iIdx + 1] = kiHorDelU + kiHorDelD; + iTemp[iIdx + 2] = kiHorDelU - kiHorDelD; + iTemp[iIdx + 3] = kiHorSumU - kiHorSumD; + } + + for (i = 0; i < 4; i ++) { //vertical + const int32_t kiVerSumL = iTemp[i] + iTemp[8 + i]; + const int32_t kiVerDelL = iTemp[i] - iTemp[8 + i]; + const int32_t kiVerDelR = (iTemp[4 + i] >> 1) - iTemp[12 + i]; + const int32_t kiVerSumR = iTemp[4 + i] + (iTemp[12 + i] >> 1); + + pRec[i ] = WelsClip1 (pPred[i ] + ((kiVerSumL + kiVerSumR + 32) >> 6)); + pRec[iStride + i ] = WelsClip1 (pPred[iPredStride + i ] + ((kiVerDelL + kiVerDelR + 32) >> 6)); + pRec[iDstStridex2 + i] = WelsClip1 (pPred[iPredStridex2 + i] + ((kiVerDelL - kiVerDelR + 32) >> 6)); + pRec[iDstStridex3 + i] = WelsClip1 (pPred[iPredStridex3 + i] + ((kiVerSumL - kiVerSumR + 32) >> 6)); + } +} + +void WelsIDctFourT4Rec_c (uint8_t* pRec, int32_t iStride, uint8_t* pPred, int32_t iPredStride, int16_t* pDct) { + int32_t iDstStridex4 = iStride << 2; + int32_t iPredStridex4 = iPredStride << 2; + WelsIDctT4Rec_c (pRec, iStride, pPred, iPredStride, pDct); + WelsIDctT4Rec_c (&pRec[4], iStride, &pPred[4], iPredStride, pDct + 16); + WelsIDctT4Rec_c (&pRec[iDstStridex4 ], iStride, &pPred[iPredStridex4 ], iPredStride, pDct + 32); + WelsIDctT4Rec_c (&pRec[iDstStridex4 + 4], iStride, &pPred[iPredStridex4 + 4], iPredStride, pDct + 48); + +} + +void WelsIDctT4RecOnMb (uint8_t* 
/*
 * Fills pBlock[0..23] with offsets computed from the two strides.
 *
 * Entries 0..15: the 16 luma 4x4 blocks, grouped as four 8x8 quadrants
 * (top-left, top-right, bottom-left, bottom-right) with four 4x4 blocks each,
 * in the same 2x2 order. For a block at 4x4 column x and 4x4 row y the entry
 * is (x + y * kiStrideY) << 2.
 *
 * Entries 16..19 and 20..23: two identical sets of four chroma entries; for
 * index j the value is ((j & 1) + (j & 2) * kiStrideUV) << 2.
 */
void WelsGetEncBlockStrideOffset (int32_t* pBlock, const int32_t kiStrideY, const int32_t kiStrideUV) {
  for (int32_t iQuad = 0; iQuad < 4; ++iQuad) {
    const int32_t kiQuadCol = (iQuad & 0x01) << 1;  // first 4x4 column of this quadrant: 0 or 2
    const int32_t kiQuadRow = iQuad & 0x02;         // first 4x4 row of this quadrant: 0 or 2
    int32_t* pLuma = pBlock + (iQuad << 2);

    for (int32_t iSub = 0; iSub < 4; ++iSub) {
      const int32_t kiCol = (iSub & 0x01) + kiQuadCol;
      const int32_t kiRow = (iSub >> 1) + kiQuadRow;
      pLuma[iSub] = (kiCol + kiRow * kiStrideY) << 2;
    }

    const int32_t kiChroma = ((iQuad & 0x01) + kiQuadRow * kiStrideUV) << 2;
    pBlock[20 + iQuad] = kiChroma;
    pBlock[16 + iQuad] = kiChroma;
  }
}
WelsIDctT4Rec_mmx; + } + if (uiCpuFlag & WELS_CPU_SSE2) { + pFuncList->pfDequantization4x4 = WelsDequant4x4_sse2; + pFuncList->pfDequantizationFour4x4 = WelsDequantFour4x4_sse2; + pFuncList->pfDequantizationIHadamard4x4 = WelsDequantIHadamard4x4_sse2; + + pFuncList->pfIDctT4 = WelsIDctT4Rec_sse2; + pFuncList->pfIDctFourT4 = WelsIDctFourT4Rec_sse2; + pFuncList->pfIDctI16x16Dc = WelsIDctRecI16x16Dc_sse2; + } +#if defined(HAVE_AVX2) + if (uiCpuFlag & WELS_CPU_AVX2) { + pFuncList->pfIDctT4 = WelsIDctT4Rec_avx2; + pFuncList->pfIDctFourT4 = WelsIDctFourT4Rec_avx2; + } +#endif + +#endif//X86_ASM + +#if defined(HAVE_NEON) + if (uiCpuFlag & WELS_CPU_NEON) { + pFuncList->pfDequantization4x4 = WelsDequant4x4_neon; + pFuncList->pfDequantizationFour4x4 = WelsDequantFour4x4_neon; + pFuncList->pfDequantizationIHadamard4x4 = WelsDequantIHadamard4x4_neon; + + pFuncList->pfIDctFourT4 = WelsIDctFourT4Rec_neon; + pFuncList->pfIDctT4 = WelsIDctT4Rec_neon; + pFuncList->pfIDctI16x16Dc = WelsIDctRecI16x16Dc_neon; + } +#endif + +#if defined(HAVE_NEON_AARCH64) + if (uiCpuFlag & WELS_CPU_NEON) { + pFuncList->pfDequantization4x4 = WelsDequant4x4_AArch64_neon; + pFuncList->pfDequantizationFour4x4 = WelsDequantFour4x4_AArch64_neon; + pFuncList->pfDequantizationIHadamard4x4 = WelsDequantIHadamard4x4_AArch64_neon; + + pFuncList->pfIDctFourT4 = WelsIDctFourT4Rec_AArch64_neon; + pFuncList->pfIDctT4 = WelsIDctT4Rec_AArch64_neon; + pFuncList->pfIDctI16x16Dc = WelsIDctRecI16x16Dc_AArch64_neon; + } +#endif + +#if defined(HAVE_MMI) + if (uiCpuFlag & WELS_CPU_MMI) { + pFuncList->pfIDctT4 = WelsIDctT4Rec_mmi; + pFuncList->pfIDctFourT4 = WelsIDctFourT4Rec_mmi; + pFuncList->pfIDctI16x16Dc = WelsIDctRecI16x16Dc_mmi; + } +#endif//HAVE_MMI +} +} diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/encode_mb_aux.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/encode_mb_aux.cpp new file mode 100644 index 000000000..f9bc6c476 --- /dev/null +++ 
b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/encode_mb_aux.cpp @@ -0,0 +1,627 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + + +#include "ls_defines.h" +#include "encode_mb_aux.h" +#include "cpu_core.h" +namespace WelsEnc { + +ALIGNED_DECLARE (const int16_t, g_kiQuantInterFF[58][8], 16) = { + /* 0*/ { 0, 1, 0, 1, 1, 1, 1, 1 }, + /* 1*/ { 0, 1, 0, 1, 1, 1, 1, 1 }, + /* 2*/ { 1, 1, 1, 1, 1, 1, 1, 1 }, + /* 3*/ { 1, 1, 1, 1, 1, 1, 1, 1 }, + /* 4*/ { 1, 1, 1, 1, 1, 2, 1, 2 }, + /* 5*/ { 1, 1, 1, 1, 1, 2, 1, 2 }, + /* 6*/ { 1, 1, 1, 1, 1, 2, 1, 2 }, + /* 7*/ { 1, 1, 1, 1, 1, 2, 1, 2 }, + /* 8*/ { 1, 2, 1, 2, 2, 3, 2, 3 }, + /* 9*/ { 1, 2, 1, 2, 2, 3, 2, 3 }, + /*10*/ { 1, 2, 1, 2, 2, 3, 2, 3 }, + /*11*/ { 1, 2, 1, 2, 2, 4, 2, 4 }, + /*12*/ { 2, 3, 2, 3, 3, 4, 3, 4 }, + /*13*/ { 2, 3, 2, 3, 3, 5, 3, 5 }, + /*14*/ { 2, 3, 2, 3, 3, 5, 3, 5 }, + /*15*/ { 2, 4, 2, 4, 4, 6, 4, 6 }, + /*16*/ { 3, 4, 3, 4, 4, 7, 4, 7 }, + /*17*/ { 3, 5, 3, 5, 5, 8, 5, 8 }, + /*18*/ { 3, 5, 3, 5, 5, 8, 5, 8 }, + /*19*/ { 4, 6, 4, 6, 6, 9, 6, 9 }, + /*20*/ { 4, 7, 4, 7, 7, 10, 7, 10 }, + /*21*/ { 5, 8, 5, 8, 8, 12, 8, 12 }, + /*22*/ { 5, 8, 5, 8, 8, 13, 8, 13 }, + /*23*/ { 6, 10, 6, 10, 10, 15, 10, 15 }, + /*24*/ { 7, 11, 7, 11, 11, 17, 11, 17 }, + /*25*/ { 7, 12, 7, 12, 12, 19, 12, 19 }, + /*26*/ { 9, 13, 9, 13, 13, 21, 13, 21 }, + /*27*/ { 9, 15, 9, 15, 15, 24, 15, 24 }, + /*28*/ { 11, 17, 11, 17, 17, 26, 17, 26 }, + /*29*/ { 12, 19, 12, 19, 19, 30, 19, 30 }, + /*30*/ { 13, 22, 13, 22, 22, 33, 22, 33 }, + /*31*/ { 15, 23, 15, 23, 23, 38, 23, 38 }, + /*32*/ { 17, 27, 17, 27, 27, 42, 27, 42 }, + /*33*/ { 19, 30, 19, 30, 30, 48, 30, 48 }, + /*34*/ { 21, 33, 21, 33, 33, 52, 33, 52 }, + /*35*/ { 24, 38, 24, 38, 38, 60, 38, 60 }, + /*36*/ { 27, 43, 27, 43, 43, 67, 43, 67 }, + /*37*/ { 29, 47, 29, 47, 47, 75, 47, 75 }, + /*38*/ { 35, 53, 35, 53, 53, 83, 53, 83 }, + /*39*/ { 37, 60, 37, 60, 60, 96, 60, 96 }, + /*40*/ { 43, 67, 43, 67, 67, 104, 67, 104 }, + /*41*/ { 48, 77, 48, 77, 77, 121, 77, 121 }, + /*42*/ { 53, 87, 53, 87, 87, 133, 87, 133 }, + /*43*/ { 59, 93, 59, 93, 93, 150, 93, 150 }, + /*44*/ { 69, 107, 
69, 107, 107, 167, 107, 167 }, + /*45*/ { 75, 120, 75, 120, 120, 192, 120, 192 }, + /*46*/ { 85, 133, 85, 133, 133, 208, 133, 208 }, + /*47*/ { 96, 153, 96, 153, 153, 242, 153, 242 }, + /*48*/ { 107, 173, 107, 173, 173, 267, 173, 267 }, + /*49*/ { 117, 187, 117, 187, 187, 300, 187, 300 }, + /*50*/ { 139, 213, 139, 213, 213, 333, 213, 333 }, + /*51*/ { 149, 240, 149, 240, 240, 383, 240, 383 }, + /* from here below is only for intra */ + /*46*/ { 171, 267, 171, 267, 267, 417, 267, 417 }, + /*47*/ { 192, 307, 192, 307, 307, 483, 307, 483 }, + /*48*/ { 213, 347, 213, 347, 347, 533, 347, 533 }, + /*49*/ { 235, 373, 235, 373, 373, 600, 373, 600 }, + /*50*/ { 277, 427, 277, 427, 427, 667, 427, 667 }, + /*51*/ { 299, 480, 299, 480, 480, 767, 480, 767 }, +}; + + + +ALIGNED_DECLARE (const int16_t, g_kiQuantMF[52][8], 16) = { + /* 0*/ {26214, 16132, 26214, 16132, 16132, 10486, 16132, 10486 }, + /* 1*/ {23832, 14980, 23832, 14980, 14980, 9320, 14980, 9320 }, + /* 2*/ {20164, 13108, 20164, 13108, 13108, 8388, 13108, 8388 }, + /* 3*/ {18724, 11650, 18724, 11650, 11650, 7294, 11650, 7294 }, + /* 4*/ {16384, 10486, 16384, 10486, 10486, 6710, 10486, 6710 }, + /* 5*/ {14564, 9118, 14564, 9118, 9118, 5786, 9118, 5786 }, + /* 6*/ {13107, 8066, 13107, 8066, 8066, 5243, 8066, 5243 }, + /* 7*/ {11916, 7490, 11916, 7490, 7490, 4660, 7490, 4660 }, + /* 8*/ {10082, 6554, 10082, 6554, 6554, 4194, 6554, 4194 }, + /* 9*/ { 9362, 5825, 9362, 5825, 5825, 3647, 5825, 3647 }, + /*10*/ { 8192, 5243, 8192, 5243, 5243, 3355, 5243, 3355 }, + /*11*/ { 7282, 4559, 7282, 4559, 4559, 2893, 4559, 2893 }, + /*12*/ { 6554, 4033, 6554, 4033, 4033, 2622, 4033, 2622 }, + /*13*/ { 5958, 3745, 5958, 3745, 3745, 2330, 3745, 2330 }, + /*14*/ { 5041, 3277, 5041, 3277, 3277, 2097, 3277, 2097 }, + /*15*/ { 4681, 2913, 4681, 2913, 2913, 1824, 2913, 1824 }, + /*16*/ { 4096, 2622, 4096, 2622, 2622, 1678, 2622, 1678 }, + /*17*/ { 3641, 2280, 3641, 2280, 2280, 1447, 2280, 1447 }, + /*18*/ { 3277, 2017, 3277, 2017, 2017, 
1311, 2017, 1311 }, + /*19*/ { 2979, 1873, 2979, 1873, 1873, 1165, 1873, 1165 }, + /*20*/ { 2521, 1639, 2521, 1639, 1639, 1049, 1639, 1049 }, + /*21*/ { 2341, 1456, 2341, 1456, 1456, 912, 1456, 912 }, + /*22*/ { 2048, 1311, 2048, 1311, 1311, 839, 1311, 839 }, + /*23*/ { 1821, 1140, 1821, 1140, 1140, 723, 1140, 723 }, + /*24*/ { 1638, 1008, 1638, 1008, 1008, 655, 1008, 655 }, + /*25*/ { 1490, 936, 1490, 936, 936, 583, 936, 583 }, + /*26*/ { 1260, 819, 1260, 819, 819, 524, 819, 524 }, + /*27*/ { 1170, 728, 1170, 728, 728, 456, 728, 456 }, + /*28*/ { 1024, 655, 1024, 655, 655, 419, 655, 419 }, + /*29*/ { 910, 570, 910, 570, 570, 362, 570, 362 }, + /*30*/ { 819, 504, 819, 504, 504, 328, 504, 328 }, + /*31*/ { 745, 468, 745, 468, 468, 291, 468, 291 }, + /*32*/ { 630, 410, 630, 410, 410, 262, 410, 262 }, + /*33*/ { 585, 364, 585, 364, 364, 228, 364, 228 }, + /*34*/ { 512, 328, 512, 328, 328, 210, 328, 210 }, + /*35*/ { 455, 285, 455, 285, 285, 181, 285, 181 }, + /*36*/ { 410, 252, 410, 252, 252, 164, 252, 164 }, + /*37*/ { 372, 234, 372, 234, 234, 146, 234, 146 }, + /*38*/ { 315, 205, 315, 205, 205, 131, 205, 131 }, + /*39*/ { 293, 182, 293, 182, 182, 114, 182, 114 }, + /*40*/ { 256, 164, 256, 164, 164, 105, 164, 105 }, + /*41*/ { 228, 142, 228, 142, 142, 90, 142, 90 }, + /*42*/ { 205, 126, 205, 126, 126, 82, 126, 82 }, + /*43*/ { 186, 117, 186, 117, 117, 73, 117, 73 }, + /*44*/ { 158, 102, 158, 102, 102, 66, 102, 66 }, + /*45*/ { 146, 91, 146, 91, 91, 57, 91, 57 }, + /*46*/ { 128, 82, 128, 82, 82, 52, 82, 52 }, + /*47*/ { 114, 71, 114, 71, 71, 45, 71, 45 }, + /*48*/ { 102, 63, 102, 63, 63, 41, 63, 41 }, + /*49*/ { 93, 59, 93, 59, 59, 36, 59, 36 }, + /*50*/ { 79, 51, 79, 51, 51, 33, 51, 33 }, + /*51*/ { 73, 46, 73, 46, 46, 28, 46, 28 } +}; + +/**************************************************************************** + * HDM and Quant functions + ****************************************************************************/ +#define WELS_ABS_LC(a) ((iSign ^ 
(int32_t)(a)) - iSign) +#define NEW_QUANT(pDct, iFF, iMF) (((iFF)+ WELS_ABS_LC(pDct))*(iMF)) >>16 +#define WELS_NEW_QUANT(pDct,iFF,iMF) WELS_ABS_LC(NEW_QUANT(pDct, iFF, iMF)) +void WelsQuant4x4_c (int16_t* pDct, const int16_t* pFF, const int16_t* pMF) { + int32_t i, j, iSign; + for (i = 0; i < 16; i += 4) { + j = i & 0x07; + iSign = WELS_SIGN (pDct[i]); + pDct[i] = WELS_NEW_QUANT (pDct[i], pFF[j], pMF[j]); + iSign = WELS_SIGN (pDct[i + 1]); + pDct[i + 1] = WELS_NEW_QUANT (pDct[i + 1], pFF[j + 1], pMF[j + 1]); + iSign = WELS_SIGN (pDct[i + 2]); + pDct[i + 2] = WELS_NEW_QUANT (pDct[i + 2], pFF[j + 2], pMF[j + 2]); + iSign = WELS_SIGN (pDct[i + 3]); + pDct[i + 3] = WELS_NEW_QUANT (pDct[i + 3], pFF[j + 3], pMF[j + 3]); + } +} + +void WelsQuant4x4Dc_c (int16_t* pDct, int16_t iFF, int16_t iMF) { + int32_t i, iSign; + for (i = 0; i < 16; i += 4) { + iSign = WELS_SIGN (pDct[i]); + pDct[i] = WELS_NEW_QUANT (pDct[i], iFF, iMF); + iSign = WELS_SIGN (pDct[i + 1]); + pDct[i + 1] = WELS_NEW_QUANT (pDct[i + 1], iFF, iMF); + iSign = WELS_SIGN (pDct[i + 2]); + pDct[i + 2] = WELS_NEW_QUANT (pDct[i + 2], iFF, iMF); + iSign = WELS_SIGN (pDct[i + 3]); + pDct[i + 3] = WELS_NEW_QUANT (pDct[i + 3], iFF, iMF); + } +} + +void WelsQuantFour4x4_c (int16_t* pDct, const int16_t* pFF, const int16_t* pMF) { + int32_t i, j, iSign; + + for (i = 0; i < 64; i += 4) { + j = i & 0x07; + iSign = WELS_SIGN (pDct[i]); + pDct[i] = WELS_NEW_QUANT (pDct[i], pFF[j], pMF[j]); + iSign = WELS_SIGN (pDct[i + 1]); + pDct[i + 1] = WELS_NEW_QUANT (pDct[i + 1], pFF[j + 1], pMF[j + 1]); + iSign = WELS_SIGN (pDct[i + 2]); + pDct[i + 2] = WELS_NEW_QUANT (pDct[i + 2], pFF[j + 2], pMF[j + 2]); + iSign = WELS_SIGN (pDct[i + 3]); + pDct[i + 3] = WELS_NEW_QUANT (pDct[i + 3], pFF[j + 3], pMF[j + 3]); + } +} + +void WelsQuantFour4x4Max_c (int16_t* pDct, const int16_t* pFF, const int16_t* pMF, int16_t* pMax) { + int32_t i, j, k, iSign; + int16_t iMaxAbs; + for (k = 0; k < 4; k++) { + iMaxAbs = 0; + for (i = 0; i < 16; i++) { + 
j = i & 0x07; + iSign = WELS_SIGN (pDct[i]); + pDct[i] = NEW_QUANT (pDct[i], pFF[j], pMF[j]); + if (iMaxAbs < pDct[i]) iMaxAbs = pDct[i]; + pDct[i] = WELS_ABS_LC (pDct[i]); + } + pDct += 16; + pMax[k] = iMaxAbs; + } +} + +int32_t WelsHadamardQuant2x2Skip_c (int16_t* pRs, int16_t iFF, int16_t iMF) { + int16_t pDct[4], s[4]; + int16_t iThreshold = ((1 << 16) - 1) / iMF - iFF; + + s[0] = pRs[0] + pRs[32]; + s[1] = pRs[0] - pRs[32]; + s[2] = pRs[16] + pRs[48]; + s[3] = pRs[16] - pRs[48]; + + pDct[0] = s[0] + s[2]; + pDct[1] = s[0] - s[2]; + pDct[2] = s[1] + s[3]; + pDct[3] = s[1] - s[3]; + + return ((WELS_ABS (pDct[0]) > iThreshold) || (WELS_ABS (pDct[1]) > iThreshold) || (WELS_ABS (pDct[2]) > iThreshold) + || (WELS_ABS (pDct[3]) > iThreshold)); +} + +int32_t WelsHadamardQuant2x2_c (int16_t* pRs, const int16_t iFF, int16_t iMF, int16_t* pDct, int16_t* pBlock) { + int16_t s[4]; + int32_t iSign, i, iDcNzc = 0; + + s[0] = pRs[0] + pRs[32]; + s[1] = pRs[0] - pRs[32]; + s[2] = pRs[16] + pRs[48]; + s[3] = pRs[16] - pRs[48]; + + pRs[0] = 0; + pRs[16] = 0; + pRs[32] = 0; + pRs[48] = 0; + + pDct[0] = s[0] + s[2]; + pDct[1] = s[0] - s[2]; + pDct[2] = s[1] + s[3]; + pDct[3] = s[1] - s[3]; + + iSign = WELS_SIGN (pDct[0]); + pDct[0] = WELS_NEW_QUANT (pDct[0], iFF, iMF); + iSign = WELS_SIGN (pDct[1]); + pDct[1] = WELS_NEW_QUANT (pDct[1], iFF, iMF); + iSign = WELS_SIGN (pDct[2]); + pDct[2] = WELS_NEW_QUANT (pDct[2], iFF, iMF); + iSign = WELS_SIGN (pDct[3]); + pDct[3] = WELS_NEW_QUANT (pDct[3], iFF, iMF); + + ST64 (pBlock, LD64 (pDct)); + + for (i = 0; i < 4; i++) + iDcNzc += (pBlock[i] != 0); + return iDcNzc; +} + +/* dc value pick up and hdm_4x4 */ +void WelsHadamardT4Dc_c (int16_t* pLumaDc, int16_t* pDct) { + int32_t p[16], s[4]; + int32_t i, iIdx; + + for (i = 0 ; i < 16 ; i += 4) { + iIdx = ((i & 0x08) << 4) + ((i & 0x04) << 3); + s[0] = pDct[iIdx ] + pDct[iIdx + 80]; + s[3] = pDct[iIdx ] - pDct[iIdx + 80]; + s[1] = pDct[iIdx + 16] + pDct[iIdx + 64]; + s[2] = pDct[iIdx + 16] - 
pDct[iIdx + 64]; + + p[i ] = s[0] + s[1]; + p[i + 2] = s[0] - s[1]; + p[i + 1] = s[3] + s[2]; + p[i + 3] = s[3] - s[2]; + } + + for (i = 0 ; i < 4 ; i ++) { + s[0] = p[i ] + p[i + 12]; + s[3] = p[i ] - p[i + 12]; + s[1] = p[i + 4] + p[i + 8]; + s[2] = p[i + 4] - p[i + 8]; + + pLumaDc[i ] = WELS_CLIP3 ((s[0] + s[1] + 1) >> 1, -32768, 32767); + pLumaDc[i + 8 ] = WELS_CLIP3 ((s[0] - s[1] + 1) >> 1, -32768, 32767); + pLumaDc[i + 4 ] = WELS_CLIP3 ((s[3] + s[2] + 1) >> 1, -32768, 32767); + pLumaDc[i + 12] = WELS_CLIP3 ((s[3] - s[2] + 1) >> 1, -32768, 32767); + } +} + +/**************************************************************************** + * DCT functions + ****************************************************************************/ +void WelsDctT4_c (int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, int32_t iStride2) { + int16_t i, pData[16], s[4]; + for (i = 0 ; i < 16 ; i += 4) { + const int32_t kiI1 = 1 + i; + const int32_t kiI2 = 2 + i; + const int32_t kiI3 = 3 + i; + + pData[i ] = pPixel1[0] - pPixel2[0]; + pData[kiI1] = pPixel1[1] - pPixel2[1]; + pData[kiI2] = pPixel1[2] - pPixel2[2]; + pData[kiI3] = pPixel1[3] - pPixel2[3]; + + pPixel1 += iStride1; + pPixel2 += iStride2; + + /*horizontal transform */ + s[0] = pData[i] + pData[kiI3]; + s[3] = pData[i] - pData[kiI3]; + s[1] = pData[kiI1] + pData[kiI2]; + s[2] = pData[kiI1] - pData[kiI2]; + + pDct[i ] = s[0] + s[1]; + pDct[kiI2] = s[0] - s[1]; + pDct[kiI1] = (s[3] * (1 << 1)) + s[2]; + pDct[kiI3] = s[3] - (s[2] * (1 << 1)); + } + + /* vertical transform */ + for (i = 0 ; i < 4 ; i ++) { + const int32_t kiI4 = 4 + i; + const int32_t kiI8 = 8 + i; + const int32_t kiI12 = 12 + i; + + s[0] = pDct[i ] + pDct[kiI12]; + s[3] = pDct[i ] - pDct[kiI12]; + s[1] = pDct[kiI4] + pDct[kiI8 ]; + s[2] = pDct[kiI4] - pDct[kiI8 ]; + + pDct[i ] = s[0] + s[1]; + pDct[kiI8 ] = s[0] - s[1]; + pDct[kiI4 ] = (s[3] * (1 << 1)) + s[2]; + pDct[kiI12] = s[3] - (s[2] * (1 << 1)); + } +} + +void WelsDctFourT4_c 
(int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, int32_t iStride2) { + int32_t stride_1 = iStride1 << 2; + int32_t stride_2 = iStride2 << 2; + + WelsDctT4_c (pDct, &pPixel1[0], iStride1, &pPixel2[0], iStride2); + WelsDctT4_c (pDct + 16, &pPixel1[4], iStride1, &pPixel2[4], iStride2); + WelsDctT4_c (pDct + 32, &pPixel1[stride_1 ], iStride1, &pPixel2[stride_2 ], iStride2); + WelsDctT4_c (pDct + 48, &pPixel1[stride_1 + 4], iStride1, &pPixel2[stride_2 + 4], iStride2); +} + +/**************************************************************************** + * Scan and Score functions + ****************************************************************************/ +void WelsScan4x4DcAc_c (int16_t* pLevel, int16_t* pDct) { + ST32 (pLevel, LD32 (pDct)); + pLevel[2] = pDct[4]; + pLevel[3] = pDct[8]; + pLevel[4] = pDct[5]; + ST32 (pLevel + 5, LD32 (pDct + 2)); + pLevel[7] = pDct[6]; + pLevel[8] = pDct[9]; + ST32 (pLevel + 9, LD32 (pDct + 12)); + pLevel[11] = pDct[10]; + pLevel[12] = pDct[7]; + pLevel[13] = pDct[11]; + ST32 (pLevel + 14, LD32 (pDct + 14)); +} + +void WelsScan4x4Ac_c (int16_t* pLevel, int16_t* pDct) { + pLevel[0] = pDct[1]; + pLevel[1] = pDct[4]; + pLevel[2] = pDct[8]; + pLevel[3] = pDct[5]; + ST32 (&pLevel[4], LD32 (&pDct[2])); + pLevel[6] = pDct[6]; + pLevel[7] = pDct[9]; + ST32 (&pLevel[8], LD32 (&pDct[12])); + pLevel[10] = pDct[10]; + pLevel[11] = pDct[7]; + pLevel[12] = pDct[11]; + ST32 (&pLevel[13], LD32 (&pDct[14])); + pLevel[15] = 0; +} + +void WelsScan4x4Dc (int16_t* pLevel, int16_t* pDct) { + ST32 (pLevel, LD32 (pDct)); + pLevel[2] = pDct[4]; + pLevel[3] = pDct[8]; + pLevel[4] = pDct[5]; + ST32 (pLevel + 5, LD32 (pDct + 2)); + pLevel[7] = pDct[6]; + pLevel[8] = pDct[9]; + ST32 (pLevel + 9, LD32 (pDct + 12)); + pLevel[11] = pDct[10]; + pLevel[12] = pDct[7]; + pLevel[13] = pDct[11]; + ST32 (pLevel + 14, LD32 (pDct + 14)); +} + +//refer to JVT-O079 +int32_t WelsCalculateSingleCtr4x4_c (int16_t* pDct) { + static const int32_t 
kiTRunTable[16] = { 3, 2, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + + int32_t iSingleCtr = 0; + int32_t iIdx = 15; + int32_t iRun; + + while (iIdx >= 0 && pDct[iIdx] == 0) --iIdx; + + while (iIdx >= 0) { + -- iIdx; + iRun = iIdx; + while (iIdx >= 0 && pDct[iIdx] == 0) --iIdx; + iRun -= iIdx; + iSingleCtr += kiTRunTable[iRun]; + } + return iSingleCtr; +} + +int32_t WelsGetNoneZeroCount_c (int16_t* pLevel) { + int32_t iCnt = 0; + int32_t iIdx = 0; + + while (iIdx < 16) { + iCnt += (pLevel[ iIdx] == 0); + iCnt += (pLevel[1 + iIdx] == 0); + iCnt += (pLevel[2 + iIdx] == 0); + iCnt += (pLevel[3 + iIdx] == 0); + + iIdx += 4; + } + return (16 - iCnt); +} + +#ifdef HAVE_NEON +int32_t WelsHadamardQuant2x2Skip_neon (int16_t* pRes, int16_t iFF, int16_t iMF) { + int16_t iThreshold = ((1 << 16) - 1) / iMF - iFF; + return WelsHadamardQuant2x2SkipKernel_neon (pRes, iThreshold); +} +#endif +#ifdef HAVE_NEON_AARCH64 +int32_t WelsHadamardQuant2x2Skip_AArch64_neon (int16_t* pRes, int16_t iFF, int16_t iMF) { + int16_t iThreshold = ((1 << 16) - 1) / iMF - iFF; + return WelsHadamardQuant2x2SkipKernel_AArch64_neon (pRes, iThreshold); +} +#endif +void WelsInitEncodingFuncs (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag) { + pFuncList->pfCopy8x8Aligned = WelsCopy8x8_c; + pFuncList->pfCopy16x16Aligned = + pFuncList->pfCopy16x16NotAligned = WelsCopy16x16_c; + pFuncList->pfCopy16x8NotAligned = WelsCopy16x8_c; + pFuncList->pfCopy8x16Aligned = WelsCopy8x16_c; + pFuncList->pfCopy4x4 = WelsCopy4x4_c; + pFuncList->pfCopy8x4 = WelsCopy8x4_c; + pFuncList->pfCopy4x8 = WelsCopy4x8_c; + pFuncList->pfQuantizationHadamard2x2 = WelsHadamardQuant2x2_c; + pFuncList->pfQuantizationHadamard2x2Skip = WelsHadamardQuant2x2Skip_c; + pFuncList->pfTransformHadamard4x4Dc = WelsHadamardT4Dc_c; + + pFuncList->pfDctT4 = WelsDctT4_c; + pFuncList->pfDctFourT4 = WelsDctFourT4_c; + + pFuncList->pfScan4x4 = WelsScan4x4DcAc_c; + pFuncList->pfScan4x4Ac = WelsScan4x4Ac_c; + pFuncList->pfCalculateSingleCtr4x4 = 
WelsCalculateSingleCtr4x4_c; + + pFuncList->pfGetNoneZeroCount = WelsGetNoneZeroCount_c; + + pFuncList->pfQuantization4x4 = WelsQuant4x4_c; + pFuncList->pfQuantizationDc4x4 = WelsQuant4x4Dc_c; + pFuncList->pfQuantizationFour4x4 = WelsQuantFour4x4_c; + pFuncList->pfQuantizationFour4x4Max = WelsQuantFour4x4Max_c; + +#if defined(X86_ASM) + if (uiCpuFlag & WELS_CPU_MMXEXT) { + + pFuncList->pfQuantizationHadamard2x2 = WelsHadamardQuant2x2_mmx; + pFuncList->pfQuantizationHadamard2x2Skip = WelsHadamardQuant2x2Skip_mmx; + + pFuncList->pfDctT4 = WelsDctT4_mmx; + + pFuncList->pfCopy8x8Aligned = WelsCopy8x8_mmx; + pFuncList->pfCopy8x16Aligned = WelsCopy8x16_mmx; + } + if (uiCpuFlag & WELS_CPU_SSE2) { + pFuncList->pfGetNoneZeroCount = WelsGetNoneZeroCount_sse2; + pFuncList->pfTransformHadamard4x4Dc = WelsHadamardT4Dc_sse2; + + pFuncList->pfQuantization4x4 = WelsQuant4x4_sse2; + pFuncList->pfQuantizationDc4x4 = WelsQuant4x4Dc_sse2; + pFuncList->pfQuantizationFour4x4 = WelsQuantFour4x4_sse2; + pFuncList->pfQuantizationFour4x4Max = WelsQuantFour4x4Max_sse2; + + pFuncList->pfCopy16x16Aligned = WelsCopy16x16_sse2; + pFuncList->pfCopy16x16NotAligned = WelsCopy16x16NotAligned_sse2; + pFuncList->pfCopy16x8NotAligned = WelsCopy16x8NotAligned_sse2; + + pFuncList->pfScan4x4 = WelsScan4x4DcAc_sse2; + pFuncList->pfScan4x4Ac = WelsScan4x4Ac_sse2; + pFuncList->pfCalculateSingleCtr4x4 = WelsCalculateSingleCtr4x4_sse2; + + pFuncList->pfDctT4 = WelsDctT4_sse2; + pFuncList->pfDctFourT4 = WelsDctFourT4_sse2; + } +//#ifndef MACOS + if (uiCpuFlag & WELS_CPU_SSSE3) { + pFuncList->pfScan4x4 = WelsScan4x4DcAc_ssse3; + } + if (uiCpuFlag & WELS_CPU_SSE42) { + pFuncList->pfGetNoneZeroCount = WelsGetNoneZeroCount_sse42; + } +#if defined(HAVE_AVX2) + if (uiCpuFlag & WELS_CPU_AVX2) { + pFuncList->pfDctT4 = WelsDctT4_avx2; + pFuncList->pfDctFourT4 = WelsDctFourT4_avx2; + + pFuncList->pfQuantization4x4 = WelsQuant4x4_avx2; + pFuncList->pfQuantizationDc4x4 = WelsQuant4x4Dc_avx2; + 
pFuncList->pfQuantizationFour4x4 = WelsQuantFour4x4_avx2; + pFuncList->pfQuantizationFour4x4Max = WelsQuantFour4x4Max_avx2; + } +#endif +//#endif//MACOS + +#endif//X86_ASM + +#if defined(HAVE_NEON) + if (uiCpuFlag & WELS_CPU_NEON) { + pFuncList->pfQuantizationHadamard2x2 = WelsHadamardQuant2x2_neon; + pFuncList->pfQuantizationHadamard2x2Skip = WelsHadamardQuant2x2Skip_neon; + pFuncList->pfDctT4 = WelsDctT4_neon; + pFuncList->pfCopy8x8Aligned = WelsCopy8x8_neon; + pFuncList->pfCopy8x16Aligned = WelsCopy8x16_neon; + + pFuncList->pfGetNoneZeroCount = WelsGetNoneZeroCount_neon; + pFuncList->pfTransformHadamard4x4Dc = WelsHadamardT4Dc_neon; + + pFuncList->pfQuantization4x4 = WelsQuant4x4_neon; + pFuncList->pfQuantizationDc4x4 = WelsQuant4x4Dc_neon; + pFuncList->pfQuantizationFour4x4 = WelsQuantFour4x4_neon; + pFuncList->pfQuantizationFour4x4Max = WelsQuantFour4x4Max_neon; + + pFuncList->pfCopy16x16Aligned = WelsCopy16x16_neon; + pFuncList->pfCopy16x16NotAligned = WelsCopy16x16NotAligned_neon; + pFuncList->pfCopy16x8NotAligned = WelsCopy16x8NotAligned_neon; + pFuncList->pfDctFourT4 = WelsDctFourT4_neon; + } +#endif + +#if defined(HAVE_NEON_AARCH64) + if (uiCpuFlag & WELS_CPU_NEON) { + pFuncList->pfQuantizationHadamard2x2 = WelsHadamardQuant2x2_AArch64_neon; + pFuncList->pfQuantizationHadamard2x2Skip = WelsHadamardQuant2x2Skip_AArch64_neon; + pFuncList->pfDctT4 = WelsDctT4_AArch64_neon; + pFuncList->pfCopy8x8Aligned = WelsCopy8x8_AArch64_neon; + pFuncList->pfCopy8x16Aligned = WelsCopy8x16_AArch64_neon; + + pFuncList->pfGetNoneZeroCount = WelsGetNoneZeroCount_AArch64_neon; + pFuncList->pfTransformHadamard4x4Dc = WelsHadamardT4Dc_AArch64_neon; + + pFuncList->pfQuantization4x4 = WelsQuant4x4_AArch64_neon; + pFuncList->pfQuantizationDc4x4 = WelsQuant4x4Dc_AArch64_neon; + pFuncList->pfQuantizationFour4x4 = WelsQuantFour4x4_AArch64_neon; + pFuncList->pfQuantizationFour4x4Max = WelsQuantFour4x4Max_AArch64_neon; + + pFuncList->pfCopy16x16Aligned = WelsCopy16x16_AArch64_neon; + 
pFuncList->pfCopy16x16NotAligned = WelsCopy16x16NotAligned_AArch64_neon; + pFuncList->pfCopy16x8NotAligned = WelsCopy16x8NotAligned_AArch64_neon; + pFuncList->pfDctFourT4 = WelsDctFourT4_AArch64_neon; + } +#endif + +#if defined(HAVE_MMI) + if (uiCpuFlag & WELS_CPU_MMI) { + pFuncList->pfCopy8x8Aligned = WelsCopy8x8_mmi; + pFuncList->pfCopy8x16Aligned = WelsCopy8x16_mmi; + + pFuncList->pfGetNoneZeroCount = WelsGetNoneZeroCount_mmi; + pFuncList->pfTransformHadamard4x4Dc = WelsHadamardT4Dc_mmi; + + pFuncList->pfQuantization4x4 = WelsQuant4x4_mmi; + pFuncList->pfQuantizationDc4x4 = WelsQuant4x4Dc_mmi; + pFuncList->pfQuantizationFour4x4 = WelsQuantFour4x4_mmi; + pFuncList->pfQuantizationFour4x4Max = WelsQuantFour4x4Max_mmi; + + pFuncList->pfCopy16x16Aligned = WelsCopy16x16_mmi; + pFuncList->pfCopy16x16NotAligned = WelsCopy16x16NotAligned_mmi; + pFuncList->pfCopy16x8NotAligned = WelsCopy16x8NotAligned_mmi; + + pFuncList->pfScan4x4 = WelsScan4x4DcAc_mmi; + pFuncList->pfScan4x4Ac = WelsScan4x4Ac_mmi; + pFuncList->pfCalculateSingleCtr4x4 = WelsCalculateSingleCtr4x4_mmi; + + pFuncList->pfDctT4 = WelsDctT4_mmi; + pFuncList->pfDctFourT4 = WelsDctFourT4_mmi; + } +#endif//HAVE_MMI + +#if defined(HAVE_MSA) + if (uiCpuFlag & WELS_CPU_MSA) { + pFuncList->pfCopy8x8Aligned = WelsCopy8x8_msa; + pFuncList->pfCopy8x16Aligned = WelsCopy8x16_msa; + + pFuncList->pfCopy16x16Aligned = + pFuncList->pfCopy16x16NotAligned = WelsCopy16x16_msa; + pFuncList->pfCopy16x8NotAligned = WelsCopy16x8_msa; + } +#endif +} +} diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/encoder.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/encoder.cpp new file mode 100644 index 000000000..8b5b79299 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/encoder.cpp @@ -0,0 +1,551 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file encoder.c + * + * \brief core encoder + * + * \date 5/14/2009 Created + * + ************************************************************************************* + */
#include "encoder.h"
#include "cpu_core.h"

#include "decode_mb_aux.h"
#include "get_intra_predictor.h"

#include "deblocking.h"
#include "ref_list_mgr_svc.h"
#include "mc.h"
#include "paraset_strategy.h"
#include "sample.h"

#include "svc_enc_golomb.h"
#include "svc_base_layer_md.h"
#include "svc_mode_decision.h"
#include "set_mb_syn_cavlc.h"
#include "crt_util_safe_x.h" // Safe CRT routines like utils for cross_platforms
#include "slice_multi_threading.h"

// global function pointers definition
namespace WelsEnc {
/* Motion compensation */


/*!
 * \brief   initialize source picture body
 * \param   kpSrc         pointer to the SSourcePicture to initialize
 * \param   kiColorspace  colour format, optionally combined with videoFormatVFlip
 * \param   kiWidth       width of picture in pixels
 * \param   kiHeight      height of picture in pixels
 * \return  0 on success; 1 if kpSrc is NULL or a dimension is 0;
 *          2 if the colour format is not supported
 * \note    the format, width and height fields are written before the colour
 *          format is checked, so they are updated even when 2 is returned
 */
int32_t InitPic (const void* kpSrc, const int32_t kiColorspace, const int32_t kiWidth, const int32_t kiHeight) {
  SSourcePicture* pSrcPic = (SSourcePicture*)kpSrc;

  if (NULL == pSrcPic || kiWidth == 0 || kiHeight == 0)
    return 1;

  pSrcPic->iColorFormat = kiColorspace;
  pSrcPic->iPicWidth = kiWidth;
  pSrcPic->iPicHeight = kiHeight;

  //currently encoder only supports videoFormatI420.
  if ((kiColorspace & (~videoFormatVFlip)) != videoFormatI420)
    return 2;
  // Because of the early return above, only a value equal to videoFormatI420
  // reaches this switch; the remaining case labels are kept but cannot be
  // selected from here.
  switch (kiColorspace & (~videoFormatVFlip)) {
  case videoFormatI420:
  case videoFormatYV12:
    pSrcPic->pData[0] = NULL;
    pSrcPic->pData[1] = NULL;
    pSrcPic->pData[2] = NULL;
    pSrcPic->pData[3] = NULL;
    pSrcPic->iStride[0] = kiWidth;
    pSrcPic->iStride[2] = pSrcPic->iStride[1] = kiWidth >> 1;
    pSrcPic->iStride[3] = 0;
    break;
  case videoFormatYUY2:
  case videoFormatYVYU:
  case videoFormatUYVY:
    pSrcPic->pData[0] = NULL;
    pSrcPic->pData[1] = NULL;
    pSrcPic->pData[2] = NULL;
    pSrcPic->pData[3] = NULL;
    pSrcPic->iStride[0] = CALC_BI_STRIDE (kiWidth, 16);
    pSrcPic->iStride[3] = pSrcPic->iStride[2] = pSrcPic->iStride[1] = 0;
    break;
  case videoFormatRGB:
  case videoFormatBGR:
    pSrcPic->pData[0] = NULL;
    pSrcPic->pData[1] = NULL;
    pSrcPic->pData[2] = NULL;
    pSrcPic->pData[3] = NULL;
    pSrcPic->iStride[0] = CALC_BI_STRIDE (kiWidth, 24);
    pSrcPic->iStride[3] = pSrcPic->iStride[2] = pSrcPic->iStride[1] = 0;
    // toggles the vertical-flip bit of the stored format
    if (kiColorspace & videoFormatVFlip)
      pSrcPic->iColorFormat = kiColorspace & (~videoFormatVFlip);
    else
      pSrcPic->iColorFormat = kiColorspace | videoFormatVFlip;
    break;
  case videoFormatBGRA:
  case videoFormatRGBA:
  case videoFormatARGB:
  case videoFormatABGR:
    pSrcPic->pData[0] = NULL;
    pSrcPic->pData[1] = NULL;
    pSrcPic->pData[2] = NULL;
    pSrcPic->pData[3] = NULL;
    pSrcPic->iStride[0] = kiWidth << 2;
    pSrcPic->iStride[3] = pSrcPic->iStride[2] = pSrcPic->iStride[1] = 0;
    // toggles the vertical-flip bit of the stored format
    if (kiColorspace & videoFormatVFlip)
      pSrcPic->iColorFormat = kiColorspace & (~videoFormatVFlip);
    else
      pSrcPic->iColorFormat = kiColorspace | videoFormatVFlip;
    break;
  default:
    return 2; // any else?
  }

  return 0;
}


/*!
 * \brief   select the background-detection callbacks
 * \param   pFuncList                    function table to update
 * \param   kbEnableBackgroundDetection  true installs WelsMdInterJudgeBGDPskip and
 *                                       WelsMdUpdateBGDInfo; false installs
 *                                       WelsMdInterJudgeBGDPskipFalse and
 *                                       WelsMdUpdateBGDInfoNULL
 */
void WelsInitBGDFunc (SWelsFuncPtrList* pFuncList, const bool kbEnableBackgroundDetection) {
  if (kbEnableBackgroundDetection) {
    pFuncList->pfInterMdBackgroundDecision = WelsMdInterJudgeBGDPskip;
    pFuncList->pfMdBackgroundInfoUpdate = WelsMdUpdateBGDInfo;
  } else {
    pFuncList->pfInterMdBackgroundDecision = WelsMdInterJudgeBGDPskipFalse;
    pFuncList->pfMdBackgroundInfoUpdate = WelsMdUpdateBGDInfoNULL;
  }
}

/*!
 * \brief   initialize function pointers that are potentially used in Wels encoding
 * \param   pEncCtx    encoder context; its pFuncList is the table that gets filled
 * \param   pParam     coding parameters that select between implementations
 * \param   uiCpuFlag  bit mask of WELS_CPU_* capability flags
 * \return  ENC_RETURN_SUCCESS on success
 * \note    NOTE(review): WELS_VERIFY_RETURN_IF is defined elsewhere; presumably it
 *          returns ENC_RETURN_MEMALLOCERR when the strategy object is NULL -- confirm.
 */
int32_t InitFunctionPointers (sWelsEncCtx* pEncCtx, SWelsSvcCodingParam* pParam, uint32_t uiCpuFlag) {
  int32_t iReturn = ENC_RETURN_SUCCESS;
  SWelsFuncPtrList* pFuncList = pEncCtx->pFuncList;
  bool bScreenContent = (SCREEN_CONTENT_REAL_TIME == pParam->iUsageType);

  /* Functionality utilization of CPU instructions dependency */
  pFuncList->pfSetMemZeroSize8 = WelsSetMemZero_c; // confirmed_safe_unsafe_usage
  pFuncList->pfSetMemZeroSize64Aligned16 = WelsSetMemZero_c; // confirmed_safe_unsafe_usage
  pFuncList->pfSetMemZeroSize64 = WelsSetMemZero_c; // confirmed_safe_unsafe_usage
#if defined(X86_ASM)
  if (uiCpuFlag & WELS_CPU_MMXEXT) {
    pFuncList->pfSetMemZeroSize8 = WelsSetMemZeroSize8_mmx; // confirmed_safe_unsafe_usage
    pFuncList->pfSetMemZeroSize64Aligned16 = WelsSetMemZeroSize64_mmx; // confirmed_safe_unsafe_usage
    pFuncList->pfSetMemZeroSize64 = WelsSetMemZeroSize64_mmx; // confirmed_safe_unsafe_usage
  }
  if (uiCpuFlag & WELS_CPU_SSE2) {
    pFuncList->pfSetMemZeroSize64Aligned16 = WelsSetMemZeroAligned64_sse2; // confirmed_safe_unsafe_usage
  }
#endif//X86_ASM

#if defined(HAVE_NEON)
  if (uiCpuFlag & WELS_CPU_NEON) {
    pFuncList->pfSetMemZeroSize8 = WelsSetMemZero_neon;
    pFuncList->pfSetMemZeroSize64Aligned16 = WelsSetMemZero_neon;
    pFuncList->pfSetMemZeroSize64 = WelsSetMemZero_neon;
  }
#endif

#if defined(HAVE_NEON_AARCH64)
  if (uiCpuFlag & WELS_CPU_NEON) {
    pFuncList->pfSetMemZeroSize8 = WelsSetMemZero_AArch64_neon;
    pFuncList->pfSetMemZeroSize64Aligned16 = WelsSetMemZero_AArch64_neon;
    pFuncList->pfSetMemZeroSize64 = WelsSetMemZero_AArch64_neon;
  }
#endif

  InitExpandPictureFunc (& (pFuncList->sExpandPicFunc), uiCpuFlag);

  /* Intra_Prediction_fn*/
  WelsInitIntraPredFuncs (pFuncList, uiCpuFlag);

  /* ME func */
  WelsInitMeFunc (pFuncList, uiCpuFlag, bScreenContent);

  /* sad, satd, average */
  WelsInitSampleSadFunc (pFuncList, uiCpuFlag);

  //
  WelsInitBGDFunc (pFuncList, pParam->bEnableBackgroundDetection);
  WelsInitSCDPskipFunc (pFuncList, bScreenContent && (pParam->bEnableSceneChangeDetect));

  // for pfGetVarianceFromIntraVaa function ptr adaptive by CPU features, 6/7/2010
  InitIntraAnalysisVaaInfo (pFuncList, uiCpuFlag);

  /* Motion compensation */
  /*init pixel average function*/
  /*get one column or row pixel when refinement*/
  InitMcFunc (&pFuncList->sMcFuncs, uiCpuFlag);
  InitCoeffFunc (pFuncList, uiCpuFlag, pParam->iEntropyCodingModeFlag);

  WelsInitEncodingFuncs (pFuncList, uiCpuFlag);
  WelsInitReconstructionFuncs (pFuncList, uiCpuFlag);

  DeblockingInit (&pFuncList->pfDeblocking, uiCpuFlag);
  WelsBlockFuncInit (&pFuncList->pfSetNZCZero, uiCpuFlag);

  InitFillNeighborCacheInterFunc (pFuncList, pParam->bEnableBackgroundDetection);

  pFuncList->pParametersetStrategy = IWelsParametersetStrategy::CreateParametersetStrategy (pParam->eSpsPpsIdStrategy,
                                     pParam->bSimulcastAVC, pParam->iSpatialLayerNum);
  WELS_VERIFY_RETURN_IF (ENC_RETURN_MEMALLOCERR, (NULL == pFuncList->pParametersetStrategy))

  return iReturn;
}

/*!
 * \brief   advance the frame number of one dependency layer
 * \param   pEncCtx  encoder context
 * \param   kiDidx   index of the dependency layer
 *
 * If the layer's last NAL priority is not NRI_PRI_LOWEST, iFrameNum is
 * incremented and wraps to 0 after (1 << uiLog2MaxFrameNum) - 1. The last NAL
 * priority is then set to NRI_PRI_LOWEST in every case.
 */
void UpdateFrameNum (sWelsEncCtx* pEncCtx, const int32_t kiDidx) {
  SSpatialLayerInternal* pParamInternal = &pEncCtx->pSvcParam->sDependencyLayers[kiDidx];
  bool bNeedFrameNumIncreasing = false;

  if (NRI_PRI_LOWEST != pEncCtx->eLastNalPriority[kiDidx]) {
    bNeedFrameNumIncreasing = true;
  }

  if (bNeedFrameNumIncreasing) {
    if (pParamInternal->iFrameNum < (1 << pEncCtx->pSps->uiLog2MaxFrameNum) - 1)
      ++ pParamInternal->iFrameNum;
    else
      pParamInternal->iFrameNum = 0; // if iFrameNum overflow
  }

  pEncCtx->eLastNalPriority[kiDidx] = NRI_PRI_LOWEST;
}


/*!
 * \brief   step the frame number of one dependency layer back by one
 * \param   pEncCtx  encoder context
 * \param   kiDidx   index of the dependency layer
 *
 * Uses the same condition as UpdateFrameNum: iFrameNum is decremented and
 * wraps from 0 to (1 << uiLog2MaxFrameNum) - 1. The last NAL priority is left
 * unchanged.
 */
void LoadBackFrameNum (sWelsEncCtx* pEncCtx, const int32_t kiDidx) {
  SSpatialLayerInternal* pParamInternal = &pEncCtx->pSvcParam->sDependencyLayers[kiDidx];
  bool bNeedFrameNumIncreasing = false;

  if (NRI_PRI_LOWEST != pEncCtx->eLastNalPriority[kiDidx]) {
    bNeedFrameNumIncreasing = true;
  }

  if (bNeedFrameNumIncreasing) {
    if (pParamInternal->iFrameNum != 0) {
      pParamInternal->iFrameNum --;
    } else {
      pParamInternal->iFrameNum = (1 << pEncCtx->pSps->uiLog2MaxFrameNum) - 1;
    }
  }
}

/*!
 * \brief   reset the output bitstream state before writing
 * \param   pEncCtx  encoder context whose output position, NAL index and layer
 *                   index are set to 0 and whose bit writer is re-initialized
 *                   over pOut->pBsBuffer with pOut->uiSize
 */
void InitBitStream (sWelsEncCtx* pEncCtx) {
  // for bitstream writing
  pEncCtx->iPosBsBuffer = 0; // reset bs pBuffer position
  pEncCtx->pOut->iNalIndex = 0; // reset NAL index
  pEncCtx->pOut->iLayerBsIndex = 0; // reset index of Layer Bs

  InitBits (&pEncCtx->pOut->sBsWrite, pEncCtx->pOut->pBsBuffer, pEncCtx->pOut->uiSize);
}
/*!
 * \brief   initialize frame coding
 * \param   pEncCtx      encoder context; NAL type, slice type and NAL priority are set here
 * \param   keFrameType  videoFrameTypeP, videoFrameTypeIDR or videoFrameTypeI;
 *                       any other value hits assert (0)
 * \param   kiDidx       index of the dependency layer whose counters are updated
 */
void InitFrameCoding (sWelsEncCtx* pEncCtx, const EVideoFrameType keFrameType, const int32_t kiDidx) {
  SSpatialLayerInternal* pParamInternal = &pEncCtx->pSvcParam->sDependencyLayers[kiDidx];
  if (keFrameType == videoFrameTypeP) {
    ++pParamInternal->iFrameIndex;

    // POC advances by 2 and wraps to 0 once it reaches (1 << iLog2MaxPocLsb) - 2
    if (pParamInternal->iPOC < (1 << pEncCtx->pSps->iLog2MaxPocLsb) -
        2) // if iPOC type is not 0, this needs to be modified
      pParamInternal->iPOC += 2; // for POC type 0
    else
      pParamInternal->iPOC = 0;

    UpdateFrameNum (pEncCtx, kiDidx);

    pEncCtx->eNalType = NAL_UNIT_CODED_SLICE;
    pEncCtx->eSliceType = P_SLICE;
    pEncCtx->eNalPriority = NRI_PRI_HIGH;
  } else if (keFrameType == videoFrameTypeIDR) {
    // an IDR resets every per-layer counter
    pParamInternal->iFrameNum = 0;
    pParamInternal->iPOC = 0;
    pParamInternal->bEncCurFrmAsIdrFlag = false;
    pParamInternal->iFrameIndex = 0;

    pEncCtx->eNalType = NAL_UNIT_CODED_SLICE_IDR;
    pEncCtx->eSliceType = I_SLICE;
    pEncCtx->eNalPriority = NRI_PRI_HIGHEST;

    pParamInternal->iCodingIndex = 0;

    // reset_ref_list

    // rc_init_gop
  } else if (keFrameType == videoFrameTypeI) {
    if (pParamInternal->iPOC < (1 << pEncCtx->pSps->iLog2MaxPocLsb) -
        2) // if iPOC type is not 0, this needs to be modified
      pParamInternal->iPOC += 2; // for POC type 0
    else
      pParamInternal->iPOC = 0;

    UpdateFrameNum (pEncCtx, kiDidx);

    pEncCtx->eNalType = NAL_UNIT_CODED_SLICE;
    pEncCtx->eSliceType = I_SLICE;
    pEncCtx->eNalPriority = NRI_PRI_HIGHEST;

    // rc_init_gop
  } else { // B pictures are not supported now, any else?
    assert (0);
  }

#if defined(STAT_OUTPUT)
  memset (&pEncCtx->sPerInfo, 0, sizeof (SStatSliceInfo));
#endif//STAT_OUTPUT
}

/*!
 * \brief   choose the frame type for the current frame of one dependency layer
 * \param   pEncCtx         encoder context
 * \param   kiSpatialNum    compared against pSvcParam->iSpatialLayerNum; a smaller
 *                          value disables the scene-change flag for this decision
 * \param   kiDidx          index of the dependency layer
 * \param   bSkipFrameFlag  when true, a frame that would be P becomes
 *                          videoFrameTypeSkip
 * \return  videoFrameTypeIDR, videoFrameTypeP or videoFrameTypeSkip
 * \note    when IDR is chosen the layer's iCodingIndex is reset to 0; the
 *          screen-content branch may also set bCurFrameMarkedAsSceneLtr
 */
EVideoFrameType DecideFrameType (sWelsEncCtx* pEncCtx, const int8_t kiSpatialNum, const int32_t kiDidx,
                                 bool bSkipFrameFlag) {
  SWelsSvcCodingParam* pSvcParam = pEncCtx->pSvcParam;
  SSpatialLayerInternal* pParamInternal = &pEncCtx->pSvcParam->sDependencyLayers[kiDidx];
  EVideoFrameType iFrameType = videoFrameTypeInvalid;
  bool bSceneChangeFlag = false;
  if (pSvcParam->iUsageType == SCREEN_CONTENT_REAL_TIME) {
    if ((!pSvcParam->bEnableSceneChangeDetect) || pEncCtx->pVaa->bIdrPeriodFlag ||
        (kiSpatialNum < pSvcParam->iSpatialLayerNum)) {
      bSceneChangeFlag = false;
    } else {
      bSceneChangeFlag = pEncCtx->pVaa->bSceneChangeFlag;
    }
    if (pEncCtx->pVaa->bIdrPeriodFlag || pParamInternal->bEncCurFrmAsIdrFlag || (!pSvcParam->bEnableLongTermReference
        && bSceneChangeFlag && !bSkipFrameFlag)) {
      iFrameType = videoFrameTypeIDR;
    } else if (pSvcParam->bEnableLongTermReference && (bSceneChangeFlag
               || pEncCtx->pVaa->eSceneChangeIdc == LARGE_CHANGED_SCENE)) {
      // count the long-term reference slots that hold an in-use scene LTR picture
      int iActualLtrcount = 0;
      SPicture** pLongTermRefList = pEncCtx->ppRefPicListExt[0]->pLongRefList;
      for (int i = 0; i < pSvcParam->iLTRRefNum; ++i) {
        if (NULL != pLongTermRefList[i] && pLongTermRefList[i]->bUsedAsRef && pLongTermRefList[i]->bIsLongRef
            && pLongTermRefList[i]->bIsSceneLTR) {
          ++iActualLtrcount;
        }
      }
      // IDR only when every slot is occupied and a scene change was flagged;
      // otherwise code a P frame and mark it as a scene LTR
      if (iActualLtrcount == pSvcParam->iLTRRefNum && bSceneChangeFlag) {
        iFrameType = videoFrameTypeIDR;
      } else {
        iFrameType = videoFrameTypeP;
        pEncCtx->bCurFrameMarkedAsSceneLtr = true;
      }
    } else {
      iFrameType = videoFrameTypeP;
    }
    if (videoFrameTypeP == iFrameType && bSkipFrameFlag) {
      iFrameType = videoFrameTypeSkip;
    } else if (videoFrameTypeIDR == iFrameType) {
      pParamInternal->iCodingIndex = 0;
      pEncCtx->bCurFrameMarkedAsSceneLtr = true;
    }

  } else {
    // perform scene change detection
    if ((!pSvcParam->bEnableSceneChangeDetect) || pEncCtx->pVaa->bIdrPeriodFlag ||
        (kiSpatialNum < pSvcParam->iSpatialLayerNum)
        || (pParamInternal->iFrameIndex < (VGOP_SIZE << 1))) { // avoid too frequent I frame coding, rc control
      bSceneChangeFlag = false;
    } else {
      bSceneChangeFlag = pEncCtx->pVaa->bSceneChangeFlag;
    }

    //scene_changed_flag: RC enable && iSpatialNum == pSvcParam->iSpatialLayerNum
    //bIdrPeriodFlag: RC disable || iSpatialNum != pSvcParam->iSpatialLayerNum
    //pEncCtx->bEncCurFrmAsIdrFlag: 1. first frame should be IDR; 2. idr pause; 3. idr request
    iFrameType = (pEncCtx->pVaa->bIdrPeriodFlag || bSceneChangeFlag
                  || pParamInternal->bEncCurFrmAsIdrFlag) ? videoFrameTypeIDR : videoFrameTypeP;
    if ( videoFrameTypeIDR == iFrameType ) {
      WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_DEBUG,
               "encoding videoFrameTypeIDR due to ( bIdrPeriodFlag %d, bSceneChangeFlag %d, bEncCurFrmAsIdrFlag %d )",
               pEncCtx->pVaa->bIdrPeriodFlag,
               bSceneChangeFlag,
               pParamInternal->bEncCurFrmAsIdrFlag);
    }

    if (videoFrameTypeP == iFrameType && bSkipFrameFlag) { // for frame skip, 1/5/2010
      iFrameType = videoFrameTypeSkip;
    } else if (videoFrameTypeIDR == iFrameType) {
      pParamInternal->iCodingIndex = 0;
    }
  }
  return iFrameType;
}

/*!
 * \brief   Dump reconstruction for dependency layer
 */

extern "C" void DumpDependencyRec (SPicture* pCurPicture, const char* kpFileName, const int8_t kiDid, bool bAppend,
                                   SDqLayer* pDqLayer, bool bSimulCastAVC) {
  WelsFileHandle* pDumpRecFile = NULL;
  int32_t iWrittenSize = 0;
  const char* openMode = bAppend ? "ab" : "wb";
  SWelsSPS* pSpsTmp = NULL;
  if (bSimulCastAVC || (kiDid == BASE_DEPENDENCY_ID)) {
    pSpsTmp = pDqLayer->sLayerInfo.pSpsP;
  } else {
    pSpsTmp = & (pDqLayer->sLayerInfo.pSubsetSpsP->pSps);
  }
  bool bFrameCroppingFlag = pSpsTmp->bFrameCroppingFlag;
  SCropOffset* pFrameCrop = &pSpsTmp->sFrameCrop;

  if (NULL == pCurPicture || NULL == kpFileName || kiDid >= MAX_DEPENDENCY_LAYER)
    return;
  if (strlen (kpFileName) > 0) // confirmed_safe_unsafe_usage
    pDumpRecFile = WelsFopen (kpFileName, openMode);
  else {
    char sDependencyRecFileName[16] = {0};
    WelsSnprintf (sDependencyRecFileName, 16, "rec%d.yuv", kiDid); // confirmed_safe_unsafe_usage
    pDumpRecFile = WelsFopen (sDependencyRecFileName, openMode);
  }
  if (NULL != pDumpRecFile && bAppend)
    WelsFseek (pDumpRecFile, 0, SEEK_END);

  if (NULL != pDumpRecFile) {
    int32_t i = 0;
    int32_t j = 0;
    const int32_t kiStrideY = pCurPicture->iLineSize[0];
    const int32_t kiLumaWidth = bFrameCroppingFlag ? (pCurPicture->iWidthInPixel - ((pFrameCrop->iCropLeft +
                                pFrameCrop->iCropRight) << 1)) : pCurPicture->iWidthInPixel;
    const int32_t kiLumaHeight = bFrameCroppingFlag ? (pCurPicture->iHeightInPixel - ((pFrameCrop->iCropTop +
                                 pFrameCrop->iCropBottom) << 1)) : pCurPicture->iHeightInPixel;
    const int32_t kiChromaWidth = kiLumaWidth >> 1;
    const int32_t kiChromaHeight = kiLumaHeight >> 1;
    uint8_t* pSrc = NULL;
    pSrc = bFrameCroppingFlag ? (pCurPicture->pData[0] + kiStrideY * (pFrameCrop->iCropTop << 1) +
                                 (pFrameCrop->iCropLeft << 1)) : pCurPicture->pData[0];
    for (j = 0; j < kiLumaHeight; ++ j) {
      iWrittenSize = WelsFwrite (pSrc + j * kiStrideY, 1, kiLumaWidth, pDumpRecFile);
      assert (iWrittenSize == kiLumaWidth);
      if (iWrittenSize < kiLumaWidth) {
        assert (0); // make no sense for us if writing failed
        WelsFclose (pDumpRecFile);
        return;
      }
    }
    for (i = 1; i < I420_PLANES; ++ i) {
      const int32_t kiStrideUV = pCurPicture->iLineSize[i];
      pSrc = bFrameCroppingFlag ?
(pCurPicture->pData[i] + kiStrideUV * pFrameCrop->iCropTop + pFrameCrop->iCropLeft) : + pCurPicture->pData[i]; + for (j = 0; j < kiChromaHeight; ++ j) { + iWrittenSize = WelsFwrite (pSrc + j * kiStrideUV, 1, kiChromaWidth, pDumpRecFile); + assert (iWrittenSize == kiChromaWidth); + if (iWrittenSize < kiChromaWidth) { + assert (0); // make no sense for us if writing failed + WelsFclose (pDumpRecFile); + return; + } + } + } + WelsFclose (pDumpRecFile); + pDumpRecFile = NULL; + } +} + +/*! + * \brief Dump the reconstruction pictures + */ + +void DumpRecFrame (SPicture* pCurPicture, const char* kpFileName, const int8_t kiDid, bool bAppend, + SDqLayer* pDqLayer) { + WelsFileHandle* pDumpRecFile = NULL; + SWelsSPS* pSpsTmp = (kiDid > BASE_DEPENDENCY_ID) ? & (pDqLayer->sLayerInfo.pSubsetSpsP->pSps) : + pDqLayer->sLayerInfo.pSpsP; + bool bFrameCroppingFlag = pSpsTmp->bFrameCroppingFlag; + SCropOffset* pFrameCrop = &pSpsTmp->sFrameCrop; + + int32_t iWrittenSize = 0; + const char* openMode = bAppend ? "ab" : "wb"; + + if (NULL == pCurPicture || NULL == kpFileName) + return; + + if (strlen (kpFileName) > 0) { // confirmed_safe_unsafe_usage + pDumpRecFile = WelsFopen (kpFileName, openMode); + } else { + pDumpRecFile = WelsFopen ("rec.yuv", openMode); + } + if (NULL != pDumpRecFile && bAppend) + WelsFseek (pDumpRecFile, 0, SEEK_END); + + if (NULL != pDumpRecFile) { + int32_t i = 0; + int32_t j = 0; + const int32_t kiStrideY = pCurPicture->iLineSize[0]; + const int32_t kiLumaWidth = bFrameCroppingFlag ? (pCurPicture->iWidthInPixel - ((pFrameCrop->iCropLeft + + pFrameCrop->iCropRight) << 1)) : pCurPicture->iWidthInPixel; + const int32_t kiLumaHeight = bFrameCroppingFlag ? (pCurPicture->iHeightInPixel - ((pFrameCrop->iCropTop + + pFrameCrop->iCropBottom) << 1)) : pCurPicture->iHeightInPixel; + const int32_t kiChromaWidth = kiLumaWidth >> 1; + const int32_t kiChromaHeight = kiLumaHeight >> 1; + uint8_t* pSrc = NULL; + pSrc = bFrameCroppingFlag ? 
(pCurPicture->pData[0] + kiStrideY * (pFrameCrop->iCropTop << 1) + + (pFrameCrop->iCropLeft << 1)) : pCurPicture->pData[0]; + for (j = 0; j < kiLumaHeight; ++ j) { + iWrittenSize = WelsFwrite (pSrc + j * kiStrideY, 1, kiLumaWidth, pDumpRecFile); + assert (iWrittenSize == kiLumaWidth); + if (iWrittenSize < kiLumaWidth) { + assert (0); // make no sense for us if writing failed + WelsFclose (pDumpRecFile); + return; + } + } + for (i = 1; i < I420_PLANES; ++ i) { + const int32_t kiStrideUV = pCurPicture->iLineSize[i]; + pSrc = bFrameCroppingFlag ? (pCurPicture->pData[i] + kiStrideUV * pFrameCrop->iCropTop + pFrameCrop->iCropLeft) : + pCurPicture->pData[i]; + for (j = 0; j < kiChromaHeight; ++ j) { + iWrittenSize = WelsFwrite (pSrc + j * kiStrideUV, 1, kiChromaWidth, pDumpRecFile); + assert (iWrittenSize == kiChromaWidth); + if (iWrittenSize < kiChromaWidth) { + assert (0); // make no sense for us if writing failed + WelsFclose (pDumpRecFile); + return; + } + } + } + WelsFclose (pDumpRecFile); + pDumpRecFile = NULL; + } +} + + + +/***********************************************************************************/ +void WelsSetMemZero_c (void* pDst, int32_t iSize) { // confirmed_safe_unsafe_usage + memset (pDst, 0, iSize); +} +} diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/encoder_data_tables.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/encoder_data_tables.cpp new file mode 100644 index 000000000..8d2065302 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/encoder_data_tables.cpp @@ -0,0 +1,345 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +// exp_data.c +// export date cross various modules (.c) +#include "md.h" +#include "vlc_encoder.h" +namespace WelsEnc { +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +// extern at mb_cache.h +const uint8_t g_kuiSmb4AddrIn256[16] = { + 0, 4, 16 * 4, 16 * 4 + 4, + 8, 12, 16 * 4 + 8, 16 * 4 + 12, + 16 * 8, 16 * 8 + 4, 16 * 12, 16 * 12 + 4, + 16 * 8 + 8, 16 * 8 + 12, 16 * 12 + 8, 16 * 12 + 12 +}; + + +const uint8_t g_kuiCache12_8x8RefIdx[4] = { //mv or uiRefIndex cache scan index, 4*4 block as basic unit + 5, 6, + 9, 10 +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +// extern at md.h +const int32_t g_kiQpCostTable[52] = { + 1, 1, 1, 1, 1, 1, 1, 1, /* 0-7 */ + 1, 1, 1, 1, /* 8-11 */ + 1, 1, 1, 1, 2, 2, 2, 2, /* 12-19 */ + 3, 3, 3, 4, 4, 4, 5, 6, /* 20-27 */ + 6, 7, 8, 9, 10, 11, 13, 14, /* 28-35 */ + 16, 18, 20, 23, 25, 29, 32, 36, /* 36-43 */ + 40, 45, 51, 57, 64, 72, 81, 91 /* 44-51 */ +}; +const int8_t g_kiMapModeI16x16[7] = { + 0, 1, 2, 3, 2, 2, 2 +};//{I16_PRED_V, I16_PRED_H, I16_PRED_DC, I16_PRED_P, I16_PRED_DC, I16_PRED_DC, I16_PRED_DC}; + +const int8_t g_kiMapModeIntraChroma[7] = { + 0, 1, 2, 3, 0, 0, 0 +};//{C_PRED_DC, C_PRED_H, C_PRED_V, C_PRED_P, C_PRED_DC_L, C_PRED_DC_T, C_PRED_DC_128}; + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +// extern at vlc_encoder.h + +//g_kuiVlcCoeffToken[nc][total-coeff][trailing-ones][0--value, 1--bit count] +const uint8_t g_kuiVlcCoeffToken[5][17][4][2] = { + { + //0<=nc<2 + { { 
1, 1}, { 0, 0}, { 0, 0}, { 0, 0} },//0 + { { 5, 6}, { 1, 2}, { 0, 0}, { 0, 0} },//1 + { { 7, 8}, { 4, 6}, { 1, 3}, { 0, 0} },//2 + { { 7, 9}, { 6, 8}, { 5, 7}, { 3, 5} },//3 + { { 7, 10}, { 6, 9}, { 5, 8}, { 3, 6} },//4 + { { 7, 11}, { 6, 10}, { 5, 9}, { 4, 7} },//5 + { {15, 13}, { 6, 11}, { 5, 10}, { 4, 8} },//6 + { {11, 13}, {14, 13}, { 5, 11}, { 4, 9} },//7 + { { 8, 13}, {10, 13}, {13, 13}, { 4, 10} },//8 + { {15, 14}, {14, 14}, { 9, 13}, { 4, 11} },//9 + { {11, 14}, {10, 14}, {13, 14}, {12, 13} },//10 + { {15, 15}, {14, 15}, { 9, 14}, {12, 14} },//11 + { {11, 15}, {10, 15}, {13, 15}, { 8, 14} },//12 + { {15, 16}, { 1, 15}, { 9, 15}, {12, 15} },//13 + { {11, 16}, {14, 16}, {13, 16}, { 8, 15} },//14 + { { 7, 16}, {10, 16}, { 9, 16}, {12, 16} },//15 + { { 4, 16}, { 6, 16}, { 5, 16}, { 8, 16} } //16 + }, + + { + //2<=nc<4 + { { 3, 2}, { 0, 0}, { 0, 0}, { 0, 0} },//0 + { {11, 6}, { 2, 2}, { 0, 0}, { 0, 0} },//1 + { { 7, 6}, { 7, 5}, { 3, 3}, { 0, 0} },//2 + { { 7, 7}, {10, 6}, { 9, 6}, { 5, 4} },//3 + { { 7, 8}, { 6, 6}, { 5, 6}, { 4, 4} },//4 + { { 4, 8}, { 6, 7}, { 5, 7}, { 6, 5} },//5 + { { 7, 9}, { 6, 8}, { 5, 8}, { 8, 6} },//6 + { {15, 11}, { 6, 9}, { 5, 9}, { 4, 6} },//7 + { {11, 11}, {14, 11}, {13, 11}, { 4, 7} },//8 + { {15, 12}, {10, 11}, { 9, 11}, { 4, 9} },//9 + { {11, 12}, {14, 12}, {13, 12}, {12, 11} },//10 + { { 8, 12}, {10, 12}, { 9, 12}, { 8, 11} },//11 + { {15, 13}, {14, 13}, {13, 13}, {12, 12} },//12 + { {11, 13}, {10, 13}, { 9, 13}, {12, 13} },//13 + { { 7, 13}, {11, 14}, { 6, 13}, { 8, 13} },//14 + { { 9, 14}, { 8, 14}, {10, 14}, { 1, 13} },//15 + { { 7, 14}, { 6, 14}, { 5, 14}, { 4, 14} } //16 + }, + + { + //4<=nc<8 + { {15, 4}, { 0, 0}, { 0, 0}, { 0, 0} },//0 + { {15, 6}, {14, 4}, { 0, 0}, { 0, 0} },//1 + { {11, 6}, {15, 5}, {13, 4}, { 0, 0} },//2 + { { 8, 6}, {12, 5}, {14, 5}, {12, 4} },//3 + { {15, 7}, {10, 5}, {11, 5}, {11, 4} },//4 + { {11, 7}, { 8, 5}, { 9, 5}, {10, 4} },//5 + { { 9, 7}, {14, 6}, {13, 6}, { 9, 4} },//6 + { { 8, 7}, {10, 
6}, { 9, 6}, { 8, 4} },//7 + { {15, 8}, {14, 7}, {13, 7}, {13, 5} },//8 + { {11, 8}, {14, 8}, {10, 7}, {12, 6} },//9 + { {15, 9}, {10, 8}, {13, 8}, {12, 7} },//10 + { {11, 9}, {14, 9}, { 9, 8}, {12, 8} },//11 + { { 8, 9}, {10, 9}, {13, 9}, { 8, 8} },//12 + { {13, 10}, { 7, 9}, { 9, 9}, {12, 9} },//13 + { { 9, 10}, {12, 10}, {11, 10}, {10, 10} },//14 + { { 5, 10}, { 8, 10}, { 7, 10}, { 6, 10} },//15 + { { 1, 10}, { 4, 10}, { 3, 10}, { 2, 10} } //16 + }, + + { + //8<=nc + { { 3, 6}, { 0, 0}, { 0, 0}, { 0, 0} },//0 + { { 0, 6}, { 1, 6}, { 0, 0}, { 0, 0} },//1 + { { 4, 6}, { 5, 6}, { 6, 6}, { 0, 0} },//2 + { { 8, 6}, { 9, 6}, {10, 6}, {11, 6} },//3 + { {12, 6}, {13, 6}, {14, 6}, {15, 6} },//4 + { {16, 6}, {17, 6}, {18, 6}, {19, 6} },//5 + { {20, 6}, {21, 6}, {22, 6}, {23, 6} },//6 + { {24, 6}, {25, 6}, {26, 6}, {27, 6} },//7 + { {28, 6}, {29, 6}, {30, 6}, {31, 6} },//8 + { {32, 6}, {33, 6}, {34, 6}, {35, 6} },//9 + { {36, 6}, {37, 6}, {38, 6}, {39, 6} },//10 + { {40, 6}, {41, 6}, {42, 6}, {43, 6} },//11 + { {44, 6}, {45, 6}, {46, 6}, {47, 6} },//12 + { {48, 6}, {49, 6}, {50, 6}, {51, 6} },//13 + { {52, 6}, {53, 6}, {54, 6}, {55, 6} },//14 + { {56, 6}, {57, 6}, {58, 6}, {59, 6} },//15 + { {60, 6}, {61, 6}, {62, 6}, {63, 6} } //16 + }, + + { + //nc == -1 + { { 1, 2}, { 0, 0}, { 0, 0}, { 0, 0} },//0 + { { 7, 6}, { 1, 1}, { 0, 0}, { 0, 0} },//1 + { { 4, 6}, { 6, 6}, { 1, 3}, { 0, 0} },//2 + { { 3, 6}, { 3, 7}, { 2, 7}, { 5, 6} },//3 + { { 2, 6}, { 3, 8}, { 2, 8}, { 0, 7} },//4 + { { 0, 0}, { 0, 0}, { 0, 0}, { 0, 0} },//5 + { { 0, 0}, { 0, 0}, { 0, 0}, { 0, 0} },//6 + { { 0, 0}, { 0, 0}, { 0, 0}, { 0, 0} },//7 + { { 0, 0}, { 0, 0}, { 0, 0}, { 0, 0} },//8 + { { 0, 0}, { 0, 0}, { 0, 0}, { 0, 0} },//9 + { { 0, 0}, { 0, 0}, { 0, 0}, { 0, 0} },//10 + { { 0, 0}, { 0, 0}, { 0, 0}, { 0, 0} },//11 + { { 0, 0}, { 0, 0}, { 0, 0}, { 0, 0} },//12 + { { 0, 0}, { 0, 0}, { 0, 0}, { 0, 0} },//13 + { { 0, 0}, { 0, 0}, { 0, 0}, { 0, 0} },//14 + { { 0, 0}, { 0, 0}, { 0, 0}, { 0, 0} },//15 + { 
{ 0, 0}, { 0, 0}, { 0, 0}, { 0, 0} } //16 + } +}; + +//const uint8_t g_kuiVlcLevelPrefix[15][2] = +//{ +// {1, 1}, {1, 2} +//}; + +//g_kuiVlcTotalZeros[tzVlcIndex][total_zeros][0--value, 1--bit count] +const uint8_t g_kuiVlcTotalZeros[16][16][2] = { + { + //0 not available + {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0} + }, + { + //1 + {1, 1}, {3, 3}, {2, 3}, {3, 4}, {2, 4}, {3, 5}, {2, 5}, {3, 6}, {2, 6}, {3, 7}, {2, 7}, {3, 8}, {2, 8}, {3, 9}, {2, 9}, {1, 9} + }, + { + //2 + {7, 3}, {6, 3}, {5, 3}, {4, 3}, {3, 3}, {5, 4}, {4, 4}, {3, 4}, {2, 4}, {3, 5}, {2, 5}, {3, 6}, {2, 6}, {1, 6}, {0, 6}, {0, 0} + }, + { + //3 + {5, 4}, {7, 3}, {6, 3}, {5, 3}, {4, 4}, {3, 4}, {4, 3}, {3, 3}, {2, 4}, {3, 5}, {2, 5}, {1, 6}, {1, 5}, {0, 6}, {0, 0}, {0, 0} + }, + { + //4 + {3, 5}, {7, 3}, {5, 4}, {4, 4}, {6, 3}, {5, 3}, {4, 3}, {3, 4}, {3, 3}, {2, 4}, {2, 5}, {1, 5}, {0, 5}, {0, 0}, {0, 0}, {0, 0} + }, + { + //5 + {5, 4}, {4, 4}, {3, 4}, {7, 3}, {6, 3}, {5, 3}, {4, 3}, {3, 3}, {2, 4}, {1, 5}, {1, 4}, {0, 5}, {0, 0}, {0, 0}, {0, 0}, {0, 0} + }, + { + //6 + {1, 6}, {1, 5}, {7, 3}, {6, 3}, {5, 3}, {4, 3}, {3, 3}, {2, 3}, {1, 4}, {1, 3}, {0, 6}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0} + }, + { + //7 + {1, 6}, {1, 5}, {5, 3}, {4, 3}, {3, 3}, {3, 2}, {2, 3}, {1, 4}, {1, 3}, {0, 6}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0} + }, + { + //8 + {1, 6}, {1, 4}, {1, 5}, {3, 3}, {3, 2}, {2, 2}, {2, 3}, {1, 3}, {0, 6}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0} + }, + { + //9 + {1, 6}, {0, 6}, {1, 4}, {3, 2}, {2, 2}, {1, 3}, {1, 2}, {1, 5}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0} + }, + { + //10 + {1, 5}, {0, 5}, {1, 3}, {3, 2}, {2, 2}, {1, 2}, {1, 4}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0} + }, + { + //11 + {0, 4}, {1, 4}, {1, 3}, {2, 3}, {1, 1}, {3, 3}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0} + }, + { + 
//12 + {0, 4}, {1, 4}, {1, 2}, {1, 1}, {1, 3}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0} + }, + { + //13 + {0, 3}, {1, 3}, {1, 1}, {1, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0} + }, + { + //14 + {0, 2}, {1, 2}, {1, 1}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0} + }, + { + //15 + {0, 1}, {1, 1}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0} + } +}; + +const uint8_t g_kuiVlcTotalZerosChromaDc[4][4][2] = { + { + {0, 0}, {0, 0}, {0, 0}, {0, 0} + }, + { + {1, 1}, {1, 2}, {1, 3}, {0, 3} + }, + { + {1, 1}, {1, 2}, {0, 2}, {0, 0} + }, + { + {1, 1}, {0, 1}, {0, 0}, {0, 0} + } +}; +// + +//g_kuiVlcRunBefore[zeros-left][run-before][0--value, 1--bit count] +const uint8_t g_kuiVlcRunBefore[8][15][2] = { + { + //0 not available + {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0} + }, + { + //1 + {1, 1}, {0, 1}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0} + }, + { + //2 + {1, 1}, {1, 2}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0} + }, + { + //3 + {3, 2}, {2, 2}, {1, 2}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0} + }, + { + //4 + {3, 2}, {2, 2}, {1, 2}, {1, 3}, {0, 3}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0} + }, + { + //5 + {3, 2}, {2, 2}, {3, 3}, {2, 3}, {1, 3}, {0, 3}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0} + }, + { + //6 + {3, 2}, {0, 3}, {1, 3}, {3, 3}, {2, 3}, {5, 3}, {4, 3}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0} + }, + { + //>6 + {7, 3}, {6, 3}, {5, 3}, {4, 3}, {3, 3}, {2, 3}, {1, 3}, {1, 4}, {1, 5}, {1, 6}, {1, 7}, {1, 
8}, {1, 9}, {1, 10}, {1, 11} + } +}; + +const ALIGNED_DECLARE (uint8_t, g_kuiEncNcMapTable[18], 16) = { + 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4 +}; + + + +const uint8_t g_kuiTemporalIdListTable[MAX_TEMPORAL_LEVEL][MAX_GOP_SIZE + 1] = { + { + 0, 0, 0, 0, 0, 0, 0, 0, + 0 + }, // uiGopSize = 1 + { + 0, 1, 0, 0, 0, 0, 0, 0, + 0 + }, // uiGopSize = 2 + { + 0, 2, 1, 2, 0, 0, 0, 0, + 0 + }, // uiGopSize = 4 + { + 0, 3, 2, 3, 1, 3, 2, 3, + 0 + } //uiGopSize = 8 +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +// extern at svc_encode_slice.h +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +} diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/encoder_ext.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/encoder_ext.cpp new file mode 100644 index 000000000..9bc6e103b --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/encoder_ext.cpp @@ -0,0 +1,4617 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file encoder_ext.c + * + * \brief core encoder for SVC + * + * \date 7/24/2009 Created + * + ************************************************************************************* + */ + +#include "encoder.h" +#include "cpu.h" +#include "utils.h" +#include "svc_enc_golomb.h" +#include "au_set.h" +#include "picture_handle.h" +#include "svc_base_layer_md.h" +#include "svc_encode_slice.h" +#include "svc_mode_decision.h" +#include "decode_mb_aux.h" +#include "deblocking.h" +#include "ref_list_mgr_svc.h" +#include "ls_defines.h" +#include "crt_util_safe_x.h" // Safe CRT routines like utils for cross platforms +#include "slice_multi_threading.h" +#include "measure_time.h" +#include "svc_set_mb_syn.h" + +namespace WelsEnc { + + +int32_t WelsCodeOnePicPartition (sWelsEncCtx* pCtx, + SFrameBSInfo* pFrameBsInfo, + SLayerBSInfo* pLayerBsInfo, + int32_t* pNalIdxInLayer, + int32_t* pLayerSize, + int32_t iFirstMbIdxInPartition, + int32_t iEndMbIdxInPartition, + int32_t iStartSliceIdx + ); + + +int32_t WelsBitRateVerification (SLogContext* pLogCtx, SSpatialLayerConfig* pLayerParam, int32_t iLayerId) { + if ((pLayerParam->iSpatialBitrate <= 0) + || (static_cast<float> (pLayerParam->iSpatialBitrate) < pLayerParam->fFrameRate)) { + WelsLog (pLogCtx, WELS_LOG_ERROR, "Invalid bitrate settings in layer %d, bitrate= %d at FrameRate(%f)", iLayerId, + pLayerParam->iSpatialBitrate, pLayerParam->fFrameRate); + return ENC_RETURN_UNSUPPORTED_PARA; + }
+ + // deal with LEVEL_MAX_BR and MAX_BR setting + const SLevelLimits* pCurLevelLimit = g_ksLevelLimits; + while ((pCurLevelLimit->uiLevelIdc != LEVEL_5_2) && (pCurLevelLimit->uiLevelIdc != pLayerParam->uiLevelIdc)) + pCurLevelLimit++; + const int32_t iLevelMaxBitrate = pCurLevelLimit->uiMaxBR * CpbBrNalFactor; + const int32_t iLevel52MaxBitrate = g_ksLevelLimits[LEVEL_NUMBER - 1].uiMaxBR * CpbBrNalFactor; + if (UNSPECIFIED_BIT_RATE != iLevelMaxBitrate) { + if ((pLayerParam->iMaxSpatialBitrate == UNSPECIFIED_BIT_RATE) + || (pLayerParam->iMaxSpatialBitrate > iLevel52MaxBitrate)) { + pLayerParam->iMaxSpatialBitrate = iLevelMaxBitrate; + WelsLog (pLogCtx, WELS_LOG_INFO, + "Current MaxSpatialBitrate is invalid (UNSPECIFIED_BIT_RATE or larger than LEVEL5_2) but level setting is valid, set iMaxSpatialBitrate to %d from level (%d)", + pLayerParam->iMaxSpatialBitrate, pLayerParam->uiLevelIdc); + } else if (pLayerParam->iMaxSpatialBitrate > iLevelMaxBitrate) { + ELevelIdc iCurLevel = pLayerParam->uiLevelIdc; + WelsAdjustLevel (pLayerParam, pCurLevelLimit); + WelsLog (pLogCtx, WELS_LOG_INFO, + "LevelIdc is changed from (%d) to (%d) according to the iMaxSpatialBitrate(%d)", + iCurLevel, pLayerParam->uiLevelIdc, pLayerParam->iMaxSpatialBitrate); + } + } else if ((pLayerParam->iMaxSpatialBitrate != UNSPECIFIED_BIT_RATE) + && (pLayerParam->iMaxSpatialBitrate > iLevel52MaxBitrate)) { + // no level limitation, just need to check if iMaxSpatialBitrate is too big from reasonable + WelsLog (pLogCtx, WELS_LOG_WARNING, + "No LevelIdc setting and iMaxSpatialBitrate (%d) is considered too big to be valid, changed to UNSPECIFIED_BIT_RATE", + pLayerParam->iMaxSpatialBitrate); + pLayerParam->iMaxSpatialBitrate = UNSPECIFIED_BIT_RATE; + } + + // deal with iSpatialBitrate and iMaxSpatialBitrate setting + if (pLayerParam->iMaxSpatialBitrate != UNSPECIFIED_BIT_RATE) { + if (pLayerParam->iMaxSpatialBitrate == pLayerParam->iSpatialBitrate) { + WelsLog (pLogCtx, WELS_LOG_INFO, + "Setting 
MaxSpatialBitrate (%d) the same at SpatialBitrate (%d) will make the actual bit rate lower than SpatialBitrate", + pLayerParam->iMaxSpatialBitrate, pLayerParam->iSpatialBitrate); + } else if (pLayerParam->iMaxSpatialBitrate < pLayerParam->iSpatialBitrate) { + WelsLog (pLogCtx, WELS_LOG_ERROR, + "MaxSpatialBitrate (%d) should be larger than SpatialBitrate (%d), considering it as error setting", + pLayerParam->iMaxSpatialBitrate, pLayerParam->iSpatialBitrate); + return ENC_RETURN_UNSUPPORTED_PARA; + } + } + return ENC_RETURN_SUCCESS; +} + +void CheckProfileSetting (SLogContext* pLogCtx, SWelsSvcCodingParam* pParam, int32_t iLayer, EProfileIdc uiProfileIdc) { + SSpatialLayerConfig* pLayerInfo = &pParam->sSpatialLayers[iLayer]; + pLayerInfo->uiProfileIdc = uiProfileIdc; + if (pParam->bSimulcastAVC) { + if ((uiProfileIdc != PRO_BASELINE) && (uiProfileIdc != PRO_MAIN) && (uiProfileIdc != PRO_HIGH)) { + WelsLog (pLogCtx, WELS_LOG_WARNING, "layerId(%d) doesn't support profile(%d), change to UNSPECIFIC profile", iLayer, + uiProfileIdc); + pLayerInfo->uiProfileIdc = PRO_UNKNOWN; + } + } else { + if (iLayer == SPATIAL_LAYER_0) { + if ((uiProfileIdc != PRO_BASELINE) && (uiProfileIdc != PRO_MAIN) && (uiProfileIdc != PRO_HIGH)) { + WelsLog (pLogCtx, WELS_LOG_WARNING, "layerId(%d) doesn't support profile(%d), change to UNSPECIFIC profile", iLayer, + uiProfileIdc); + pLayerInfo->uiProfileIdc = PRO_UNKNOWN; + } + } else { + if ((uiProfileIdc != PRO_SCALABLE_BASELINE) && (uiProfileIdc != PRO_SCALABLE_HIGH)) { + pLayerInfo->uiProfileIdc = PRO_SCALABLE_BASELINE; + WelsLog (pLogCtx, WELS_LOG_WARNING, "layerId(%d) doesn't support profile(%d), change to scalable baseline profile", + iLayer, uiProfileIdc); + } + } + } +} +void CheckLevelSetting (SLogContext* pLogCtx, SWelsSvcCodingParam* pParam, int32_t iLayer, ELevelIdc uiLevelIdc) { + SSpatialLayerConfig* pLayerInfo = &pParam->sSpatialLayers[iLayer]; + pLayerInfo->uiLevelIdc = LEVEL_UNKNOWN; + int32_t iLevelIdx = LEVEL_NUMBER - 1; + do 
{ + if (g_ksLevelLimits[iLevelIdx].uiLevelIdc == uiLevelIdc) { + pLayerInfo->uiLevelIdc = uiLevelIdc; + break; + } + iLevelIdx--; + } while (iLevelIdx >= 0); +} +void CheckReferenceNumSetting (SLogContext* pLogCtx, SWelsSvcCodingParam* pParam, int32_t iNumRef) { + int32_t iRefUpperBound = (pParam->iUsageType == CAMERA_VIDEO_REAL_TIME) ? + MAX_REFERENCE_PICTURE_COUNT_NUM_CAMERA : MAX_REFERENCE_PICTURE_COUNT_NUM_SCREEN; + pParam->iNumRefFrame = iNumRef; + if ((iNumRef < MIN_REF_PIC_COUNT) || (iNumRef > iRefUpperBound)) { + pParam->iNumRefFrame = AUTO_REF_PIC_COUNT; + WelsLog (pLogCtx, WELS_LOG_WARNING, + "doesn't support the number of reference frame(%d) change to auto select mode", iNumRef); + } +} + +int32_t SliceArgumentValidationFixedSliceMode (SLogContext* pLogCtx, + SSliceArgument* pSliceArgument, const RC_MODES kiRCMode, + const int32_t kiPicWidth, const int32_t kiPicHeight) { + int32_t iCpuCores = 0; + int32_t iIdx = 0; + const int32_t iMbWidth = (kiPicWidth + 15) >> 4; + const int32_t iMbHeight = (kiPicHeight + 15) >> 4; + const int32_t iMbNumInFrame = iMbWidth * iMbHeight; + bool bSingleMode = false; + + pSliceArgument->uiSliceSizeConstraint = 0; + + if (pSliceArgument->uiSliceNum == 0) { + WelsCPUFeatureDetect (&iCpuCores); + if (0 == iCpuCores) { + // cpuid not supported or doesn't expose the number of cores, + // use high level system API as followed to detect number of pysical/logic processor + iCpuCores = DynamicDetectCpuCores(); + } + pSliceArgument->uiSliceNum = iCpuCores; + } + + if (pSliceArgument->uiSliceNum <= 1) { + WelsLog (pLogCtx, WELS_LOG_INFO, + "SliceArgumentValidationFixedSliceMode(), uiSliceNum(%d) you set for SM_FIXEDSLCNUM_SLICE, now turn to SM_SINGLE_SLICE type!", + pSliceArgument->uiSliceNum); + bSingleMode = true; + } + + // considering the coding efficient and performance, + // iCountMbNum constraint by MIN_NUM_MB_PER_SLICE condition of multi-pSlice mode settting + if (iMbNumInFrame <= MIN_NUM_MB_PER_SLICE) { + WelsLog (pLogCtx, 
WELS_LOG_INFO, + "SliceArgumentValidationFixedSliceMode(), uiSliceNum(%d) you set for SM_FIXEDSLCNUM_SLICE, now turn to SM_SINGLE_SLICE type as CountMbNum less than MIN_NUM_MB_PER_SLICE!", + pSliceArgument->uiSliceNum); + bSingleMode = true; + } + + if (bSingleMode) { + pSliceArgument->uiSliceMode = SM_SINGLE_SLICE; + pSliceArgument->uiSliceNum = 1; + for (iIdx = 0; iIdx < MAX_SLICES_NUM; iIdx++) { + pSliceArgument->uiSliceMbNum[iIdx] = 0; + } + return ENC_RETURN_SUCCESS; + } + + if (pSliceArgument->uiSliceNum > MAX_SLICES_NUM) { + pSliceArgument->uiSliceNum = MAX_SLICES_NUM; + WelsLog (pLogCtx, WELS_LOG_WARNING, + "SliceArgumentValidationFixedSliceMode(), uiSliceNum exceed MAX_SLICES_NUM! So setting slice num eqaul to MAX_SLICES_NUM(%d)!", + pSliceArgument->uiSliceNum); + } + + if (kiRCMode != RC_OFF_MODE) { // multiple slices verify with gom + //check uiSliceNum and set uiSliceMbNum with current uiSliceNum + if (!GomValidCheckSliceNum (iMbWidth, iMbHeight, &pSliceArgument->uiSliceNum)) { + WelsLog (pLogCtx, WELS_LOG_WARNING, + "SliceArgumentValidationFixedSliceMode(), unsupported setting with Resolution and uiSliceNum combination under RC on! So uiSliceNum is changed to %d!", + pSliceArgument->uiSliceNum); + } + + if (pSliceArgument->uiSliceNum <= 1 || + !GomValidCheckSliceMbNum (iMbWidth, iMbHeight, pSliceArgument)) { + WelsLog (pLogCtx, WELS_LOG_ERROR, + "SliceArgumentValidationFixedSliceMode(), unsupported setting with Resolution and uiSliceNum (%d) combination under RC on! 
Consider setting single slice with this resolution!", + pSliceArgument->uiSliceNum); + return ENC_RETURN_UNSUPPORTED_PARA; + } + } else if (!CheckFixedSliceNumMultiSliceSetting (iMbNumInFrame, pSliceArgument)) { + //check uiSliceMbNum with current uiSliceNum + WelsLog (pLogCtx, WELS_LOG_ERROR, + "SliceArgumentValidationFixedSliceMode(), invalid uiSliceMbNum (%d) settings!,now turn to SM_SINGLE_SLICE type", + pSliceArgument->uiSliceMbNum[0]); + pSliceArgument->uiSliceMode = SM_SINGLE_SLICE; + pSliceArgument->uiSliceNum = 1; + for (iIdx = 0; iIdx < MAX_SLICES_NUM; iIdx++) { + pSliceArgument->uiSliceMbNum[iIdx] = 0; + } + } + + return ENC_RETURN_SUCCESS; +} + + +/*! + * \brief validate checking in parameter configuration + * \pParam pParam SWelsSvcCodingParam* + * \return successful - 0; otherwise none 0 for failed + */ +int32_t ParamValidation (SLogContext* pLogCtx, SWelsSvcCodingParam* pCfg) { + const float fEpsn = 0.000001f; + int32_t i = 0; + + assert (pCfg != NULL); + + if (! (pCfg->iUsageType < INPUT_CONTENT_TYPE_ALL)) { + WelsLog (pLogCtx, WELS_LOG_ERROR, "ParamValidation(),Invalid usage type = %d", pCfg->iUsageType); + return ENC_RETURN_UNSUPPORTED_PARA; + } + if (pCfg->iUsageType == SCREEN_CONTENT_REAL_TIME) { + if (pCfg->iSpatialLayerNum > 1) { + WelsLog (pLogCtx, WELS_LOG_ERROR, "ParamValidation(),Invalid the number of Spatial layer(%d)for screen content", + pCfg->iSpatialLayerNum); + return ENC_RETURN_UNSUPPORTED_PARA; + } + if (pCfg->bEnableAdaptiveQuant) { + WelsLog (pLogCtx, WELS_LOG_WARNING, + "ParamValidation(), AdaptiveQuant(%d) is not supported yet for screen content, auto turned off", + pCfg->bEnableAdaptiveQuant); + pCfg->bEnableAdaptiveQuant = false; + } + if (pCfg->bEnableBackgroundDetection) { + WelsLog (pLogCtx, WELS_LOG_WARNING, + "ParamValidation(), BackgroundDetection(%d) is not supported yet for screen content, auto turned off", + pCfg->bEnableBackgroundDetection); + pCfg->bEnableBackgroundDetection = false; + } + if 
(pCfg->bEnableSceneChangeDetect == false) { + pCfg->bEnableSceneChangeDetect = true; + WelsLog (pLogCtx, WELS_LOG_WARNING, + "ParamValidation(), screen change detection should be turned on, change bEnableSceneChangeDetect as true"); + } + + } + + //turn off adaptive quant now, algorithms needs to be refactored + pCfg->bEnableAdaptiveQuant = false; + + if (pCfg->iSpatialLayerNum > 1) { + for (i = pCfg->iSpatialLayerNum - 1; i > 0; i--) { + SSpatialLayerConfig* fDlpUp = &pCfg->sSpatialLayers[i]; + SSpatialLayerConfig* fDlp = &pCfg->sSpatialLayers[i - 1]; + if ((fDlp->iVideoWidth > fDlpUp->iVideoWidth) || (fDlp->iVideoHeight > fDlpUp->iVideoHeight)) { + WelsLog (pLogCtx, WELS_LOG_ERROR, + "ParamValidation,Invalid resolution layer(%d) resolution(%d x %d) should be less than the upper spatial layer resolution(%d x %d) ", + i, fDlp->iVideoWidth, fDlp->iVideoHeight, fDlpUp->iVideoWidth, fDlpUp->iVideoHeight); + return ENC_RETURN_UNSUPPORTED_PARA; + } + } + } + + if (!CheckInRangeCloseOpen (pCfg->iLoopFilterDisableIdc, DEBLOCKING_IDC_0, DEBLOCKING_IDC_2 + 1) || + !CheckInRangeCloseOpen (pCfg->iLoopFilterAlphaC0Offset, DEBLOCKING_OFFSET_MINUS, DEBLOCKING_OFFSET + 1) || + !CheckInRangeCloseOpen (pCfg->iLoopFilterBetaOffset, DEBLOCKING_OFFSET_MINUS, DEBLOCKING_OFFSET + 1)) { + WelsLog (pLogCtx, WELS_LOG_ERROR, + "ParamValidation, Invalid iLoopFilterDisableIdc(%d) or iLoopFilterAlphaC0Offset(%d) or iLoopFilterBetaOffset(%d)!", + pCfg->iLoopFilterDisableIdc, pCfg->iLoopFilterAlphaC0Offset, pCfg->iLoopFilterBetaOffset); + return ENC_RETURN_UNSUPPORTED_PARA; + } + + for (i = 0; i < pCfg->iSpatialLayerNum; ++ i) { + SSpatialLayerInternal* fDlp = &pCfg->sDependencyLayers[i]; + SSpatialLayerConfig* pConfig = &pCfg->sSpatialLayers[i]; + if (fDlp->fOutputFrameRate > fDlp->fInputFrameRate || (fDlp->fInputFrameRate >= -fEpsn + && fDlp->fInputFrameRate <= fEpsn) + || (fDlp->fOutputFrameRate >= -fEpsn && fDlp->fOutputFrameRate <= fEpsn)) { + WelsLog (pLogCtx, WELS_LOG_ERROR, + "Invalid 
settings in input frame rate(%.6f) or output frame rate(%.6f) of layer #%d config file..", + fDlp->fInputFrameRate, fDlp->fOutputFrameRate, i); + return ENC_RETURN_INVALIDINPUT; + } + if (UINT_MAX == GetLogFactor (fDlp->fOutputFrameRate, fDlp->fInputFrameRate)) { + WelsLog (pLogCtx, WELS_LOG_WARNING, + "AUTO CORRECT: Invalid settings in input frame rate(%.6f) and output frame rate(%.6f) of layer #%d config file: iResult of output frame rate divided by input frame rate should be power of 2(i.e,in/pOut=2^n). \n Auto correcting Output Framerate to Input Framerate %f!\n", + fDlp->fInputFrameRate, fDlp->fOutputFrameRate, i, fDlp->fInputFrameRate); + fDlp->fOutputFrameRate = fDlp->fInputFrameRate; + pConfig->fFrameRate = fDlp->fOutputFrameRate; + } + } + + if ((pCfg->iRCMode != RC_OFF_MODE) && (pCfg->iRCMode != RC_QUALITY_MODE) && (pCfg->iRCMode != RC_BUFFERBASED_MODE) + && (pCfg->iRCMode != RC_BITRATE_MODE) && (pCfg->iRCMode != RC_TIMESTAMP_MODE)) { + WelsLog (pLogCtx, WELS_LOG_ERROR, "ParamValidation(),Invalid iRCMode = %d", pCfg->iRCMode); + return ENC_RETURN_UNSUPPORTED_PARA; + } + //bitrate setting validation + if (pCfg->iRCMode != RC_OFF_MODE) { + int32_t iTotalBitrate = 0; + if (pCfg->iTargetBitrate <= 0) { + WelsLog (pLogCtx, WELS_LOG_ERROR, "Invalid bitrate settings in total configure, bitrate= %d", pCfg->iTargetBitrate); + return ENC_RETURN_INVALIDINPUT; + } + for (i = 0; i < pCfg->iSpatialLayerNum; ++ i) { + SSpatialLayerConfig* pSpatialLayer = &pCfg->sSpatialLayers[i]; + iTotalBitrate += pSpatialLayer->iSpatialBitrate; + + if (WelsBitRateVerification (pLogCtx, pSpatialLayer, i) != ENC_RETURN_SUCCESS) + return ENC_RETURN_INVALIDINPUT; + } + if (iTotalBitrate > pCfg->iTargetBitrate) { + WelsLog (pLogCtx, WELS_LOG_ERROR, + "Invalid settings in bitrate. 
the sum of each layer bitrate(%d) is larger than total bitrate setting(%d)", + iTotalBitrate, pCfg->iTargetBitrate); + return ENC_RETURN_INVALIDINPUT; + } + if ((pCfg->iRCMode == RC_QUALITY_MODE) || (pCfg->iRCMode == RC_BITRATE_MODE) || (pCfg->iRCMode == RC_TIMESTAMP_MODE)) + if (!pCfg->bEnableFrameSkip) + WelsLog (pLogCtx, WELS_LOG_WARNING, + "bEnableFrameSkip = %d,bitrate can't be controlled for RC_QUALITY_MODE,RC_BITRATE_MODE and RC_TIMESTAMP_MODE without enabling skip frame.", + pCfg->bEnableFrameSkip); + if ((pCfg->iMaxQp <= 0) || (pCfg->iMinQp <= 0)) { + if (pCfg->iUsageType == SCREEN_CONTENT_REAL_TIME) { + WelsLog (pLogCtx, WELS_LOG_INFO, "Change QP Range from(%d,%d) to (%d,%d)", pCfg->iMinQp, pCfg->iMaxQp, MIN_SCREEN_QP, + MAX_SCREEN_QP); + pCfg->iMinQp = MIN_SCREEN_QP; + pCfg->iMaxQp = MAX_SCREEN_QP; + } else { + WelsLog (pLogCtx, WELS_LOG_INFO, "Change QP Range from(%d,%d) to (%d,%d)", pCfg->iMinQp, pCfg->iMaxQp, + GOM_MIN_QP_MODE, MAX_LOW_BR_QP); + pCfg->iMinQp = GOM_MIN_QP_MODE; + pCfg->iMaxQp = MAX_LOW_BR_QP; + } + + } + pCfg->iMinQp = WELS_CLIP3 (pCfg->iMinQp, GOM_MIN_QP_MODE, QP_MAX_VALUE); + pCfg->iMaxQp = WELS_CLIP3 (pCfg->iMaxQp, pCfg->iMinQp, QP_MAX_VALUE); + } + // ref-frames validation + if (((pCfg->iUsageType == CAMERA_VIDEO_REAL_TIME) || (pCfg->iUsageType == SCREEN_CONTENT_REAL_TIME)) + ? 
WelsCheckRefFrameLimitationNumRefFirst (pLogCtx, pCfg) + : WelsCheckRefFrameLimitationLevelIdcFirst (pLogCtx, pCfg)) { + WelsLog (pLogCtx, WELS_LOG_ERROR, "WelsCheckRefFrameLimitation failed"); + return ENC_RETURN_INVALIDINPUT; + } + return ENC_RETURN_SUCCESS; +} + + +int32_t ParamValidationExt (SLogContext* pLogCtx, SWelsSvcCodingParam* pCodingParam) { + int8_t i = 0; + int32_t iIdx = 0; + + assert (pCodingParam != NULL); + if (NULL == pCodingParam) + return ENC_RETURN_INVALIDINPUT; + + if ((pCodingParam->iUsageType != CAMERA_VIDEO_REAL_TIME) && (pCodingParam->iUsageType != SCREEN_CONTENT_REAL_TIME)) { + WelsLog (pLogCtx, WELS_LOG_ERROR, "ParamValidationExt(),Invalid usage type = %d", pCodingParam->iUsageType); + return ENC_RETURN_UNSUPPORTED_PARA; + } + if ((pCodingParam->iUsageType == SCREEN_CONTENT_REAL_TIME) && (!pCodingParam->bIsLosslessLink + && pCodingParam->bEnableLongTermReference)) { + WelsLog (pLogCtx, WELS_LOG_WARNING, + "ParamValidationExt(), setting lossy link for LTR under screen, which is not supported yet! 
Auto disabled LTR!"); + pCodingParam->bEnableLongTermReference = false; + } + if (pCodingParam->iSpatialLayerNum < 1 || pCodingParam->iSpatialLayerNum > MAX_DEPENDENCY_LAYER) { + WelsLog (pLogCtx, WELS_LOG_ERROR, "ParamValidationExt(), monitor invalid pCodingParam->iSpatialLayerNum: %d!", + pCodingParam->iSpatialLayerNum); + return ENC_RETURN_UNSUPPORTED_PARA; + } + + if (pCodingParam->iTemporalLayerNum < 1 || pCodingParam->iTemporalLayerNum > MAX_TEMPORAL_LEVEL) { + WelsLog (pLogCtx, WELS_LOG_ERROR, "ParamValidationExt(), monitor invalid pCodingParam->iTemporalLayerNum: %d!", + pCodingParam->iTemporalLayerNum); + return ENC_RETURN_UNSUPPORTED_PARA; + } + + if (pCodingParam->uiGopSize < 1 || pCodingParam->uiGopSize > MAX_GOP_SIZE) { + WelsLog (pLogCtx, WELS_LOG_ERROR, "ParamValidationExt(), monitor invalid pCodingParam->uiGopSize: %d!", + pCodingParam->uiGopSize); + return ENC_RETURN_UNSUPPORTED_PARA; + } + + + if (pCodingParam->uiIntraPeriod && pCodingParam->uiIntraPeriod < pCodingParam->uiGopSize) { + WelsLog (pLogCtx, WELS_LOG_ERROR, + "ParamValidationExt(), uiIntraPeriod(%d) should be not less than that of uiGopSize(%d) or -1 specified!", + pCodingParam->uiIntraPeriod, pCodingParam->uiGopSize); + return ENC_RETURN_UNSUPPORTED_PARA; + } + + if (pCodingParam->uiIntraPeriod && (pCodingParam->uiIntraPeriod & (pCodingParam->uiGopSize - 1)) != 0) { + WelsLog (pLogCtx, WELS_LOG_ERROR, + "ParamValidationExt(), uiIntraPeriod(%d) should be multiple of uiGopSize(%d) or -1 specified!", + pCodingParam->uiIntraPeriod, pCodingParam->uiGopSize); + return ENC_RETURN_UNSUPPORTED_PARA; + } + + //about iMultipleThreadIdc, bDeblockingParallelFlag, iLoopFilterDisableIdc, & uiSliceMode + // (1) Single Thread + // if (THREAD==1)//single thread + // no parallel_deblocking: bDeblockingParallelFlag = 0; + // (2) Multi Thread: see uiSliceMode decision + if (pCodingParam->iMultipleThreadIdc == 1) { + //now is single thread. 
no parallel deblocking, set flag=0 + pCodingParam->bDeblockingParallelFlag = false; + } else { + pCodingParam->bDeblockingParallelFlag = true; + } + + // eSpsPpsIdStrategy checkings + if (pCodingParam->iSpatialLayerNum > 1 && (!pCodingParam->bSimulcastAVC) + && (SPS_LISTING & pCodingParam->eSpsPpsIdStrategy)) { + WelsLog (pLogCtx, WELS_LOG_WARNING, + "ParamValidationExt(), eSpsPpsIdStrategy setting (%d) with multiple svc SpatialLayers (%d) not supported! eSpsPpsIdStrategy adjusted to CONSTANT_ID", + pCodingParam->eSpsPpsIdStrategy, pCodingParam->iSpatialLayerNum); + pCodingParam->eSpsPpsIdStrategy = CONSTANT_ID; + } + if (pCodingParam->iUsageType == SCREEN_CONTENT_REAL_TIME && (SPS_LISTING & pCodingParam->eSpsPpsIdStrategy)) { + WelsLog (pLogCtx, WELS_LOG_WARNING, + "ParamValidationExt(), eSpsPpsIdStrategy setting (%d) with iUsageType (%d) not supported! eSpsPpsIdStrategy adjusted to CONSTANT_ID", + pCodingParam->eSpsPpsIdStrategy, pCodingParam->iUsageType); + pCodingParam->eSpsPpsIdStrategy = CONSTANT_ID; + } + + if (pCodingParam->bSimulcastAVC && (SPS_LISTING & pCodingParam->eSpsPpsIdStrategy)) { + WelsLog (pLogCtx, WELS_LOG_INFO, + "ParamValidationExt(), eSpsPpsIdStrategy(%d) under bSimulcastAVC(%d) not supported yet, adjusted to INCREASING_ID", + pCodingParam->eSpsPpsIdStrategy, pCodingParam->bSimulcastAVC); + pCodingParam->eSpsPpsIdStrategy = INCREASING_ID; + } + + if (pCodingParam->bSimulcastAVC && pCodingParam->bPrefixNalAddingCtrl) { + WelsLog (pLogCtx, WELS_LOG_INFO, + "ParamValidationExt(), bSimulcastAVC(%d) is not compatible with bPrefixNalAddingCtrl(%d) true, adjusted bPrefixNalAddingCtrl to false", + pCodingParam->eSpsPpsIdStrategy, pCodingParam->bSimulcastAVC); + pCodingParam->bPrefixNalAddingCtrl = false; + } + + for (i = 0; i < pCodingParam->iSpatialLayerNum; ++ i) { + SSpatialLayerConfig* pSpatialLayer = &pCodingParam->sSpatialLayers[i]; + int32_t kiPicWidth = pSpatialLayer->iVideoWidth; + int32_t kiPicHeight = pSpatialLayer->iVideoHeight; + 
uint32_t iMbWidth = 0; + uint32_t iMbHeight = 0; + int32_t iMbNumInFrame = 0; + uint32_t iMaxSliceNum = MAX_SLICES_NUM; + int32_t iReturn = 0; + + if ((pCodingParam->iPicWidth > 0) && (pCodingParam->iPicHeight > 0) + && (kiPicWidth == 0) && (kiPicHeight == 0) + && (pCodingParam->iSpatialLayerNum == 1)) { + kiPicWidth = pSpatialLayer->iVideoWidth = pCodingParam->iPicWidth; + kiPicHeight = pSpatialLayer->iVideoHeight = pCodingParam->iPicHeight; + WelsLog (pLogCtx, WELS_LOG_DEBUG, + "ParamValidationExt(), layer resolution is not set, set to general resolution %d x %d", + pSpatialLayer->iVideoWidth, pSpatialLayer->iVideoHeight); + } + + if ((kiPicWidth <= 0) || (kiPicHeight <= 0) || (kiPicWidth * kiPicHeight > (MAX_MBS_PER_FRAME << 8))) { + WelsLog (pLogCtx, WELS_LOG_ERROR, + "ParamValidationExt(), width > 0, height > 0, width * height <= %d, invalid %d x %d in dependency layer settings!", + (MAX_MBS_PER_FRAME << 8), kiPicWidth, kiPicHeight); + return ENC_RETURN_UNSUPPORTED_PARA; + } + if ((kiPicWidth & 0x0F) != 0 || (kiPicHeight & 0x0F) != 0) { + WelsLog (pLogCtx, WELS_LOG_ERROR, + "ParamValidationExt(), in layer #%d iWidth x iHeight(%d x %d) both should be multiple of 16, can not support with arbitrary size currently!", + i, kiPicWidth, kiPicHeight); + return ENC_RETURN_UNSUPPORTED_PARA; + } + + if (pSpatialLayer->sSliceArgument.uiSliceMode >= SM_RESERVED) { + WelsLog (pLogCtx, WELS_LOG_ERROR, "ParamValidationExt(), invalid uiSliceMode (%d) settings!", + pSpatialLayer->sSliceArgument.uiSliceMode); + return ENC_RETURN_UNSUPPORTED_PARA; + } + if ((pCodingParam->uiMaxNalSize != 0) && (pSpatialLayer->sSliceArgument.uiSliceMode != SM_SIZELIMITED_SLICE)) { + WelsLog (pLogCtx, WELS_LOG_WARNING, + "ParamValidationExt(), current layer %d uiSliceMode (%d) settings may not fulfill MaxNalSize = %d", i, + pSpatialLayer->sSliceArgument.uiSliceMode, pCodingParam->uiMaxNalSize); + } + CheckProfileSetting (pLogCtx, pCodingParam, i, pSpatialLayer->uiProfileIdc); + CheckLevelSetting 
(pLogCtx, pCodingParam, i, pSpatialLayer->uiLevelIdc); + //check pSlice settings under multi-pSlice + if (kiPicWidth <= 16 && kiPicHeight <= 16) { + //only have one MB, set to single_slice + pSpatialLayer->sSliceArgument.uiSliceMode = SM_SINGLE_SLICE; + } + switch (pSpatialLayer->sSliceArgument.uiSliceMode) { + case SM_SINGLE_SLICE: + pSpatialLayer->sSliceArgument.uiSliceNum = 1; + pSpatialLayer->sSliceArgument.uiSliceSizeConstraint = 0; + for (iIdx = 0; iIdx < MAX_SLICES_NUM; iIdx++) { + pSpatialLayer->sSliceArgument.uiSliceMbNum[iIdx] = 0; + } + break; + case SM_FIXEDSLCNUM_SLICE: { + iReturn = SliceArgumentValidationFixedSliceMode (pLogCtx, &pSpatialLayer->sSliceArgument, pCodingParam->iRCMode, + kiPicWidth, kiPicHeight); + if (iReturn) + return ENC_RETURN_UNSUPPORTED_PARA; + } + break; + case SM_RASTER_SLICE: { + pSpatialLayer->sSliceArgument.uiSliceSizeConstraint = 0; + + iMbWidth = (kiPicWidth + 15) >> 4; + iMbHeight = (kiPicHeight + 15) >> 4; + iMbNumInFrame = iMbWidth * iMbHeight; + iMaxSliceNum = MAX_SLICES_NUM; + if (pSpatialLayer->sSliceArgument.uiSliceMbNum[0] == 0) { + if (iMbHeight > iMaxSliceNum) { + WelsLog (pLogCtx, WELS_LOG_ERROR, "ParamValidationExt(), invalid uiSliceNum (%d) settings more than MAX(%d)!", + iMbHeight, MAX_SLICES_NUM); + return ENC_RETURN_UNSUPPORTED_PARA; + } + pSpatialLayer->sSliceArgument.uiSliceNum = iMbHeight; + for (uint32_t j = 0; j < iMbHeight; j++) { + pSpatialLayer->sSliceArgument.uiSliceMbNum[j] = iMbWidth; + } + if (!CheckRowMbMultiSliceSetting (iMbWidth, + &pSpatialLayer->sSliceArgument)) { // verify interleave mode settings + WelsLog (pLogCtx, WELS_LOG_ERROR, "ParamValidationExt(), invalid uiSliceMbNum (%d) settings!", + pSpatialLayer->sSliceArgument.uiSliceMbNum[0]); + return ENC_RETURN_UNSUPPORTED_PARA; + } + break; + } + + if (!CheckRasterMultiSliceSetting (iMbNumInFrame, + &pSpatialLayer->sSliceArgument)) { // verify interleave mode settings + WelsLog (pLogCtx, WELS_LOG_ERROR, "ParamValidationExt(), invalid 
uiSliceMbNum (%d) settings!", + pSpatialLayer->sSliceArgument.uiSliceMbNum[0]); + return ENC_RETURN_UNSUPPORTED_PARA; + } + if (pSpatialLayer->sSliceArgument.uiSliceNum <= 0 + || pSpatialLayer->sSliceArgument.uiSliceNum > iMaxSliceNum) { // verify interleave mode settings + WelsLog (pLogCtx, WELS_LOG_ERROR, "ParamValidationExt(), invalid uiSliceNum (%d) in SM_RASTER_SLICE settings!", + pSpatialLayer->sSliceArgument.uiSliceNum); + return ENC_RETURN_UNSUPPORTED_PARA; + } + if (pSpatialLayer->sSliceArgument.uiSliceNum == 1) { + WelsLog (pLogCtx, WELS_LOG_WARNING, + "ParamValidationExt(), pSlice setting for SM_RASTER_SLICE now turn to SM_SINGLE_SLICE!"); + pSpatialLayer->sSliceArgument.uiSliceMode = SM_SINGLE_SLICE; + break; + } + if ((pCodingParam->iRCMode != RC_OFF_MODE) && pSpatialLayer->sSliceArgument.uiSliceNum > 1) { + WelsLog (pLogCtx, WELS_LOG_ERROR, "ParamValidationExt(), WARNING: GOM based RC do not support SM_RASTER_SLICE!"); + } + // considering the coding efficient and performance, iCountMbNum constraint by MIN_NUM_MB_PER_SLICE condition of multi-pSlice mode settting + if (iMbNumInFrame <= MIN_NUM_MB_PER_SLICE) { + pSpatialLayer->sSliceArgument.uiSliceMode = SM_SINGLE_SLICE; + pSpatialLayer->sSliceArgument.uiSliceNum = 1; + break; + } + } + break; + case SM_SIZELIMITED_SLICE: { + iMbWidth = (kiPicWidth + 15) >> 4; + iMbHeight = (kiPicHeight + 15) >> 4; + if (pSpatialLayer->sSliceArgument.uiSliceSizeConstraint <= MAX_MACROBLOCK_SIZE_IN_BYTE) { + WelsLog (pLogCtx, WELS_LOG_ERROR, + "ParamValidationExt(), invalid iSliceSize (%d) settings!should be larger than MAX_MACROBLOCK_SIZE_IN_BYTE(%d)", + pSpatialLayer->sSliceArgument.uiSliceSizeConstraint, MAX_MACROBLOCK_SIZE_IN_BYTE); + return ENC_RETURN_UNSUPPORTED_PARA; + } + + if (pCodingParam->uiMaxNalSize > 0) { + if (pCodingParam->uiMaxNalSize < (NAL_HEADER_ADD_0X30BYTES + MAX_MACROBLOCK_SIZE_IN_BYTE)) { + WelsLog (pLogCtx, WELS_LOG_ERROR, + "ParamValidationExt(), invalid uiMaxNalSize (%d) settings! 
should be larger than (NAL_HEADER_ADD_0X30BYTES + MAX_MACROBLOCK_SIZE_IN_BYTE)(%d)", + pCodingParam->uiMaxNalSize, (NAL_HEADER_ADD_0X30BYTES + MAX_MACROBLOCK_SIZE_IN_BYTE)); + return ENC_RETURN_UNSUPPORTED_PARA; + } + + if (pSpatialLayer->sSliceArgument.uiSliceSizeConstraint > (pCodingParam->uiMaxNalSize - + NAL_HEADER_ADD_0X30BYTES)) { + WelsLog (pLogCtx, WELS_LOG_WARNING, + "ParamValidationExt(), slice mode = SM_SIZELIMITED_SLICE, uiSliceSizeConstraint = %d ,uiMaxNalsize = %d, will take uiMaxNalsize!", + pSpatialLayer->sSliceArgument.uiSliceSizeConstraint, pCodingParam->uiMaxNalSize); + pSpatialLayer->sSliceArgument.uiSliceSizeConstraint = pCodingParam->uiMaxNalSize - NAL_HEADER_ADD_0X30BYTES; + } + } + pSpatialLayer->sSliceArgument.uiSliceSizeConstraint -= NAL_HEADER_ADD_0X30BYTES; + } + break; + default: { + WelsLog (pLogCtx, WELS_LOG_ERROR, "ParamValidationExt(), invalid uiSliceMode (%d) settings!", + pCodingParam->sSpatialLayers[0].sSliceArgument.uiSliceMode); + return ENC_RETURN_UNSUPPORTED_PARA; + + } + break; + } + } + for (i = 0; i < pCodingParam->iSpatialLayerNum; ++ i) { + SSpatialLayerConfig* pLayerInfo = &pCodingParam->sSpatialLayers[i]; + if ((pLayerInfo->uiProfileIdc == PRO_BASELINE) || (pLayerInfo->uiProfileIdc == PRO_SCALABLE_BASELINE)) { + if (pCodingParam->iEntropyCodingModeFlag != 0) { + pCodingParam->iEntropyCodingModeFlag = 0; + WelsLog (pLogCtx, WELS_LOG_WARNING, "layerId(%d) Profile is baseline, Change CABAC to CAVLC", i); + } + } else if (pLayerInfo->uiProfileIdc == PRO_UNKNOWN) { + if ((i == 0) || pCodingParam->bSimulcastAVC) { + pLayerInfo->uiProfileIdc = (pCodingParam->iEntropyCodingModeFlag) ? 
PRO_HIGH : PRO_BASELINE; + } else { + pLayerInfo->uiProfileIdc = PRO_SCALABLE_BASELINE; + } + } + } + return ParamValidation (pLogCtx, pCodingParam); +} + + +void WelsEncoderApplyFrameRate (SWelsSvcCodingParam* pParam) { + SSpatialLayerInternal* pLayerParamInternal; + SSpatialLayerConfig* pLayerParam; + const float kfEpsn = 0.000001f; + const int32_t kiNumLayer = pParam->iSpatialLayerNum; + int32_t i; + const float kfMaxFrameRate = pParam->fMaxFrameRate; + float fRatio; + float fTargetOutputFrameRate; + + //set input frame rate to each layer + for (i = 0; i < kiNumLayer; i++) { + pLayerParamInternal = & (pParam->sDependencyLayers[i]); + pLayerParam = & (pParam->sSpatialLayers[i]); + fRatio = pLayerParamInternal->fOutputFrameRate / pLayerParamInternal->fInputFrameRate; + if ((kfMaxFrameRate - pLayerParamInternal->fInputFrameRate) > kfEpsn + || (kfMaxFrameRate - pLayerParamInternal->fInputFrameRate) < -kfEpsn) { + pLayerParamInternal->fInputFrameRate = kfMaxFrameRate; + fTargetOutputFrameRate = kfMaxFrameRate * fRatio; + pLayerParamInternal->fOutputFrameRate = (fTargetOutputFrameRate >= 6) ? fTargetOutputFrameRate : + (pLayerParamInternal->fInputFrameRate); + pLayerParam->fFrameRate = pLayerParamInternal->fOutputFrameRate; + //TODO:{Sijia} from design, there is no sense to have temporal layer when under 6fps even with such setting? 
+ } + } +} + +int32_t WelsEncoderApplyBitRate (SLogContext* pLogCtx, SWelsSvcCodingParam* pParam, int iLayer) { + //TODO (Sijia): this is a temporary solution which keep the ratio between layers + //but it is also possible to fulfill the bitrate of lower layer first + + SSpatialLayerConfig* pLayerParam; + const int32_t iNumLayers = pParam->iSpatialLayerNum; + int32_t i, iOrigTotalBitrate = 0; + if (iLayer == SPATIAL_LAYER_ALL) { + //read old BR + for (i = 0; i < iNumLayers; i++) { + iOrigTotalBitrate += pParam->sSpatialLayers[i].iSpatialBitrate; + } + //write new BR + float fRatio = 0.0; + for (i = 0; i < iNumLayers; i++) { + pLayerParam = & (pParam->sSpatialLayers[i]); + fRatio = pLayerParam->iSpatialBitrate / (static_cast (iOrigTotalBitrate)); + pLayerParam->iSpatialBitrate = static_cast (pParam->iTargetBitrate * fRatio); + + if (WelsBitRateVerification (pLogCtx, pLayerParam, i) != ENC_RETURN_SUCCESS) + return ENC_RETURN_UNSUPPORTED_PARA; + } + } else { + return WelsBitRateVerification (pLogCtx, & (pParam->sSpatialLayers[iLayer]), iLayer); + } + return ENC_RETURN_SUCCESS; +} +int32_t WelsEncoderApplyBitVaryRang (SLogContext* pLogCtx, SWelsSvcCodingParam* pParam, int32_t iRang) { + SSpatialLayerConfig* pLayerParam; + const int32_t iNumLayers = pParam->iSpatialLayerNum; + for (int32_t i = 0; i < iNumLayers; i++) { + pLayerParam = & (pParam->sSpatialLayers[i]); + pLayerParam->iMaxSpatialBitrate = WELS_MIN ((int) (pLayerParam->iSpatialBitrate * (1 + iRang / 100.0)), + pLayerParam->iMaxSpatialBitrate); + if (WelsBitRateVerification (pLogCtx, pLayerParam, i) != ENC_RETURN_SUCCESS) + return ENC_RETURN_UNSUPPORTED_PARA; + WelsLog (pLogCtx, WELS_LOG_INFO, + "WelsEncoderApplyBitVaryRang:UpdateMaxBitrate layerId= %d,iMaxSpatialBitrate = %d", i, pLayerParam->iMaxSpatialBitrate); + } + return ENC_RETURN_SUCCESS; +} + +/*! 
+ * \brief acquire count number of layers and NALs based on configurable paramters dependency + * \pParam pCtx sWelsEncCtx* + * \pParam pParam SWelsSvcCodingParam* + * \pParam pCountLayers pointer of count number of layers indeed + * \pParam iCountNals pointer of count number of nals indeed + * \return 0 - successful; otherwise failed + */ +int32_t AcquireLayersNals (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pParam, int32_t* pCountLayers, + int32_t* pCountNals) { + int32_t iCountNumLayers = 0; + int32_t iCountNumNals = 0; + int32_t iNumDependencyLayers = 0; + int32_t iDIndex = 0; + + if (NULL == pParam || NULL == ppCtx || NULL == *ppCtx) + return 1; + + iNumDependencyLayers = pParam->iSpatialLayerNum; + + do { + SSpatialLayerConfig* pDLayer = &pParam->sSpatialLayers[iDIndex]; +// pDLayer->ptr_cfg = pParam; + int32_t iOrgNumNals = iCountNumNals; + + //Note: Sep. 2010 + //Review this part and suggest no change, since the memory over-use + //(1) counts little to the overall performance + //(2) should not be critial even under mobile case + if (SM_SIZELIMITED_SLICE == pDLayer->sSliceArgument.uiSliceMode) { + iCountNumNals += MAX_SLICES_NUM; + // plus prefix NALs + if (iDIndex == 0) + iCountNumNals += MAX_SLICES_NUM; + // MAX_SLICES_NUM < MAX_LAYER_NUM_OF_FRAME ensured at svc_enc_slice_segment.h + if (iCountNumNals - iOrgNumNals > MAX_NAL_UNITS_IN_LAYER) { + WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_ERROR, + "AcquireLayersNals(), num_of_slice(%d) > existing slice(%d) at (iDid= %d), max=%d", + iCountNumNals, iOrgNumNals, iDIndex, MAX_NAL_UNITS_IN_LAYER); + return 1; + } + } else { /*if ( SM_SINGLE_SLICE != pDLayer->sSliceArgument.uiSliceMode )*/ + const int32_t kiNumOfSlice = GetInitialSliceNum (&pDLayer->sSliceArgument); + + // NEED check iCountNals value in case multiple slices is used + iCountNumNals += kiNumOfSlice; // for pSlice VCL NALs + // plus prefix NALs + if (iDIndex == 0) + iCountNumNals += kiNumOfSlice; + assert (iCountNumNals - iOrgNumNals <= 
MAX_NAL_UNITS_IN_LAYER); + if (kiNumOfSlice > MAX_SLICES_NUM) { + WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_ERROR, + "AcquireLayersNals(), num_of_slice(%d) > MAX_SLICES_NUM(%d) per (iDid= %d, qid= %d) settings!", + kiNumOfSlice, MAX_SLICES_NUM, iDIndex, 0); + return 1; + } + } + + if (iCountNumNals - iOrgNumNals > MAX_NAL_UNITS_IN_LAYER) { + WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_ERROR, + "AcquireLayersNals(), num_of_nals(%d) > MAX_NAL_UNITS_IN_LAYER(%d) per (iDid= %d, qid= %d) settings!", + (iCountNumNals - iOrgNumNals), MAX_NAL_UNITS_IN_LAYER, iDIndex, 0); + return 1; + } + + iCountNumLayers ++; + + ++ iDIndex; + } while (iDIndex < iNumDependencyLayers); + + if (NULL == (*ppCtx)->pFuncList || NULL == (*ppCtx)->pFuncList->pParametersetStrategy) { + WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_ERROR, + "AcquireLayersNals(), pFuncList and pParametersetStrategy needed to be initialized first!"); + return 1; + } + // count parasets + iCountNumNals += 1 + iNumDependencyLayers + (iCountNumLayers << 1) + + iCountNumLayers // plus iCountNumLayers for reserved application + + (*ppCtx)->pFuncList->pParametersetStrategy->GetAllNeededParasetNum(); + + // to check number of layers / nals / slices dependencies, 12/8/2010 + if (iCountNumLayers > MAX_LAYER_NUM_OF_FRAME) { + WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_ERROR, "AcquireLayersNals(), iCountNumLayers(%d) > MAX_LAYER_NUM_OF_FRAME(%d)!", + iCountNumLayers, MAX_LAYER_NUM_OF_FRAME); + return 1; + } + + if (NULL != pCountLayers) + *pCountLayers = iCountNumLayers; + if (NULL != pCountNals) + *pCountNals = iCountNumNals; + return 0; +} + +static void InitMbInfo (sWelsEncCtx* pEnc, SMB* pList, SDqLayer* pLayer, const int32_t kiDlayerId, + const int32_t kiMaxMbNum) { + int32_t iMbWidth = pLayer->iMbWidth; + int32_t iMbHeight = pLayer->iMbHeight; + int32_t iIdx; + int32_t iMbNum = iMbWidth * iMbHeight; + uint32_t uiNeighborAvail; + const int32_t kiOffset = (kiDlayerId & 0x01) * kiMaxMbNum; + SMVUnitXY (*pLayerMvUnitBlock4x4)[MB_BLOCK4x4_NUM] 
= (SMVUnitXY (*)[MB_BLOCK4x4_NUM]) ( + &pEnc->pMvUnitBlock4x4[MB_BLOCK4x4_NUM * kiOffset]); + int8_t (*pLayerRefIndexBlock8x8)[MB_BLOCK8x8_NUM] = (int8_t (*)[MB_BLOCK8x8_NUM]) ( + &pEnc->pRefIndexBlock4x4[MB_BLOCK8x8_NUM * kiOffset]); + + for (iIdx = 0; iIdx < iMbNum; iIdx++) { + bool bLeft; + bool bTop; + bool bLeftTop; + bool bRightTop; + int32_t iLeftXY, iTopXY, iLeftTopXY, iRightTopXY; + uint16_t uiSliceIdc; //[0..65535] > 36864 of LEVEL5.2 + + pList[iIdx].iMbX = pEnc->pStrideTab->pMbIndexX[kiDlayerId][iIdx]; + pList[iIdx].iMbY = pEnc->pStrideTab->pMbIndexY[kiDlayerId][iIdx]; + pList[iIdx].iMbXY = iIdx; + + uiSliceIdc = WelsMbToSliceIdc (pLayer, iIdx); + iLeftXY = iIdx - 1; + iTopXY = iIdx - iMbWidth; + iLeftTopXY = iTopXY - 1; + iRightTopXY = iTopXY + 1; + + bLeft = (pList[iIdx].iMbX > 0) && (uiSliceIdc == WelsMbToSliceIdc (pLayer, iLeftXY)); + bTop = (pList[iIdx].iMbY > 0) && (uiSliceIdc == WelsMbToSliceIdc (pLayer, iTopXY)); + bLeftTop = (pList[iIdx].iMbX > 0) && (pList[iIdx].iMbY > 0) && (uiSliceIdc == + WelsMbToSliceIdc (pLayer, iLeftTopXY)); + bRightTop = (pList[iIdx].iMbX < (iMbWidth - 1)) && (pList[iIdx].iMbY > 0) && (uiSliceIdc == + WelsMbToSliceIdc (pLayer, iRightTopXY)); + + uiNeighborAvail = 0; + if (bLeft) { + uiNeighborAvail |= LEFT_MB_POS; + } + if (bTop) { + uiNeighborAvail |= TOP_MB_POS; + } + if (bLeftTop) { + uiNeighborAvail |= TOPLEFT_MB_POS; + } + if (bRightTop) { + uiNeighborAvail |= TOPRIGHT_MB_POS; + } + pList[iIdx].uiSliceIdc = uiSliceIdc; // merge from svc_hd_opt_b for multiple slices coding + pList[iIdx].uiNeighborAvail = uiNeighborAvail; + uiNeighborAvail = 0; + if (pList[iIdx].iMbX >= BASE_MV_MB_NMB) + uiNeighborAvail |= LEFT_MB_POS; + if (pList[iIdx].iMbX <= (iMbWidth - 1 - BASE_MV_MB_NMB)) + uiNeighborAvail |= RIGHT_MB_POS; + if (pList[iIdx].iMbY >= BASE_MV_MB_NMB) + uiNeighborAvail |= TOP_MB_POS; + if (pList[iIdx].iMbY <= (iMbHeight - 1 - BASE_MV_MB_NMB)) + uiNeighborAvail |= BOTTOM_MB_POS; + + pList[iIdx].sMv = 
pLayerMvUnitBlock4x4[iIdx]; + pList[iIdx].pRefIndex = pLayerRefIndexBlock8x8[iIdx]; + pList[iIdx].pSadCost = &pEnc->pSadCostMb[iIdx]; + pList[iIdx].pIntra4x4PredMode = &pEnc->pIntra4x4PredModeBlocks[iIdx * INTRA_4x4_MODE_NUM]; + pList[iIdx].pNonZeroCount = &pEnc->pNonZeroCountBlocks[iIdx * MB_LUMA_CHROMA_BLOCK4x4_NUM]; + } +} + + +int32_t InitMbListD (sWelsEncCtx** ppCtx) { + int32_t iNumDlayer = (*ppCtx)->pSvcParam->iSpatialLayerNum; + int32_t iMbSize[MAX_DEPENDENCY_LAYER] = { 0 }; + int32_t iOverallMbNum = 0; + int32_t iMbWidth = 0; + int32_t iMbHeight = 0; + int32_t i; + + if (iNumDlayer > MAX_DEPENDENCY_LAYER) + return 1; + + for (i = 0; i < iNumDlayer; i++) { + iMbWidth = ((*ppCtx)->pSvcParam->sSpatialLayers[i].iVideoWidth + 15) >> 4; + iMbHeight = ((*ppCtx)->pSvcParam->sSpatialLayers[i].iVideoHeight + 15) >> 4; + iMbSize[i] = iMbWidth * iMbHeight; + iOverallMbNum += iMbSize[i]; + } + + (*ppCtx)->ppMbListD = static_cast ((*ppCtx)->pMemAlign->WelsMallocz (iNumDlayer * sizeof (SMB*), "ppMbListD")); + (*ppCtx)->ppMbListD[0] = NULL; + WELS_VERIFY_RETURN_IF (1, (*ppCtx)->ppMbListD == NULL) + (*ppCtx)->ppMbListD[0] = static_cast ((*ppCtx)->pMemAlign->WelsMallocz (iOverallMbNum * sizeof (SMB), + "ppMbListD[0]")); + WELS_VERIFY_RETURN_IF (1, (*ppCtx)->ppMbListD[0] == NULL) + (*ppCtx)->ppDqLayerList[0]->sMbDataP = (*ppCtx)->ppMbListD[0]; + InitMbInfo (*ppCtx, (*ppCtx)->ppMbListD[0], (*ppCtx)->ppDqLayerList[0], 0, iMbSize[iNumDlayer - 1]); + for (i = 1; i < iNumDlayer; i++) { + (*ppCtx)->ppMbListD[i] = (*ppCtx)->ppMbListD[i - 1] + iMbSize[i - 1]; + (*ppCtx)->ppDqLayerList[i]->sMbDataP = (*ppCtx)->ppMbListD[i]; + InitMbInfo (*ppCtx, (*ppCtx)->ppMbListD[i], (*ppCtx)->ppDqLayerList[i], i, iMbSize[iNumDlayer - 1]); + } + + return 0; +} + +void FreeSliceInLayer (SDqLayer* pDq, CMemoryAlign* pMa) { + int32_t iIdx = 0; + for (; iIdx < MAX_THREADS_NUM; iIdx ++) { + FreeSliceBuffer (pDq->sSliceBufferInfo[iIdx].pSliceBuffer, + pDq->sSliceBufferInfo[iIdx].iMaxSliceNum, + pMa, 
"pSliceBuffer"); + } +} + +void FreeDqLayer (SDqLayer*& pDq, CMemoryAlign* pMa) { + if (NULL == pDq) { + return; + } + + FreeSliceInLayer (pDq, pMa); + + if (pDq->ppSliceInLayer) { + pMa->WelsFree (pDq->ppSliceInLayer, "ppSliceInLayer"); + pDq->ppSliceInLayer = NULL; + } + + if (pDq->pFirstMbIdxOfSlice) { + pMa->WelsFree (pDq->pFirstMbIdxOfSlice, "pFirstMbIdxOfSlice"); + pDq->pFirstMbIdxOfSlice = NULL; + } + + if (pDq->pCountMbNumInSlice) { + pMa->WelsFree (pDq->pCountMbNumInSlice, "pCountMbNumInSlice"); + pDq->pCountMbNumInSlice = NULL; + } + + if (pDq->pFeatureSearchPreparation) { + ReleaseFeatureSearchPreparation (pMa, pDq->pFeatureSearchPreparation->pFeatureOfBlock); + pMa->WelsFree (pDq->pFeatureSearchPreparation, "pFeatureSearchPreparation"); + pDq->pFeatureSearchPreparation = NULL; + } + + UninitSlicePEncCtx (pDq, pMa); + pDq->iMaxSliceNum = 0; + + pMa->WelsFree (pDq, "pDqLayer"); + pDq = NULL; +} + +void FreeRefList (SRefList*& pRefList, CMemoryAlign* pMa, const int iMaxNumRefFrame) { + if (NULL == pRefList) { + return; + } + + int32_t iRef = 0; + do { + if (pRefList->pRef[iRef] != NULL) { + FreePicture (pMa, &pRefList->pRef[iRef]); + } + ++ iRef; + } while (iRef < 1 + iMaxNumRefFrame); + + pMa->WelsFree (pRefList, "pRefList"); + pRefList = NULL; +} + +/*! 
+ * \brief initialize ppDqLayerList and slicepEncCtx_list due to count number of layers available + * \pParam pCtx sWelsEncCtx* + * \return 0 - successful; otherwise failed + */ +static inline int32_t InitDqLayers (sWelsEncCtx** ppCtx, SExistingParasetList* pExistingParasetList) { + SWelsSvcCodingParam* pParam = NULL; + SWelsSPS* pSps = NULL; + SSubsetSps* pSubsetSps = NULL; + SWelsPPS* pPps = NULL; + CMemoryAlign* pMa = NULL; + int32_t iDlayerCount = 0; + int32_t iDlayerIndex = 0; + int32_t iSpsId = 0; + uint32_t iPpsId = 0; + uint32_t iNumRef = 0; + int32_t iResult = 0; + + if (NULL == ppCtx || NULL == *ppCtx) + return 1; + + pMa = (*ppCtx)->pMemAlign; + pParam = (*ppCtx)->pSvcParam; + iDlayerCount = pParam->iSpatialLayerNum; + iNumRef = pParam->iMaxNumRefFrame; + + const int32_t kiFeatureStrategyIndex = FME_DEFAULT_FEATURE_INDEX; + const int32_t kiMe16x16 = ME_DIA_CROSS; + const int32_t kiMe8x8 = ME_DIA_CROSS_FME; + const int32_t kiNeedFeatureStorage = (pParam->iUsageType != SCREEN_CONTENT_REAL_TIME) ? 0 : + ((kiFeatureStrategyIndex << 16) + ((kiMe16x16 & 0x00FF) << 8) + (kiMe8x8 & 0x00FF)); + + iDlayerIndex = 0; + while (iDlayerIndex < iDlayerCount) { + SRefList* pRefList = NULL; + uint32_t i = 0; + const int32_t kiWidth = pParam->sSpatialLayers[iDlayerIndex].iVideoWidth; + const int32_t kiHeight = pParam->sSpatialLayers[iDlayerIndex].iVideoHeight; + int32_t iPicWidth = WELS_ALIGN (kiWidth, MB_WIDTH_LUMA) + (PADDING_LENGTH << 1); // with iWidth of horizon + int32_t iPicChromaWidth = iPicWidth >> 1; + + iPicWidth = WELS_ALIGN (iPicWidth, + 32); // 32(or 16 for chroma below) to match original imp. 
here instead of iCacheLineSize + iPicChromaWidth = WELS_ALIGN (iPicChromaWidth, 16); + + WelsGetEncBlockStrideOffset ((*ppCtx)->pStrideTab->pStrideEncBlockOffset[iDlayerIndex], iPicWidth, iPicChromaWidth); + + // pRef list + pRefList = (SRefList*)pMa->WelsMallocz (sizeof (SRefList), "pRefList"); + WELS_VERIFY_RETURN_IF (1, (NULL == pRefList)) + do { + pRefList->pRef[i] = AllocPicture (pMa, kiWidth, kiHeight, true, + (iDlayerIndex == iDlayerCount - 1) ? kiNeedFeatureStorage : 0); // to use actual size of current layer + WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pRefList->pRef[i]), FreeRefList (pRefList, pMa, iNumRef)) + ++ i; + } while (i < 1 + iNumRef); + + pRefList->pNextBuffer = pRefList->pRef[0]; + (*ppCtx)->ppRefPicListExt[iDlayerIndex] = pRefList; + ++ iDlayerIndex; + } + + iDlayerIndex = 0; + while (iDlayerIndex < iDlayerCount) { + SDqLayer* pDqLayer = NULL; + SSpatialLayerConfig* pDlayer = &pParam->sSpatialLayers[iDlayerIndex]; + SSpatialLayerInternal* pParamInternal = &pParam->sDependencyLayers[iDlayerIndex]; + const int32_t kiMbW = (pDlayer->iVideoWidth + 0x0f) >> 4; + const int32_t kiMbH = (pDlayer->iVideoHeight + 0x0f) >> 4; + + pParamInternal->iCodingIndex = 0; + pParamInternal->iFrameIndex = 0; + pParamInternal->iFrameNum = 0; + pParamInternal->iPOC = 0; + pParamInternal->uiIdrPicId = 0; + pParamInternal->bEncCurFrmAsIdrFlag = true; // make sure first frame is IDR + // pDq layers list + pDqLayer = (SDqLayer*)pMa->WelsMallocz (sizeof (SDqLayer), "pDqLayer"); + WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pDqLayer), FreeDqLayer (pDqLayer, pMa)) + + pDqLayer->bNeedAdjustingSlicing = false; + + pDqLayer->iMbWidth = kiMbW; + pDqLayer->iMbHeight = kiMbH; + + int32_t iMaxSliceNum = 1; + const int32_t kiSliceNum = GetInitialSliceNum (&pDlayer->sSliceArgument); + if (iMaxSliceNum < kiSliceNum) + iMaxSliceNum = kiSliceNum; + pDqLayer->iMaxSliceNum = iMaxSliceNum; + + iResult = InitSliceInLayer (*ppCtx, pDqLayer, iDlayerIndex, pMa); + if (iResult) { + WelsLog (& 
(*ppCtx)->sLogCtx, WELS_LOG_WARNING, "InitDqLayers(), InitSliceInLayer failed(%d)!", iResult); + FreeDqLayer (pDqLayer, pMa); + return iResult; + } + + //deblocking parameters initialization + //target-layer deblocking + pDqLayer->iLoopFilterDisableIdc = pParam->iLoopFilterDisableIdc; + pDqLayer->iLoopFilterAlphaC0Offset = (pParam->iLoopFilterAlphaC0Offset) << 1; + pDqLayer->iLoopFilterBetaOffset = (pParam->iLoopFilterBetaOffset) << 1; + //parallel deblocking + pDqLayer->bDeblockingParallelFlag = pParam->bDeblockingParallelFlag; + + //deblocking parameter adjustment + if (SM_SINGLE_SLICE == pDlayer->sSliceArgument.uiSliceMode) { + //iLoopFilterDisableIdc: will be 0 or 1 under single_slice + if (2 == pParam->iLoopFilterDisableIdc) { + pDqLayer->iLoopFilterDisableIdc = 0; + } + //bDeblockingParallelFlag + pDqLayer->bDeblockingParallelFlag = false; + } else { + //multi-pSlice + if (0 == pDqLayer->iLoopFilterDisableIdc) { + pDqLayer->bDeblockingParallelFlag = false; + } + } + + // + if (kiNeedFeatureStorage && iDlayerIndex == iDlayerCount - 1) { + pDqLayer->pFeatureSearchPreparation = static_cast (pMa->WelsMallocz (sizeof ( + SFeatureSearchPreparation), "pFeatureSearchPreparation")); + WELS_VERIFY_RETURN_IF (1, NULL == pDqLayer->pFeatureSearchPreparation) + int32_t iReturn = RequestFeatureSearchPreparation (pMa, pDlayer->iVideoWidth, pDlayer->iVideoHeight, + kiNeedFeatureStorage, + pDqLayer->pFeatureSearchPreparation); + WELS_VERIFY_RETURN_IF (1, ENC_RETURN_SUCCESS != iReturn) + } else { + pDqLayer->pFeatureSearchPreparation = NULL; + } + + (*ppCtx)->ppDqLayerList[iDlayerIndex] = pDqLayer; + + ++ iDlayerIndex; + } + + // for dynamically malloc for parameter sets memory instead of maximal items for standard to reduce size, 3/18/2010 + WELS_VERIFY_RETURN_IF (1, (NULL == (*ppCtx)->pFuncList)) + WELS_VERIFY_RETURN_IF (1, (NULL == (*ppCtx)->pFuncList->pParametersetStrategy)) + const int32_t kiNeededSpsNum = (*ppCtx)->pFuncList->pParametersetStrategy->GetNeededSpsNum(); + 
const int32_t kiNeededSubsetSpsNum = (*ppCtx)->pFuncList->pParametersetStrategy->GetNeededSubsetSpsNum(); + (*ppCtx)->pSpsArray = (SWelsSPS*)pMa->WelsMallocz (kiNeededSpsNum * sizeof (SWelsSPS), "pSpsArray"); + WELS_VERIFY_RETURN_IF (1, (NULL == (*ppCtx)->pSpsArray)) + if (kiNeededSubsetSpsNum > 0) { + (*ppCtx)->pSubsetArray = (SSubsetSps*)pMa->WelsMallocz (kiNeededSubsetSpsNum * sizeof (SSubsetSps), "pSubsetArray"); + WELS_VERIFY_RETURN_IF (1, (NULL == (*ppCtx)->pSubsetArray)) + } else { + (*ppCtx)->pSubsetArray = NULL; + } + + // PPS + const int32_t kiNeededPpsNum = (*ppCtx)->pFuncList->pParametersetStrategy->GetNeededPpsNum(); + (*ppCtx)->pPPSArray = (SWelsPPS*)pMa->WelsMallocz (kiNeededPpsNum * sizeof (SWelsPPS), "pPPSArray"); + WELS_VERIFY_RETURN_IF (1, (NULL == (*ppCtx)->pPPSArray)) + + (*ppCtx)->pFuncList->pParametersetStrategy->LoadPrevious (pExistingParasetList, (*ppCtx)->pSpsArray, + (*ppCtx)->pSubsetArray, (*ppCtx)->pPPSArray); + + + (*ppCtx)->pDqIdcMap = (SDqIdc*)pMa->WelsMallocz (iDlayerCount * sizeof (SDqIdc), "pDqIdcMap"); + WELS_VERIFY_RETURN_IF (1, (NULL == (*ppCtx)->pDqIdcMap)) + + iDlayerIndex = 0; + while (iDlayerIndex < iDlayerCount) { + SDqIdc* pDqIdc = & (*ppCtx)->pDqIdcMap[iDlayerIndex]; + const bool bUseSubsetSps = (!pParam->bSimulcastAVC) && (iDlayerIndex > BASE_DEPENDENCY_ID); + SSpatialLayerConfig* pDlayerParam = &pParam->sSpatialLayers[iDlayerIndex]; + bool bSvcBaselayer = (!pParam->bSimulcastAVC) && (iDlayerCount > BASE_DEPENDENCY_ID) + && (iDlayerIndex == BASE_DEPENDENCY_ID); + pDqIdc->uiSpatialId = iDlayerIndex; + + iSpsId = (*ppCtx)->pFuncList->pParametersetStrategy->GenerateNewSps (*ppCtx, bUseSubsetSps, iDlayerIndex, + iDlayerCount, iSpsId, pSps, pSubsetSps, bSvcBaselayer); + WELS_VERIFY_RETURN_IF (ENC_RETURN_UNSUPPORTED_PARA, (0 > iSpsId)) + if (!bUseSubsetSps) { + pSps = & ((*ppCtx)->pSpsArray[iSpsId]); + } else { + pSubsetSps = & ((*ppCtx)->pSubsetArray[iSpsId]); + } + + iPpsId = 
(*ppCtx)->pFuncList->pParametersetStrategy->InitPps ((*ppCtx), iSpsId, pSps, pSubsetSps, iPpsId, true, + bUseSubsetSps, pParam->iEntropyCodingModeFlag != 0); + pPps = & ((*ppCtx)->pPPSArray[iPpsId]); + + // Not using FMO in SVC coding so far, come back if need FMO + { + iResult = InitSlicePEncCtx ((*ppCtx)->ppDqLayerList[iDlayerIndex], + (*ppCtx)->pMemAlign, + false, + pSps->iMbWidth, + pSps->iMbHeight, + & (pDlayerParam->sSliceArgument), + pPps); + if (iResult) { + WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_WARNING, "InitDqLayers(), InitSlicePEncCtx failed(%d)!", iResult); + return iResult; + } + } + pDqIdc->iSpsId = iSpsId; + pDqIdc->iPpsId = iPpsId; + + if ((pParam->bSimulcastAVC) || (bUseSubsetSps)) + ++ iSpsId; + ++ iPpsId; + if (bUseSubsetSps) { + ++ (*ppCtx)->iSubsetSpsNum; + } else { + ++ (*ppCtx)->iSpsNum; + } + ++ (*ppCtx)->iPpsNum; + + ++ iDlayerIndex; + } + + (*ppCtx)->pFuncList->pParametersetStrategy->UpdateParaSetNum ((*ppCtx)); + return ENC_RETURN_SUCCESS; +} + +int32_t AllocStrideTables (sWelsEncCtx** ppCtx, const int32_t kiNumSpatialLayers) { + CMemoryAlign* pMa = (*ppCtx)->pMemAlign; + SWelsSvcCodingParam* pParam = (*ppCtx)->pSvcParam; + SStrideTables* pPtr = NULL; + int16_t* pTmpRow = NULL, *pRowX = NULL, *pRowY = NULL, *p = NULL; + uint8_t* pBase = NULL; + uint8_t* pBaseDec = NULL, *pBaseEnc = NULL, *pBaseMbX = NULL, *pBaseMbY = NULL; + struct { + int32_t iMbWidth; + int32_t iCountMbNum; // count number of SMB in each spatial + int32_t iSizeAllMbAlignCache; // cache line size aligned in each spatial + } sMbSizeMap[MAX_DEPENDENCY_LAYER] = {{ 0 }}; + int32_t iLineSizeY[MAX_DEPENDENCY_LAYER][2] = {{ 0 }}; + int32_t iLineSizeUV[MAX_DEPENDENCY_LAYER][2] = {{ 0 }}; + int32_t iMapSpatialIdx[MAX_DEPENDENCY_LAYER][2] = {{ 0 }}; + int32_t iSizeDec = 0; + int32_t iSizeEnc = 0; + int32_t iCountLayersNeedCs[2] = {0}; + const int32_t kiUnit1Size = 24 * sizeof (int32_t); + int32_t iUnit2Size = 0; + int32_t iNeedAllocSize = 0; + int32_t iRowSize = 0; + int16_t 
iMaxMbWidth = 0; + int16_t iMaxMbHeight = 0; + int32_t i = 0; + int32_t iSpatialIdx = 0; + int32_t iTemporalIdx = 0; + int32_t iCntTid = 0; + + if (kiNumSpatialLayers <= 0 || kiNumSpatialLayers > MAX_DEPENDENCY_LAYER) + return 1; + + pPtr = (SStrideTables*)pMa->WelsMallocz (sizeof (SStrideTables), "SStrideTables"); + if (NULL == pPtr) + return 1; + (*ppCtx)->pStrideTab = pPtr; + + iCntTid = pParam->iTemporalLayerNum > 1 ? 2 : 1; + + iSpatialIdx = 0; + while (iSpatialIdx < kiNumSpatialLayers) { + const int32_t kiTmpWidth = (pParam->sSpatialLayers[iSpatialIdx].iVideoWidth + 15) >> 4; + const int32_t kiTmpHeight = (pParam->sSpatialLayers[iSpatialIdx].iVideoHeight + 15) >> 4; + int32_t iNumMb = kiTmpWidth * kiTmpHeight; + + sMbSizeMap[iSpatialIdx].iMbWidth = kiTmpWidth; + sMbSizeMap[iSpatialIdx].iCountMbNum = iNumMb; + + iNumMb *= sizeof (int16_t); + sMbSizeMap[iSpatialIdx].iSizeAllMbAlignCache = iNumMb; + iUnit2Size += iNumMb; + + ++ iSpatialIdx; + } + + // Adaptive size_cs, size_fdec by implementation dependency + iTemporalIdx = 0; + while (iTemporalIdx < iCntTid) { + const bool kbBaseTemporalFlag = (iTemporalIdx == 0); + + iSpatialIdx = 0; + while (iSpatialIdx < kiNumSpatialLayers) { + SSpatialLayerConfig* fDlp = &pParam->sSpatialLayers[iSpatialIdx]; + + const int32_t kiWidthPad = WELS_ALIGN (fDlp->iVideoWidth, 16) + (PADDING_LENGTH << 1); + iLineSizeY[iSpatialIdx][kbBaseTemporalFlag] = WELS_ALIGN (kiWidthPad, 32); + iLineSizeUV[iSpatialIdx][kbBaseTemporalFlag] = WELS_ALIGN ((kiWidthPad >> 1), 16); + + iMapSpatialIdx[iCountLayersNeedCs[kbBaseTemporalFlag]][kbBaseTemporalFlag] = iSpatialIdx; + ++ iCountLayersNeedCs[kbBaseTemporalFlag]; + ++ iSpatialIdx; + } + ++ iTemporalIdx; + } + iSizeDec = kiUnit1Size * (iCountLayersNeedCs[0] + iCountLayersNeedCs[1]); + iSizeEnc = kiUnit1Size * kiNumSpatialLayers; + + iNeedAllocSize = iSizeDec + iSizeEnc + (iUnit2Size << 1); + + pBase = (uint8_t*)pMa->WelsMallocz (iNeedAllocSize, "pBase"); + if (NULL == pBase) { + return 1; + } + 
+ pBaseDec = pBase; // iCountLayersNeedCs + pBaseEnc = pBaseDec + iSizeDec; // iNumSpatialLayers + pBaseMbX = pBaseEnc + iSizeEnc; // iNumSpatialLayers + pBaseMbY = pBaseMbX + iUnit2Size; // iNumSpatialLayers + + iTemporalIdx = 0; + while (iTemporalIdx < iCntTid) { + const bool kbBaseTemporalFlag = (iTemporalIdx == 0); + + iSpatialIdx = 0; + while (iSpatialIdx < iCountLayersNeedCs[kbBaseTemporalFlag]) { + const int32_t kiActualSpatialIdx = iMapSpatialIdx[iSpatialIdx][kbBaseTemporalFlag]; + const int32_t kiLumaWidth = iLineSizeY[kiActualSpatialIdx][kbBaseTemporalFlag]; + const int32_t kiChromaWidth = iLineSizeUV[kiActualSpatialIdx][kbBaseTemporalFlag]; + + WelsGetEncBlockStrideOffset ((int32_t*)pBaseDec, kiLumaWidth, kiChromaWidth); + + pPtr->pStrideDecBlockOffset[kiActualSpatialIdx][kbBaseTemporalFlag] = (int32_t*)pBaseDec; + pBaseDec += kiUnit1Size; + + ++ iSpatialIdx; + } + ++ iTemporalIdx; + } + iTemporalIdx = 0; + while (iTemporalIdx < iCntTid) { + const bool kbBaseTemporalFlag = (iTemporalIdx == 0); + + iSpatialIdx = 0; + while (iSpatialIdx < kiNumSpatialLayers) { + int32_t iMatchIndex = 0; + bool bInMap = false; + bool bMatchFlag = false; + + i = 0; + while (i < iCountLayersNeedCs[kbBaseTemporalFlag]) { + const int32_t kiActualIdx = iMapSpatialIdx[i][kbBaseTemporalFlag]; + if (kiActualIdx == iSpatialIdx) { + bInMap = true; + break; + } + if (!bMatchFlag) { + iMatchIndex = kiActualIdx; + bMatchFlag = true; + } + ++ i; + } + + if (bInMap) { + ++ iSpatialIdx; + continue; + } + + // not in spatial map and assign match one to it + pPtr->pStrideDecBlockOffset[iSpatialIdx][kbBaseTemporalFlag] = + pPtr->pStrideDecBlockOffset[iMatchIndex][kbBaseTemporalFlag]; + + ++ iSpatialIdx; + } + ++ iTemporalIdx; + } + + iSpatialIdx = 0; + while (iSpatialIdx < kiNumSpatialLayers) { + const int32_t kiAllocMbSize = sMbSizeMap[iSpatialIdx].iSizeAllMbAlignCache; + + pPtr->pStrideEncBlockOffset[iSpatialIdx] = (int32_t*)pBaseEnc; + + pPtr->pMbIndexX[iSpatialIdx] = (int16_t*)pBaseMbX; + 
pPtr->pMbIndexY[iSpatialIdx] = (int16_t*)pBaseMbY; + + pBaseEnc += kiUnit1Size; + pBaseMbX += kiAllocMbSize; + pBaseMbY += kiAllocMbSize; + + ++ iSpatialIdx; + } + + while (iSpatialIdx < MAX_DEPENDENCY_LAYER) { + pPtr->pStrideDecBlockOffset[iSpatialIdx][0] = NULL; + pPtr->pStrideDecBlockOffset[iSpatialIdx][1] = NULL; + pPtr->pStrideEncBlockOffset[iSpatialIdx] = NULL; + pPtr->pMbIndexX[iSpatialIdx] = NULL; + pPtr->pMbIndexY[iSpatialIdx] = NULL; + + ++ iSpatialIdx; + } + + // initialize pMbIndexX and pMbIndexY tables as below + + iMaxMbWidth = sMbSizeMap[kiNumSpatialLayers - 1].iMbWidth; + iMaxMbWidth = WELS_ALIGN (iMaxMbWidth, 4); // 4 loops for int16_t required introduced as below + iRowSize = iMaxMbWidth * sizeof (int16_t); + + pTmpRow = (int16_t*)pMa->WelsMallocz (iRowSize, "pTmpRow"); + if (NULL == pTmpRow) { + return 1; + } + pRowX = pTmpRow; + pRowY = pRowX; + // initialize pRowX & pRowY + i = 0; + p = pRowX; + while (i < iMaxMbWidth) { + *p = i; + * (p + 1) = 1 + i; + * (p + 2) = 2 + i; + * (p + 3) = 3 + i; + + p += 4; + i += 4; + } + + iSpatialIdx = kiNumSpatialLayers; + while (--iSpatialIdx >= 0) { + int16_t* pMbIndexX = pPtr->pMbIndexX[iSpatialIdx]; + const int32_t kiMbWidth = sMbSizeMap[iSpatialIdx].iMbWidth; + const int32_t kiMbHeight = sMbSizeMap[iSpatialIdx].iCountMbNum / kiMbWidth; + const int32_t kiLineSize = kiMbWidth * sizeof (int16_t); + + i = 0; + while (i < kiMbHeight) { + memcpy (pMbIndexX, pRowX, kiLineSize); // confirmed_safe_unsafe_usage + + pMbIndexX += kiMbWidth; + ++ i; + } + } + + memset (pRowY, 0, iRowSize); + iMaxMbHeight = sMbSizeMap[kiNumSpatialLayers - 1].iCountMbNum / sMbSizeMap[kiNumSpatialLayers - 1].iMbWidth; + i = 0; + for (;;) { + ENFORCE_STACK_ALIGN_1D (int16_t, t, 4, 16) + + int32_t t32 = 0; + int16_t j = 0; + + for (iSpatialIdx = kiNumSpatialLayers - 1; iSpatialIdx >= 0; -- iSpatialIdx) { + const int32_t kiMbWidth = sMbSizeMap[iSpatialIdx].iMbWidth; + const int32_t kiMbHeight = sMbSizeMap[iSpatialIdx].iCountMbNum / 
kiMbWidth; + const int32_t kiLineSize = kiMbWidth * sizeof (int16_t); + int16_t* pMbIndexY = pPtr->pMbIndexY[iSpatialIdx] + i * kiMbWidth; + + if (i < kiMbHeight) { + memcpy (pMbIndexY, pRowY, kiLineSize); // confirmed_safe_unsafe_usage + } + } + ++ i; + if (i >= iMaxMbHeight) + break; + + t32 = i | (i << 16); + ST32 (t, t32); + ST32 (t + 2, t32); + + p = pRowY; + while (j < iMaxMbWidth) { + ST64 (p, LD64 (t)); + + p += 4; + j += 4; + } + } + + pMa->WelsFree (pTmpRow, "pTmpRow"); + pTmpRow = NULL; + + return 0; +} +int32_t RequestMemoryVaaScreen (SVAAFrameInfo* pVaa, CMemoryAlign* pMa, const int32_t iNumRef, + const int32_t iCountMax8x8BNum) { + SVAAFrameInfoExt* pVaaExt = static_cast (pVaa); + + pVaaExt->pVaaBlockStaticIdc[0] = (static_cast (pMa->WelsMallocz (iNumRef * iCountMax8x8BNum * sizeof ( + uint8_t), "pVaa->pVaaBlockStaticIdc[0]"))); + if (NULL == pVaaExt->pVaaBlockStaticIdc[0]) { + return 1; + } + + for (int32_t idx = 1; idx < iNumRef; idx++) { + pVaaExt->pVaaBlockStaticIdc[idx] = pVaaExt->pVaaBlockStaticIdc[idx - 1] + iCountMax8x8BNum; + } + return 0; +} +void ReleaseMemoryVaaScreen (SVAAFrameInfo* pVaa, CMemoryAlign* pMa, const int32_t iNumRef) { + SVAAFrameInfoExt* pVaaExt = static_cast (pVaa); + if (pVaaExt && pMa && pVaaExt->pVaaBlockStaticIdc[0]) { + pMa->WelsFree (pVaaExt->pVaaBlockStaticIdc[0], "pVaa->pVaaBlockStaticIdc[0]"); + + for (int32_t idx = 0; idx < iNumRef; idx++) { + pVaaExt->pVaaBlockStaticIdc[idx] = NULL; + } + } +} +/*! + * \brief request specific memory for SVC + * \pParam pEncCtx sWelsEncCtx* + * \return successful - 0; otherwise none 0 for failed + */ +void GetMvMvdRange (SWelsSvcCodingParam* pParam, int32_t& iMvRange, int32_t& iMvdRange) { + ELevelIdc iMinLevelIdc = LEVEL_5_2; + int32_t iMinMv = 0; + int32_t iMaxMv = 0; + int32_t iFixMvRange = pParam->iUsageType ? EXPANDED_MV_RANGE : CAMERA_STARTMV_RANGE; + int32_t iFixMvdRange = (pParam->iUsageType ? EXPANDED_MVD_RANGE : ((pParam->iSpatialLayerNum == 1) ? 
CAMERA_MVD_RANGE : + CAMERA_HIGHLAYER_MVD_RANGE)); + for (int32_t iLayer = 0; iLayer < pParam->iSpatialLayerNum; iLayer++) { + if (pParam->sSpatialLayers[iLayer].uiLevelIdc < iMinLevelIdc) + iMinLevelIdc = pParam->sSpatialLayers[iLayer].uiLevelIdc; + } + const SLevelLimits* pLevelLimit = g_ksLevelLimits; + while ((pLevelLimit->uiLevelIdc != LEVEL_5_2) && (pLevelLimit->uiLevelIdc != iMinLevelIdc)) + pLevelLimit++; + iMinMv = (pLevelLimit->iMinVmv) >> 2; + iMaxMv = (pLevelLimit->iMaxVmv) >> 2; + + iMvRange = WELS_MIN (WELS_ABS (iMinMv), iMaxMv); + + iMvRange = WELS_MIN (iMvRange, iFixMvRange); + + iMvdRange = (iMvRange + 1) << 1; + + iMvdRange = WELS_MIN (iMvdRange, iFixMvdRange); +} +int32_t RequestMemorySvc (sWelsEncCtx** ppCtx, SExistingParasetList* pExistingParasetList) { + SWelsSvcCodingParam* pParam = (*ppCtx)->pSvcParam; + CMemoryAlign* pMa = (*ppCtx)->pMemAlign; + SSpatialLayerConfig* pFinalSpatial = NULL; + int32_t iCountBsLen = 0; + int32_t iCountNals = 0; + int32_t iMaxPicWidth = 0; + int32_t iMaxPicHeight = 0; + int32_t iCountMaxMbNum = 0; + int32_t iIndex = 0; + int32_t iCountLayers = 0; + int32_t iResult = 0; + float fCompressRatioThr = .5f; + const int32_t kiNumDependencyLayers = pParam->iSpatialLayerNum; + int32_t iVclLayersBsSizeCount = 0; + int32_t iNonVclLayersBsSizeCount = 0; + int32_t iTargetSpatialBsSize = 0; + + if (kiNumDependencyLayers < 1 || kiNumDependencyLayers > MAX_DEPENDENCY_LAYER) { + WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_WARNING, "RequestMemorySvc() failed due to invalid iNumDependencyLayers(%d)!", + kiNumDependencyLayers); + return 1; + } + + if (pParam->uiGopSize == 0 || (pParam->uiIntraPeriod && ((pParam->uiIntraPeriod % pParam->uiGopSize) != 0))) { + WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_WARNING, + "RequestMemorySvc() failed due to invalid uiIntraPeriod(%d) (=multipler of uiGopSize(%d)!", + pParam->uiIntraPeriod, pParam->uiGopSize); + return 1; + } + + pFinalSpatial = &pParam->sSpatialLayers[kiNumDependencyLayers - 1]; + 
iMaxPicWidth = pFinalSpatial->iVideoWidth; + iMaxPicHeight = pFinalSpatial->iVideoHeight; + iCountMaxMbNum = ((15 + iMaxPicWidth) >> 4) * ((15 + iMaxPicHeight) >> 4); + + iResult = AcquireLayersNals (ppCtx, pParam, &iCountLayers, &iCountNals); + if (iResult) { + WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_WARNING, "RequestMemorySvc(), AcquireLayersNals failed(%d)!", iResult); + return 1; + } + + const int32_t kiSpsSize = (*ppCtx)->pFuncList->pParametersetStrategy->GetNeededSpsNum() * SPS_BUFFER_SIZE; + const int32_t kiPpsSize = (*ppCtx)->pFuncList->pParametersetStrategy->GetNeededPpsNum() * PPS_BUFFER_SIZE; + iNonVclLayersBsSizeCount = SSEI_BUFFER_SIZE + kiSpsSize + kiPpsSize; + + bool bDynamicSlice = false; + uint32_t uiMaxSliceNumEstimation = 0; + int32_t iSliceBufferSize = 0; + int32_t iMaxSliceBufferSize = 0; + int32_t iTotalLength = 0; + int32_t iLayerBsSize = 0; + iIndex = 0; + while (iIndex < pParam->iSpatialLayerNum) { + SSpatialLayerConfig* fDlp = &pParam->sSpatialLayers[iIndex]; + + fCompressRatioThr = COMPRESS_RATIO_THR; + + iLayerBsSize = WELS_ROUND (((3 * fDlp->iVideoWidth * fDlp->iVideoHeight) >> 1) * fCompressRatioThr) + + MAX_MACROBLOCK_SIZE_IN_BYTE_x2; + iLayerBsSize = WELS_ALIGN (iLayerBsSize, 4); // 4 bytes alinged + iVclLayersBsSizeCount += iLayerBsSize; + + SSliceArgument* pSliceArgument = & (fDlp->sSliceArgument); + if (pSliceArgument->uiSliceMode == SM_SIZELIMITED_SLICE) { + bDynamicSlice = true; + uiMaxSliceNumEstimation = WELS_MIN (AVERSLICENUM_CONSTRAINT, + (iLayerBsSize / pSliceArgument->uiSliceSizeConstraint) + 1); + (*ppCtx)->iMaxSliceCount = WELS_MAX ((*ppCtx)->iMaxSliceCount, (int) uiMaxSliceNumEstimation); + iSliceBufferSize = (WELS_MAX (pSliceArgument->uiSliceSizeConstraint, + iLayerBsSize / uiMaxSliceNumEstimation) << 1) + MAX_MACROBLOCK_SIZE_IN_BYTE_x2; + } else { + (*ppCtx)->iMaxSliceCount = WELS_MAX ((*ppCtx)->iMaxSliceCount, (int) pSliceArgument->uiSliceNum); + iSliceBufferSize = ((iLayerBsSize / pSliceArgument->uiSliceNum) << 1) + 
MAX_MACROBLOCK_SIZE_IN_BYTE_x2; + } + iMaxSliceBufferSize = WELS_MAX (iMaxSliceBufferSize, iSliceBufferSize); + (*ppCtx)->iSliceBufferSize[iIndex] = iSliceBufferSize; + ++ iIndex; + } + iTargetSpatialBsSize = iLayerBsSize; + iCountBsLen = iNonVclLayersBsSizeCount + iVclLayersBsSizeCount; + + iMaxSliceBufferSize = WELS_MIN (iMaxSliceBufferSize, iTargetSpatialBsSize); + iTotalLength = iCountBsLen; + + pParam->iNumRefFrame = WELS_CLIP3 (pParam->iNumRefFrame, MIN_REF_PIC_COUNT, + (pParam->iUsageType == CAMERA_VIDEO_REAL_TIME ? MAX_REFERENCE_PICTURE_COUNT_NUM_CAMERA : + MAX_REFERENCE_PICTURE_COUNT_NUM_SCREEN)); + + // Output + (*ppCtx)->pOut = (SWelsEncoderOutput*)pMa->WelsMallocz (sizeof (SWelsEncoderOutput), "SWelsEncoderOutput"); + WELS_VERIFY_RETURN_IF (1, (NULL == (*ppCtx)->pOut)) + (*ppCtx)->pOut->pBsBuffer = (uint8_t*)pMa->WelsMallocz (iCountBsLen, "pOut->pBsBuffer"); + WELS_VERIFY_RETURN_IF (1, (NULL == (*ppCtx)->pOut->pBsBuffer)) + (*ppCtx)->pOut->uiSize = iCountBsLen; + (*ppCtx)->pOut->sNalList = (SWelsNalRaw*)pMa->WelsMallocz (iCountNals * sizeof (SWelsNalRaw), "pOut->sNalList"); + WELS_VERIFY_RETURN_IF (1, (NULL == (*ppCtx)->pOut->sNalList)) + (*ppCtx)->pOut->pNalLen = (int32_t*)pMa->WelsMallocz (iCountNals * sizeof (int32_t), "pOut->pNalLen"); + WELS_VERIFY_RETURN_IF (1, (NULL == (*ppCtx)->pOut->pNalLen)) + (*ppCtx)->pOut->iCountNals = iCountNals; + (*ppCtx)->pOut->iNalIndex = 0; + (*ppCtx)->pOut->iLayerBsIndex = 0; + + (*ppCtx)->pFrameBs = (uint8_t*)pMa->WelsMalloc (iTotalLength, "pFrameBs"); + WELS_VERIFY_RETURN_IF (1, (NULL == (*ppCtx)->pFrameBs)) + (*ppCtx)->iFrameBsSize = iTotalLength; + (*ppCtx)->iPosBsBuffer = 0; + + // for dynamic slice mode&& CABAC,allocate slice buffer to restore slice data + if (bDynamicSlice && pParam->iEntropyCodingModeFlag) { + for (int32_t iIdx = 0; iIdx < MAX_THREADS_NUM; iIdx++) { + (*ppCtx)->pDynamicBsBuffer[iIdx] = (uint8_t*)pMa->WelsMalloc (iMaxSliceBufferSize, "DynamicSliceBs"); + WELS_VERIFY_RETURN_IF (1, (NULL == 
(*ppCtx)->pDynamicBsBuffer[iIdx])) + } + } + // for pSlice bs buffers + if (pParam->iMultipleThreadIdc > 1 + && RequestMtResource (ppCtx, pParam, iCountBsLen, iMaxSliceBufferSize, bDynamicSlice)) { + WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_WARNING, "RequestMemorySvc(), RequestMtResource failed!"); + return 1; + } + + (*ppCtx)->pReferenceStrategy = IWelsReferenceStrategy::CreateReferenceStrategy ((*ppCtx), pParam->iUsageType, + pParam->bEnableLongTermReference); + WELS_VERIFY_RETURN_IF (1, (NULL == (*ppCtx)->pReferenceStrategy)) + + (*ppCtx)->pIntra4x4PredModeBlocks = static_cast + (pMa->WelsMallocz (iCountMaxMbNum * INTRA_4x4_MODE_NUM, "pIntra4x4PredModeBlocks")); + WELS_VERIFY_RETURN_IF (1, (NULL == (*ppCtx)->pIntra4x4PredModeBlocks)) + + (*ppCtx)->pNonZeroCountBlocks = static_cast + (pMa->WelsMallocz (iCountMaxMbNum * MB_LUMA_CHROMA_BLOCK4x4_NUM, "pNonZeroCountBlocks")); + WELS_VERIFY_RETURN_IF (1, (NULL == (*ppCtx)->pNonZeroCountBlocks)) + + (*ppCtx)->pMvUnitBlock4x4 = static_cast + (pMa->WelsMallocz (iCountMaxMbNum * 2 * MB_BLOCK4x4_NUM * sizeof (SMVUnitXY), "pMvUnitBlock4x4")); + WELS_VERIFY_RETURN_IF (1, (NULL == (*ppCtx)->pMvUnitBlock4x4)) + + (*ppCtx)->pRefIndexBlock4x4 = static_cast + (pMa->WelsMallocz (iCountMaxMbNum * 2 * MB_BLOCK8x8_NUM * sizeof (int8_t), "pRefIndexBlock4x4")); + WELS_VERIFY_RETURN_IF (1, (NULL == (*ppCtx)->pRefIndexBlock4x4)) + + (*ppCtx)->pSadCostMb = static_cast + (pMa->WelsMallocz (iCountMaxMbNum * sizeof (int32_t), "pSadCostMb")); + WELS_VERIFY_RETURN_IF (1, (NULL == (*ppCtx)->pSadCostMb)) + + (*ppCtx)->iGlobalQp = 26; // global qp in default + + (*ppCtx)->pLtr = (SLTRState*)pMa->WelsMallocz (kiNumDependencyLayers * sizeof (SLTRState), "SLTRState"); + WELS_VERIFY_RETURN_IF (1, (NULL == (*ppCtx)->pLtr)) + int32_t i = 0; + for (i = 0; i < kiNumDependencyLayers; i++) { + ResetLtrState (& (*ppCtx)->pLtr[i]); + } + + // stride tables + if (AllocStrideTables (ppCtx, kiNumDependencyLayers)) { + WelsLog (& (*ppCtx)->sLogCtx, 
WELS_LOG_WARNING, "RequestMemorySvc(), AllocStrideTables failed!"); + return 1; + } + + //Rate control module memory allocation + // only malloc once for RC pData, 12/14/2009 + (*ppCtx)->pWelsSvcRc = (SWelsSvcRc*)pMa->WelsMallocz (kiNumDependencyLayers * sizeof (SWelsSvcRc), "pWelsSvcRc"); + WELS_VERIFY_RETURN_IF (1, (NULL == (*ppCtx)->pWelsSvcRc)) + //End of Rate control module memory allocation + + //pVaa memory allocation + if (pParam->iUsageType == SCREEN_CONTENT_REAL_TIME) { + (*ppCtx)->pVaa = (SVAAFrameInfoExt*)pMa->WelsMallocz (sizeof (SVAAFrameInfoExt), "pVaa"); + WELS_VERIFY_RETURN_IF (1, (NULL == (*ppCtx)->pVaa)) + if (RequestMemoryVaaScreen ((*ppCtx)->pVaa, pMa, (*ppCtx)->pSvcParam->iMaxNumRefFrame, iCountMaxMbNum << 2)) { + WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_WARNING, "RequestMemorySvc(), RequestMemoryVaaScreen failed!"); + return 1; + } + } else { + (*ppCtx)->pVaa = (SVAAFrameInfo*)pMa->WelsMallocz (sizeof (SVAAFrameInfo), "pVaa"); + WELS_VERIFY_RETURN_IF (1, (NULL == (*ppCtx)->pVaa)) + } + + if ((*ppCtx)->pSvcParam->bEnableAdaptiveQuant) { //malloc mem + (*ppCtx)->pVaa->sAdaptiveQuantParam.pMotionTextureUnit = static_cast + (pMa->WelsMallocz (iCountMaxMbNum * sizeof (SMotionTextureUnit), "pVaa->sAdaptiveQuantParam.pMotionTextureUnit")); + WELS_VERIFY_RETURN_IF (1, (NULL == (*ppCtx)->pVaa->sAdaptiveQuantParam.pMotionTextureUnit)) + (*ppCtx)->pVaa->sAdaptiveQuantParam.pMotionTextureIndexToDeltaQp = static_cast + (pMa->WelsMallocz (iCountMaxMbNum * sizeof (int8_t), "pVaa->sAdaptiveQuantParam.pMotionTextureIndexToDeltaQp")); + WELS_VERIFY_RETURN_IF (1, (NULL == (*ppCtx)->pVaa->sAdaptiveQuantParam.pMotionTextureIndexToDeltaQp)) + } + + (*ppCtx)->pVaa->pVaaBackgroundMbFlag = (int8_t*)pMa->WelsMallocz (iCountMaxMbNum * sizeof (int8_t), + "pVaa->pVaaBackgroundMbFlag"); + WELS_VERIFY_RETURN_IF (1, (NULL == (*ppCtx)->pVaa->pVaaBackgroundMbFlag)) + + (*ppCtx)->pVaa->sVaaCalcInfo.pSad8x8 = static_cast + (pMa->WelsMallocz (iCountMaxMbNum * 4 * sizeof (int32_t), 
"pVaa->sVaaCalcInfo.sad8x8")); + WELS_VERIFY_RETURN_IF (1, (NULL == (*ppCtx)->pVaa->sVaaCalcInfo.pSad8x8)) + (*ppCtx)->pVaa->sVaaCalcInfo.pSsd16x16 = static_cast + (pMa->WelsMallocz (iCountMaxMbNum * sizeof (int32_t), "pVaa->sVaaCalcInfo.pSsd16x16")); + WELS_VERIFY_RETURN_IF (1, (NULL == (*ppCtx)->pVaa->sVaaCalcInfo.pSsd16x16)) + (*ppCtx)->pVaa->sVaaCalcInfo.pSum16x16 = static_cast + (pMa->WelsMallocz (iCountMaxMbNum * sizeof (int32_t), "pVaa->sVaaCalcInfo.pSum16x16")); + WELS_VERIFY_RETURN_IF (1, (NULL == (*ppCtx)->pVaa->sVaaCalcInfo.pSum16x16)) + (*ppCtx)->pVaa->sVaaCalcInfo.pSumOfSquare16x16 = static_cast + (pMa->WelsMallocz (iCountMaxMbNum * sizeof (int32_t), "pVaa->sVaaCalcInfo.pSumOfSquare16x16")); + WELS_VERIFY_RETURN_IF (1, (NULL == (*ppCtx)->pVaa->sVaaCalcInfo.pSumOfSquare16x16)) + + if ((*ppCtx)->pSvcParam->bEnableBackgroundDetection) { //BGD control + (*ppCtx)->pVaa->sVaaCalcInfo.pSumOfDiff8x8 = static_cast + (pMa->WelsMallocz (iCountMaxMbNum * 4 * sizeof (int32_t), "pVaa->sVaaCalcInfo.pSumOfDiff8x8")); + WELS_VERIFY_RETURN_IF (1, (NULL == (*ppCtx)->pVaa->sVaaCalcInfo.pSumOfDiff8x8)) + (*ppCtx)->pVaa->sVaaCalcInfo.pMad8x8 = static_cast + (pMa->WelsMallocz (iCountMaxMbNum * 4 * sizeof (uint8_t), "pVaa->sVaaCalcInfo.pMad8x8")); + WELS_VERIFY_RETURN_IF (1, (NULL == (*ppCtx)->pVaa->sVaaCalcInfo.pMad8x8)) + } + + //End of pVaa memory allocation + + (*ppCtx)->ppRefPicListExt = (SRefList**)pMa->WelsMallocz (kiNumDependencyLayers * sizeof (SRefList*), + "ppRefPicListExt"); + WELS_VERIFY_RETURN_IF (1, (NULL == (*ppCtx)->ppRefPicListExt)) + + (*ppCtx)->ppDqLayerList = (SDqLayer**)pMa->WelsMallocz (kiNumDependencyLayers * sizeof (SDqLayer*), "ppDqLayerList"); + WELS_VERIFY_RETURN_IF (1, (NULL == (*ppCtx)->ppDqLayerList)) + + iResult = InitDqLayers (ppCtx, pExistingParasetList); + if (iResult) { + WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_WARNING, "RequestMemorySvc(), InitDqLayers failed(%d)!", iResult); + return iResult; + } + + if (InitMbListD (ppCtx)) { + WelsLog (& 
(*ppCtx)->sLogCtx, WELS_LOG_WARNING, "RequestMemorySvc(), InitMbListD failed!"); + return 1; + } + + int32_t iMvdRange = 0; + GetMvMvdRange (pParam, (*ppCtx)->iMvRange, iMvdRange); + const uint32_t kuiMvdInterTableSize = (iMvdRange << 2); //intepel*4=qpel + const uint32_t kuiMvdInterTableStride = 1 + (kuiMvdInterTableSize << 1);//qpel_mv_range*2=(+/-); + const uint32_t kuiMvdCacheAlignedSize = kuiMvdInterTableStride * sizeof (uint16_t); + + (*ppCtx)->iMvdCostTableSize = kuiMvdInterTableSize; + (*ppCtx)->iMvdCostTableStride = kuiMvdInterTableStride; + (*ppCtx)->pMvdCostTable = (uint16_t*)pMa->WelsMallocz (52 * kuiMvdCacheAlignedSize, "pMvdCostTable"); + WELS_VERIFY_RETURN_IF (1, (NULL == (*ppCtx)->pMvdCostTable)) + MvdCostInit ((*ppCtx)->pMvdCostTable, kuiMvdInterTableStride); //should put to a better place? + + if ((*ppCtx)->ppRefPicListExt[0] != NULL && (*ppCtx)->ppRefPicListExt[0]->pRef[0] != NULL) + (*ppCtx)->pDecPic = (*ppCtx)->ppRefPicListExt[0]->pRef[0]; + else + (*ppCtx)->pDecPic = NULL; // error here + + (*ppCtx)->pSps = & (*ppCtx)->pSpsArray[0]; + (*ppCtx)->pPps = & (*ppCtx)->pPPSArray[0]; + + return 0; +} + + +/*! 
+ * \brief free memory in SVC core encoder + * \pParam pEncCtx sWelsEncCtx* + * \return none + */ +void FreeMemorySvc (sWelsEncCtx** ppCtx) { + if (NULL != *ppCtx) { + sWelsEncCtx* pCtx = *ppCtx; + CMemoryAlign* pMa = pCtx->pMemAlign; + SWelsSvcCodingParam* pParam = pCtx->pSvcParam; + int32_t ilayer = 0; + + // SStrideTables + if (NULL != pCtx->pStrideTab) { + if (NULL != pCtx->pStrideTab->pStrideDecBlockOffset[0][1]) { + pMa->WelsFree (pCtx->pStrideTab->pStrideDecBlockOffset[0][1], "pBase"); + pCtx->pStrideTab->pStrideDecBlockOffset[0][1] = NULL; + } + pMa->WelsFree (pCtx->pStrideTab, "SStrideTables"); + pCtx->pStrideTab = NULL; + } + // pDq idc map + if (NULL != pCtx->pDqIdcMap) { + pMa->WelsFree (pCtx->pDqIdcMap, "pDqIdcMap"); + pCtx->pDqIdcMap = NULL; + } + + if (NULL != pCtx->pOut) { + // bs pBuffer + if (NULL != pCtx->pOut->pBsBuffer) { + pMa->WelsFree (pCtx->pOut->pBsBuffer, "pOut->pBsBuffer"); + pCtx->pOut->pBsBuffer = NULL; + } + // NALs list + if (NULL != pCtx->pOut->sNalList) { + pMa->WelsFree (pCtx->pOut->sNalList, "pOut->sNalList"); + pCtx->pOut->sNalList = NULL; + } + // NALs len + if (NULL != pCtx->pOut->pNalLen) { + pMa->WelsFree (pCtx->pOut->pNalLen, "pOut->pNalLen"); + pCtx->pOut->pNalLen = NULL; + } + pMa->WelsFree (pCtx->pOut, "SWelsEncoderOutput"); + pCtx->pOut = NULL; + } + + if (pParam != NULL && pParam->iMultipleThreadIdc > 1) + ReleaseMtResource (ppCtx); + + if (NULL != pCtx->pReferenceStrategy) { + WELS_DELETE_OP (pCtx->pReferenceStrategy); + } + + // frame bitstream pBuffer + if (NULL != pCtx->pFrameBs) { + pMa->WelsFree (pCtx->pFrameBs, "pFrameBs"); + pCtx->pFrameBs = NULL; + } + for (int32_t iIdx = 0; iIdx < MAX_THREADS_NUM; iIdx++) { + pMa->WelsFree (pCtx->pDynamicBsBuffer[iIdx], "DynamicSliceBs"); + pCtx->pDynamicBsBuffer[iIdx] = NULL; + + } + // pSpsArray + if (NULL != pCtx->pSpsArray) { + pMa->WelsFree (pCtx->pSpsArray, "pSpsArray"); + pCtx->pSpsArray = NULL; + } + // pPPSArray + if (NULL != pCtx->pPPSArray) { + pMa->WelsFree 
(pCtx->pPPSArray, "pPPSArray"); + pCtx->pPPSArray = NULL; + } + // subset_sps_array + if (NULL != pCtx->pSubsetArray) { + pMa->WelsFree (pCtx->pSubsetArray, "pSubsetArray"); + pCtx->pSubsetArray = NULL; + } + + if (NULL != pCtx->pIntra4x4PredModeBlocks) { + pMa->WelsFree (pCtx->pIntra4x4PredModeBlocks, "pIntra4x4PredModeBlocks"); + pCtx->pIntra4x4PredModeBlocks = NULL; + } + + if (NULL != pCtx->pNonZeroCountBlocks) { + pMa->WelsFree (pCtx->pNonZeroCountBlocks, "pNonZeroCountBlocks"); + pCtx->pNonZeroCountBlocks = NULL; + } + + if (NULL != pCtx->pMvUnitBlock4x4) { + pMa->WelsFree (pCtx->pMvUnitBlock4x4, "pMvUnitBlock4x4"); + pCtx->pMvUnitBlock4x4 = NULL; + } + + if (NULL != pCtx->pRefIndexBlock4x4) { + pMa->WelsFree (pCtx->pRefIndexBlock4x4, "pRefIndexBlock4x4"); + pCtx->pRefIndexBlock4x4 = NULL; + } + + if (NULL != pCtx->ppMbListD) { + if (NULL != pCtx->ppMbListD[0]) { + pMa->WelsFree (pCtx->ppMbListD[0], "ppMbListD[0]"); + (*ppCtx)->ppMbListD[0] = NULL; + } + pMa->WelsFree (pCtx->ppMbListD, "ppMbListD"); + pCtx->ppMbListD = NULL; + } + + if (NULL != pCtx->pSadCostMb) { + pMa->WelsFree (pCtx->pSadCostMb, "pSadCostMb"); + pCtx->pSadCostMb = NULL; + } + + // SLTRState + if (NULL != pCtx->pLtr) { + pMa->WelsFree (pCtx->pLtr, "SLTRState"); + pCtx->pLtr = NULL; + } + + // pDq layers list + ilayer = 0; + if (NULL != pCtx->ppDqLayerList && pParam != NULL) { + while (ilayer < pParam->iSpatialLayerNum) { + SDqLayer* pDq = pCtx->ppDqLayerList[ilayer]; + // pDq layers + if (NULL != pDq) { + FreeDqLayer (pDq, pMa); + pCtx->ppDqLayerList[ilayer] = NULL; + } + ++ ilayer; + } + pMa->WelsFree (pCtx->ppDqLayerList, "ppDqLayerList"); + pCtx->ppDqLayerList = NULL; + } + // reference picture list extension + if (NULL != pCtx->ppRefPicListExt && pParam != NULL) { + ilayer = 0; + while (ilayer < pParam->iSpatialLayerNum) { + FreeRefList (pCtx->ppRefPicListExt[ilayer], pMa, pParam->iMaxNumRefFrame); + pCtx->ppRefPicListExt[ilayer] = NULL; + ++ ilayer; + } + + pMa->WelsFree 
(pCtx->ppRefPicListExt, "ppRefPicListExt"); + pCtx->ppRefPicListExt = NULL; + } + + // VAA + if (NULL != pCtx->pVaa) { + if (pCtx->pSvcParam->bEnableAdaptiveQuant) { //free mem + pMa->WelsFree (pCtx->pVaa->sAdaptiveQuantParam.pMotionTextureUnit, "pVaa->sAdaptiveQuantParam.pMotionTextureUnit"); + pCtx->pVaa->sAdaptiveQuantParam.pMotionTextureUnit = NULL; + pMa->WelsFree (pCtx->pVaa->sAdaptiveQuantParam.pMotionTextureIndexToDeltaQp, + "pVaa->sAdaptiveQuantParam.pMotionTextureIndexToDeltaQp"); + pCtx->pVaa->sAdaptiveQuantParam.pMotionTextureIndexToDeltaQp = NULL; + } + + pMa->WelsFree (pCtx->pVaa->pVaaBackgroundMbFlag, "pVaa->pVaaBackgroundMbFlag"); + pCtx->pVaa->pVaaBackgroundMbFlag = NULL; + pMa->WelsFree (pCtx->pVaa->sVaaCalcInfo.pSad8x8, "pVaa->sVaaCalcInfo.sad8x8"); + pCtx->pVaa->sVaaCalcInfo.pSad8x8 = NULL; + pMa->WelsFree (pCtx->pVaa->sVaaCalcInfo.pSsd16x16, "pVaa->sVaaCalcInfo.pSsd16x16"); + pCtx->pVaa->sVaaCalcInfo.pSsd16x16 = NULL; + pMa->WelsFree (pCtx->pVaa->sVaaCalcInfo.pSum16x16, "pVaa->sVaaCalcInfo.pSum16x16"); + pCtx->pVaa->sVaaCalcInfo.pSum16x16 = NULL; + pMa->WelsFree (pCtx->pVaa->sVaaCalcInfo.pSumOfSquare16x16, "pVaa->sVaaCalcInfo.pSumOfSquare16x16"); + pCtx->pVaa->sVaaCalcInfo.pSumOfSquare16x16 = NULL; + + if (pCtx->pSvcParam->bEnableBackgroundDetection) { //BGD control + pMa->WelsFree (pCtx->pVaa->sVaaCalcInfo.pSumOfDiff8x8, "pVaa->sVaaCalcInfo.pSumOfDiff8x8"); + pCtx->pVaa->sVaaCalcInfo.pSumOfDiff8x8 = NULL; + pMa->WelsFree (pCtx->pVaa->sVaaCalcInfo.pMad8x8, "pVaa->sVaaCalcInfo.pMad8x8"); + pCtx->pVaa->sVaaCalcInfo.pMad8x8 = NULL; + } + if (pCtx->pSvcParam->iUsageType == SCREEN_CONTENT_REAL_TIME) + ReleaseMemoryVaaScreen (pCtx->pVaa, pMa, pCtx->pSvcParam->iMaxNumRefFrame); + pMa->WelsFree (pCtx->pVaa, "pVaa"); + pCtx->pVaa = NULL; + } + + // rate control module memory free + if (NULL != pCtx->pWelsSvcRc) { + WelsRcFreeMemory (pCtx); + pMa->WelsFree (pCtx->pWelsSvcRc, "pWelsSvcRc"); + pCtx->pWelsSvcRc = NULL; + } + + /* MVD cost tables for Inter 
*/ + if (NULL != pCtx->pMvdCostTable) { + pMa->WelsFree (pCtx->pMvdCostTable, "pMvdCostTable"); + pCtx->pMvdCostTable = NULL; + } + + FreeCodingParam (&pCtx->pSvcParam, pMa); + if (NULL != pCtx->pFuncList) { + if (NULL != pCtx->pFuncList->pParametersetStrategy) { + WELS_DELETE_OP (pCtx->pFuncList->pParametersetStrategy); + } + + pMa->WelsFree (pCtx->pFuncList, "SWelsFuncPtrList"); + pCtx->pFuncList = NULL; + } + +#if defined(MEMORY_MONITOR) + assert (pMa->WelsGetMemoryUsage() == 0); // ensure all memory free well +#endif//MEMORY_MONITOR + + if ((*ppCtx)->pMemAlign != NULL) { + WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_INFO, "FreeMemorySvc(), verify memory usage (%d bytes) after free..", + (*ppCtx)->pMemAlign->WelsGetMemoryUsage()); + WELS_DELETE_OP ((*ppCtx)->pMemAlign); + } + + free (*ppCtx); + *ppCtx = NULL; + } +} + +int32_t InitSliceSettings (SLogContext* pLogCtx, SWelsSvcCodingParam* pCodingParam, + const int32_t kiCpuCores, int16_t* pMaxSliceCount) { + int32_t iSpatialIdx = 0, iSpatialNum = pCodingParam->iSpatialLayerNum; + uint16_t iMaxSliceCount = 0; + + do { + SSpatialLayerConfig* pDlp = &pCodingParam->sSpatialLayers[iSpatialIdx]; + SSliceArgument* pSliceArgument = &pDlp->sSliceArgument; + int32_t iReturn = 0; + + switch (pSliceArgument->uiSliceMode) { + case SM_SIZELIMITED_SLICE: + iMaxSliceCount = AVERSLICENUM_CONSTRAINT; + break; // go through for SM_SIZELIMITED_SLICE? 
+ case SM_FIXEDSLCNUM_SLICE: { + iReturn = SliceArgumentValidationFixedSliceMode (pLogCtx, &pDlp->sSliceArgument, pCodingParam->iRCMode, + pDlp->iVideoWidth, pDlp->iVideoHeight); + if (iReturn) + return ENC_RETURN_UNSUPPORTED_PARA; + + if (pSliceArgument->uiSliceNum > iMaxSliceCount) { + iMaxSliceCount = pSliceArgument->uiSliceNum; + } + } + break; + case SM_SINGLE_SLICE: + if (pSliceArgument->uiSliceNum > iMaxSliceCount) + iMaxSliceCount = pSliceArgument->uiSliceNum; + break; + case SM_RASTER_SLICE: + if (pSliceArgument->uiSliceNum > iMaxSliceCount) + iMaxSliceCount = pSliceArgument->uiSliceNum; + break; + default: + break; + } + + ++ iSpatialIdx; + } while (iSpatialIdx < iSpatialNum); + + pCodingParam->iMultipleThreadIdc = WELS_MIN (kiCpuCores, iMaxSliceCount); + if (pCodingParam->iLoopFilterDisableIdc == 0 + && pCodingParam->iMultipleThreadIdc != 1) // Loop filter requested to be enabled, with threading enabled + pCodingParam->iLoopFilterDisableIdc = + 2; // Disable loop filter on slice boundaries since that's not allowed with multithreading + *pMaxSliceCount = iMaxSliceCount; + + return ENC_RETURN_SUCCESS; +} + +/*! + * \brief log output for cpu features/capabilities + */ +void OutputCpuFeaturesLog (SLogContext* pLogCtx, uint32_t uiCpuFeatureFlags, uint32_t uiCpuCores, + int32_t iCacheLineSize) { + // welstracer output + WelsLog (pLogCtx, WELS_LOG_INFO, "WELS CPU features/capacities (0x%x) detected: \t" + "HTT: %c, " + "MMX: %c, " + "MMXEX: %c, " + "SSE: %c, " + "SSE2: %c, " + "SSE3: %c, " + "SSSE3: %c, " + "SSE4.1: %c, " + "SSE4.2: %c, " + "AVX: %c, " + "FMA: %c, " + "X87-FPU: %c, " + "3DNOW: %c, " + "3DNOWEX: %c, " + "ALTIVEC: %c, " + "CMOV: %c, " + "MOVBE: %c, " + "AES: %c, " + "NUMBER OF LOGIC PROCESSORS ON CHIP: %d, " + "CPU CACHE LINE SIZE (BYTES): %d", + uiCpuFeatureFlags, + (uiCpuFeatureFlags & WELS_CPU_HTT) ? 'Y' : 'N', + (uiCpuFeatureFlags & WELS_CPU_MMX) ? 'Y' : 'N', + (uiCpuFeatureFlags & WELS_CPU_MMXEXT) ? 
'Y' : 'N', + (uiCpuFeatureFlags & WELS_CPU_SSE) ? 'Y' : 'N', + (uiCpuFeatureFlags & WELS_CPU_SSE2) ? 'Y' : 'N', + (uiCpuFeatureFlags & WELS_CPU_SSE3) ? 'Y' : 'N', + (uiCpuFeatureFlags & WELS_CPU_SSSE3) ? 'Y' : 'N', + (uiCpuFeatureFlags & WELS_CPU_SSE41) ? 'Y' : 'N', + (uiCpuFeatureFlags & WELS_CPU_SSE42) ? 'Y' : 'N', + (uiCpuFeatureFlags & WELS_CPU_AVX) ? 'Y' : 'N', + (uiCpuFeatureFlags & WELS_CPU_FMA) ? 'Y' : 'N', + (uiCpuFeatureFlags & WELS_CPU_FPU) ? 'Y' : 'N', + (uiCpuFeatureFlags & WELS_CPU_3DNOW) ? 'Y' : 'N', + (uiCpuFeatureFlags & WELS_CPU_3DNOWEXT) ? 'Y' : 'N', + (uiCpuFeatureFlags & WELS_CPU_ALTIVEC) ? 'Y' : 'N', + (uiCpuFeatureFlags & WELS_CPU_CMOV) ? 'Y' : 'N', + (uiCpuFeatureFlags & WELS_CPU_MOVBE) ? 'Y' : 'N', + (uiCpuFeatureFlags & WELS_CPU_AES) ? 'Y' : 'N', + uiCpuCores, + iCacheLineSize); +} +/* + * + * status information output + */ +#if defined(STAT_OUTPUT) +void StatOverallEncodingExt (sWelsEncCtx* pCtx) { + int8_t i = 0; + int8_t j = 0; + for (i = 0; i < pCtx->pSvcParam->iSpatialLayerNum; i++) { + fprintf (stdout, "\nDependency layer : %d\n", i); + fprintf (stdout, "Quality layer : %d\n", j); + { + const int32_t iCount = pCtx->sStatData[i][j].sSliceData.iSliceCount[I_SLICE] + + pCtx->sStatData[i][j].sSliceData.iSliceCount[P_SLICE] + + pCtx->sStatData[i][j].sSliceData.iSliceCount[B_SLICE]; +#if defined(MB_TYPES_CHECK) + if (iCount > 0) { + int32_t iCountNumIMb = pCtx->sStatData[i][j].sSliceData.iMbCount[I_SLICE][Intra4x4] + + pCtx->sStatData[i][j].sSliceData.iMbCount[I_SLICE][Intra16x16] + pCtx->sStatData[i][j].sSliceData.iMbCount[I_SLICE][7]; + int32_t iCountNumPMb = pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Intra4x4] + + pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Intra16x16] + + pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][7] + + pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter16x16] + + pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter16x8] + + pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter8x16] 
+ + pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter8x8] + + pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][10] + + pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][PSkip]; + int32_t count_p_mbL0 = pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter16x16] + + pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter16x8] + + pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter8x16] + + pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter8x8] + + pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][10]; + + int32_t iMbCount = iCountNumIMb + iCountNumPMb; + if (iMbCount > 0) { + fprintf (stderr, + "SVC: overall Slices MBs: %d Avg\nI4x4: %.3f%% I16x16: %.3f%% IBL: %.3f%%\nP16x16: %.3f%% P16x8: %.3f%% P8x16: %.3f%% P8x8: %.3f%% SUBP8x8: %.3f%% PSKIP: %.3f%%\nILP(All): %.3f%% ILP(PL0): %.3f%% BLSKIP(PL0): %.3f%% RP(PL0): %.3f%%\n", + iMbCount, + (100.0f * (pCtx->sStatData[i][j].sSliceData.iMbCount[I_SLICE][Intra4x4] + + pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Intra4x4]) / iMbCount), + (100.0f * (pCtx->sStatData[i][j].sSliceData.iMbCount[I_SLICE][Intra16x16] + + pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Intra16x16]) / iMbCount), + (100.0f * (pCtx->sStatData[i][j].sSliceData.iMbCount[I_SLICE][7] + + pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][7]) / iMbCount), + (100.0f * pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter16x16] / iMbCount), + (100.0f * pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter16x8] / iMbCount), + (100.0f * pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter8x16] / iMbCount), + (100.0f * pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter8x8] / iMbCount), + (100.0f * pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][10] / iMbCount), + (100.0f * pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][PSkip] / iMbCount), + (100.0f * pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][11] / iMbCount), + (100.0f * pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][11] / count_p_mbL0), 
+ (100.0f * pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][8] / count_p_mbL0), + (100.0f * pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][9] / count_p_mbL0) + ); + } + } +#endif //#if defined(MB_TYPES_CHECK) + + if (iCount > 0) { + fprintf (stdout, "SVC: overall PSNR Y: %2.3f U: %2.3f V: %2.3f kb/s: %.1f fps: %.3f\n\n", + (pCtx->sStatData[i][j].sQualityStat.rYPsnr[I_SLICE] + pCtx->sStatData[i][j].sQualityStat.rYPsnr[P_SLICE] + + pCtx->sStatData[i][j].sQualityStat.rYPsnr[B_SLICE]) / (float) (iCount), + (pCtx->sStatData[i][j].sQualityStat.rUPsnr[I_SLICE] + pCtx->sStatData[i][j].sQualityStat.rUPsnr[P_SLICE] + + pCtx->sStatData[i][j].sQualityStat.rUPsnr[B_SLICE]) / (float) (iCount), + (pCtx->sStatData[i][j].sQualityStat.rVPsnr[I_SLICE] + pCtx->sStatData[i][j].sQualityStat.rVPsnr[P_SLICE] + + pCtx->sStatData[i][j].sQualityStat.rVPsnr[B_SLICE]) / (float) (iCount), + 1.0f * pCtx->pSvcParam->sDependencyLayers[i].fOutputFrameRate * (pCtx->sStatData[i][j].sSliceData.iSliceSize[I_SLICE] + + pCtx->sStatData[i][j].sSliceData.iSliceSize[P_SLICE] + pCtx->sStatData[i][j].sSliceData.iSliceSize[B_SLICE]) / (float) ( + iCount + pCtx->pWelsSvcRc[i].iSkipFrameNum) / 1000, + 1.0f * pCtx->pSvcParam->sDependencyLayers[i].fOutputFrameRate); + + } + + } + + } +} +#endif + + +int32_t GetMultipleThreadIdc (SLogContext* pLogCtx, SWelsSvcCodingParam* pCodingParam, int16_t& iSliceNum, + int32_t& iCacheLineSize, uint32_t& uiCpuFeatureFlags) { + // for cpu features detection, Only detect once?? 
+ int32_t uiCpuCores = + 0; // number of logic processors on physical processor package, zero logic processors means HTT not supported + uiCpuFeatureFlags = WelsCPUFeatureDetect (&uiCpuCores); // detect cpu capacity features + +#ifdef X86_ASM + if (uiCpuFeatureFlags & WELS_CPU_CACHELINE_128) + iCacheLineSize = 128; + else if (uiCpuFeatureFlags & WELS_CPU_CACHELINE_64) + iCacheLineSize = 64; + else if (uiCpuFeatureFlags & WELS_CPU_CACHELINE_32) + iCacheLineSize = 32; + else if (uiCpuFeatureFlags & WELS_CPU_CACHELINE_16) + iCacheLineSize = 16; + OutputCpuFeaturesLog (pLogCtx, uiCpuFeatureFlags, uiCpuCores, iCacheLineSize); +#else + iCacheLineSize = 16; // 16 bytes aligned in default +#endif//X86_ASM + + if (0 == pCodingParam->iMultipleThreadIdc && uiCpuCores == 0) { + // cpuid not supported or doesn't expose the number of cores, + // use high level system API as followed to detect number of pysical/logic processor + uiCpuCores = DynamicDetectCpuCores(); + } + + if (0 == pCodingParam->iMultipleThreadIdc) + pCodingParam->iMultipleThreadIdc = (uiCpuCores > 0) ? uiCpuCores : 1; + + // So far so many cpu cores up to MAX_THREADS_NUM mean for server platforms, + // for client application here it is constrained by maximal to MAX_THREADS_NUM + pCodingParam->iMultipleThreadIdc = WELS_CLIP3 (pCodingParam->iMultipleThreadIdc, 1, MAX_THREADS_NUM); + uiCpuCores = pCodingParam->iMultipleThreadIdc; + + if (InitSliceSettings (pLogCtx, pCodingParam, uiCpuCores, &iSliceNum)) { + WelsLog (pLogCtx, WELS_LOG_ERROR, "GetMultipleThreadIdc(), InitSliceSettings failed."); + return 1; + } + return 0; +} + +/*! 
+ * \brief uninitialize Wels encoder core library + * \pParam pEncCtx sWelsEncCtx* + * \return none + */ +void WelsUninitEncoderExt (sWelsEncCtx** ppCtx) { + if (NULL == ppCtx || NULL == *ppCtx) + return; + + WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_INFO, + "WelsUninitEncoderExt(), pCtx= %p, iMultipleThreadIdc= %d.", + (void*) (*ppCtx), (*ppCtx)->pSvcParam->iMultipleThreadIdc); + +#if defined(STAT_OUTPUT) + StatOverallEncodingExt (*ppCtx); +#endif + + if ((*ppCtx)->pSvcParam->iMultipleThreadIdc > 1 && (*ppCtx)->pSliceThreading != NULL) { + const int32_t iThreadCount = (*ppCtx)->pSvcParam->iMultipleThreadIdc; + int32_t iThreadIdx = 0; + + while (iThreadIdx < iThreadCount) { + int res = 0; + if ((*ppCtx)->pSliceThreading->pThreadHandles[iThreadIdx]) { + + res = WelsThreadJoin ((*ppCtx)->pSliceThreading->pThreadHandles[iThreadIdx]); // waiting thread exit + WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_INFO, "WelsUninitEncoderExt(), pthread_join(pThreadHandles%d) return %d..", + iThreadIdx, + res); + (*ppCtx)->pSliceThreading->pThreadHandles[iThreadIdx] = 0; + } + ++ iThreadIdx; + } + } + + if ((*ppCtx)->pVpp) { + (*ppCtx)->pVpp->FreeSpatialPictures (*ppCtx); + WELS_DELETE_OP ((*ppCtx)->pVpp); + } + FreeMemorySvc (ppCtx); + *ppCtx = NULL; +} + +/*! 
+ * \brief initialize Wels avc encoder core library + * \pParam ppCtx sWelsEncCtx** + * \pParam pParam SWelsSvcCodingParam* + * \return successful - 0; otherwise none 0 for failed + */ +int32_t WelsInitEncoderExt (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pCodingParam, SLogContext* pLogCtx, + SExistingParasetList* pExistingParasetList) { + sWelsEncCtx* pCtx = NULL; + int32_t iRet = 0; + int16_t iSliceNum = 1; // number of slices used + int32_t iCacheLineSize = 16; // on chip cache line size in byte + uint32_t uiCpuFeatureFlags = 0; + if (NULL == ppCtx || NULL == pCodingParam) { + WelsLog (pLogCtx, WELS_LOG_ERROR, "WelsInitEncoderExt(), NULL == ppCtx(0x%p) or NULL == pCodingParam(0x%p).", + (void*)ppCtx, (void*)pCodingParam); + return 1; + } + + iRet = ParamValidationExt (pLogCtx, pCodingParam); + if (iRet != 0) { + WelsLog (pLogCtx, WELS_LOG_ERROR, "WelsInitEncoderExt(), ParamValidationExt failed return %d.", iRet); + return iRet; + } + iRet = pCodingParam->DetermineTemporalSettings(); + if (iRet != ENC_RETURN_SUCCESS) { + WelsLog (pLogCtx, WELS_LOG_ERROR, + "WelsInitEncoderExt(), DetermineTemporalSettings failed return %d (check in/out frame rate and temporal layer setting! 
-- in/out = 2^x, x <= temppral_layer_num)", + iRet); + return iRet; + } + iRet = GetMultipleThreadIdc (pLogCtx, pCodingParam, iSliceNum, iCacheLineSize, uiCpuFeatureFlags); + if (iRet != 0) { + WelsLog (pLogCtx, WELS_LOG_ERROR, "WelsInitEncoderExt(), GetMultipleThreadIdc failed return %d.", iRet); + return iRet; + } + + + *ppCtx = NULL; + + pCtx = static_cast (malloc (sizeof (sWelsEncCtx))); + + WELS_VERIFY_RETURN_IF (1, (NULL == pCtx)) + memset (pCtx, 0, sizeof (sWelsEncCtx)); + + pCtx->sLogCtx = *pLogCtx; + + pCtx->pMemAlign = new CMemoryAlign (iCacheLineSize); + WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pCtx->pMemAlign), WelsUninitEncoderExt (&pCtx)) + + iRet = AllocCodingParam (&pCtx->pSvcParam, pCtx->pMemAlign); + if (iRet != 0) { + WelsUninitEncoderExt (&pCtx); + return iRet; + } + memcpy (pCtx->pSvcParam, pCodingParam, sizeof (SWelsSvcCodingParam)); // confirmed_safe_unsafe_usage + + pCtx->pFuncList = (SWelsFuncPtrList*)pCtx->pMemAlign->WelsMallocz (sizeof (SWelsFuncPtrList), "SWelsFuncPtrList"); + if (NULL == pCtx->pFuncList) { + WelsUninitEncoderExt (&pCtx); + return 1; + } + InitFunctionPointers (pCtx, pCtx->pSvcParam, uiCpuFeatureFlags); + + pCtx->iActiveThreadsNum = pCodingParam->iMultipleThreadIdc; + pCtx->iMaxSliceCount = iSliceNum; + iRet = RequestMemorySvc (&pCtx, pExistingParasetList); + if (iRet != 0) { + WelsLog (pLogCtx, WELS_LOG_ERROR, "WelsInitEncoderExt(), RequestMemorySvc failed return %d.", iRet); + WelsUninitEncoderExt (&pCtx); + return iRet; + } + + if (pCodingParam->iEntropyCodingModeFlag) + WelsCabacInit (pCtx); + WelsRcInitModule (pCtx, pCtx->pSvcParam->iRCMode); + + pCtx->pVpp = CWelsPreProcess::CreatePreProcess (pCtx); + if (pCtx->pVpp == NULL) { + iRet = 1; + WelsLog (pLogCtx, WELS_LOG_ERROR, "WelsInitEncoderExt(), pOut of memory in case new CWelsPreProcess()."); + WelsUninitEncoderExt (&pCtx); + return iRet; + } + if ((iRet = pCtx->pVpp->AllocSpatialPictures (pCtx, pCtx->pSvcParam)) != 0) { + WelsLog (pLogCtx, WELS_LOG_ERROR, 
"WelsInitEncoderExt(), pVPP alloc spatial pictures failed"); + WelsUninitEncoderExt (&pCtx); + return iRet; + } + +#if defined(MEMORY_MONITOR) + WelsLog (pLogCtx, WELS_LOG_INFO, "WelsInitEncoderExt() exit, overall memory usage: %llu bytes", + static_cast (sizeof (sWelsEncCtx) /* requested size from malloc() or new operator */ + + pCtx->pMemAlign->WelsGetMemoryUsage()) /* requested size from CMemoryAlign::WelsMalloc() */ + ); +#endif//MEMORY_MONITOR + + pCtx->iStatisticsLogInterval = STATISTICS_LOG_INTERVAL_MS; + pCtx->uiLastTimestamp = -1; + pCtx->bDeliveryFlag = true; + *ppCtx = pCtx; + + WelsLog (pLogCtx, WELS_LOG_INFO, "WelsInitEncoderExt(), pCtx= 0x%p.", (void*)pCtx); + + return 0; +} +/*! + * \brief get temporal level due to configuration and coding context + */ +int32_t GetTemporalLevel (SSpatialLayerInternal* fDlp, const int32_t kiFrameNum, const int32_t kiGopSize) { + const int32_t kiCodingIdx = kiFrameNum & (kiGopSize - 1); + + return fDlp->uiCodingIdx2TemporalId[kiCodingIdx]; +} + +void DynslcUpdateMbNeighbourInfoListForAllSlices (SDqLayer* pCurDq, SMB* pMbList) { + SSliceCtx* pSliceCtx = &pCurDq->sSliceEncCtx; + const int32_t kiMbWidth = pSliceCtx->iMbWidth; + const int32_t kiEndMbInSlice = pSliceCtx->iMbNumInFrame - 1; + int32_t iIdx = 0; + + do { + SMB* pMb = &pMbList[iIdx]; + UpdateMbNeighbor (pCurDq, pMb, kiMbWidth, WelsMbToSliceIdc (pCurDq, pMb->iMbXY)); + ++ iIdx; + } while (iIdx <= kiEndMbInSlice); +} + +/* + * TUNE back if number of picture partition decision algorithm based on past if available + */ +int32_t PicPartitionNumDecision (sWelsEncCtx* pCtx) { + int32_t iPartitionNum = 1; + if (pCtx->pSvcParam->iMultipleThreadIdc > 1) { + iPartitionNum = pCtx->pSvcParam->iMultipleThreadIdc; + } + return iPartitionNum; +} + +void WelsInitCurrentQBLayerMltslc (sWelsEncCtx* pCtx) { + //pData init + SDqLayer* pCurDq = pCtx->pCurDqLayer; + //mb_neighbor + DynslcUpdateMbNeighbourInfoListForAllSlices (pCurDq, pCurDq->sMbDataP); +} + +void 
UpdateSlicepEncCtxWithPartition (SDqLayer* pCurDq, int32_t iPartitionNum) { + SSliceCtx* pSliceCtx = &pCurDq->sSliceEncCtx; + const int32_t kiMbNumInFrame = pSliceCtx->iMbNumInFrame; + int32_t iCountMbNumPerPartition = kiMbNumInFrame; + int32_t iAssignableMbLeft = kiMbNumInFrame; + int32_t iCountMbNumInPartition = 0; + int32_t iFirstMbIdx = 0; + int32_t i/*, j*/; + + if (iPartitionNum <= 0) + iPartitionNum = 1; + else if (iPartitionNum > AVERSLICENUM_CONSTRAINT) + iPartitionNum = AVERSLICENUM_CONSTRAINT; // AVERSLICENUM_CONSTRAINT might be variable, however not fixed by MACRO + iCountMbNumPerPartition /= iPartitionNum; + if (iCountMbNumPerPartition == 0 || iCountMbNumPerPartition == 1) { + iCountMbNumPerPartition = kiMbNumInFrame; + iPartitionNum = 1; + } + + pSliceCtx->iSliceNumInFrame = iPartitionNum; + + i = 0; + while (i < iPartitionNum) { + if (i + 1 == iPartitionNum) { + iCountMbNumInPartition = iAssignableMbLeft; + } else { + iCountMbNumInPartition = iCountMbNumPerPartition; + } + + pCurDq->FirstMbIdxOfPartition[i] = iFirstMbIdx; + pCurDq->EndMbIdxOfPartition[i] = iFirstMbIdx + iCountMbNumInPartition - 1; + pCurDq->LastCodedMbIdxOfPartition[i] = 0; + pCurDq->NumSliceCodedOfPartition[i] = 0; + + WelsSetMemMultiplebytes_c (pSliceCtx->pOverallMbMap + iFirstMbIdx, i, + iCountMbNumInPartition, sizeof (uint16_t)); + + // for next partition(or pSlice) + iFirstMbIdx += iCountMbNumInPartition; + iAssignableMbLeft -= iCountMbNumInPartition; + ++ i; + } + + while (i < MAX_THREADS_NUM) { + pCurDq->FirstMbIdxOfPartition[i] = 0; + pCurDq->EndMbIdxOfPartition[i] = 0; + pCurDq->LastCodedMbIdxOfPartition[i] = 0; + pCurDq->NumSliceCodedOfPartition[i] = 0; + ++ i; + } +} + +void WelsInitCurrentDlayerMltslc (sWelsEncCtx* pCtx, int32_t iPartitionNum) { + SDqLayer* pCurDq = pCtx->pCurDqLayer; + SSliceCtx* pSliceCtx = &pCurDq->sSliceEncCtx; + uint32_t uiMiniPacketSize = 0; + + UpdateSlicepEncCtxWithPartition (pCurDq, iPartitionNum); + + if (I_SLICE == pCtx->eSliceType) { //check 
if uiSliceSizeConstraint too small +#define byte_complexIMBat26 (60) + uint8_t iCurDid = pCtx->uiDependencyId; + uint32_t uiFrmByte = 0; + + if (pCtx->pSvcParam->iRCMode != RC_OFF_MODE) { + //RC case + uiFrmByte = ( + ((uint32_t) (pCtx->pSvcParam->sSpatialLayers[iCurDid].iSpatialBitrate) + / (uint32_t) (pCtx->pSvcParam->sDependencyLayers[iCurDid].fInputFrameRate)) >> 3); + } else { + //fixed QP case + const int32_t iTtlMbNumInFrame = pSliceCtx->iMbNumInFrame; + int32_t iQDeltaTo26 = (26 - pCtx->pSvcParam->sSpatialLayers[iCurDid].iDLayerQp); + + uiFrmByte = (iTtlMbNumInFrame * byte_complexIMBat26); + if (iQDeltaTo26 > 0) { + //smaller QP than 26 + uiFrmByte = (uint32_t) (uiFrmByte * ((float)iQDeltaTo26 / 4)); + } else if (iQDeltaTo26 < 0) { + //larger QP than 26 + iQDeltaTo26 = ((-iQDeltaTo26) >> 2); //delta mod 4 + uiFrmByte = (uiFrmByte >> (iQDeltaTo26)); //if delta 4, byte /2 + } + } + + //MINPACKETSIZE_CONSTRAINT + //suppose 16 byte per mb at average + uiMiniPacketSize = (uint32_t) (uiFrmByte / pSliceCtx->iMaxSliceNumConstraint); + if (pSliceCtx->uiSliceSizeConstraint < uiMiniPacketSize) { + WelsLog (& (pCtx->sLogCtx), + WELS_LOG_WARNING, + "Set-SliceConstraint(%d) too small for current resolution (MB# %d) under QP/BR!", + pSliceCtx->uiSliceSizeConstraint, + pSliceCtx->iMbNumInFrame + ); + } + } + + WelsInitCurrentQBLayerMltslc (pCtx); +} + +/*! 
+ * \brief initialize current layer + */ +void WelsInitCurrentLayer (sWelsEncCtx* pCtx, + const int32_t kiWidth, + const int32_t kiHeight) { + SWelsSvcCodingParam* pParam = pCtx->pSvcParam; + SPicture* pEncPic = pCtx->pEncPic; + SPicture* pDecPic = pCtx->pDecPic; + SDqLayer* pCurDq = pCtx->pCurDqLayer; + SSlice* pBaseSlice = pCurDq->ppSliceInLayer[0]; + const uint8_t kiCurDid = pCtx->uiDependencyId; + const bool kbUseSubsetSpsFlag = (!pParam->bSimulcastAVC) && (kiCurDid > BASE_DEPENDENCY_ID); + SNalUnitHeaderExt* pNalHdExt = &pCurDq->sLayerInfo.sNalHeaderExt; + SNalUnitHeader* pNalHd = &pNalHdExt->sNalUnitHeader; + SDqIdc* pDqIdc = &pCtx->pDqIdcMap[kiCurDid]; + int32_t iIdx = 0; + int32_t iSliceCount = pCurDq->iMaxSliceNum; + SSpatialLayerInternal* pParamInternal = &pParam->sDependencyLayers[kiCurDid]; + if (NULL == pCurDq || NULL == pBaseSlice) + return; + + pCurDq->pDecPic = pDecPic; + + assert (iSliceCount > 0); + + int32_t iCurPpsId = pDqIdc->iPpsId; + int32_t iCurSpsId = pDqIdc->iSpsId; + + iCurPpsId = pCtx->pFuncList->pParametersetStrategy->GetCurrentPpsId (iCurPpsId, + WELS_ABS (pParamInternal->uiIdrPicId - 1) % MAX_PPS_COUNT); + + pBaseSlice->sSliceHeaderExt.sSliceHeader.iPpsId = iCurPpsId; + pCurDq->sLayerInfo.pPpsP = + pBaseSlice->sSliceHeaderExt.sSliceHeader.pPps = &pCtx->pPPSArray[iCurPpsId]; + + pBaseSlice->sSliceHeaderExt.sSliceHeader.iSpsId = iCurSpsId; + if (kbUseSubsetSpsFlag) { + pCurDq->sLayerInfo.pSubsetSpsP = &pCtx->pSubsetArray[iCurSpsId]; + pCurDq->sLayerInfo.pSpsP = + pBaseSlice->sSliceHeaderExt.sSliceHeader.pSps = &pCurDq->sLayerInfo.pSubsetSpsP->pSps; + } else { + pCurDq->sLayerInfo.pSubsetSpsP = NULL; + pCurDq->sLayerInfo.pSpsP = + pBaseSlice->sSliceHeaderExt.sSliceHeader.pSps = &pCtx->pSpsArray[iCurSpsId]; + } + + pBaseSlice->bSliceHeaderExtFlag = (NAL_UNIT_CODED_SLICE_EXT == pCtx->eNalType); + + iIdx = 1; + while (iIdx < iSliceCount) { + InitSliceHeadWithBase (pCurDq->ppSliceInLayer[iIdx], pBaseSlice); + ++ iIdx; + } + + memset 
(pNalHdExt, 0, sizeof (SNalUnitHeaderExt)); + pNalHd->uiNalRefIdc = pCtx->eNalPriority; + pNalHd->eNalUnitType = pCtx->eNalType; + + pNalHdExt->uiDependencyId = kiCurDid; + pNalHdExt->bDiscardableFlag = (pCtx->bNeedPrefixNalFlag) ? (pNalHd->uiNalRefIdc == NRI_PRI_LOWEST) : false; + pNalHdExt->bIdrFlag = (pParamInternal->iFrameNum == 0) + && ((pCtx->eNalType == NAL_UNIT_CODED_SLICE_IDR) + || (pCtx->eSliceType == I_SLICE)); + pNalHdExt->uiTemporalId = pCtx->uiTemporalId; + + // pEncPic pData + pCurDq->pEncData[0] = pEncPic->pData[0]; + pCurDq->pEncData[1] = pEncPic->pData[1]; + pCurDq->pEncData[2] = pEncPic->pData[2]; + pCurDq->iEncStride[0] = pEncPic->iLineSize[0]; + pCurDq->iEncStride[1] = pEncPic->iLineSize[1]; + pCurDq->iEncStride[2] = pEncPic->iLineSize[2]; + // cs pData + pCurDq->pCsData[0] = pDecPic->pData[0]; + pCurDq->pCsData[1] = pDecPic->pData[1]; + pCurDq->pCsData[2] = pDecPic->pData[2]; + pCurDq->iCsStride[0] = pDecPic->iLineSize[0]; + pCurDq->iCsStride[1] = pDecPic->iLineSize[1]; + pCurDq->iCsStride[2] = pDecPic->iLineSize[2]; + + if (pCurDq->pRefLayer != NULL) { + pCurDq->bBaseLayerAvailableFlag = true; + } else { + pCurDq->bBaseLayerAvailableFlag = false; + } + + if (pCtx->pTaskManage) { + pCtx->pTaskManage->InitFrame (kiCurDid); + } +} + +static inline void SetFastCodingFunc (SWelsFuncPtrList* pFuncList) { + pFuncList->pfIntraFineMd = WelsMdIntraFinePartitionVaa; + pFuncList->sSampleDealingFuncs.pfMdCost = pFuncList->sSampleDealingFuncs.pfSampleSad; + pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3 = pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3Sad; + pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3 = pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3Sad; +} +static inline void SetNormalCodingFunc (SWelsFuncPtrList* pFuncList) { + pFuncList->pfIntraFineMd = WelsMdIntraFinePartition; + pFuncList->sSampleDealingFuncs.pfMdCost = pFuncList->sSampleDealingFuncs.pfSampleSatd; + pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3 = + 
pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3Satd; + pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3 = + pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3Satd; + pFuncList->sSampleDealingFuncs.pfIntra4x4Combined3 = + pFuncList->sSampleDealingFuncs.pfIntra4x4Combined3Satd; +} +bool SetMeMethod (const uint8_t uiMethod, PSearchMethodFunc& pSearchMethodFunc) { + switch (uiMethod) { + case ME_DIA: + pSearchMethodFunc = WelsDiamondSearch; + break; + case ME_CROSS: + pSearchMethodFunc = WelsMotionCrossSearch; + break; + case ME_DIA_CROSS: + pSearchMethodFunc = WelsDiamondCrossSearch; + break; + case ME_DIA_CROSS_FME: + pSearchMethodFunc = WelsDiamondCrossFeatureSearch; + break; + case ME_FULL: + pSearchMethodFunc = WelsDiamondSearch; + return false; + default: + pSearchMethodFunc = WelsDiamondSearch; + return false; + } + return true; +} + + + +void PreprocessSliceCoding (sWelsEncCtx* pCtx) { + SDqLayer* pCurLayer = pCtx->pCurDqLayer; + //const bool kbBaseAvail = pCurLayer->bBaseLayerAvailableFlag; + bool bFastMode = (pCtx->pSvcParam->iComplexityMode == LOW_COMPLEXITY); + SWelsFuncPtrList* pFuncList = pCtx->pFuncList; + SLogContext* pLogCtx = & (pCtx->sLogCtx); + /* function pointers conditional assignment under sWelsEncCtx, layer_mb_enc_rec (in stack) is exclusive */ + if ((pCtx->pSvcParam->iUsageType == CAMERA_VIDEO_REAL_TIME && bFastMode) || + (pCtx->pSvcParam->iUsageType == SCREEN_CONTENT_REAL_TIME && P_SLICE == pCtx->eSliceType + && bFastMode) //TODO: here is for sync with the origin code, consider the design again with more tests + ) { + SetFastCodingFunc (pFuncList); + } else { + SetNormalCodingFunc (pFuncList); + } + + if (P_SLICE == pCtx->eSliceType) { + for (int i = 0; i < BLOCK_STATIC_IDC_ALL; i++) { + pFuncList->pfMotionSearch[i] = WelsMotionEstimateSearch; + } + pFuncList->pfSearchMethod[BLOCK_16x16] = + pFuncList->pfSearchMethod[BLOCK_16x8] = + pFuncList->pfSearchMethod[BLOCK_8x16] = + pFuncList->pfSearchMethod[BLOCK_8x8] = + 
pFuncList->pfSearchMethod[BLOCK_4x4] = + pFuncList->pfSearchMethod[BLOCK_8x4] = + pFuncList->pfSearchMethod[BLOCK_4x8] = WelsDiamondSearch; + pFuncList->pfFirstIntraMode = WelsMdFirstIntraMode; + pFuncList->sSampleDealingFuncs.pfMeCost = pCtx->pFuncList->sSampleDealingFuncs.pfSampleSatd; + pFuncList->pfSetScrollingMv = SetScrollingMvToMdNull; + + if (bFastMode) { + pFuncList->pfCalculateSatd = NotCalculateSatdCost; + pFuncList->pfInterFineMd = WelsMdInterFinePartitionVaa; + } else { + pFuncList->pfCalculateSatd = CalculateSatdCost; + pFuncList->pfInterFineMd = WelsMdInterFinePartition; + } + } else { + pFuncList->sSampleDealingFuncs.pfMeCost = NULL; + } + + //to init at each frame will be needed when dealing with hybrid content (camera+screen) + if (pCtx->pSvcParam->iUsageType == SCREEN_CONTENT_REAL_TIME) { + if (P_SLICE == pCtx->eSliceType) { + //MD related func pointers + pFuncList->pfInterFineMd = WelsMdInterFinePartitionVaaOnScreen; + + //ME related func pointers + SVAAFrameInfoExt* pVaaExt = static_cast (pCtx->pVaa); + if (pVaaExt->sScrollDetectInfo.bScrollDetectFlag + && (pVaaExt->sScrollDetectInfo.iScrollMvX | pVaaExt->sScrollDetectInfo.iScrollMvY)) { + pFuncList->pfSetScrollingMv = SetScrollingMvToMd; + } else { + pFuncList->pfSetScrollingMv = SetScrollingMvToMdNull; + } + + pFuncList->pfMotionSearch[NO_STATIC] = WelsMotionEstimateSearch; + pFuncList->pfMotionSearch[COLLOCATED_STATIC] = WelsMotionEstimateSearchStatic; + pFuncList->pfMotionSearch[SCROLLED_STATIC] = WelsMotionEstimateSearchScrolled; + //ME16x16 + if (!SetMeMethod (ME_DIA_CROSS, pFuncList->pfSearchMethod[BLOCK_16x16])) { + WelsLog (pLogCtx, WELS_LOG_WARNING, "SetMeMethod(BLOCK_16x16) ME_DIA_CROSS unsuccessful, switched to default search"); + } + //ME8x8 + SFeatureSearchPreparation* pFeatureSearchPreparation = pCurLayer->pFeatureSearchPreparation; + if (pFeatureSearchPreparation) { + pFeatureSearchPreparation->iHighFreMbCount = 0; + + //calculate bFMESwitchFlag + SVAAFrameInfoExt* pVaaExt = 
static_cast (pCtx->pVaa); + const int32_t kiMbSize = pCurLayer->iMbHeight * pCurLayer->iMbWidth; + pFeatureSearchPreparation->bFMESwitchFlag = CalcFMESwitchFlag (pFeatureSearchPreparation->uiFMEGoodFrameCount, + pFeatureSearchPreparation->iHighFreMbCount * 100 / kiMbSize, pCtx->pVaa->sVaaCalcInfo.iFrameSad / kiMbSize, + pVaaExt->sScrollDetectInfo.bScrollDetectFlag); + + //PerformFMEPreprocess + SScreenBlockFeatureStorage* pScreenBlockFeatureStorage = pCurLayer->pRefPic->pScreenBlockFeatureStorage; + pFeatureSearchPreparation->pRefBlockFeature = pScreenBlockFeatureStorage; + if (pFeatureSearchPreparation->bFMESwitchFlag + && !pScreenBlockFeatureStorage->bRefBlockFeatureCalculated) { + SPicture* pRef = (pCtx->pSvcParam->bEnableLongTermReference ? pCurLayer->pRefOri[0] : pCurLayer->pRefPic); + PerformFMEPreprocess (pFuncList, pRef, pFeatureSearchPreparation->pFeatureOfBlock, + pScreenBlockFeatureStorage); + } + + //assign ME pointer + if (pFeatureSearchPreparation->bFMESwitchFlag && pScreenBlockFeatureStorage->bRefBlockFeatureCalculated + && (!pScreenBlockFeatureStorage->iIs16x16)) { + if (!SetMeMethod (ME_DIA_CROSS_FME, pFuncList->pfSearchMethod[BLOCK_8x8])) { + WelsLog (pLogCtx, WELS_LOG_WARNING, + "SetMeMethod(BLOCK_8x8) ME_DIA_CROSS_FME unsuccessful, switched to default search"); + } + } + + //assign UpdateFMESwitch pointer + if (pFeatureSearchPreparation->bFMESwitchFlag) { + pFuncList->pfUpdateFMESwitch = UpdateFMESwitch; + } else { + pFuncList->pfUpdateFMESwitch = UpdateFMESwitchNull; + } + }//if (pFeatureSearchPreparation) + } else { + //reset some status when at I_SLICE + pCurLayer->pFeatureSearchPreparation->bFMESwitchFlag = true; + pCurLayer->pFeatureSearchPreparation->uiFMEGoodFrameCount = FMESWITCH_DEFAULT_GOODFRAME_NUM; + } + } + + // update some layer dependent variable to save judgements in mb-level + pCurLayer->bSatdInMdFlag = ((pFuncList->sSampleDealingFuncs.pfMeCost == pFuncList->sSampleDealingFuncs.pfSampleSatd) + && 
(pFuncList->sSampleDealingFuncs.pfMdCost == pFuncList->sSampleDealingFuncs.pfSampleSatd)); + + const int32_t kiCurDid = pCtx->uiDependencyId; + const int32_t kiCurTid = pCtx->uiTemporalId; + if (pCurLayer->bDeblockingParallelFlag && (pCurLayer->iLoopFilterDisableIdc != 1) +#if !defined(ENABLE_FRAME_DUMP) + && (NRI_PRI_LOWEST != pCtx->eNalPriority) + && (pCtx->pSvcParam->sDependencyLayers[kiCurDid].iHighestTemporalId == 0 + || kiCurTid < pCtx->pSvcParam->sDependencyLayers[kiCurDid].iHighestTemporalId) +#endif// !ENABLE_FRAME_DUMP + ) { + pFuncList->pfDeblocking.pfDeblockingFilterSlice = DeblockingFilterSliceAvcbase; + } else { + pFuncList->pfDeblocking.pfDeblockingFilterSlice = DeblockingFilterSliceAvcbaseNull; + } +} + +/*! + * \brief swap pDq layers between current pDq layer and reference pDq layer + */ + +static inline void WelsSwapDqLayers (sWelsEncCtx* pCtx, const int32_t kiNextDqIdx) { + // swap and assign reference + SDqLayer* pTmpLayer = pCtx->ppDqLayerList[kiNextDqIdx]; + SDqLayer* pRefLayer = pCtx->pCurDqLayer; + pCtx->pCurDqLayer = pTmpLayer; + pCtx->pCurDqLayer->pRefLayer = pRefLayer; +} + +/*! 
+ * \brief prefetch reference picture after WelsBuildRefList
+ * \param pCtx         encoder context; pCurDqLayer must already be selected
+ * \param keFrameType  type of the frame being coded; IDR frames get no reference picture
+ */
+static inline void PrefetchReferencePicture (sWelsEncCtx* pCtx, const EVideoFrameType keFrameType) {
+  const int32_t kiSliceCount = pCtx->pCurDqLayer->iMaxSliceNum;
+  int32_t iIdx = 0;
+  uint8_t uiRefIdx = -1; // wraps to 0xff: value stored in the slice headers when there is no reference (IDR)
+
+  assert (kiSliceCount > 0);
+  if (keFrameType != videoFrameTypeIDR) {
+    assert (pCtx->iNumRef0 > 0);
+    pCtx->pRefPic = pCtx->pRefList0[0]; // always get item 0 due to reordering done
+    pCtx->pCurDqLayer->pRefPic = pCtx->pRefPic;
+    uiRefIdx = 0; // reordered reference iIndex
+  } else { // safe for IDR coding
+    pCtx->pRefPic = NULL;
+    pCtx->pCurDqLayer->pRefPic = NULL;
+  }
+
+  // propagate the chosen reference index to every slice header of the layer
+  iIdx = 0;
+  while (iIdx < kiSliceCount) {
+    pCtx->pCurDqLayer->ppSliceInLayer[iIdx]->sSliceHeaderExt.sSliceHeader.uiRefIndex = uiRefIdx;
+    ++ iIdx;
+  }
+}
+
+/*!
+ * \brief write the SPS at index kiSpsIdx as one NAL unit into the frame bitstream buffer
+ * \param pCtx      encoder context
+ * \param kiSpsIdx  index into pCtx->pSpsArray
+ * \param iNalSize  [out] size of the encoded NAL as reported by WelsEncodeNal
+ * \return ENC_RETURN_SUCCESS; a WelsEncodeNal failure is handled by WELS_VERIFY_RETURN_IFNEQ
+ */
+int32_t WelsWriteOneSPS (sWelsEncCtx* pCtx, const int32_t kiSpsIdx, int32_t& iNalSize) {
+  int iNal = pCtx->pOut->iNalIndex;
+  WelsLoadNal (pCtx->pOut, NAL_UNIT_SPS, NRI_PRI_HIGHEST);
+
+  WelsWriteSpsNal (&pCtx->pSpsArray[kiSpsIdx], &pCtx->pOut->sBsWrite,
+                   pCtx->pFuncList->pParametersetStrategy->GetSpsIdOffsetList (PARA_SET_TYPE_AVCSPS));
+  WelsUnloadNal (pCtx->pOut);
+
+  int32_t iReturn = WelsEncodeNal (&pCtx->pOut->sNalList[iNal], NULL,
+                                   pCtx->iFrameBsSize - pCtx->iPosBsBuffer,//available buffer to be written, so need to substract the used length
+                                   pCtx->pFrameBs + pCtx->iPosBsBuffer,
+                                   &iNalSize);
+  WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)
+
+  pCtx->iPosBsBuffer += iNalSize;
+  return ENC_RETURN_SUCCESS;
+}
+
+/*!
+ * \brief write the PPS at index kiPpsIdx as one NAL unit into the frame bitstream buffer
+ * \param pCtx      encoder context
+ * \param kiPpsIdx  index into pCtx->pPPSArray
+ * \param iNalSize  [out] size of the encoded NAL as reported by WelsEncodeNal
+ * \return ENC_RETURN_SUCCESS; a WelsEncodeNal failure is handled by WELS_VERIFY_RETURN_IFNEQ
+ */
+int32_t WelsWriteOnePPS (sWelsEncCtx* pCtx, const int32_t kiPpsIdx, int32_t& iNalSize) {
+  //TODO
+  int32_t iNal = pCtx->pOut->iNalIndex;
+  /* generate picture parameter set */
+  WelsLoadNal (pCtx->pOut, NAL_UNIT_PPS, NRI_PRI_HIGHEST);
+
+  WelsWritePpsSyntax (&pCtx->pPPSArray[kiPpsIdx], &pCtx->pOut->sBsWrite,
+                      pCtx->pFuncList->pParametersetStrategy);
+  WelsUnloadNal (pCtx->pOut);
+
+  int32_t iReturn = WelsEncodeNal (&pCtx->pOut->sNalList[iNal], NULL,
+                                   pCtx->iFrameBsSize - pCtx->iPosBsBuffer,
+                                   pCtx->pFrameBs + pCtx->iPosBsBuffer,
+                                   &iNalSize);
+  WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)
+
+  pCtx->iPosBsBuffer += iNalSize;
+  return ENC_RETURN_SUCCESS;
+}
+
+
+/*!
+ * \brief write all parameter sets introduced in SVC extension
+ * \param pCtx          encoder context
+ * \param pNalLen       [out] one entry per written NAL, in write order (SPS, Subset SPS, PPS)
+ * \param pNumNal       [out] number of NALs written
+ * \param pTotalLength  [out] sum of all written NAL sizes
+ * \return writing results, success or error
+ */
+int32_t WelsWriteParameterSets (sWelsEncCtx* pCtx, int32_t* pNalLen, int32_t* pNumNal, int32_t* pTotalLength) {
+  int32_t iSize = 0;
+  int32_t iNal = 0;
+  int32_t iIdx = 0;
+  int32_t iId = 0;
+  int32_t iCountNal = 0;
+  int32_t iNalLength = 0;
+  int32_t iReturn = ENC_RETURN_SUCCESS;
+
+  // pTotalLength is dereferenced right below, so it has to be validated with the other outputs
+  if (NULL == pCtx || NULL == pNalLen || NULL == pNumNal || NULL == pTotalLength
+      || NULL == pCtx->pFuncList->pParametersetStrategy)
+    return ENC_RETURN_UNEXPECTED;
+
+  *pTotalLength = 0;
+  /* write all SPS */
+  iIdx = 0;
+  while (iIdx < pCtx->iSpsNum) {
+    pCtx->pFuncList->pParametersetStrategy->Update (pCtx->pSpsArray[iIdx].uiSpsId, PARA_SET_TYPE_AVCSPS);
+    /* generate sequence parameters set */
+    iId = pCtx->pFuncList->pParametersetStrategy->GetSpsIdx (iIdx);
+
+    // do not record a stale iNalLength when the encode fails
+    iReturn = WelsWriteOneSPS (pCtx, iId, iNalLength);
+    WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)
+
+    pNalLen[iCountNal] = iNalLength;
+    iSize += iNalLength;
+
+    ++ iIdx;
+    ++ iCountNal;
+  }
+
+  /* write all Subset SPS */
+  iIdx = 0;
+  while (iIdx < pCtx->iSubsetSpsNum) {
+    iNal = pCtx->pOut->iNalIndex;
+
+    pCtx->pFuncList->pParametersetStrategy->Update (pCtx->pSubsetArray[iIdx].pSps.uiSpsId, PARA_SET_TYPE_SUBSETSPS);
+
+    iId = iIdx;
+
+    /* generate Subset SPS */
+    WelsLoadNal (pCtx->pOut, NAL_UNIT_SUBSET_SPS, NRI_PRI_HIGHEST);
+
+    WelsWriteSubsetSpsSyntax (&pCtx->pSubsetArray[iId], &pCtx->pOut->sBsWrite,
+                              pCtx->pFuncList->pParametersetStrategy->GetSpsIdOffsetList (PARA_SET_TYPE_SUBSETSPS));
+    WelsUnloadNal (pCtx->pOut);
+
+    iReturn = WelsEncodeNal (&pCtx->pOut->sNalList[iNal], NULL,
+                             pCtx->iFrameBsSize - pCtx->iPosBsBuffer,//available buffer to be written, so need to substract the used length
+                             pCtx->pFrameBs + pCtx->iPosBsBuffer,
+                             &iNalLength);
+    WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)
+    pNalLen[iCountNal] = iNalLength;
+
+    pCtx->iPosBsBuffer += iNalLength;
+    iSize += iNalLength;
+
+    ++ iIdx;
+    ++ iCountNal;
+  }
+
+  pCtx->pFuncList->pParametersetStrategy->UpdatePpsList (pCtx);
+
+  /* write all PPS */
+  iIdx = 0;
+  while (iIdx < pCtx->iPpsNum) {
+    pCtx->pFuncList->pParametersetStrategy->Update (pCtx->pPPSArray[iIdx].iPpsId, PARA_SET_TYPE_PPS);
+
+    // same as for SPS: propagate the failure instead of accounting a stale length
+    iReturn = WelsWriteOnePPS (pCtx, iIdx, iNalLength);
+    WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)
+
+    pNalLen[iCountNal] = iNalLength;
+    iSize += iNalLength;
+
+    ++ iIdx;
+    ++ iCountNal;
+  }
+
+  *pNumNal = iCountNal;
+  *pTotalLength = iSize;
+
+  return ENC_RETURN_SUCCESS;
+}
+
+/*!
+ * \brief emit one prefix NAL unit for the current layer
+ *        The SVC prefix syntax is written only when keNalRefIdc != NRI_PRI_LOWEST;
+ *        otherwise the NAL is loaded and encoded without RBSP syntax.
+ * \param pNalLen         NAL length array of the layer; entry *pNalIdxInLayer is filled
+ * \param pNalIdxInLayer  [in/out] incremented once after the NAL is encoded
+ * \param iPayloadSize    [out] size of the encoded prefix NAL
+ * \return ENC_RETURN_SUCCESS; a WelsEncodeNal failure is handled by WELS_VERIFY_RETURN_IFNEQ
+ */
+static inline int32_t AddPrefixNal (sWelsEncCtx* pCtx,
+                                    SLayerBSInfo* pLayerBsInfo,
+                                    int32_t* pNalLen,
+                                    int32_t* pNalIdxInLayer,
+                                    const EWelsNalUnitType keNalType,
+                                    const EWelsNalRefIdc keNalRefIdc,
+                                    int32_t& iPayloadSize) {
+  int32_t iReturn = ENC_RETURN_SUCCESS;
+  iPayloadSize = 0;
+
+  if (keNalRefIdc != NRI_PRI_LOWEST) {
+    WelsLoadNal (pCtx->pOut, NAL_UNIT_PREFIX, keNalRefIdc);
+
+    WelsWriteSVCPrefixNal (&pCtx->pOut->sBsWrite, keNalRefIdc, (NAL_UNIT_CODED_SLICE_IDR == keNalType));
+
+    WelsUnloadNal (pCtx->pOut);
+
+    iReturn = WelsEncodeNal (&pCtx->pOut->sNalList[pCtx->pOut->iNalIndex - 1],
+                             &pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt,
+                             pCtx->iFrameBsSize - pCtx->iPosBsBuffer,
+                             pCtx->pFrameBs + pCtx->iPosBsBuffer,
+                             &pNalLen[*pNalIdxInLayer]);
+    WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)
+    iPayloadSize = pNalLen[*pNalIdxInLayer];
+
+    pCtx->iPosBsBuffer += iPayloadSize;
+
+    (*pNalIdxInLayer) ++;
+  } else { // No Prefix NAL Unit RBSP syntax here, but need add NAL Unit Header extension
+    WelsLoadNal (pCtx->pOut, NAL_UNIT_PREFIX, keNalRefIdc);
+    // No need write any syntax of prefix NAL Unit RBSP here
+    WelsUnloadNal (pCtx->pOut);
+
+    iReturn = WelsEncodeNal (&pCtx->pOut->sNalList[pCtx->pOut->iNalIndex - 1],
+                             &pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt,
+                             pCtx->iFrameBsSize - pCtx->iPosBsBuffer,
+                             pCtx->pFrameBs + pCtx->iPosBsBuffer,
+                             &pNalLen[*pNalIdxInLayer]);
+    WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)
+    iPayloadSize = pNalLen[*pNalIdxInLayer];
+
+    pCtx->iPosBsBuffer += iPayloadSize;
+
+    (*pNalIdxInLayer) ++;
+  }
+
+  return ENC_RETURN_SUCCESS;
+}
+
+/*!
+ * \brief write a filler-data NAL unit holding iLen bytes of 0xff
+ * \param iLen   number of 0xff bytes to write
+ * \param iSize  [out] size of the encoded NAL (0 on failure)
+ * \return ENC_RETURN_MEMOVERFLOWFOUND when the bit-string buffer has fewer than iLen bytes left
+ *         or no NAL slot is free, otherwise ENC_RETURN_SUCCESS
+ */
+int32_t WritePadding (sWelsEncCtx* pCtx, int32_t iLen, int32_t& iSize) {
+  int32_t i = 0;
+  int32_t iNal = 0;
+  SBitStringAux* pBs = NULL;
+  int32_t iNalLen;
+
+  iSize = 0;
+  iNal = pCtx->pOut->iNalIndex;
+  pBs = &pCtx->pOut->sBsWrite; // SBitStringAux instance for non VCL NALs decoding
+
+  if ((pBs->pEndBuf - pBs->pCurBuf) < iLen || iNal >= pCtx->pOut->iCountNals) {
+#if GOM_TRACE_FLAG
+    // the pointer difference is printed with %lld, hence the explicit long long cast
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
+             "[RC] paddingcal pBuffer overflow, bufferlen=%lld, paddinglen=%d, iNalIdx= %d, iCountNals= %d",
+             static_cast<long long> (pBs->pEndBuf - pBs->pCurBuf), iLen, iNal, pCtx->pOut->iCountNals);
+#endif
+    return ENC_RETURN_MEMOVERFLOWFOUND;
+  }
+
+  WelsLoadNal (pCtx->pOut, NAL_UNIT_FILLER_DATA, NRI_PRI_LOWEST);
+
+  for (i = 0; i < iLen; i++) {
+    BsWriteBits (pBs, 8, 0xff);
+  }
+
+  BsRbspTrailingBits (pBs);
+
+  WelsUnloadNal (pCtx->pOut);
+  int32_t iReturn = WelsEncodeNal (&pCtx->pOut->sNalList[iNal], NULL,
+                                   pCtx->iFrameBsSize - pCtx->iPosBsBuffer,
+                                   pCtx->pFrameBs + pCtx->iPosBsBuffer,
+                                   &iNalLen);
+  WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)
+
+  pCtx->iPosBsBuffer += iNalLen;
+  iSize += iNalLen;
+
+  return ENC_RETURN_SUCCESS;
+}
+
+/*
+ * Force coding IDR as follows
+ *
+ * When iLayerId is out of [0, MAX_SPATIAL_LAYER_NUM) or simulcast AVC is off, the
+ * counters of every spatial layer are reset; otherwise only layer iLayerId is reset.
+ * Returns 1 when pCtx is NULL, 0 otherwise.
+ */
+int32_t ForceCodingIDR (sWelsEncCtx* pCtx, int32_t iLayerId) {
+  if (NULL == pCtx)
+    return 1;
+  if ((iLayerId < 0) || (iLayerId >= MAX_SPATIAL_LAYER_NUM) || (!pCtx->pSvcParam->bSimulcastAVC)) {
+    for (int32_t iDid = 0; iDid < pCtx->pSvcParam->iSpatialLayerNum; iDid++) {
+      SSpatialLayerInternal* pParamInternal = &pCtx->pSvcParam->sDependencyLayers[iDid];
+      pParamInternal->iCodingIndex = 0;
+      pParamInternal->iFrameIndex = 0;
+      pParamInternal->iFrameNum = 0;
+      pParamInternal->iPOC = 0;
+      pParamInternal->bEncCurFrmAsIdrFlag = true;
+      // NOTE(review): statistics slot 0 is bumped once per layer here - confirm this is intended
+      pCtx->sEncoderStatistics[0].uiIDRReqNum++;
+    }
+    WelsLog (&pCtx->sLogCtx, WELS_LOG_INFO, "ForceCodingIDR(iDid 0-%d)at InputFrameCount=%d\n",
+             pCtx->pSvcParam->iSpatialLayerNum - 1, pCtx->sEncoderStatistics[0].uiInputFrameCount);
+
+
+
+  } else {
+    SSpatialLayerInternal* pParamInternal = &pCtx->pSvcParam->sDependencyLayers[iLayerId];
+    pParamInternal->iCodingIndex = 0;
+    pParamInternal->iFrameIndex = 0;
+    pParamInternal->iFrameNum = 0;
+    pParamInternal->iPOC = 0;
+    pParamInternal->bEncCurFrmAsIdrFlag = true;
+    pCtx->sEncoderStatistics[iLayerId].uiIDRReqNum++;
+    WelsLog (&pCtx->sLogCtx, WELS_LOG_INFO, "ForceCodingIDR(iDid %d)at InputFrameCount=%d\n", iLayerId,
+             pCtx->sEncoderStatistics[iLayerId].uiInputFrameCount);
+  }
+  pCtx->bCheckWindowStatusRefreshFlag = false;
+
+
+  return 0;
+}
+
+/*!
+ * \brief encode all parameter sets into pDst as a single non-VCL layer
+ * \param pCtx  encoder context
+ * \param pDst  destination, interpreted as SFrameBSInfo*
+ * \return ENC_RETURN_UNEXPECTED on NULL arguments, ENC_RETURN_SUCCESS otherwise;
+ *         a WelsWriteParameterSets failure is handled by WELS_VERIFY_RETURN_IFNEQ
+ */
+int32_t WelsEncoderEncodeParameterSets (sWelsEncCtx* pCtx, void* pDst) {
+  if (NULL == pCtx || NULL == pDst) {
+    return ENC_RETURN_UNEXPECTED;
+  }
+
+  SFrameBSInfo* pFbi = (SFrameBSInfo*)pDst;
+  SLayerBSInfo* pLayerBsInfo = &pFbi->sLayerInfo[0];
+  int32_t iCountNal = 0;
+  int32_t iTotalLength = 0;
+
+  pLayerBsInfo->pBsBuf = pCtx->pFrameBs;
+  pLayerBsInfo->pNalLengthInByte = pCtx->pOut->pNalLen;
+  InitBits (&pCtx->pOut->sBsWrite, pCtx->pOut->pBsBuffer, pCtx->pOut->uiSize);
+
+  pCtx->iPosBsBuffer = 0;
+  int32_t iReturn = WelsWriteParameterSets (pCtx, &pLayerBsInfo->pNalLengthInByte[0], &iCountNal, &iTotalLength);
+  WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)
+
+  pLayerBsInfo->uiSpatialId = 0;
+  pLayerBsInfo->uiTemporalId = 0;
+  pLayerBsInfo->uiQualityId = 0;
+  pLayerBsInfo->uiLayerType = NON_VIDEO_CODING_LAYER;
+  pLayerBsInfo->iNalCount = iCountNal;
+  pLayerBsInfo->eFrameType = videoFrameTypeInvalid;
+  pLayerBsInfo->iSubSeqId = 0;
+  //pCtx->eLastNalPriority = NRI_PRI_HIGHEST;
+  pFbi->iLayerNum = 1;
+  pFbi->eFrameType = videoFrameTypeInvalid;
+  WelsEmms();
+
+  return ENC_RETURN_SUCCESS;
+}
+
+/*!
+ * \brief map a frame type to its sub-sequence id
+ *        IDR -> 0, I -> 1, P marked as scene LTR -> 2, other P -> 3 + uiTemporalId,
+ *        anything else -> 3 + MAX_TEMPORAL_LAYER_NUM
+ */
+int32_t GetSubSequenceId (sWelsEncCtx* pCtx, EVideoFrameType eFrameType) {
+  int32_t iSubSeqId = 0;
+  if (eFrameType == videoFrameTypeIDR)
+    iSubSeqId = 0;
+  else if (eFrameType == videoFrameTypeI)
+    iSubSeqId = 1;
+  else if (eFrameType == videoFrameTypeP) {
+    if (pCtx->bCurFrameMarkedAsSceneLtr)
+      iSubSeqId = 2;
+    else
+      iSubSeqId = 3 + pCtx->uiTemporalId; //T0:3 T1:4 T2:5 T3:6
+  } else
+    iSubSeqId = 3 + MAX_TEMPORAL_LAYER_NUM;
+  return iSubSeqId;
+}
+
+// writing parasets for (simulcast) svc
+// Writes every parameter set into the current layer info, bumps uiIdrPicId (wrapping
+// after 65535) for the first kiSpatialNum layers, then advances pLayerBsInfo to the
+// next layer slot. iLayerNum and iFrameSize are in/out counters owned by the caller.
+int32_t WriteSsvcParaset (sWelsEncCtx* pCtx, const int32_t kiSpatialNum,
+                          SLayerBSInfo*& pLayerBsInfo, int32_t& iLayerNum, int32_t& iFrameSize) {
+  int32_t iNonVclSize = 0, iCountNal = 0, iReturn = ENC_RETURN_SUCCESS;
+  iReturn = WelsWriteParameterSets (pCtx, &pLayerBsInfo->pNalLengthInByte[0], &iCountNal, &iNonVclSize);
+  WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)
+  for (int32_t iSpatialId = 0; iSpatialId < kiSpatialNum; iSpatialId++) {
+    SSpatialLayerInternal* pParamInternal = &pCtx->pSvcParam->sDependencyLayers[iSpatialId];
+    if (pParamInternal->uiIdrPicId < 65535) {
+      ++ pParamInternal->uiIdrPicId;
+    } else {
+      pParamInternal->uiIdrPicId = 0;
+    }
+  }
+  pLayerBsInfo->uiSpatialId = 0;
+  pLayerBsInfo->uiTemporalId = 0;
+  pLayerBsInfo->uiQualityId = 0;
+  pLayerBsInfo->uiLayerType = NON_VIDEO_CODING_LAYER;
+  pLayerBsInfo->iNalCount = iCountNal;
+  pLayerBsInfo->eFrameType = videoFrameTypeIDR;
+  pLayerBsInfo->iSubSeqId = GetSubSequenceId (pCtx, videoFrameTypeIDR);
+  //point to next pLayerBsInfo
+  ++ pLayerBsInfo;
+  ++ pCtx->pOut->iLayerBsIndex;
+  pLayerBsInfo->pBsBuf = pCtx->pFrameBs + pCtx->iPosBsBuffer;
+  pLayerBsInfo->pNalLengthInByte = (pLayerBsInfo - 1)->pNalLengthInByte + iCountNal;
+
+  //update for external countings
+  ++ iLayerNum;
+  iFrameSize += iNonVclSize;
+  return iReturn;
+}
+
+// writing parasets for simulcast avc
+// Writes SPS[iIdx] and PPS[iIdx] as two separate one-NAL layers and advances
+// pLayerBsInfo / iLayerNum once for each of them.
+int32_t WriteSavcParaset (sWelsEncCtx* pCtx, const int32_t iIdx,
+                          SLayerBSInfo*& pLayerBsInfo, int32_t& iLayerNum, int32_t& iFrameSize) {
+  int32_t iNonVclSize = 0, iCountNal = 0, iReturn = ENC_RETURN_SUCCESS;
+
+  // write SPS
+  iNonVclSize = 0;
+
+  //writing one NAL
+  int32_t iNalSize = 0;
+  iCountNal = 0;
+
+
+  if (pCtx->pFuncList->pParametersetStrategy) {
+    pCtx->pFuncList->pParametersetStrategy->Update (pCtx->pSpsArray[iIdx].uiSpsId, PARA_SET_TYPE_AVCSPS);
+  }
+
+  iReturn = WelsWriteOneSPS (pCtx, iIdx, iNalSize);
+  WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)
+
+  pLayerBsInfo->pNalLengthInByte[iCountNal] = iNalSize;
+  iNonVclSize += iNalSize;
+  iCountNal = 1;
+
+  //finish writing one NAL
+
+  pLayerBsInfo->uiSpatialId = iIdx;
+  pLayerBsInfo->uiTemporalId = 0;
+  pLayerBsInfo->uiQualityId = 0;
+  pLayerBsInfo->uiLayerType = NON_VIDEO_CODING_LAYER;
+  pLayerBsInfo->iNalCount = iCountNal;
+  pLayerBsInfo->eFrameType = videoFrameTypeIDR;
+  pLayerBsInfo->iSubSeqId = GetSubSequenceId (pCtx, videoFrameTypeIDR);
+  //point to next pLayerBsInfo
+  ++ pLayerBsInfo;
+  ++ pCtx->pOut->iLayerBsIndex;
+  pLayerBsInfo->pBsBuf = pCtx->pFrameBs + pCtx->iPosBsBuffer;
+  pLayerBsInfo->pNalLengthInByte = (pLayerBsInfo - 1)->pNalLengthInByte + iCountNal;
+  //update for external countings
+  ++ iLayerNum;
+
+  // write PPS
+
+  //TODO: under new strategy, will PPS be correctly updated?
+
+  //writing one NAL
+  iNalSize = 0;
+  iCountNal = 0;
+
+  if (pCtx->pFuncList->pParametersetStrategy) {
+    pCtx->pFuncList->pParametersetStrategy->Update (pCtx->pPPSArray[iIdx].iPpsId, PARA_SET_TYPE_PPS);
+  }
+
+  iReturn = WelsWriteOnePPS (pCtx, iIdx, iNalSize);
+  WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)
+
+  pLayerBsInfo->pNalLengthInByte[iCountNal] = iNalSize;
+  iNonVclSize += iNalSize;
+  iCountNal = 1;
+  //finish writing one NAL
+
+  pLayerBsInfo->uiSpatialId = iIdx;
+  pLayerBsInfo->uiTemporalId = 0;
+  pLayerBsInfo->uiQualityId = 0;
+  pLayerBsInfo->uiLayerType = NON_VIDEO_CODING_LAYER;
+  pLayerBsInfo->iNalCount = iCountNal;
+  pLayerBsInfo->eFrameType = videoFrameTypeIDR;
+  pLayerBsInfo->iSubSeqId = GetSubSequenceId (pCtx, videoFrameTypeIDR);
+  //point to next pLayerBsInfo
+  ++ pLayerBsInfo;
+  ++ pCtx->pOut->iLayerBsIndex;
+  pLayerBsInfo->pBsBuf = pCtx->pFrameBs + pCtx->iPosBsBuffer;
+  pLayerBsInfo->pNalLengthInByte = (pLayerBsInfo - 1)->pNalLengthInByte + iCountNal;
+  //update for external countings
+  ++ iLayerNum;
+
+  // to check number of layers / nals / slices dependencies
+  if (iLayerNum > MAX_LAYER_NUM_OF_FRAME) {
+    WelsLog (& pCtx->sLogCtx, WELS_LOG_ERROR, "WriteSavcParaset(), iLayerNum(%d) > MAX_LAYER_NUM_OF_FRAME(%d)!",
+             iLayerNum, MAX_LAYER_NUM_OF_FRAME);
+    // NOTE(review): literal 1 here, while WriteSavcParaset_Listing returns ENC_RETURN_UNEXPECTED
+    // for the same condition - confirm which error code callers expect
+    return 1;
+  }
+
+  iFrameSize += iNonVclSize;
+  return iReturn;
+}
+
+//cover the logic of simulcast avc + sps_pps_listing
+// For each of the kiSpatialNum layers: bump uiIdrPicId (wrapping after 65535) and write
+// all SPS as one layer; then, again per layer, write all PPS as one layer.
+int32_t WriteSavcParaset_Listing (sWelsEncCtx* pCtx, const int32_t kiSpatialNum,
+                                  SLayerBSInfo*& pLayerBsInfo, int32_t& iLayerNum, int32_t& iFrameSize) {
+  int32_t iNonVclSize = 0, iCountNal = 0, iReturn = ENC_RETURN_SUCCESS;
+
+  // write SPS
+  iNonVclSize = 0;
+
+  for (int32_t iSpatialId = 0; iSpatialId < kiSpatialNum; iSpatialId++) {
+    SSpatialLayerInternal* pParamInternal = &pCtx->pSvcParam->sDependencyLayers[iSpatialId];
+    if (pParamInternal->uiIdrPicId < 65535) {
+      ++ pParamInternal->uiIdrPicId;
+    } else {
+      pParamInternal->uiIdrPicId = 0;
+    }
+
+    iCountNal = 0;
+
+    for (int32_t iIdx = 0; iIdx < pCtx->iSpsNum; iIdx++) {
+      //writing one NAL
+      int32_t iNalSize = 0;
+      iReturn = WelsWriteOneSPS (pCtx, iIdx, iNalSize);
+      WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)
+
+      pLayerBsInfo->pNalLengthInByte[iCountNal] = iNalSize;
+      iNonVclSize += iNalSize;
+      iCountNal ++;
+      //finish writing one NAL
+    }
+
+    pLayerBsInfo->uiSpatialId = iSpatialId;
+    pLayerBsInfo->uiTemporalId = 0;
+    pLayerBsInfo->uiQualityId = 0;
+    pLayerBsInfo->uiLayerType = NON_VIDEO_CODING_LAYER;
+    pLayerBsInfo->iNalCount = iCountNal;
+    pLayerBsInfo->eFrameType = videoFrameTypeIDR;
+    pLayerBsInfo->iSubSeqId = GetSubSequenceId (pCtx, videoFrameTypeIDR);
+    //point to next pLayerBsInfo
+    ++ pLayerBsInfo;
+    ++ pCtx->pOut->iLayerBsIndex;
+    pLayerBsInfo->pBsBuf = pCtx->pFrameBs + pCtx->iPosBsBuffer;
+    pLayerBsInfo->pNalLengthInByte = (pLayerBsInfo - 1)->pNalLengthInByte + iCountNal;
+    //update for external countings
+    ++ iLayerNum;
+  }
+
+  // write PPS
+  pCtx->pFuncList->pParametersetStrategy->UpdatePpsList (pCtx);
+
+  //TODO: under new strategy, will PPS be correctly updated?
+  for (int32_t iSpatialId = 0; iSpatialId < kiSpatialNum; iSpatialId++) {
+    iCountNal = 0;
+    for (int32_t iIdx = 0; iIdx < pCtx->iPpsNum; iIdx++) {
+      //writing one NAL
+      int32_t iNalSize = 0;
+      iReturn = WelsWriteOnePPS (pCtx, iIdx, iNalSize);
+      WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)
+
+      pLayerBsInfo->pNalLengthInByte[iCountNal] = iNalSize;
+      iNonVclSize += iNalSize;
+      iCountNal ++;
+      //finish writing one NAL
+    }
+
+    pLayerBsInfo->uiSpatialId = iSpatialId;
+    pLayerBsInfo->uiTemporalId = 0;
+    pLayerBsInfo->uiQualityId = 0;
+    pLayerBsInfo->uiLayerType = NON_VIDEO_CODING_LAYER;
+    pLayerBsInfo->iNalCount = iCountNal;
+    pLayerBsInfo->eFrameType = videoFrameTypeIDR;
+    pLayerBsInfo->iSubSeqId = GetSubSequenceId (pCtx, videoFrameTypeIDR);
+    //point to next pLayerBsInfo
+    ++ pLayerBsInfo;
+    ++ pCtx->pOut->iLayerBsIndex;
+    pLayerBsInfo->pBsBuf = pCtx->pFrameBs + pCtx->iPosBsBuffer;
+    pLayerBsInfo->pNalLengthInByte = (pLayerBsInfo - 1)->pNalLengthInByte + iCountNal;
+    //update for external countings
+    ++ iLayerNum;
+  }
+
+  // to check number of layers / nals / slices dependencies
+  if (iLayerNum > MAX_LAYER_NUM_OF_FRAME) {
+    // log under this function's own name so the failure can be traced to the listing path
+    WelsLog (& pCtx->sLogCtx, WELS_LOG_ERROR, "WriteSavcParaset_Listing(), iLayerNum(%d) > MAX_LAYER_NUM_OF_FRAME(%d)!",
+             iLayerNum, MAX_LAYER_NUM_OF_FRAME);
+    return ENC_RETURN_UNEXPECTED;
+  }
+
+  iFrameSize += iNonVclSize;
+  return iReturn;
+}
+
+/*!
+ * \brief roll the encoder state back for the current dependency layer
+ *        Resets the bitstream write position, NAL index and layer index. For P/I frames
+ *        the frame index and POC are stepped back and the frame number is reloaded; for
+ *        IDR frames uiIdrPicId is stepped back and the next frame is forced to IDR.
+ */
+void StackBackEncoderStatus (sWelsEncCtx* pEncCtx,
+                             EVideoFrameType keFrameType) {
+  SSpatialLayerInternal* pParamInternal = &pEncCtx->pSvcParam->sDependencyLayers[pEncCtx->uiDependencyId];
+  // for bitstream writing
+  pEncCtx->iPosBsBuffer = 0; // reset bs pBuffer position
+  pEncCtx->pOut->iNalIndex = 0; // reset NAL index
+  pEncCtx->pOut->iLayerBsIndex = 0; // reset index of Layer Bs
+
+  InitBits (&pEncCtx->pOut->sBsWrite, pEncCtx->pOut->pBsBuffer, pEncCtx->pOut->uiSize);
+  if ((keFrameType == videoFrameTypeP) || (keFrameType == videoFrameTypeI)) {
+    pParamInternal->iFrameIndex --;
+    if (pParamInternal->iPOC != 0) {
+      pParamInternal->iPOC -= 2;
+    } else {
+      // POC was 0: step back to the last even value below 2^iLog2MaxPocLsb
+      pParamInternal->iPOC = (1 << pEncCtx->pSps->iLog2MaxPocLsb) - 2;
+    }
+
+    LoadBackFrameNum (pEncCtx, pEncCtx->uiDependencyId);
+
+    pEncCtx->eNalType = NAL_UNIT_CODED_SLICE;
+    pEncCtx->eSliceType = P_SLICE;
+    //pEncCtx->eNalPriority = pEncCtx->eLastNalPriority; //not need this since eNalPriority will be updated at the beginning of coding a frame
+  } else if (keFrameType == videoFrameTypeIDR) {
+    pParamInternal->uiIdrPicId --;
+
+    //set the next frame to be IDR
+    ForceCodingIDR (pEncCtx, pEncCtx->uiDependencyId);
+  } else { // B pictures are not supported now, any else?
+    assert (0);
+  }
+
+  // no need to stack back RC info since the info is still useful for later RQ model calculation
+  // no need to stack back MB slicing info for dynamic balancing, since the info is still refer-able
+}
+
+/*!
+ * \brief reset pFbi to an empty frame
+ *        Layer 0 is re-pointed at the frame buffer and NAL length array; the first
+ *        pFbi->iLayerNum layers are marked as skipped with zero NALs.
+ */
+void ClearFrameBsInfo (sWelsEncCtx* pCtx, SFrameBSInfo* pFbi) {
+  pFbi->sLayerInfo[0].pBsBuf = pCtx->pFrameBs;
+  pFbi->sLayerInfo[0].pNalLengthInByte = pCtx->pOut->pNalLen;
+
+  for (int i = 0; i < pFbi->iLayerNum; i++) {
+    pFbi->sLayerInfo[i].iNalCount = 0;
+    pFbi->sLayerInfo[i].eFrameType = videoFrameTypeSkip;
+  }
+  pFbi->iLayerNum = 0;
+  pFbi->iFrameSizeInBytes = 0;
+}
+
+/*!
+ * \brief decide the frame type for layer iCurDid and do the per-type preparation
+ *        On skip, the rate-control buffer is updated (only iCurDid under simulcast AVC,
+ *        every mapped spatial layer otherwise). On IDR, the parameter sets are written
+ *        and the result is stored in pCtx->iEncoderError.
+ * \param iCurTid  [out] temporal level of the frame (untouched when the frame is skipped)
+ * \return the decided frame type
+ */
+EVideoFrameType PrepareEncodeFrame (sWelsEncCtx* pCtx, SLayerBSInfo*& pLayerBsInfo, int32_t iSpatialNum,
+                                    int8_t& iCurDid, int32_t& iCurTid,
+                                    int32_t& iLayerNum, int32_t& iFrameSize, long long uiTimeStamp) {
+  SWelsSvcCodingParam* pSvcParam = pCtx->pSvcParam;
+  SSpatialPicIndex* pSpatialIndexMap = &pCtx->sSpatialIndexMap[0];
+
+  bool bSkipFrameFlag = WelsRcCheckFrameStatus (pCtx, uiTimeStamp, iSpatialNum, iCurDid);
+  EVideoFrameType eFrameType = DecideFrameType (pCtx, iSpatialNum, iCurDid, bSkipFrameFlag);
+  if (eFrameType == videoFrameTypeSkip) {
+    if (pSvcParam->bSimulcastAVC) {
+      if (pCtx->pFuncList->pfRc.pfWelsUpdateBufferWhenSkip)
+        pCtx->pFuncList->pfRc.pfWelsUpdateBufferWhenSkip (pCtx, iCurDid);
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG,
+               "[Rc] Frame timestamp = %lld, iDid = %d,skip one frame due to target_br, continual skipped %d frames",
+               uiTimeStamp, iCurDid, pCtx->pWelsSvcRc[iCurDid].iContinualSkipFrames);
+    }
+
+    else {
+      if (pCtx->pFuncList->pfRc.pfWelsUpdateBufferWhenSkip) {
+        for (int32_t i = 0; i < iSpatialNum; i++) {
+          pCtx->pFuncList->pfRc.pfWelsUpdateBufferWhenSkip (pCtx, (pSpatialIndexMap + i)->iDid);
+        }
+      }
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG,
+               "[Rc] Frame timestamp = %lld, iDid = %d,skip one frame due to target_br, continual skipped %d frames",
+               uiTimeStamp, iCurDid, pCtx->pWelsSvcRc[iCurDid].iContinualSkipFrames);
+    }
+
+  } else {
+    SSpatialLayerInternal* pParamInternal = &pSvcParam->sDependencyLayers[iCurDid];
+
+    iCurTid = GetTemporalLevel (pParamInternal, pParamInternal->iCodingIndex,
+                                pSvcParam->uiGopSize);
+    pCtx->uiTemporalId = iCurTid;
+    if (eFrameType == videoFrameTypeIDR) {
+      // write parameter sets bitstream or SEI/SSEI (if any) here
+      // TODO: use function pointer instead
+      if (! (SPS_LISTING & pCtx->pSvcParam->eSpsPpsIdStrategy)) {
+        if (pSvcParam->bSimulcastAVC) {
+          pCtx->iEncoderError = WriteSavcParaset (pCtx, iCurDid, pLayerBsInfo, iLayerNum, iFrameSize);
+          ++ pParamInternal->uiIdrPicId;
+        } else {
+          pCtx->iEncoderError = WriteSsvcParaset (pCtx, iSpatialNum, pLayerBsInfo, iLayerNum, iFrameSize);
+        }
+      } else {
+        pCtx->iEncoderError = WriteSavcParaset_Listing (pCtx, iSpatialNum, pLayerBsInfo, iLayerNum, iFrameSize);
+
+
+      }
+    }
+  }
+  return eFrameType;
+}
+/*!
+ * \brief core svc encoding process + * + * \pParam pCtx sWelsEncCtx*, encoder context + * \pParam pFbi FrameBSInfo* + * \pParam pSrcPic Source Picture + * \return EFrameType (videoFrameTypeIDR/videoFrameTypeI/videoFrameTypeP) + */ +int32_t WelsEncoderEncodeExt (sWelsEncCtx* pCtx, SFrameBSInfo* pFbi, const SSourcePicture* pSrcPic) { + if (pCtx == NULL) { + return ENC_RETURN_MEMALLOCERR; + } + SLayerBSInfo* pLayerBsInfo = &pFbi->sLayerInfo[0]; + SWelsSvcCodingParam* pSvcParam = pCtx->pSvcParam; + SSpatialPicIndex* pSpatialIndexMap = &pCtx->sSpatialIndexMap[0]; +#if defined(ENABLE_FRAME_DUMP) || defined(ENABLE_PSNR_CALC) + SPicture* fsnr = NULL; +#endif//ENABLE_FRAME_DUMP || ENABLE_PSNR_CALC + SPicture* pEncPic = NULL; // to be decided later +#if defined(MT_DEBUG) + int32_t iDidList[MAX_DEPENDENCY_LAYER] = {0}; +#endif + int32_t iLayerNum = 0; + int32_t iLayerSize = 0; + int32_t iSpatialNum = + 0; // available count number of spatial layers due to frame size changed in this given frame + int32_t iSpatialIdx = 0; // iIndex of spatial layers due to frame size changed in this given frame + int32_t iFrameSize = 0; + int32_t iNalIdxInLayer = 0; + int32_t iCountNal = 0; + EVideoFrameType eFrameType = videoFrameTypeInvalid; + int32_t iCurWidth = 0; + int32_t iCurHeight = 0; + EWelsNalUnitType eNalType = NAL_UNIT_UNSPEC_0; + EWelsNalRefIdc eNalRefIdc = NRI_PRI_LOWEST; + int8_t iCurDid = 0; + int32_t iCurTid = 0; + bool bAvcBased = false; + SLogContext* pLogCtx = & (pCtx->sLogCtx); +#if defined(ENABLE_PSNR_CALC) + float fSnrY = .0f, fSnrU = .0f, fSnrV = .0f; +#endif//ENABLE_PSNR_CALC + +#if defined(_DEBUG) + int32_t i = 0, j = 0, k = 0; +#endif//_DEBUG + pCtx->iEncoderError = ENC_RETURN_SUCCESS; + pCtx->bCurFrameMarkedAsSceneLtr = false; + pFbi->eFrameType = videoFrameTypeSkip; + pFbi->iLayerNum = 0; // for initialization + pFbi->uiTimeStamp = GetTimestampForRc (pSrcPic->uiTimeStamp, pCtx->uiLastTimestamp, + pCtx->pSvcParam->sSpatialLayers[pCtx->pSvcParam->iSpatialLayerNum - 
1].fFrameRate); + for (int32_t iNalIdx = 0; iNalIdx < MAX_LAYER_NUM_OF_FRAME; iNalIdx++) { + pFbi->sLayerInfo[iNalIdx].eFrameType = videoFrameTypeSkip; + pFbi->sLayerInfo[iNalIdx].iNalCount = 0; + } + // perform csc/denoise/downsample/padding, generate spatial layers + iSpatialNum = pCtx->pVpp->BuildSpatialPicList (pCtx, pSrcPic); + if (iSpatialNum == -1) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "Failed in allocating memory in BuildSpatialPicList"); + return ENC_RETURN_MEMALLOCERR; + } + + if (pCtx->pFuncList->pfRc.pfWelsUpdateMaxBrWindowStatus) { + pCtx->pFuncList->pfRc.pfWelsUpdateMaxBrWindowStatus (pCtx, iSpatialNum, pFbi->uiTimeStamp); + } + + if (iSpatialNum < 1) { + for (int32_t iDidIdx = 0; iDidIdx < pSvcParam->iSpatialLayerNum; iDidIdx++) { + SSpatialLayerInternal* pParamInternal = &pSvcParam->sDependencyLayers[iDidIdx]; + pParamInternal->iCodingIndex ++; + } + pFbi->eFrameType = videoFrameTypeSkip; + pLayerBsInfo->eFrameType = videoFrameTypeSkip; + WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, + "[Rc] Frame timestamp = %lld, skip one frame due to preprocessing return (temporal layer settings or else)", + pSrcPic->uiTimeStamp); + return ENC_RETURN_SUCCESS; + } + + InitBitStream (pCtx); + pLayerBsInfo->pBsBuf = pCtx->pFrameBs ; + pLayerBsInfo->pNalLengthInByte = pCtx->pOut->pNalLen; + iCurDid = pSpatialIndexMap->iDid; + pCtx->pCurDqLayer = pCtx->ppDqLayerList[iCurDid]; + pCtx->pCurDqLayer->pRefLayer = NULL; + if (!pSvcParam->bSimulcastAVC) { + eFrameType = PrepareEncodeFrame (pCtx, pLayerBsInfo, iSpatialNum, iCurDid, iCurTid, iLayerNum, iFrameSize, + pFbi->uiTimeStamp); + if (eFrameType == videoFrameTypeSkip) { + pFbi->eFrameType = videoFrameTypeSkip; + pLayerBsInfo->eFrameType = videoFrameTypeSkip; + return ENC_RETURN_SUCCESS; + } + } else { + for (int32_t iDidIdx = 0; iDidIdx < pSvcParam->iSpatialLayerNum; iDidIdx++) { + SSpatialLayerInternal* pParamInternal = &pSvcParam->sDependencyLayers[iDidIdx]; + int32_t iTemporalId = GetTemporalLevel 
(pParamInternal, pParamInternal->iCodingIndex, + pSvcParam->uiGopSize); + if (iTemporalId == INVALID_TEMPORAL_ID) + pParamInternal->iCodingIndex ++; + } + } + + while (iSpatialIdx < iSpatialNum) { + iCurDid = (pSpatialIndexMap + iSpatialIdx)->iDid; + SSpatialLayerConfig* pParam = &pSvcParam->sSpatialLayers[iCurDid]; + SSpatialLayerInternal* pParamInternal = &pSvcParam->sDependencyLayers[iCurDid]; + int32_t iDecompositionStages = pSvcParam->sDependencyLayers[iCurDid].iDecompositionStages; + pCtx->pCurDqLayer = pCtx->ppDqLayerList[iCurDid]; + pCtx->uiDependencyId = iCurDid; + + if (pSvcParam->bSimulcastAVC) { + eFrameType = PrepareEncodeFrame (pCtx, pLayerBsInfo, iSpatialNum, iCurDid, iCurTid, iLayerNum, iFrameSize, + pFbi->uiTimeStamp); + if (eFrameType == videoFrameTypeSkip) { + pLayerBsInfo->eFrameType = videoFrameTypeSkip; + ++iSpatialIdx; + continue; + } + } + InitFrameCoding (pCtx, eFrameType, iCurDid); + pCtx->pVpp->AnalyzeSpatialPic (pCtx, iCurDid); + + pCtx->pEncPic = pEncPic = (pSpatialIndexMap + iSpatialIdx)->pSrc; + pCtx->pEncPic->iPictureType = pCtx->eSliceType; + pCtx->pEncPic->iFramePoc = pParamInternal->iPOC; + + iCurWidth = pParam->iVideoWidth; + iCurHeight = pParam->iVideoHeight; +#if defined(MT_DEBUG) + iDidList[iSpatialIdx] = iCurDid; +#endif + // Encoding this picture might mulitiple sQualityStat layers potentially be encoded as followed + switch (pParam->sSliceArgument.uiSliceMode) { + case SM_FIXEDSLCNUM_SLICE: { + if ((pSvcParam->iMultipleThreadIdc > 1) && + (pSvcParam->bUseLoadBalancing + && pSvcParam->iMultipleThreadIdc >= pSvcParam->sSpatialLayers[iCurDid].sSliceArgument.uiSliceNum) + ) { + if (iCurDid > 0) + AdjustEnhanceLayer (pCtx, iCurDid); + else + AdjustBaseLayer (pCtx); + } + + break; + } + case SM_SIZELIMITED_SLICE: { + int32_t iPicIPartitionNum = PicPartitionNumDecision (pCtx); + // MT compatibility + pCtx->iActiveThreadsNum = + iPicIPartitionNum; // we try to active number of threads, equal to number of picture partitions + 
WelsInitCurrentDlayerMltslc (pCtx, iPicIPartitionNum); + break; + } + default: { + break; + } + } + + /* coding each spatial layer, only one sQualityStat layer within spatial support */ + int32_t iSliceCount = 1; + if (iLayerNum >= MAX_LAYER_NUM_OF_FRAME) { // check available layer_bs_info writing as follows + WelsLog (pLogCtx, WELS_LOG_ERROR, "WelsEncoderEncodeExt(), iLayerNum(%d) overflow(max:%d)!", iLayerNum, + MAX_LAYER_NUM_OF_FRAME); + return ENC_RETURN_UNSUPPORTED_PARA; + } + + iNalIdxInLayer = 0; + bAvcBased = ((pSvcParam->bSimulcastAVC) || (iCurDid == BASE_DEPENDENCY_ID)); + pCtx->bNeedPrefixNalFlag = ((!pSvcParam->bSimulcastAVC) && (bAvcBased && + (pSvcParam->bPrefixNalAddingCtrl || + (pSvcParam->iSpatialLayerNum > 1)))); + + if (eFrameType == videoFrameTypeP) { + eNalType = bAvcBased ? NAL_UNIT_CODED_SLICE : NAL_UNIT_CODED_SLICE_EXT; + } else if (eFrameType == videoFrameTypeIDR) { + eNalType = bAvcBased ? NAL_UNIT_CODED_SLICE_IDR : NAL_UNIT_CODED_SLICE_EXT; + } + if (iCurTid == 0 || pCtx->eSliceType == I_SLICE) + eNalRefIdc = NRI_PRI_HIGHEST; + else if (iCurTid == iDecompositionStages) + eNalRefIdc = NRI_PRI_LOWEST; + else if (1 + iCurTid == iDecompositionStages) + eNalRefIdc = NRI_PRI_LOW; + else // more details for other temporal layers? + eNalRefIdc = NRI_PRI_HIGHEST; + pCtx->eNalType = eNalType; + pCtx->eNalPriority = eNalRefIdc; + + pCtx->pDecPic = pCtx->ppRefPicListExt[iCurDid]->pNextBuffer; +#if defined(ENABLE_FRAME_DUMP) || defined(ENABLE_PSNR_CALC) + fsnr = pCtx->pDecPic; +#endif//#if defined(ENABLE_FRAME_DUMP) || defined(ENABLE_PSNR_CALC) + pCtx->pDecPic->iPictureType = pCtx->eSliceType; + pCtx->pDecPic->iFramePoc = pParamInternal->iPOC; + + WelsInitCurrentLayer (pCtx, iCurWidth, iCurHeight); + + pCtx->pReferenceStrategy->MarkPic(); + if (!pCtx->pReferenceStrategy->BuildRefList (pParamInternal->iPOC, 0)) { + WelsLog (pLogCtx, WELS_LOG_WARNING, + "WelsEncoderEncodeExt(), WelsBuildRefList failed for P frames, pCtx->iNumRef0= %d. 
ForceCodingIDR!", + pCtx->iNumRef0); + eFrameType = videoFrameTypeIDR; + pCtx->iEncoderError = ENC_RETURN_CORRECTED; + break; + } + if (pCtx->eSliceType != I_SLICE) { + pCtx->pReferenceStrategy->AfterBuildRefList(); + } +#ifdef LONG_TERM_REF_DUMP + DumpRef (pCtx); +#endif + if (pSvcParam->iRCMode != RC_OFF_MODE) + pCtx->pVpp->AnalyzePictureComplexity (pCtx, pCtx->pEncPic, ((pCtx->eSliceType == P_SLICE) + && (pCtx->iNumRef0 > 0)) ? pCtx->pRefList0[0] : NULL, + iCurDid, (pCtx->eSliceType == P_SLICE) && pSvcParam->bEnableBackgroundDetection); + WelsUpdateRefSyntax (pCtx, pParamInternal->iPOC, + eFrameType); //get reordering syntax used for writing slice header and transmit to encoder. + PrefetchReferencePicture (pCtx, eFrameType); // update reference picture for current pDq layer + pCtx->pFuncList->pfRc.pfWelsRcPictureInit (pCtx, pFbi->uiTimeStamp); + PreprocessSliceCoding (pCtx); // MUST be called after pfWelsRcPictureInit() and WelsInitCurrentLayer() + + //TODO Complexity Calculation here for screen content + iLayerSize = 0; + if (SM_SINGLE_SLICE == pParam->sSliceArgument.uiSliceMode) { // only one slice within a sQualityStat layer + int32_t iSliceSize = 0; + int32_t iPayloadSize = 0; + SSlice* pCurSlice = &pCtx->pCurDqLayer->sSliceBufferInfo[0].pSliceBuffer[0]; + + if (pCtx->bNeedPrefixNalFlag) { + pCtx->iEncoderError = AddPrefixNal (pCtx, pLayerBsInfo, &pLayerBsInfo->pNalLengthInByte[0], &iNalIdxInLayer, eNalType, + eNalRefIdc, + iPayloadSize); + WELS_VERIFY_RETURN_IFNEQ (pCtx->iEncoderError, ENC_RETURN_SUCCESS) + iLayerSize += iPayloadSize; + } + + WelsLoadNal (pCtx->pOut, eNalType, eNalRefIdc); + assert (0 == (int) pCurSlice->iSliceIdx); + pCtx->iEncoderError = SetSliceBoundaryInfo (pCtx->pCurDqLayer, pCurSlice, 0); + WELS_VERIFY_RETURN_IFNEQ (pCtx->iEncoderError, ENC_RETURN_SUCCESS) + + pCtx->iEncoderError = WelsCodeOneSlice (pCtx, pCurSlice, eNalType); + WELS_VERIFY_RETURN_IFNEQ (pCtx->iEncoderError, ENC_RETURN_SUCCESS) + + WelsUnloadNal (pCtx->pOut); + + 
pCtx->iEncoderError = WelsEncodeNal (&pCtx->pOut->sNalList[pCtx->pOut->iNalIndex - 1], + &pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt, + pCtx->iFrameBsSize - pCtx->iPosBsBuffer, + pCtx->pFrameBs + pCtx->iPosBsBuffer, + &pLayerBsInfo->pNalLengthInByte[iNalIdxInLayer]); + WELS_VERIFY_RETURN_IFNEQ (pCtx->iEncoderError, ENC_RETURN_SUCCESS) + iSliceSize = pLayerBsInfo->pNalLengthInByte[iNalIdxInLayer]; + + iLayerSize += iSliceSize; + pCtx->iPosBsBuffer += iSliceSize; + pLayerBsInfo->uiLayerType = VIDEO_CODING_LAYER; + pLayerBsInfo->uiSpatialId = iCurDid; + pLayerBsInfo->uiTemporalId = iCurTid; + pLayerBsInfo->uiQualityId = 0; + pLayerBsInfo->iNalCount = ++ iNalIdxInLayer; + pLayerBsInfo->eFrameType = eFrameType; + pLayerBsInfo->iSubSeqId = GetSubSequenceId (pCtx, eFrameType); + } + // for dynamic slicing single threading.. + else if ((SM_SIZELIMITED_SLICE == pParam->sSliceArgument.uiSliceMode) && (pSvcParam->iMultipleThreadIdc <= 1)) { + const int32_t kiLastMbInFrame = pCtx->pCurDqLayer->sSliceEncCtx.iMbNumInFrame; + pCtx->iEncoderError = WelsCodeOnePicPartition (pCtx, pFbi, pLayerBsInfo, &iNalIdxInLayer, &iLayerSize, 0, + kiLastMbInFrame - 1, 0); + pLayerBsInfo->eFrameType = eFrameType; + pLayerBsInfo->iSubSeqId = GetSubSequenceId (pCtx, eFrameType); + WELS_VERIFY_RETURN_IFNEQ (pCtx->iEncoderError, ENC_RETURN_SUCCESS) + } else { + //other multi-slice uiSliceMode + // THREAD_FULLY_FIRE_MODE/THREAD_PICK_UP_MODE for any mode of non-SM_SIZELIMITED_SLICE + if ((SM_SIZELIMITED_SLICE != pParam->sSliceArgument.uiSliceMode) && (pSvcParam->iMultipleThreadIdc > 1)) { + iSliceCount = GetCurrentSliceNum (pCtx->pCurDqLayer); + if (iLayerNum + 1 >= MAX_LAYER_NUM_OF_FRAME) { // check available layer_bs_info for further writing as followed + WelsLog (pLogCtx, WELS_LOG_ERROR, + "WelsEncoderEncodeExt(), iLayerNum(%d) overflow(max:%d) at iDid= %d uiSliceMode= %d, iSliceCount= %d!", + iLayerNum, MAX_LAYER_NUM_OF_FRAME, iCurDid, pParam->sSliceArgument.uiSliceMode, iSliceCount); + return 
ENC_RETURN_UNSUPPORTED_PARA; + } + if (iSliceCount <= 1) { + WelsLog (pLogCtx, WELS_LOG_ERROR, + "WelsEncoderEncodeExt(), iSliceCount(%d) from GetCurrentSliceNum() is untrusted due stack/heap crupted!", + iSliceCount); + return ENC_RETURN_UNEXPECTED; + } + //note: the old codes are removed at commit: 3e0ee69 + pLayerBsInfo->pBsBuf = pCtx->pFrameBs + pCtx->iPosBsBuffer; + pLayerBsInfo->uiLayerType = VIDEO_CODING_LAYER; + pLayerBsInfo->uiSpatialId = pCtx->uiDependencyId; + pLayerBsInfo->uiTemporalId = pCtx->uiTemporalId; + pLayerBsInfo->uiQualityId = 0; + pLayerBsInfo->iNalCount = 0; + pLayerBsInfo->eFrameType = eFrameType; + pLayerBsInfo->iSubSeqId = GetSubSequenceId (pCtx, eFrameType); + + pCtx->pTaskManage->ExecuteTasks(); + if (pCtx->iEncoderError) { + WelsLog (pLogCtx, WELS_LOG_ERROR, + "WelsEncoderEncodeExt(), multi-slice (mode %d) encoding error!", + pParam->sSliceArgument.uiSliceMode); + return pCtx->iEncoderError; + } + + iLayerSize = AppendSliceToFrameBs (pCtx, pLayerBsInfo, iSliceCount); + } + // THREAD_FULLY_FIRE_MODE && SM_SIZELIMITED_SLICE + else if ((SM_SIZELIMITED_SLICE == pParam->sSliceArgument.uiSliceMode) && (pSvcParam->iMultipleThreadIdc > 1)) { + const int32_t kiPartitionCnt = pCtx->iActiveThreadsNum; + + //TODO: use a function to remove duplicate code here and ln3994 + int32_t iLayerBsIdx = pCtx->pOut->iLayerBsIndex; + SLayerBSInfo* pLbi = &pFbi->sLayerInfo[iLayerBsIdx]; + pLbi->pBsBuf = pCtx->pFrameBs + pCtx->iPosBsBuffer; + pLbi->uiLayerType = VIDEO_CODING_LAYER; + pLbi->uiSpatialId = pCtx->uiDependencyId; + pLbi->uiTemporalId = pCtx->uiTemporalId; + pLbi->uiQualityId = 0; + pLbi->iNalCount = 0; + pLbi->eFrameType = eFrameType; + pLbi->iSubSeqId = GetSubSequenceId (pCtx, eFrameType); + int32_t iIdx = 0; + while (iIdx < kiPartitionCnt) { + pCtx->pSliceThreading->pThreadPEncCtx[iIdx].pFrameBsInfo = pFbi; + pCtx->pSliceThreading->pThreadPEncCtx[iIdx].iSliceIndex = iIdx; + ++ iIdx; + } + + int32_t iRet = InitAllSlicesInThread (pCtx); + if (iRet) { 
+ WelsLog (pLogCtx, WELS_LOG_ERROR, + "WelsEncoderEncodeExt(), multi-slice (mode %d) InitAllSlicesInThread() error!", + pParam->sSliceArgument.uiSliceMode); + return ENC_RETURN_UNEXPECTED; + } + pCtx->pTaskManage->ExecuteTasks(); + + if (pCtx->iEncoderError) { + WelsLog (pLogCtx, WELS_LOG_ERROR, + "WelsEncoderEncodeExt(), multi-slice (mode %d) encoding error = %d!", + pParam->sSliceArgument.uiSliceMode, pCtx->iEncoderError); + return pCtx->iEncoderError; + } + + iRet = SliceLayerInfoUpdate (pCtx, pFbi, pLayerBsInfo, pParam->sSliceArgument.uiSliceMode); + if (iRet) { + WelsLog (pLogCtx, WELS_LOG_ERROR, + "WelsEncoderEncodeExt(), multi-slice (mode %d) InitAllSlicesInThread() error!", + pParam->sSliceArgument.uiSliceMode); + return ENC_RETURN_UNEXPECTED; + } + + iSliceCount = GetCurrentSliceNum (pCtx->pCurDqLayer); + iLayerSize = AppendSliceToFrameBs (pCtx, pLayerBsInfo, iSliceCount); + } else { // for non-dynamic-slicing mode single threading branch.. + const bool bNeedPrefix = pCtx->bNeedPrefixNalFlag; + int32_t iSliceIdx = 0; + SSlice* pCurSlice = NULL; + + iSliceCount = GetCurrentSliceNum (pCtx->pCurDqLayer); + while (iSliceIdx < iSliceCount) { + int32_t iSliceSize = 0; + int32_t iPayloadSize = 0; + + if (bNeedPrefix) { + pCtx->iEncoderError = AddPrefixNal (pCtx, pLayerBsInfo, &pLayerBsInfo->pNalLengthInByte[0], &iNalIdxInLayer, eNalType, + eNalRefIdc, + iPayloadSize); + WELS_VERIFY_RETURN_IFNEQ (pCtx->iEncoderError, ENC_RETURN_SUCCESS) + iLayerSize += iPayloadSize; + } + + WelsLoadNal (pCtx->pOut, eNalType, eNalRefIdc); + + pCurSlice = &pCtx->pCurDqLayer->sSliceBufferInfo[0].pSliceBuffer[iSliceIdx]; + assert (iSliceIdx == pCurSlice->iSliceIdx); + pCtx->iEncoderError = SetSliceBoundaryInfo (pCtx->pCurDqLayer, pCurSlice, iSliceIdx); + + pCtx->iEncoderError = WelsCodeOneSlice (pCtx, pCurSlice, eNalType); + WELS_VERIFY_RETURN_IFNEQ (pCtx->iEncoderError, ENC_RETURN_SUCCESS) + + WelsUnloadNal (pCtx->pOut); + + pCtx->iEncoderError = WelsEncodeNal 
(&pCtx->pOut->sNalList[pCtx->pOut->iNalIndex - 1], + &pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt, + pCtx->iFrameBsSize - pCtx->iPosBsBuffer, + pCtx->pFrameBs + pCtx->iPosBsBuffer, &pLayerBsInfo->pNalLengthInByte[iNalIdxInLayer]); + WELS_VERIFY_RETURN_IFNEQ (pCtx->iEncoderError, ENC_RETURN_SUCCESS) + iSliceSize = pLayerBsInfo->pNalLengthInByte[iNalIdxInLayer]; + + pCtx->iPosBsBuffer += iSliceSize; + iLayerSize += iSliceSize; + +#if defined(SLICE_INFO_OUTPUT) + fprintf (stderr, + "@slice=%-6d sliceType:%c idc:%d size:%-6d\n", + iSliceIdx, + (pCtx->eSliceType == P_SLICE ? 'P' : 'I'), + eNalRefIdc, + iSliceSize); +#endif//SLICE_INFO_OUTPUT + ++ iNalIdxInLayer; + ++ iSliceIdx; + } + + pLayerBsInfo->uiLayerType = VIDEO_CODING_LAYER; + pLayerBsInfo->uiSpatialId = iCurDid; + pLayerBsInfo->uiTemporalId = iCurTid; + pLayerBsInfo->uiQualityId = 0; + pLayerBsInfo->iNalCount = iNalIdxInLayer; + pLayerBsInfo->eFrameType = eFrameType; + pLayerBsInfo->iSubSeqId = GetSubSequenceId (pCtx, eFrameType); + } + } + + if (NULL != pCtx->pFuncList->pfRc.pfWelsRcPostFrameSkipping + && pCtx->pFuncList->pfRc.pfWelsRcPostFrameSkipping (pCtx, iCurDid, pFbi->uiTimeStamp)) { + + StackBackEncoderStatus (pCtx, eFrameType); + ClearFrameBsInfo (pCtx, pFbi); + + iFrameSize = 0; + iLayerSize = 0; + iLayerNum = 0; + + if (pCtx->pFuncList->pfRc.pfWelsUpdateBufferWhenSkip) { + pCtx->pFuncList->pfRc.pfWelsUpdateBufferWhenSkip (pCtx, iSpatialNum); + } + + WelsRcPostFrameSkippedUpdate (pCtx, iCurDid); + pCtx->iEncoderError = ENC_RETURN_SUCCESS; + return ENC_RETURN_SUCCESS; + } + + // deblocking filter + if ( + (!pCtx->pCurDqLayer->bDeblockingParallelFlag) && +#if !defined(ENABLE_FRAME_DUMP) + ((eNalRefIdc != NRI_PRI_LOWEST) && (pSvcParam->sDependencyLayers[iCurDid].iHighestTemporalId == 0 + || iCurTid < pSvcParam->sDependencyLayers[iCurDid].iHighestTemporalId)) && +#endif//!ENABLE_FRAME_DUMP + true + ) { + PerformDeblockingFilter (pCtx); + } + + pCtx->pFuncList->pfRc.pfWelsRcPictureInfoUpdate (pCtx, 
iLayerSize); + iFrameSize += iLayerSize; + RcTraceFrameBits (pCtx, pFbi->uiTimeStamp, iFrameSize); + pCtx->pDecPic->iFrameAverageQp = pCtx->pWelsSvcRc[iCurDid].iAverageFrameQp; + + //update scc related + pCtx->pFuncList->pfUpdateFMESwitch (pCtx->pCurDqLayer); + + // reference picture list update + if (eNalRefIdc != NRI_PRI_LOWEST) { + if (!pCtx->pReferenceStrategy->UpdateRefList()) { + WelsLog (pLogCtx, WELS_LOG_WARNING, "WelsEncoderEncodeExt(), WelsUpdateRefList failed. ForceCodingIDR!"); + //the above is to set the next frame to be IDR + pCtx->iEncoderError = ENC_RETURN_CORRECTED; + break; + } + } + + + //check MinCr + { + int32_t iMinCrFrameSize = (pParam->iVideoWidth * pParam->iVideoHeight * 3) >> 2; //MinCr = 2; + if (pParam->uiLevelIdc == LEVEL_3_1 || pParam->uiLevelIdc == LEVEL_3_2 || pParam->uiLevelIdc == LEVEL_4_0) + iMinCrFrameSize >>= 1; //MinCr = 4 + if (iFrameSize > iMinCrFrameSize) + WelsLog (pLogCtx, WELS_LOG_WARNING, + "WelsEncoderEncodeExt()MinCr Checking,codec bitstream size is larger than Level limitation"); + } +#ifdef ENABLE_FRAME_DUMP + { + DumpDependencyRec (fsnr, &pSvcParam->sDependencyLayers[iCurDid].sRecFileName[0], iCurDid, + pCtx->bDependencyRecFlag[iCurDid], pCtx->pCurDqLayer, pSvcParam->bSimulcastAVC); + pCtx->bDependencyRecFlag[iCurDid] = true; + } +#endif//ENABLE_FRAME_DUMP + +#if defined(ENABLE_PSNR_CALC) + fSnrY = WelsCalcPsnr (fsnr->pData[0], + fsnr->iLineSize[0], + pEncPic->pData[0], + pEncPic->iLineSize[0], + iCurWidth, + iCurHeight); + fSnrU = WelsCalcPsnr (fsnr->pData[1], + fsnr->iLineSize[1], + pEncPic->pData[1], + pEncPic->iLineSize[1], + (iCurWidth >> 1), + (iCurHeight >> 1)); + fSnrV = WelsCalcPsnr (fsnr->pData[2], + fsnr->iLineSize[2], + pEncPic->pData[2], + pEncPic->iLineSize[2], + (iCurWidth >> 1), + (iCurHeight >> 1)); +#endif//ENABLE_PSNR_CALC + +#if defined(LAYER_INFO_OUTPUT) + fprintf (stderr, "%2s %5d: %-5d %2s T%1d D%1d Q%-2d QP%3d Y%2.2f U%2.2f V%2.2f %8d bits\n", + (iSpatialIdx == 0) ? 
"#AU" : " ", + pParamInternal->iPOC, + pParamInternal->iFrameNum, + (eFrameType == videoFrameTypeI || eFrameType == videoFrameTypeIDR) ? "I" : "P", + iCurTid, + iCurDid, + 0, + pCtx->pWelsSvcRc[pCtx->uiDependencyId].iAverageFrameQp, + fSnrY, + fSnrU, + fSnrV, + (iLayerSize << 3)); +#endif//LAYER_INFO_OUTPUT + +#if defined(STAT_OUTPUT) + +#if defined(ENABLE_PSNR_CALC) + { + pCtx->sStatData[iCurDid][0].sQualityStat.rYPsnr[pCtx->eSliceType] += fSnrY; + pCtx->sStatData[iCurDid][0].sQualityStat.rUPsnr[pCtx->eSliceType] += fSnrU; + pCtx->sStatData[iCurDid][0].sQualityStat.rVPsnr[pCtx->eSliceType] += fSnrV; + } +#endif//ENABLE_PSNR_CALC + +#if defined(MB_TYPES_CHECK) //091025, frame output + if (pCtx->eSliceType == P_SLICE) { + pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[P_SLICE][Intra4x4] += pCtx->sPerInfo.iMbCount[P_SLICE][Intra4x4]; + pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[P_SLICE][Intra16x16] += pCtx->sPerInfo.iMbCount[P_SLICE][Intra16x16]; + pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[P_SLICE][Inter16x16] += pCtx->sPerInfo.iMbCount[P_SLICE][Inter16x16]; + pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[P_SLICE][Inter16x8] += pCtx->sPerInfo.iMbCount[P_SLICE][Inter16x8]; + pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[P_SLICE][Inter8x16] += pCtx->sPerInfo.iMbCount[P_SLICE][Inter8x16]; + pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[P_SLICE][Inter8x8] += pCtx->sPerInfo.iMbCount[P_SLICE][Inter8x8]; + pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[P_SLICE][PSkip] += pCtx->sPerInfo.iMbCount[P_SLICE][PSkip]; + pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[P_SLICE][8] += pCtx->sPerInfo.iMbCount[P_SLICE][8]; + pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[P_SLICE][9] += pCtx->sPerInfo.iMbCount[P_SLICE][9]; + pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[P_SLICE][10] += pCtx->sPerInfo.iMbCount[P_SLICE][10]; + pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[P_SLICE][11] += pCtx->sPerInfo.iMbCount[P_SLICE][11]; + } else if (pCtx->eSliceType == 
I_SLICE) { + pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[I_SLICE][Intra4x4] += pCtx->sPerInfo.iMbCount[I_SLICE][Intra4x4]; + pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[I_SLICE][Intra16x16] += pCtx->sPerInfo.iMbCount[I_SLICE][Intra16x16]; + pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[I_SLICE][7] += pCtx->sPerInfo.iMbCount[I_SLICE][7]; + } + + memset (pCtx->sPerInfo.iMbCount[P_SLICE], 0, 18 * sizeof (int32_t)); + memset (pCtx->sPerInfo.iMbCount[I_SLICE], 0, 18 * sizeof (int32_t)); + +#endif//MB_TYPES_CHECK + { + ++ pCtx->sStatData[iCurDid][0].sSliceData.iSliceCount[pCtx->eSliceType]; // for multiple slices coding + pCtx->sStatData[iCurDid][0].sSliceData.iSliceSize[pCtx->eSliceType] += (iLayerSize << 3); // bits + } +#endif//STAT_OUTPUT + + iCountNal = pLayerBsInfo->iNalCount; + ++ iLayerNum; + ++ pLayerBsInfo; + ++ pCtx->pOut->iLayerBsIndex; + pLayerBsInfo->pBsBuf = pCtx->pFrameBs + pCtx->iPosBsBuffer; + pLayerBsInfo->pNalLengthInByte = (pLayerBsInfo - 1)->pNalLengthInByte + iCountNal; + + if (pSvcParam->iPaddingFlag && pCtx->pWelsSvcRc[pCtx->uiDependencyId].iPaddingSize > 0) { + int32_t iPaddingNalSize = 0; + pCtx->iEncoderError = WritePadding (pCtx, pCtx->pWelsSvcRc[pCtx->uiDependencyId].iPaddingSize, iPaddingNalSize); + WELS_VERIFY_RETURN_IFNEQ (pCtx->iEncoderError, ENC_RETURN_SUCCESS) + +#if GOM_TRACE_FLAG + WelsLog (pLogCtx, WELS_LOG_INFO, "[RC] dependency ID = %d,encoding_qp = %d Padding: %d", pCtx->uiDependencyId, + pCtx->iGlobalQp, + pCtx->pWelsSvcRc[pCtx->uiDependencyId].iPaddingSize); +#endif + if (iPaddingNalSize <= 0) + return ENC_RETURN_UNEXPECTED; + + pCtx->pWelsSvcRc[pCtx->uiDependencyId].iPaddingBitrateStat += pCtx->pWelsSvcRc[pCtx->uiDependencyId].iPaddingSize; + + pCtx->pWelsSvcRc[pCtx->uiDependencyId].iPaddingSize = 0; + + pLayerBsInfo->uiSpatialId = 0; + pLayerBsInfo->uiTemporalId = 0; + pLayerBsInfo->uiQualityId = 0; + pLayerBsInfo->uiLayerType = NON_VIDEO_CODING_LAYER; + pLayerBsInfo->iNalCount = 1; + 
pLayerBsInfo->pNalLengthInByte[0] = iPaddingNalSize; + pLayerBsInfo->eFrameType = eFrameType; + pLayerBsInfo->iSubSeqId = GetSubSequenceId (pCtx, eFrameType); + ++ pLayerBsInfo; + ++ pCtx->pOut->iLayerBsIndex; + pLayerBsInfo->pBsBuf = pCtx->pFrameBs + pCtx->iPosBsBuffer; + pLayerBsInfo->pNalLengthInByte = (pLayerBsInfo - 1)->pNalLengthInByte + 1; + ++ iLayerNum; + + iFrameSize += iPaddingNalSize; + } + + if ((pParam->sSliceArgument.uiSliceMode == SM_FIXEDSLCNUM_SLICE) + && pSvcParam->bUseLoadBalancing + && pSvcParam->iMultipleThreadIdc > 1 && + pSvcParam->iMultipleThreadIdc >= pParam->sSliceArgument.uiSliceNum) { + CalcSliceComplexRatio (pCtx->pCurDqLayer); +#if defined(MT_DEBUG) + TrackSliceComplexities (pCtx, iCurDid); +#endif//#if defined(MT_DEBUG) + } + + pCtx->eLastNalPriority[iCurDid] = eNalRefIdc; + ++ iSpatialIdx; + + if (iCurDid + 1 < pSvcParam->iSpatialLayerNum) { + //for next layer, note that iSpatialIdx has been ++ so it is pointer to next layer + WelsSwapDqLayers (pCtx, (pSpatialIndexMap + iSpatialIdx)->iDid); + } + + if (pCtx->pVpp->UpdateSpatialPictures (pCtx, pSvcParam, iCurTid, iCurDid) != 0) { + ForceCodingIDR (pCtx, iCurDid); + WelsLog (pLogCtx, WELS_LOG_WARNING, + "WelsEncoderEncodeExt(), Logic Error Found in Preprocess updating. 
ForceCodingIDR!"); + //the above is to set the next frame IDR + pFbi->eFrameType = eFrameType; + pLayerBsInfo->eFrameType = eFrameType; + return ENC_RETURN_CORRECTED; + } + + if (pSvcParam->bEnableLongTermReference && ((pCtx->pLtr[pCtx->uiDependencyId].bLTRMarkingFlag + && (pCtx->pLtr[pCtx->uiDependencyId].iLTRMarkMode == LTR_DIRECT_MARK)) || eFrameType == videoFrameTypeIDR)) { + pCtx->bRefOfCurTidIsLtr[iCurDid][iCurTid] = true; + } + if (pSvcParam->bSimulcastAVC) + ++ pParamInternal->iCodingIndex; + }//end of (iSpatialIdx/iSpatialNum) + + if (!pSvcParam->bSimulcastAVC) { + for (int32_t i = 0; i < pSvcParam->iSpatialLayerNum; i++) { + SSpatialLayerInternal* pParamInternal = &pSvcParam->sDependencyLayers[i]; + pParamInternal->iCodingIndex ++; + } + } + + if (ENC_RETURN_CORRECTED == pCtx->iEncoderError) { + pCtx->pVpp->UpdateSpatialPictures (pCtx, pSvcParam, iCurTid, (pSpatialIndexMap + iSpatialIdx)->iDid); + ForceCodingIDR (pCtx, (pSpatialIndexMap + iSpatialIdx)->iDid); + WelsLog (pLogCtx, WELS_LOG_ERROR, "WelsEncoderEncodeExt(), Logic Error Found in temporal level. 
ForceCodingIDR!"); + //the above is to set the next frame IDR + pFbi->eFrameType = eFrameType; + pLayerBsInfo->eFrameType = eFrameType; + return ENC_RETURN_CORRECTED; + } + +#if defined(MT_DEBUG) + TrackSliceConsumeTime (pCtx, iDidList, iSpatialNum); +#endif//MT_DEBUG + + // to check number of layers / nals / slices dependencies + if (iLayerNum > MAX_LAYER_NUM_OF_FRAME) { + WelsLog (& pCtx->sLogCtx, WELS_LOG_ERROR, "WelsEncoderEncodeExt(), iLayerNum(%d) > MAX_LAYER_NUM_OF_FRAME(%d)!", + iLayerNum, MAX_LAYER_NUM_OF_FRAME); + return 1; + } + + + pFbi->iLayerNum = iLayerNum; + + WelsLog (pLogCtx, WELS_LOG_DEBUG, "WelsEncoderEncodeExt() OutputInfo iLayerNum = %d,iFrameSize = %d", + iLayerNum, iFrameSize); + for (int32_t i = 0; i < iLayerNum; i++) + WelsLog (pLogCtx, WELS_LOG_DEBUG, + "WelsEncoderEncodeExt() OutputInfo iLayerId = %d,iNalType = %d,iNalCount = %d, first Nal Length=%d,uiSpatialId = %d,uiTemporalId = %d,iSubSeqId = %d", + i, + pFbi->sLayerInfo[i].uiLayerType, pFbi->sLayerInfo[i].iNalCount, pFbi->sLayerInfo[i].pNalLengthInByte[0], + pFbi->sLayerInfo[i].uiSpatialId, pFbi->sLayerInfo[i].uiTemporalId, pFbi->sLayerInfo[i].iSubSeqId); + WelsEmms(); + + pLayerBsInfo->eFrameType = eFrameType; + pFbi->iFrameSizeInBytes = iFrameSize; + pFbi->eFrameType = eFrameType; + for (int32_t k = 0; k < pFbi->iLayerNum; k++) { + if (pFbi->eFrameType != pFbi->sLayerInfo[k].eFrameType) { + pFbi->eFrameType = videoFrameTypeIPMixed; + } + } +#ifdef _DEBUG + if (pFbi->iLayerNum > MAX_LAYER_NUM_OF_FRAME) { + WelsLog (& pCtx->sLogCtx, WELS_LOG_ERROR, "WelsEncoderEncodeExt(), iLayerNum(%d) > MAX_LAYER_NUM_OF_FRAME(%d)!", + pFbi->iLayerNum, MAX_LAYER_NUM_OF_FRAME); + return ENC_RETURN_UNEXPECTED; + } + + int32_t iTotalNal = 0; + for (int32_t k = 0; k < pFbi->iLayerNum; k++) { + iTotalNal += pFbi->sLayerInfo[k].iNalCount; + + if ((pCtx->iActiveThreadsNum > 1) && (MAX_NAL_UNITS_IN_LAYER < pFbi->sLayerInfo[k].iNalCount)) { + WelsLog (& pCtx->sLogCtx, WELS_LOG_ERROR, + 
"WelsEncoderEncodeExt(), iCountNumNals(%d) > MAX_NAL_UNITS_IN_LAYER(%d) under multi-thread(%d) NOT supported!", + pFbi->sLayerInfo[k].iNalCount, MAX_NAL_UNITS_IN_LAYER, pCtx->iActiveThreadsNum); + return ENC_RETURN_UNEXPECTED; + } + } + + if (iTotalNal > pCtx->pOut->iCountNals) { + WelsLog (& pCtx->sLogCtx, WELS_LOG_ERROR, "WelsEncoderEncodeExt(), iTotalNal(%d) > iCountNals(%d)!", + iTotalNal, pCtx->pOut->iCountNals); + return ENC_RETURN_UNEXPECTED; + } +#endif + return ENC_RETURN_SUCCESS; +} + +/*! + * \brief Wels SVC encoder parameters adjustment + * SVC adjustment results in new requirement in memory blocks adjustment + */ +int32_t WelsEncoderParamAdjust (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pNewParam) { + SWelsSvcCodingParam* pOldParam = NULL; + int32_t iReturn = ENC_RETURN_SUCCESS; + int8_t iIndexD = 0; + bool bNeedReset = false; + int16_t iSliceNum = 1; // number of slices used + int32_t iCacheLineSize = 16; // on chip cache line size in byte + uint32_t uiCpuFeatureFlags = 0; + + if (NULL == ppCtx || NULL == *ppCtx || NULL == pNewParam) return 1; + + /* Check validation in new parameters */ + iReturn = ParamValidationExt (& (*ppCtx)->sLogCtx, pNewParam); + if (iReturn != ENC_RETURN_SUCCESS) return iReturn; + + iReturn = GetMultipleThreadIdc (& (*ppCtx)->sLogCtx, pNewParam, iSliceNum, iCacheLineSize, uiCpuFeatureFlags); + if (iReturn != ENC_RETURN_SUCCESS) { + WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_ERROR, "WelsEncoderParamAdjust(), GetMultipleThreadIdc failed return %d.", + iReturn); + return iReturn; + } + + pOldParam = (*ppCtx)->pSvcParam; + + if (pOldParam->iUsageType != pNewParam->iUsageType) { + WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_ERROR, + "WelsEncoderParamAdjust(), does not expect in-middle change of iUsgaeType from %d to %d", pOldParam->iUsageType, + pNewParam->iUsageType); + return ENC_RETURN_UNSUPPORTED_PARA; + } + + /* Decide whether need reset for IDR frame based on adjusting prarameters changed */ + /* Temporal levels, spatial settings and/ 
or quality settings changed need update parameter sets related. */ + bNeedReset = (pOldParam == NULL) || + (pOldParam->bSimulcastAVC != pNewParam->bSimulcastAVC) || + (pOldParam->iSpatialLayerNum != pNewParam->iSpatialLayerNum) || + (pOldParam->iPicWidth != pNewParam->iPicWidth + || pOldParam->iPicHeight != pNewParam->iPicHeight) || + (pOldParam->SUsedPicRect.iWidth != pNewParam->SUsedPicRect.iWidth + || pOldParam->SUsedPicRect.iHeight != pNewParam->SUsedPicRect.iHeight) || + (pOldParam->bEnableLongTermReference != pNewParam->bEnableLongTermReference) || + (pOldParam->iLTRRefNum != pNewParam->iLTRRefNum) || + (pOldParam->iMultipleThreadIdc != pNewParam->iMultipleThreadIdc) || + (pOldParam->bEnableBackgroundDetection != pNewParam->bEnableBackgroundDetection) || + (pOldParam->bEnableAdaptiveQuant != pNewParam->bEnableAdaptiveQuant) || + (pOldParam->eSpsPpsIdStrategy != pNewParam->eSpsPpsIdStrategy); + if ((pNewParam->iMaxNumRefFrame > pOldParam->iMaxNumRefFrame) || + ((pOldParam->iMaxNumRefFrame == 1) && (pOldParam->iTemporalLayerNum == 1) && (pNewParam->iTemporalLayerNum == 2))) { + bNeedReset = true; + } + if (bNeedReset) { + WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_INFO, + "WelsEncoderParamAdjust(),bSimulcastAVC(%d,%d),iSpatialLayerNum(%d,%d),iPicWidth(%d,%d),iPicHeight(%d,%d),Rect.iWidth(%d,%d),Rect.iHeight(%d,%d)", + pOldParam->bSimulcastAVC, pNewParam->bSimulcastAVC, + pOldParam->iSpatialLayerNum, pNewParam->iSpatialLayerNum, + pOldParam->iPicWidth, pNewParam->iPicWidth, + pOldParam->iPicHeight, pNewParam->iPicHeight, + pOldParam->SUsedPicRect.iWidth, pNewParam->SUsedPicRect.iWidth, + pOldParam->SUsedPicRect.iHeight, pNewParam->SUsedPicRect.iHeight); + + WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_INFO, + "WelsEncoderParamAdjust(),bEnableLongTermReference(%d,%d),iLTRRefNum(%d,%d),iMultipleThreadIdc(%d,%d),bEnableBackgroundDetection(%d,%d),bEnableAdaptiveQuant(%d,%d),eSpsPpsIdStrategy(%d,%d),iMaxNumRefFrame(%d,%d),iTemporalLayerNum(%d,%d)", + 
pOldParam->bEnableLongTermReference, pNewParam->bEnableLongTermReference, + pOldParam->iLTRRefNum, pNewParam->iLTRRefNum, + pOldParam->iMultipleThreadIdc, pNewParam->iMultipleThreadIdc, + pOldParam->bEnableBackgroundDetection, pNewParam->bEnableBackgroundDetection, + pOldParam->bEnableAdaptiveQuant, pNewParam->bEnableAdaptiveQuant, + pOldParam->eSpsPpsIdStrategy, pNewParam->eSpsPpsIdStrategy, + pOldParam->iMaxNumRefFrame, pNewParam->iMaxNumRefFrame, + pOldParam->iTemporalLayerNum, pNewParam->iTemporalLayerNum); + } + if (!bNeedReset) { // Check its picture resolutions/quality settings respectively in each dependency layer + iIndexD = 0; + assert (pOldParam->iSpatialLayerNum == pNewParam->iSpatialLayerNum); + do { + const SSpatialLayerInternal* kpOldDlp = &pOldParam->sDependencyLayers[iIndexD]; + const SSpatialLayerInternal* kpNewDlp = &pNewParam->sDependencyLayers[iIndexD]; + float fT1 = .0f; + float fT2 = .0f; + + // check frame size settings + if (pOldParam->sSpatialLayers[iIndexD].iVideoWidth != pNewParam->sSpatialLayers[iIndexD].iVideoWidth || + pOldParam->sSpatialLayers[iIndexD].iVideoHeight != pNewParam->sSpatialLayers[iIndexD].iVideoHeight || + kpOldDlp->iActualWidth != kpNewDlp->iActualWidth || + kpOldDlp->iActualHeight != kpNewDlp->iActualHeight) { + bNeedReset = true; + WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_INFO, + "WelsEncoderParamAdjust(),iIndexD = %d,sSpatialLayers.wxh_old(%d,%d),sSpatialLayers.wxh_new(%d,%d),iActualwxh_old(%d,%d),iActualwxh_new(%d,%d)", + iIndexD, pOldParam->sSpatialLayers[iIndexD].iVideoWidth, pOldParam->sSpatialLayers[iIndexD].iVideoHeight, + pNewParam->sSpatialLayers[iIndexD].iVideoWidth, pNewParam->sSpatialLayers[iIndexD].iVideoHeight, + kpOldDlp->iActualWidth, kpOldDlp->iActualHeight, + kpNewDlp->iActualWidth, kpNewDlp->iActualHeight); + break; + } + + if (pOldParam->sSpatialLayers[iIndexD].sSliceArgument.uiSliceMode != + pNewParam->sSpatialLayers[iIndexD].sSliceArgument.uiSliceMode + || + 
pOldParam->sSpatialLayers[iIndexD].sSliceArgument.uiSliceNum != + pNewParam->sSpatialLayers[iIndexD].sSliceArgument.uiSliceNum) { + + bNeedReset = true; + WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_INFO, + "WelsEncoderParamAdjust(),iIndexD = %d,uiSliceMode (%d,%d),uiSliceNum(%d,%d)", iIndexD, + pOldParam->sSpatialLayers[iIndexD].sSliceArgument.uiSliceMode, + pNewParam->sSpatialLayers[iIndexD].sSliceArgument.uiSliceMode, + pOldParam->sSpatialLayers[iIndexD].sSliceArgument.uiSliceNum, + pNewParam->sSpatialLayers[iIndexD].sSliceArgument.uiSliceNum); + + break; + } + + // check frame rate + // we can not check whether corresponding fFrameRate is equal or not, + // only need to check d_max/d_min and max_fr/d_max whether it is equal or not + if (kpNewDlp->fInputFrameRate > EPSN && kpOldDlp->fInputFrameRate > EPSN) + fT1 = kpNewDlp->fOutputFrameRate / kpNewDlp->fInputFrameRate - kpOldDlp->fOutputFrameRate / kpOldDlp->fInputFrameRate; + if (kpNewDlp->fOutputFrameRate > EPSN && kpOldDlp->fOutputFrameRate > EPSN) + fT2 = pNewParam->fMaxFrameRate / kpNewDlp->fOutputFrameRate - pOldParam->fMaxFrameRate / kpOldDlp->fOutputFrameRate; + if (fT1 > EPSN || fT1 < -EPSN || fT2 > EPSN || fT2 < -EPSN) { + bNeedReset = true; + WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_INFO, + "WelsEncoderParamAdjust() iIndexD = %d,fInputFrameRate(%f,%f),fOutputFrameRate(%f,%f),fMaxFrameRate(%f,%f)", iIndexD, + kpOldDlp->fInputFrameRate, kpNewDlp->fInputFrameRate, + kpOldDlp->fOutputFrameRate, kpNewDlp->fOutputFrameRate, + pOldParam->fMaxFrameRate, pNewParam->fMaxFrameRate); + break; + } + if (pOldParam->sSpatialLayers[iIndexD].uiProfileIdc != pNewParam->sSpatialLayers[iIndexD].uiProfileIdc) { + bNeedReset = true; + WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_INFO, + "WelsEncoderParamAdjust(),iIndexD = %d,uiProfileIdc(%d,%d)", iIndexD, + pOldParam->sSpatialLayers[iIndexD].uiProfileIdc, pNewParam->sSpatialLayers[iIndexD].uiProfileIdc); + break; + } + //check level change,if new level is smaller than old level,don't 
reset encoder. still use old level. + + if (pNewParam->sSpatialLayers[iIndexD].uiLevelIdc > pOldParam->sSpatialLayers[iIndexD].uiLevelIdc) { + bNeedReset = true; + WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_INFO, + "WelsEncoderParamAdjust(),iIndexD = %d,uiLevelIdc(%d,%d)", iIndexD, + pOldParam->sSpatialLayers[iIndexD].uiLevelIdc, pNewParam->sSpatialLayers[iIndexD].uiLevelIdc); + break; + } + ++ iIndexD; + } while (iIndexD < pOldParam->iSpatialLayerNum); + } + + if (bNeedReset) { + SLogContext sLogCtx = (*ppCtx)->sLogCtx; + + int32_t iOldSpsPpsIdStrategy = pOldParam->eSpsPpsIdStrategy; + SParaSetOffsetVariable sTmpPsoVariable[PARA_SET_TYPE]; + int32_t iTmpPpsIdList[MAX_DQ_LAYER_NUM * MAX_PPS_COUNT]; + //for LTR or SPS,PPS ID update + uint16_t uiMaxIdrPicId = 0; + for (iIndexD = 0; iIndexD < pOldParam->iSpatialLayerNum; iIndexD++) { + if (pOldParam->sDependencyLayers[iIndexD].uiIdrPicId > uiMaxIdrPicId) + uiMaxIdrPicId = pOldParam->sDependencyLayers[iIndexD].uiIdrPicId; + } + + //for sEncoderStatistics + SEncoderStatistics sTempEncoderStatistics[MAX_DEPENDENCY_LAYER]; + memcpy (sTempEncoderStatistics, (*ppCtx)->sEncoderStatistics, sizeof (sTempEncoderStatistics)); + int64_t uiStartTimestamp = (*ppCtx)->uiStartTimestamp; + int32_t iStatisticsLogInterval = (*ppCtx)->iStatisticsLogInterval; + int64_t iLastStatisticsLogTs = (*ppCtx)->iLastStatisticsLogTs; + //for sEncoderStatistics + + SExistingParasetList sExistingParasetList; + SExistingParasetList* pExistingParasetList = NULL; + + if (((CONSTANT_ID != iOldSpsPpsIdStrategy) && (CONSTANT_ID != pNewParam->eSpsPpsIdStrategy))) { + (*ppCtx)->pFuncList->pParametersetStrategy->OutputCurrentStructure (sTmpPsoVariable, iTmpPpsIdList, (*ppCtx), + &sExistingParasetList); + + if ((SPS_LISTING & iOldSpsPpsIdStrategy) + && (SPS_LISTING & pNewParam->eSpsPpsIdStrategy)) { + pExistingParasetList = &sExistingParasetList; + } + } + + WelsUninitEncoderExt (ppCtx); + + /* Update new parameters */ + if (WelsInitEncoderExt (ppCtx, pNewParam, 
&sLogCtx, pExistingParasetList)) + return 1; + //if WelsInitEncoderExt succeed + //for LTR or SPS,PPS ID update + for (iIndexD = 0; iIndexD < pNewParam->iSpatialLayerNum; iIndexD++) { + (*ppCtx)->pSvcParam->sDependencyLayers[iIndexD].uiIdrPicId = uiMaxIdrPicId; + } + + //for sEncoderStatistics + memcpy ((*ppCtx)->sEncoderStatistics, sTempEncoderStatistics, sizeof (sTempEncoderStatistics)); + (*ppCtx)->uiStartTimestamp = uiStartTimestamp; + (*ppCtx)->iStatisticsLogInterval = iStatisticsLogInterval; + (*ppCtx)->iLastStatisticsLogTs = iLastStatisticsLogTs; + //for sEncoderStatistics + + //load back the needed structure for eSpsPpsIdStrategy + if (((CONSTANT_ID != iOldSpsPpsIdStrategy) && (CONSTANT_ID != pNewParam->eSpsPpsIdStrategy)) + || ((SPS_PPS_LISTING == iOldSpsPpsIdStrategy) + && (SPS_PPS_LISTING == pNewParam->eSpsPpsIdStrategy))) { + (*ppCtx)->pFuncList->pParametersetStrategy->LoadPreviousStructure (sTmpPsoVariable, iTmpPpsIdList); + } + } else { + /* maybe adjustment introduced in bitrate or little settings adjustment and so on.. */ + pNewParam->iNumRefFrame = WELS_CLIP3 (pNewParam->iNumRefFrame, MIN_REF_PIC_COUNT, + (pNewParam->iUsageType == CAMERA_VIDEO_REAL_TIME ? 
MAX_REFERENCE_PICTURE_COUNT_NUM_CAMERA : + MAX_REFERENCE_PICTURE_COUNT_NUM_SCREEN)); + pNewParam->iLoopFilterDisableIdc = WELS_CLIP3 (pNewParam->iLoopFilterDisableIdc, 0, 6); + pNewParam->iLoopFilterAlphaC0Offset = WELS_CLIP3 (pNewParam->iLoopFilterAlphaC0Offset, -6, 6); + pNewParam->iLoopFilterBetaOffset = WELS_CLIP3 (pNewParam->iLoopFilterBetaOffset, -6, 6); + pNewParam->fMaxFrameRate = WELS_CLIP3 (pNewParam->fMaxFrameRate, MIN_FRAME_RATE, MAX_FRAME_RATE); + + // we can not use direct struct based memcpy due some fields need keep unchanged as before + pOldParam->fMaxFrameRate = pNewParam->fMaxFrameRate; // maximal frame rate [Hz / fps] + pOldParam->iComplexityMode = pNewParam->iComplexityMode; // color space of input sequence + pOldParam->uiIntraPeriod = pNewParam->uiIntraPeriod; // intra period (multiple of GOP size as desired) + pOldParam->eSpsPpsIdStrategy = pNewParam->eSpsPpsIdStrategy; + pOldParam->bPrefixNalAddingCtrl = pNewParam->bPrefixNalAddingCtrl; + pOldParam->iNumRefFrame = pNewParam->iNumRefFrame; // number of reference frame used + pOldParam->uiGopSize = pNewParam->uiGopSize; + if (pOldParam->iTemporalLayerNum != pNewParam->iTemporalLayerNum) { + pOldParam->iTemporalLayerNum = pNewParam->iTemporalLayerNum; + for (int32_t iIndexD = 0; iIndexD < MAX_DEPENDENCY_LAYER; iIndexD++) + pOldParam->sDependencyLayers[iIndexD].iCodingIndex = 0; + } + pOldParam->iDecompStages = pNewParam->iDecompStages; + /* denoise control */ + pOldParam->bEnableDenoise = pNewParam->bEnableDenoise; + + /* background detection control */ + pOldParam->bEnableBackgroundDetection = pNewParam->bEnableBackgroundDetection; + + /* adaptive quantization control */ + pOldParam->bEnableAdaptiveQuant = pNewParam->bEnableAdaptiveQuant; + + /* int32_t term reference control */ + pOldParam->bEnableLongTermReference = pNewParam->bEnableLongTermReference; + pOldParam->iLtrMarkPeriod = pNewParam->iLtrMarkPeriod; + + // keep below values unchanged as before + pOldParam->bEnableSSEI = 
pNewParam->bEnableSSEI; + pOldParam->bSimulcastAVC = pNewParam->bSimulcastAVC; + pOldParam->bEnableFrameCroppingFlag = pNewParam->bEnableFrameCroppingFlag; // enable frame cropping flag + + /* Motion search */ + + /* Deblocking loop filter */ + pOldParam->iLoopFilterDisableIdc = + pNewParam->iLoopFilterDisableIdc; // 0: on, 1: off, 2: on except for slice boundaries + pOldParam->iLoopFilterAlphaC0Offset = pNewParam->iLoopFilterAlphaC0Offset;// AlphaOffset: valid range [-6, 6], default 0 + pOldParam->iLoopFilterBetaOffset = + pNewParam->iLoopFilterBetaOffset; // BetaOffset: valid range [-6, 6], default 0 + + /* Rate Control */ + pOldParam->iRCMode = pNewParam->iRCMode; + pOldParam->iTargetBitrate = + pNewParam->iTargetBitrate; // overall target bitrate introduced in RC module + pOldParam->iPaddingFlag = pNewParam->iPaddingFlag; + + /* Layer definition */ + pOldParam->bPrefixNalAddingCtrl = pNewParam->bPrefixNalAddingCtrl; + + // d + iIndexD = 0; + do { + SSpatialLayerInternal* pOldDlpInternal = &pOldParam->sDependencyLayers[iIndexD]; + SSpatialLayerInternal* pNewDlpInternal = &pNewParam->sDependencyLayers[iIndexD]; + + SSpatialLayerConfig* pOldDlp = &pOldParam->sSpatialLayers[iIndexD]; + SSpatialLayerConfig* pNewDlp = &pNewParam->sSpatialLayers[iIndexD]; + + pOldDlpInternal->fInputFrameRate = pNewDlpInternal->fInputFrameRate; // input frame rate + pOldDlpInternal->fOutputFrameRate = pNewDlpInternal->fOutputFrameRate; // output frame rate + pOldDlp->iSpatialBitrate = pNewDlp->iSpatialBitrate; + pOldDlp->iMaxSpatialBitrate = pNewDlp->iMaxSpatialBitrate; + pOldDlp->uiProfileIdc = + pNewDlp->uiProfileIdc; // value of profile IDC (0 for auto-detection) + pOldDlp->iDLayerQp = pNewDlp->iDLayerQp; + + /* Derived variants below */ + pOldDlpInternal->iTemporalResolution = pNewDlpInternal->iTemporalResolution; + pOldDlpInternal->iDecompositionStages = pNewDlpInternal->iDecompositionStages; + memcpy (pOldDlpInternal->uiCodingIdx2TemporalId, 
pNewDlpInternal->uiCodingIdx2TemporalId, + sizeof (pOldDlpInternal->uiCodingIdx2TemporalId)); // confirmed_safe_unsafe_usage + ++ iIndexD; + } while (iIndexD < pOldParam->iSpatialLayerNum); + } + + /* Any else initialization/reset for rate control here? */ + + return 0; +} + +int32_t WelsEncoderApplyLTR (SLogContext* pLogCtx, sWelsEncCtx** ppCtx, SLTRConfig* pLTRValue) { + SWelsSvcCodingParam sConfig; + int32_t iNumRefFrame = 1; + int32_t iRet = 0; + memcpy (&sConfig, (*ppCtx)->pSvcParam, sizeof (SWelsSvcCodingParam)); + sConfig.bEnableLongTermReference = pLTRValue->bEnableLongTermReference; + sConfig.iLTRRefNum = pLTRValue->iLTRRefNum; + int32_t uiGopSize = 1 << (sConfig.iTemporalLayerNum - 1); + if (sConfig.iUsageType == SCREEN_CONTENT_REAL_TIME) { + if (sConfig.bEnableLongTermReference) { + sConfig.iLTRRefNum = LONG_TERM_REF_NUM_SCREEN;//WELS_CLIP3 (sConfig.iLTRRefNum, 1, LONG_TERM_REF_NUM_SCREEN); + iNumRefFrame = WELS_MAX (1, WELS_LOG2 (uiGopSize)) + sConfig.iLTRRefNum; + } else { + sConfig.iLTRRefNum = 0; + iNumRefFrame = WELS_MAX (1, uiGopSize >> 1); + } + } else { + if (sConfig.bEnableLongTermReference) { + sConfig.iLTRRefNum = LONG_TERM_REF_NUM;//WELS_CLIP3 (sConfig.iLTRRefNum, 1, LONG_TERM_REF_NUM); + } else { + sConfig.iLTRRefNum = 0; + } + iNumRefFrame = ((uiGopSize >> 1) > 1) ? 
((uiGopSize >> 1) + sConfig.iLTRRefNum) : (MIN_REF_PIC_COUNT + + sConfig.iLTRRefNum); + iNumRefFrame = WELS_CLIP3 (iNumRefFrame, MIN_REF_PIC_COUNT, MAX_REFERENCE_PICTURE_COUNT_NUM_CAMERA); + + } + if (iNumRefFrame > sConfig.iMaxNumRefFrame) { + WelsLog (pLogCtx, WELS_LOG_WARNING, + " CWelsH264SVCEncoder::SetOption LTR flag = %d and number = %d: Required number of reference increased to %d and iMaxNumRefFrame is adjusted (from %d)", + sConfig.bEnableLongTermReference, sConfig.iLTRRefNum, iNumRefFrame, sConfig.iMaxNumRefFrame); + sConfig.iMaxNumRefFrame = iNumRefFrame; + } + + if (sConfig.iNumRefFrame < iNumRefFrame) { + WelsLog (pLogCtx, WELS_LOG_WARNING, + " CWelsH264SVCEncoder::SetOption LTR flag = %d and number = %d, Required number of reference increased from Old = %d to New = %d because of LTR setting", + sConfig.bEnableLongTermReference, sConfig.iLTRRefNum, sConfig.iNumRefFrame, iNumRefFrame); + sConfig.iNumRefFrame = iNumRefFrame; + } + WelsLog (pLogCtx, WELS_LOG_INFO, "CWelsH264SVCEncoder::SetOption enable LTR = %d,ltrnum = %d", + sConfig.bEnableLongTermReference, sConfig.iLTRRefNum); + iRet = WelsEncoderParamAdjust (ppCtx, &sConfig); + return iRet; +} + +int32_t DynSliceRealloc (sWelsEncCtx* pCtx, + SFrameBSInfo* pFrameBsInfo, + SLayerBSInfo* pLayerBsInfo) { + int32_t iRet = 0; + + iRet = FrameBsRealloc (pCtx, pFrameBsInfo, pLayerBsInfo, pCtx->pCurDqLayer->iMaxSliceNum); + if (ENC_RETURN_SUCCESS != iRet) { + return iRet; + } + + iRet = ReallocSliceBuffer (pCtx); + if (ENC_RETURN_SUCCESS != iRet) { + return iRet; + } + + return iRet; +} + +int32_t WelsCodeOnePicPartition (sWelsEncCtx* pCtx, + SFrameBSInfo* pFrameBSInfo, + SLayerBSInfo* pLayerBsInfo, + int32_t* pNalIdxInLayer, + int32_t* pLayerSize, + int32_t iFirstMbIdxInPartition, + int32_t iEndMbIdxInPartition, + int32_t iStartSliceIdx + ) { + + SDqLayer* pCurLayer = pCtx->pCurDqLayer; + uint32_t uSlcBuffIdx = 0; + SSlice* pStartSlice = 
&pCurLayer->sSliceBufferInfo[uSlcBuffIdx].pSliceBuffer[iStartSliceIdx]; + int32_t iNalIdxInLayer = *pNalIdxInLayer; + int32_t iSliceIdx = iStartSliceIdx; + const int32_t kiSliceStep = pCtx->iActiveThreadsNum; + const int32_t kiPartitionId = iStartSliceIdx % kiSliceStep; + int32_t iPartitionBsSize = 0; + int32_t iAnyMbLeftInPartition = iEndMbIdxInPartition - iFirstMbIdxInPartition + 1; + const EWelsNalUnitType keNalType = pCtx->eNalType; + const EWelsNalRefIdc keNalRefIdc = pCtx->eNalPriority; + const bool kbNeedPrefix = pCtx->bNeedPrefixNalFlag; + const int32_t kiSliceIdxStep = pCtx->iActiveThreadsNum; + int32_t iReturn = ENC_RETURN_SUCCESS; + + pStartSlice->sSliceHeaderExt.sSliceHeader.iFirstMbInSlice = iFirstMbIdxInPartition; + + while (iAnyMbLeftInPartition > 0) { + int32_t iSliceSize = 0; + int32_t iPayloadSize = 0; + SSlice* pCurSlice = NULL; + + if (iSliceIdx >= (pCurLayer->sSliceBufferInfo[uSlcBuffIdx].iMaxSliceNum - + kiSliceIdxStep)) { // insufficient memory in pSliceInLayer[] + if (pCtx->iActiveThreadsNum == 1) { + //only single thread support re-alloc now + if (DynSliceRealloc (pCtx, pFrameBSInfo, pLayerBsInfo)) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, + "CWelsH264SVCEncoder::WelsCodeOnePicPartition: DynSliceRealloc not successful"); + return ENC_RETURN_MEMALLOCERR; + } + } else if (iSliceIdx >= pCurLayer->iMaxSliceNum) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, + "CWelsH264SVCEncoder::WelsCodeOnePicPartition: iSliceIdx(%d) over iMaxSliceNum(%d)", iSliceIdx, + pCurLayer->iMaxSliceNum); + return ENC_RETURN_MEMALLOCERR; + } + } + + if (kbNeedPrefix) { + iReturn = AddPrefixNal (pCtx, pLayerBsInfo, &pLayerBsInfo->pNalLengthInByte[0], &iNalIdxInLayer, keNalType, keNalRefIdc, + iPayloadSize); + WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS) + iPartitionBsSize += iPayloadSize; + } + + WelsLoadNal (pCtx->pOut, keNalType, keNalRefIdc); + pCurSlice = &pCtx->pCurDqLayer->sSliceBufferInfo[uSlcBuffIdx].pSliceBuffer[iSliceIdx]; + 
pCurSlice->iSliceIdx = iSliceIdx; + + iReturn = WelsCodeOneSlice (pCtx, pCurSlice, keNalType); + WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS) + WelsUnloadNal (pCtx->pOut); + + iReturn = WelsEncodeNal (&pCtx->pOut->sNalList[pCtx->pOut->iNalIndex - 1], + &pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt, + pCtx->iFrameBsSize - pCtx->iPosBsBuffer, + pCtx->pFrameBs + pCtx->iPosBsBuffer, + &pLayerBsInfo->pNalLengthInByte[iNalIdxInLayer]); + WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS) + iSliceSize = pLayerBsInfo->pNalLengthInByte[iNalIdxInLayer]; + + pCtx->iPosBsBuffer += iSliceSize; + iPartitionBsSize += iSliceSize; + +#if defined(SLICE_INFO_OUTPUT) + fprintf (stderr, + "@slice=%-6d sliceType:%c idc:%d size:%-6d\n", + iSliceIdx, + (pCtx->eSliceType == P_SLICE ? 'P' : 'I'), + keNalRefIdc, + iSliceSize); +#endif//SLICE_INFO_OUTPUT + + ++ iNalIdxInLayer; + iSliceIdx += kiSliceStep; //if iSliceIdx is not continuous + iAnyMbLeftInPartition = iEndMbIdxInPartition - pCurLayer->LastCodedMbIdxOfPartition[kiPartitionId]; + } + + *pLayerSize = iPartitionBsSize; + *pNalIdxInLayer = iNalIdxInLayer; + + // slice based packing??? + pLayerBsInfo->uiLayerType = VIDEO_CODING_LAYER; + pLayerBsInfo->uiSpatialId = pCtx->uiDependencyId; + pLayerBsInfo->uiTemporalId = pCtx->uiTemporalId; + pLayerBsInfo->uiQualityId = 0; + pLayerBsInfo->iNalCount = iNalIdxInLayer; + return ENC_RETURN_SUCCESS; +} +} // namespace WelsEnc diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/get_intra_predictor.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/get_intra_predictor.cpp new file mode 100644 index 000000000..ab6de930b --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/get_intra_predictor.cpp @@ -0,0 +1,738 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file get_intra_predictor.c + * + * \brief implementation for get intra predictor about 16x16, 4x4, chroma. + * + * \date 4/2/2009 Created + * 9/14/2009 C level based optimization with high performance gained. + * [const, using ST32/ST64 to replace memset, memcpy and memmove etc.] 
+ * + ************************************************************************************* + */ +#include "ls_defines.h" +#include "cpu_core.h" +#include "intra_pred_common.h" +#include "get_intra_predictor.h" + +namespace WelsEnc { +#define I4x4_COUNT 4 +#define I8x8_COUNT 8 +#define I16x16_COUNT 16 + +typedef void (*PFillingPred) (uint8_t* pPred, uint8_t* pSrc); +typedef void (*PFillingPred1to16) (uint8_t* pPred, const uint8_t kuiSrc); + +static inline void WelsFillingPred8to16_c (uint8_t* pPred, uint8_t* pSrc) { + ST64 (pPred , LD64 (pSrc)); + ST64 (pPred + 8, LD64 (pSrc)); +} +static inline void WelsFillingPred8x2to16_c (uint8_t* pPred, uint8_t* pSrc) { + ST64 (pPred , LD64 (pSrc)); + ST64 (pPred + 8, LD64 (pSrc + 8)); +} +static inline void WelsFillingPred1to16_c (uint8_t* pPred, const uint8_t kuiSrc) { + const uint8_t kuiSrc8[8] = { kuiSrc, kuiSrc, kuiSrc, kuiSrc, kuiSrc, kuiSrc, kuiSrc, kuiSrc }; + ST64 (pPred , LD64 (kuiSrc8)); + ST64 (pPred + 8, LD64 (kuiSrc8)); +} + +#define WelsFillingPred8to16 WelsFillingPred8to16_c +#define WelsFillingPred8x2to16 WelsFillingPred8x2to16_c +#define WelsFillingPred1to16 WelsFillingPred1to16_c + + + +#define I4x4_PRED_STRIDE 4 +#define I4x4_PRED_STRIDE2 8 +#define I4x4_PRED_STRIDE3 12 + +void WelsI4x4LumaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { + const uint32_t kuiSrc = LD32 (&pRef[-kiStride]); + ENFORCE_STACK_ALIGN_1D (uint32_t, uiSrcx2, 2, 16) + uiSrcx2[0] = uiSrcx2[1] = kuiSrc; + + WelsFillingPred8to16 (pPred, (uint8_t*)&uiSrcx2[0]); +} + +void WelsI4x4LumaPredH_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { + const uint32_t kiStridex2Left = (kiStride << 1) - 1; + const uint32_t kiStridex3Left = kiStride + kiStridex2Left; + const uint8_t kuiHor1 = pRef[-1]; + const uint8_t kuiHor2 = pRef[kiStride - 1]; + const uint8_t kuiHor3 = pRef[kiStridex2Left]; + const uint8_t kuiHor4 = pRef[kiStridex3Left]; + const uint8_t kuiVec1[4] = {kuiHor1, kuiHor1, kuiHor1, kuiHor1}; + const uint8_t 
kuiVec2[4] = {kuiHor2, kuiHor2, kuiHor2, kuiHor2}; + const uint8_t kuiVec3[4] = {kuiHor3, kuiHor3, kuiHor3, kuiHor3}; + const uint8_t kuiVec4[4] = {kuiHor4, kuiHor4, kuiHor4, kuiHor4}; + ENFORCE_STACK_ALIGN_1D (uint8_t, uiSrc, 16, 16) // TobeCont'd about assign opt as follows + ST32 (&uiSrc[0], LD32 (kuiVec1)); + ST32 (&uiSrc[4], LD32 (kuiVec2)); + ST32 (&uiSrc[8], LD32 (kuiVec3)); + ST32 (&uiSrc[12], LD32 (kuiVec4)); + + WelsFillingPred8x2to16 (pPred, uiSrc); +} +void WelsI4x4LumaPredDc_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { + const uint8_t kuiDcValue = (pRef[-1] + pRef[kiStride - 1] + pRef[ (kiStride << 1) - 1] + pRef[ (kiStride << 1) + + kiStride - 1] + + pRef[-kiStride] + pRef[1 - kiStride] + pRef[2 - kiStride] + pRef[3 - kiStride] + 4) >> 3; + + WelsFillingPred1to16 (pPred, kuiDcValue); +} + +void WelsI4x4LumaPredDcLeft_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { + const uint8_t kuiDcValue = (pRef[-1] + pRef[kiStride - 1] + pRef[ (kiStride << 1) - 1] + pRef[ (kiStride << 1) + + kiStride - 1] + 2) >> 2; + + WelsFillingPred1to16 (pPred, kuiDcValue); +} + +void WelsI4x4LumaPredDcTop_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { + const uint8_t kuiDcValue = (pRef[-kiStride] + pRef[1 - kiStride] + pRef[2 - kiStride] + pRef[3 - kiStride] + 2) >> 2; + + WelsFillingPred1to16 (pPred, kuiDcValue); +} + +void WelsI4x4LumaPredDcNA_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { + const uint8_t kuiDcValue = 0x80; + + WelsFillingPred1to16 (pPred, kuiDcValue); +} + +/*down pLeft*/ +void WelsI4x4LumaPredDDL_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { + /*get pTop*/ + const uint8_t kuiT0 = pRef[-kiStride]; + const uint8_t kuiT1 = pRef[1 - kiStride]; + const uint8_t kuiT2 = pRef[2 - kiStride]; + const uint8_t kuiT3 = pRef[3 - kiStride]; + const uint8_t kuiT4 = pRef[4 - kiStride]; + const uint8_t kuiT5 = pRef[5 - kiStride]; + const uint8_t kuiT6 = pRef[6 - kiStride]; + const uint8_t kuiT7 = pRef[7 - 
kiStride]; + const uint8_t kuiDDL0 = (2 + kuiT0 + kuiT2 + (kuiT1 << 1)) >> 2; // uiDDL0 + const uint8_t kuiDDL1 = (2 + kuiT1 + kuiT3 + (kuiT2 << 1)) >> 2; // uiDDL1 + const uint8_t kuiDDL2 = (2 + kuiT2 + kuiT4 + (kuiT3 << 1)) >> 2; // uiDDL2 + const uint8_t kuiDDL3 = (2 + kuiT3 + kuiT5 + (kuiT4 << 1)) >> 2; // uiDDL3 + const uint8_t kuiDDL4 = (2 + kuiT4 + kuiT6 + (kuiT5 << 1)) >> 2; // uiDDL4 + const uint8_t kuiDDL5 = (2 + kuiT5 + kuiT7 + (kuiT6 << 1)) >> 2; // uiDDL5 + const uint8_t kuiDDL6 = (2 + kuiT6 + kuiT7 + (kuiT7 << 1)) >> 2; // uiDDL6 + ENFORCE_STACK_ALIGN_1D (uint8_t, uiSrc, 16, 16) // TobeCont'd about assign opt as follows + uiSrc[0] = kuiDDL0; + uiSrc[1] = uiSrc[4] = kuiDDL1; + uiSrc[2] = uiSrc[5] = uiSrc[8] = kuiDDL2; + uiSrc[3] = uiSrc[6] = uiSrc[9] = uiSrc[12] = kuiDDL3; + uiSrc[7] = uiSrc[10] = uiSrc[13] = kuiDDL4; + uiSrc[11] = uiSrc[14] = kuiDDL5; + uiSrc[15] = kuiDDL6; + + WelsFillingPred8x2to16 (pPred, uiSrc); +} + +/*down pLeft*/ +void WelsI4x4LumaPredDDLTop_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { + /*get pTop*/ + const uint8_t kuiT0 = pRef[-kiStride]; + const uint8_t kuiT1 = pRef[1 - kiStride]; + const uint8_t kuiT2 = pRef[2 - kiStride]; + const uint8_t kuiT3 = pRef[3 - kiStride]; + const uint8_t kuiDLT0 = (2 + kuiT0 + kuiT2 + (kuiT1 << 1)) >> 2; // uiDLT0 + const uint8_t kuiDLT1 = (2 + kuiT1 + kuiT3 + (kuiT2 << 1)) >> 2; // uiDLT1 + const uint8_t kuiDLT2 = (2 + kuiT2 + kuiT3 + (kuiT3 << 1)) >> 2; // uiDLT2 + const uint8_t kuiDLT3 = (2 + (kuiT3 << 2)) >> 2; // uiDLT3 + ENFORCE_STACK_ALIGN_1D (uint8_t, uiSrc, 16, 16) // TobeCont'd about assign opt as follows + memset (&uiSrc[6], kuiDLT3, 10 * sizeof (uint8_t)); + uiSrc[0] = kuiDLT0; + uiSrc[1] = uiSrc[4] = kuiDLT1; + uiSrc[2] = uiSrc[5] = uiSrc[8] = kuiDLT2; + uiSrc[3] = kuiDLT3; + + WelsFillingPred8x2to16 (pPred, uiSrc); +} + + +/*down right*/ +void WelsI4x4LumaPredDDR_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { + const int32_t kiStridex2 = kiStride << 1; + 
const int32_t kiStridex3 = kiStride + kiStridex2; + const uint8_t kuiLT = pRef[-kiStride - 1]; // pTop-pLeft + /*get pLeft and pTop*/ + const uint8_t kuiL0 = pRef[-1]; + const uint8_t kuiL1 = pRef[kiStride - 1]; + const uint8_t kuiL2 = pRef[kiStridex2 - 1]; + const uint8_t kuiL3 = pRef[kiStridex3 - 1]; + const uint8_t kuiT0 = pRef[-kiStride]; + const uint8_t kuiT1 = pRef[1 - kiStride]; + const uint8_t kuiT2 = pRef[2 - kiStride]; + const uint8_t kuiT3 = pRef[3 - kiStride]; + const uint16_t kuiTL0 = 1 + kuiLT + kuiL0; + const uint16_t kuiLT0 = 1 + kuiLT + kuiT0; + const uint16_t kuiT01 = 1 + kuiT0 + kuiT1; + const uint16_t kuiT12 = 1 + kuiT1 + kuiT2; + const uint16_t kuiT23 = 1 + kuiT2 + kuiT3; + const uint16_t kuiL01 = 1 + kuiL0 + kuiL1; + const uint16_t kuiL12 = 1 + kuiL1 + kuiL2; + const uint16_t kuiL23 = 1 + kuiL2 + kuiL3; + const uint8_t kuiDDR0 = (kuiTL0 + kuiLT0) >> 2; + const uint8_t kuiDDR1 = (kuiLT0 + kuiT01) >> 2; + const uint8_t kuiDDR2 = (kuiT01 + kuiT12) >> 2; + const uint8_t kuiDDR3 = (kuiT12 + kuiT23) >> 2; + const uint8_t kuiDDR4 = (kuiTL0 + kuiL01) >> 2; + const uint8_t kuiDDR5 = (kuiL01 + kuiL12) >> 2; + const uint8_t kuiDDR6 = (kuiL12 + kuiL23) >> 2; + ENFORCE_STACK_ALIGN_1D (uint8_t, uiSrc, 16, 16) // TobeCont'd about assign opt as follows + uiSrc[0] = uiSrc[5] = uiSrc[10] = uiSrc[15] = kuiDDR0; + uiSrc[1] = uiSrc[6] = uiSrc[11] = kuiDDR1; + uiSrc[2] = uiSrc[7] = kuiDDR2; + uiSrc[3] = kuiDDR3; + uiSrc[4] = uiSrc[9] = uiSrc[14] = kuiDDR4; + uiSrc[8] = uiSrc[13] = kuiDDR5; + uiSrc[12] = kuiDDR6; + + WelsFillingPred8x2to16 (pPred, uiSrc); +} + + +/*vertical pLeft*/ +void WelsI4x4LumaPredVL_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { + /*get pTop*/ + const uint8_t kuiT0 = pRef[-kiStride]; + const uint8_t kuiT1 = pRef[1 - kiStride]; + const uint8_t kuiT2 = pRef[2 - kiStride]; + const uint8_t kuiT3 = pRef[3 - kiStride]; + const uint8_t kuiT4 = pRef[4 - kiStride]; + const uint8_t kuiT5 = pRef[5 - kiStride]; + const uint8_t kuiT6 = pRef[6 
- kiStride]; + const uint8_t kuiVL0 = (1 + kuiT0 + kuiT1) >> 1; // uiVL0 + const uint8_t kuiVL1 = (1 + kuiT1 + kuiT2) >> 1; // uiVL1 + const uint8_t kuiVL2 = (1 + kuiT2 + kuiT3) >> 1; // uiVL2 + const uint8_t kuiVL3 = (1 + kuiT3 + kuiT4) >> 1; // uiVL3 + const uint8_t kuiVL4 = (1 + kuiT4 + kuiT5) >> 1; // uiVL4 + const uint8_t kuiVL5 = (2 + kuiT0 + (kuiT1 << 1) + kuiT2) >> 2; // uiVL5 + const uint8_t kuiVL6 = (2 + kuiT1 + (kuiT2 << 1) + kuiT3) >> 2; // uiVL6 + const uint8_t kuiVL7 = (2 + kuiT2 + (kuiT3 << 1) + kuiT4) >> 2; // uiVL7 + const uint8_t kuiVL8 = (2 + kuiT3 + (kuiT4 << 1) + kuiT5) >> 2; // uiVL8 + const uint8_t kuiVL9 = (2 + kuiT4 + (kuiT5 << 1) + kuiT6) >> 2; // uiVL9 + ENFORCE_STACK_ALIGN_1D (uint8_t, uiSrc, 16, 16) // TobeCont'd about assign opt as follows + uiSrc[0] = kuiVL0; + uiSrc[1] = uiSrc[8] = kuiVL1; + uiSrc[2] = uiSrc[9] = kuiVL2; + uiSrc[3] = uiSrc[10] = kuiVL3; + uiSrc[4] = kuiVL5; + uiSrc[5] = uiSrc[12] = kuiVL6; + uiSrc[6] = uiSrc[13] = kuiVL7; + uiSrc[7] = uiSrc[14] = kuiVL8; + uiSrc[11] = kuiVL4; + uiSrc[15] = kuiVL9; + + WelsFillingPred8x2to16 (pPred, uiSrc); +} + + + +/*vertical pLeft*/ +void WelsI4x4LumaPredVLTop_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { + uint8_t* pTopLeft = &pRef[-kiStride - 1]; // pTop-pLeft + /*get pTop*/ + const uint8_t kuiT0 = * (pTopLeft + 1); + const uint8_t kuiT1 = * (pTopLeft + 2); + const uint8_t kuiT2 = * (pTopLeft + 3); + const uint8_t kuiT3 = * (pTopLeft + 4); + const uint8_t kuiVLT0 = (1 + kuiT0 + kuiT1) >> 1; // uiVLT0 + const uint8_t kuiVLT1 = (1 + kuiT1 + kuiT2) >> 1; // uiVLT1 + const uint8_t kuiVLT2 = (1 + kuiT2 + kuiT3) >> 1; // uiVLT2 + const uint8_t kuiVLT3 = (1 + (kuiT3 << 1)) >> 1; // uiVLT3 + const uint8_t kuiVLT4 = (2 + kuiT0 + (kuiT1 << 1) + kuiT2) >> 2; // uiVLT4 + const uint8_t kuiVLT5 = (2 + kuiT1 + (kuiT2 << 1) + kuiT3) >> 2; // uiVLT5 + const uint8_t kuiVLT6 = (2 + kuiT2 + (kuiT3 << 1) + kuiT3) >> 2; // uiVLT6 + const uint8_t kuiVLT7 = (2 + (kuiT3 << 2)) >> 2; // 
uiVLT7 + ENFORCE_STACK_ALIGN_1D (uint8_t, uiSrc, 16, 16) // TobeCont'd about assign opt as follows + uiSrc[0] = kuiVLT0; + uiSrc[1] = uiSrc[8] = kuiVLT1; + uiSrc[2] = uiSrc[9] = kuiVLT2; + uiSrc[3] = uiSrc[10] = uiSrc[11] = kuiVLT3; + uiSrc[4] = kuiVLT4; + uiSrc[5] = uiSrc[12] = kuiVLT5; + uiSrc[6] = uiSrc[13] = kuiVLT6; + uiSrc[7] = uiSrc[14] = uiSrc[15] = kuiVLT7; + + WelsFillingPred8x2to16 (pPred, uiSrc); +} + +/*vertical right*/ +void WelsI4x4LumaPredVR_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { + const int32_t kiStridex2 = kiStride << 1; + const uint8_t kuiLT = pRef[-kiStride - 1]; // pTop-pLeft + /*get pLeft and pTop*/ + const uint8_t kuiL0 = pRef[-1]; + const uint8_t kuiL1 = pRef[kiStride - 1]; + const uint8_t kuiL2 = pRef[kiStridex2 - 1]; + const uint8_t kuiT0 = pRef[-kiStride]; + const uint8_t kuiT1 = pRef[1 - kiStride]; + const uint8_t kuiT2 = pRef[2 - kiStride]; + const uint8_t kuiT3 = pRef[3 - kiStride]; + const uint8_t kuiVR0 = (1 + kuiLT + kuiT0) >> 1; + const uint8_t kuiVR1 = (1 + kuiT0 + kuiT1) >> 1; + const uint8_t kuiVR2 = (1 + kuiT1 + kuiT2) >> 1; + const uint8_t kuiVR3 = (1 + kuiT2 + kuiT3) >> 1; + const uint8_t kuiVR4 = (2 + kuiL0 + (kuiLT << 1) + kuiT0) >> 2; + const uint8_t kuiVR5 = (2 + kuiLT + (kuiT0 << 1) + kuiT1) >> 2; + const uint8_t kuiVR6 = (2 + kuiT0 + (kuiT1 << 1) + kuiT2) >> 2; + const uint8_t kuiVR7 = (2 + kuiT1 + (kuiT2 << 1) + kuiT3) >> 2; + const uint8_t kuiVR8 = (2 + kuiLT + (kuiL0 << 1) + kuiL1) >> 2; + const uint8_t kuiVR9 = (2 + kuiL0 + (kuiL1 << 1) + kuiL2) >> 2; + ENFORCE_STACK_ALIGN_1D (uint8_t, uiSrc, 16, 16) // TobeCont'd about assign opt as follows + uiSrc[0] = uiSrc[9] = kuiVR0; + uiSrc[1] = uiSrc[10] = kuiVR1; + uiSrc[2] = uiSrc[11] = kuiVR2; + uiSrc[3] = kuiVR3; + uiSrc[4] = uiSrc[13] = kuiVR4; + uiSrc[5] = uiSrc[14] = kuiVR5; + uiSrc[6] = uiSrc[15] = kuiVR6; + uiSrc[7] = kuiVR7; + uiSrc[8] = kuiVR8; + uiSrc[12] = kuiVR9; + + WelsFillingPred8x2to16 (pPred, uiSrc); +} + + +/*horizontal up*/ +void 
WelsI4x4LumaPredHU_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { + const int32_t kiStridex2 = kiStride << 1; + const int32_t kiStridex3 = kiStride + kiStridex2; + /*get pLeft*/ + const uint8_t kuiL0 = pRef[-1]; + const uint8_t kuiL1 = pRef[kiStride - 1]; + const uint8_t kuiL2 = pRef[kiStridex2 - 1]; + const uint8_t kuiL3 = pRef[kiStridex3 - 1]; + const uint16_t kuiL01 = (1 + kuiL0 + kuiL1); + const uint16_t kuiL12 = (1 + kuiL1 + kuiL2); + const uint16_t kuiL23 = (1 + kuiL2 + kuiL3); + const uint8_t kuiHU0 = kuiL01 >> 1; + const uint8_t kuiHU1 = (kuiL01 + kuiL12) >> 2; + const uint8_t kuiHU2 = kuiL12 >> 1; + const uint8_t kuiHU3 = (kuiL12 + kuiL23) >> 2; + const uint8_t kuiHU4 = kuiL23 >> 1; + const uint8_t kuiHU5 = (1 + kuiL23 + (kuiL3 << 1)) >> 2; + ENFORCE_STACK_ALIGN_1D (uint8_t, uiSrc, 16, 16) // TobeCont'd about assign opt as follows + uiSrc[0] = kuiHU0; + uiSrc[1] = kuiHU1; + uiSrc[2] = uiSrc[4] = kuiHU2; + uiSrc[3] = uiSrc[5] = kuiHU3; + uiSrc[6] = uiSrc[8] = kuiHU4; + uiSrc[7] = uiSrc[9] = kuiHU5; + memset (&uiSrc[10], kuiL3, 6 * sizeof (uint8_t)); + + WelsFillingPred8x2to16 (pPred, uiSrc); +} + + +/*horizontal down*/ +void WelsI4x4LumaPredHD_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { + const int32_t kiStridex2 = kiStride << 1; + const int32_t kiStridex3 = kiStride + kiStridex2; + const uint8_t kuiLT = pRef[-kiStride - 1]; // pTop-pLeft + /*get pLeft and pTop*/ + const uint8_t kuiL0 = pRef[-1]; + const uint8_t kuiL1 = pRef[kiStride - 1]; + const uint8_t kuiL2 = pRef[kiStridex2 - 1]; + const uint8_t kuiL3 = pRef[kiStridex3 - 1]; + const uint8_t kuiT0 = pRef[-kiStride]; + const uint8_t kuiT1 = pRef[1 - kiStride]; + const uint8_t kuiT2 = pRef[2 - kiStride]; + const uint8_t kuiHD0 = (1 + kuiLT + kuiL0) >> 1; // uiHD0 + const uint8_t kuiHD1 = (2 + kuiL0 + (kuiLT << 1) + kuiT0) >> 2; // uiHD1 + const uint8_t kuiHD2 = (2 + kuiLT + (kuiT0 << 1) + kuiT1) >> 2; // uiHD2 + const uint8_t kuiHD3 = (2 + kuiT0 + (kuiT1 << 1) + kuiT2) >> 2; 
// uiHD3 + const uint8_t kuiHD4 = (1 + kuiL0 + kuiL1) >> 1; // uiHD4 + const uint8_t kuiHD5 = (2 + kuiLT + (kuiL0 << 1) + kuiL1) >> 2; // uiHD5 + const uint8_t kuiHD6 = (1 + kuiL1 + kuiL2) >> 1; // uiHD6 + const uint8_t kuiHD7 = (2 + kuiL0 + (kuiL1 << 1) + kuiL2) >> 2; // uiHD7 + const uint8_t kuiHD8 = (1 + kuiL2 + kuiL3) >> 1; // uiHD8 + const uint8_t kuiHD9 = (2 + kuiL1 + (kuiL2 << 1) + kuiL3) >> 2; // uiHD9 + ENFORCE_STACK_ALIGN_1D (uint8_t, uiSrc, 16, 16) // TobeCont'd about assign opt as follows + uiSrc[0] = uiSrc[6] = kuiHD0; + uiSrc[1] = uiSrc[7] = kuiHD1; + uiSrc[2] = kuiHD2; + uiSrc[3] = kuiHD3; + uiSrc[4] = uiSrc[10] = kuiHD4; + uiSrc[5] = uiSrc[11] = kuiHD5; + uiSrc[8] = uiSrc[14] = kuiHD6; + uiSrc[9] = uiSrc[15] = kuiHD7; + uiSrc[12] = kuiHD8; + uiSrc[13] = kuiHD9; + + WelsFillingPred8x2to16 (pPred, uiSrc); +} + + + +#define I8x8_PRED_STRIDE 8 + +void WelsIChromaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { + const uint64_t kuiSrc64 = LD64 (&pRef[-kiStride]); + + ST64 (pPred , kuiSrc64); + ST64 (pPred + 8 , kuiSrc64); + ST64 (pPred + 16, kuiSrc64); + ST64 (pPred + 24, kuiSrc64); + ST64 (pPred + 32, kuiSrc64); + ST64 (pPred + 40, kuiSrc64); + ST64 (pPred + 48, kuiSrc64); + ST64 (pPred + 56, kuiSrc64); +} + +void WelsIChromaPredH_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { + int32_t iStridex7 = (kiStride << 3) - kiStride; + int32_t iI8x8Stridex7 = (I8x8_PRED_STRIDE << 3) - I8x8_PRED_STRIDE; + uint8_t i = 7; + + do { + const uint8_t kuiLeft = pRef[iStridex7 - 1]; // pLeft value + uint64_t kuiSrc64 = (uint64_t) (0x0101010101010101ULL * kuiLeft); + ST64 (pPred + iI8x8Stridex7, kuiSrc64); + + iStridex7 -= kiStride; + iI8x8Stridex7 -= I8x8_PRED_STRIDE; + } while (i-- > 0); +} + + +void WelsIChromaPredPlane_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { + int32_t iLTshift = 0, iTopshift = 0, iLeftshift = 0, iTopSum = 0, iLeftSum = 0; + int32_t i, j; + uint8_t* pTop = &pRef[-kiStride]; + uint8_t* pLeft = 
&pRef[-1]; + + for (i = 0 ; i < 4 ; i ++) { + iTopSum += (i + 1) * (pTop[4 + i] - pTop[2 - i]); + iLeftSum += (i + 1) * (pLeft[ (4 + i) * kiStride] - pLeft[ (2 - i) * kiStride]); + } + + iLTshift = (pLeft[7 * kiStride] + pTop[7]) << 4; + iTopshift = (17 * iTopSum + 16) >> 5; + iLeftshift = (17 * iLeftSum + 16) >> 5; + + for (i = 0 ; i < 8 ; i ++) { + for (j = 0 ; j < 8 ; j ++) { + pPred[j] = WelsClip1 ((iLTshift + iTopshift * (j - 3) + iLeftshift * (i - 3) + 16) >> 5); + } + pPred += I8x8_PRED_STRIDE; + } +} + + +void WelsIChromaPredDc_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { + const int32_t kuiL1 = kiStride - 1; + const int32_t kuiL2 = kuiL1 + kiStride; + const int32_t kuiL3 = kuiL2 + kiStride; + const int32_t kuiL4 = kuiL3 + kiStride; + const int32_t kuiL5 = kuiL4 + kiStride; + const int32_t kuiL6 = kuiL5 + kiStride; + const int32_t kuiL7 = kuiL6 + kiStride; + /*caculate the iMean value*/ + const uint8_t kuiMean1 = (pRef[-kiStride] + pRef[1 - kiStride] + pRef[2 - kiStride] + pRef[3 - kiStride] + + pRef[-1] + pRef[kuiL1] + pRef[kuiL2] + pRef[kuiL3] + 4) >> 3; + const uint32_t kuiSum2 = pRef[4 - kiStride] + pRef[5 - kiStride] + pRef[6 - kiStride] + pRef[7 - kiStride]; + const uint32_t kuiSum3 = pRef[kuiL4] + pRef[kuiL5] + pRef[kuiL6] + pRef[kuiL7]; + const uint8_t kuiMean2 = (kuiSum2 + 2) >> 2; + const uint8_t kuiMean3 = (kuiSum3 + 2) >> 2; + const uint8_t kuiMean4 = (kuiSum2 + kuiSum3 + 4) >> 3; + + const uint8_t kuiTopMean[8] = {kuiMean1, kuiMean1, kuiMean1, kuiMean1, kuiMean2, kuiMean2, kuiMean2, kuiMean2}; + const uint8_t kuiBottomMean[8] = {kuiMean3, kuiMean3, kuiMean3, kuiMean3, kuiMean4, kuiMean4, kuiMean4, kuiMean4}; + const uint64_t kuiTopMean64 = LD64 (kuiTopMean); + const uint64_t kuiBottomMean64 = LD64 (kuiBottomMean); + + ST64 (pPred , kuiTopMean64); + ST64 (pPred + 8 , kuiTopMean64); + ST64 (pPred + 16, kuiTopMean64); + ST64 (pPred + 24, kuiTopMean64); + ST64 (pPred + 32, kuiBottomMean64); + ST64 (pPred + 40, kuiBottomMean64); + 
ST64 (pPred + 48, kuiBottomMean64); + ST64 (pPred + 56, kuiBottomMean64); +} + +void WelsIChromaPredDcLeft_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { + const int32_t kuiL1 = kiStride - 1; + const int32_t kuiL2 = kuiL1 + kiStride; + const int32_t kuiL3 = kuiL2 + kiStride; + const int32_t kuiL4 = kuiL3 + kiStride; + const int32_t kuiL5 = kuiL4 + kiStride; + const int32_t kuiL6 = kuiL5 + kiStride; + const int32_t kuiL7 = kuiL6 + kiStride; + /*caculate the iMean value*/ + const uint8_t kuiTopMean = (pRef[-1] + pRef[kuiL1] + pRef[kuiL2] + pRef[kuiL3] + 2) >> 2 ; + const uint8_t kuiBottomMean = (pRef[kuiL4] + pRef[kuiL5] + pRef[kuiL6] + pRef[kuiL7] + 2) >> 2; + const uint64_t kuiTopMean64 = (uint64_t) (0x0101010101010101ULL * kuiTopMean); + const uint64_t kuiBottomMean64 = (uint64_t) (0x0101010101010101ULL * kuiBottomMean); + ST64 (pPred , kuiTopMean64); + ST64 (pPred + 8 , kuiTopMean64); + ST64 (pPred + 16, kuiTopMean64); + ST64 (pPred + 24, kuiTopMean64); + ST64 (pPred + 32, kuiBottomMean64); + ST64 (pPred + 40, kuiBottomMean64); + ST64 (pPred + 48, kuiBottomMean64); + ST64 (pPred + 56, kuiBottomMean64); +} + +void WelsIChromaPredDcTop_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { + /*caculate the iMean value*/ + const uint8_t kuiMean1 = (pRef[-kiStride] + pRef[1 - kiStride] + pRef[2 - kiStride] + pRef[3 - kiStride] + 2) >> 2; + const uint8_t kuiMean2 = (pRef[4 - kiStride] + pRef[5 - kiStride] + pRef[6 - kiStride] + pRef[7 - kiStride] + 2) >> 2; + const uint8_t kuiMean[8] = {kuiMean1, kuiMean1, kuiMean1, kuiMean1, kuiMean2, kuiMean2, kuiMean2, kuiMean2}; + const uint64_t kuiMean64 = LD64 (kuiMean); + + ST64 (pPred , kuiMean64); + ST64 (pPred + 8 , kuiMean64); + ST64 (pPred + 16, kuiMean64); + ST64 (pPred + 24, kuiMean64); + ST64 (pPred + 32, kuiMean64); + ST64 (pPred + 40, kuiMean64); + ST64 (pPred + 48, kuiMean64); + ST64 (pPred + 56, kuiMean64); +} + +void WelsIChromaPredDcNA_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { + 
const uint64_t kuiDcValue64 = (uint64_t)0x8080808080808080ULL; + ST64 (pPred , kuiDcValue64); + ST64 (pPred + 8 , kuiDcValue64); + ST64 (pPred + 16, kuiDcValue64); + ST64 (pPred + 24, kuiDcValue64); + ST64 (pPred + 32, kuiDcValue64); + ST64 (pPred + 40, kuiDcValue64); + ST64 (pPred + 48, kuiDcValue64); + ST64 (pPred + 56, kuiDcValue64); +} + + +void WelsI16x16LumaPredPlane_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { + int32_t iLTshift = 0, iTopshift = 0, iLeftshift = 0, iTopSum = 0, iLeftSum = 0; + int32_t i, j; + uint8_t* pTop = &pRef[-kiStride]; + uint8_t* pLeft = &pRef[-1]; + int32_t iPredStride = 16; + + for (i = 0 ; i < 8 ; i ++) { + iTopSum += (i + 1) * (pTop[8 + i] - pTop[6 - i]); + iLeftSum += (i + 1) * (pLeft[ (8 + i) * kiStride] - pLeft[ (6 - i) * kiStride]); + } + + iLTshift = (pLeft[15 * kiStride] + pTop[15]) << 4; + iTopshift = (5 * iTopSum + 32) >> 6; + iLeftshift = (5 * iLeftSum + 32) >> 6; + + for (i = 0 ; i < 16 ; i ++) { + for (j = 0 ; j < 16 ; j ++) { + pPred[j] = WelsClip1 ((iLTshift + iTopshift * (j - 7) + iLeftshift * (i - 7) + 16) >> 5); + } + pPred += iPredStride; + } +} + +void WelsI16x16LumaPredDc_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { + int32_t iStridex15 = (kiStride << 4) - kiStride; + int32_t iSum = 0; + uint8_t i = 15; + uint8_t iMean = 0; + + /*caculate the iMean value*/ + do { + iSum += pRef[-1 + iStridex15] + pRef[-kiStride + i]; + iStridex15 -= kiStride; + } while (i-- > 0); + iMean = (16 + iSum) >> 5; + memset (pPred, iMean, 256); +} + + +void WelsI16x16LumaPredDcTop_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { + int32_t iSum = 0; + uint8_t i = 15; + uint8_t iMean = 0; + + /*caculate the iMean value*/ + do { + iSum += pRef[-kiStride + i]; + } while (i-- > 0); + iMean = (8 + iSum) >> 4; + memset (pPred, iMean, 256); +} + +void WelsI16x16LumaPredDcLeft_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { + int32_t iStridex15 = (kiStride << 4) - kiStride; + int32_t iSum = 0; 
+ uint8_t i = 15; + uint8_t iMean = 0; + + /*caculate the iMean value*/ + do { + iSum += pRef[-1 + iStridex15]; + iStridex15 -= kiStride; + } while (i-- > 0); + iMean = (8 + iSum) >> 4; + memset (pPred, iMean, 256); +} + +void WelsI16x16LumaPredDcNA_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { + memset (pPred, 0x80, 256); +} + +void WelsInitIntraPredFuncs (SWelsFuncPtrList* pFuncList, const uint32_t kuiCpuFlag) { + pFuncList->pfGetLumaI16x16Pred[I16_PRED_V] = WelsI16x16LumaPredV_c; + pFuncList->pfGetLumaI16x16Pred[I16_PRED_H] = WelsI16x16LumaPredH_c; + pFuncList->pfGetLumaI16x16Pred[I16_PRED_DC] = WelsI16x16LumaPredDc_c; + pFuncList->pfGetLumaI16x16Pred[I16_PRED_P] = WelsI16x16LumaPredPlane_c; + pFuncList->pfGetLumaI16x16Pred[I16_PRED_DC_L] = WelsI16x16LumaPredDcLeft_c; + pFuncList->pfGetLumaI16x16Pred[I16_PRED_DC_T] = WelsI16x16LumaPredDcTop_c; + pFuncList->pfGetLumaI16x16Pred[I16_PRED_DC_128] = WelsI16x16LumaPredDcNA_c; + + pFuncList->pfGetLumaI4x4Pred[I4_PRED_V] = WelsI4x4LumaPredV_c; + pFuncList->pfGetLumaI4x4Pred[I4_PRED_H] = WelsI4x4LumaPredH_c; + pFuncList->pfGetLumaI4x4Pred[I4_PRED_DC] = WelsI4x4LumaPredDc_c; + pFuncList->pfGetLumaI4x4Pred[I4_PRED_DC_L] = WelsI4x4LumaPredDcLeft_c; + pFuncList->pfGetLumaI4x4Pred[I4_PRED_DC_T] = WelsI4x4LumaPredDcTop_c; + pFuncList->pfGetLumaI4x4Pred[I4_PRED_DC_128] = WelsI4x4LumaPredDcNA_c; + + pFuncList->pfGetLumaI4x4Pred[I4_PRED_DDL] = WelsI4x4LumaPredDDL_c; + pFuncList->pfGetLumaI4x4Pred[I4_PRED_DDL_TOP] = WelsI4x4LumaPredDDLTop_c; + pFuncList->pfGetLumaI4x4Pred[I4_PRED_DDR] = WelsI4x4LumaPredDDR_c; + + pFuncList->pfGetLumaI4x4Pred[I4_PRED_VL] = WelsI4x4LumaPredVL_c; + pFuncList->pfGetLumaI4x4Pred[I4_PRED_VL_TOP] = WelsI4x4LumaPredVLTop_c; + pFuncList->pfGetLumaI4x4Pred[I4_PRED_VR] = WelsI4x4LumaPredVR_c; + pFuncList->pfGetLumaI4x4Pred[I4_PRED_HU] = WelsI4x4LumaPredHU_c; + pFuncList->pfGetLumaI4x4Pred[I4_PRED_HD] = WelsI4x4LumaPredHD_c; + + pFuncList->pfGetChromaPred[C_PRED_DC] = WelsIChromaPredDc_c; + 
pFuncList->pfGetChromaPred[C_PRED_H] = WelsIChromaPredH_c; + pFuncList->pfGetChromaPred[C_PRED_V] = WelsIChromaPredV_c; + pFuncList->pfGetChromaPred[C_PRED_P] = WelsIChromaPredPlane_c; + pFuncList->pfGetChromaPred[C_PRED_DC_L] = WelsIChromaPredDcLeft_c; + pFuncList->pfGetChromaPred[C_PRED_DC_T] = WelsIChromaPredDcTop_c; + pFuncList->pfGetChromaPred[C_PRED_DC_128] = WelsIChromaPredDcNA_c; +#ifdef HAVE_NEON + if (kuiCpuFlag & WELS_CPU_NEON) { + pFuncList->pfGetLumaI4x4Pred[I4_PRED_DDR] = WelsI4x4LumaPredDDR_neon; + pFuncList->pfGetLumaI4x4Pred[I4_PRED_HD] = WelsI4x4LumaPredHD_neon; + pFuncList->pfGetLumaI4x4Pred[I4_PRED_HU] = WelsI4x4LumaPredHU_neon; + pFuncList->pfGetLumaI4x4Pred[I4_PRED_VR] = WelsI4x4LumaPredVR_neon; + pFuncList->pfGetLumaI4x4Pred[I4_PRED_DDL] = WelsI4x4LumaPredDDL_neon; + pFuncList->pfGetLumaI4x4Pred[I4_PRED_VL] = WelsI4x4LumaPredVL_neon; + pFuncList->pfGetLumaI4x4Pred[I4_PRED_H] = WelsI4x4LumaPredH_neon; + pFuncList->pfGetLumaI4x4Pred[I4_PRED_V] = WelsI4x4LumaPredV_neon; + + pFuncList->pfGetLumaI16x16Pred[I16_PRED_V] = WelsI16x16LumaPredV_neon; + pFuncList->pfGetLumaI16x16Pred[I16_PRED_H] = WelsI16x16LumaPredH_neon; + pFuncList->pfGetLumaI16x16Pred[I16_PRED_DC] = WelsI16x16LumaPredDc_neon; + pFuncList->pfGetLumaI16x16Pred[I16_PRED_P] = WelsI16x16LumaPredPlane_neon; + + pFuncList->pfGetChromaPred[C_PRED_DC] = WelsIChromaPredDc_neon; + pFuncList->pfGetChromaPred[C_PRED_V] = WelsIChromaPredV_neon; + pFuncList->pfGetChromaPred[C_PRED_P] = WelsIChromaPredPlane_neon; + pFuncList->pfGetChromaPred[C_PRED_H] = WelsIChromaPredH_neon; + } +#endif + +#if defined(HAVE_NEON_AARCH64) + if (kuiCpuFlag & WELS_CPU_NEON) { + pFuncList->pfGetLumaI16x16Pred[I16_PRED_DC] = WelsI16x16LumaPredDc_AArch64_neon; + pFuncList->pfGetLumaI16x16Pred[I16_PRED_P] = WelsI16x16LumaPredPlane_AArch64_neon; + pFuncList->pfGetLumaI16x16Pred[I16_PRED_H] = WelsI16x16LumaPredH_AArch64_neon; + pFuncList->pfGetLumaI16x16Pred[I16_PRED_V] = WelsI16x16LumaPredV_AArch64_neon; + 
pFuncList->pfGetLumaI16x16Pred[I16_PRED_DC_L] = WelsI16x16LumaPredDcLeft_AArch64_neon; + pFuncList->pfGetLumaI16x16Pred[I16_PRED_DC_T] = WelsI16x16LumaPredDcTop_AArch64_neon; + + pFuncList->pfGetLumaI4x4Pred[I4_PRED_H ] = WelsI4x4LumaPredH_AArch64_neon; + pFuncList->pfGetLumaI4x4Pred[I4_PRED_DDL ] = WelsI4x4LumaPredDDL_AArch64_neon; + pFuncList->pfGetLumaI4x4Pred[I4_PRED_DDL_TOP] = WelsI4x4LumaPredDDLTop_AArch64_neon; + pFuncList->pfGetLumaI4x4Pred[I4_PRED_VL ] = WelsI4x4LumaPredVL_AArch64_neon; + pFuncList->pfGetLumaI4x4Pred[I4_PRED_VL_TOP ] = WelsI4x4LumaPredVLTop_AArch64_neon; + pFuncList->pfGetLumaI4x4Pred[I4_PRED_VR ] = WelsI4x4LumaPredVR_AArch64_neon; + pFuncList->pfGetLumaI4x4Pred[I4_PRED_HU ] = WelsI4x4LumaPredHU_AArch64_neon; + pFuncList->pfGetLumaI4x4Pred[I4_PRED_HD ] = WelsI4x4LumaPredHD_AArch64_neon; + pFuncList->pfGetLumaI4x4Pred[I4_PRED_DC ] = WelsI4x4LumaPredDc_AArch64_neon; + pFuncList->pfGetLumaI4x4Pred[I4_PRED_DC_T ] = WelsI4x4LumaPredDcTop_AArch64_neon; + + pFuncList->pfGetChromaPred[C_PRED_H] = WelsIChromaPredH_AArch64_neon; + pFuncList->pfGetChromaPred[C_PRED_V] = WelsIChromaPredV_AArch64_neon; + pFuncList->pfGetChromaPred[C_PRED_P ] = WelsIChromaPredPlane_AArch64_neon; + pFuncList->pfGetChromaPred[C_PRED_DC] = WelsIChromaPredDc_AArch64_neon; + pFuncList->pfGetChromaPred[C_PRED_DC_T] = WelsIChromaPredDcTop_AArch64_neon; + } +#endif//HAVE_NEON_AARCH64 + +#ifdef X86_ASM + if (kuiCpuFlag & WELS_CPU_MMXEXT) { + pFuncList->pfGetLumaI4x4Pred[I4_PRED_DDR] = WelsI4x4LumaPredDDR_mmx; + pFuncList->pfGetLumaI4x4Pred[I4_PRED_HD] = WelsI4x4LumaPredHD_mmx; + pFuncList->pfGetLumaI4x4Pred[I4_PRED_HU] = WelsI4x4LumaPredHU_mmx; + pFuncList->pfGetLumaI4x4Pred[I4_PRED_VR] = WelsI4x4LumaPredVR_mmx; + pFuncList->pfGetLumaI4x4Pred[I4_PRED_DDL] = WelsI4x4LumaPredDDL_mmx; + pFuncList->pfGetLumaI4x4Pred[I4_PRED_VL] = WelsI4x4LumaPredVL_mmx; + pFuncList->pfGetChromaPred[C_PRED_H] = WelsIChromaPredH_mmx; + } + if (kuiCpuFlag & WELS_CPU_SSE2) { + 
pFuncList->pfGetLumaI4x4Pred[I4_PRED_H] = WelsI4x4LumaPredH_sse2; + pFuncList->pfGetLumaI4x4Pred[I4_PRED_DC] = WelsI4x4LumaPredDc_sse2; + pFuncList->pfGetLumaI4x4Pred[I4_PRED_V] = WelsI4x4LumaPredV_sse2; + + pFuncList->pfGetLumaI16x16Pred[I16_PRED_V] = WelsI16x16LumaPredV_sse2; + pFuncList->pfGetLumaI16x16Pred[I16_PRED_H] = WelsI16x16LumaPredH_sse2; + pFuncList->pfGetLumaI16x16Pred[I16_PRED_DC] = WelsI16x16LumaPredDc_sse2; + pFuncList->pfGetLumaI16x16Pred[I16_PRED_P] = WelsI16x16LumaPredPlane_sse2; + + pFuncList->pfGetChromaPred[C_PRED_DC] = WelsIChromaPredDc_sse2; + pFuncList->pfGetChromaPred[C_PRED_V] = WelsIChromaPredV_sse2; + pFuncList->pfGetChromaPred[C_PRED_P] = WelsIChromaPredPlane_sse2; + } +#endif + +#if defined(HAVE_MMI) + if (kuiCpuFlag & WELS_CPU_MMI) { + pFuncList->pfGetLumaI16x16Pred[I16_PRED_V] = WelsI16x16LumaPredV_mmi; + pFuncList->pfGetLumaI16x16Pred[I16_PRED_H] = WelsI16x16LumaPredH_mmi; + pFuncList->pfGetLumaI16x16Pred[I16_PRED_DC] = WelsI16x16LumaPredDc_mmi; + pFuncList->pfGetLumaI16x16Pred[I16_PRED_P] = WelsI16x16LumaPredPlane_mmi; + + pFuncList->pfGetChromaPred[C_PRED_H] = WelsIChromaPredH_mmi; + pFuncList->pfGetChromaPred[C_PRED_DC] = WelsIChromaPredDc_mmi; + pFuncList->pfGetChromaPred[C_PRED_V] = WelsIChromaPredV_mmi; + pFuncList->pfGetChromaPred[C_PRED_P] = WelsIChromaPredPlane_mmi; + } +#endif//HAVE_MMI +} +} diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/md.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/md.cpp new file mode 100644 index 000000000..041e453fd --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/md.cpp @@ -0,0 +1,910 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file md.c + * + * \brief mode decision + * + * \date 2009.05.14 Created + * + ************************************************************************************* + */ + +#include "ls_defines.h" +#include "md.h" +#include "cpu_core.h" +#include "svc_enc_golomb.h" + +namespace WelsEnc { +#define INTRA_VARIANCE_SAD_THRESHOLD 150 +#define INTER_VARIANCE_SAD_THRESHOLD 20 + +//fill cache of neighbor MB, containing pNonZeroCount, sample_avail, pIntra4x4PredMode +void FillNeighborCacheIntra (SMbCache* pMbCache, SMB* pCurMb, int32_t iMbWidth) { + uint32_t uiNeighborAvail = pCurMb->uiNeighborAvail; + uint32_t uiNeighborIntra = 0; + + if (uiNeighborAvail & LEFT_MB_POS) { //LEFT MB + int8_t* pLeftMbNonZeroCount = pCurMb->pNonZeroCount - MB_LUMA_CHROMA_BLOCK4x4_NUM; + pMbCache->iNonZeroCoeffCount[8] = pLeftMbNonZeroCount[ 3]; + pMbCache->iNonZeroCoeffCount[16] = pLeftMbNonZeroCount[ 7]; + pMbCache->iNonZeroCoeffCount[24] = pLeftMbNonZeroCount[11]; + pMbCache->iNonZeroCoeffCount[32] = pLeftMbNonZeroCount[15]; + + pMbCache->iNonZeroCoeffCount[ 13] = pLeftMbNonZeroCount[17]; + pMbCache->iNonZeroCoeffCount[21] = pLeftMbNonZeroCount[21]; + pMbCache->iNonZeroCoeffCount[37] = pLeftMbNonZeroCount[19]; + pMbCache->iNonZeroCoeffCount[45] = pLeftMbNonZeroCount[23]; + + uiNeighborIntra |= LEFT_MB_POS; + + if (IS_INTRA4x4 ((pCurMb - 1)->uiMbType)) { + int8_t* pLeftMbIntra4x4PredMode = pCurMb->pIntra4x4PredMode - INTRA_4x4_MODE_NUM; + pMbCache->iIntraPredMode[8] = pLeftMbIntra4x4PredMode[4]; + pMbCache->iIntraPredMode[16] = pLeftMbIntra4x4PredMode[5]; + pMbCache->iIntraPredMode[24] = pLeftMbIntra4x4PredMode[6]; + pMbCache->iIntraPredMode[32] = pLeftMbIntra4x4PredMode[3]; + } else { // if ( 0 == constrained_intra_pred_flag || IS_INTRA16x16((pCurMb-1)->uiMbType )) + pMbCache->iIntraPredMode[8] = + pMbCache->iIntraPredMode[16] = + pMbCache->iIntraPredMode[24] = + pMbCache->iIntraPredMode[32] = 2; //DC + } + } else { + pMbCache->iNonZeroCoeffCount[ 8] = + 
pMbCache->iNonZeroCoeffCount[16] = + pMbCache->iNonZeroCoeffCount[24] = + pMbCache->iNonZeroCoeffCount[32] = -1;//unavailable + pMbCache->iNonZeroCoeffCount[13] = + pMbCache->iNonZeroCoeffCount[21] = + pMbCache->iNonZeroCoeffCount[37] = + pMbCache->iNonZeroCoeffCount[45] = -1;//unavailable + + pMbCache->iIntraPredMode[8] = + pMbCache->iIntraPredMode[16] = + pMbCache->iIntraPredMode[24] = + pMbCache->iIntraPredMode[32] = -1;//unavailable + } + + if (uiNeighborAvail & TOP_MB_POS) { //TOP MB + SMB* pTopMb = pCurMb - iMbWidth; + ST32 (&pMbCache->iNonZeroCoeffCount[1], LD32 (&pTopMb->pNonZeroCount[12])); + + ST16 (&pMbCache->iNonZeroCoeffCount[6], LD16 (&pTopMb->pNonZeroCount[20])); + ST16 (&pMbCache->iNonZeroCoeffCount[30], LD16 (&pTopMb->pNonZeroCount[22])); + + uiNeighborIntra |= TOP_MB_POS; + + if (IS_INTRA4x4 (pTopMb->uiMbType)) { + ST32 (pMbCache->iIntraPredMode + 1, LD32 (&pTopMb->pIntra4x4PredMode[0])); + } else { // if ( 0 == constrained_intra_pred_flag || IS_INTRA16x16( pTopMb->uiMbType )) + const uint32_t kuiDc32 = 0x02020202; + ST32 (pMbCache->iIntraPredMode + 1 , kuiDc32); + } + } else { + const uint32_t kuiUnavail32 = 0xffffffff; + ST32 (pMbCache->iIntraPredMode + 1 , kuiUnavail32); + ST32 (&pMbCache->iNonZeroCoeffCount[1], kuiUnavail32); + + ST16 (&pMbCache->iNonZeroCoeffCount[6], 0xffff); + ST16 (&pMbCache->iNonZeroCoeffCount[30], 0xffff); + } + + if (uiNeighborAvail & TOPLEFT_MB_POS) { + uiNeighborIntra |= 0x04; + } + + + if (uiNeighborAvail & TOPRIGHT_MB_POS) { + uiNeighborIntra |= 0x08; + } + pMbCache->uiNeighborIntra = uiNeighborIntra; +} +//fill cache of neighbor MB, containing motion_vector and uiRefIndex +void FillNeighborCacheInterWithoutBGD (SMbCache* pMbCache, SMB* pCurMb, int32_t iMbWidth, int8_t* pVaaBgMbFlag) { + uint32_t uiNeighborAvail = pCurMb->uiNeighborAvail; + SMB* pLeftMb = pCurMb - 1 ; + SMB* pTopMb = pCurMb - iMbWidth; + SMB* pLeftTopMb = pCurMb - iMbWidth - 1 ; + SMB* iRightTopMb = pCurMb - iMbWidth + 1 ; + SMVComponentUnit* 
pMvComp = &pMbCache->sMvComponents; + if ((uiNeighborAvail & LEFT_MB_POS) && IS_SVC_INTER (pLeftMb->uiMbType)) { + pMvComp->sMotionVectorCache[ 6] = pLeftMb->sMv[ 3]; + pMvComp->sMotionVectorCache[12] = pLeftMb->sMv[ 7]; + pMvComp->sMotionVectorCache[18] = pLeftMb->sMv[11]; + pMvComp->sMotionVectorCache[24] = pLeftMb->sMv[15]; + pMvComp->iRefIndexCache[ 6] = pLeftMb->pRefIndex[1]; + pMvComp->iRefIndexCache[12] = pLeftMb->pRefIndex[1]; + pMvComp->iRefIndexCache[18] = pLeftMb->pRefIndex[3]; + pMvComp->iRefIndexCache[24] = pLeftMb->pRefIndex[3]; + pMbCache->iSadCost[3] = pLeftMb->pSadCost[0]; + + if (pLeftMb->uiMbType == MB_TYPE_SKIP) { + pMbCache->bMbTypeSkip[3] = 1; + pMbCache->iSadCostSkip[3] = pMbCache->pEncSad[-1]; + } else { + pMbCache->bMbTypeSkip[3] = 0; + pMbCache->iSadCostSkip[3] = 0; + } + } else { //avail or non-inter + ST32 (&pMvComp->sMotionVectorCache[ 6], 0); + ST32 (&pMvComp->sMotionVectorCache[12], 0); + ST32 (&pMvComp->sMotionVectorCache[18], 0); + ST32 (&pMvComp->sMotionVectorCache[24], 0); + pMvComp->iRefIndexCache[ 6] = + pMvComp->iRefIndexCache[12] = + pMvComp->iRefIndexCache[18] = + pMvComp->iRefIndexCache[24] = (uiNeighborAvail & LEFT_MB_POS) ? 
REF_NOT_IN_LIST : REF_NOT_AVAIL; + pMbCache->iSadCost[3] = 0; + pMbCache->bMbTypeSkip[3] = 0; + pMbCache->iSadCostSkip[3] = 0; + } + + if ((uiNeighborAvail & TOP_MB_POS) && IS_SVC_INTER (pTopMb->uiMbType)) { //TOP MB + ST64 (&pMvComp->sMotionVectorCache[1], LD64 (&pTopMb->sMv[12])); + ST64 (&pMvComp->sMotionVectorCache[3], LD64 (&pTopMb->sMv[14])); + pMvComp->iRefIndexCache[1] = pTopMb->pRefIndex[2]; + pMvComp->iRefIndexCache[2] = pTopMb->pRefIndex[2]; + pMvComp->iRefIndexCache[3] = pTopMb->pRefIndex[3]; + pMvComp->iRefIndexCache[4] = pTopMb->pRefIndex[3]; + pMbCache->iSadCost[1] = pTopMb->pSadCost[0]; + + if (pTopMb->uiMbType == MB_TYPE_SKIP) { + pMbCache->bMbTypeSkip[1] = 1; + pMbCache->iSadCostSkip[1] = pMbCache->pEncSad[-iMbWidth]; + } else { + pMbCache->bMbTypeSkip[1] = 0; + pMbCache->iSadCostSkip[1] = 0; + } + } else { //unavail + ST64 (&pMvComp->sMotionVectorCache[1], 0); + ST64 (&pMvComp->sMotionVectorCache[3], 0); + pMvComp->iRefIndexCache[1] = + pMvComp->iRefIndexCache[2] = + pMvComp->iRefIndexCache[3] = + pMvComp->iRefIndexCache[4] = (uiNeighborAvail & TOP_MB_POS) ? REF_NOT_IN_LIST : REF_NOT_AVAIL; + pMbCache->iSadCost[1] = 0; + + pMbCache->bMbTypeSkip[1] = 0; + pMbCache->iSadCostSkip[1] = 0; + } + + if ((uiNeighborAvail & TOPLEFT_MB_POS) && IS_SVC_INTER (pLeftTopMb->uiMbType)) { //LEFT_TOP MB + pMvComp->sMotionVectorCache[0] = pLeftTopMb->sMv[15]; + pMvComp->iRefIndexCache[0] = pLeftTopMb->pRefIndex[3]; + pMbCache->iSadCost[0] = pLeftTopMb->pSadCost[0]; + + if (pLeftTopMb->uiMbType == MB_TYPE_SKIP) { + pMbCache->bMbTypeSkip[0] = 1; + pMbCache->iSadCostSkip[0] = pMbCache->pEncSad[-iMbWidth - 1]; + } else { + pMbCache->bMbTypeSkip[0] = 0; + pMbCache->iSadCostSkip[0] = 0; + } + } else { //unavail + ST32 (&pMvComp->sMotionVectorCache[0], 0); + pMvComp->iRefIndexCache[0] = (uiNeighborAvail & TOPLEFT_MB_POS) ? 
REF_NOT_IN_LIST : REF_NOT_AVAIL; + pMbCache->iSadCost[0] = 0; + pMbCache->bMbTypeSkip[0] = 0; + pMbCache->iSadCostSkip[0] = 0; + } + + if ((uiNeighborAvail & TOPRIGHT_MB_POS) && IS_SVC_INTER (iRightTopMb->uiMbType)) { //RIGHT_TOP MB + pMvComp->sMotionVectorCache[5] = iRightTopMb->sMv[12]; + pMvComp->iRefIndexCache[5] = iRightTopMb->pRefIndex[2]; + pMbCache->iSadCost[2] = iRightTopMb->pSadCost[0]; + + if (iRightTopMb->uiMbType == MB_TYPE_SKIP) { + pMbCache->bMbTypeSkip[2] = 1; + pMbCache->iSadCostSkip[2] = pMbCache->pEncSad[-iMbWidth + 1]; + } else { + pMbCache->bMbTypeSkip[2] = 0; + pMbCache->iSadCostSkip[2] = 0; + } + } else { //unavail + ST32 (&pMvComp->sMotionVectorCache[5], 0); + pMvComp->iRefIndexCache[5] = (uiNeighborAvail & TOPRIGHT_MB_POS) ? REF_NOT_IN_LIST : REF_NOT_AVAIL; + pMbCache->iSadCost[2] = 0; + pMbCache->bMbTypeSkip[2] = 0; + pMbCache->iSadCostSkip[2] = 0; + } + + //right-top 4*4 pBlock unavailable + ST32 (&pMvComp->sMotionVectorCache[ 9], 0); + ST32 (&pMvComp->sMotionVectorCache[21], 0); + ST32 (&pMvComp->sMotionVectorCache[11], 0); + ST32 (&pMvComp->sMotionVectorCache[17], 0); + ST32 (&pMvComp->sMotionVectorCache[23], 0); + pMvComp->iRefIndexCache[ 9] = + pMvComp->iRefIndexCache[11] = + pMvComp->iRefIndexCache[17] = + pMvComp->iRefIndexCache[21] = + pMvComp->iRefIndexCache[23] = REF_NOT_AVAIL; +} + +void FillNeighborCacheInterWithBGD (SMbCache* pMbCache, SMB* pCurMb, int32_t iMbWidth, int8_t* pVaaBgMbFlag) { + uint32_t uiNeighborAvail = pCurMb->uiNeighborAvail; + SMB* pLeftMb = pCurMb - 1 ; + SMB* pTopMb = pCurMb - iMbWidth; + SMB* pLeftTopMb = pCurMb - iMbWidth - 1 ; + SMB* iRightTopMb = pCurMb - iMbWidth + 1 ; + SMVComponentUnit* pMvComp = &pMbCache->sMvComponents; + + if ((uiNeighborAvail & LEFT_MB_POS) && IS_SVC_INTER (pLeftMb->uiMbType)) { + pMvComp->sMotionVectorCache[ 6] = pLeftMb->sMv[ 3]; + pMvComp->sMotionVectorCache[12] = pLeftMb->sMv[ 7]; + pMvComp->sMotionVectorCache[18] = pLeftMb->sMv[11]; + pMvComp->sMotionVectorCache[24] = 
pLeftMb->sMv[15]; + pMvComp->iRefIndexCache[ 6] = pLeftMb->pRefIndex[1]; + pMvComp->iRefIndexCache[12] = pLeftMb->pRefIndex[1]; + pMvComp->iRefIndexCache[18] = pLeftMb->pRefIndex[3]; + pMvComp->iRefIndexCache[24] = pLeftMb->pRefIndex[3]; + pMbCache->iSadCost[3] = pLeftMb->pSadCost[0]; + + if (pLeftMb->uiMbType == MB_TYPE_SKIP && pVaaBgMbFlag[-1] == 0) { + pMbCache->bMbTypeSkip[3] = 1; + pMbCache->iSadCostSkip[3] = pMbCache->pEncSad[-1]; + } else { + pMbCache->bMbTypeSkip[3] = 0; + pMbCache->iSadCostSkip[3] = 0; + } + } else { //avail or non-inter + ST32 (&pMvComp->sMotionVectorCache[ 6], 0); + ST32 (&pMvComp->sMotionVectorCache[12], 0); + ST32 (&pMvComp->sMotionVectorCache[18], 0); + ST32 (&pMvComp->sMotionVectorCache[24], 0); + pMvComp->iRefIndexCache[ 6] = + pMvComp->iRefIndexCache[12] = + pMvComp->iRefIndexCache[18] = + pMvComp->iRefIndexCache[24] = (uiNeighborAvail & LEFT_MB_POS) ? REF_NOT_IN_LIST : REF_NOT_AVAIL; + pMbCache->iSadCost[3] = 0; + pMbCache->bMbTypeSkip[3] = 0; + pMbCache->iSadCostSkip[3] = 0; + } + + if ((uiNeighborAvail & TOP_MB_POS) && IS_SVC_INTER (pTopMb->uiMbType)) { //TOP MB + ST64 (&pMvComp->sMotionVectorCache[1], LD64 (&pTopMb->sMv[12])); + ST64 (&pMvComp->sMotionVectorCache[3], LD64 (&pTopMb->sMv[14])); + pMvComp->iRefIndexCache[1] = pTopMb->pRefIndex[2]; + pMvComp->iRefIndexCache[2] = pTopMb->pRefIndex[2]; + pMvComp->iRefIndexCache[3] = pTopMb->pRefIndex[3]; + pMvComp->iRefIndexCache[4] = pTopMb->pRefIndex[3]; + pMbCache->iSadCost[1] = pTopMb->pSadCost[0]; + if (pTopMb->uiMbType == MB_TYPE_SKIP && pVaaBgMbFlag[-iMbWidth] == 0) { + pMbCache->bMbTypeSkip[1] = 1; + pMbCache->iSadCostSkip[1] = pMbCache->pEncSad[-iMbWidth]; + } else { + pMbCache->bMbTypeSkip[1] = 0; + pMbCache->iSadCostSkip[1] = 0; + } + } else { //unavail + ST64 (&pMvComp->sMotionVectorCache[1], 0); + ST64 (&pMvComp->sMotionVectorCache[3], 0); + pMvComp->iRefIndexCache[1] = + pMvComp->iRefIndexCache[2] = + pMvComp->iRefIndexCache[3] = + pMvComp->iRefIndexCache[4] = 
(uiNeighborAvail & TOP_MB_POS) ? REF_NOT_IN_LIST : REF_NOT_AVAIL; + pMbCache->iSadCost[1] = 0; + pMbCache->bMbTypeSkip[1] = 0; + pMbCache->iSadCostSkip[1] = 0; + } + + + if ((uiNeighborAvail & TOPLEFT_MB_POS) && IS_SVC_INTER (pLeftTopMb->uiMbType)) { //LEFT_TOP MB + pMvComp->sMotionVectorCache[0] = pLeftTopMb->sMv[15]; + pMvComp->iRefIndexCache[0] = pLeftTopMb->pRefIndex[3]; + pMbCache->iSadCost[0] = pLeftTopMb->pSadCost[0]; + + if (pLeftTopMb->uiMbType == MB_TYPE_SKIP && pVaaBgMbFlag[-iMbWidth - 1] == 0) { + pMbCache->bMbTypeSkip[0] = 1; + pMbCache->iSadCostSkip[0] = pMbCache->pEncSad[-iMbWidth - 1]; + } else { + pMbCache->bMbTypeSkip[0] = 0; + pMbCache->iSadCostSkip[0] = 0; + } + } else { //unavail + ST32 (&pMvComp->sMotionVectorCache[0], 0); + pMvComp->iRefIndexCache[0] = (uiNeighborAvail & TOPLEFT_MB_POS) ? REF_NOT_IN_LIST : REF_NOT_AVAIL; + pMbCache->iSadCost[0] = 0; + pMbCache->bMbTypeSkip[0] = 0; + pMbCache->iSadCostSkip[0] = 0; + } + + if ((uiNeighborAvail & TOPRIGHT_MB_POS) && IS_SVC_INTER (iRightTopMb->uiMbType)) { //RIGHT_TOP MB + pMvComp->sMotionVectorCache[5] = iRightTopMb->sMv[12]; + pMvComp->iRefIndexCache[5] = iRightTopMb->pRefIndex[2]; + pMbCache->iSadCost[2] = iRightTopMb->pSadCost[0]; + + if (iRightTopMb->uiMbType == MB_TYPE_SKIP && pVaaBgMbFlag[-iMbWidth + 1] == 0) { + pMbCache->bMbTypeSkip[2] = 1; + pMbCache->iSadCostSkip[2] = pMbCache->pEncSad[-iMbWidth + 1]; + } else { + pMbCache->bMbTypeSkip[2] = 0; + pMbCache->iSadCostSkip[2] = 0; + } + } else { //unavail + ST32 (&pMvComp->sMotionVectorCache[5], 0); + pMvComp->iRefIndexCache[5] = (uiNeighborAvail & TOPRIGHT_MB_POS) ? 
REF_NOT_IN_LIST : REF_NOT_AVAIL; + pMbCache->iSadCost[2] = 0; + pMbCache->bMbTypeSkip[2] = 0; + pMbCache->iSadCostSkip[2] = 0; + } + + //right-top 4*4 pBlock unavailable + ST32 (&pMvComp->sMotionVectorCache[ 9], 0); + ST32 (&pMvComp->sMotionVectorCache[21], 0); + ST32 (&pMvComp->sMotionVectorCache[11], 0); + ST32 (&pMvComp->sMotionVectorCache[17], 0); + ST32 (&pMvComp->sMotionVectorCache[23], 0); + pMvComp->iRefIndexCache[ 9] = + pMvComp->iRefIndexCache[11] = + pMvComp->iRefIndexCache[17] = + pMvComp->iRefIndexCache[21] = + pMvComp->iRefIndexCache[23] = REF_NOT_AVAIL; +} + +void InitFillNeighborCacheInterFunc (SWelsFuncPtrList* pFuncList, const int32_t kiFlag) { + pFuncList->pfFillInterNeighborCache = kiFlag ? FillNeighborCacheInterWithBGD : FillNeighborCacheInterWithoutBGD; +} + +void UpdateMbMv_c (SMVUnitXY* pMvBuffer, const SMVUnitXY ksMv) { + int32_t k = 0; + for (; k < MB_BLOCK4x4_NUM; k += 4) { + pMvBuffer[k ] = + pMvBuffer[k + 1] = + pMvBuffer[k + 2] = + pMvBuffer[k + 3] = ksMv; + } +} + + +uint8_t MdInterAnalysisVaaInfo_c (int32_t* pSad8x8) { + int32_t iSadBlock[4], iAverageSadBlock[4]; + int32_t iAverageSad, iVarianceSad; + + iSadBlock[0] = pSad8x8[0]; + iAverageSad = iSadBlock[0]; + + iSadBlock[1] = pSad8x8[1]; + iAverageSad += iSadBlock[1]; + + iSadBlock[2] = pSad8x8[2]; + iAverageSad += iSadBlock[2]; + + iSadBlock[3] = pSad8x8[3]; + iAverageSad += iSadBlock[3]; + + iAverageSad = iAverageSad >> 2; + + iAverageSadBlock[0] = (iSadBlock[0] >> 6) - (iAverageSad >> 6); + iVarianceSad = iAverageSadBlock[0] * iAverageSadBlock[0]; + + iAverageSadBlock[1] = (iSadBlock[1] >> 6) - (iAverageSad >> 6); + iVarianceSad += iAverageSadBlock[1] * iAverageSadBlock[1]; + + iAverageSadBlock[2] = (iSadBlock[2] >> 6) - (iAverageSad >> 6); + iVarianceSad += iAverageSadBlock[2] * iAverageSadBlock[2]; + + iAverageSadBlock[3] = (iSadBlock[3] >> 6) - (iAverageSad >> 6); + iVarianceSad += iAverageSadBlock[3] * iAverageSadBlock[3]; + + if (iVarianceSad < INTER_VARIANCE_SAD_THRESHOLD) 
{ + return 15; + } + + uint8_t uiMbSign = 0; + if (iSadBlock[0] > iAverageSad) + uiMbSign |= 0x08; + if (iSadBlock[1] > iAverageSad) + uiMbSign |= 0x04; + if (iSadBlock[2] > iAverageSad) + uiMbSign |= 0x02; + if (iSadBlock[3] > iAverageSad) + uiMbSign |= 0x01; + return (uiMbSign); +} + +int32_t AnalysisVaaInfoIntra_c (uint8_t* pDataY, const int32_t kiLineSize) { + ENFORCE_STACK_ALIGN_1D (uint16_t, uiAvgBlock, 16, 16) + uint16_t* pBlock = &uiAvgBlock[0]; + uint8_t* pEncData = pDataY; + const int32_t kiLineSize2 = kiLineSize << 1; + const int32_t kiLineSize3 = kiLineSize + kiLineSize2; + const int32_t kiLineSize4 = kiLineSize << 2; + int32_t i = 0, j = 0, num = 0; + int32_t iSumAvg = 0, iSumSqr = 0; + +// analysis_vaa_info_intra_core_c( pDataY, iLineSize, pBlock ); + for (; j < 16; j += 4) { + num = 0; + for (i = 0; i < 16; i += 4, num ++) { + pBlock[num] = pEncData[i ] + pEncData[i + 1 ] + pEncData[i + 2 ] + pEncData[i + + 3 ]; + pBlock[num] += pEncData[i + kiLineSize ] + pEncData[i + kiLineSize + 1] + pEncData[i + kiLineSize + 2] + pEncData[i + + kiLineSize + 3]; + pBlock[num] += pEncData[i + kiLineSize2] + pEncData[i + kiLineSize2 + 1] + pEncData[i + kiLineSize2 + 2] + pEncData[i + + kiLineSize2 + 3]; + pBlock[num] += pEncData[i + kiLineSize3] + pEncData[i + kiLineSize3 + 1] + pEncData[i + kiLineSize3 + 2] + pEncData[i + + kiLineSize3 + 3]; + pBlock[num] >>= 4; + } + pBlock += 4; + pEncData += kiLineSize4; + } + + pBlock = &uiAvgBlock[0]; + i = 4; + for (; i > 0; --i) { + iSumAvg += pBlock[0] + pBlock[1] + pBlock[2] + pBlock[3]; + iSumSqr += pBlock[0] * pBlock[0] + pBlock[1] * pBlock[1] + pBlock[2] * pBlock[2] + pBlock[3] * pBlock[3]; + + pBlock += 4; + } + + + return /*variance =*/ (iSumSqr - ((iSumAvg * iSumAvg) >> 4)); +} + +// for pfGetVarianceFromIntraVaa function ptr adaptive by CPU features, 6/7/2010 +void InitIntraAnalysisVaaInfo (SWelsFuncPtrList* pFuncList, const uint32_t kuiCpuFlag) { + pFuncList->pfGetVarianceFromIntraVaa = AnalysisVaaInfoIntra_c; + 
pFuncList->pfGetMbSignFromInterVaa = MdInterAnalysisVaaInfo_c; + pFuncList->pfUpdateMbMv = UpdateMbMv_c; + +#if defined(X86_ASM) + if ((kuiCpuFlag & WELS_CPU_SSE2) == WELS_CPU_SSE2) { + pFuncList->pfGetVarianceFromIntraVaa = AnalysisVaaInfoIntra_sse2; + pFuncList->pfGetMbSignFromInterVaa = MdInterAnalysisVaaInfo_sse2; + pFuncList->pfUpdateMbMv = UpdateMbMv_sse2; + } + if ((kuiCpuFlag & WELS_CPU_SSSE3) == WELS_CPU_SSSE3) { + pFuncList->pfGetVarianceFromIntraVaa = AnalysisVaaInfoIntra_ssse3; + } + if ((kuiCpuFlag & WELS_CPU_SSE41) == WELS_CPU_SSE41) { + pFuncList->pfGetMbSignFromInterVaa = MdInterAnalysisVaaInfo_sse41; + } +#endif//X86_ASM +} + +bool MdIntraAnalysisVaaInfo (sWelsEncCtx* pEncCtx, uint8_t* pEncMb) { + + SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer; + const int32_t kiLineSize = pCurDqLayer->iEncStride[0]; + const int32_t kiVariance = pEncCtx->pFuncList->pfGetVarianceFromIntraVaa (pEncMb, kiLineSize); + return (kiVariance >= INTRA_VARIANCE_SAD_THRESHOLD); +} + +void InitMeRefinePointer (SMeRefinePointer* pMeRefine, SMbCache* pMbCache, int32_t iStride) { + pMeRefine->pHalfPixH = &pMbCache->pBufferInterPredMe[0] + iStride; + pMeRefine->pHalfPixV = &pMbCache->pBufferInterPredMe[640] + iStride; + + pMeRefine->pQuarPixBest = &pMbCache->pBufferInterPredMe[1280] + iStride; + pMeRefine->pQuarPixTmp = &pMbCache->pBufferInterPredMe[1920] + iStride; +} +typedef struct TagQuarParams { + int32_t iBestCost; + int32_t iBestHalfPix; + int32_t iStrideA; + int32_t iStrideB; + uint8_t* pRef; + uint8_t* pSrcB[4]; + uint8_t* pSrcA[4]; + int32_t iLms[4]; + int32_t iBestQuarPix; +} SQuarRefineParams; + +#define SWITCH_BEST_TMP_BUF(prev_best, curr_best){\ + pParams->iBestCost = iCurCost;\ + pTmp = prev_best;\ + prev_best = curr_best;\ + curr_best = pTmp;\ +} +#define CALC_COST(me_buf, lm) ( pFunc->sSampleDealingFuncs.pfMeCost[kuiPixel](pEncMb, iStrideEnc, me_buf, ME_REFINE_BUF_STRIDE) + lm ) + +inline void MeRefineQuarPixel (SWelsFuncPtrList* pFunc, SWelsME* pMe, 
SMeRefinePointer* pMeRefine, + const int32_t kiWidth, const int32_t kiHeight, SQuarRefineParams* pParams, int32_t iStrideEnc) { + PWelsSampleAveragingFunc pSampleAvg = pFunc->sMcFuncs.pfSampleAveraging; + int32_t iCurCost; + uint8_t* pEncMb = pMe->pEncMb; + uint8_t* pTmp = NULL; + const uint8_t kuiPixel = pMe->uiBlockSize; + + pSampleAvg (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[0], ME_REFINE_BUF_STRIDE, + pParams->pSrcB[0], pParams->iStrideA, kiWidth, kiHeight); + + iCurCost = CALC_COST (pMeRefine->pQuarPixTmp, pParams->iLms[0]); + if (iCurCost < pParams->iBestCost) { + pParams->iBestQuarPix = ME_QUAR_PIXEL_TOP; + SWITCH_BEST_TMP_BUF (pMeRefine->pQuarPixBest, pMeRefine->pQuarPixTmp); + } + //=========================(0, 1)=======================// + pSampleAvg (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[1], + ME_REFINE_BUF_STRIDE, pParams->pSrcB[1], pParams->iStrideA, kiWidth, kiHeight); + iCurCost = CALC_COST (pMeRefine->pQuarPixTmp, pParams->iLms[1]); + if (iCurCost < pParams->iBestCost) { + pParams->iBestQuarPix = ME_QUAR_PIXEL_BOTTOM; + SWITCH_BEST_TMP_BUF (pMeRefine->pQuarPixBest, pMeRefine->pQuarPixTmp); + } + //==========================(-1, 0)=========================// + pSampleAvg (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[2], + ME_REFINE_BUF_STRIDE, pParams->pSrcB[2], pParams->iStrideB, kiWidth, kiHeight); + iCurCost = CALC_COST (pMeRefine->pQuarPixTmp, pParams->iLms[2]); + if (iCurCost < pParams->iBestCost) { + pParams->iBestQuarPix = ME_QUAR_PIXEL_LEFT; + SWITCH_BEST_TMP_BUF (pMeRefine->pQuarPixBest, pMeRefine->pQuarPixTmp); + } + //==========================(1, 0)=========================// + pSampleAvg (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[3], + ME_REFINE_BUF_STRIDE, pParams->pSrcB[3], pParams->iStrideB, kiWidth, kiHeight); + + iCurCost = CALC_COST (pMeRefine->pQuarPixTmp, pParams->iLms[3]); + if (iCurCost < pParams->iBestCost) { + pParams->iBestQuarPix = 
ME_QUAR_PIXEL_RIGHT; + SWITCH_BEST_TMP_BUF (pMeRefine->pQuarPixBest, pMeRefine->pQuarPixTmp); + } +} + +void MeRefineFracPixel (sWelsEncCtx* pEncCtx, uint8_t* pMemPredInterMb, SWelsME* pMe, + SMeRefinePointer* pMeRefine, int32_t iWidth, int32_t iHeight) { + SWelsFuncPtrList* pFunc = pEncCtx->pFuncList; + int16_t iMvx = pMe->sMv.iMvX; + int16_t iMvy = pMe->sMv.iMvY; + + int16_t iHalfMvx = iMvx; + int16_t iHalfMvy = iMvy; + const int32_t kiStrideEnc = pEncCtx->pCurDqLayer->iEncStride[0]; + const int32_t kiStrideRef = pEncCtx->pCurDqLayer->pRefPic->iLineSize[0]; + + uint8_t* pEncData = pMe->pEncMb; + uint8_t* pRef = pMe->pRefMb;//091010 + + int32_t iBestQuarPix = ME_NO_BEST_QUAR_PIXEL; + + SQuarRefineParams sParams; + static const int32_t iMvQuarAddX[10] = {0, 0, -1, 1, 0, 0, 0, -1, 1, 0}; + const int32_t* pMvQuarAddY = iMvQuarAddX + 3; + uint8_t* pBestPredInter = pRef; + int32_t iInterBlk4Stride = ME_REFINE_BUF_STRIDE; + + int32_t iBestCost; + int32_t iCurCost; + int32_t iBestHalfPix; + + if (pEncCtx->pCurDqLayer->bSatdInMdFlag) { + iBestCost = pMe->uSadPredISatd.uiSatd + COST_MVD (pMe->pMvdCost, iMvx - pMe->sMvp.iMvX, iMvy - pMe->sMvp.iMvY); + } else { + iBestCost = pFunc->sSampleDealingFuncs.pfMeCost[pMe->uiBlockSize] (pEncData, kiStrideEnc, pRef, kiStrideRef) + + COST_MVD (pMe->pMvdCost, iMvx - pMe->sMvp.iMvX, iMvy - pMe->sMvp.iMvY); + } + + iBestHalfPix = REFINE_ME_NO_BEST_HALF_PIXEL; + + pFunc->sMcFuncs.pfLumaHalfpelVer (pRef - kiStrideRef, kiStrideRef, pMeRefine->pHalfPixV, ME_REFINE_BUF_STRIDE, iWidth, + iHeight + 1); + + //step 1: get [iWidth][iHeight+1] half pixel from vertical filter + //===========================(0, -2)==============================// + iCurCost = pFunc->sSampleDealingFuncs.pfMeCost[pMe->uiBlockSize] (pEncData, kiStrideEnc, pMeRefine->pHalfPixV, + ME_REFINE_BUF_STRIDE) + + COST_MVD (pMe->pMvdCost, iMvx - pMe->sMvp.iMvX, iMvy - 2 - pMe->sMvp.iMvY); + if (iCurCost < iBestCost) { + iBestCost = iCurCost; + iBestHalfPix = 
REFINE_ME_HALF_PIXEL_TOP; + pBestPredInter = pMeRefine->pHalfPixV; + } + //===========================(0, 2)==============================// + iCurCost = pFunc->sSampleDealingFuncs.pfMeCost[pMe->uiBlockSize] (pEncData, kiStrideEnc, + pMeRefine->pHalfPixV + ME_REFINE_BUF_STRIDE, ME_REFINE_BUF_STRIDE) + + COST_MVD (pMe->pMvdCost, iMvx - pMe->sMvp.iMvX, iMvy + 2 - pMe->sMvp.iMvY); + if (iCurCost < iBestCost) { + iBestCost = iCurCost; + iBestHalfPix = REFINE_ME_HALF_PIXEL_BOTTOM; + pBestPredInter = pMeRefine->pHalfPixV + ME_REFINE_BUF_STRIDE; + } + pFunc->sMcFuncs.pfLumaHalfpelHor (pRef - 1, kiStrideRef, pMeRefine->pHalfPixH, ME_REFINE_BUF_STRIDE, iWidth + 1, + iHeight); + //step 2: get [iWidth][iHeight+1] half pixel from horizon filter + + //===========================(-2, 0)==============================// + iCurCost = pFunc->sSampleDealingFuncs.pfMeCost[pMe->uiBlockSize] (pEncData, kiStrideEnc, pMeRefine->pHalfPixH, + ME_REFINE_BUF_STRIDE) + + COST_MVD (pMe->pMvdCost, iMvx - 2 - pMe->sMvp.iMvX, iMvy - pMe->sMvp.iMvY); + if (iCurCost < iBestCost) { + iBestCost = iCurCost; + iBestHalfPix = REFINE_ME_HALF_PIXEL_LEFT; + pBestPredInter = pMeRefine->pHalfPixH; + } + //===========================(2, 0)===============================// + iCurCost = pFunc->sSampleDealingFuncs.pfMeCost[pMe->uiBlockSize] (pEncData, kiStrideEnc, pMeRefine->pHalfPixH + 1, + ME_REFINE_BUF_STRIDE) + + COST_MVD (pMe->pMvdCost, iMvx + 2 - pMe->sMvp.iMvX, iMvy - pMe->sMvp.iMvY); + if (iCurCost < iBestCost) { + iBestCost = iCurCost; + iBestHalfPix = REFINE_ME_HALF_PIXEL_RIGHT; + pBestPredInter = pMeRefine->pHalfPixH + 1; + } + + sParams.iBestCost = iBestCost; + sParams.iBestHalfPix = iBestHalfPix; + sParams.pRef = pRef; + sParams.iBestQuarPix = ME_NO_BEST_QUAR_PIXEL; + + //step 5: if no best half-pixel prediction, try quarter pixel prediction + // if yes, must get [X+1][X+1] half-pixel from (2, 2) horizontal and vertical filter + if (REFINE_ME_NO_BEST_HALF_PIXEL == iBestHalfPix) { + sParams.iStrideA = 
kiStrideRef; + sParams.iStrideB = kiStrideRef; + sParams.pSrcA[0] = pMeRefine->pHalfPixV; + sParams.pSrcA[1] = pMeRefine->pHalfPixV + ME_REFINE_BUF_STRIDE; + sParams.pSrcA[2] = pMeRefine->pHalfPixH; + sParams.pSrcA[3] = pMeRefine->pHalfPixH + 1; + + sParams.pSrcB[0] = sParams.pSrcB[1] = sParams.pSrcB[2] = sParams.pSrcB[3] = pRef; + + sParams.iLms[0] = COST_MVD (pMe->pMvdCost, iHalfMvx - pMe->sMvp.iMvX, iHalfMvy - 1 - pMe->sMvp.iMvY); + sParams.iLms[1] = COST_MVD (pMe->pMvdCost, iHalfMvx - pMe->sMvp.iMvX, iHalfMvy + 1 - pMe->sMvp.iMvY); + sParams.iLms[2] = COST_MVD (pMe->pMvdCost, iHalfMvx - 1 - pMe->sMvp.iMvX, iHalfMvy - pMe->sMvp.iMvY); + sParams.iLms[3] = COST_MVD (pMe->pMvdCost, iHalfMvx + 1 - pMe->sMvp.iMvX, iHalfMvy - pMe->sMvp.iMvY); + } else { //must get [X+1][X+1] half-pixel from (2, 2) horizontal and vertical filter + switch (iBestHalfPix) { + case REFINE_ME_HALF_PIXEL_LEFT: { + pMeRefine->pHalfPixHV = pMeRefine->pHalfPixV;//reuse pBuffer, here only h&hv + pFunc->sMcFuncs.pfLumaHalfpelCen (pRef - 1 - kiStrideRef, kiStrideRef, pMeRefine->pHalfPixHV, ME_REFINE_BUF_STRIDE, + iWidth + 1, iHeight + 1); + + iHalfMvx -= 2; + sParams.iStrideA = ME_REFINE_BUF_STRIDE; + sParams.iStrideB = kiStrideRef; + sParams.pSrcA[0] = pMeRefine->pHalfPixH; + sParams.pSrcA[3] = sParams.pSrcA[2] = sParams.pSrcA[1] = sParams.pSrcA[0]; + sParams.pSrcB[0] = pMeRefine->pHalfPixHV; + sParams.pSrcB[1] = pMeRefine->pHalfPixHV + ME_REFINE_BUF_STRIDE; + sParams.pSrcB[2] = pRef - 1; + sParams.pSrcB[3] = pRef; + + } + break; + case REFINE_ME_HALF_PIXEL_RIGHT: { + pMeRefine->pHalfPixHV = pMeRefine->pHalfPixV;//reuse pBuffer, here only h&hv + pFunc->sMcFuncs.pfLumaHalfpelCen (pRef - 1 - kiStrideRef, kiStrideRef, pMeRefine->pHalfPixHV, ME_REFINE_BUF_STRIDE, + iWidth + 1, iHeight + 1); + iHalfMvx += 2; + sParams.iStrideA = ME_REFINE_BUF_STRIDE; + sParams.iStrideB = kiStrideRef; + sParams.pSrcA[0] = pMeRefine->pHalfPixH + 1; + sParams.pSrcA[3] = sParams.pSrcA[2] = sParams.pSrcA[1] = 
sParams.pSrcA[0]; + sParams.pSrcB[0] = pMeRefine->pHalfPixHV + 1; + sParams.pSrcB[1] = pMeRefine->pHalfPixHV + 1 + ME_REFINE_BUF_STRIDE; + sParams.pSrcB[2] = pRef; + sParams.pSrcB[3] = pRef + 1; + } + break; + case REFINE_ME_HALF_PIXEL_TOP: { + pMeRefine->pHalfPixHV = pMeRefine->pHalfPixH;//reuse pBuffer, here only v&hv + pFunc->sMcFuncs.pfLumaHalfpelCen (pRef - 1 - kiStrideRef, kiStrideRef, pMeRefine->pHalfPixHV, ME_REFINE_BUF_STRIDE, + iWidth + 1, iHeight + 1); + + iHalfMvy -= 2; + sParams.iStrideA = kiStrideRef; + sParams.iStrideB = ME_REFINE_BUF_STRIDE; + sParams.pSrcA[0] = pMeRefine->pHalfPixV; + sParams.pSrcA[3] = sParams.pSrcA[2] = sParams.pSrcA[1] = sParams.pSrcA[0]; + sParams.pSrcB[0] = pRef - kiStrideRef; + sParams.pSrcB[1] = pRef; + sParams.pSrcB[2] = pMeRefine->pHalfPixHV; + sParams.pSrcB[3] = pMeRefine->pHalfPixHV + 1; + } + break; + case REFINE_ME_HALF_PIXEL_BOTTOM: { + pMeRefine->pHalfPixHV = pMeRefine->pHalfPixH;//reuse pBuffer, here only v&hv + pFunc->sMcFuncs.pfLumaHalfpelCen (pRef - 1 - kiStrideRef, kiStrideRef, pMeRefine->pHalfPixHV, ME_REFINE_BUF_STRIDE, + iWidth + 1, iHeight + 1); + iHalfMvy += 2; + sParams.iStrideA = kiStrideRef; + sParams.iStrideB = ME_REFINE_BUF_STRIDE; + sParams.pSrcA[0] = pMeRefine->pHalfPixV + ME_REFINE_BUF_STRIDE; + sParams.pSrcA[3] = sParams.pSrcA[2] = sParams.pSrcA[1] = sParams.pSrcA[0]; + sParams.pSrcB[0] = pRef; + sParams.pSrcB[1] = pRef + kiStrideRef; + sParams.pSrcB[2] = pMeRefine->pHalfPixHV + ME_REFINE_BUF_STRIDE; + sParams.pSrcB[3] = pMeRefine->pHalfPixHV + ME_REFINE_BUF_STRIDE + 1; + } + break; + default: + break; + } + sParams.iLms[0] = COST_MVD (pMe->pMvdCost, iHalfMvx - pMe->sMvp.iMvX, iHalfMvy - 1 - pMe->sMvp.iMvY); + sParams.iLms[1] = COST_MVD (pMe->pMvdCost, iHalfMvx - pMe->sMvp.iMvX, iHalfMvy + 1 - pMe->sMvp.iMvY); + sParams.iLms[2] = COST_MVD (pMe->pMvdCost, iHalfMvx - 1 - pMe->sMvp.iMvX, iHalfMvy - pMe->sMvp.iMvY); + sParams.iLms[3] = COST_MVD (pMe->pMvdCost, iHalfMvx + 1 - pMe->sMvp.iMvX, iHalfMvy - 
pMe->sMvp.iMvY); + } + MeRefineQuarPixel (pFunc, pMe, pMeRefine, iWidth, iHeight, &sParams, kiStrideEnc); + + if (iBestCost > sParams.iBestCost) { + pBestPredInter = pMeRefine->pQuarPixBest; + iBestCost = sParams.iBestCost; + } + iBestQuarPix = sParams.iBestQuarPix; + + //update final best MV + pMe->sMv.iMvX = iHalfMvx + iMvQuarAddX[iBestQuarPix]; + pMe->sMv.iMvY = iHalfMvy + pMvQuarAddY[iBestQuarPix]; + pMe->uiSatdCost = iBestCost; + + //No half or quarter pixel best, so do MC with integer pixel MV + if (iBestHalfPix + iBestQuarPix == NO_BEST_FRAC_PIX) { + pBestPredInter = pRef; + iInterBlk4Stride = kiStrideRef; + } + pMeRefine->pfCopyBlockByMode (pMemPredInterMb, MB_WIDTH_LUMA, pBestPredInter, + iInterBlk4Stride); +} + +void InitBlkStrideWithRef (int32_t* pBlkStride, const int32_t kiStrideRef) { + static const uint8_t kuiStrideX[16] = { + 0, 4 , 0, 4 , + 8, 12, 8, 12, + 0, 4 , 0, 4 , + 8, 12, 8, 12 + }; + static const uint8_t kuiStrideY[16] = { + 0, 0, 4 , 4 , + 0, 0, 4 , 4 , + 8, 8, 12, 12, + 8, 8, 12, 12 + }; + int32_t i; + + for (i = 0; i < 16; i += 4) { + pBlkStride[i ] = kuiStrideX[i ] + kuiStrideY[i ] * kiStrideRef; + pBlkStride[i + 1] = kuiStrideX[i + 1] + kuiStrideY[i + 1] * kiStrideRef; + pBlkStride[i + 2] = kuiStrideX[i + 2] + kuiStrideY[i + 2] * kiStrideRef; + pBlkStride[i + 3] = kuiStrideX[i + 3] + kuiStrideY[i + 3] * kiStrideRef; + } +} + +/* + * iMvdSz = (648*2+1) or (972*2+1); + */ +void MvdCostInit (uint16_t* pMvdCostInter, const int32_t kiMvdSz) { + const int32_t kiSz = kiMvdSz >> 1; + uint16_t* pNegMvd = pMvdCostInter; + uint16_t* pPosMvd = pMvdCostInter + kiSz + 1; + const int32_t* kpQpLambda = &g_kiQpCostTable[0]; + int32_t i, j; + + for (i = 0; i < 52; ++ i) { + const uint16_t kiLambda = kpQpLambda[i]; + int32_t iNegSe = -kiSz; + int32_t iPosSe = 1; + + for (j = 0; j < kiSz; j += 4) { + *pNegMvd++ = kiLambda * BsSizeSE (iNegSe++); + *pNegMvd++ = kiLambda * BsSizeSE (iNegSe++); + *pNegMvd++ = kiLambda * BsSizeSE (iNegSe++); + *pNegMvd++ = 
kiLambda * BsSizeSE (iNegSe++); + + *pPosMvd++ = kiLambda * BsSizeSE (iPosSe++); + *pPosMvd++ = kiLambda * BsSizeSE (iPosSe++); + *pPosMvd++ = kiLambda * BsSizeSE (iPosSe++); + *pPosMvd++ = kiLambda * BsSizeSE (iPosSe++); + } + *pNegMvd = kiLambda; + pNegMvd += kiSz + 1; + pPosMvd += kiSz + 1; + } +} + +void PredictSad (int8_t* pRefIndexCache, int32_t* pSadCostCache, int32_t uiRef, int32_t* pSadPred) { + const int32_t kiRefB = pRefIndexCache[1];//top g_uiCache12_8x8RefIdx[0] - 4 + int32_t iRefC = pRefIndexCache[5];//top-right g_uiCache12_8x8RefIdx[0] - 2 + const int32_t kiRefA = pRefIndexCache[6];//left g_uiCache12_8x8RefIdx[0] - 1 + const int32_t kiSadB = pSadCostCache[1]; + int32_t iSadC = pSadCostCache[2]; + const int32_t kiSadA = pSadCostCache[3]; + + int32_t iCount; + + if (iRefC == REF_NOT_AVAIL) { + iRefC = pRefIndexCache[0];//top-left g_uiCache12_8x8RefIdx[0] - 4 - 1 + iSadC = pSadCostCache[0]; + } + + if (kiRefB == REF_NOT_AVAIL && iRefC == REF_NOT_AVAIL && kiRefA != REF_NOT_AVAIL) { + * pSadPred = kiSadA; + } else { + iCount = (uiRef == kiRefA) << MB_LEFT_BIT; + iCount |= (uiRef == kiRefB) << MB_TOP_BIT; + iCount |= (uiRef == iRefC) << MB_TOPRIGHT_BIT; + switch (iCount) { + case LEFT_MB_POS:// A + *pSadPred = kiSadA; + break; + case TOP_MB_POS:// B + *pSadPred = kiSadB; + break; + case TOPRIGHT_MB_POS:// C or D + *pSadPred = iSadC; + break; + default: + *pSadPred = WelsMedian (kiSadA, kiSadB, iSadC); + break; + } + } + +#define REPLACE_SAD_MULTIPLY(x) ((x) - (x>>3) + (x >>5)) // it's 0.90625, very close with 0.9 + iCount = (*pSadPred) << 6; // here *64 will not overflow. 
SAD range 0~ 255*256(max 2^16), int32_t is enough + *pSadPred = (REPLACE_SAD_MULTIPLY (iCount) + 32) >> 6; +#undef REPLACE_SAD_MULTIPLY +} + + +void PredictSadSkip (int8_t* pRefIndexCache, bool* pMbSkipCache, int32_t* pSadCostCache, int32_t uiRef, + int32_t* iSadPredSkip) { + const int32_t kiRefB = pRefIndexCache[1];//top g_uiCache12_8x8RefIdx[0] - 4 + int32_t iRefC = pRefIndexCache[5];//top-right g_uiCache12_8x8RefIdx[0] - 2 + const int32_t kiRefA = pRefIndexCache[6];//left g_uiCache12_8x8RefIdx[0] - 1 + const int32_t kiSadB = (pMbSkipCache[1] == 1 ? pSadCostCache[1] : 0); + int32_t iSadC = (pMbSkipCache[2] == 1 ? pSadCostCache[2] : 0); + const int32_t kiSadA = (pMbSkipCache[3] == 1 ? pSadCostCache[3] : 0); + int32_t iRefSkip = pMbSkipCache[2]; + + int32_t iCount = 0; + + if (iRefC == REF_NOT_AVAIL) { + iRefC = pRefIndexCache[0];//top-left g_uiCache12_8x8RefIdx[0] - 4 - 1 + iSadC = (pMbSkipCache[0] == 1 ? pSadCostCache[0] : 0); + iRefSkip = pMbSkipCache[0]; + } + + if (kiRefB == REF_NOT_AVAIL && iRefC == REF_NOT_AVAIL && kiRefA != REF_NOT_AVAIL) { + * iSadPredSkip = kiSadA; + } else { + iCount = ((uiRef == kiRefA) && (pMbSkipCache[3] == 1)) << MB_LEFT_BIT; + iCount |= ((uiRef == kiRefB) && (pMbSkipCache[1] == 1)) << MB_TOP_BIT; + iCount |= ((uiRef == iRefC) && (iRefSkip == 1)) << MB_TOPRIGHT_BIT; + switch (iCount) { + case LEFT_MB_POS:// A + *iSadPredSkip = kiSadA; + break; + case TOP_MB_POS:// B + *iSadPredSkip = kiSadB; + break; + case TOPRIGHT_MB_POS:// C or D + *iSadPredSkip = iSadC; + break; + default: + *iSadPredSkip = WelsMedian (kiSadA, kiSadB, iSadC); + break; + } + } +} +} diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/mv_pred.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/mv_pred.cpp new file mode 100644 index 000000000..a06a629c7 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/mv_pred.cpp @@ -0,0 +1,436 @@ +/*! 
+ * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file mv_pred.c + * + * \brief Get MV predictor and update motion vector of mb cache + * + * \date 05/22/2009 Created + * + ************************************************************************************* + */ + +#include "mv_pred.h" +#include "ls_defines.h" +namespace WelsEnc { +//basic pMv prediction unit for pMv width (4, 2, 1) +void PredMv (const SMVComponentUnit* kpMvComp, int8_t iPartIdx, int8_t iPartW, int32_t iRef, SMVUnitXY* sMvp) { + const uint8_t kuiLeftIdx = g_kuiCache30ScanIdx[iPartIdx] - 1; + const uint8_t kuiTopIdx = g_kuiCache30ScanIdx[iPartIdx] - 6; + + int32_t iMatchRef; + int32_t iLeftRef = kpMvComp->iRefIndexCache[kuiLeftIdx]; + int32_t iTopRef = kpMvComp->iRefIndexCache[ kuiTopIdx]; + int32_t iRightTopRef = kpMvComp->iRefIndexCache[kuiTopIdx + iPartW]; + int32_t iDiagonalRef; + SMVUnitXY sMvA (kpMvComp->sMotionVectorCache[kuiLeftIdx]); + SMVUnitXY sMvB (kpMvComp->sMotionVectorCache[kuiTopIdx]); + SMVUnitXY sMvC; + + if (REF_NOT_AVAIL == iRightTopRef) { + iDiagonalRef = kpMvComp->iRefIndexCache[ kuiTopIdx - 1];// left_top; + sMvC = kpMvComp->sMotionVectorCache[kuiTopIdx - 1]; + } else { + iDiagonalRef = iRightTopRef;// right_top; + sMvC = kpMvComp->sMotionVectorCache[kuiTopIdx + iPartW]; + } + + if ((REF_NOT_AVAIL == iTopRef) && (REF_NOT_AVAIL == iDiagonalRef) && iLeftRef != REF_NOT_AVAIL) { + *sMvp = sMvA; + return; + } + + // b2[diag] b1[top] b0[left] is available! 
+ iMatchRef = (iRef == iLeftRef) << MB_LEFT_BIT; + iMatchRef |= (iRef == iTopRef) << MB_TOP_BIT; + iMatchRef |= (iRef == iDiagonalRef) << MB_TOPRIGHT_BIT; + switch (iMatchRef) { + case LEFT_MB_POS:// A + *sMvp = sMvA; + break; + case TOP_MB_POS:// B + *sMvp = sMvB; + break; + case TOPRIGHT_MB_POS:// C or D + *sMvp = sMvC; + break; + default: + sMvp->iMvX = WelsMedian (sMvA.iMvX, sMvB.iMvX, sMvC.iMvX); + sMvp->iMvY = WelsMedian (sMvA.iMvY, sMvB.iMvY, sMvC.iMvY); + break; + } +} +void PredInter8x16Mv (SMbCache* pMbCache, int32_t iPartIdx, int8_t iRef, SMVUnitXY* sMvp) { + const SMVComponentUnit* kpMvComp = &pMbCache->sMvComponents; + if (0 == iPartIdx) { + const int8_t kiLeftRef = kpMvComp->iRefIndexCache[6]; + if (iRef == kiLeftRef) { + *sMvp = kpMvComp->sMotionVectorCache[6]; + return; + } + } else { // 1 == iPartIdx + int8_t iDiagonalRef = kpMvComp->iRefIndexCache[5]; //top-right + int8_t iIndex = 5; + if (REF_NOT_AVAIL == iDiagonalRef) { + iDiagonalRef = kpMvComp->iRefIndexCache[2]; //top-left for 8*8 block(iIndex 1) + iIndex = 2; + } + if (iRef == iDiagonalRef) { + *sMvp = kpMvComp->sMotionVectorCache[iIndex]; + return; + } + } + + PredMv (kpMvComp, iPartIdx, 2, iRef, sMvp); +} +void PredInter16x8Mv (SMbCache* pMbCache, int32_t iPartIdx, int8_t iRef, SMVUnitXY* sMvp) { + const SMVComponentUnit* kpMvComp = &pMbCache->sMvComponents; + if (0 == iPartIdx) { + const int8_t kiTopRef = kpMvComp->iRefIndexCache[1]; + if (iRef == kiTopRef) { + *sMvp = kpMvComp->sMotionVectorCache[1]; + return; + } + } else { // 8 == iPartIdx + const int8_t kiLeftRef = kpMvComp->iRefIndexCache[18]; + if (iRef == kiLeftRef) { + *sMvp = kpMvComp->sMotionVectorCache[18]; + return; + } + } + + PredMv (kpMvComp, iPartIdx, 4, iRef, sMvp); +} +void PredSkipMv (SMbCache* pMbCache, SMVUnitXY* sMvp) { + const SMVComponentUnit* kpMvComp = &pMbCache->sMvComponents; + const int8_t kiLeftRef = kpMvComp->iRefIndexCache[6]; //A + const int8_t kiTopRef = kpMvComp->iRefIndexCache[1]; //B + + if 
(REF_NOT_AVAIL == kiLeftRef || REF_NOT_AVAIL == kiTopRef || + (0 == kiLeftRef && 0 == * (int32_t*) (&kpMvComp->sMotionVectorCache[6])) || + (0 == kiTopRef && 0 == * (int32_t*) (&kpMvComp->sMotionVectorCache[1]))) { + ST32 (sMvp, 0); + return; + } + + PredMv (kpMvComp, 0, 4, 0, sMvp); +} + +//update pMv and uiRefIndex cache for current MB, only for P_16*16 (SKIP inclusive) +void UpdateP16x16MotionInfo (SMbCache* pMbCache, SMB* pCurMb, const int8_t kiRef, SMVUnitXY* pMv) { + // optimized 11/25/2011 + SMVComponentUnit* pMvComp = &pMbCache->sMvComponents; + const uint32_t kuiMv32 = LD32 (pMv); + const uint64_t kuiMv64 = BUTTERFLY4x8 (kuiMv32); + uint64_t uiMvBuf[8] = { kuiMv64, kuiMv64, kuiMv64, kuiMv64, kuiMv64, kuiMv64, kuiMv64, kuiMv64 }; + const uint16_t kuiRef16 = BUTTERFLY1x2 (kiRef); + const uint32_t kuiRef32 = BUTTERFLY2x4 (kuiRef16); + + ST32 (pCurMb->pRefIndex, kuiRef32); + // update pMv range from 0~15 + memcpy (pCurMb->sMv, uiMvBuf, sizeof (uiMvBuf)); // confirmed_safe_unsafe_usage + + /* + * blocks 0: 7~10, 1: 13~16, 2: 19~22, 3: 25~28 + */ + pMvComp->iRefIndexCache[7] = kiRef; + ST16 (&pMvComp->iRefIndexCache[8], kuiRef16); + pMvComp->iRefIndexCache[10] = kiRef; + pMvComp->iRefIndexCache[13] = kiRef; + ST16 (&pMvComp->iRefIndexCache[14], kuiRef16); + pMvComp->iRefIndexCache[16] = kiRef; + pMvComp->iRefIndexCache[19] = kiRef; + ST16 (&pMvComp->iRefIndexCache[20], kuiRef16); + pMvComp->iRefIndexCache[22] = kiRef; + pMvComp->iRefIndexCache[25] = kiRef; + ST16 (&pMvComp->iRefIndexCache[26], kuiRef16); + pMvComp->iRefIndexCache[28] = kiRef; + + /* + * blocks 0: 7~10, 1: 13~16, 2: 19~22, 3: 25~28 + */ + pMvComp->sMotionVectorCache[7] = *pMv; + ST64 (&pMvComp->sMotionVectorCache[8], kuiMv64); + pMvComp->sMotionVectorCache[10] = *pMv; + pMvComp->sMotionVectorCache[13] = *pMv; + ST64 (&pMvComp->sMotionVectorCache[14], kuiMv64); + pMvComp->sMotionVectorCache[16] = *pMv; + pMvComp->sMotionVectorCache[19] = *pMv; + ST64 (&pMvComp->sMotionVectorCache[20], kuiMv64); + 
pMvComp->sMotionVectorCache[22] = *pMv; + pMvComp->sMotionVectorCache[25] = *pMv; + ST64 (&pMvComp->sMotionVectorCache[26], kuiMv64); + pMvComp->sMotionVectorCache[28] = *pMv; +} + +//update uiRefIndex and pMv of both SMB and Mb_cache, only for P16x8 +void UpdateP16x8MotionInfo (SMbCache* pMbCache, SMB* pCurMb, const int32_t kiPartIdx, const int8_t kiRef, + SMVUnitXY* pMv) { + // optimized 11/25/2011 + SMVComponentUnit* pMvComp = &pMbCache->sMvComponents; + const uint32_t kuiMv32 = LD32 (pMv); + const uint64_t kuiMv64 = BUTTERFLY4x8 (kuiMv32); + uint64_t uiMvBuf[4] = { kuiMv64, kuiMv64, kuiMv64, kuiMv64 }; + const int16_t kiScan4Idx = g_kuiMbCountScan4Idx[kiPartIdx]; + const int16_t kiCacheIdx = g_kuiCache30ScanIdx[kiPartIdx]; + const int16_t kiCacheIdx1 = 1 + kiCacheIdx; + const int16_t kiCacheIdx3 = 3 + kiCacheIdx; + const int16_t kiCacheIdx6 = 6 + kiCacheIdx; + const int16_t kiCacheIdx7 = 7 + kiCacheIdx; + const int16_t kiCacheIdx9 = 9 + kiCacheIdx; + const uint16_t kuiRef16 = BUTTERFLY1x2 (kiRef); + + ST16 (&pCurMb->pRefIndex[ (kiPartIdx >> 2)], kuiRef16); + memcpy (&pCurMb->sMv[kiScan4Idx], uiMvBuf, sizeof (uiMvBuf)); // confirmed_safe_unsafe_usage + + /* + * blocks 0: g_kuiCache30ScanIdx[iPartIdx]~g_kuiCache30ScanIdx[iPartIdx]+3, 1: g_kuiCache30ScanIdx[iPartIdx]+6~g_kuiCache30ScanIdx[iPartIdx]+9 + */ + pMvComp->iRefIndexCache[kiCacheIdx] = kiRef; + ST16 (&pMvComp->iRefIndexCache[kiCacheIdx1], kuiRef16); + pMvComp->iRefIndexCache[kiCacheIdx3] = kiRef; + pMvComp->iRefIndexCache[kiCacheIdx6] = kiRef; + ST16 (&pMvComp->iRefIndexCache[kiCacheIdx7], kuiRef16); + pMvComp->iRefIndexCache[kiCacheIdx9] = kiRef; + + /* + * blocks 0: g_kuiCache30ScanIdx[iPartIdx]~g_kuiCache30ScanIdx[iPartIdx]+3, 1: g_kuiCache30ScanIdx[iPartIdx]+6~g_kuiCache30ScanIdx[iPartIdx]+9 + */ + pMvComp->sMotionVectorCache[kiCacheIdx] = *pMv; + ST64 (&pMvComp->sMotionVectorCache[kiCacheIdx1], kuiMv64); + pMvComp->sMotionVectorCache[kiCacheIdx3] = *pMv; + pMvComp->sMotionVectorCache[kiCacheIdx6] = 
*pMv; + ST64 (&pMvComp->sMotionVectorCache[kiCacheIdx7], kuiMv64); + pMvComp->sMotionVectorCache[kiCacheIdx9] = *pMv; +} +//update uiRefIndex and pMv of both SMB and Mb_cache, only for P8x16 +void update_P8x16_motion_info (SMbCache* pMbCache, SMB* pCurMb, const int32_t kiPartIdx, const int8_t kiRef, + SMVUnitXY* pMv) { + // optimized 11/25/2011 + SMVComponentUnit* pMvComp = &pMbCache->sMvComponents; + const uint32_t kuiMv32 = LD32 (pMv); + const uint64_t kuiMv64 = BUTTERFLY4x8 (kuiMv32); + const int16_t kiScan4Idx = g_kuiMbCountScan4Idx[kiPartIdx]; + const int16_t kiCacheIdx = g_kuiCache30ScanIdx[kiPartIdx]; + const int16_t kiCacheIdx1 = 1 + kiCacheIdx; + const int16_t kiCacheIdx3 = 3 + kiCacheIdx; + const int16_t kiCacheIdx12 = 12 + kiCacheIdx; + const int16_t kiCacheIdx13 = 13 + kiCacheIdx; + const int16_t kiCacheIdx15 = 15 + kiCacheIdx; + const int16_t kiBlkIdx = kiPartIdx >> 2; + const uint16_t kuiRef16 = BUTTERFLY1x2 (kiRef); + + pCurMb->pRefIndex[kiBlkIdx] = kiRef; + pCurMb->pRefIndex[2 + kiBlkIdx] = kiRef; + ST64 (&pCurMb->sMv[kiScan4Idx], kuiMv64); + ST64 (&pCurMb->sMv[4 + kiScan4Idx], kuiMv64); + ST64 (&pCurMb->sMv[8 + kiScan4Idx], kuiMv64); + ST64 (&pCurMb->sMv[12 + kiScan4Idx], kuiMv64); + + /* + * blocks 0: g_kuiCache30ScanIdx[iPartIdx]~g_kuiCache30ScanIdx[iPartIdx]+3, 1: g_kuiCache30ScanIdx[iPartIdx]+6~g_kuiCache30ScanIdx[iPartIdx]+9 + */ + pMvComp->iRefIndexCache[kiCacheIdx] = kiRef; + ST16 (&pMvComp->iRefIndexCache[kiCacheIdx1], kuiRef16); + pMvComp->iRefIndexCache[kiCacheIdx3] = kiRef; + pMvComp->iRefIndexCache[kiCacheIdx12] = kiRef; + ST16 (&pMvComp->iRefIndexCache[kiCacheIdx13], kuiRef16); + pMvComp->iRefIndexCache[kiCacheIdx15] = kiRef; + + /* + * blocks 0: g_kuiCache30ScanIdx[iPartIdx]~g_kuiCache30ScanIdx[iPartIdx]+3, 1: g_kuiCache30ScanIdx[iPartIdx]+6~g_kuiCache30ScanIdx[iPartIdx]+9 + */ + pMvComp->sMotionVectorCache[kiCacheIdx] = *pMv; + ST64 (&pMvComp->sMotionVectorCache[kiCacheIdx1], kuiMv64); + pMvComp->sMotionVectorCache[kiCacheIdx3] = 
*pMv; + pMvComp->sMotionVectorCache[kiCacheIdx12] = *pMv; + ST64 (&pMvComp->sMotionVectorCache[kiCacheIdx13], kuiMv64); + pMvComp->sMotionVectorCache[kiCacheIdx15] = *pMv; +} +//update uiRefIndex and pMv of both SMB and Mb_cache, only for P8x8 +void UpdateP8x8MotionInfo (SMbCache* pMbCache, SMB* pCurMb, const int32_t kiPartIdx, const int8_t kiRef, + SMVUnitXY* pMv) { + SMVComponentUnit* pMvComp = &pMbCache->sMvComponents; + const uint32_t kuiMv32 = LD32 (pMv); + const uint64_t kuiMv64 = BUTTERFLY4x8 (kuiMv32); + const int16_t kiScan4Idx = g_kuiMbCountScan4Idx[kiPartIdx]; + const int16_t kiCacheIdx = g_kuiCache30ScanIdx[kiPartIdx]; + const int16_t kiCacheIdx1 = 1 + kiCacheIdx; + const int16_t kiCacheIdx6 = 6 + kiCacheIdx; + const int16_t kiCacheIdx7 = 7 + kiCacheIdx; + + //mb + ST64 (&pCurMb->sMv[ kiScan4Idx], kuiMv64); + ST64 (&pCurMb->sMv[4 + kiScan4Idx], kuiMv64); + + //cache + pMvComp->iRefIndexCache[kiCacheIdx ] = + pMvComp->iRefIndexCache[kiCacheIdx1] = + pMvComp->iRefIndexCache[kiCacheIdx6] = + pMvComp->iRefIndexCache[kiCacheIdx7] = kiRef; + pMvComp->sMotionVectorCache[kiCacheIdx ] = + pMvComp->sMotionVectorCache[kiCacheIdx1] = + pMvComp->sMotionVectorCache[kiCacheIdx6] = + pMvComp->sMotionVectorCache[kiCacheIdx7] = *pMv; +} +//update uiRefIndex and pMv of both SMB and Mb_cache, only for P4x4 +void UpdateP4x4MotionInfo (SMbCache* pMbCache, SMB* pCurMb, const int32_t kiPartIdx, const int8_t kiRef, + SMVUnitXY* pMv) { + SMVComponentUnit* pMvComp = &pMbCache->sMvComponents; + const int16_t kiScan4Idx = g_kuiMbCountScan4Idx[kiPartIdx]; + const int16_t kiCacheIdx = g_kuiCache30ScanIdx[kiPartIdx]; + + //mb + pCurMb->sMv[kiScan4Idx] = *pMv; + //cache + pMvComp->iRefIndexCache[kiCacheIdx] = kiRef; + pMvComp->sMotionVectorCache[kiCacheIdx] = *pMv; +} +//update uiRefIndex and pMv of both SMB and Mb_cache, only for P8x4 +void UpdateP8x4MotionInfo (SMbCache* pMbCache, SMB* pCurMb, const int32_t kiPartIdx, const int8_t kiRef, + SMVUnitXY* pMv) { + SMVComponentUnit* 
pMvComp = &pMbCache->sMvComponents; + const int16_t kiScan4Idx = g_kuiMbCountScan4Idx[kiPartIdx]; + const int16_t kiCacheIdx = g_kuiCache30ScanIdx[kiPartIdx]; + + //mb + pCurMb->sMv[ kiScan4Idx] = *pMv; + pCurMb->sMv[1 + kiScan4Idx] = *pMv; + //cache + pMvComp->iRefIndexCache[ kiCacheIdx] = kiRef; + pMvComp->iRefIndexCache[1 + kiCacheIdx] = kiRef; + pMvComp->sMotionVectorCache[ kiCacheIdx] = *pMv; + pMvComp->sMotionVectorCache[1 + kiCacheIdx] = *pMv; +} +//update uiRefIndex and pMv of both SMB and Mb_cache, only for P4x8 +void UpdateP4x8MotionInfo (SMbCache* pMbCache, SMB* pCurMb, const int32_t kiPartIdx, const int8_t kiRef, + SMVUnitXY* pMv) { + SMVComponentUnit* pMvComp = &pMbCache->sMvComponents; + const int16_t kiScan4Idx = g_kuiMbCountScan4Idx[kiPartIdx]; + const int16_t kiCacheIdx = g_kuiCache30ScanIdx[kiPartIdx]; + + //mb + pCurMb->sMv[ kiScan4Idx] = *pMv; + pCurMb->sMv[4 + kiScan4Idx] = *pMv; + //cache + pMvComp->iRefIndexCache[ kiCacheIdx] = kiRef; + pMvComp->iRefIndexCache[6 + kiCacheIdx] = kiRef; + pMvComp->sMotionVectorCache[ kiCacheIdx] = *pMv; + pMvComp->sMotionVectorCache[6 + kiCacheIdx] = *pMv; +} +//=========================update motion info(MV and ref_idx) into Mb_cache========================== +//update pMv and uiRefIndex cache only for Mb_cache, only for P_16*16 (SKIP inclusive) + +//update uiRefIndex and pMv of only Mb_cache, only for P16x8 +void UpdateP16x8Motion2Cache (SMbCache* pMbCache, int32_t iPartIdx, int8_t iRef, SMVUnitXY* pMv) { + SMVComponentUnit* pMvComp = &pMbCache->sMvComponents; + int32_t i; + + for (i = 0; i < 2; i++, iPartIdx += 4) { + //cache + const uint8_t kuiCacheIdx = g_kuiCache30ScanIdx[iPartIdx]; + + pMvComp->iRefIndexCache[ kuiCacheIdx] = + pMvComp->iRefIndexCache[1 + kuiCacheIdx] = + pMvComp->iRefIndexCache[6 + kuiCacheIdx] = + pMvComp->iRefIndexCache[7 + kuiCacheIdx] = iRef; + pMvComp->sMotionVectorCache[ kuiCacheIdx] = + pMvComp->sMotionVectorCache[1 + kuiCacheIdx] = + pMvComp->sMotionVectorCache[6 + kuiCacheIdx] = 
+ pMvComp->sMotionVectorCache[7 + kuiCacheIdx] = *pMv; + } +} +//update uiRefIndex and pMv of only Mb_cache, only for P8x16 +void UpdateP8x16Motion2Cache (SMbCache* pMbCache, int32_t iPartIdx, int8_t iRef, SMVUnitXY* pMv) { + SMVComponentUnit* pMvComp = &pMbCache->sMvComponents; + int32_t i; + + for (i = 0; i < 2; i++, iPartIdx += 8) { + //cache + const uint8_t kuiCacheIdx = g_kuiCache30ScanIdx[iPartIdx]; + + pMvComp->iRefIndexCache[ kuiCacheIdx] = + pMvComp->iRefIndexCache[1 + kuiCacheIdx] = + pMvComp->iRefIndexCache[6 + kuiCacheIdx] = + pMvComp->iRefIndexCache[7 + kuiCacheIdx] = iRef; + pMvComp->sMotionVectorCache[ kuiCacheIdx] = + pMvComp->sMotionVectorCache[1 + kuiCacheIdx] = + pMvComp->sMotionVectorCache[6 + kuiCacheIdx] = + pMvComp->sMotionVectorCache[7 + kuiCacheIdx] = *pMv; + } +} + +//update uiRefIndex and pMv of only Mb_cache, only for P8x8 +void UpdateP8x8Motion2Cache (SMbCache* pMbCache, int32_t iPartIdx, int8_t pRef, SMVUnitXY* pMv) { + SMVComponentUnit* pMvComp = &pMbCache->sMvComponents; + const uint8_t kuiCacheIdx = g_kuiCache30ScanIdx[iPartIdx]; + + pMvComp->iRefIndexCache[ kuiCacheIdx] = + pMvComp->iRefIndexCache[1 + kuiCacheIdx] = + pMvComp->iRefIndexCache[6 + kuiCacheIdx] = + pMvComp->iRefIndexCache[7 + kuiCacheIdx] = pRef; + pMvComp->sMotionVectorCache[ kuiCacheIdx] = + pMvComp->sMotionVectorCache[1 + kuiCacheIdx] = + pMvComp->sMotionVectorCache[6 + kuiCacheIdx] = + pMvComp->sMotionVectorCache[7 + kuiCacheIdx] = *pMv; +} + +//update uiRefIndex and pMv of only Mb_cache, for P4x4 +void UpdateP4x4Motion2Cache (SMbCache* pMbCache, int32_t iPartIdx, int8_t pRef, SMVUnitXY* pMv) { + SMVComponentUnit* pMvComp = &pMbCache->sMvComponents; + const uint8_t kuiCacheIdx = g_kuiCache30ScanIdx[iPartIdx]; + + pMvComp->iRefIndexCache [kuiCacheIdx] = pRef; + pMvComp->sMotionVectorCache[kuiCacheIdx] = *pMv; +} + +//update uiRefIndex and pMv of only Mb_cache, for P8x4 +void UpdateP8x4Motion2Cache (SMbCache* pMbCache, int32_t iPartIdx, int8_t pRef, SMVUnitXY* pMv) 
{ + SMVComponentUnit* pMvComp = &pMbCache->sMvComponents; + const uint8_t kuiCacheIdx = g_kuiCache30ScanIdx[iPartIdx]; + + pMvComp->iRefIndexCache [ kuiCacheIdx] = + pMvComp->iRefIndexCache [1 + kuiCacheIdx] = pRef; + pMvComp->sMotionVectorCache [ kuiCacheIdx] = + pMvComp->sMotionVectorCache[1 + kuiCacheIdx] = *pMv; +} + +//update uiRefIndex and pMv of only Mb_cache, for P4x8 +void UpdateP4x8Motion2Cache (SMbCache* pMbCache, int32_t iPartIdx, int8_t pRef, SMVUnitXY* pMv) { + SMVComponentUnit* pMvComp = &pMbCache->sMvComponents; + const uint8_t kuiCacheIdx = g_kuiCache30ScanIdx[iPartIdx]; + + pMvComp->iRefIndexCache [ kuiCacheIdx] = + pMvComp->iRefIndexCache [6 + kuiCacheIdx] = pRef; + pMvComp->sMotionVectorCache [ kuiCacheIdx] = + pMvComp->sMotionVectorCache[6 + kuiCacheIdx] = *pMv; +} +} // namespace WelsEnc diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/nal_encap.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/nal_encap.cpp new file mode 100644 index 000000000..019d86ff6 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/nal_encap.cpp @@ -0,0 +1,198 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file nal_encap.c + * + * \brief NAL pRawNal pData encapsulation + * + * \date 5/25/2009 Created + * + *************************************************************************************/ +#include "nal_encap.h" +#include "svc_enc_golomb.h" +#include "ls_defines.h" +namespace WelsEnc { +/*! + * \brief load an initialize NAL pRawNal pData + */ +void WelsLoadNal (SWelsEncoderOutput* pEncoderOuput, const int32_t/*EWelsNalUnitType*/ kiType, + const int32_t/*EWelsNalRefIdc*/ kiNalRefIdc) { + SWelsEncoderOutput* pWelsEncoderOuput = pEncoderOuput; + SWelsNalRaw* pRawNal = &pWelsEncoderOuput->sNalList[ pWelsEncoderOuput->iNalIndex ]; + SNalUnitHeader* sNalUnitHeader = &pRawNal->sNalExt.sNalUnitHeader; + const int32_t kiStartPos = (BsGetBitsPos (&pWelsEncoderOuput->sBsWrite) >> 3); + + sNalUnitHeader->eNalUnitType = (EWelsNalUnitType)kiType; + sNalUnitHeader->uiNalRefIdc = (EWelsNalRefIdc)kiNalRefIdc; + sNalUnitHeader->uiForbiddenZeroBit = 0; + + pRawNal->pRawData = &pWelsEncoderOuput->pBsBuffer[kiStartPos]; + pRawNal->iStartPos = kiStartPos; + pRawNal->iPayloadSize = 0; +} + +/*! 
+ * \brief unload pRawNal NAL + */ +void WelsUnloadNal (SWelsEncoderOutput* pEncoderOuput) { + SWelsEncoderOutput* pWelsEncoderOuput = pEncoderOuput; + int32_t* pIdx = &pWelsEncoderOuput->iNalIndex; + SWelsNalRaw* pRawNal = &pWelsEncoderOuput->sNalList[ *pIdx ]; + const int32_t kiEndPos = (BsGetBitsPos (&pWelsEncoderOuput->sBsWrite) >> 3); + + /* count payload size of pRawNal NAL */ + pRawNal->iPayloadSize = kiEndPos - pRawNal->iStartPos; + + ++ (*pIdx); +} + +/*! + * \brief load an initialize NAL pRawNal pData + */ +void WelsLoadNalForSlice (SWelsSliceBs* pSliceBs, const int32_t/*EWelsNalUnitType*/ kiType, + const int32_t/*EWelsNalRefIdc*/ kiNalRefIdc) { + SWelsNalRaw* pRawNal = &pSliceBs->sNalList[ pSliceBs->iNalIndex ]; + SNalUnitHeader* sNalUnitHeader = &pRawNal->sNalExt.sNalUnitHeader; + SBitStringAux* pBitStringAux = &pSliceBs->sBsWrite; + const int32_t kiStartPos = (BsGetBitsPos (pBitStringAux) >> 3); + + sNalUnitHeader->eNalUnitType = (EWelsNalUnitType)kiType; + sNalUnitHeader->uiNalRefIdc = (EWelsNalRefIdc)kiNalRefIdc; + sNalUnitHeader->uiForbiddenZeroBit = 0; + + pRawNal->pRawData = &pSliceBs->pBsBuffer[kiStartPos]; + pRawNal->iStartPos = kiStartPos; + pRawNal->iPayloadSize = 0; +} + +/*! + * \brief unload pRawNal NAL + */ +void WelsUnloadNalForSlice (SWelsSliceBs* pSliceBs) { + int32_t* pIdx = &pSliceBs->iNalIndex; + SWelsNalRaw* pRawNal = &pSliceBs->sNalList[ *pIdx ]; + SBitStringAux* pBitStringAux = &pSliceBs->sBsWrite; + const int32_t kiEndPos = (BsGetBitsPos (pBitStringAux) >> 3); + + /* count payload size of pRawNal NAL */ + pRawNal->iPayloadSize = kiEndPos - pRawNal->iStartPos; + ++ (*pIdx); +} + +/*! 
+ * \brief encode NAL with emulation forbidden three bytes checking + * \param pDst pDst NAL pData + * \param pDstLen length of pDst NAL output + * \param annexeb annexeb flag + * \param pRawNal pRawNal NAL pData + * \return ERRCODE + */ +//TODO 1: refactor the calling of this func in multi-thread +//TODO 2: complete the realloc© +int32_t WelsEncodeNal (SWelsNalRaw* pRawNal, void* pNalHeaderExt, const int32_t kiDstBufferLen, void* pDst, + int32_t* pDstLen) { + const bool kbNALExt = pRawNal->sNalExt.sNalUnitHeader.eNalUnitType == NAL_UNIT_PREFIX + || pRawNal->sNalExt.sNalUnitHeader.eNalUnitType == NAL_UNIT_CODED_SLICE_EXT; + int32_t iAssumedNeededLength = NAL_HEADER_SIZE + (kbNALExt ? 3 : 0) + pRawNal->iPayloadSize + 1; + WELS_VERIFY_RETURN_IF (ENC_RETURN_UNEXPECTED, (iAssumedNeededLength <= 0)) + + //since for each 0x000 need a 0x03, so the needed length will not exceed (iAssumeNeedLenth + iAssumeNeedLength/3), here adjust to >>1 to omit division + if (kiDstBufferLen < (iAssumedNeededLength + (iAssumedNeededLength >> 1))) { + return ENC_RETURN_MEMALLOCERR; + //TODO: call the realloc© instead + } + uint8_t* pDstStart = (uint8_t*)pDst; + uint8_t* pDstPointer = pDstStart; + uint8_t* pSrcPointer = pRawNal->pRawData; + uint8_t* pSrcEnd = pRawNal->pRawData + pRawNal->iPayloadSize; + int32_t iZeroCount = 0; + int32_t iNalLength = 0; + *pDstLen = 0; + + static const uint8_t kuiStartCodePrefix[NAL_HEADER_SIZE] = { 0, 0, 0, 1 }; + ST32 (pDstPointer, LD32 (&kuiStartCodePrefix[0])); + pDstPointer += 4; + + /* NAL Unit Header */ + *pDstPointer++ = (pRawNal->sNalExt.sNalUnitHeader.uiNalRefIdc << 5) | (pRawNal->sNalExt.sNalUnitHeader.eNalUnitType & + 0x1f); + + if (kbNALExt) { + SNalUnitHeaderExt* sNalExt = (SNalUnitHeaderExt*)pNalHeaderExt; + + /* NAL UNIT Extension Header */ + *pDstPointer++ = (0x80) | + (sNalExt->bIdrFlag << 6); + + *pDstPointer++ = (0x80) | + (sNalExt->uiDependencyId << 4); + + *pDstPointer++ = (sNalExt->uiTemporalId << 5) | + (sNalExt->bDiscardableFlag << 3) 
| + (0x07); + } + + while (pSrcPointer < pSrcEnd) { + if (iZeroCount == 2 && *pSrcPointer <= 3) { + //add the code 03 + *pDstPointer++ = 3; + iZeroCount = 0; + } + if (*pSrcPointer == 0) { + ++ iZeroCount; + } else { + iZeroCount = 0; + } + *pDstPointer++ = *pSrcPointer++; + } + + /* count length of NAL Unit */ + iNalLength = (int32_t) (pDstPointer - pDstStart); + if (NULL != pDstLen) + *pDstLen = iNalLength; + + return ENC_RETURN_SUCCESS; +} + +/*! + * \brief write prefix nal + */ +int32_t WelsWriteSVCPrefixNal (SBitStringAux* pBitStringAux, const int32_t kiNalRefIdc, + const bool kbIdrFlag) { + if (0 < kiNalRefIdc) { + BsWriteOneBit (pBitStringAux, false/*bStoreRefBasePicFlag*/); + BsWriteOneBit (pBitStringAux, false); + BsRbspTrailingBits (pBitStringAux); + } + return 0; +} + +} // namespace WelsEnc diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/paraset_strategy.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/paraset_strategy.cpp new file mode 100644 index 000000000..a506362a4 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/paraset_strategy.cpp @@ -0,0 +1,713 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include "au_set.h" +#include "encoder_context.h" +#include "paraset_strategy.h" + +namespace WelsEnc { + + +IWelsParametersetStrategy* IWelsParametersetStrategy::CreateParametersetStrategy ( + EParameterSetStrategy eSpsPpsIdStrategy, bool bSimulcastAVC, + const int32_t kiSpatialLayerNum) { + + IWelsParametersetStrategy* pParametersetStrategy = NULL; + switch (eSpsPpsIdStrategy) { + case INCREASING_ID: + pParametersetStrategy = WELS_NEW_OP (CWelsParametersetIdIncreasing (bSimulcastAVC, kiSpatialLayerNum), + CWelsParametersetIdIncreasing); + WELS_VERIFY_RETURN_IF (NULL, NULL == pParametersetStrategy) + break; + case SPS_LISTING: + pParametersetStrategy = WELS_NEW_OP (CWelsParametersetSpsListing (bSimulcastAVC, kiSpatialLayerNum), + CWelsParametersetSpsListing); + WELS_VERIFY_RETURN_IF (NULL, NULL == pParametersetStrategy) + break; + case SPS_LISTING_AND_PPS_INCREASING: + pParametersetStrategy = WELS_NEW_OP (CWelsParametersetSpsListingPpsIncreasing (bSimulcastAVC, kiSpatialLayerNum), + CWelsParametersetSpsListingPpsIncreasing); + WELS_VERIFY_RETURN_IF (NULL, NULL == pParametersetStrategy) + break; + case SPS_PPS_LISTING: + pParametersetStrategy = WELS_NEW_OP 
(CWelsParametersetSpsPpsListing (bSimulcastAVC, kiSpatialLayerNum), + CWelsParametersetSpsPpsListing); + WELS_VERIFY_RETURN_IF (NULL, NULL == pParametersetStrategy) + break; + case CONSTANT_ID: + default: + pParametersetStrategy = WELS_NEW_OP (CWelsParametersetIdConstant (bSimulcastAVC, kiSpatialLayerNum), + CWelsParametersetIdConstant); + WELS_VERIFY_RETURN_IF (NULL, NULL == pParametersetStrategy) + break; + } + + return pParametersetStrategy; +} + + +static int32_t WelsGenerateNewSps (sWelsEncCtx* pCtx, const bool kbUseSubsetSps, const int32_t iDlayerIndex, + const int32_t iDlayerCount, const int32_t kiSpsId, + SWelsSPS*& pSps, SSubsetSps*& pSubsetSps, bool bSVCBaselayer) { + int32_t iRet = 0; + + if (!kbUseSubsetSps) { + pSps = & (pCtx->pSpsArray[kiSpsId]); + } else { + pSubsetSps = & (pCtx->pSubsetArray[kiSpsId]); + pSps = &pSubsetSps->pSps; + } + + SWelsSvcCodingParam* pParam = pCtx->pSvcParam; + SSpatialLayerConfig* pDlayerParam = &pParam->sSpatialLayers[iDlayerIndex]; + // Need port pSps/pPps initialization due to spatial scalability changed + if (!kbUseSubsetSps) { + iRet = WelsInitSps (pSps, pDlayerParam, &pParam->sDependencyLayers[iDlayerIndex], pParam->uiIntraPeriod, + pParam->iMaxNumRefFrame, + kiSpsId, pParam->bEnableFrameCroppingFlag, pParam->iRCMode != RC_OFF_MODE, iDlayerCount, + bSVCBaselayer); + } else { + iRet = WelsInitSubsetSps (pSubsetSps, pDlayerParam, &pParam->sDependencyLayers[iDlayerIndex], pParam->uiIntraPeriod, + pParam->iMaxNumRefFrame, + kiSpsId, pParam->bEnableFrameCroppingFlag, pParam->iRCMode != RC_OFF_MODE, iDlayerCount); + } + return iRet; +} + +static bool CheckMatchedSps (SWelsSPS* const pSps1, SWelsSPS* const pSps2) { + + if ((pSps1->iMbWidth != pSps2->iMbWidth) + || (pSps1->iMbHeight != pSps2->iMbHeight)) { + return false; + } + + if ((pSps1->uiLog2MaxFrameNum != pSps2->uiLog2MaxFrameNum) + || (pSps1->iLog2MaxPocLsb != pSps2->iLog2MaxPocLsb)) { + return false; + } + + if (pSps1->iNumRefFrames != pSps2->iNumRefFrames) { + 
return false; + } + + if ((pSps1->bFrameCroppingFlag != pSps2->bFrameCroppingFlag) + || (pSps1->sFrameCrop.iCropLeft != pSps2->sFrameCrop.iCropLeft) + || (pSps1->sFrameCrop.iCropRight != pSps2->sFrameCrop.iCropRight) + || (pSps1->sFrameCrop.iCropTop != pSps2->sFrameCrop.iCropTop) + || (pSps1->sFrameCrop.iCropBottom != pSps2->sFrameCrop.iCropBottom) + ) { + return false; + } + + if ((pSps1->uiProfileIdc != pSps2->uiProfileIdc) + || (pSps1->bConstraintSet0Flag != pSps2->bConstraintSet0Flag) + || (pSps1->bConstraintSet1Flag != pSps2->bConstraintSet1Flag) + || (pSps1->bConstraintSet2Flag != pSps2->bConstraintSet2Flag) + || (pSps1->bConstraintSet3Flag != pSps2->bConstraintSet3Flag) + || (pSps1->iLevelIdc != pSps2->iLevelIdc)) { + return false; + } + + return true; +} + +static bool CheckMatchedSubsetSps (SSubsetSps* const pSubsetSps1, SSubsetSps* const pSubsetSps2) { + if (!CheckMatchedSps (&pSubsetSps1->pSps, &pSubsetSps2->pSps)) { + return false; + } + + if ((pSubsetSps1->sSpsSvcExt.iExtendedSpatialScalability != pSubsetSps2->sSpsSvcExt.iExtendedSpatialScalability) + || (pSubsetSps1->sSpsSvcExt.bAdaptiveTcoeffLevelPredFlag != pSubsetSps2->sSpsSvcExt.bAdaptiveTcoeffLevelPredFlag) + || (pSubsetSps1->sSpsSvcExt.bSeqTcoeffLevelPredFlag != pSubsetSps2->sSpsSvcExt.bSeqTcoeffLevelPredFlag) + || (pSubsetSps1->sSpsSvcExt.bSliceHeaderRestrictionFlag != pSubsetSps2->sSpsSvcExt.bSliceHeaderRestrictionFlag)) { + return false; + } + + return true; +} + +/*! 
+ * \brief check if the current parameter can found a presenting sps + * \param pParam the current encoding paramter in SWelsSvcCodingParam + * \param kbUseSubsetSps bool + * \param iDlayerIndex int, the index of current D layer + * \param iDlayerCount int, the number of total D layer + * \param pSpsArray array of all the stored SPSs + * \param pSubsetArray array of all the stored Subset-SPSs + * \return 0 - successful + * -1 - cannot find existing SPS for current encoder parameter + */ +int32_t FindExistingSps (SWelsSvcCodingParam* pParam, const bool kbUseSubsetSps, const int32_t iDlayerIndex, + const int32_t iDlayerCount, const int32_t iSpsNumInUse, + SWelsSPS* pSpsArray, + SSubsetSps* pSubsetArray, bool bSVCBaseLayer) { + SSpatialLayerConfig* pDlayerParam = &pParam->sSpatialLayers[iDlayerIndex]; + + assert (iSpsNumInUse <= MAX_SPS_COUNT); + if (!kbUseSubsetSps) { + SWelsSPS sTmpSps; + WelsInitSps (&sTmpSps, pDlayerParam, &pParam->sDependencyLayers[iDlayerIndex], pParam->uiIntraPeriod, + pParam->iMaxNumRefFrame, + 0, pParam->bEnableFrameCroppingFlag, pParam->iRCMode != RC_OFF_MODE, iDlayerCount, + bSVCBaseLayer); + for (int32_t iId = 0; iId < iSpsNumInUse; iId++) { + if (CheckMatchedSps (&sTmpSps, &pSpsArray[iId])) { + return iId; + } + } + } else { + SSubsetSps sTmpSubsetSps; + WelsInitSubsetSps (&sTmpSubsetSps, pDlayerParam, &pParam->sDependencyLayers[iDlayerIndex], pParam->uiIntraPeriod, + pParam->iMaxNumRefFrame, + 0, pParam->bEnableFrameCroppingFlag, pParam->iRCMode != RC_OFF_MODE, iDlayerCount); + + for (int32_t iId = 0; iId < iSpsNumInUse; iId++) { + if (CheckMatchedSubsetSps (&sTmpSubsetSps, &pSubsetArray[iId])) { + return iId; + } + } + } + + return INVALID_ID; +} + +CWelsParametersetIdConstant::CWelsParametersetIdConstant (const bool bSimulcastAVC, const int32_t kiSpatialLayerNum) { + memset (&m_sParaSetOffset, 0, sizeof (m_sParaSetOffset)); + + m_bSimulcastAVC = bSimulcastAVC; + m_iSpatialLayerNum = kiSpatialLayerNum; + + m_iBasicNeededSpsNum = 1; + 
m_iBasicNeededPpsNum = (1 + m_iSpatialLayerNum); +} + +CWelsParametersetIdConstant::~CWelsParametersetIdConstant() { +} + +int32_t CWelsParametersetIdConstant::GetPpsIdOffset (const int32_t iPpsId) { + return 0; +}; +int32_t CWelsParametersetIdConstant::GetSpsIdOffset (const int32_t iPpsId, const int32_t iSpsId) { + return 0; +}; + +int32_t* CWelsParametersetIdConstant::GetSpsIdOffsetList (const int iParasetType) { + return & (m_sParaSetOffset.sParaSetOffsetVariable[iParasetType].iParaSetIdDelta[0]); +} + +uint32_t CWelsParametersetIdConstant::GetAllNeededParasetNum() { + return (GetNeededSpsNum() + + GetNeededSubsetSpsNum() + + GetNeededPpsNum()); +} + +uint32_t CWelsParametersetIdConstant::GetNeededSpsNum() { + if (0 >= m_sParaSetOffset.uiNeededSpsNum) { + m_sParaSetOffset.uiNeededSpsNum = m_iBasicNeededSpsNum * ((m_bSimulcastAVC) ? (m_iSpatialLayerNum) : (1)); + } + return m_sParaSetOffset.uiNeededSpsNum; +} + + +uint32_t CWelsParametersetIdConstant::GetNeededSubsetSpsNum() { + if (0 >= m_sParaSetOffset.uiNeededSubsetSpsNum) { + m_sParaSetOffset.uiNeededSubsetSpsNum = (m_bSimulcastAVC ? 0 : (m_iSpatialLayerNum - 1)); + } + return m_sParaSetOffset.uiNeededSubsetSpsNum; +} + +uint32_t CWelsParametersetIdConstant::GetNeededPpsNum() { + if (0 == m_sParaSetOffset.uiNeededPpsNum) { + m_sParaSetOffset.uiNeededPpsNum = m_iBasicNeededPpsNum * ((m_bSimulcastAVC) ? 
(m_iSpatialLayerNum) : + (1)); + } + return m_sParaSetOffset.uiNeededPpsNum; +} + +void CWelsParametersetIdConstant::LoadPrevious (SExistingParasetList* pExistingParasetList, SWelsSPS* pSpsArray, + SSubsetSps* pSubsetArray, SWelsPPS* pPpsArray) { + return; +} + +void CWelsParametersetIdConstant::Update (const uint32_t kuiId, const int iParasetType) { + memset (&m_sParaSetOffset, 0, sizeof (SParaSetOffset)); +} + +uint32_t CWelsParametersetIdConstant::GenerateNewSps (sWelsEncCtx* pCtx, const bool kbUseSubsetSps, + const int32_t iDlayerIndex, + const int32_t iDlayerCount, uint32_t kuiSpsId, + SWelsSPS*& pSps, SSubsetSps*& pSubsetSps, bool bSVCBaselayer) { + WelsGenerateNewSps (pCtx, kbUseSubsetSps, iDlayerIndex, + iDlayerCount, kuiSpsId, + pSps, pSubsetSps, bSVCBaselayer); + return kuiSpsId; +} + + +uint32_t CWelsParametersetIdConstant::InitPps (sWelsEncCtx* pCtx, uint32_t kiSpsId, + SWelsSPS* pSps, + SSubsetSps* pSubsetSps, + uint32_t kuiPpsId, + const bool kbDeblockingFilterPresentFlag, + const bool kbUsingSubsetSps, + const bool kbEntropyCodingModeFlag) { + WelsInitPps (& pCtx->pPPSArray[kuiPpsId], pSps, pSubsetSps, kuiPpsId, true, kbUsingSubsetSps, kbEntropyCodingModeFlag); + SetUseSubsetFlag (kuiPpsId, kbUsingSubsetSps); + return kuiPpsId; +} + +void CWelsParametersetIdConstant::SetUseSubsetFlag (const uint32_t iPpsId, const bool bUseSubsetSps) { + m_sParaSetOffset.bPpsIdMappingIntoSubsetsps[iPpsId] = bUseSubsetSps; +} + +void CWelsParametersetIdNonConstant::OutputCurrentStructure (SParaSetOffsetVariable* pParaSetOffsetVariable, + int32_t* pPpsIdList, sWelsEncCtx* pCtx, SExistingParasetList* pExistingParasetList) { + for (int32_t k = 0; k < PARA_SET_TYPE; k++) { + memset ((m_sParaSetOffset.sParaSetOffsetVariable[k].bUsedParaSetIdInBs), 0, MAX_PPS_COUNT * sizeof (bool)); + } + memcpy (pParaSetOffsetVariable, m_sParaSetOffset.sParaSetOffsetVariable, + (PARA_SET_TYPE)*sizeof (SParaSetOffsetVariable)); // confirmed_safe_unsafe_usage +} +void 
CWelsParametersetIdNonConstant::LoadPreviousStructure (SParaSetOffsetVariable* pParaSetOffsetVariable, + int32_t* pPpsIdList) { + memcpy (m_sParaSetOffset.sParaSetOffsetVariable, pParaSetOffsetVariable, + (PARA_SET_TYPE)*sizeof (SParaSetOffsetVariable)); // confirmed_safe_unsafe_usage +} + +// +//CWelsParametersetIdIncreasing +// + +void CWelsParametersetIdIncreasing::DebugSpsPps (const int32_t kiPpsId, const int32_t kiSpsId) { +#if _DEBUG + //SParaSetOffset use, 110421 + //if ( (INCREASING_ID & eSpsPpsIdStrategy)) { + const int32_t kiParameterSetType = (m_sParaSetOffset.bPpsIdMappingIntoSubsetsps[kiPpsId] ? + PARA_SET_TYPE_SUBSETSPS : PARA_SET_TYPE_AVCSPS) ; + + const int32_t kiTmpSpsIdInBs = kiSpsId + + m_sParaSetOffset.sParaSetOffsetVariable[kiParameterSetType].iParaSetIdDelta[kiSpsId]; + const int32_t tmp_pps_id_in_bs = kiPpsId + + m_sParaSetOffset.sParaSetOffsetVariable[PARA_SET_TYPE_PPS].iParaSetIdDelta[kiPpsId]; + assert (MAX_SPS_COUNT > kiTmpSpsIdInBs); + assert (MAX_PPS_COUNT > tmp_pps_id_in_bs); + assert (m_sParaSetOffset.sParaSetOffsetVariable[kiParameterSetType].bUsedParaSetIdInBs[kiTmpSpsIdInBs]); + //} +#endif +} +void CWelsParametersetIdIncreasing::DebugPps (const int32_t kiPpsId) { +#if _DEBUG + const int32_t kiTmpPpsIdInBs = kiPpsId + + m_sParaSetOffset.sParaSetOffsetVariable[PARA_SET_TYPE_PPS].iParaSetIdDelta[ kiPpsId ]; + assert (MAX_PPS_COUNT > kiTmpPpsIdInBs); + + //when activated need to sure there is avialable PPS + assert (m_sParaSetOffset.sParaSetOffsetVariable[PARA_SET_TYPE_PPS].bUsedParaSetIdInBs[kiTmpPpsIdInBs]); +#endif +} + +void ParasetIdAdditionIdAdjust (SParaSetOffsetVariable* sParaSetOffsetVariable, + const int32_t kiCurEncoderParaSetId, + const uint32_t kuiMaxIdInBs) { //paraset_type = 0: SPS; =1: PPS + //SPS_ID in avc_sps and pSubsetSps will be different using this + //SPS_ID case example: + //1st enter: next_spsid_in_bs == 0; spsid == 0; delta==0; //actual spsid_in_bs == 0 + //1st finish: next_spsid_in_bs == 1; + //2nd enter: 
next_spsid_in_bs == 1; spsid == 0; delta==1; //actual spsid_in_bs == 1 + //2nd finish: next_spsid_in_bs == 2; + //31st enter: next_spsid_in_bs == 31; spsid == 0~2; delta==31~29; //actual spsid_in_bs == 31 + //31st finish:next_spsid_in_bs == 0; + //31st enter: next_spsid_in_bs == 0; spsid == 0~2; delta==-2~0; //actual spsid_in_bs == 0 + //31st finish:next_spsid_in_bs == 1; + + const int32_t kiEncId = kiCurEncoderParaSetId; + uint32_t uiNextIdInBs = sParaSetOffsetVariable->uiNextParaSetIdToUseInBs; + + //update current layer's pCodingParam + sParaSetOffsetVariable->iParaSetIdDelta[kiEncId] = uiNextIdInBs - + kiEncId; //for current parameter set, change its id_delta + //write pso pData for next update: + sParaSetOffsetVariable->bUsedParaSetIdInBs[uiNextIdInBs] = true; // update current used_id + + //prepare for next update: + // find the next avaibable iId + ++uiNextIdInBs; + if (uiNextIdInBs >= kuiMaxIdInBs) { + uiNextIdInBs = 0;//ensure the SPS_ID wound not exceed MAX_SPS_COUNT + } + // update next_id + sParaSetOffsetVariable->uiNextParaSetIdToUseInBs = uiNextIdInBs; +} + +void CWelsParametersetIdIncreasing::Update (const uint32_t kuiId, const int iParasetType) { +#if _DEBUG + m_sParaSetOffset.eSpsPpsIdStrategy = INCREASING_ID; + assert (kuiId < MAX_DQ_LAYER_NUM); +#endif + + ParasetIdAdditionIdAdjust (& (m_sParaSetOffset.sParaSetOffsetVariable[iParasetType]), + kuiId, + (iParasetType != PARA_SET_TYPE_PPS) ? MAX_SPS_COUNT : MAX_PPS_COUNT); +} +//((SPS_PPS_LISTING != pEncCtx->pSvcParam->eSpsPpsIdStrategy) ? 
(& +// (pEncCtx->sPSOVector.sParaSetOffsetVariable[PARA_SET_TYPE_PPS].iParaSetIdDelta[0])) : NULL) + +int32_t CWelsParametersetIdIncreasing::GetPpsIdOffset (const int32_t kiPpsId) { +#if _DEBUG + DebugPps (kiPpsId); +#endif + return (m_sParaSetOffset.sParaSetOffsetVariable[PARA_SET_TYPE_PPS].iParaSetIdDelta[kiPpsId]); +} + +int32_t CWelsParametersetIdIncreasing::GetSpsIdOffset (const int32_t kiPpsId, const int32_t kiSpsId) { + const int32_t kiParameterSetType = (m_sParaSetOffset.bPpsIdMappingIntoSubsetsps[kiPpsId] ? + PARA_SET_TYPE_SUBSETSPS : PARA_SET_TYPE_AVCSPS); +#if _DEBUG + DebugSpsPps (kiPpsId, kiSpsId); +#endif + return (m_sParaSetOffset.sParaSetOffsetVariable[kiParameterSetType].iParaSetIdDelta[kiSpsId]); +} + +// +//CWelsParametersetSpsListing +// + +CWelsParametersetSpsListing::CWelsParametersetSpsListing (const bool bSimulcastAVC, + const int32_t kiSpatialLayerNum) : CWelsParametersetIdNonConstant (bSimulcastAVC, kiSpatialLayerNum) { + memset (&m_sParaSetOffset, 0, sizeof (m_sParaSetOffset)); + + m_bSimulcastAVC = bSimulcastAVC; + m_iSpatialLayerNum = kiSpatialLayerNum; + + m_iBasicNeededSpsNum = MAX_SPS_COUNT; + m_iBasicNeededPpsNum = 1; +} + +uint32_t CWelsParametersetSpsListing::GetNeededSubsetSpsNum() { + if (0 >= m_sParaSetOffset.uiNeededSubsetSpsNum) { + // sPSOVector.uiNeededSubsetSpsNum = ((pSvcParam->bSimulcastAVC) ? (0) :((SPS_LISTING & pSvcParam->eSpsPpsIdStrategy) ? (MAX_SPS_COUNT) : (pSvcParam->iSpatialLayerNum - 1))); + m_sParaSetOffset.uiNeededSubsetSpsNum = ((m_bSimulcastAVC) ? 
(0) : + (MAX_SPS_COUNT)); + } + return m_sParaSetOffset.uiNeededSubsetSpsNum; +} + +void CWelsParametersetSpsListing::LoadPreviousSps (SExistingParasetList* pExistingParasetList, SWelsSPS* pSpsArray, + SSubsetSps* pSubsetArray) { + //if ((SPS_LISTING & pParam->eSpsPpsIdStrategy) && (NULL != pExistingParasetList)) { + m_sParaSetOffset.uiInUseSpsNum = pExistingParasetList->uiInUseSpsNum; + memcpy (pSpsArray, pExistingParasetList->sSps, MAX_SPS_COUNT * sizeof (SWelsSPS)); + + if (GetNeededSubsetSpsNum() > 0) { + m_sParaSetOffset.uiInUseSubsetSpsNum = pExistingParasetList->uiInUseSubsetSpsNum; + memcpy (pSubsetArray, pExistingParasetList->sSubsetSps, MAX_SPS_COUNT * sizeof (SSubsetSps)); + } else { + m_sParaSetOffset.uiInUseSubsetSpsNum = 0; + } + //} + +} +void CWelsParametersetSpsListing::LoadPrevious (SExistingParasetList* pExistingParasetList, SWelsSPS* pSpsArray, + SSubsetSps* pSubsetArray, SWelsPPS* pPpsArray) { + if (NULL == pExistingParasetList) { + return; + } + LoadPreviousSps (pExistingParasetList, pSpsArray, pSubsetArray); + LoadPreviousPps (pExistingParasetList, pPpsArray); +} + +bool CWelsParametersetSpsListing::CheckParamCompatibility (SWelsSvcCodingParam* pCodingParam, SLogContext* pLogCtx) { + if (pCodingParam->iSpatialLayerNum > 1 && (!pCodingParam->bSimulcastAVC)) { + WelsLog (pLogCtx, WELS_LOG_WARNING, + "ParamValidationExt(), eSpsPpsIdStrategy setting (%d) with multiple svc SpatialLayers (%d) not supported! 
eSpsPpsIdStrategy adjusted to CONSTANT_ID", + pCodingParam->eSpsPpsIdStrategy, pCodingParam->iSpatialLayerNum); + pCodingParam->eSpsPpsIdStrategy = CONSTANT_ID; + return false; + } + return true; +} + +bool CWelsParametersetSpsListing::CheckPpsGenerating() { + return true; +} +int32_t CWelsParametersetSpsListing::SpsReset (sWelsEncCtx* pCtx, bool kbUseSubsetSps) { + + // reset current list + if (!kbUseSubsetSps) { + m_sParaSetOffset.uiInUseSpsNum = 1; + memset (pCtx->pSpsArray, 0, MAX_SPS_COUNT * sizeof (SWelsSPS)); + } else { + m_sParaSetOffset.uiInUseSubsetSpsNum = 1; + memset (pCtx->pSubsetArray, 0, MAX_SPS_COUNT * sizeof (SSubsetSps)); + } + + //iSpsId = 0; + return 0; +} +uint32_t CWelsParametersetSpsListing::GenerateNewSps (sWelsEncCtx* pCtx, const bool kbUseSubsetSps, + const int32_t iDlayerIndex, + const int32_t iDlayerCount, uint32_t kuiSpsId, + SWelsSPS*& pSps, SSubsetSps*& pSubsetSps, bool bSvcBaselayer) { + //check if the current param can fit in an existing SPS + const int32_t kiFoundSpsId = FindExistingSps (pCtx->pSvcParam, kbUseSubsetSps, iDlayerIndex, iDlayerCount, + kbUseSubsetSps ? (m_sParaSetOffset.uiInUseSubsetSpsNum) : (m_sParaSetOffset.uiInUseSpsNum), + pCtx->pSpsArray, + pCtx->pSubsetArray, bSvcBaselayer); + + + if (INVALID_ID != kiFoundSpsId) { + //if yes, set pSps or pSubsetSps to it + kuiSpsId = kiFoundSpsId; + if (!kbUseSubsetSps) { + pSps = & (pCtx->pSpsArray[kiFoundSpsId]); + } else { + pSubsetSps = & (pCtx->pSubsetArray[kiFoundSpsId]); + } + } else { + //if no, generate a new SPS as usual + if (!CheckPpsGenerating()) { + return -1; + } + + kuiSpsId = (!kbUseSubsetSps) ? 
(m_sParaSetOffset.uiInUseSpsNum++) : (m_sParaSetOffset.uiInUseSubsetSpsNum++); + if (kuiSpsId >= MAX_SPS_COUNT) { + if (SpsReset (pCtx, kbUseSubsetSps) < 0) { + return -1; + } + kuiSpsId = 0; + } + + WelsGenerateNewSps (pCtx, kbUseSubsetSps, iDlayerIndex, + iDlayerCount, kuiSpsId, pSps, pSubsetSps, bSvcBaselayer); + } + return kuiSpsId; +} + +void CWelsParametersetSpsListing::UpdateParaSetNum (sWelsEncCtx* pCtx) { + pCtx->iSpsNum = m_sParaSetOffset.uiInUseSpsNum; + pCtx->iSubsetSpsNum = m_sParaSetOffset.uiInUseSubsetSpsNum; +}; + +void CWelsParametersetSpsListing::OutputCurrentStructure (SParaSetOffsetVariable* pParaSetOffsetVariable, + int32_t* pPpsIdList, sWelsEncCtx* pCtx, SExistingParasetList* pExistingParasetList) { + CWelsParametersetIdNonConstant::OutputCurrentStructure (pParaSetOffsetVariable, pPpsIdList, pCtx, pExistingParasetList); + pExistingParasetList->uiInUseSpsNum = m_sParaSetOffset.uiInUseSpsNum; + + memcpy (pExistingParasetList->sSps, pCtx->pSpsArray, MAX_SPS_COUNT * sizeof (SWelsSPS)); + if (NULL != pCtx->pSubsetArray) { + pExistingParasetList->uiInUseSubsetSpsNum = m_sParaSetOffset.uiInUseSubsetSpsNum; + memcpy (pExistingParasetList->sSubsetSps, pCtx->pSubsetArray, MAX_SPS_COUNT * sizeof (SSubsetSps)); + } else { + pExistingParasetList->uiInUseSubsetSpsNum = 0; + } +} + +// +//CWelsParametersetSpsPpsListing +// + +CWelsParametersetSpsPpsListing::CWelsParametersetSpsPpsListing (const bool bSimulcastAVC, + const int32_t kiSpatialLayerNum): CWelsParametersetSpsListing (bSimulcastAVC, kiSpatialLayerNum) { + memset (&m_sParaSetOffset, 0, sizeof (m_sParaSetOffset)); + + m_bSimulcastAVC = bSimulcastAVC; + m_iSpatialLayerNum = kiSpatialLayerNum; + + m_iBasicNeededSpsNum = MAX_SPS_COUNT; + m_iBasicNeededPpsNum = MAX_PPS_COUNT; +} + +void CWelsParametersetSpsPpsListing::LoadPreviousPps (SExistingParasetList* pExistingParasetList, SWelsPPS* pPpsArray) { + // copy from existing if the pointer exists + //if ((SPS_PPS_LISTING == pParam->eSpsPpsIdStrategy) && 
(NULL != pExistingParasetList)) { + m_sParaSetOffset.uiInUsePpsNum = pExistingParasetList->uiInUsePpsNum; + memcpy (pPpsArray, pExistingParasetList->sPps, MAX_PPS_COUNT * sizeof (SWelsPPS)); + //} +} + +/* if ((SPS_PPS_LISTING == pCtx->pSvcParam->eSpsPpsIdStrategy) && (pCtx->iPpsNum < MAX_PPS_COUNT)) { + UpdatePpsList (pCtx); + }*/ +void CWelsParametersetSpsPpsListing::UpdatePpsList (sWelsEncCtx* pCtx) { + if (pCtx->iPpsNum >= MAX_PPS_COUNT) { + return; + } + assert (pCtx->iPpsNum <= MAX_DQ_LAYER_NUM); + + //Generate PPS LIST + int32_t iPpsId = 0, iUsePpsNum = pCtx->iPpsNum; + + for (int32_t iIdrRound = 0; iIdrRound < MAX_PPS_COUNT; iIdrRound++) { + for (iPpsId = 0; iPpsId < pCtx->iPpsNum; iPpsId++) { + m_sParaSetOffset.iPpsIdList[iPpsId][iIdrRound] = ((iIdrRound * iUsePpsNum + iPpsId) % MAX_PPS_COUNT); + } + } + + for (iPpsId = iUsePpsNum; iPpsId < MAX_PPS_COUNT; iPpsId++) { + memcpy (& (pCtx->pPPSArray[iPpsId]), & (pCtx->pPPSArray[iPpsId % iUsePpsNum]), sizeof (SWelsPPS)); + pCtx->pPPSArray[iPpsId].iPpsId = iPpsId; + pCtx->iPpsNum++; + } + + assert (pCtx->iPpsNum == MAX_PPS_COUNT); + m_sParaSetOffset.uiInUsePpsNum = pCtx->iPpsNum; +} + + +bool CWelsParametersetSpsPpsListing::CheckPpsGenerating() { + /*if ((SPS_PPS_LISTING == pCtx->pSvcParam->eSpsPpsIdStrategy) && (MAX_PPS_COUNT <= pCtx->sPSOVector.uiInUsePpsNum)) { + //check if we can generate new SPS or not + WelsLog (& pCtx->sLogCtx, WELS_LOG_ERROR, + "InitDqLayers(), cannot generate new SPS under the SPS_PPS_LISTING mode!"); + return ENC_RETURN_UNSUPPORTED_PARA; + }*/ + if (MAX_PPS_COUNT <= m_sParaSetOffset.uiInUsePpsNum) { + return false; + } + + return true; +} +int32_t CWelsParametersetSpsPpsListing::SpsReset (sWelsEncCtx* pCtx, bool kbUseSubsetSps) { + /* if (SPS_PPS_LISTING == pParam->eSpsPpsIdStrategy) { + WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_ERROR, + "InitDqLayers(), cannot generate new SPS under the SPS_PPS_LISTING mode!"); + return ENC_RETURN_UNSUPPORTED_PARA; + }*/ + return -1; +} + +int32_t 
FindExistingPps (SWelsSPS* pSps, SSubsetSps* pSubsetSps, const bool kbUseSubsetSps, const int32_t iSpsId, + const bool kbEntropyCodingFlag, const int32_t iPpsNumInUse, + SWelsPPS* pPpsArray) { +#if !defined(DISABLE_FMO_FEATURE) + // feature not supported yet + return INVALID_ID; +#endif//!DISABLE_FMO_FEATURE + + SWelsPPS sTmpPps; + WelsInitPps (&sTmpPps, + pSps, + pSubsetSps, + 0, + true, + kbUseSubsetSps, + kbEntropyCodingFlag); + + assert (iPpsNumInUse <= MAX_PPS_COUNT); + for (int32_t iId = 0; iId < iPpsNumInUse; iId++) { + if ((sTmpPps.iSpsId == pPpsArray[iId].iSpsId) + && (sTmpPps.bEntropyCodingModeFlag == pPpsArray[iId].bEntropyCodingModeFlag) + && (sTmpPps.iPicInitQp == pPpsArray[iId].iPicInitQp) + && (sTmpPps.iPicInitQs == pPpsArray[iId].iPicInitQs) + && (sTmpPps.uiChromaQpIndexOffset == pPpsArray[iId].uiChromaQpIndexOffset) + && (sTmpPps.bDeblockingFilterControlPresentFlag == pPpsArray[iId].bDeblockingFilterControlPresentFlag) + ) { + return iId; + } + } + + return INVALID_ID; +} + +uint32_t CWelsParametersetSpsPpsListing::InitPps (sWelsEncCtx* pCtx, uint32_t kiSpsId, + SWelsSPS* pSps, + SSubsetSps* pSubsetSps, + uint32_t kuiPpsId, + const bool kbDeblockingFilterPresentFlag, + const bool kbUsingSubsetSps, + const bool kbEntropyCodingModeFlag) { + const int32_t kiFoundPpsId = FindExistingPps (pSps, pSubsetSps, kbUsingSubsetSps, kiSpsId, + kbEntropyCodingModeFlag, + m_sParaSetOffset.uiInUsePpsNum, + pCtx->pPPSArray); + + + if (INVALID_ID != kiFoundPpsId) { + //if yes, set pPps to it + kuiPpsId = kiFoundPpsId; + } else { + kuiPpsId = (m_sParaSetOffset.uiInUsePpsNum++); + WelsInitPps (& pCtx->pPPSArray[kuiPpsId], pSps, pSubsetSps, kuiPpsId, true, kbUsingSubsetSps, kbEntropyCodingModeFlag); + } + SetUseSubsetFlag (kuiPpsId, kbUsingSubsetSps); + return kuiPpsId; +} + +void CWelsParametersetSpsPpsListing::UpdateParaSetNum (sWelsEncCtx* pCtx) { + CWelsParametersetSpsListing::UpdateParaSetNum (pCtx); + + //UpdatePpsList (pCtx); + pCtx->iPpsNum = 
m_sParaSetOffset.uiInUsePpsNum; +} + +int32_t CWelsParametersetSpsPpsListing::GetCurrentPpsId (const int32_t iPpsId, const int32_t iIdrLoop) { + return m_sParaSetOffset.iPpsIdList[iPpsId][iIdrLoop]; +} + +void CWelsParametersetSpsPpsListing::LoadPreviousStructure (SParaSetOffsetVariable* pParaSetOffsetVariable, + int32_t* pPpsIdList) { + memcpy (m_sParaSetOffset.sParaSetOffsetVariable, pParaSetOffsetVariable, + (PARA_SET_TYPE)*sizeof (SParaSetOffsetVariable)); // confirmed_safe_unsafe_usage + + memcpy ((m_sParaSetOffset.iPpsIdList), pPpsIdList, MAX_DQ_LAYER_NUM * MAX_PPS_COUNT * sizeof (int32_t)); + +} + +void CWelsParametersetSpsPpsListing::OutputCurrentStructure (SParaSetOffsetVariable* pParaSetOffsetVariable, + int32_t* pPpsIdList, sWelsEncCtx* pCtx, SExistingParasetList* pExistingParasetList) { + CWelsParametersetSpsListing::OutputCurrentStructure (pParaSetOffsetVariable, pPpsIdList, pCtx, pExistingParasetList); + + pExistingParasetList->uiInUsePpsNum = m_sParaSetOffset.uiInUsePpsNum; + memcpy (pExistingParasetList->sPps, pCtx->pPps, MAX_PPS_COUNT * sizeof (SWelsPPS)); + memcpy (pPpsIdList, (m_sParaSetOffset.iPpsIdList), MAX_DQ_LAYER_NUM * MAX_PPS_COUNT * sizeof (int32_t)); +} + +// +//CWelsParametersetSpsListingPpsIncreasing +// + +int32_t CWelsParametersetSpsListingPpsIncreasing::GetPpsIdOffset (const int32_t kiPpsId) { + //same as CWelsParametersetIdIncreasing::GetPpsIdOffset + return (m_sParaSetOffset.sParaSetOffsetVariable[PARA_SET_TYPE_PPS].iParaSetIdDelta[kiPpsId]); +} + +void CWelsParametersetSpsListingPpsIncreasing::Update (const uint32_t kuiId, const int iParasetType) { + //same as CWelsParametersetIdIncreasing::Update +#if _DEBUG + assert (kuiId < MAX_DQ_LAYER_NUM); +#endif + + ParasetIdAdditionIdAdjust (& (m_sParaSetOffset.sParaSetOffsetVariable[iParasetType]), + kuiId, + (iParasetType != PARA_SET_TYPE_PPS) ? 
MAX_SPS_COUNT : MAX_PPS_COUNT); +} +} diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/picture_handle.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/picture_handle.cpp new file mode 100644 index 000000000..b0721c438 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/picture_handle.cpp @@ -0,0 +1,184 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file picture_handle.c + * + * \brief picture pData handling + * + * \date 5/20/2009 Created + * + *************************************************************************************/ +#include "picture_handle.h" +#include "svc_motion_estimate.h" + +namespace WelsEnc { +/*! + * \brief alloc picture pData with borders for each plane based width and height of picture + * \param cx width of picture in pixels + * \param cy height of picture in pixels + * \param need_data need pData allocation + * \pram need_expand need borders expanding + * \return successful if effective picture pointer returned, otherwise failed with NULL + */ +SPicture* AllocPicture (CMemoryAlign* pMa, const int32_t kiWidth , const int32_t kiHeight, + bool bNeedMbInfo, int32_t iNeedFeatureStorage) { + SPicture* pPic = NULL; + int32_t iPicWidth = 0; + int32_t iPicHeight = 0; + + int32_t iPicChromaWidth = 0; + int32_t iPicChromaHeight = 0; + int32_t iLumaSize = 0; + int32_t iChromaSize = 0; + + pPic = static_cast (pMa->WelsMallocz (sizeof (SPicture), "pPic")); + + WELS_VERIFY_RETURN_IF (NULL, NULL == pPic); + + iPicWidth = WELS_ALIGN (kiWidth, MB_WIDTH_LUMA) + (PADDING_LENGTH << 1); // with width of horizon + iPicHeight = WELS_ALIGN (kiHeight, MB_HEIGHT_LUMA) + (PADDING_LENGTH << 1); // with height of vertical + iPicChromaWidth = iPicWidth >> 1; + iPicChromaHeight = iPicHeight >> 1; + iPicWidth = WELS_ALIGN (iPicWidth, + 32); // 32(or 16 for chroma below) to match original imp. 
here instead of cache_line_size + iPicChromaWidth = WELS_ALIGN (iPicChromaWidth, 16); + iLumaSize = iPicWidth * iPicHeight; + iChromaSize = iPicChromaWidth * iPicChromaHeight; + + pPic->pBuffer = (uint8_t*)pMa->WelsMalloc (iLumaSize /* luma */ + + (iChromaSize << 1) /* Cb,Cr */ + , "pPic->pBuffer"); + WELS_VERIFY_RETURN_PROC_IF (NULL, NULL == pPic->pBuffer, FreePicture (pMa, &pPic)); + pPic->iLineSize[0] = iPicWidth; + pPic->iLineSize[1] = pPic->iLineSize[2] = iPicChromaWidth; + pPic->pData[0] = pPic->pBuffer + (1 + pPic->iLineSize[0]) * PADDING_LENGTH; + pPic->pData[1] = pPic->pBuffer + iLumaSize + (((1 + pPic->iLineSize[1]) * PADDING_LENGTH) >> 1); + pPic->pData[2] = pPic->pBuffer + iLumaSize + iChromaSize + (((1 + pPic->iLineSize[2]) * PADDING_LENGTH) >> 1); + + pPic->iWidthInPixel = kiWidth; + pPic->iHeightInPixel = kiHeight; + pPic->iFrameNum = -1; + + pPic->bIsLongRef = false; + pPic->iLongTermPicNum = -1; + pPic->uiRecieveConfirmed = 0; + pPic->iMarkFrameNum = -1; + + if (bNeedMbInfo) { + const uint32_t kuiCountMbNum = ((15 + kiWidth) >> 4) * ((15 + kiHeight) >> 4); + + pPic->uiRefMbType = (uint32_t*)pMa->WelsMallocz (kuiCountMbNum * sizeof (uint32_t), "pPic->uiRefMbType"); + WELS_VERIFY_RETURN_PROC_IF (NULL, NULL == pPic->uiRefMbType, FreePicture (pMa, &pPic)); + + pPic->pRefMbQp = (uint8_t*)pMa->WelsMallocz (kuiCountMbNum * sizeof (uint8_t), "pPic->pRefMbQp"); + WELS_VERIFY_RETURN_PROC_IF (NULL, NULL == pPic->pRefMbQp, FreePicture (pMa, &pPic)); + + pPic->sMvList = static_cast (pMa->WelsMallocz (kuiCountMbNum * sizeof (SMVUnitXY), + "pPic->sMvList")); + WELS_VERIFY_RETURN_PROC_IF (NULL, NULL == pPic->sMvList, FreePicture (pMa, &pPic)); + + pPic->pMbSkipSad = (int32_t*)pMa->WelsMallocz (kuiCountMbNum * sizeof (int32_t), "pPic->pMbSkipSad"); + WELS_VERIFY_RETURN_PROC_IF (NULL, NULL == pPic->pMbSkipSad, FreePicture (pMa, &pPic)); + } + + if (iNeedFeatureStorage) { + pPic->pScreenBlockFeatureStorage = static_cast (pMa->WelsMallocz (sizeof ( + 
SScreenBlockFeatureStorage), "pScreenBlockFeatureStorage")); + int32_t iReturn = RequestScreenBlockFeatureStorage (pMa, kiWidth, kiHeight, iNeedFeatureStorage, + pPic->pScreenBlockFeatureStorage); + WELS_VERIFY_RETURN_PROC_IF (NULL, ENC_RETURN_SUCCESS != iReturn, FreePicture (pMa, &pPic)); + } else { + pPic->pScreenBlockFeatureStorage = NULL; + } + return pPic; +} + +/*! + * \brief free picture pData planes + * \param pPic picture pointer to be destoryed + * \return none + */ +void FreePicture (CMemoryAlign* pMa, SPicture** ppPic) { + if (NULL != ppPic && NULL != *ppPic) { + SPicture* pPic = *ppPic; + + if (NULL != pPic->pBuffer) { + pMa->WelsFree (pPic->pBuffer, "pPic->pBuffer"); + pPic->pBuffer = NULL; + } + pPic->pBuffer = NULL; + pPic->pData[0] = + pPic->pData[1] = + pPic->pData[2] = NULL; + pPic->iLineSize[0] = + pPic->iLineSize[1] = + pPic->iLineSize[2] = 0; + + pPic->iWidthInPixel = 0; + pPic->iHeightInPixel = 0; + pPic->iFrameNum = -1; + + pPic->bIsLongRef = false; + pPic->uiRecieveConfirmed = 0; + pPic->iLongTermPicNum = -1; + pPic->iMarkFrameNum = -1; + + if (pPic->uiRefMbType) { + pMa->WelsFree (pPic->uiRefMbType, "pPic->uiRefMbType"); + pPic->uiRefMbType = NULL; + } + if (pPic->pRefMbQp) { + pMa->WelsFree (pPic->pRefMbQp, "pPic->pRefMbQp"); + pPic->pRefMbQp = NULL; + } + + if (pPic->sMvList) { + pMa->WelsFree (pPic->sMvList, "pPic->sMvList"); + pPic->sMvList = NULL; + } + if (pPic->pMbSkipSad) { + pMa->WelsFree (pPic->pMbSkipSad, "pPic->pMbSkipSad"); + pPic->pMbSkipSad = NULL; + } + + if (pPic->pScreenBlockFeatureStorage) { + ReleaseScreenBlockFeatureStorage (pMa, pPic->pScreenBlockFeatureStorage); + pMa->WelsFree (pPic->pScreenBlockFeatureStorage, "pPic->pScreenBlockFeatureStorage"); + pPic->pScreenBlockFeatureStorage = NULL; + } + + pMa->WelsFree (*ppPic, "pPic"); + *ppPic = NULL; + } +} + +} // namespace WelsEnc + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/ratectl.cpp 
b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/ratectl.cpp new file mode 100644 index 000000000..6634f94e4 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/ratectl.cpp @@ -0,0 +1,1556 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * ratectl.c + * + * Abstract + * Rate Control + * + * History + * 9/8/2009 Created + * 12/26/2011 Modified + * + * + * + *************************************************************************/ +#include "rc.h" +#include "encoder_context.h" +#include "utils.h" +#include "svc_enc_golomb.h" + + +namespace WelsEnc { + +const int32_t g_kiQpToQstepTable[52] = { + 63, 71, 79, 89, 100, 112, 126, 141, 159, 178, + 200, 224, 252, 283, 317, 356, 400, 449, 504, 566, + 635, 713, 800, 898, 1008, 1131, 1270, 1425, 1600, 1796, + 2016, 2263, 2540, 2851, 3200, 3592, 4032, 4525, 5080, 5702, + 6400, 7184, 8063, 9051, 10159, 11404, 12800, 14368, 16127, 18102, + 20319, 22807 +}; //WELS_ROUND(INT_MULTIPLY*pow (2.0, (iQP - 4.0) / 6.0)) + +void RcInitLayerMemory (SWelsSvcRc* pWelsSvcRc, CMemoryAlign* pMA, const int32_t kiMaxTl) { + const int32_t kiGomSize = pWelsSvcRc->iGomSize; + const int32_t kiGomSizeD = kiGomSize * sizeof (double); + const int32_t kiGomSizeI = kiGomSize * sizeof (int32_t); + const int32_t kiLayerRcSize = kiGomSizeD + (kiGomSizeI * 3) + sizeof (SRCTemporal) * kiMaxTl; + uint8_t* pBaseMem = (uint8_t*)pMA->WelsMalloc (kiLayerRcSize, "pWelsSvcRc->pTemporalOverRc"); + + if (NULL == pBaseMem) + return; + + pWelsSvcRc->pTemporalOverRc = (SRCTemporal*)pBaseMem; + pBaseMem += sizeof (SRCTemporal) * kiMaxTl; + pWelsSvcRc->pGomComplexity = (double*)pBaseMem; + pBaseMem += kiGomSizeD; + pWelsSvcRc->pGomForegroundBlockNum = (int32_t*)pBaseMem; + pBaseMem += kiGomSizeI; + pWelsSvcRc->pCurrentFrameGomSad = (int32_t*)pBaseMem; + pBaseMem += kiGomSizeI; + pWelsSvcRc->pGomCost = (int32_t*)pBaseMem; + +} + +void RcFreeLayerMemory (SWelsSvcRc* pWelsSvcRc, CMemoryAlign* pMA) { + if (pWelsSvcRc != NULL && pWelsSvcRc->pTemporalOverRc != NULL) { + pMA->WelsFree (pWelsSvcRc->pTemporalOverRc, "pWelsSvcRc->pTemporalOverRc"); + pWelsSvcRc->pTemporalOverRc = NULL; + pWelsSvcRc->pGomComplexity = NULL; + pWelsSvcRc->pGomForegroundBlockNum = NULL; + pWelsSvcRc->pCurrentFrameGomSad = NULL; 
+ pWelsSvcRc->pGomCost = NULL; + } +} + +static inline int32_t RcConvertQp2QStep (int32_t iQP) { + return g_kiQpToQstepTable[iQP]; +} +static inline int32_t RcConvertQStep2Qp (int32_t iQpStep) { + if (iQpStep <= g_kiQpToQstepTable[0]) //Qp step too small, return qp=0 + return 0; + return WELS_ROUND ((6 * log (iQpStep * 1.0f / INT_MULTIPLY) / log (2.0) + 4.0)); +} + +void RcInitSequenceParameter (sWelsEncCtx* pEncCtx) { + SWelsSvcRc* pWelsSvcRc = NULL; + SSpatialLayerConfig* pDLayerParam = NULL; + + int32_t j = 0; + int32_t iMbWidth = 0; + + bool bMultiSliceMode = false; + int32_t iGomRowMode0 = 1, iGomRowMode1 = 1; + for (j = 0; j < pEncCtx->pSvcParam->iSpatialLayerNum; j++) { + pWelsSvcRc = &pEncCtx->pWelsSvcRc[j]; + pDLayerParam = &pEncCtx->pSvcParam->sSpatialLayers[j]; + iMbWidth = (pDLayerParam->iVideoWidth >> 4); + pWelsSvcRc->iNumberMbFrame = iMbWidth * (pDLayerParam->iVideoHeight >> 4); + + pWelsSvcRc->iRcVaryPercentage = pEncCtx->pSvcParam->iBitsVaryPercentage; // % -- for temp + pWelsSvcRc->iRcVaryRatio = pWelsSvcRc->iRcVaryPercentage; + + pWelsSvcRc->iBufferFullnessSkip = 0; + pWelsSvcRc->uiLastTimeStamp = 0; + pWelsSvcRc->iCost2BitsIntra = 1; + pWelsSvcRc->iAvgCost2Bits = 1; + pWelsSvcRc->iSkipBufferRatio = SKIP_RATIO; + pWelsSvcRc->iContinualSkipFrames = 0; + pWelsSvcRc->iQpRangeUpperInFrame = (QP_RANGE_UPPER_MODE1 * MAX_BITS_VARY_PERCENTAGE - (( + QP_RANGE_UPPER_MODE1 - QP_RANGE_MODE0) * + pWelsSvcRc->iRcVaryRatio)) / MAX_BITS_VARY_PERCENTAGE; + pWelsSvcRc->iQpRangeLowerInFrame = (QP_RANGE_LOWER_MODE1 * MAX_BITS_VARY_PERCENTAGE - (( + QP_RANGE_LOWER_MODE1 - QP_RANGE_MODE0) * + pWelsSvcRc->iRcVaryRatio)) / MAX_BITS_VARY_PERCENTAGE; + + if (iMbWidth <= MB_WIDTH_THRESHOLD_90P) { + pWelsSvcRc->iSkipQpValue = SKIP_QP_90P; + iGomRowMode0 = GOM_ROW_MODE0_90P; + iGomRowMode1 = GOM_ROW_MODE1_90P; + } else if (iMbWidth <= MB_WIDTH_THRESHOLD_180P) { + pWelsSvcRc->iSkipQpValue = SKIP_QP_180P; + iGomRowMode0 = GOM_ROW_MODE0_180P; + iGomRowMode1 = 
GOM_ROW_MODE1_180P; + } else if (iMbWidth <= MB_WIDTH_THRESHOLD_360P) { + pWelsSvcRc->iSkipQpValue = SKIP_QP_360P; + iGomRowMode0 = GOM_ROW_MODE0_360P; + iGomRowMode1 = GOM_ROW_MODE1_360P; + } else { + pWelsSvcRc->iSkipQpValue = SKIP_QP_720P; + iGomRowMode0 = GOM_ROW_MODE0_720P; + iGomRowMode1 = GOM_ROW_MODE1_720P; + } + iGomRowMode0 = iGomRowMode1 + ((iGomRowMode0 - iGomRowMode1) * pWelsSvcRc->iRcVaryRatio / MAX_BITS_VARY_PERCENTAGE); + + pWelsSvcRc->iNumberMbGom = iMbWidth * iGomRowMode0; + + pWelsSvcRc->iMinQp = pEncCtx->pSvcParam->iMinQp; + + pWelsSvcRc->iMaxQp = pEncCtx->pSvcParam->iMaxQp; + + pWelsSvcRc->iFrameDeltaQpUpper = LAST_FRAME_QP_RANGE_UPPER_MODE1 - ((LAST_FRAME_QP_RANGE_UPPER_MODE1 - + LAST_FRAME_QP_RANGE_UPPER_MODE0) * pWelsSvcRc->iRcVaryRatio / MAX_BITS_VARY_PERCENTAGE); + pWelsSvcRc->iFrameDeltaQpLower = LAST_FRAME_QP_RANGE_LOWER_MODE1 - ((LAST_FRAME_QP_RANGE_LOWER_MODE1 - + LAST_FRAME_QP_RANGE_LOWER_MODE0) * pWelsSvcRc->iRcVaryRatio / MAX_BITS_VARY_PERCENTAGE); + + pWelsSvcRc->iSkipFrameNum = 0; + pWelsSvcRc->iGomSize = (pWelsSvcRc->iNumberMbFrame + pWelsSvcRc->iNumberMbGom - 1) / pWelsSvcRc->iNumberMbGom; + pWelsSvcRc->bEnableGomQp = true; + + RcInitLayerMemory (pWelsSvcRc, pEncCtx->pMemAlign, 1 + pEncCtx->pSvcParam->sDependencyLayers[j].iHighestTemporalId); + + bMultiSliceMode = ((SM_RASTER_SLICE == pDLayerParam->sSliceArgument.uiSliceMode) || + (SM_SIZELIMITED_SLICE == pDLayerParam->sSliceArgument.uiSliceMode)); + if (bMultiSliceMode) + pWelsSvcRc->iNumberMbGom = pWelsSvcRc->iNumberMbFrame; + } +} + + +void RcInitTlWeight (sWelsEncCtx* pEncCtx) { + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId]; + SRCTemporal* pTOverRc = pWelsSvcRc->pTemporalOverRc; + SSpatialLayerInternal* pDLayerParam = &pEncCtx->pSvcParam->sDependencyLayers[pEncCtx->uiDependencyId]; + const int32_t kiDecompositionStages = pDLayerParam->iDecompositionStages; + const int32_t kiHighestTid = pDLayerParam->iHighestTemporalId; + +//Index 0:Virtual GOP 
size, Index 1:Frame rate +//double WeightArray[4][4] = { {1.0, 0, 0, 0}, {0.6, 0.4, 0, 0}, {0.4, 0.3, 0.15, 0}, {0.25, 0.15, 0.125, 0.0875}}; + int32_t iWeightArray[4][4] = { {2000, 0, 0, 0}, {1200, 800, 0, 0}, {800, 600, 300, 0}, {500, 300, 250, 175}}; // original*WEIGHT_MULTIPLY + const int32_t kiGopSize = (1 << kiDecompositionStages); + int32_t i, k, n; + + n = 0; + while (n <= kiHighestTid) { + pTOverRc[n].iTlayerWeight = iWeightArray[kiDecompositionStages][n]; + pTOverRc[n].iMinQp = pWelsSvcRc->iMinQp + (n << 1); + pTOverRc[n].iMinQp = WELS_CLIP3 (pTOverRc[n].iMinQp, 0, 51); + pTOverRc[n].iMaxQp = pWelsSvcRc->iMaxQp + (n << 1); + pTOverRc[n].iMaxQp = WELS_CLIP3 (pTOverRc[n].iMaxQp, pTOverRc[n].iMinQp, 51); + ++ n; + } +//Calculate the frame index for the current frame and its reference frame + for (n = 0; n < VGOP_SIZE; n += kiGopSize) { + pWelsSvcRc->iTlOfFrames[n] = 0; + for (i = 1; i <= kiDecompositionStages; i++) { + for (k = 1 << (kiDecompositionStages - i); k < kiGopSize; k += (kiGopSize >> (i - 1))) { + pWelsSvcRc->iTlOfFrames[k + n] = i; + } + } + } + pWelsSvcRc->iPreviousGopSize = kiGopSize; + pWelsSvcRc->iGopNumberInVGop = VGOP_SIZE / kiGopSize; +} + +void RcUpdateBitrateFps (sWelsEncCtx* pEncCtx) { + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId]; + SRCTemporal* pTOverRc = pWelsSvcRc->pTemporalOverRc; + + SSpatialLayerConfig* pDLayerParam = &pEncCtx->pSvcParam->sSpatialLayers[pEncCtx->uiDependencyId]; + SSpatialLayerInternal* pDLayerParamInternal = &pEncCtx->pSvcParam->sDependencyLayers[pEncCtx->uiDependencyId]; + const int32_t kiGopSize = (1 << pDLayerParamInternal->iDecompositionStages); + const int32_t kiHighestTid = pDLayerParamInternal->iHighestTemporalId; + const int32_t input_iBitsPerFrame = WELS_DIV_ROUND (pDLayerParam->iSpatialBitrate, + pDLayerParamInternal->fOutputFrameRate); + const int64_t kiGopBits = input_iBitsPerFrame * kiGopSize; + int32_t i; + + pWelsSvcRc->iBitRate = pDLayerParam->iSpatialBitrate; + 
pWelsSvcRc->fFrameRate = pDLayerParamInternal->fOutputFrameRate; + + int32_t iTargetVaryRange = ((MAX_BITS_VARY_PERCENTAGE - pWelsSvcRc->iRcVaryRatio) >> 1); + int32_t iMinBitsRatio = MAX_BITS_VARY_PERCENTAGE - iTargetVaryRange; + int32_t iMaxBitsRatio = MAX_BITS_VARY_PERCENTAGE_x3d2; + + for (i = 0; i <= kiHighestTid; i++) { + const int64_t kdConstraitBits = kiGopBits * pTOverRc[i].iTlayerWeight; + pTOverRc[i].iMinBitsTl = WELS_DIV_ROUND (kdConstraitBits * iMinBitsRatio, + MAX_BITS_VARY_PERCENTAGE * WEIGHT_MULTIPLY); + pTOverRc[i].iMaxBitsTl = WELS_DIV_ROUND (kdConstraitBits * iMaxBitsRatio, + MAX_BITS_VARY_PERCENTAGE * WEIGHT_MULTIPLY); + } +//When bitrate is changed, pBuffer size should be updated + pWelsSvcRc->iBufferSizeSkip = WELS_DIV_ROUND (pWelsSvcRc->iBitRate * pWelsSvcRc->iSkipBufferRatio, INT_MULTIPLY); + pWelsSvcRc->iBufferSizePadding = WELS_DIV_ROUND (pWelsSvcRc->iBitRate * PADDING_BUFFER_RATIO, INT_MULTIPLY); + +//change remaining bits + if (pWelsSvcRc->iBitsPerFrame > REMAIN_BITS_TH) { + pWelsSvcRc->iRemainingBits = WELS_DIV_ROUND (static_cast (pWelsSvcRc->iRemainingBits) * input_iBitsPerFrame, + pWelsSvcRc->iBitsPerFrame); + } + pWelsSvcRc->iBitsPerFrame = input_iBitsPerFrame; + pWelsSvcRc->iMaxBitsPerFrame = WELS_DIV_ROUND (pDLayerParam->iMaxSpatialBitrate, + pDLayerParamInternal->fOutputFrameRate); +} + + +void RcInitVGop (sWelsEncCtx* pEncCtx) { + const int32_t kiDid = pEncCtx->uiDependencyId; + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[kiDid]; + SRCTemporal* pTOverRc = pWelsSvcRc->pTemporalOverRc; + const int32_t kiHighestTid = pEncCtx->pSvcParam->sDependencyLayers[kiDid].iHighestTemporalId; + + pWelsSvcRc->iRemainingBits = VGOP_SIZE * pWelsSvcRc->iBitsPerFrame; + pWelsSvcRc->iRemainingWeights = pWelsSvcRc->iGopNumberInVGop * WEIGHT_MULTIPLY; + + pWelsSvcRc->iFrameCodedInVGop = 0; + pWelsSvcRc->iGopIndexInVGop = 0; + + for (int32_t i = 0; i <= kiHighestTid; ++ i) + pTOverRc[i].iGopBitsDq = 0; + pWelsSvcRc->iSkipFrameInVGop = 0; +} + +void 
RcInitRefreshParameter (sWelsEncCtx* pEncCtx) { + const int32_t kiDid = pEncCtx->uiDependencyId; + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[kiDid]; + SRCTemporal* pTOverRc = pWelsSvcRc->pTemporalOverRc; + SSpatialLayerConfig* pDLayerParam = &pEncCtx->pSvcParam->sSpatialLayers[kiDid]; + SSpatialLayerInternal* pDLayerParamInternal = &pEncCtx->pSvcParam->sDependencyLayers[kiDid]; + const int32_t kiHighestTid = pDLayerParamInternal->iHighestTemporalId; + int32_t i; + +//I frame R-Q Model + pWelsSvcRc->iIntraComplexity = 0; + pWelsSvcRc->iIntraMbCount = 0; + pWelsSvcRc->iIntraComplxMean = 0; +//P frame R-Q Model + for (i = 0; i <= kiHighestTid; i++) { + pTOverRc[i].iPFrameNum = 0; + pTOverRc[i].iLinearCmplx = 0; + pTOverRc[i].iFrameCmplxMean = 0; + } + + pWelsSvcRc->iBufferFullnessSkip = 0; + pWelsSvcRc->iBufferMaxBRFullness[EVEN_TIME_WINDOW] = 0; + pWelsSvcRc->iBufferMaxBRFullness[ODD_TIME_WINDOW] = 0; + pWelsSvcRc->iPredFrameBit = 0; + pWelsSvcRc->iBufferFullnessPadding = 0; + + pWelsSvcRc->iGopIndexInVGop = 0; + pWelsSvcRc->iRemainingBits = 0; + pWelsSvcRc->iBitsPerFrame = 0; + +//Backup the initial bitrate and fps + pWelsSvcRc->iPreviousBitrate = pDLayerParam->iSpatialBitrate; + pWelsSvcRc->dPreviousFps = pDLayerParamInternal->fOutputFrameRate; + + memset (pWelsSvcRc->pCurrentFrameGomSad, 0, pWelsSvcRc->iGomSize * sizeof (int32_t)); + + RcInitTlWeight (pEncCtx); + RcUpdateBitrateFps (pEncCtx); + RcInitVGop (pEncCtx); +} + +bool RcJudgeBitrateFpsUpdate (sWelsEncCtx* pEncCtx) { + int32_t iCurDid = pEncCtx->uiDependencyId; + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[iCurDid]; + SSpatialLayerInternal* pDLayerParamInternal = &pEncCtx->pSvcParam->sDependencyLayers[iCurDid]; + SSpatialLayerConfig* pDLayerParam = &pEncCtx->pSvcParam->sSpatialLayers[iCurDid]; + + if ((pWelsSvcRc->iPreviousBitrate != pDLayerParam->iSpatialBitrate) || + (pWelsSvcRc->dPreviousFps - pDLayerParamInternal->fOutputFrameRate) > EPSN || + (pWelsSvcRc->dPreviousFps - 
pDLayerParamInternal->fOutputFrameRate) < -EPSN) { + pWelsSvcRc->iPreviousBitrate = pDLayerParam->iSpatialBitrate; + pWelsSvcRc->dPreviousFps = pDLayerParamInternal->fOutputFrameRate; + return true; + } else + return false; +} + +#if GOM_TRACE_FLAG +void RcTraceVGopBitrate (sWelsEncCtx* pEncCtx) { + const int32_t kiDid = pEncCtx->uiDependencyId; + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[kiDid]; + + if (pWelsSvcRc->iFrameCodedInVGop) { + const int32_t kiHighestTid = pEncCtx->pSvcParam->sDependencyLayers[kiDid].iHighestTemporalId; + SRCTemporal* pTOverRc = pWelsSvcRc->pTemporalOverRc; + int32_t iVGopBitrate = 0; + int32_t iTotalBits = pWelsSvcRc->iPaddingBitrateStat; + int32_t iTid = 0; + while (iTid <= kiHighestTid) { + iTotalBits += pTOverRc[iTid].iGopBitsDq; + ++ iTid; + } + int32_t iFrameInVGop = pWelsSvcRc->iFrameCodedInVGop + pWelsSvcRc->iSkipFrameInVGop; + if (0 != iFrameInVGop) + iVGopBitrate = WELS_ROUND (iTotalBits / iFrameInVGop * pWelsSvcRc->fFrameRate); + WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_DEBUG, "[Rc] VGOPbitrate%d: %d ", kiDid, iVGopBitrate); + if (iTotalBits > 0) { + iTid = 0; + while (iTid <= kiHighestTid) { + WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_DEBUG, "T%d=%8.3f ", iTid, (double) (pTOverRc[iTid].iGopBitsDq / iTotalBits)); + ++ iTid; + } + } + } +} +#endif + +void RcUpdateTemporalZero (sWelsEncCtx* pEncCtx) { + const int32_t kiDid = pEncCtx->uiDependencyId; + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[kiDid]; + SSpatialLayerInternal* pDLayerParam = &pEncCtx->pSvcParam->sDependencyLayers[kiDid]; + const int32_t kiGopSize = (1 << pDLayerParam->iDecompositionStages); + + if (pWelsSvcRc->iPreviousGopSize != kiGopSize) { +#if GOM_TRACE_FLAG + RcTraceVGopBitrate (pEncCtx); +#endif + RcInitTlWeight (pEncCtx); + RcInitVGop (pEncCtx); + } else if (pWelsSvcRc->iGopIndexInVGop == pWelsSvcRc->iGopNumberInVGop || pEncCtx->eSliceType == I_SLICE) { +#if GOM_TRACE_FLAG + RcTraceVGopBitrate (pEncCtx); +#endif + RcInitVGop (pEncCtx); + } + 
pWelsSvcRc->iGopIndexInVGop++; +} + + +void RcCalculateIdrQp (sWelsEncCtx* pEncCtx) { + double dBpp = 0; + int32_t i; + +//64k@6fps for 90p: bpp 0.74 QP:24 +//192k@12fps for 180p: bpp 0.28 QP:26 +//512k@24fps for 360p: bpp 0.09 QP:30 +//1500k@30fps for 720p: bpp 0.05 QP:32 + double dBppArray[4][3] = {{0.5, 0.75, 1.0}, {0.2, 0.3, 0.4}, {0.05, 0.09, 0.13}, {0.03, 0.06, 0.1}}; + int32_t dInitialQPArray[4][4] = {{28, 26, 24, 22}, {30, 28, 26, 24}, {32, 30, 28, 26}, {34, 32, 30, 28}}; + int32_t iBppIndex = 0; + int32_t iQpRangeArray[4][2] = {{37, 25}, {36, 24}, {35, 23}, {34, 22}}; + int64_t iFrameComplexity = pEncCtx->pVaa->sComplexityAnalysisParam.iFrameComplexity; + if (pEncCtx->pSvcParam->iUsageType == SCREEN_CONTENT_REAL_TIME) { + SVAAFrameInfoExt* pVaa = static_cast (pEncCtx->pVaa); + iFrameComplexity = pVaa->sComplexityScreenParam.iFrameComplexity; + } + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId]; + SSpatialLayerConfig* pDLayerParam = &pEncCtx->pSvcParam->sSpatialLayers[pEncCtx->uiDependencyId]; + SSpatialLayerInternal* pDLayerParamInternal = &pEncCtx->pSvcParam->sDependencyLayers[pEncCtx->uiDependencyId]; + if (pDLayerParamInternal->fOutputFrameRate > EPSN && pDLayerParam->iVideoWidth && pDLayerParam->iVideoHeight) + dBpp = (double) (pDLayerParam->iSpatialBitrate) / (double) (pDLayerParamInternal->fOutputFrameRate * + pDLayerParam->iVideoWidth * + pDLayerParam->iVideoHeight); + else + dBpp = 0.1; +//Area*2 + if (pDLayerParam->iVideoWidth * pDLayerParam->iVideoHeight <= 28800) // 90p video:160*90 + iBppIndex = 0; + else if (pDLayerParam->iVideoWidth * pDLayerParam->iVideoHeight <= 115200) // 180p video:320*180 + iBppIndex = 1; + else if (pDLayerParam->iVideoWidth * pDLayerParam->iVideoHeight <= 460800) // 360p video:640*360 + iBppIndex = 2; + else + iBppIndex = 3; + +//Search + for (i = 0; i < 3; i++) { + if (dBpp <= dBppArray[iBppIndex][i]) + break; + } + int32_t iMaxQp = iQpRangeArray[i][0]; + int32_t iMinQp = iQpRangeArray[i][1]; + 
iMinQp = WELS_CLIP3 (iMinQp, pWelsSvcRc->iMinQp, pWelsSvcRc->iMaxQp); + iMaxQp = WELS_CLIP3 (iMaxQp, pWelsSvcRc->iMinQp, pWelsSvcRc->iMaxQp); + if (0 == pWelsSvcRc->iIdrNum) { //the first IDR frame + pWelsSvcRc->iInitialQp = dInitialQPArray[iBppIndex][i]; + } else { + + //obtain the idr qp using previous idr complexity + if (pWelsSvcRc->iNumberMbFrame != pWelsSvcRc->iIntraMbCount) { + pWelsSvcRc->iIntraComplexity = pWelsSvcRc->iIntraComplexity * pWelsSvcRc->iNumberMbFrame / + pWelsSvcRc->iIntraMbCount; + } + + int64_t iCmplxRatio = WELS_DIV_ROUND64 (iFrameComplexity * INT_MULTIPLY, + pWelsSvcRc->iIntraComplxMean); + iCmplxRatio = WELS_CLIP3 (iCmplxRatio, INT_MULTIPLY - FRAME_CMPLX_RATIO_RANGE, INT_MULTIPLY + FRAME_CMPLX_RATIO_RANGE); + pWelsSvcRc->iQStep = WELS_DIV_ROUND ((pWelsSvcRc->iIntraComplexity * iCmplxRatio), + (pWelsSvcRc->iTargetBits * INT_MULTIPLY)); + pWelsSvcRc->iInitialQp = RcConvertQStep2Qp (pWelsSvcRc->iQStep); + } + + pWelsSvcRc->iInitialQp = WELS_CLIP3 (pWelsSvcRc->iInitialQp, iMinQp, iMaxQp); + pEncCtx->iGlobalQp = pWelsSvcRc->iInitialQp; + pWelsSvcRc->iQStep = RcConvertQp2QStep (pEncCtx->iGlobalQp); + pWelsSvcRc->iLastCalculatedQScale = pEncCtx->iGlobalQp; + pWelsSvcRc->iMinFrameQp = WELS_CLIP3 (pEncCtx->iGlobalQp - DELTA_QP_BGD_THD, iMinQp, iMaxQp); + pWelsSvcRc->iMaxFrameQp = WELS_CLIP3 (pEncCtx->iGlobalQp + DELTA_QP_BGD_THD, iMinQp, iMaxQp); + +} + +void RcCalculatePictureQp (sWelsEncCtx* pEncCtx) { + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId]; + int32_t iTl = pEncCtx->uiTemporalId; + SRCTemporal* pTOverRc = &pWelsSvcRc->pTemporalOverRc[iTl]; + int32_t iLumaQp = 0; + int32_t iDeltaQpTemporal = 0; + int64_t iFrameComplexity = pEncCtx->pVaa->sComplexityAnalysisParam.iFrameComplexity; + if (pEncCtx->pSvcParam->iUsageType == SCREEN_CONTENT_REAL_TIME) { + SVAAFrameInfoExt* pVaa = static_cast (pEncCtx->pVaa); + iFrameComplexity = pVaa->sComplexityScreenParam.iFrameComplexity; + } + if (0 == pTOverRc->iPFrameNum) { + 
iLumaQp = pWelsSvcRc->iInitialQp; + } else if (pWelsSvcRc->iCurrentBitsLevel == BITS_EXCEEDED) { + iLumaQp = pWelsSvcRc->iLastCalculatedQScale + DELTA_QP_BGD_THD; +//limit QP + int32_t iLastIdxCodecInVGop = pWelsSvcRc->iFrameCodedInVGop - 1; + if (iLastIdxCodecInVGop < 0) + iLastIdxCodecInVGop += VGOP_SIZE; + int32_t iTlLast = pWelsSvcRc->iTlOfFrames[iLastIdxCodecInVGop]; + iDeltaQpTemporal = iTl - iTlLast; + if (0 == iTlLast && iTl > 0) + iDeltaQpTemporal += 1; + else if (0 == iTl && iTlLast > 0) + iDeltaQpTemporal -= 1; + + } else { + int64_t iCmplxRatio = WELS_DIV_ROUND64 (iFrameComplexity * INT_MULTIPLY, + pTOverRc->iFrameCmplxMean); + iCmplxRatio = WELS_CLIP3 (iCmplxRatio, INT_MULTIPLY - FRAME_CMPLX_RATIO_RANGE, INT_MULTIPLY + FRAME_CMPLX_RATIO_RANGE); + + pWelsSvcRc->iQStep = WELS_DIV_ROUND ((pTOverRc->iLinearCmplx * iCmplxRatio), (pWelsSvcRc->iTargetBits * INT_MULTIPLY)); + iLumaQp = RcConvertQStep2Qp (pWelsSvcRc->iQStep); + WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_DEBUG, + "iCmplxRatio = %d,frameComplexity = %" PRId64 ",iFrameCmplxMean = %" PRId64 ",iQStep = %d,iLumaQp = %d", (int)iCmplxRatio, + iFrameComplexity, pTOverRc->iFrameCmplxMean, pWelsSvcRc->iQStep, iLumaQp); +//limit QP + int32_t iLastIdxCodecInVGop = pWelsSvcRc->iFrameCodedInVGop - 1; + if (iLastIdxCodecInVGop < 0) + iLastIdxCodecInVGop += VGOP_SIZE; + int32_t iTlLast = pWelsSvcRc->iTlOfFrames[iLastIdxCodecInVGop]; + int32_t iDeltaQpTemporal = iTl - iTlLast; + if (0 == iTlLast && iTl > 0) + iDeltaQpTemporal += 1; + else if (0 == iTl && iTlLast > 0) + iDeltaQpTemporal -= 1; + } + pWelsSvcRc->iMinFrameQp = WELS_CLIP3 (pWelsSvcRc->iLastCalculatedQScale - pWelsSvcRc->iFrameDeltaQpLower + + iDeltaQpTemporal, pTOverRc->iMinQp, pTOverRc->iMaxQp) ; + pWelsSvcRc->iMaxFrameQp = WELS_CLIP3 (pWelsSvcRc->iLastCalculatedQScale + pWelsSvcRc->iFrameDeltaQpUpper + + iDeltaQpTemporal, pTOverRc->iMinQp, pTOverRc->iMaxQp); + + iLumaQp = WELS_CLIP3 (iLumaQp, pWelsSvcRc->iMinFrameQp, pWelsSvcRc->iMaxFrameQp); + + if 
(pEncCtx->pSvcParam->bEnableAdaptiveQuant) { + + iLumaQp = WELS_DIV_ROUND (iLumaQp * INT_MULTIPLY - pEncCtx->pVaa->sAdaptiveQuantParam.iAverMotionTextureIndexToDeltaQp, + INT_MULTIPLY); + iLumaQp = WELS_CLIP3 (iLumaQp, pWelsSvcRc->iMinFrameQp, pWelsSvcRc->iMaxFrameQp); + } + pWelsSvcRc->iQStep = RcConvertQp2QStep (iLumaQp); + pWelsSvcRc->iLastCalculatedQScale = iLumaQp; + pEncCtx->iGlobalQp = iLumaQp; +} + +void GomRCInitForOneSlice (SSlice* pSlice, const int32_t kiBitsPerMb) { + SRCSlicing* pSOverRc = &pSlice->sSlicingOverRc; + pSOverRc->iStartMbSlice = pSlice->sSliceHeaderExt.sSliceHeader.iFirstMbInSlice; + pSOverRc->iEndMbSlice = pSOverRc->iStartMbSlice + pSlice->iCountMbNumInSlice - 1; + pSOverRc->iTargetBitsSlice = WELS_DIV_ROUND (static_cast (kiBitsPerMb) * pSlice->iCountMbNumInSlice, + INT_MULTIPLY); +} + +void RcInitSliceInformation (sWelsEncCtx* pEncCtx) { + SSlice** ppSliceInLayer = pEncCtx->pCurDqLayer->ppSliceInLayer; + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId]; + const int32_t kiSliceNum = pEncCtx->pCurDqLayer->iMaxSliceNum; + pWelsSvcRc->iBitsPerMb = WELS_DIV_ROUND (static_cast (pWelsSvcRc->iTargetBits) * INT_MULTIPLY, + pWelsSvcRc->iNumberMbFrame); + pWelsSvcRc->bGomRC = (RC_OFF_MODE == pEncCtx->pSvcParam->iRCMode || + RC_BUFFERBASED_MODE == pEncCtx->pSvcParam->iRCMode) ? 
false : true; + for (int32_t i = 0; i < kiSliceNum; i++) { + SRCSlicing* pSOverRc = &ppSliceInLayer[i]->sSlicingOverRc; + pSOverRc->iTotalQpSlice = 0; + pSOverRc->iTotalMbSlice = 0; + pSOverRc->iFrameBitsSlice = 0; + pSOverRc->iGomBitsSlice = 0; + pSOverRc->iStartMbSlice = 0; + pSOverRc->iEndMbSlice = 0; + pSOverRc->iTargetBitsSlice = 0; + } +} + +void RcDecideTargetBits (sWelsEncCtx* pEncCtx) { + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId]; + SRCTemporal* pTOverRc = &pWelsSvcRc->pTemporalOverRc[pEncCtx->uiTemporalId]; + + pWelsSvcRc->iCurrentBitsLevel = BITS_NORMAL; + //allocate bits + if (pEncCtx->eSliceType == I_SLICE) { + pWelsSvcRc->iTargetBits = pWelsSvcRc->iBitsPerFrame * IDR_BITRATE_RATIO; + } else { + if (pWelsSvcRc->iRemainingWeights > pTOverRc->iTlayerWeight) + pWelsSvcRc->iTargetBits = WELS_DIV_ROUND (static_cast (pWelsSvcRc->iRemainingBits) * pTOverRc->iTlayerWeight, + pWelsSvcRc->iRemainingWeights); + else //this case should be not hit. needs to more test case to verify this + pWelsSvcRc->iTargetBits = pWelsSvcRc->iRemainingBits; + if ((pWelsSvcRc->iTargetBits <= 0) && ((pEncCtx->pSvcParam->iRCMode == RC_BITRATE_MODE) + && (pEncCtx->pSvcParam->bEnableFrameSkip == false))) { + pWelsSvcRc->iCurrentBitsLevel = BITS_EXCEEDED; + } + pWelsSvcRc->iTargetBits = WELS_CLIP3 (pWelsSvcRc->iTargetBits, pTOverRc->iMinBitsTl, pTOverRc->iMaxBitsTl); + } + pWelsSvcRc->iRemainingWeights -= pTOverRc->iTlayerWeight; + +} + +void RcDecideTargetBitsTimestamp (sWelsEncCtx* pEncCtx) { + //decide one frame bits allocated + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId]; + SSpatialLayerConfig* pDLayerParam = &pEncCtx->pSvcParam->sSpatialLayers[pEncCtx->uiDependencyId]; + int32_t iTl = pEncCtx->uiTemporalId; + SRCTemporal* pTOverRc = &pWelsSvcRc->pTemporalOverRc[iTl]; + pWelsSvcRc->iCurrentBitsLevel = BITS_NORMAL; + + if (pEncCtx->eSliceType == I_SLICE) { + int32_t iBufferTh = static_cast (pWelsSvcRc->iBufferSizeSkip - 
pWelsSvcRc->iBufferFullnessSkip); + if (iBufferTh <= 0) { + pWelsSvcRc->iCurrentBitsLevel = BITS_EXCEEDED; + pWelsSvcRc->iTargetBits = pTOverRc->iMinBitsTl; + } else { + int32_t iMaxTh = iBufferTh * 3 / 4; + int32_t iMinTh = static_cast((pDLayerParam->fFrameRate < 8) ? iBufferTh * 1.0 / 4 : iBufferTh * 2 / pDLayerParam->fFrameRate); + if (pDLayerParam->fFrameRate < (IDR_BITRATE_RATIO + 1)) + pWelsSvcRc->iTargetBits = static_cast (((double) (pDLayerParam->iSpatialBitrate) / (double) ( + pDLayerParam->fFrameRate))); + else + pWelsSvcRc->iTargetBits = static_cast (((double) (pDLayerParam->iSpatialBitrate) / (double) ( + pDLayerParam->fFrameRate) * IDR_BITRATE_RATIO)); + WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_DEBUG, + "iMaxTh = %d,iMinTh = %d,pWelsSvcRc->iTargetBits = %d,pWelsSvcRc->iBufferSizeSkip = %d, pWelsSvcRc->iBufferFullnessSkip= %" + PRId64, + iMaxTh, iMinTh, pWelsSvcRc->iTargetBits, pWelsSvcRc->iBufferSizeSkip, pWelsSvcRc->iBufferFullnessSkip); + pWelsSvcRc->iTargetBits = WELS_CLIP3 (pWelsSvcRc->iTargetBits, iMinTh, iMaxTh); + } + + } else { + int32_t iBufferTh = static_cast (pWelsSvcRc->iBufferSizeSkip - pWelsSvcRc->iBufferFullnessSkip); + if (iBufferTh <= 0) { + pWelsSvcRc->iCurrentBitsLevel = BITS_EXCEEDED; + pWelsSvcRc->iTargetBits = pTOverRc->iMinBitsTl; + WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_DEBUG, + "iMaxTh = %d,pWelsSvcRc->iTargetBits = %d,pWelsSvcRc->iBufferSizeSkip = %d, pWelsSvcRc->iBufferFullnessSkip= %" PRId64, + iBufferTh, pWelsSvcRc->iTargetBits, pWelsSvcRc->iBufferSizeSkip, pWelsSvcRc->iBufferFullnessSkip); + } else { + + SSpatialLayerInternal* pDLayerParamInternal = &pEncCtx->pSvcParam->sDependencyLayers[pEncCtx->uiDependencyId]; + const int32_t kiGopSize = (1 << pDLayerParamInternal->iDecompositionStages); + int32_t iAverageFrameSize = (int32_t) ((double) (pDLayerParam->iSpatialBitrate) / (double) (pDLayerParam->fFrameRate)); + const int32_t kiGopBits = iAverageFrameSize * kiGopSize; + pWelsSvcRc->iTargetBits = WELS_DIV_ROUND 
(pTOverRc->iTlayerWeight * kiGopBits, INT_MULTIPLY * 10 * 2); + + int32_t iMaxTh = iBufferTh / 2; + int32_t iMinTh = static_cast((pDLayerParam->fFrameRate < 8) ? iBufferTh * 1.0 / 4 : iBufferTh * 2 / pDLayerParam->fFrameRate); + WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_DEBUG, + "iMaxTh = %d,iMinTh = %d,pWelsSvcRc->iTargetBits = %d,pWelsSvcRc->iBufferSizeSkip = %d, pWelsSvcRc->iBufferFullnessSkip= % " + PRId64, + iMaxTh, iMinTh, pWelsSvcRc->iTargetBits, pWelsSvcRc->iBufferSizeSkip, pWelsSvcRc->iBufferFullnessSkip); + pWelsSvcRc->iTargetBits = WELS_CLIP3 (pWelsSvcRc->iTargetBits, iMinTh, iMaxTh); + } + } +} + +void RcInitGomParameters (sWelsEncCtx* pEncCtx) { + SSlice** ppSliceInLayer = pEncCtx->pCurDqLayer->ppSliceInLayer; + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId]; + SRCSlicing* pSOverRc = &ppSliceInLayer[0]->sSlicingOverRc; + const int32_t kiSliceNum = pEncCtx->pCurDqLayer->iMaxSliceNum; + const int32_t kiGlobalQp = pEncCtx->iGlobalQp; + + pWelsSvcRc->iAverageFrameQp = 0; + for (int32_t i = 0; i < kiSliceNum; ++i) { + pSOverRc = &ppSliceInLayer[i]->sSlicingOverRc; + pSOverRc->iComplexityIndexSlice = 0; + pSOverRc->iCalculatedQpSlice = kiGlobalQp; + } + memset (pWelsSvcRc->pGomComplexity, 0, pWelsSvcRc->iGomSize * sizeof (double)); + memset (pWelsSvcRc->pGomCost, 0, pWelsSvcRc->iGomSize * sizeof (int32_t)); +} + +void RcCalculateMbQp (sWelsEncCtx* pEncCtx, SSlice* pSlice, SMB* pCurMb) { + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId]; + SRCSlicing* pSOverRc = &pSlice->sSlicingOverRc; + + int32_t iLumaQp = pSOverRc->iCalculatedQpSlice; + SDqLayer* pCurLayer = pEncCtx->pCurDqLayer; + const uint8_t kuiChromaQpIndexOffset = pCurLayer->sLayerInfo.pPpsP->uiChromaQpIndexOffset; + if (pEncCtx->pSvcParam->bEnableAdaptiveQuant) { + iLumaQp = (int8_t)WELS_CLIP3 (iLumaQp + + pEncCtx->pVaa->sAdaptiveQuantParam.pMotionTextureIndexToDeltaQp[pCurMb->iMbXY], pWelsSvcRc->iMinFrameQp, + pWelsSvcRc->iMaxFrameQp); + } + 
pCurMb->uiChromaQp = g_kuiChromaQpTable[CLIP3_QP_0_51 (iLumaQp + kuiChromaQpIndexOffset)]; + pCurMb->uiLumaQp = iLumaQp; +} + +SWelsSvcRc* RcJudgeBaseUsability (sWelsEncCtx* pEncCtx) { + SWelsSvcRc* pWelsSvcRc = NULL, *pWelsSvcRc_Base = NULL; + SSpatialLayerConfig* pDlpBase = NULL, *pDLayerParam = NULL; + SSpatialLayerInternal* pDlpBaseInternal = NULL; + if (pEncCtx->uiDependencyId <= 0) + return NULL; + pDlpBaseInternal = &pEncCtx->pSvcParam->sDependencyLayers[pEncCtx->uiDependencyId - 1]; + pDlpBase = &pEncCtx->pSvcParam->sSpatialLayers[pEncCtx->uiDependencyId - 1]; + pWelsSvcRc_Base = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId - 1]; + if (pEncCtx->uiTemporalId <= pDlpBaseInternal->iDecompositionStages) { + pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId]; + pWelsSvcRc_Base = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId - 1]; + pDLayerParam = &pEncCtx->pSvcParam->sSpatialLayers[pEncCtx->uiDependencyId]; + pDlpBase = &pEncCtx->pSvcParam->sSpatialLayers[pEncCtx->uiDependencyId - 1]; + if ((pDLayerParam->iVideoWidth * pDLayerParam->iVideoHeight / pWelsSvcRc->iNumberMbGom) == + (pDlpBase->iVideoWidth * pDlpBase->iVideoHeight / pWelsSvcRc_Base->iNumberMbGom)) + return pWelsSvcRc_Base; + else + return NULL; + } else + return NULL; +} + +void RcGomTargetBits (sWelsEncCtx* pEncCtx, SSlice* pSlice) { + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId]; + SWelsSvcRc* pWelsSvcRc_Base = NULL; + SRCSlicing* pSOverRc = &pSlice->sSlicingOverRc; + + int32_t iAllocateBits = 0; + int32_t iSumSad = 0; + int32_t iLastGomIndex = 0; + int32_t iLeftBits = 0; + const int32_t kiComplexityIndex = pSOverRc->iComplexityIndexSlice; + int32_t i; + + iLastGomIndex = pSOverRc->iEndMbSlice / pWelsSvcRc->iNumberMbGom; + iLeftBits = pSOverRc->iTargetBitsSlice - pSOverRc->iFrameBitsSlice; + if (iLeftBits <= 0) { + pSOverRc->iGomTargetBits = 0; + return; + } else if (kiComplexityIndex >= iLastGomIndex) { + iAllocateBits = iLeftBits; + } else { + pWelsSvcRc_Base = 
RcJudgeBaseUsability (pEncCtx); + pWelsSvcRc_Base = (pWelsSvcRc_Base) ? pWelsSvcRc_Base : pWelsSvcRc; + for (i = kiComplexityIndex + 1; i <= iLastGomIndex; i++) { + iSumSad += pWelsSvcRc_Base->pCurrentFrameGomSad[i]; + } + + if (0 == iSumSad) + iAllocateBits = WELS_DIV_ROUND (iLeftBits, (iLastGomIndex - kiComplexityIndex)); + else + iAllocateBits = WELS_DIV_ROUND ((int64_t)iLeftBits * pWelsSvcRc_Base->pCurrentFrameGomSad[kiComplexityIndex + 1], + iSumSad); + } + pSOverRc->iGomTargetBits = iAllocateBits; +} + + + +void RcCalculateGomQp (sWelsEncCtx* pEncCtx, SSlice* pSlice, SMB* pCurMb) { + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId]; + SRCSlicing* pSOverRc = &pSlice->sSlicingOverRc; + int64_t iBitsRatio = 1; + + int64_t iLeftBits = pSOverRc->iTargetBitsSlice - pSOverRc->iFrameBitsSlice; + int64_t iTargetLeftBits = iLeftBits + pSOverRc->iGomBitsSlice - pSOverRc->iGomTargetBits; + if ((iLeftBits <= 0) || (iTargetLeftBits <= 0)) { + pSOverRc->iCalculatedQpSlice += 2; + } else { +//globe decision + iBitsRatio = 10000 * iLeftBits / (iTargetLeftBits + 1); + if (iBitsRatio < 8409) //2^(-1.5/6)*10000 + pSOverRc->iCalculatedQpSlice += 2; + else if (iBitsRatio < 9439) //2^(-0.5/6)*10000 + pSOverRc->iCalculatedQpSlice += 1; + else if (iBitsRatio > 10600) //2^(0.5/6)*10000 + pSOverRc->iCalculatedQpSlice -= 1; + else if (iBitsRatio > 11900) //2^(1.5/6)*10000 + pSOverRc->iCalculatedQpSlice -= 2; + } + pSOverRc->iCalculatedQpSlice = WELS_CLIP3 (pSOverRc->iCalculatedQpSlice, pWelsSvcRc->iMinFrameQp, + pWelsSvcRc->iMaxFrameQp); +// WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_DEBUG,"iCalculatedQpSlice =%d,iBitsRatio = %d\n",pSOverRc->iCalculatedQpSlice,iBitsRatio); + pSOverRc->iGomBitsSlice = 0; + +} + +void RcVBufferCalculationSkip (sWelsEncCtx* pEncCtx) { + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId]; + SRCTemporal* pTOverRc = pWelsSvcRc->pTemporalOverRc; + const int32_t kiOutputBits = pWelsSvcRc->iBitsPerFrame; + const int32_t 
kiOutputMaxBits = pWelsSvcRc->iMaxBitsPerFrame; +//condition 1: whole pBuffer fullness + pWelsSvcRc->iBufferFullnessSkip += (pWelsSvcRc->iFrameDqBits - kiOutputBits); + pWelsSvcRc->iBufferMaxBRFullness[EVEN_TIME_WINDOW] += (pWelsSvcRc->iFrameDqBits - kiOutputMaxBits); + pWelsSvcRc->iBufferMaxBRFullness[ODD_TIME_WINDOW] += (pWelsSvcRc->iFrameDqBits - kiOutputMaxBits); + + WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_DEBUG, + "[Rc] bits in buffer = %" PRId64 ", bits in Max bitrate buffer = %" PRId64, + pWelsSvcRc->iBufferFullnessSkip, pWelsSvcRc->iBufferMaxBRFullness[EVEN_TIME_WINDOW]); +//condition 2: VGOP bits constraint + int64_t iVGopBitsPred = 0; + for (int32_t i = pWelsSvcRc->iFrameCodedInVGop + 1; i < VGOP_SIZE; i++) + iVGopBitsPred += pTOverRc[pWelsSvcRc->iTlOfFrames[i]].iMinBitsTl; + iVGopBitsPred -= pWelsSvcRc->iRemainingBits; + double dIncPercent = iVGopBitsPred * 100.0 / (pWelsSvcRc->iBitsPerFrame * VGOP_SIZE) - + (double)VGOP_BITS_PERCENTAGE_DIFF; + + if ((pWelsSvcRc->iBufferFullnessSkip > pWelsSvcRc->iBufferSizeSkip + && pWelsSvcRc->iAverageFrameQp > pWelsSvcRc->iSkipQpValue) + || (dIncPercent > pWelsSvcRc->iRcVaryPercentage)) { + pWelsSvcRc->bSkipFlag = true; + } + WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_DEBUG, + "[Rc] VBV_Skip,dIncPercent = %f,iRcVaryPercentage = %d,pWelsSvcRc->bSkipFlag = %d", dIncPercent, + pWelsSvcRc->iRcVaryPercentage, pWelsSvcRc->bSkipFlag); +} +void CheckFrameSkipBasedMaxbr (sWelsEncCtx* pEncCtx, const long long uiTimeStamp, int32_t iDidIdx) { + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[iDidIdx]; + SSpatialLayerConfig* pDLayerParam = &pEncCtx->pSvcParam->sSpatialLayers[iDidIdx]; + //SSpatialLayerInternal* pDLayerParamInternal = &pEncCtx->pSvcParam->sDependencyLayers[pEncCtx->uiDependencyId]; + if (!pEncCtx->pSvcParam->bEnableFrameSkip) + return; + const int32_t iSentBits = pWelsSvcRc->iBitsPerFrame; + const int32_t kiOutputMaxBits = pWelsSvcRc->iMaxBitsPerFrame; + const int64_t kiMaxSpatialBitRate = 
pDLayerParam->iMaxSpatialBitrate; + +//estimate allowed continual skipped frames in the sequence + const int32_t iPredSkipFramesTarBr = (WELS_DIV_ROUND (pWelsSvcRc->iBufferFullnessSkip, iSentBits) + 1) >> 1; + const int32_t iPredSkipFramesMaxBr = (WELS_MAX (WELS_DIV_ROUND (pWelsSvcRc->iBufferMaxBRFullness[EVEN_TIME_WINDOW], + kiOutputMaxBits), 0) + 1) >> 1; + +//calculate the remaining bits in TIME_CHECK_WINDOW + const int32_t iAvailableBitsInTimeWindow = WELS_DIV_ROUND ((TIME_CHECK_WINDOW - pEncCtx->iCheckWindowInterval) * + kiMaxSpatialBitRate, 1000); + const int32_t iAvailableBitsInShiftTimeWindow = WELS_DIV_ROUND ((TIME_CHECK_WINDOW - pEncCtx->iCheckWindowIntervalShift) + * kiMaxSpatialBitRate, 1000); + + bool bJudgeMaxBRbSkip[TIME_WINDOW_TOTAL];//0: EVEN_TIME_WINDOW; 1: ODD_TIME_WINDOW + + /* 4 cases for frame skipping + 1:skipping when buffer size larger than target threshold and current continual skip frames is allowed + 2:skipping when MaxBr buffer size + predict frame size - remaining bits in time window < 0 and current continual skip frames is allowed + 3:if in last ODD_TIME_WINDOW the MAX Br is overflowed, make more strict skipping conditions + 4:such as case 3 in the other window + */ + bool bJudgeBufferFullSkip = (pWelsSvcRc->iContinualSkipFrames <= iPredSkipFramesTarBr) + && (pWelsSvcRc->iBufferFullnessSkip > pWelsSvcRc->iBufferSizeSkip); + bool bJudgeMaxBRbufferFullSkip = (pWelsSvcRc->iContinualSkipFrames <= iPredSkipFramesMaxBr) + && (pEncCtx->iCheckWindowInterval > TIME_CHECK_WINDOW / 2) + && (pWelsSvcRc->iBufferMaxBRFullness[EVEN_TIME_WINDOW] + pWelsSvcRc->iPredFrameBit - iAvailableBitsInTimeWindow > 0); + bJudgeMaxBRbSkip[EVEN_TIME_WINDOW] = (pEncCtx->iCheckWindowInterval > TIME_CHECK_WINDOW / 2) + && (pWelsSvcRc->bNeedShiftWindowCheck[EVEN_TIME_WINDOW]) + && (pWelsSvcRc->iBufferMaxBRFullness[EVEN_TIME_WINDOW] + pWelsSvcRc->iPredFrameBit - iAvailableBitsInTimeWindow + + kiOutputMaxBits > 0); + bJudgeMaxBRbSkip[ODD_TIME_WINDOW] = 
(pEncCtx->iCheckWindowIntervalShift > TIME_CHECK_WINDOW / 2) + && (pWelsSvcRc->bNeedShiftWindowCheck[ODD_TIME_WINDOW]) + && (pWelsSvcRc->iBufferMaxBRFullness[ODD_TIME_WINDOW] + pWelsSvcRc->iPredFrameBit - iAvailableBitsInShiftTimeWindow + + kiOutputMaxBits > 0); + + pWelsSvcRc->bSkipFlag = false; + if (bJudgeBufferFullSkip || bJudgeMaxBRbufferFullSkip || bJudgeMaxBRbSkip[EVEN_TIME_WINDOW] + || bJudgeMaxBRbSkip[ODD_TIME_WINDOW]) { + pWelsSvcRc->bSkipFlag = true; + pWelsSvcRc->iSkipFrameNum++; + pWelsSvcRc->iSkipFrameInVGop++; + pWelsSvcRc->iBufferFullnessSkip -= iSentBits; + pWelsSvcRc->iRemainingBits += iSentBits; + pWelsSvcRc->iBufferMaxBRFullness[EVEN_TIME_WINDOW] -= kiOutputMaxBits; + pWelsSvcRc->iBufferMaxBRFullness[ODD_TIME_WINDOW] -= kiOutputMaxBits; + WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_DEBUG, + "[Rc] bits in buffer = %" PRId64 ", bits in Max bitrate buffer = %" PRId64 ", Predict skip frames = %d and %d", + pWelsSvcRc->iBufferFullnessSkip, pWelsSvcRc->iBufferMaxBRFullness[EVEN_TIME_WINDOW], iPredSkipFramesTarBr, + iPredSkipFramesMaxBr); + pWelsSvcRc->iBufferFullnessSkip = WELS_MAX (pWelsSvcRc->iBufferFullnessSkip, 0); + } +} + +bool WelsRcCheckFrameStatus (sWelsEncCtx* pEncCtx, long long uiTimeStamp, int32_t iSpatialNum, int32_t iCurDid) { + + bool bSkipMustFlag = false; + + SSpatialPicIndex* pSpatialIndexMap = &pEncCtx->sSpatialIndexMap[0]; + + //simul_cast AVC control + if (pEncCtx->pSvcParam->bSimulcastAVC) { + //check target_br skip and update info + int32_t iDidIdx = iCurDid; + if (pEncCtx->pFuncList->pfRc.pfWelsRcPicDelayJudge) { + pEncCtx->pFuncList->pfRc.pfWelsRcPicDelayJudge (pEncCtx, uiTimeStamp, iDidIdx); + } + if (true == pEncCtx->pWelsSvcRc[iDidIdx].bSkipFlag) { + bSkipMustFlag = true; + } + //check max_br skip + if (pEncCtx->pFuncList->pfRc.pfWelsCheckSkipBasedMaxbr) { + if ((!bSkipMustFlag) && (pEncCtx->pSvcParam->sSpatialLayers[iDidIdx].iMaxSpatialBitrate != UNSPECIFIED_BIT_RATE)) { + pEncCtx->pFuncList->pfRc.pfWelsCheckSkipBasedMaxbr 
(pEncCtx, uiTimeStamp, iDidIdx); + if (true == pEncCtx->pWelsSvcRc[iDidIdx].bSkipFlag) { + bSkipMustFlag = true; + + } + } + } + if (bSkipMustFlag) { + pEncCtx->pWelsSvcRc[iDidIdx].uiLastTimeStamp = uiTimeStamp; + pEncCtx->pWelsSvcRc[iDidIdx].bSkipFlag = false; + pEncCtx->pWelsSvcRc[iDidIdx].iContinualSkipFrames++; + return true; + } + } else { //SVC control + for (int32_t i = 0; i < iSpatialNum; i++) { + int32_t iDidIdx = (pSpatialIndexMap + i)->iDid; + //check target_br skip and update info + + if (pEncCtx->pFuncList->pfRc.pfWelsRcPicDelayJudge) { + pEncCtx->pFuncList->pfRc.pfWelsRcPicDelayJudge (pEncCtx, uiTimeStamp, iDidIdx); + } + if (true == pEncCtx->pWelsSvcRc[iDidIdx].bSkipFlag) { + bSkipMustFlag = true; + } + //check max_br skip + if (pEncCtx->pFuncList->pfRc.pfWelsCheckSkipBasedMaxbr) { + if ((!bSkipMustFlag) && (pEncCtx->pSvcParam->sSpatialLayers[iDidIdx].iMaxSpatialBitrate != UNSPECIFIED_BIT_RATE)) { + pEncCtx->pFuncList->pfRc.pfWelsCheckSkipBasedMaxbr (pEncCtx, uiTimeStamp, iDidIdx); + if (true == pEncCtx->pWelsSvcRc[iDidIdx].bSkipFlag) { + bSkipMustFlag = true; + } + } + } + if (bSkipMustFlag) { + break; + } + } + + if (bSkipMustFlag) { + for (int32_t i = 0; i < iSpatialNum; i++) { + int32_t iDidIdx = (pSpatialIndexMap + i)->iDid; + pEncCtx->pWelsSvcRc[iDidIdx].uiLastTimeStamp = uiTimeStamp; + pEncCtx->pWelsSvcRc[iDidIdx].bSkipFlag = false; + pEncCtx->pWelsSvcRc[iDidIdx].iContinualSkipFrames++; + } + return true; + } + } + return false; +} +void UpdateBufferWhenFrameSkipped (sWelsEncCtx* pEncCtx, int32_t iCurDid) { + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[iCurDid]; + const int32_t kiOutputBits = pWelsSvcRc->iBitsPerFrame; + const int32_t kiOutputMaxBits = pWelsSvcRc->iMaxBitsPerFrame; + pWelsSvcRc->iBufferFullnessSkip = pWelsSvcRc->iBufferFullnessSkip - kiOutputBits; + pWelsSvcRc->iBufferMaxBRFullness[EVEN_TIME_WINDOW] -= kiOutputMaxBits; + pWelsSvcRc->iBufferMaxBRFullness[ODD_TIME_WINDOW] -= kiOutputMaxBits; + WelsLog (& (pEncCtx->sLogCtx), 
WELS_LOG_DEBUG, + "[Rc] iDid = %d,bits in buffer = %" PRId64 ", bits in Max bitrate buffer = %" PRId64, + iCurDid, pWelsSvcRc->iBufferFullnessSkip, pWelsSvcRc->iBufferMaxBRFullness[EVEN_TIME_WINDOW]); + + pWelsSvcRc->iBufferFullnessSkip = WELS_MAX (pWelsSvcRc->iBufferFullnessSkip, 0); + + pWelsSvcRc->iRemainingBits += kiOutputBits; + pWelsSvcRc->iSkipFrameNum++; + pWelsSvcRc->iSkipFrameInVGop++; + + if ((pWelsSvcRc->iContinualSkipFrames % 3) == 0) { + //output a warning when iContinualSkipFrames is large enough, which may indicate subjective quality problem + //note that here iContinualSkipFrames must be >0, so the log output will be 3/6/.... + WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_WARNING, "[Rc] iDid = %d,iContinualSkipFrames(%d) is large", + iCurDid, pWelsSvcRc->iContinualSkipFrames); + } +} +void UpdateMaxBrCheckWindowStatus (sWelsEncCtx* pEncCtx, int32_t iSpatialNum, const long long uiTimeStamp) { + SSpatialPicIndex* pSpatialIndexMap = &pEncCtx->sSpatialIndexMap[0]; + if (pEncCtx->bCheckWindowStatusRefreshFlag) { + pEncCtx->iCheckWindowCurrentTs = uiTimeStamp; + } else { + pEncCtx->iCheckWindowCurrentTs = pEncCtx->iCheckWindowStartTs = uiTimeStamp; + pEncCtx->bCheckWindowStatusRefreshFlag = true; + for (int32_t i = 0; i < iSpatialNum; i++) { + int32_t iCurDid = (pSpatialIndexMap + i)->iDid; + pEncCtx->pWelsSvcRc[iCurDid].iBufferFullnessSkip = 0; + pEncCtx->pWelsSvcRc[iCurDid].iBufferMaxBRFullness[ODD_TIME_WINDOW] = 0; + pEncCtx->pWelsSvcRc[iCurDid].iBufferMaxBRFullness[EVEN_TIME_WINDOW] = 0; + pEncCtx->pWelsSvcRc[iCurDid].bNeedShiftWindowCheck[ODD_TIME_WINDOW] = false; + pEncCtx->pWelsSvcRc[iCurDid].bNeedShiftWindowCheck[EVEN_TIME_WINDOW] = false; + } + + } + pEncCtx->iCheckWindowInterval = (int32_t) (pEncCtx->iCheckWindowCurrentTs - pEncCtx->iCheckWindowStartTs); + if (pEncCtx->iCheckWindowInterval >= (TIME_CHECK_WINDOW >> 1) && !pEncCtx->bCheckWindowShiftResetFlag) { + pEncCtx->bCheckWindowShiftResetFlag = true; + for (int32_t i = 0; i < iSpatialNum; i++) { 
+ int32_t iCurDid = (pSpatialIndexMap + i)->iDid; + if (pEncCtx->pWelsSvcRc[iCurDid].iBufferMaxBRFullness[ODD_TIME_WINDOW] > 0 + && pEncCtx->pWelsSvcRc[iCurDid].iBufferMaxBRFullness[ODD_TIME_WINDOW] != + pEncCtx->pWelsSvcRc[iCurDid].iBufferMaxBRFullness[0]) { + pEncCtx->pWelsSvcRc[iCurDid].bNeedShiftWindowCheck[EVEN_TIME_WINDOW] = true; + } else { + pEncCtx->pWelsSvcRc[iCurDid].bNeedShiftWindowCheck[EVEN_TIME_WINDOW] = false; + } + pEncCtx->pWelsSvcRc[iCurDid].iBufferMaxBRFullness[ODD_TIME_WINDOW] = 0; + } + } + pEncCtx->iCheckWindowIntervalShift = pEncCtx->iCheckWindowInterval >= (TIME_CHECK_WINDOW >> 1) ? + pEncCtx->iCheckWindowInterval - (TIME_CHECK_WINDOW >> 1) : pEncCtx->iCheckWindowInterval + (TIME_CHECK_WINDOW >> 1); + + if (pEncCtx->iCheckWindowInterval >= TIME_CHECK_WINDOW || pEncCtx->iCheckWindowInterval == 0) { + pEncCtx->iCheckWindowStartTs = pEncCtx->iCheckWindowCurrentTs; + pEncCtx->iCheckWindowInterval = 0; + pEncCtx->bCheckWindowShiftResetFlag = false; + for (int32_t i = 0; i < iSpatialNum; i++) { + int32_t iCurDid = (pSpatialIndexMap + i)->iDid; + if (pEncCtx->pWelsSvcRc[iCurDid].iBufferMaxBRFullness[EVEN_TIME_WINDOW] > 0) { + pEncCtx->pWelsSvcRc[iCurDid].bNeedShiftWindowCheck[ODD_TIME_WINDOW] = true; + } else { + pEncCtx->pWelsSvcRc[iCurDid].bNeedShiftWindowCheck[ODD_TIME_WINDOW] = false; + } + pEncCtx->pWelsSvcRc[iCurDid].iBufferMaxBRFullness[EVEN_TIME_WINDOW] = 0; + } + } + return; +} + +bool WelsRcPostFrameSkipping (sWelsEncCtx* pCtx, const int32_t iDid, const long long uiTimeStamp) { + //TODO: put in the decision of rate-control + return false; +} + +void WelsRcPostFrameSkippedUpdate (sWelsEncCtx* pCtx, const int32_t iDid) { + //TODO: do something to update buffers after post-skipping is done + //let RC know post-skipping happened and adjust strategy accordingly +} + +void RcVBufferCalculationPadding (sWelsEncCtx* pEncCtx) { + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId]; + const int32_t kiOutputBits = 
pWelsSvcRc->iBitsPerFrame; + const int32_t kiBufferThreshold = WELS_DIV_ROUND (PADDING_THRESHOLD * (-pWelsSvcRc->iBufferSizePadding), INT_MULTIPLY); + + pWelsSvcRc->iBufferFullnessPadding += (pWelsSvcRc->iFrameDqBits - kiOutputBits); + + if (pWelsSvcRc->iBufferFullnessPadding < kiBufferThreshold) { + pWelsSvcRc->iPaddingSize = -pWelsSvcRc->iBufferFullnessPadding; + pWelsSvcRc->iPaddingSize >>= 3; // /8 + pWelsSvcRc->iBufferFullnessPadding = 0; + } else + pWelsSvcRc->iPaddingSize = 0; +} + + +void RcTraceFrameBits (sWelsEncCtx* pEncCtx, long long uiTimeStamp, int32_t iFrameSize) { + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId]; + SSpatialLayerInternal* pParamInternal = &pEncCtx->pSvcParam->sDependencyLayers[pEncCtx->uiDependencyId]; + if (pWelsSvcRc->iPredFrameBit != 0) + pWelsSvcRc->iPredFrameBit = (int32_t) (LAST_FRAME_PREDICT_WEIGHT * pWelsSvcRc->iFrameDqBits + + (1 - LAST_FRAME_PREDICT_WEIGHT) * pWelsSvcRc->iPredFrameBit); + else + pWelsSvcRc->iPredFrameBit = pWelsSvcRc->iFrameDqBits; + + WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_DEBUG, + "[Rc]Layer %d: Frame timestamp = %lld, Frame type = %d, encoding_qp = %d, average qp = %d, max qp = %d, min qp = %d, index = %d, " + "iTid = %d, used = %d, bitsperframe = %d, target = %d, remainingbits = %d, skipbuffersize = %d", + pEncCtx->uiDependencyId, uiTimeStamp, pEncCtx->eSliceType, pEncCtx->iGlobalQp, pWelsSvcRc->iAverageFrameQp, + pWelsSvcRc->iMaxFrameQp, + pWelsSvcRc->iMinFrameQp, + pParamInternal->iFrameIndex, pEncCtx->uiTemporalId, + ( pWelsSvcRc->iFrameDqBits > 0 ) ? 
pWelsSvcRc->iFrameDqBits : (iFrameSize<<3) , + pWelsSvcRc->iBitsPerFrame, + pWelsSvcRc->iTargetBits, pWelsSvcRc->iRemainingBits, pWelsSvcRc->iBufferSizeSkip); + +} + +void RcUpdatePictureQpBits (sWelsEncCtx* pEncCtx, int32_t iCodedBits) { + SSlice** ppSliceInLayer = pEncCtx->pCurDqLayer->ppSliceInLayer; + SRCSlicing* pSOverRc = &ppSliceInLayer[0]->sSlicingOverRc; + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId]; + SSliceCtx* pCurSliceCtx = &pEncCtx->pCurDqLayer->sSliceEncCtx; + int32_t iTotalQp = 0, iTotalMb = 0; + int32_t i; + + if (pEncCtx->eSliceType == P_SLICE) { + for (i = 0; i < pCurSliceCtx->iSliceNumInFrame; i++) { + pSOverRc = &ppSliceInLayer[i]->sSlicingOverRc; + iTotalQp += pSOverRc->iTotalQpSlice; + iTotalMb += pSOverRc->iTotalMbSlice; + } + if (iTotalMb > 0) + pWelsSvcRc->iAverageFrameQp = WELS_DIV_ROUND (INT_MULTIPLY * iTotalQp, iTotalMb * INT_MULTIPLY); + else + pWelsSvcRc->iAverageFrameQp = pEncCtx->iGlobalQp; + } else { + pWelsSvcRc->iAverageFrameQp = pEncCtx->iGlobalQp; + } + pWelsSvcRc->iFrameDqBits = iCodedBits; + pWelsSvcRc->iLastCalculatedQScale = pWelsSvcRc->iAverageFrameQp; + pWelsSvcRc->pTemporalOverRc[pEncCtx->uiTemporalId].iGopBitsDq += pWelsSvcRc->iFrameDqBits; +} + +void RcUpdateIntraComplexity (sWelsEncCtx* pEncCtx) { + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId]; + int32_t iAlpha = WELS_DIV_ROUND (INT_MULTIPLY, (1 + pWelsSvcRc->iIdrNum)); + if (iAlpha < (INT_MULTIPLY / 4)) iAlpha = INT_MULTIPLY / 4; + int32_t iQStep = RcConvertQp2QStep (pWelsSvcRc->iAverageFrameQp); + int64_t iIntraCmplx = iQStep * static_cast (pWelsSvcRc->iFrameDqBits); + int64_t iFrameComplexity = pEncCtx->pVaa->sComplexityAnalysisParam.iFrameComplexity; + if (pEncCtx->pSvcParam->iUsageType == SCREEN_CONTENT_REAL_TIME) { + SVAAFrameInfoExt* pVaa = static_cast (pEncCtx->pVaa); + iFrameComplexity = pVaa->sComplexityScreenParam.iFrameComplexity; + } + if (pWelsSvcRc->iIdrNum == 0) { + pWelsSvcRc->iIntraComplexity = 
iIntraCmplx; + pWelsSvcRc->iIntraComplxMean = iFrameComplexity; + } else { + pWelsSvcRc->iIntraComplexity = WELS_DIV_ROUND64 (((LINEAR_MODEL_DECAY_FACTOR) * pWelsSvcRc->iIntraComplexity + + (INT_MULTIPLY - LINEAR_MODEL_DECAY_FACTOR) * + iIntraCmplx), INT_MULTIPLY); + + + pWelsSvcRc->iIntraComplxMean = WELS_DIV_ROUND64 (((LINEAR_MODEL_DECAY_FACTOR) * static_cast + (pWelsSvcRc->iIntraComplxMean) + + (INT_MULTIPLY - LINEAR_MODEL_DECAY_FACTOR) * (iFrameComplexity)), + INT_MULTIPLY); + } + + pWelsSvcRc->iIntraMbCount = pWelsSvcRc->iNumberMbFrame; + pWelsSvcRc->iIdrNum++; + if (pWelsSvcRc->iIdrNum > 255) + pWelsSvcRc->iIdrNum = 255; + WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_DEBUG, + "RcUpdateIntraComplexity iFrameDqBits = %d,iQStep= %d,iIntraCmplx = %" PRId64, + pWelsSvcRc->iFrameDqBits, pWelsSvcRc->iQStep, pWelsSvcRc->iIntraComplexity); + +} + +void RcUpdateFrameComplexity (sWelsEncCtx* pEncCtx) { + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId]; + const int32_t kiTl = pEncCtx->uiTemporalId; + SRCTemporal* pTOverRc = &pWelsSvcRc->pTemporalOverRc[kiTl]; + + int64_t iFrameComplexity = pEncCtx->pVaa->sComplexityAnalysisParam.iFrameComplexity; + if (pEncCtx->pSvcParam->iUsageType == SCREEN_CONTENT_REAL_TIME) { + SVAAFrameInfoExt* pVaa = static_cast (pEncCtx->pVaa); + iFrameComplexity = pVaa->sComplexityScreenParam.iFrameComplexity; + } + int32_t iQStep = RcConvertQp2QStep (pWelsSvcRc->iAverageFrameQp); + int32_t iAlpha = WELS_DIV_ROUND (INT_MULTIPLY, (1 + pTOverRc->iPFrameNum)); + if (iAlpha < SMOOTH_FACTOR_MIN_VALUE) + iAlpha = SMOOTH_FACTOR_MIN_VALUE; + if (0 == pTOverRc->iPFrameNum) { + pTOverRc->iLinearCmplx = ((int64_t)pWelsSvcRc->iFrameDqBits) * iQStep; + pTOverRc->iFrameCmplxMean = (int32_t)iFrameComplexity; + } else { + pTOverRc->iLinearCmplx = WELS_DIV_ROUND64 (((LINEAR_MODEL_DECAY_FACTOR) * (int64_t)pTOverRc->iLinearCmplx + + (INT_MULTIPLY - LINEAR_MODEL_DECAY_FACTOR) * ((int64_t)pWelsSvcRc->iFrameDqBits * iQStep)), + INT_MULTIPLY); + 
pTOverRc->iFrameCmplxMean = WELS_DIV_ROUND64 (((LINEAR_MODEL_DECAY_FACTOR) * static_cast + (pTOverRc->iFrameCmplxMean) + + (INT_MULTIPLY - LINEAR_MODEL_DECAY_FACTOR) * iFrameComplexity), + INT_MULTIPLY); + } + + + pTOverRc->iPFrameNum++; + if (pTOverRc->iPFrameNum > 255) + pTOverRc->iPFrameNum = 255; + WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_DEBUG, + "RcUpdateFrameComplexity iFrameDqBits = %d,iQStep= %d,pWelsSvcRc->iQStep= %d,pTOverRc->iLinearCmplx = %" PRId64, + pWelsSvcRc->iFrameDqBits, + iQStep, pWelsSvcRc->iQStep, pTOverRc->iLinearCmplx); + WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_DEBUG, "iFrameCmplxMean = %" PRId64 ",iFrameComplexity = %" PRId64, + pTOverRc->iFrameCmplxMean, iFrameComplexity); +} + +int32_t RcCalculateCascadingQp (struct TagWelsEncCtx* pEncCtx, int32_t iQp) { + int32_t iTemporalQp = 0; + if (pEncCtx->pSvcParam->iDecompStages) { + if (pEncCtx->uiTemporalId == 0) + iTemporalQp = iQp - 3 - (pEncCtx->pSvcParam->iDecompStages - 1); + else + iTemporalQp = iQp - (pEncCtx->pSvcParam->iDecompStages - pEncCtx->uiTemporalId); + iTemporalQp = WELS_CLIP3 (iTemporalQp, 1, 51); + } else + iTemporalQp = iQp; + return iTemporalQp; +} + +void WelsRcPictureInitGom (sWelsEncCtx* pEncCtx, long long uiTimeStamp) { + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId]; + const int32_t kiSliceNum = pEncCtx->pCurDqLayer->iMaxSliceNum; + pWelsSvcRc->iContinualSkipFrames = 0; + + if (pEncCtx->eSliceType == I_SLICE) { + if (0 == pWelsSvcRc->iIdrNum) { //iIdrNum == 0 means encoder has been initialed + RcInitRefreshParameter (pEncCtx); + } + } + if (RcJudgeBitrateFpsUpdate (pEncCtx)) { + RcUpdateBitrateFps (pEncCtx); + } + if (pEncCtx->uiTemporalId == 0) { + RcUpdateTemporalZero (pEncCtx); + } + if (pEncCtx->pSvcParam->iRCMode == RC_TIMESTAMP_MODE) { + RcDecideTargetBitsTimestamp (pEncCtx); + pWelsSvcRc->uiLastTimeStamp = uiTimeStamp; + } else { + RcDecideTargetBits (pEncCtx); + } + //turn off GOM QP when slicenum is larger 1 + if ((kiSliceNum > 1) || 
((pEncCtx->pSvcParam->iRCMode == RC_BITRATE_MODE) + && (pEncCtx->eSliceType == I_SLICE))) { + pWelsSvcRc->bEnableGomQp = false; + } else + pWelsSvcRc->bEnableGomQp = true; + + //decide globe_qp + if (pEncCtx->eSliceType == I_SLICE) { + RcCalculateIdrQp (pEncCtx); + } else { + RcCalculatePictureQp (pEncCtx); + } + RcInitSliceInformation (pEncCtx); + RcInitGomParameters (pEncCtx); +} + +void WelsRcPictureInfoUpdateGom (sWelsEncCtx* pEncCtx, int32_t iLayerSize) { + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId]; + int32_t iCodedBits = (iLayerSize << 3); + + RcUpdatePictureQpBits (pEncCtx, iCodedBits); + + if (pEncCtx->eSliceType == P_SLICE) { + RcUpdateFrameComplexity (pEncCtx); + } else { + RcUpdateIntraComplexity (pEncCtx); + } + pWelsSvcRc->iRemainingBits -= pWelsSvcRc->iFrameDqBits; + + if (pEncCtx->pSvcParam->bEnableFrameSkip /*&& + pEncCtx->uiDependencyId == pEncCtx->pSvcParam->iSpatialLayerNum - 1*/) { + RcVBufferCalculationSkip (pEncCtx); + } + + if (pEncCtx->pSvcParam->iPaddingFlag) + RcVBufferCalculationPadding (pEncCtx); + pWelsSvcRc->iFrameCodedInVGop++; +} + +void WelsRcMbInitGom (sWelsEncCtx* pEncCtx, SMB* pCurMb, SSlice* pSlice) { + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId]; + SRCSlicing* pSOverRc = &pSlice->sSlicingOverRc; + SDqLayer* pCurLayer = pEncCtx->pCurDqLayer; + const uint8_t kuiChromaQpIndexOffset = pCurLayer->sLayerInfo.pPpsP->uiChromaQpIndexOffset; + + pSOverRc->iBsPosSlice = pEncCtx->pFuncList->pfGetBsPosition (pSlice); + if (pWelsSvcRc->bEnableGomQp) { + //calculate gom qp and target bits at the beginning of gom + if (0 == (pCurMb->iMbXY % pWelsSvcRc->iNumberMbGom)) { + if (pCurMb->iMbXY != pSOverRc->iStartMbSlice) { + pSOverRc->iComplexityIndexSlice++; + RcCalculateGomQp (pEncCtx, pSlice, pCurMb); + } + RcGomTargetBits (pEncCtx, pSlice); + } + + RcCalculateMbQp (pEncCtx, pSlice, pCurMb); + } else { + pCurMb->uiLumaQp = pEncCtx->iGlobalQp; + pCurMb->uiChromaQp = 
g_kuiChromaQpTable[CLIP3_QP_0_51 (pCurMb->uiLumaQp + kuiChromaQpIndexOffset)]; + } + +} + +void WelsRcMbInfoUpdateGom (sWelsEncCtx* pEncCtx, SMB* pCurMb, int32_t iCostLuma, SSlice* pSlice) { + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId]; + SRCSlicing* pSOverRc = &pSlice->sSlicingOverRc; + const int32_t kiComplexityIndex = pSOverRc->iComplexityIndexSlice; + + int32_t iCurMbBits = pEncCtx->pFuncList->pfGetBsPosition (pSlice) - pSOverRc->iBsPosSlice; + pSOverRc->iFrameBitsSlice += iCurMbBits; + pSOverRc->iGomBitsSlice += iCurMbBits; + + pWelsSvcRc->pGomCost[kiComplexityIndex] += iCostLuma; + if (iCurMbBits > 0) { + pSOverRc->iTotalQpSlice += pCurMb->uiLumaQp; + pSOverRc->iTotalMbSlice++; + } +} + +void WelsRcPictureInitDisable (sWelsEncCtx* pEncCtx, long long uiTimeStamp) { + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId]; + SSpatialLayerConfig* pDLayerParam = &pEncCtx->pSvcParam->sSpatialLayers[pEncCtx->uiDependencyId]; + const int32_t kiQp = pDLayerParam->iDLayerQp; + + pEncCtx->iGlobalQp = RcCalculateCascadingQp (pEncCtx, kiQp); + + if (pEncCtx->pSvcParam->bEnableAdaptiveQuant && (pEncCtx->eSliceType == P_SLICE)) { + pEncCtx->iGlobalQp = WELS_CLIP3 ((pEncCtx->iGlobalQp * INT_MULTIPLY - + pEncCtx->pVaa->sAdaptiveQuantParam.iAverMotionTextureIndexToDeltaQp) / INT_MULTIPLY, pWelsSvcRc->iMinQp, + pWelsSvcRc->iMaxQp); + } else { + pEncCtx->iGlobalQp = WELS_CLIP3 (pEncCtx->iGlobalQp, 0, 51); + } + + pWelsSvcRc->iAverageFrameQp = pEncCtx->iGlobalQp; +} + +void WelsRcPictureInfoUpdateDisable (sWelsEncCtx* pEncCtx, int32_t iLayerSize) { +} + +void WelsRcMbInitDisable (sWelsEncCtx* pEncCtx, SMB* pCurMb, SSlice* pSlice) { + int32_t iLumaQp = pEncCtx->iGlobalQp; + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId]; + SDqLayer* pCurLayer = pEncCtx->pCurDqLayer; + + const uint8_t kuiChromaQpIndexOffset = pCurLayer->sLayerInfo.pPpsP->uiChromaQpIndexOffset; + + + if (pEncCtx->pSvcParam->bEnableAdaptiveQuant && 
(pEncCtx->eSliceType == P_SLICE)) { + iLumaQp = (int8_t)WELS_CLIP3 (iLumaQp + + pEncCtx->pVaa->sAdaptiveQuantParam.pMotionTextureIndexToDeltaQp[pCurMb->iMbXY], pWelsSvcRc->iMinQp, pWelsSvcRc->iMaxQp); + } else { + iLumaQp = WELS_CLIP3 (iLumaQp, 0, 51); + } + pCurMb->uiChromaQp = g_kuiChromaQpTable[CLIP3_QP_0_51 (iLumaQp + kuiChromaQpIndexOffset)]; + pCurMb->uiLumaQp = iLumaQp; +} + +void WelsRcMbInfoUpdateDisable (sWelsEncCtx* pEncCtx, SMB* pCurMb, int32_t iCostLuma, SSlice* pSlice) { +} + +void WelRcPictureInitBufferBasedQp (sWelsEncCtx* pEncCtx, long long uiTimeStamp) { + + SVAAFrameInfo* pVaa = static_cast (pEncCtx->pVaa); + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId]; + + int32_t iMinQp = pEncCtx->pSvcParam->iMinQp; + if (pVaa->eSceneChangeIdc == LARGE_CHANGED_SCENE) + iMinQp += 2; + else if (pVaa->eSceneChangeIdc == MEDIUM_CHANGED_SCENE) + iMinQp += 1; + if (pEncCtx->bDeliveryFlag) + pEncCtx->iGlobalQp -= 1; + else + pEncCtx->iGlobalQp += 2; + pEncCtx->iGlobalQp = WELS_CLIP3 (pEncCtx->iGlobalQp, iMinQp, pWelsSvcRc->iMaxQp); + pWelsSvcRc->iAverageFrameQp = pWelsSvcRc->iMaxFrameQp = pWelsSvcRc->iMinFrameQp = pEncCtx->iGlobalQp; +} +void WelRcPictureInitScc (sWelsEncCtx* pEncCtx, long long uiTimeStamp) { + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId]; + SVAAFrameInfoExt* pVaa = static_cast (pEncCtx->pVaa); + SSpatialLayerConfig* pDLayerConfig = &pEncCtx->pSvcParam->sSpatialLayers[pEncCtx->uiDependencyId]; + SSpatialLayerInternal* pDLayerParamInternal = &pEncCtx->pSvcParam->sDependencyLayers[pEncCtx->uiDependencyId]; + int64_t iFrameCplx = pVaa->sComplexityScreenParam.iFrameComplexity; + int32_t iBitRate = pDLayerConfig->iSpatialBitrate;// pEncCtx->pSvcParam->target_bitrate; + + int32_t iBaseQp = pWelsSvcRc->iBaseQp; + pEncCtx->iGlobalQp = iBaseQp; + int32_t iDeltaQp = 0; + if (pEncCtx->eSliceType == I_SLICE) { + int64_t iTargetBits = iBitRate * 2 - pWelsSvcRc->iBufferFullnessSkip; + iTargetBits = WELS_MAX (1, 
iTargetBits); + int32_t iQstep = WELS_DIV_ROUND (iFrameCplx * pWelsSvcRc->iCost2BitsIntra, iTargetBits); + int32_t iQp = RcConvertQStep2Qp (iQstep); + + pEncCtx->iGlobalQp = WELS_CLIP3 (iQp, pWelsSvcRc->iMinQp, pWelsSvcRc->iMaxQp); + } else { + int64_t iTargetBits = WELS_ROUND (((float)iBitRate / pDLayerParamInternal->fOutputFrameRate)); //iBitRate / 10; + int32_t iQstep = WELS_DIV_ROUND (iFrameCplx * pWelsSvcRc->iAvgCost2Bits, iTargetBits); + int32_t iQp = RcConvertQStep2Qp (iQstep); + iDeltaQp = iQp - iBaseQp; + if (pWelsSvcRc->iBufferFullnessSkip > iBitRate) { + if (iDeltaQp > 0) { + ++iBaseQp; + } + } else if (pWelsSvcRc->iBufferFullnessSkip == 0) { + if (iDeltaQp < 0) { + --iBaseQp; + } + } + if (iDeltaQp >= 6) { + iBaseQp += 3; + } else if ((iDeltaQp <= -6)) { + --iBaseQp; + } + iBaseQp = WELS_CLIP3 (iBaseQp, pWelsSvcRc->iMinQp, pWelsSvcRc->iMinQp); + + pEncCtx->iGlobalQp = iBaseQp; + + + if (iDeltaQp < -6) { + pEncCtx->iGlobalQp = WELS_CLIP3 (pWelsSvcRc->iBaseQp - 6, pWelsSvcRc->iMinQp, pWelsSvcRc->iMaxQp); + } + + if (iDeltaQp > 5) { + if (LARGE_CHANGED_SCENE == pEncCtx->pVaa->eSceneChangeIdc || pWelsSvcRc->iBufferFullnessSkip > 2 * iBitRate + || iDeltaQp > 10) { + pEncCtx->iGlobalQp = WELS_CLIP3 (pWelsSvcRc->iBaseQp + iDeltaQp, pWelsSvcRc->iMinQp, pWelsSvcRc->iMaxQp); + } else if (MEDIUM_CHANGED_SCENE == pEncCtx->pVaa->eSceneChangeIdc || pWelsSvcRc->iBufferFullnessSkip > iBitRate) { + pEncCtx->iGlobalQp = WELS_CLIP3 (pWelsSvcRc->iBaseQp + 5, pWelsSvcRc->iMinQp, pWelsSvcRc->iMaxQp); + } + } + pWelsSvcRc->iBaseQp = iBaseQp; + } + pWelsSvcRc->iAverageFrameQp = pEncCtx->iGlobalQp; + WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_DEBUG, "WelRcPictureInitScc iLumaQp = %d\n", pEncCtx->iGlobalQp); + pWelsSvcRc->uiLastTimeStamp = uiTimeStamp; + +} +void WelsRcDropFrameUpdate (sWelsEncCtx* pEncCtx, uint32_t iDropSize) { + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[0]; + + pWelsSvcRc->iBufferFullnessSkip -= (int32_t)iDropSize; + pWelsSvcRc->iBufferFullnessSkip = WELS_MAX 
(0, pWelsSvcRc->iBufferFullnessSkip); + WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_DEBUG, "[WelsRcDropFrameUpdate:\tdrop:%d\t%" PRId64 "\n", iDropSize, + pWelsSvcRc->iBufferFullnessSkip); +} + +void WelsRcPictureInfoUpdateScc (sWelsEncCtx* pEncCtx, int32_t iNalSize) { + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId]; + int32_t iFrameBits = (iNalSize << 3); + pWelsSvcRc->iBufferFullnessSkip += iFrameBits; + + SVAAFrameInfoExt* pVaa = static_cast (pEncCtx->pVaa); + + int32_t iQstep = RcConvertQp2QStep (pEncCtx->iGlobalQp); + int64_t iCost2Bits = WELS_DIV_ROUND64 ((((int64_t)iFrameBits * iQstep)), pVaa->sComplexityScreenParam.iFrameComplexity); + + if (pEncCtx->eSliceType == P_SLICE) { + pWelsSvcRc->iAvgCost2Bits = WELS_DIV_ROUND64 ((95 * pWelsSvcRc->iAvgCost2Bits + 5 * iCost2Bits), INT_MULTIPLY); + } else { + pWelsSvcRc->iCost2BitsIntra = WELS_DIV_ROUND64 ((90 * pWelsSvcRc->iCost2BitsIntra + 10 * iCost2Bits), INT_MULTIPLY); + } +} + + +void WelsRcMbInitScc (sWelsEncCtx* pEncCtx, SMB* pCurMb, SSlice* pSlice) { + /* Get delta iQp of this MB */ + pCurMb->uiLumaQp = pEncCtx->iGlobalQp; + pCurMb->uiChromaQp = g_kuiChromaQpTable[WELS_CLIP3 (pCurMb->uiLumaQp + pEncCtx->pPps->uiChromaQpIndexOffset, 0, 51)]; +} + +void WelsRcFrameDelayJudgeTimeStamp (sWelsEncCtx* pEncCtx, long long uiTimeStamp, int32_t iDidIdx) { + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[iDidIdx]; + SSpatialLayerConfig* pDLayerConfig = &pEncCtx->pSvcParam->sSpatialLayers[iDidIdx]; + + int32_t iBitRate = pDLayerConfig->iSpatialBitrate; + int32_t iEncTimeInv = (pWelsSvcRc->uiLastTimeStamp == 0) ? 
0 : (int32_t) (uiTimeStamp - pWelsSvcRc->uiLastTimeStamp); + if ((iEncTimeInv < 0) || (iEncTimeInv > 1000)) { + iEncTimeInv = (int32_t) (1000.0 / pDLayerConfig->fFrameRate); + pWelsSvcRc->uiLastTimeStamp = uiTimeStamp - iEncTimeInv; + } + int32_t iSentBits = (int32_t) ((double)iBitRate * iEncTimeInv * (1.0E-3) + 0.5); + iSentBits = WELS_MAX (iSentBits, 0); + + //When bitrate is changed, pBuffer size should be updated + pWelsSvcRc->iBufferSizeSkip = WELS_DIV_ROUND (pDLayerConfig->iSpatialBitrate * pWelsSvcRc->iSkipBufferRatio, + INT_MULTIPLY); + pWelsSvcRc->iBufferSizePadding = WELS_DIV_ROUND (pDLayerConfig->iSpatialBitrate * PADDING_BUFFER_RATIO, INT_MULTIPLY); + + pWelsSvcRc->iBufferFullnessSkip -= iSentBits; + pWelsSvcRc->iBufferFullnessSkip = WELS_MAX ((-1) * (pDLayerConfig->iSpatialBitrate / 4), + pWelsSvcRc->iBufferFullnessSkip); + + if (pEncCtx->pSvcParam->bEnableFrameSkip) { + pWelsSvcRc->bSkipFlag = true; + if (pWelsSvcRc->iBufferFullnessSkip < pWelsSvcRc->iBufferSizeSkip) { + pWelsSvcRc->bSkipFlag = false; + } + if (pWelsSvcRc->bSkipFlag) { + pWelsSvcRc->iSkipFrameNum++; + pWelsSvcRc->uiLastTimeStamp = uiTimeStamp; + } + } + WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_DEBUG, + "WelsRcFrameDelayJudgeTimeStamp iDidIdx = %d,iSkipFrameNum = %d,buffer = %" PRId64 + ",threadhold = %d,bitrate = %d,iSentBits = %d,lasttimestamp = %lld,timestamp=%lld", iDidIdx, + pWelsSvcRc->iSkipFrameNum, pWelsSvcRc->iBufferFullnessSkip, pWelsSvcRc->iBufferSizeSkip, iBitRate, iSentBits, + pWelsSvcRc->uiLastTimeStamp, uiTimeStamp); +} + +void WelsRcPictureInfoUpdateGomTimeStamp (sWelsEncCtx* pEncCtx, int32_t iLayerSize) { + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId]; + int32_t iCodedBits = (iLayerSize << 3); + + RcUpdatePictureQpBits (pEncCtx, iCodedBits); + if (pEncCtx->eSliceType == P_SLICE) { + RcUpdateFrameComplexity (pEncCtx); + } else { + RcUpdateIntraComplexity (pEncCtx); + } + + pWelsSvcRc->iRemainingBits -= pWelsSvcRc->iFrameDqBits; + //condition 1: 
whole pBuffer fullness + pWelsSvcRc->iBufferFullnessSkip += pWelsSvcRc->iFrameDqBits; + + if (pEncCtx->pSvcParam->iPaddingFlag) + RcVBufferCalculationPadding (pEncCtx); + pWelsSvcRc->iFrameCodedInVGop++; +} + +void WelsRcInitFuncPointers (sWelsEncCtx* pEncCtx, RC_MODES iRcMode) { + SWelsRcFunc* pRcf = &pEncCtx->pFuncList->pfRc; + switch (iRcMode) { + case RC_OFF_MODE: + pRcf->pfWelsRcPictureInit = WelsRcPictureInitDisable; + pRcf->pfWelsRcPicDelayJudge = NULL; + pRcf->pfWelsRcPictureInfoUpdate = WelsRcPictureInfoUpdateDisable; + pRcf->pfWelsRcMbInit = WelsRcMbInitDisable; + pRcf->pfWelsRcMbInfoUpdate = WelsRcMbInfoUpdateDisable; + pRcf->pfWelsCheckSkipBasedMaxbr = NULL; + pRcf->pfWelsUpdateBufferWhenSkip = NULL; + pRcf->pfWelsUpdateMaxBrWindowStatus = NULL; + pRcf->pfWelsRcPostFrameSkipping = NULL; + break; + case RC_BUFFERBASED_MODE: + pRcf->pfWelsRcPictureInit = WelRcPictureInitBufferBasedQp; + pRcf->pfWelsRcPicDelayJudge = NULL; + pRcf->pfWelsRcPictureInfoUpdate = WelsRcPictureInfoUpdateDisable; + pRcf->pfWelsRcMbInit = WelsRcMbInitDisable; + pRcf->pfWelsRcMbInfoUpdate = WelsRcMbInfoUpdateDisable; + pRcf->pfWelsCheckSkipBasedMaxbr = NULL; + pRcf->pfWelsUpdateBufferWhenSkip = NULL; + pRcf->pfWelsUpdateMaxBrWindowStatus = NULL; + pRcf->pfWelsRcPostFrameSkipping = NULL; + break; + case RC_BITRATE_MODE: + pRcf->pfWelsRcPictureInit = WelsRcPictureInitGom; + pRcf->pfWelsRcPicDelayJudge = NULL; + pRcf->pfWelsRcPictureInfoUpdate = WelsRcPictureInfoUpdateGom; + pRcf->pfWelsRcMbInit = WelsRcMbInitGom; + pRcf->pfWelsRcMbInfoUpdate = WelsRcMbInfoUpdateGom; + pRcf->pfWelsCheckSkipBasedMaxbr = CheckFrameSkipBasedMaxbr; + pRcf->pfWelsUpdateBufferWhenSkip = UpdateBufferWhenFrameSkipped; + pRcf->pfWelsUpdateMaxBrWindowStatus = UpdateMaxBrCheckWindowStatus; + pRcf->pfWelsRcPostFrameSkipping = WelsRcPostFrameSkipping; + break; + case RC_BITRATE_MODE_POST_SKIP: + pRcf->pfWelsRcPictureInit = WelsRcPictureInitGom; + pRcf->pfWelsRcPicDelayJudge = NULL; + 
pRcf->pfWelsRcPictureInfoUpdate = WelsRcPictureInfoUpdateGom; + pRcf->pfWelsRcMbInit = WelsRcMbInitGom; + pRcf->pfWelsRcMbInfoUpdate = WelsRcMbInfoUpdateGom; + pRcf->pfWelsCheckSkipBasedMaxbr = CheckFrameSkipBasedMaxbr; + pRcf->pfWelsUpdateBufferWhenSkip = UpdateBufferWhenFrameSkipped; + pRcf->pfWelsUpdateMaxBrWindowStatus = UpdateMaxBrCheckWindowStatus; + pRcf->pfWelsRcPostFrameSkipping = WelsRcPostFrameSkipping; + break; + case RC_TIMESTAMP_MODE: + + pRcf->pfWelsRcPictureInit = WelsRcPictureInitGom; + pRcf->pfWelsRcPictureInfoUpdate = WelsRcPictureInfoUpdateGomTimeStamp; + pRcf->pfWelsRcMbInit = WelsRcMbInitGom; + pRcf->pfWelsRcMbInfoUpdate = WelsRcMbInfoUpdateGom; + + pRcf->pfWelsRcPicDelayJudge = WelsRcFrameDelayJudgeTimeStamp; + pRcf->pfWelsCheckSkipBasedMaxbr = NULL; + pRcf->pfWelsUpdateBufferWhenSkip = NULL; + pRcf->pfWelsUpdateMaxBrWindowStatus = NULL; + pRcf->pfWelsRcPostFrameSkipping = NULL; + break; + case RC_QUALITY_MODE: + default: + pRcf->pfWelsRcPictureInit = WelsRcPictureInitGom; + pRcf->pfWelsRcPicDelayJudge = NULL; + pRcf->pfWelsRcPictureInfoUpdate = WelsRcPictureInfoUpdateGom; + pRcf->pfWelsRcMbInit = WelsRcMbInitGom; + pRcf->pfWelsRcMbInfoUpdate = WelsRcMbInfoUpdateGom; + pRcf->pfWelsCheckSkipBasedMaxbr = CheckFrameSkipBasedMaxbr; + pRcf->pfWelsUpdateBufferWhenSkip = UpdateBufferWhenFrameSkipped; + pRcf->pfWelsUpdateMaxBrWindowStatus = UpdateMaxBrCheckWindowStatus; + pRcf->pfWelsRcPostFrameSkipping = NULL; + break; + } +} + +void WelsRcInitModule (sWelsEncCtx* pEncCtx, RC_MODES iRcMode) { + WelsRcInitFuncPointers (pEncCtx, iRcMode); + RcInitSequenceParameter (pEncCtx); +} + +void WelsRcFreeMemory (sWelsEncCtx* pEncCtx) { + SWelsSvcRc* pWelsSvcRc = NULL; + int32_t i = 0; + for (i = 0; i < pEncCtx->pSvcParam->iSpatialLayerNum; i++) { + pWelsSvcRc = &pEncCtx->pWelsSvcRc[i]; + RcFreeLayerMemory (pWelsSvcRc, pEncCtx->pMemAlign); + } +} + +long long GetTimestampForRc (const long long uiTimeStamp, const long long uiLastTimeStamp, const float 
fFrameRate) { + if ((uiLastTimeStamp >= uiTimeStamp) || ((uiTimeStamp == 0) && (uiLastTimeStamp != -1))) { + return (uiLastTimeStamp + (int32_t) (1000.0 / fFrameRate)); + } + return uiTimeStamp; +} + +}//end of namespace diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/ref_list_mgr_svc.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/ref_list_mgr_svc.cpp new file mode 100644 index 000000000..80ba25aa8 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/ref_list_mgr_svc.cpp @@ -0,0 +1,1077 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +// ref_list_mgr_svc.c +#include "ref_list_mgr_svc.h" +#include "utils.h" +#include "picture_handle.h" +namespace WelsEnc { + +#define STR_ROOM 1 +/* +* reset LTR marking , recovery ,feedback state to default +*/ +void ResetLtrState (SLTRState* pLtr) { + pLtr->bReceivedT0LostFlag = false; + pLtr->iLastRecoverFrameNum = 0; + pLtr->iLastCorFrameNumDec = -1; + pLtr->iCurFrameNumInDec = -1; + + // LTR mark + pLtr->iLTRMarkMode = LTR_DIRECT_MARK; + pLtr->iLTRMarkSuccessNum = 0; //successful marked num + pLtr->bLTRMarkingFlag = false; //decide whether current frame marked as LTR + pLtr->bLTRMarkEnable = false; //when LTR is confirmed and the interval is no smaller than the marking period + pLtr->iCurLtrIdx = 0; + memset (&pLtr->iLastLtrIdx , 0 , sizeof (pLtr->iLastLtrIdx)) ; + pLtr->uiLtrMarkInterval = 0; + + // LTR mark feedback + pLtr->uiLtrMarkState = NO_LTR_MARKING_FEEDBACK ; + pLtr->iLtrMarkFbFrameNum = -1; +} + +/* + * reset reference picture list + */ +void WelsResetRefList (sWelsEncCtx* pCtx) { + SRefList* pRefList = pCtx->ppRefPicListExt[pCtx->uiDependencyId]; + int32_t i; + + for (i = 0; i < MAX_SHORT_REF_COUNT + 1; i++) + pRefList->pShortRefList[i] = NULL; + for (i = 0; i < pCtx->pSvcParam->iLTRRefNum + 1; i++) + pRefList->pLongRefList[i] = NULL; + for (i = 0; i < pCtx->pSvcParam->iNumRefFrame + 1; i++) + pRefList->pRef[i]->SetUnref(); + + pRefList->uiLongRefCount = 0; + pRefList->uiShortRefCount = 0; + pRefList->pNextBuffer = pRefList->pRef[0]; +} + +static inline void DeleteLTRFromLongList (sWelsEncCtx* pCtx, int32_t iIdx) { + SRefList* pRefList = pCtx->ppRefPicListExt[pCtx->uiDependencyId]; + int32_t k ; + + for (k = iIdx; k < pRefList->uiLongRefCount - 1; k++) { + pRefList->pLongRefList[k] = pRefList->pLongRefList[k + 1]; + } + pRefList->pLongRefList[k] = NULL; + pRefList->uiLongRefCount--; + +} +static inline void DeleteSTRFromShortList (sWelsEncCtx* pCtx, int32_t iIdx) { + SRefList* pRefList = pCtx->ppRefPicListExt[pCtx->uiDependencyId]; + 
int32_t k ; + + for (k = iIdx; k < pRefList->uiShortRefCount - 1; k++) { + pRefList->pShortRefList[k] = pRefList->pShortRefList[k + 1]; + } + pRefList->pShortRefList[k] = NULL; + pRefList->uiShortRefCount--; + +} +static void DeleteNonSceneLTR (sWelsEncCtx* pCtx) { + SRefList* pRefList = pCtx->ppRefPicListExt[pCtx->uiDependencyId]; + for (int32_t i = 0; i < pCtx->pSvcParam->iNumRefFrame; ++i) { + SPicture* pRef = pRefList->pLongRefList[i]; + if (pRef != NULL && pRef->bUsedAsRef && pRef->bIsLongRef && (!pRef->bIsSceneLTR) && + (pCtx->uiTemporalId < pRef->uiTemporalId || pCtx->bCurFrameMarkedAsSceneLtr)) { + //this is our strategy to Unref all non-sceneLTR when the the current frame is sceneLTR + pRef->SetUnref(); + DeleteLTRFromLongList (pCtx, i); + i--; + } + } +} + +static inline int32_t CompareFrameNum (int32_t iFrameNumA, int32_t iFrameNumB, int32_t iMaxFrameNumPlus1) { + int64_t iNumA, iNumB, iDiffAB, iDiffMin; + if (iFrameNumA > iMaxFrameNumPlus1 || iFrameNumB > iMaxFrameNumPlus1) { + return -2; + } +#define WelsAbsDiffInt64(a,b) ( (a) > (b) )?( a - b ):( b - a ) + + iDiffAB = WelsAbsDiffInt64 ((int64_t) (iFrameNumA), (int64_t) (iFrameNumB)); + + iDiffMin = iDiffAB; + if (iDiffMin == 0) { + return FRAME_NUM_EQUAL; + } + + iNumA = WelsAbsDiffInt64 ((int64_t) (iFrameNumA + iMaxFrameNumPlus1), (int64_t) (iFrameNumB)); + if (iNumA == 0) { + return FRAME_NUM_EQUAL; + } else if (iDiffMin > iNumA) { + return FRAME_NUM_BIGGER; + } + + iNumB = WelsAbsDiffInt64 ((int64_t) (iFrameNumB + iMaxFrameNumPlus1), (int64_t) (iFrameNumA)); + if (iNumB == 0) { + return FRAME_NUM_EQUAL; + } else if (iDiffMin > iNumB) { + return FRAME_NUM_SMALLER; + } + + return (iFrameNumA > iFrameNumB) ? 
(FRAME_NUM_BIGGER) : (FRAME_NUM_SMALLER); + +} +/* +* delete failed mark according LTR recovery pRequest +*/ +static inline void DeleteInvalidLTR (sWelsEncCtx* pCtx) { + SRefList* pRefList = pCtx->ppRefPicListExt[pCtx->uiDependencyId]; + SPicture** pLongRefList = pRefList->pLongRefList; + SLTRState* pLtr = &pCtx->pLtr[pCtx->uiDependencyId]; + int32_t iMaxFrameNumPlus1 = (1 << pCtx->pSps->uiLog2MaxFrameNum); + int32_t i; + SSpatialLayerInternal* pParamInternal = &pCtx->pSvcParam->sDependencyLayers[pCtx->uiDependencyId]; + SLogContext* pLogCtx = & (pCtx->sLogCtx); + + for (i = 0; i < LONG_TERM_REF_NUM; i++) { + if (pLongRefList[i] != NULL) { + if (CompareFrameNum (pLongRefList[i]->iFrameNum , pLtr->iLastCorFrameNumDec, iMaxFrameNumPlus1) == FRAME_NUM_BIGGER + && (CompareFrameNum (pLongRefList[i]->iFrameNum , pLtr->iCurFrameNumInDec, + iMaxFrameNumPlus1) & (FRAME_NUM_EQUAL | FRAME_NUM_SMALLER))) { + WelsLog (pLogCtx, WELS_LOG_WARNING, "LTR ,invalid LTR delete ,long_term_idx = %d , iFrameNum =%d ", + pLongRefList[i]->iLongTermPicNum, pLongRefList[i]->iFrameNum); + pLongRefList[i]->SetUnref(); + DeleteLTRFromLongList (pCtx, i); + pLtr->bLTRMarkEnable = true; + if (pRefList->uiLongRefCount == 0) { + pParamInternal->bEncCurFrmAsIdrFlag = true; + } + } else if (CompareFrameNum (pLongRefList[i]->iMarkFrameNum , pLtr->iLastCorFrameNumDec , + iMaxFrameNumPlus1) == FRAME_NUM_BIGGER + && (CompareFrameNum (pLongRefList[i]->iMarkFrameNum, pLtr->iCurFrameNumInDec , + iMaxFrameNumPlus1) & (FRAME_NUM_EQUAL | FRAME_NUM_SMALLER)) + && pLtr->iLTRMarkMode == LTR_DELAY_MARK) { + WelsLog (pLogCtx, WELS_LOG_WARNING, "LTR ,iMarkFrameNum invalid LTR delete ,long_term_idx = %d , iFrameNum =%d ", + pLongRefList[i]->iLongTermPicNum, pLongRefList[i]->iFrameNum); + pLongRefList[i]->SetUnref(); + DeleteLTRFromLongList (pCtx, i); + pLtr->bLTRMarkEnable = true; + if (pRefList->uiLongRefCount == 0) { + pParamInternal->bEncCurFrmAsIdrFlag = true; + } + } + } + } + +} +/* +* handle LTR Mark feedback 
message +*/ +static inline void HandleLTRMarkFeedback (sWelsEncCtx* pCtx) { + SRefList* pRefList = pCtx->ppRefPicListExt[pCtx->uiDependencyId]; + SPicture** pLongRefList = pRefList->pLongRefList; + SLTRState* pLtr = &pCtx->pLtr[pCtx->uiDependencyId]; + SSpatialLayerInternal* pParamInternal = &pCtx->pSvcParam->sDependencyLayers[pCtx->uiDependencyId]; + int32_t i, j; + + if (pLtr->uiLtrMarkState == LTR_MARKING_SUCCESS) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, + "pLtr->uiLtrMarkState = %d, pLtr.iCurLtrIdx = %d , pLtr->iLtrMarkFbFrameNum = %d ,pCtx->iFrameNum = %d ", + pLtr->uiLtrMarkState, pLtr->iCurLtrIdx, pLtr->iLtrMarkFbFrameNum, pParamInternal->iFrameNum); + for (i = 0; i < pRefList->uiLongRefCount; i++) { + if (pLongRefList[i]->iFrameNum == pLtr->iLtrMarkFbFrameNum && pLongRefList[i]->uiRecieveConfirmed != RECIEVE_SUCCESS) { + + pLongRefList[i]->uiRecieveConfirmed = RECIEVE_SUCCESS; + pCtx->pVaa->uiValidLongTermPicIdx = pLongRefList[i]->iLongTermPicNum; + + pLtr->iCurFrameNumInDec = + pLtr->iLastRecoverFrameNum = + pLtr->iLastCorFrameNumDec = pLtr->iLtrMarkFbFrameNum; + + for (j = 0; j < pRefList->uiLongRefCount; j++) { + if (pLongRefList[j]->iLongTermPicNum != pLtr->iCurLtrIdx) { + pLongRefList[j]->SetUnref(); + DeleteLTRFromLongList (pCtx, j); + } + } + + pLtr->iLTRMarkSuccessNum++; + pLtr->iCurLtrIdx = (pLtr->iCurLtrIdx + 1) % LONG_TERM_REF_NUM; + pLtr->iLTRMarkMode = (pLtr->iLTRMarkSuccessNum >= (LONG_TERM_REF_NUM)) ? 
(LTR_DELAY_MARK) : (LTR_DIRECT_MARK); + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "LTR mark mode =%d", pLtr->iLTRMarkMode); + pLtr->bLTRMarkEnable = true; + break; + } + } + pLtr->uiLtrMarkState = NO_LTR_MARKING_FEEDBACK; + } else if (pLtr->uiLtrMarkState == LTR_MARKING_FAILED) { + for (i = 0; i < pRefList->uiLongRefCount; i++) { + if (pLongRefList[i]->iFrameNum == pLtr->iLtrMarkFbFrameNum) { + pLongRefList[i]->SetUnref(); + DeleteLTRFromLongList (pCtx, i); + break; + } + } + pLtr->uiLtrMarkState = NO_LTR_MARKING_FEEDBACK; + pLtr->bLTRMarkEnable = true; + + if (pLtr->iLTRMarkSuccessNum == 0) { + pParamInternal->bEncCurFrmAsIdrFlag = true; // no LTR , means IDR recieve failed, force next frame IDR + } + } +} +/* + * LTR mark process + */ +static inline void LTRMarkProcess (sWelsEncCtx* pCtx) { + SRefList* pRefList = pCtx->ppRefPicListExt[pCtx->uiDependencyId]; + SPicture** pLongRefList = pRefList->pLongRefList; + SPicture** pShortRefList = pRefList->pShortRefList; + SLTRState* pLtr = &pCtx->pLtr[pCtx->uiDependencyId]; + int32_t iGoPFrameNumInterval = ((pCtx->pSvcParam->uiGopSize >> 1) > 1) ? 
(pCtx->pSvcParam->uiGopSize >> 1) : (1); + int32_t iMaxFrameNumPlus1 = (1 << pCtx->pSps->uiLog2MaxFrameNum); + int32_t i = 0; + int32_t j = 0; + bool bMoveLtrFromShortToLong = false; + SSpatialLayerInternal* pParamInternal = &pCtx->pSvcParam->sDependencyLayers[pCtx->uiDependencyId]; + + if (pCtx->eSliceType == I_SLICE) { + i = 0; + pShortRefList[i]->uiRecieveConfirmed = RECIEVE_SUCCESS; + } else if (pLtr->bLTRMarkingFlag) { + pCtx->pVaa->uiMarkLongTermPicIdx = pLtr->iCurLtrIdx; + + if (pLtr->iLTRMarkMode == LTR_DELAY_MARK) { + for (i = 0; i < pRefList->uiShortRefCount; i++) { + if (CompareFrameNum (pParamInternal->iFrameNum, pShortRefList[i]->iFrameNum + iGoPFrameNumInterval, + iMaxFrameNumPlus1) == FRAME_NUM_EQUAL) { + break; + } + } + } + } + + if (pCtx->eSliceType == I_SLICE || pLtr->bLTRMarkingFlag) { + pShortRefList[i]->bIsLongRef = true; + pShortRefList[i]->iLongTermPicNum = pLtr->iCurLtrIdx; + pShortRefList[i]->iMarkFrameNum = pParamInternal->iFrameNum; + } + + // delay one gop to move LTR from int16_t list to int32_t list + if (pLtr->iLTRMarkMode == LTR_DIRECT_MARK && pCtx->eSliceType != I_SLICE && !pLtr->bLTRMarkingFlag) { + for (j = 0; j < pRefList->uiShortRefCount; j++) { + if (pRefList->pShortRefList[j]->bIsLongRef) { + i = j; + bMoveLtrFromShortToLong = true; + break; + } + } + } + + if ((pLtr->iLTRMarkMode == LTR_DELAY_MARK && pLtr->bLTRMarkingFlag) + || ((pLtr->iLTRMarkMode == LTR_DIRECT_MARK) && (bMoveLtrFromShortToLong))) { + pCtx->bRefOfCurTidIsLtr[pCtx->uiDependencyId][pCtx->uiTemporalId] = true; + + if (pRefList->uiLongRefCount > 0) { + memmove (&pRefList->pLongRefList[1], &pRefList->pLongRefList[0], + pRefList->uiLongRefCount * sizeof (SPicture*)); // confirmed_safe_unsafe_usage + } + pLongRefList[0] = pShortRefList[i]; + pRefList->uiLongRefCount++; + if (pRefList->uiLongRefCount > pCtx->pSvcParam->iLTRRefNum) { + pRefList->pLongRefList[pRefList->uiLongRefCount - 1]->SetUnref(); + DeleteLTRFromLongList (pCtx, pRefList->uiLongRefCount - 1); + } 
+ DeleteSTRFromShortList (pCtx, i); + } +} + +static inline void LTRMarkProcessScreen (sWelsEncCtx* pCtx) { + SRefList* pRefList = pCtx->ppRefPicListExt[pCtx->uiDependencyId]; + SPicture** pLongRefList = pRefList->pLongRefList; + int32_t iLtrIdx = pCtx->pDecPic->iLongTermPicNum; + pCtx->pVaa->uiMarkLongTermPicIdx = pCtx->pDecPic->iLongTermPicNum; + + assert (CheckInRangeCloseOpen (iLtrIdx, 0, MAX_REF_PIC_COUNT)); + if (pLongRefList[iLtrIdx] != NULL) { + pLongRefList[iLtrIdx]->SetUnref(); + } else { + pRefList->uiLongRefCount++; + } + pLongRefList[iLtrIdx] = pCtx->pDecPic; +} + +static void PrefetchNextBuffer (sWelsEncCtx* pCtx) { + SRefList* pRefList = pCtx->ppRefPicListExt[pCtx->uiDependencyId]; + const int32_t kiNumRef = pCtx->pSvcParam->iNumRefFrame; + int32_t i; + + pRefList->pNextBuffer = NULL; + for (i = 0; i < kiNumRef + 1; i++) { + if (!pRefList->pRef[i]->bUsedAsRef) { + pRefList->pNextBuffer = pRefList->pRef[i]; + break; + } + } + + if (pRefList->pNextBuffer == NULL && pRefList->uiShortRefCount > 0) { + pRefList->pNextBuffer = pRefList->pShortRefList[pRefList->uiShortRefCount - 1]; + pRefList->pNextBuffer->SetUnref(); + } + + pCtx->pDecPic = pRefList->pNextBuffer; +} + +/* + * update reference picture list + */ +bool WelsUpdateRefList (sWelsEncCtx* pCtx) { + SRefList* pRefList = pCtx->ppRefPicListExt[pCtx->uiDependencyId]; + SLTRState* pLtr = &pCtx->pLtr[pCtx->uiDependencyId]; + SSpatialLayerInternal* pParamD = &pCtx->pSvcParam->sDependencyLayers[pCtx->uiDependencyId]; + + int32_t iRefIdx = 0; + const uint8_t kuiTid = pCtx->uiTemporalId; + const uint8_t kuiDid = pCtx->uiDependencyId; + const EWelsSliceType keSliceType = pCtx->eSliceType; + uint32_t i = 0; + // Need update pRef list in case store base layer or target dependency layer construction + if (NULL == pCtx->pCurDqLayer) + return false; + + if (NULL == pRefList || NULL == pRefList->pRef[0]) + return false; + + if (NULL != pCtx->pDecPic) { +#if !defined(ENABLE_FRAME_DUMP) // to save complexity, 
1/6/2009 + if ((pParamD->iHighestTemporalId == 0) || (kuiTid < pParamD->iHighestTemporalId)) +#endif// !ENABLE_FRAME_DUMP + // Expanding picture for future reference + ExpandReferencingPicture (pCtx->pDecPic->pData, pCtx->pDecPic->iWidthInPixel, pCtx->pDecPic->iHeightInPixel, + pCtx->pDecPic->iLineSize, + pCtx->pFuncList->sExpandPicFunc.pfExpandLumaPicture, pCtx->pFuncList->sExpandPicFunc.pfExpandChromaPicture); + + // move picture in list + pCtx->pDecPic->uiTemporalId = kuiTid; + pCtx->pDecPic->uiSpatialId = kuiDid; + pCtx->pDecPic->iFrameNum = pParamD->iFrameNum; + pCtx->pDecPic->iFramePoc = pParamD->iPOC; + pCtx->pDecPic->uiRecieveConfirmed = RECIEVE_UNKOWN; + pCtx->pDecPic->bUsedAsRef = true; + + for (iRefIdx = pRefList->uiShortRefCount - 1; iRefIdx >= 0; --iRefIdx) { + pRefList->pShortRefList[iRefIdx + 1] = pRefList->pShortRefList[iRefIdx]; + } + pRefList->pShortRefList[0] = pCtx->pDecPic; + pRefList->uiShortRefCount++; + } + + if (keSliceType == P_SLICE) { + if (pCtx->uiTemporalId == 0) { + if (pCtx->pSvcParam->bEnableLongTermReference) { + LTRMarkProcess (pCtx); + DeleteInvalidLTR (pCtx); + HandleLTRMarkFeedback (pCtx); + + pLtr->bReceivedT0LostFlag = false; // reset to false due to the recovery is finished + pLtr->bLTRMarkingFlag = false; + ++pLtr->uiLtrMarkInterval; + } + + for (i = pRefList->uiShortRefCount - 1; i > 0; i--) { + pRefList->pShortRefList[i]->SetUnref(); + DeleteSTRFromShortList (pCtx, i); + } + if (pRefList->uiShortRefCount > 0 && (pRefList->pShortRefList[0]->uiTemporalId > 0 + || pRefList->pShortRefList[0]->iFrameNum != pParamD->iFrameNum)) { + pRefList->pShortRefList[0]->SetUnref(); + DeleteSTRFromShortList (pCtx, 0); + } + } + } else { // in case IDR currently coding + if (pCtx->pSvcParam->bEnableLongTermReference) { + LTRMarkProcess (pCtx); + + pLtr->iCurLtrIdx = (pLtr->iCurLtrIdx + 1) % LONG_TERM_REF_NUM; + pLtr->iLTRMarkSuccessNum = 1; //IDR default suceess + pLtr->bLTRMarkEnable = true; + pLtr->uiLtrMarkInterval = 0; + + 
pCtx->pVaa->uiValidLongTermPicIdx = 0; + pCtx->pVaa->uiMarkLongTermPicIdx = 0; + } + } + pCtx->pReferenceStrategy->EndofUpdateRefList(); + return true; +} + +bool CheckCurMarkFrameNumUsed (sWelsEncCtx* pCtx) { + SLTRState* pLtr = &pCtx->pLtr[pCtx->uiDependencyId]; + SRefList* pRefList = pCtx->ppRefPicListExt[pCtx->uiDependencyId]; + SPicture** pLongRefList = pRefList->pLongRefList; + int32_t iGoPFrameNumInterval = ((pCtx->pSvcParam->uiGopSize >> 1) > 1) ? (pCtx->pSvcParam->uiGopSize >> 1) : (1); + int32_t iMaxFrameNumPlus1 = (1 << pCtx->pSps->uiLog2MaxFrameNum); + SSpatialLayerInternal* pParamInternal = &pCtx->pSvcParam->sDependencyLayers[pCtx->uiDependencyId]; + int32_t i; + + for (i = 0; i < pRefList->uiLongRefCount; i++) { + if ((pParamInternal->iFrameNum == pLongRefList[i]->iFrameNum && pLtr->iLTRMarkMode == LTR_DIRECT_MARK) || + (CompareFrameNum (pParamInternal->iFrameNum + iGoPFrameNumInterval, pLongRefList[i]->iFrameNum, + iMaxFrameNumPlus1) == FRAME_NUM_EQUAL && pLtr->iLTRMarkMode == LTR_DELAY_MARK)) { + return false; + } + } + + return true; +} + +static inline void WelsMarkMMCORefInfoWithBase (SSlice** ppSliceList, + SSlice* pBaseSlice, + const int32_t kiCountSliceNum) { + int32_t iSliceIdx = 0; + SSliceHeaderExt* pSliceHdrExt = NULL; + SSliceHeaderExt* pBaseSHExt = &pBaseSlice->sSliceHeaderExt; + + for (iSliceIdx = 0; iSliceIdx < kiCountSliceNum; iSliceIdx++) { + pSliceHdrExt = &ppSliceList[iSliceIdx]->sSliceHeaderExt; + memcpy (&pSliceHdrExt->sSliceHeader.sRefMarking, &pBaseSHExt->sSliceHeader.sRefMarking, sizeof (SRefPicMarking)); + } +} + +void WelsMarkMMCORefInfo (sWelsEncCtx* pCtx, SLTRState* pLtr, + SSlice** ppSliceList, const int32_t kiCountSliceNum) { + SSlice* pBaseSlice = ppSliceList[0]; + SRefPicMarking* pRefPicMark = &pBaseSlice->sSliceHeaderExt.sSliceHeader.sRefMarking; + int32_t iGoPFrameNumInterval = ((pCtx->pSvcParam->uiGopSize >> 1) > 1) ? 
(pCtx->pSvcParam->uiGopSize >> 1) : (1); + + memset (pRefPicMark, 0, sizeof (SRefPicMarking)); + + if (pCtx->pSvcParam->bEnableLongTermReference && pLtr->bLTRMarkingFlag) { + if (pLtr->iLTRMarkMode == LTR_DIRECT_MARK) { + pRefPicMark->SMmcoRef[pRefPicMark->uiMmcoCount].iMaxLongTermFrameIdx = LONG_TERM_REF_NUM - 1; + pRefPicMark->SMmcoRef[pRefPicMark->uiMmcoCount++].iMmcoType = MMCO_SET_MAX_LONG; + + pRefPicMark->SMmcoRef[pRefPicMark->uiMmcoCount].iDiffOfPicNum = iGoPFrameNumInterval; + pRefPicMark->SMmcoRef[pRefPicMark->uiMmcoCount++].iMmcoType = MMCO_SHORT2UNUSED; + + pRefPicMark->SMmcoRef[pRefPicMark->uiMmcoCount].iLongTermFrameIdx = pLtr->iCurLtrIdx; + pRefPicMark->SMmcoRef[pRefPicMark->uiMmcoCount++].iMmcoType = MMCO_LONG; + } else if (pLtr->iLTRMarkMode == LTR_DELAY_MARK) { + pRefPicMark->SMmcoRef[pRefPicMark->uiMmcoCount].iDiffOfPicNum = iGoPFrameNumInterval; + pRefPicMark->SMmcoRef[pRefPicMark->uiMmcoCount].iLongTermFrameIdx = pLtr->iCurLtrIdx; + pRefPicMark->SMmcoRef[pRefPicMark->uiMmcoCount++].iMmcoType = MMCO_SHORT2LONG; + } + } + + WelsMarkMMCORefInfoWithBase (ppSliceList, pBaseSlice, kiCountSliceNum); +} + +void WelsMarkPic (sWelsEncCtx* pCtx) { + SLTRState* pLtr = &pCtx->pLtr[pCtx->uiDependencyId]; + const int32_t kiCountSliceNum = pCtx->pCurDqLayer->iMaxSliceNum; + + if (pCtx->pSvcParam->bEnableLongTermReference && pLtr->bLTRMarkEnable && pCtx->uiTemporalId == 0) { + if (!pLtr->bReceivedT0LostFlag && pLtr->uiLtrMarkInterval > pCtx->pSvcParam->iLtrMarkPeriod + && CheckCurMarkFrameNumUsed (pCtx)) { + pLtr->bLTRMarkingFlag = true; + pLtr->bLTRMarkEnable = false; + pLtr->uiLtrMarkInterval = 0; + for (int32_t i = 0 ; i < MAX_TEMPORAL_LAYER_NUM; ++i) { + if (pCtx->uiTemporalId < i || pCtx->uiTemporalId == 0) { + pLtr->iLastLtrIdx[i] = pLtr->iCurLtrIdx; + } + } + } else { + pLtr->bLTRMarkingFlag = false; + } + } + + WelsMarkMMCORefInfo (pCtx, pLtr, pCtx->pCurDqLayer->ppSliceInLayer, kiCountSliceNum); +} + +int32_t FilterLTRRecoveryRequest (sWelsEncCtx* pCtx, 
SLTRRecoverRequest* pLTRRecoverRequest) { + //if disable LTR, force IDR + if (!pCtx->pSvcParam->bEnableLongTermReference) { + for (int32_t iDid = 0; iDid < pCtx->pSvcParam->iSpatialLayerNum; iDid++) { + SSpatialLayerInternal* pParamInternal = &pCtx->pSvcParam->sDependencyLayers[iDid]; + pParamInternal->bEncCurFrmAsIdrFlag = true; + } + } else { + SLTRRecoverRequest* pRequest = pLTRRecoverRequest; + int32_t iLayerId = pLTRRecoverRequest->iLayerId; + if ((iLayerId < 0) || (iLayerId >= pCtx->pSvcParam->iSpatialLayerNum)) + return false; + + SLTRState* pLtr = &pCtx->pLtr[iLayerId]; + int32_t iMaxFrameNumPlus1 = (1 << pCtx->pSps->uiLog2MaxFrameNum); + SSpatialLayerInternal* pParamInternal = &pCtx->pSvcParam->sDependencyLayers[iLayerId]; + if (pRequest->uiFeedbackType == LTR_RECOVERY_REQUEST && pRequest->uiIDRPicId == pParamInternal->uiIdrPicId) { + if (pRequest->iLastCorrectFrameNum == -1) { + pParamInternal->bEncCurFrmAsIdrFlag = true; + return true; + } else if (pRequest->iCurrentFrameNum == -1) { + pLtr->bReceivedT0LostFlag = true; + return true; + } else if ((CompareFrameNum (pLtr->iLastRecoverFrameNum , pRequest->iLastCorrectFrameNum, + iMaxFrameNumPlus1) & (FRAME_NUM_EQUAL | FRAME_NUM_SMALLER)) // t0 lost + || ((CompareFrameNum (pLtr->iLastRecoverFrameNum , pRequest->iCurrentFrameNum, + iMaxFrameNumPlus1) & (FRAME_NUM_EQUAL | FRAME_NUM_SMALLER)) && + CompareFrameNum (pLtr->iLastRecoverFrameNum , pRequest->iLastCorrectFrameNum, + iMaxFrameNumPlus1) == FRAME_NUM_BIGGER)) { // recovery failed + + pLtr->bReceivedT0LostFlag = true; + pLtr->iLastCorFrameNumDec = pRequest->iLastCorrectFrameNum; + pLtr->iCurFrameNumInDec = pRequest->iCurrentFrameNum; + WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO, + "Receive valid LTR recovery pRequest,feedback_type = %d ,uiIdrPicId = %d , current_frame_num = %d , last correct frame num = %d" + , pRequest->uiFeedbackType, pRequest->uiIDRPicId, pRequest->iCurrentFrameNum, pRequest->iLastCorrectFrameNum); + } + + WelsLog (& (pCtx->sLogCtx), 
WELS_LOG_INFO, + "Receive LTR recovery pRequest,feedback_type = %d ,uiIdrPicId = %d , current_frame_num = %d , last correct frame num = %d" + , pRequest->uiFeedbackType, pRequest->uiIDRPicId, pRequest->iCurrentFrameNum, pRequest->iLastCorrectFrameNum); + } + } + + return true; +} +void FilterLTRMarkingFeedback (sWelsEncCtx* pCtx, SLTRMarkingFeedback* pLTRMarkingFeedback) { + int32_t iLayerId = pLTRMarkingFeedback->iLayerId; + if ((iLayerId < 0) || (iLayerId >= pCtx->pSvcParam->iSpatialLayerNum)) { + return; + } + SLTRState* pLtr = &pCtx->pLtr[iLayerId]; + assert (pLTRMarkingFeedback); + if (pCtx->pSvcParam->bEnableLongTermReference) { + SSpatialLayerInternal* pParamInternal = &pCtx->pSvcParam->sDependencyLayers[iLayerId]; + if (pLTRMarkingFeedback->uiIDRPicId == pParamInternal->uiIdrPicId + && (pLTRMarkingFeedback->uiFeedbackType == LTR_MARKING_SUCCESS + || pLTRMarkingFeedback->uiFeedbackType == LTR_MARKING_FAILED)) { // avoid error pData + pLtr->uiLtrMarkState = pLTRMarkingFeedback->uiFeedbackType; + pLtr->iLtrMarkFbFrameNum = pLTRMarkingFeedback->iLTRFrameNum ; + WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO, + "Receive valid LTR marking feedback, feedback_type = %d , uiIdrPicId = %d , LTR_frame_num = %d , cur_idr_pic_id = %d", + pLTRMarkingFeedback->uiFeedbackType, pLTRMarkingFeedback->uiIDRPicId, pLTRMarkingFeedback->iLTRFrameNum , + pParamInternal->uiIdrPicId); + + } else { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO, + "Receive LTR marking feedback, feedback_type = %d , uiIdrPicId = %d , LTR_frame_num = %d , cur_idr_pic_id = %d", + pLTRMarkingFeedback->uiFeedbackType, pLTRMarkingFeedback->uiIDRPicId, pLTRMarkingFeedback->iLTRFrameNum , + pParamInternal->uiIdrPicId); + } + } +} + +/* + * build reference picture list + */ +bool WelsBuildRefList (sWelsEncCtx* pCtx, const int32_t iPOC, int32_t iBestLtrRefIdx) { + SRefList* pRefList = pCtx->ppRefPicListExt[pCtx->uiDependencyId]; + SLTRState* pLtr = &pCtx->pLtr[pCtx->uiDependencyId]; + const int32_t kiNumRef = 
pCtx->pSvcParam->iNumRefFrame; + const uint8_t kuiTid = pCtx->uiTemporalId; + uint32_t i = 0; + SSpatialLayerInternal* pParamD = &pCtx->pSvcParam->sDependencyLayers[pCtx->uiDependencyId]; + // to support any type of cur_dq->mgs_control + // [ 0: using current layer to do ME/MC; + // -1: using store base layer to do ME/MC; + // 2: using highest layer to do ME/MC; ] + + // build reference list 0/1 if applicable + + pCtx->iNumRef0 = 0; + if (pCtx->eSliceType != I_SLICE) { + if (pCtx->pSvcParam->bEnableLongTermReference && pLtr->bReceivedT0LostFlag && pCtx->uiTemporalId == 0) { + for (i = 0; i < pRefList->uiLongRefCount; i++) { + if (pRefList->pLongRefList[i]->uiRecieveConfirmed == RECIEVE_SUCCESS) { + pCtx->pCurDqLayer->pRefOri[pCtx->iNumRef0] = pRefList->pLongRefList[i]; + pCtx->pRefList0[pCtx->iNumRef0++] = pRefList->pLongRefList[i]; + pLtr->iLastRecoverFrameNum = pParamD->iFrameNum; + WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO, + "pRef is int32_t !iLastRecoverFrameNum = %d, pRef iFrameNum = %d,LTR number = %d,", + pLtr->iLastRecoverFrameNum, pCtx->pRefList0[0]->iFrameNum, pRefList->uiLongRefCount); + break; + } + } + } else { + for (i = 0; i < pRefList->uiShortRefCount; ++ i) { + SPicture* pRef = pRefList->pShortRefList[i]; + if (pRef != NULL && pRef->bUsedAsRef && pRef->iFramePoc >= 0 && pRef->uiTemporalId <= kuiTid) { + pCtx->pCurDqLayer->pRefOri[pCtx->iNumRef0] = pRef; + pCtx->pRefList0[pCtx->iNumRef0++] = pRef; + WelsLog (& (pCtx->sLogCtx), WELS_LOG_DETAIL, + "WelsBuildRefList pCtx->uiTemporalId = %d,pRef->iFrameNum = %d,pRef->uiTemporalId = %d", + pCtx->uiTemporalId, pRef->iFrameNum, pRef->uiTemporalId); + } + } + } + } else { // safe for IDR + WelsResetRefList (pCtx); //for IDR, SHOULD reset pRef list. + ResetLtrState (&pCtx->pLtr[pCtx->uiDependencyId]); //SHOULD update it when IDR. 
+ for (int32_t k = 0; k < MAX_TEMPORAL_LEVEL; k++) { + pCtx->bRefOfCurTidIsLtr[pCtx->uiDependencyId][k] = false; + } + pCtx->pRefList0[0] = NULL; + } + + if (pCtx->iNumRef0 > kiNumRef) + pCtx->iNumRef0 = kiNumRef; + return (pCtx->iNumRef0 > 0 || pCtx->eSliceType == I_SLICE) ? (true) : (false); +} + +static void UpdateBlockStatic (sWelsEncCtx* pCtx) { + SVAAFrameInfoExt* pVaaExt = static_cast (pCtx->pVaa); + assert (pCtx->iNumRef0 == 1); //multi-ref is not support yet? + for (int32_t idx = 0; idx < pCtx->iNumRef0; idx++) { + //TODO: we need to re-factor the source picture storage first, + //and then use original frame of the ref to do this calculation for better vaa algo implementation + SPicture* pRef = pCtx->pRefList0[idx]; + if (pVaaExt->iVaaBestRefFrameNum != pRef->iFrameNum) { + //re-do the calculation + pCtx->pVpp->UpdateBlockIdcForScreen (pVaaExt->pVaaBestBlockStaticIdc, pRef, pCtx->pEncPic); + } + } +} + +void WelsUpdateSliceHeaderSyntax (sWelsEncCtx* pCtx, const int32_t iAbsDiffPicNumMinus1, + SSlice** ppSliceList, const int32_t uiFrameType) { + const int32_t kiCountSliceNum = pCtx->pCurDqLayer->iMaxSliceNum; + SLTRState* pLtr = &pCtx->pLtr[pCtx->uiDependencyId]; + int32_t iIdx = 0; + + assert (kiCountSliceNum > 0); + + for (iIdx = 0; iIdx < kiCountSliceNum; iIdx++) { + SSliceHeaderExt* pSliceHdrExt = &ppSliceList[iIdx]->sSliceHeaderExt; + SSliceHeader* pSliceHdr = &pSliceHdrExt->sSliceHeader; + SRefPicListReorderSyntax* pRefReorder = &pSliceHdr->sRefReordering; + SRefPicMarking* pRefPicMark = &pSliceHdr->sRefMarking; + + /*syntax for num_ref_idx_l0_active_minus1*/ + pSliceHdr->uiRefCount = pCtx->iNumRef0; + if (pCtx->iNumRef0 > 0) { + if ((!pCtx->pRefList0[0]->bIsLongRef) || (!pCtx->pSvcParam->bEnableLongTermReference)) { + pRefReorder->SReorderingSyntax[0].uiReorderingOfPicNumsIdc = 0; + pRefReorder->SReorderingSyntax[0].uiAbsDiffPicNumMinus1 = iAbsDiffPicNumMinus1; + pRefReorder->SReorderingSyntax[1].uiReorderingOfPicNumsIdc = 3; + } else { + int32_t 
iRefIdx = 0; + for (iRefIdx = 0; iRefIdx < pCtx->iNumRef0; iRefIdx++) { + pRefReorder->SReorderingSyntax[iRefIdx].uiReorderingOfPicNumsIdc = 2; + pRefReorder->SReorderingSyntax[iRefIdx].iLongTermPicNum = pCtx->pRefList0[iRefIdx]->iLongTermPicNum; + } + pRefReorder->SReorderingSyntax[iRefIdx].uiReorderingOfPicNumsIdc = 3; + } + } + + /*syntax for dec_ref_pic_marking()*/ + if (videoFrameTypeIDR == uiFrameType) { + pRefPicMark->bNoOutputOfPriorPicsFlag = false; + pRefPicMark->bLongTermRefFlag = pCtx->pSvcParam->bEnableLongTermReference; + } else { + if (pCtx->pSvcParam->iUsageType == SCREEN_CONTENT_REAL_TIME) + pRefPicMark->bAdaptiveRefPicMarkingModeFlag = pCtx->pSvcParam->bEnableLongTermReference; + else + pRefPicMark->bAdaptiveRefPicMarkingModeFlag = (pCtx->pSvcParam->bEnableLongTermReference + && pLtr->bLTRMarkingFlag) ? (true) : (false); + } + } +} + +/* + * update syntax for reference base related + */ +void WelsUpdateRefSyntax (sWelsEncCtx* pCtx, const int32_t iPOC, const int32_t uiFrameType) { + int32_t iAbsDiffPicNumMinus1 = -1; + SSpatialLayerInternal* pParamD = &pCtx->pSvcParam->sDependencyLayers[pCtx->uiDependencyId]; + /*syntax for ref_pic_list_reordering()*/ + if (pCtx->iNumRef0 > 0) { + iAbsDiffPicNumMinus1 = pParamD->iFrameNum - (pCtx->pRefList0[0]->iFrameNum) - 1; + + if (iAbsDiffPicNumMinus1 < 0) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO, "WelsUpdateRefSyntax():::uiAbsDiffPicNumMinus1:%d", iAbsDiffPicNumMinus1); + iAbsDiffPicNumMinus1 += (1 << (pCtx->pSps->uiLog2MaxFrameNum)); + WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO, "WelsUpdateRefSyntax():::uiAbsDiffPicNumMinus1< 0, update as:%d", + iAbsDiffPicNumMinus1); + } + } + + WelsUpdateSliceHeaderSyntax (pCtx, iAbsDiffPicNumMinus1, pCtx->pCurDqLayer->ppSliceInLayer, uiFrameType); +} + +static inline void UpdateOriginalPicInfo (SPicture* pOrigPic, SPicture* pReconPic) { + if (!pOrigPic) + return; + + pOrigPic->iPictureType = pReconPic->iPictureType; + pOrigPic->iFramePoc = pReconPic->iFramePoc; + 
pOrigPic->iFrameNum = pReconPic->iFrameNum; + pOrigPic->uiSpatialId = pReconPic->uiSpatialId; + pOrigPic->uiTemporalId = pReconPic->uiTemporalId; + pOrigPic->iLongTermPicNum = pReconPic->iLongTermPicNum; + pOrigPic->bUsedAsRef = pReconPic->bUsedAsRef; + pOrigPic->bIsLongRef = pReconPic->bIsLongRef; + pOrigPic->bIsSceneLTR = pReconPic->bIsSceneLTR; + pOrigPic->iFrameAverageQp = pReconPic->iFrameAverageQp; +} + +static void UpdateSrcPicListLosslessScreenRefSelectionWithLtr (sWelsEncCtx* pCtx) { + int32_t iDIdx = pCtx->uiDependencyId; + //update info in src list + UpdateOriginalPicInfo (pCtx->pEncPic, pCtx->pDecPic); + PrefetchNextBuffer (pCtx); + pCtx->pVpp->UpdateSrcListLosslessScreenRefSelectionWithLtr (pCtx->pEncPic, iDIdx, pCtx->pVaa->uiMarkLongTermPicIdx, + pCtx->ppRefPicListExt[iDIdx]->pLongRefList); +} + +static void UpdateSrcPicList (sWelsEncCtx* pCtx) { + int32_t iDIdx = pCtx->uiDependencyId; + //update info in src list + UpdateOriginalPicInfo (pCtx->pEncPic, pCtx->pDecPic); + PrefetchNextBuffer (pCtx); + pCtx->pVpp->UpdateSrcList (pCtx->pEncPic, iDIdx, pCtx->ppRefPicListExt[iDIdx]->pShortRefList, + pCtx->ppRefPicListExt[iDIdx]->uiShortRefCount); +} + +bool WelsUpdateRefListScreen (sWelsEncCtx* pCtx) { + SRefList* pRefList = pCtx->ppRefPicListExt[pCtx->uiDependencyId]; + SLTRState* pLtr = &pCtx->pLtr[pCtx->uiDependencyId]; + SSpatialLayerInternal* pParamD = &pCtx->pSvcParam->sDependencyLayers[pCtx->uiDependencyId]; + const uint8_t kuiTid = pCtx->uiTemporalId; + // Need update ref list in case store base layer or target dependency layer construction + if (NULL == pCtx->pCurDqLayer) + return false; + + if (NULL == pRefList || NULL == pRefList->pRef[0]) + return false; + + if (NULL != pCtx->pDecPic) { +#if !defined(ENABLE_FRAME_DUMP) // to save complexity, 1/6/2009 + if ((pParamD->iHighestTemporalId == 0) || (kuiTid < pParamD->iHighestTemporalId)) +#endif// !ENABLE_FRAME_DUMP + // Expanding picture for future reference + ExpandReferencingPicture 
(pCtx->pDecPic->pData, pCtx->pDecPic->iWidthInPixel, pCtx->pDecPic->iHeightInPixel, + pCtx->pDecPic->iLineSize, + pCtx->pFuncList->sExpandPicFunc.pfExpandLumaPicture, pCtx->pFuncList->sExpandPicFunc.pfExpandChromaPicture); + + // move picture in list + pCtx->pDecPic->uiTemporalId = pCtx->uiTemporalId; + pCtx->pDecPic->uiSpatialId = pCtx->uiDependencyId; + pCtx->pDecPic->iFrameNum = pParamD->iFrameNum; + pCtx->pDecPic->iFramePoc = pParamD->iPOC; + pCtx->pDecPic->bUsedAsRef = true; + pCtx->pDecPic->bIsLongRef = true; + pCtx->pDecPic->bIsSceneLTR = pLtr->bLTRMarkingFlag || (pCtx->pSvcParam->bEnableLongTermReference + && pCtx->eSliceType == I_SLICE); + pCtx->pDecPic->iLongTermPicNum = pLtr->iCurLtrIdx; + } + if (pCtx->eSliceType == P_SLICE) { + DeleteNonSceneLTR (pCtx); + LTRMarkProcessScreen (pCtx); + pLtr->bLTRMarkingFlag = false; + ++pLtr->uiLtrMarkInterval; + } else { // in case IDR currently coding + LTRMarkProcessScreen (pCtx); + pLtr->iCurLtrIdx = 1; + pLtr->iSceneLtrIdx = 1; + pLtr->uiLtrMarkInterval = 0; + pCtx->pVaa->uiValidLongTermPicIdx = 0; + } + + pCtx->pReferenceStrategy->EndofUpdateRefList(); + return true; +} + +bool WelsBuildRefListScreen (sWelsEncCtx* pCtx, const int32_t iPOC, int32_t iBestLtrRefIdx) { + SRefList* pRefList = pCtx->ppRefPicListExt[pCtx->uiDependencyId]; + SWelsSvcCodingParam* pParam = pCtx->pSvcParam; + SVAAFrameInfoExt* pVaaExt = static_cast (pCtx->pVaa); + const int32_t iNumRef = pParam->iNumRefFrame; + SSpatialLayerInternal* pParamD = &pCtx->pSvcParam->sDependencyLayers[pCtx->uiDependencyId]; + pCtx->iNumRef0 = 0; + + if (pCtx->eSliceType != I_SLICE) { + int iLtrRefIdx = 0; + SPicture* pRefOri = NULL; + for (int idx = 0; idx < pVaaExt->iNumOfAvailableRef; idx++) { + iLtrRefIdx = pCtx->pVpp->GetRefFrameInfo (idx, pCtx->bCurFrameMarkedAsSceneLtr, pRefOri); + if (iLtrRefIdx >= 0 && iLtrRefIdx <= pParam->iLTRRefNum) { + SPicture* pRefPic = pRefList->pLongRefList[iLtrRefIdx]; + if (pRefPic != NULL && pRefPic->bUsedAsRef && 
pRefPic->bIsLongRef) { + if (pRefPic->uiTemporalId <= pCtx->uiTemporalId && (!pCtx->bCurFrameMarkedAsSceneLtr || pRefPic->bIsSceneLTR)) { + pCtx->pCurDqLayer->pRefOri[pCtx->iNumRef0] = pRefOri; + pCtx->pRefList0[pCtx->iNumRef0++] = pRefPic; + WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, + "WelsBuildRefListScreen(), current iFrameNum = %d, current Tid = %d, ref iFrameNum = %d, ref uiTemporalId = %d, ref is Scene LTR = %d, LTR count = %d,iNumRef = %d", + pParamD->iFrameNum, pCtx->uiTemporalId, + pRefPic->iFrameNum, pRefPic->uiTemporalId, pRefPic->bIsSceneLTR, + pRefList->uiLongRefCount, iNumRef); + } + } + } else { + for (int32_t i = iNumRef ; i >= 0 ; --i) { + if (pRefList->pLongRefList[i] == NULL) { + continue; + } else if (pRefList->pLongRefList[i]->uiTemporalId == 0 + || pRefList->pLongRefList[i]->uiTemporalId < pCtx->uiTemporalId) { + pCtx->pCurDqLayer->pRefOri[pCtx->iNumRef0] = pRefOri; + pCtx->pRefList0[pCtx->iNumRef0++] = pRefList->pLongRefList[i]; + WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, + "WelsBuildRefListScreen(), ref !current iFrameNum = %d, ref iFrameNum = %d,LTR number = %d", + pParamD->iFrameNum, pCtx->pRefList0[pCtx->iNumRef0 - 1]->iFrameNum, pRefList->uiLongRefCount); + break; + } + } + } + } // end of (int idx = 0; idx < pVaaExt->iNumOfAvailableRef; idx++) + + WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, + "WelsBuildRefListScreen(), CurrentFramePoc=%d, isLTR=%d", iPOC, pCtx->bCurFrameMarkedAsSceneLtr); + for (int j = 0; j < iNumRef; j++) { + SPicture* pARefPicture = pRefList->pLongRefList[j]; + if (pARefPicture != NULL) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, + "WelsBuildRefListScreen()\tRefLot[%d]: iPoc=%d, iPictureType=%d, bUsedAsRef=%d, bIsLongRef=%d, bIsSceneLTR=%d, uiTemporalId=%d, iFrameNum=%d, iMarkFrameNum=%d, iLongTermPicNum=%d, uiRecieveConfirmed=%d", + j, + pARefPicture->iFramePoc, + pARefPicture->iPictureType, + pARefPicture->bUsedAsRef, + pARefPicture->bIsLongRef, + pARefPicture->bIsSceneLTR, + pARefPicture->uiTemporalId, + 
pARefPicture->iFrameNum, + pARefPicture->iMarkFrameNum, + pARefPicture->iLongTermPicNum, + pARefPicture->uiRecieveConfirmed); + } else { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "WelsBuildRefListScreen()\tRefLot[%d]: NULL", j); + } + } + } else { + // dealing with IDR + WelsResetRefList (pCtx); //for IDR, SHOULD reset pRef list. + ResetLtrState (&pCtx->pLtr[pCtx->uiDependencyId]); //SHOULD update it when IDR. + pCtx->pRefList0[0] = NULL; + } + if (pCtx->iNumRef0 > iNumRef) { + pCtx->iNumRef0 = iNumRef; + } + + return (pCtx->iNumRef0 > 0 || pCtx->eSliceType == I_SLICE) ? (true) : (false); +} + +static inline bool IsValidFrameNum (const int32_t kiFrameNum) { + return (kiFrameNum < (1 << 30)); // TODO: use the original judge first, may be improved +} + +void WelsMarkMMCORefInfoScreen (sWelsEncCtx* pCtx, SLTRState* pLtr, + SSlice** ppSliceList, const int32_t kiCountSliceNum) { + SSlice* pBaseSlice = ppSliceList[0]; + SRefPicMarking* pRefPicMark = &pBaseSlice->sSliceHeaderExt.sSliceHeader.sRefMarking; + const int32_t iMaxLtrIdx = pCtx->pSvcParam->iNumRefFrame - STR_ROOM - 1; + + memset (pRefPicMark, 0, sizeof (SRefPicMarking)); + if (pCtx->pSvcParam->bEnableLongTermReference) { + pRefPicMark->SMmcoRef[pRefPicMark->uiMmcoCount].iMaxLongTermFrameIdx = iMaxLtrIdx; + pRefPicMark->SMmcoRef[pRefPicMark->uiMmcoCount++].iMmcoType = MMCO_SET_MAX_LONG; + + pRefPicMark->SMmcoRef[pRefPicMark->uiMmcoCount].iLongTermFrameIdx = pLtr->iCurLtrIdx; + pRefPicMark->SMmcoRef[pRefPicMark->uiMmcoCount++].iMmcoType = MMCO_LONG; + } + + WelsMarkMMCORefInfoWithBase (ppSliceList, pBaseSlice, kiCountSliceNum); +} + +void WelsMarkPicScreen (sWelsEncCtx* pCtx) { + SLTRState* pLtr = &pCtx->pLtr[pCtx->uiDependencyId]; + int32_t iMaxTid = WELS_LOG2 (pCtx->pSvcParam->uiGopSize); + int32_t iMaxActualLtrIdx = -1; + SSpatialLayerInternal* pParamD = &pCtx->pSvcParam->sDependencyLayers[pCtx->uiDependencyId]; + if (pCtx->pSvcParam->bEnableLongTermReference) + iMaxActualLtrIdx = 
pCtx->pSvcParam->iNumRefFrame - STR_ROOM - 1 - WELS_MAX (iMaxTid , 1); + + SRefList* pRefList = pCtx->ppRefPicListExt[pCtx->uiDependencyId]; + SPicture** ppLongRefList = pRefList->pLongRefList; + const int32_t iNumRef = pCtx->pSvcParam->iNumRefFrame; + int32_t i; + const int32_t iLongRefNum = iNumRef - STR_ROOM; + const bool bIsRefListNotFull = pRefList->uiLongRefCount < iLongRefNum; + + if (!pCtx->pSvcParam->bEnableLongTermReference) { + pLtr->iCurLtrIdx = pCtx->uiTemporalId; + } else { + if (iMaxActualLtrIdx != -1 && pCtx->uiTemporalId == 0 && pCtx->bCurFrameMarkedAsSceneLtr) { + //Scene LTR + pLtr->bLTRMarkingFlag = true; + pLtr->uiLtrMarkInterval = 0; + pLtr->iCurLtrIdx = pLtr->iSceneLtrIdx % (iMaxActualLtrIdx + 1); + pLtr->iSceneLtrIdx++; + } else { + pLtr->bLTRMarkingFlag = false; + //for other LTR + if (bIsRefListNotFull) { + for (int32_t i = 0; i < iLongRefNum; ++i) { + if (pRefList->pLongRefList[i] == NULL) { + pLtr->iCurLtrIdx = i ; + break; + } + } + } else { + int32_t iRefNum_t[MAX_TEMPORAL_LAYER_NUM] = {0}; + for (i = 0 ; i < pRefList->uiLongRefCount ; ++i) { + if (ppLongRefList[i]->bUsedAsRef && ppLongRefList[i]->bIsLongRef && (!ppLongRefList[i]->bIsSceneLTR)) { + ++iRefNum_t[ ppLongRefList[i]->uiTemporalId ]; + } + } + + int32_t iMaxMultiRefTid = (iMaxTid) ? 
(iMaxTid - 1) : (0) ; + for (i = 0; i < MAX_TEMPORAL_LAYER_NUM ; ++i) { + if (iRefNum_t[i] > 1) { + iMaxMultiRefTid = i; + } + } + int32_t iLongestDeltaFrameNum = -1; + int32_t iMaxFrameNum = (1 << pCtx->pSps->uiLog2MaxFrameNum); + + for (i = 0 ; i < pRefList->uiLongRefCount ; ++i) { + if (ppLongRefList[i]->bUsedAsRef && ppLongRefList[i]->bIsLongRef && (!ppLongRefList[i]->bIsSceneLTR) + && iMaxMultiRefTid == ppLongRefList[i]->uiTemporalId) { + if (!IsValidFrameNum (ppLongRefList[i]->iFrameNum)) { // pLtr->iCurLtrIdx must have a value + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "WelsMarkPicScreen, Invalid Frame Number"); + return; + } + int32_t iDeltaFrameNum = (pParamD->iFrameNum >= ppLongRefList[i]->iFrameNum) + ? (pParamD->iFrameNum - ppLongRefList[i]->iFrameNum) + : (pParamD->iFrameNum + iMaxFrameNum - ppLongRefList[i]->iFrameNum); + + if (iDeltaFrameNum > iLongestDeltaFrameNum) { + pLtr->iCurLtrIdx = ppLongRefList[i]->iLongTermPicNum; + iLongestDeltaFrameNum = iDeltaFrameNum; + } + } + } + } + } + } + + for (i = 0 ; i < MAX_TEMPORAL_LAYER_NUM; ++i) { + if ((pCtx->uiTemporalId < i) || (pCtx->uiTemporalId == 0)) { + pLtr->iLastLtrIdx[i] = pLtr->iCurLtrIdx; + } + } + + const int32_t iSliceNum = pCtx->pCurDqLayer->iMaxSliceNum; + + WelsMarkMMCORefInfoScreen (pCtx, pLtr, pCtx->pCurDqLayer->ppSliceInLayer, iSliceNum); + + return; +} + +void DoNothing (sWelsEncCtx* pointer) { +} + + +IWelsReferenceStrategy* IWelsReferenceStrategy::CreateReferenceStrategy (sWelsEncCtx* pCtx, + const EUsageType keUsageType, + const bool kbLtrEnabled) { + + IWelsReferenceStrategy* pReferenceStrategy = NULL; + switch (keUsageType) { + case SCREEN_CONTENT_REAL_TIME: + if (kbLtrEnabled) { + pReferenceStrategy = WELS_NEW_OP (CWelsReference_LosslessWithLtr(), + CWelsReference_LosslessWithLtr); + } else { + pReferenceStrategy = WELS_NEW_OP (CWelsReference_Screen(), + CWelsReference_Screen); + } + WELS_VERIFY_RETURN_IF (NULL, NULL == pReferenceStrategy) + break; + case CAMERA_VIDEO_REAL_TIME: 
+ case CAMERA_VIDEO_NON_REAL_TIME: + default: + pReferenceStrategy = WELS_NEW_OP (CWelsReference_TemporalLayer(), + CWelsReference_TemporalLayer); + WELS_VERIFY_RETURN_IF (NULL, NULL == pReferenceStrategy) + break; + } + pReferenceStrategy->Init (pCtx); + return pReferenceStrategy; +} + +void CWelsReference_TemporalLayer::Init (sWelsEncCtx* pCtx) { + m_pEncoderCtx = pCtx; +} + +bool CWelsReference_TemporalLayer::BuildRefList (const int32_t iPOC, int32_t iBestLtrRefIdx) { + return WelsBuildRefList (m_pEncoderCtx, iPOC, iBestLtrRefIdx); +} +void CWelsReference_TemporalLayer::MarkPic() { + WelsMarkPic (m_pEncoderCtx); +} +bool CWelsReference_TemporalLayer::UpdateRefList() { + return WelsUpdateRefList (m_pEncoderCtx); +} +void CWelsReference_TemporalLayer::EndofUpdateRefList() { + PrefetchNextBuffer (m_pEncoderCtx); +} +void CWelsReference_TemporalLayer::AfterBuildRefList() { + DoNothing (m_pEncoderCtx); +} + +bool CWelsReference_Screen::BuildRefList (const int32_t iPOC, int32_t iBestLtrRefIdx) { + return WelsBuildRefList (m_pEncoderCtx, iPOC, iBestLtrRefIdx); +} +void CWelsReference_Screen::MarkPic() { + WelsMarkPic (m_pEncoderCtx); +} +bool CWelsReference_Screen::UpdateRefList() { + return WelsUpdateRefList (m_pEncoderCtx); +} +void CWelsReference_Screen::EndofUpdateRefList() { + UpdateSrcPicList (m_pEncoderCtx); +} +void CWelsReference_Screen::AfterBuildRefList() { + UpdateBlockStatic (m_pEncoderCtx); +} + +bool CWelsReference_LosslessWithLtr::BuildRefList (const int32_t iPOC, int32_t iBestLtrRefIdx) { + return WelsBuildRefListScreen (m_pEncoderCtx, iPOC, iBestLtrRefIdx); +} +void CWelsReference_LosslessWithLtr::MarkPic() { + WelsMarkPicScreen (m_pEncoderCtx); +} +bool CWelsReference_LosslessWithLtr::UpdateRefList() { + return WelsUpdateRefListScreen (m_pEncoderCtx); +} +void CWelsReference_LosslessWithLtr::EndofUpdateRefList() { + UpdateSrcPicListLosslessScreenRefSelectionWithLtr (m_pEncoderCtx); +} +} // namespace WelsEnc + diff --git 
a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/sample.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/sample.cpp new file mode 100644 index 000000000..78498b08d --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/sample.cpp @@ -0,0 +1,495 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file sample.c + * + * \brief compute SAD and SATD + * + * \date 2009.06.02 Created + * + ************************************************************************************* + */ + +#include "sample.h" +#include "sad_common.h" +#include "intra_pred_common.h" +#include "mc.h" +#include "cpu_core.h" + +namespace WelsEnc { +int32_t WelsSampleSatd4x4_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSample2, int32_t iStride2) { + int32_t iSatdSum = 0; + int32_t pSampleMix[4][4] = {{ 0 }}; + int32_t iSample0, iSample1, iSample2, iSample3; + int32_t i = 0; + uint8_t* pSrc1 = pSample1; + uint8_t* pSrc2 = pSample2; + + //step 1: get the difference + for (i = 0; i < 4; i++) { + pSampleMix[i][0] = pSrc1[0] - pSrc2[0]; + pSampleMix[i][1] = pSrc1[1] - pSrc2[1]; + pSampleMix[i][2] = pSrc1[2] - pSrc2[2]; + pSampleMix[i][3] = pSrc1[3] - pSrc2[3]; + + pSrc1 += iStride1; + pSrc2 += iStride2; + } + + //step 2: horizontal transform + for (i = 0; i < 4; i++) { + iSample0 = pSampleMix[i][0] + pSampleMix[i][2]; + iSample1 = pSampleMix[i][1] + pSampleMix[i][3]; + iSample2 = pSampleMix[i][0] - pSampleMix[i][2]; + iSample3 = pSampleMix[i][1] - pSampleMix[i][3]; + + pSampleMix[i][0] = iSample0 + iSample1; + pSampleMix[i][1] = iSample2 + iSample3; + pSampleMix[i][2] = iSample2 - iSample3; + pSampleMix[i][3] = iSample0 - iSample1; + } + + //step 3: vertical transform and get the sum of SATD + for (i = 0; i < 4; i++) { + iSample0 = pSampleMix[0][i] + pSampleMix[2][i]; + iSample1 = pSampleMix[1][i] + pSampleMix[3][i]; + iSample2 = pSampleMix[0][i] - pSampleMix[2][i]; + iSample3 = pSampleMix[1][i] - pSampleMix[3][i]; + + pSampleMix[0][i] = iSample0 + iSample1; + pSampleMix[1][i] = iSample2 + iSample3; + pSampleMix[2][i] = iSample2 - iSample3; + pSampleMix[3][i] = iSample0 - iSample1; + + iSatdSum += (WELS_ABS (pSampleMix[0][i]) + WELS_ABS (pSampleMix[1][i]) + WELS_ABS (pSampleMix[2][i]) + WELS_ABS ( + pSampleMix[3][i])); + } + + return ((iSatdSum + 1) >> 1); +} + +int32_t 
WelsSampleSatd8x4_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSample2, int32_t iStride2) { + int32_t iSatdSum = 0; + iSatdSum += WelsSampleSatd4x4_c (pSample1, iStride1, pSample2, iStride2); + iSatdSum += WelsSampleSatd4x4_c (pSample1 + 4, iStride1, pSample2 + 4, iStride2); + return iSatdSum; +} + +int32_t WelsSampleSatd4x8_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSample2, int32_t iStride2) { + int32_t iSatdSum = 0; + iSatdSum += WelsSampleSatd4x4_c (pSample1, iStride1, pSample2, iStride2); + iSatdSum += WelsSampleSatd4x4_c (pSample1 + (iStride1 << 2), iStride1, pSample2 + (iStride2 << 2), iStride2); + return iSatdSum; +} + +int32_t WelsSampleSatd8x8_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSample2, int32_t iStride2) { + int32_t iSatdSum = 0; + + iSatdSum += WelsSampleSatd4x4_c (pSample1, iStride1, pSample2, iStride2); + iSatdSum += WelsSampleSatd4x4_c (pSample1 + 4, iStride1, pSample2 + 4, iStride2); + iSatdSum += WelsSampleSatd4x4_c (pSample1 + (iStride1 << 2), iStride1, pSample2 + (iStride2 << 2), iStride2); + iSatdSum += WelsSampleSatd4x4_c (pSample1 + (iStride1 << 2) + 4, iStride1, pSample2 + (iStride2 << 2) + 4, iStride2); + + return iSatdSum; +} +int32_t WelsSampleSatd16x8_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSample2, int32_t iStride2) { + int32_t iSatdSum = 0; + + iSatdSum += WelsSampleSatd8x8_c (pSample1, iStride1, pSample2, iStride2); + iSatdSum += WelsSampleSatd8x8_c (pSample1 + 8, iStride1, pSample2 + 8, iStride2); + + return iSatdSum; +} +int32_t WelsSampleSatd8x16_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSample2, int32_t iStride2) { + int32_t iSatdSum = 0; + + iSatdSum += WelsSampleSatd8x8_c (pSample1, iStride1, pSample2, iStride2); + iSatdSum += WelsSampleSatd8x8_c (pSample1 + (iStride1 << 3), iStride1, pSample2 + (iStride2 << 3), iStride2); + + return iSatdSum; +} +int32_t WelsSampleSatd16x16_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSample2, int32_t iStride2) { + int32_t iSatdSum = 0; + + 
iSatdSum += WelsSampleSatd8x8_c (pSample1, iStride1, pSample2, iStride2); + iSatdSum += WelsSampleSatd8x8_c (pSample1 + 8, iStride1, pSample2 + 8, iStride2); + iSatdSum += WelsSampleSatd8x8_c (pSample1 + (iStride1 << 3), iStride1, pSample2 + (iStride2 << 3), iStride2); + iSatdSum += WelsSampleSatd8x8_c (pSample1 + (iStride1 << 3) + 8, iStride1, pSample2 + (iStride2 << 3) + 8, iStride2); + + return iSatdSum; +} + + +extern void WelsI4x4LumaPredDc_c (uint8_t* pPred, uint8_t* pRef, const int32_t iStride); +extern void WelsI4x4LumaPredH_c (uint8_t* pPred, uint8_t* pRef, const int32_t iStride); +extern void WelsI4x4LumaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t iStride); + +int32_t WelsSampleSatdIntra4x4Combined3_c (uint8_t* pDec, int32_t iDecStride, uint8_t* pEnc, int32_t iEncStride, + uint8_t* pDst, + int32_t* pBestMode, int32_t iLambda2, int32_t iLambda1, int32_t iLambda0) { + int32_t iBestMode = -1; + int32_t iCurCost, iBestCost = INT_MAX; + ENFORCE_STACK_ALIGN_2D (uint8_t, uiLocalBuffer, 3, 16, 16) + + WelsI4x4LumaPredDc_c (uiLocalBuffer[2], pDec, iDecStride); + iCurCost = WelsSampleSatd4x4_c (uiLocalBuffer[2], 4, pEnc, iEncStride) + iLambda2; + if (iCurCost < iBestCost) { + iBestMode = 2; + iBestCost = iCurCost; + } + + WelsI4x4LumaPredH_c (uiLocalBuffer[1], pDec, iDecStride); + iCurCost = WelsSampleSatd4x4_c (uiLocalBuffer[1], 4, pEnc, iEncStride) + iLambda1; + if (iCurCost < iBestCost) { + iBestMode = 1; + iBestCost = iCurCost; + } + WelsI4x4LumaPredV_c (uiLocalBuffer[0], pDec, iDecStride); + iCurCost = WelsSampleSatd4x4_c (uiLocalBuffer[0], 4, pEnc, iEncStride) + iLambda0; + if (iCurCost < iBestCost) { + iBestMode = 0; + iBestCost = iCurCost; + } + + memcpy (pDst, uiLocalBuffer[iBestMode], 16 * sizeof (uint8_t)); // confirmed_safe_unsafe_usage + *pBestMode = iBestMode; + + return iBestCost; +} +extern void WelsIChromaPredDc_c (uint8_t* pPred, uint8_t* pRef, const int32_t iStride); +extern void WelsIChromaPredH_c (uint8_t* pPred, uint8_t* pRef, const 
int32_t iStride); +extern void WelsIChromaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t iStride); + +int32_t WelsSampleSatdIntra8x8Combined3_c (uint8_t* pDecCb, int32_t iDecStride, uint8_t* pEncCb, int32_t iEncStride, + int32_t* pBestMode, int32_t iLambda, uint8_t* pDstChroma, uint8_t* pDecCr, uint8_t* pEncCr) { + int32_t iBestMode = -1; + int32_t iCurCost, iBestCost = INT_MAX; + + WelsIChromaPredV_c (pDstChroma, pDecCb, iDecStride); + WelsIChromaPredV_c (pDstChroma + 64, pDecCr, iDecStride); + iCurCost = WelsSampleSatd8x8_c (pDstChroma, 8, pEncCb, iEncStride); + iCurCost += WelsSampleSatd8x8_c (pDstChroma + 64, 8, pEncCr, iEncStride) + iLambda * 2; + + if (iCurCost < iBestCost) { + iBestMode = 2; + iBestCost = iCurCost; + } + + WelsIChromaPredH_c (pDstChroma, pDecCb, iDecStride); + WelsIChromaPredH_c (pDstChroma + 64, pDecCr, iDecStride); + iCurCost = WelsSampleSatd8x8_c (pDstChroma, 8, pEncCb, iEncStride); + iCurCost += WelsSampleSatd8x8_c (pDstChroma + 64, 8, pEncCr, iEncStride) + iLambda * 2; + if (iCurCost < iBestCost) { + iBestMode = 1; + iBestCost = iCurCost; + } + WelsIChromaPredDc_c (pDstChroma, pDecCb, iDecStride); + WelsIChromaPredDc_c (pDstChroma + 64, pDecCr, iDecStride); + iCurCost = WelsSampleSatd8x8_c (pDstChroma, 8, pEncCb, iEncStride); + iCurCost += WelsSampleSatd8x8_c (pDstChroma + 64, 8, pEncCr, iEncStride); + if (iCurCost < iBestCost) { + iBestMode = 0; + iBestCost = iCurCost; + } + + *pBestMode = iBestMode; + + return iBestCost; + + +} +int32_t WelsSampleSadIntra8x8Combined3_c (uint8_t* pDecCb, int32_t iDecStride, uint8_t* pEncCb, int32_t iEncStride, + int32_t* pBestMode, int32_t iLambda, uint8_t* pDstChroma, uint8_t* pDecCr, uint8_t* pEncCr) { + int32_t iBestMode = -1; + int32_t iCurCost, iBestCost = INT_MAX; + + WelsIChromaPredV_c (pDstChroma, pDecCb, iDecStride); + WelsIChromaPredV_c (pDstChroma + 64, pDecCr, iDecStride); + iCurCost = WelsSampleSad8x8_c (pDstChroma, 8, pEncCb, iEncStride); + iCurCost += WelsSampleSad8x8_c 
(pDstChroma + 64, 8, pEncCr, iEncStride) + iLambda * 2; + + if (iCurCost < iBestCost) { + iBestMode = 2; + iBestCost = iCurCost; + } + + WelsIChromaPredH_c (pDstChroma, pDecCb, iDecStride); + WelsIChromaPredH_c (pDstChroma + 64, pDecCr, iDecStride); + iCurCost = WelsSampleSad8x8_c (pDstChroma, 8, pEncCb, iEncStride); + iCurCost += WelsSampleSad8x8_c (pDstChroma + 64, 8, pEncCr, iEncStride) + iLambda * 2; + if (iCurCost < iBestCost) { + iBestMode = 1; + iBestCost = iCurCost; + } + WelsIChromaPredDc_c (pDstChroma, pDecCb, iDecStride); + WelsIChromaPredDc_c (pDstChroma + 64, pDecCr, iDecStride); + iCurCost = WelsSampleSad8x8_c (pDstChroma, 8, pEncCb, iEncStride); + iCurCost += WelsSampleSad8x8_c (pDstChroma + 64, 8, pEncCr, iEncStride); + if (iCurCost < iBestCost) { + iBestMode = 0; + iBestCost = iCurCost; + } + + *pBestMode = iBestMode; + + return iBestCost; + +} + +extern void WelsI16x16LumaPredDc_c (uint8_t* pPred, uint8_t* pRef, const int32_t iStride); +//extern void WelsI16x16LumaPredH_c (uint8_t* pPred, uint8_t* pRef, const int32_t iStride); +//extern void WelsI16x16LumaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t iStride); + +int32_t WelsSampleSatdIntra16x16Combined3_c (uint8_t* pDec, int32_t iDecStride, uint8_t* pEnc, int32_t iEncStride, + int32_t* pBestMode, int32_t iLambda, uint8_t* pDst) { + int32_t iBestMode = -1; + int32_t iCurCost, iBestCost = INT_MAX; + + WelsI16x16LumaPredV_c (pDst, pDec, iDecStride); + iCurCost = WelsSampleSatd16x16_c (pDst, 16, pEnc, iEncStride); + + if (iCurCost < iBestCost) { + iBestMode = 0; + iBestCost = iCurCost; + } + + WelsI16x16LumaPredH_c (pDst, pDec, iDecStride); + iCurCost = WelsSampleSatd16x16_c (pDst, 16, pEnc, iEncStride) + iLambda * 2; + if (iCurCost < iBestCost) { + iBestMode = 1; + iBestCost = iCurCost; + } + WelsI16x16LumaPredDc_c (pDst, pDec, iDecStride); + iCurCost = WelsSampleSatd16x16_c (pDst, 16, pEnc, iEncStride) + iLambda * 2; + if (iCurCost < iBestCost) { + iBestMode = 2; + iBestCost = iCurCost; + } 
+ + *pBestMode = iBestMode; + + return iBestCost; + + +} +int32_t WelsSampleSadIntra16x16Combined3_c (uint8_t* pDec, int32_t iDecStride, uint8_t* pEnc, int32_t iEncStride, + int32_t* pBestMode, int32_t iLambda, uint8_t* pDst) { + int32_t iBestMode = -1; + int32_t iCurCost, iBestCost = INT_MAX; + + WelsI16x16LumaPredV_c (pDst, pDec, iDecStride); + iCurCost = WelsSampleSad16x16_c (pDst, 16, pEnc, iEncStride); + + if (iCurCost < iBestCost) { + iBestMode = 0; + iBestCost = iCurCost; + } + + WelsI16x16LumaPredH_c (pDst, pDec, iDecStride); + iCurCost = WelsSampleSad16x16_c (pDst, 16, pEnc, iEncStride) + iLambda * 2; + if (iCurCost < iBestCost) { + iBestMode = 1; + iBestCost = iCurCost; + } + WelsI16x16LumaPredDc_c (pDst, pDec, iDecStride); + iCurCost = WelsSampleSad16x16_c (pDst, 16, pEnc, iEncStride) + iLambda * 2; + if (iCurCost < iBestCost) { + iBestMode = 2; + iBestCost = iCurCost; + } + + *pBestMode = iBestMode; + + return iBestCost; + + +} + +void WelsInitSampleSadFunc (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag) { + //pfSampleSad init + pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_16x16] = WelsSampleSad16x16_c; + pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_16x8 ] = WelsSampleSad16x8_c; + pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_8x16 ] = WelsSampleSad8x16_c; + pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_8x8 ] = WelsSampleSad8x8_c; + pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_4x4 ] = WelsSampleSad4x4_c; + pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_8x4 ] = WelsSampleSad8x4_c; + pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_4x8 ] = WelsSampleSad4x8_c; + + //pfSampleSatd init + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x16] = WelsSampleSatd16x16_c; + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x8 ] = WelsSampleSatd16x8_c; + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x16 ] = WelsSampleSatd8x16_c; + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x8 ] = WelsSampleSatd8x8_c; + 
pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_4x4 ] = WelsSampleSatd4x4_c; + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x4 ] = WelsSampleSatd8x4_c; + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_4x8 ] = WelsSampleSatd4x8_c; + + pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_16x16] = WelsSampleSadFour16x16_c; + pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_16x8] = WelsSampleSadFour16x8_c; + pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_8x16] = WelsSampleSadFour8x16_c; + pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_8x8] = WelsSampleSadFour8x8_c; + pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_4x4] = WelsSampleSadFour4x4_c; + pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_8x4] = WelsSampleSadFour8x4_c; + pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_4x8] = WelsSampleSadFour4x8_c; + + pFuncList->sSampleDealingFuncs.pfIntra4x4Combined3Satd = NULL; + pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3Satd = NULL; + pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3Sad = NULL; + pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3Satd = NULL; + pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3Sad = NULL; + +#if defined (X86_ASM) + if (uiCpuFlag & WELS_CPU_MMXEXT) { + pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_4x4 ] = WelsSampleSad4x4_mmx; + } + + if (uiCpuFlag & WELS_CPU_SSE2) { + pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_16x16] = WelsSampleSad16x16_sse2; + pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_16x8 ] = WelsSampleSad16x8_sse2; + pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_8x16] = WelsSampleSad8x16_sse2; + pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_8x8] = WelsSampleSad8x8_sse21; + + pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_16x16] = WelsSampleSadFour16x16_sse2; + pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_16x8] = WelsSampleSadFour16x8_sse2; + pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_8x16] = WelsSampleSadFour8x16_sse2; + 
pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_8x8] = WelsSampleSadFour8x8_sse2; + pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_4x4] = WelsSampleSadFour4x4_sse2; + + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_4x4 ] = WelsSampleSatd4x4_sse2; + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x8 ] = WelsSampleSatd8x8_sse2; + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x16 ] = WelsSampleSatd8x16_sse2; + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x8 ] = WelsSampleSatd16x8_sse2; + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x16] = WelsSampleSatd16x16_sse2; + pFuncList->sSampleDealingFuncs.pfIntra4x4Combined3Satd = WelsSampleSatdThree4x4_sse2; + } + + if (uiCpuFlag & WELS_CPU_SSSE3) { + pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3Sad = WelsIntra16x16Combined3Sad_ssse3; + } + + if (uiCpuFlag & WELS_CPU_SSE41) { + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x16] = WelsSampleSatd16x16_sse41; + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x8] = WelsSampleSatd16x8_sse41; + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x16] = WelsSampleSatd8x16_sse41; + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x8] = WelsSampleSatd8x8_sse41; + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_4x4] = WelsSampleSatd4x4_sse41; + pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3Satd = WelsIntra16x16Combined3Satd_sse41; + pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3Satd = WelsIntraChroma8x8Combined3Satd_sse41; + } +#if defined(HAVE_AVX2) + if (uiCpuFlag & WELS_CPU_AVX2) { + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x16] = WelsSampleSatd16x16_avx2; + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x8] = WelsSampleSatd16x8_avx2; + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x16] = WelsSampleSatd8x16_avx2; + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x8] = WelsSampleSatd8x8_avx2; + } +#endif +#endif //(X86_ASM) + +#if defined (HAVE_NEON) + if (uiCpuFlag & WELS_CPU_NEON) { + 
pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_4x4 ] = WelsSampleSad4x4_neon; + pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_16x16] = WelsSampleSad16x16_neon; + pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_16x8 ] = WelsSampleSad16x8_neon; + pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_8x16] = WelsSampleSad8x16_neon; + pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_8x8] = WelsSampleSad8x8_neon; + + pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_16x16] = WelsSampleSadFour16x16_neon; + pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_16x8] = WelsSampleSadFour16x8_neon; + pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_8x16] = WelsSampleSadFour8x16_neon; + pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_8x8] = WelsSampleSadFour8x8_neon; + pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_4x4] = WelsSampleSadFour4x4_neon; + + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_4x4 ] = WelsSampleSatd4x4_neon; + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x8 ] = WelsSampleSatd8x8_neon; + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x16 ] = WelsSampleSatd8x16_neon; + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x8 ] = WelsSampleSatd16x8_neon; + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x16] = WelsSampleSatd16x16_neon; + + pFuncList->sSampleDealingFuncs.pfIntra4x4Combined3Satd = WelsIntra4x4Combined3Satd_neon; + pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3Satd = WelsIntra8x8Combined3Satd_neon; + pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3Sad = WelsIntra8x8Combined3Sad_neon; + pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3Satd = WelsIntra16x16Combined3Satd_neon; + pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3Sad = WelsIntra16x16Combined3Sad_neon; + } +#endif + +#if defined (HAVE_NEON_AARCH64) + if (uiCpuFlag & WELS_CPU_NEON) { + pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_4x4 ] = WelsSampleSad4x4_AArch64_neon; + pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_16x16] = 
WelsSampleSad16x16_AArch64_neon; + pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_16x8 ] = WelsSampleSad16x8_AArch64_neon; + pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_8x16] = WelsSampleSad8x16_AArch64_neon; + pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_8x8] = WelsSampleSad8x8_AArch64_neon; + + pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_16x16] = WelsSampleSadFour16x16_AArch64_neon; + pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_16x8] = WelsSampleSadFour16x8_AArch64_neon; + pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_8x16] = WelsSampleSadFour8x16_AArch64_neon; + pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_8x8] = WelsSampleSadFour8x8_AArch64_neon; + pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_4x4] = WelsSampleSadFour4x4_AArch64_neon; + + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_4x4 ] = WelsSampleSatd4x4_AArch64_neon; + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x8 ] = WelsSampleSatd8x8_AArch64_neon; + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x16 ] = WelsSampleSatd8x16_AArch64_neon; + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x8 ] = WelsSampleSatd16x8_AArch64_neon; + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x16] = WelsSampleSatd16x16_AArch64_neon; + + pFuncList->sSampleDealingFuncs.pfIntra4x4Combined3Satd = WelsIntra4x4Combined3Satd_AArch64_neon; + pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3Satd = WelsIntra8x8Combined3Satd_AArch64_neon; + pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3Sad = WelsIntra8x8Combined3Sad_AArch64_neon; + pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3Satd = WelsIntra16x16Combined3Satd_AArch64_neon; + pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3Sad = WelsIntra16x16Combined3Sad_AArch64_neon; + } +#endif + +#if defined (HAVE_MMI) + if (uiCpuFlag & WELS_CPU_MMI) { + pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_16x16] = WelsSampleSad16x16_mmi; + pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_16x8 ] = WelsSampleSad16x8_mmi; + 
pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_8x16] = WelsSampleSad8x16_mmi; + pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_8x8] = WelsSampleSad8x8_mmi; + pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_4x4 ] = WelsSampleSad4x4_mmi; + + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_4x4 ] = WelsSampleSatd4x4_mmi; + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x8 ] = WelsSampleSatd8x8_mmi; + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x16 ] = WelsSampleSatd8x16_mmi; + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x8 ] = WelsSampleSatd16x8_mmi; + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x16] = WelsSampleSatd16x16_mmi; + + pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_16x16] = WelsSampleSadFour16x16_mmi; + pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_16x8] = WelsSampleSadFour16x8_mmi; + pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_8x16] = WelsSampleSadFour8x16_mmi; + pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_8x8] = WelsSampleSadFour8x8_mmi; + } +#endif//HAVE_MMI +} + +} // namespace WelsEnc diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/set_mb_syn_cabac.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/set_mb_syn_cabac.cpp new file mode 100644 index 000000000..0d7a2e701 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/set_mb_syn_cabac.cpp @@ -0,0 +1,204 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file set_mb_syn_cabac.cpp + * + * \brief cabac coding engine + * + * \date 10/11/2014 Created + * + ************************************************************************************* + */ +#include +#include "typedefs.h" +#include "macros.h" +#include "set_mb_syn_cabac.h" +#include "encoder.h" +#include "golomb_common.h" + +namespace { + +const int8_t g_kiClz5Table[32] = { + 6, 5, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 +}; + +void PropagateCarry (uint8_t* pBufCur, uint8_t* pBufStart) { + for (; pBufCur > pBufStart; --pBufCur) + if (++ * (pBufCur - 1)) + break; +} + +} // anon ns. 
+ +namespace WelsEnc { + +void WelsCabacInit (void* pCtx) { + sWelsEncCtx* pEncCtx = (sWelsEncCtx*)pCtx; + for (int32_t iModel = 0; iModel < 4; iModel++) { + for (int32_t iQp = 0; iQp <= WELS_QP_MAX; iQp++) + for (int32_t iIdx = 0; iIdx < WELS_CONTEXT_COUNT; iIdx++) { + int32_t m = g_kiCabacGlobalContextIdx[iIdx][iModel][0]; + int32_t n = g_kiCabacGlobalContextIdx[iIdx][iModel][1]; + int32_t iPreCtxState = WELS_CLIP3 ((((m * iQp) >> 4) + n), 1, 126); + uint8_t uiValMps = 0; + uint8_t uiStateIdx = 0; + if (iPreCtxState <= 63) { + uiStateIdx = 63 - iPreCtxState; + uiValMps = 0; + } else { + uiStateIdx = iPreCtxState - 64; + uiValMps = 1; + } + pEncCtx->sWelsCabacContexts[iModel][iQp][iIdx].Set (uiStateIdx, uiValMps); + } + } +} + +void WelsCabacContextInit (void* pCtx, SCabacCtx* pCbCtx, int32_t iModel) { + sWelsEncCtx* pEncCtx = (sWelsEncCtx*)pCtx; + int32_t iIdx = pEncCtx->eSliceType == WelsCommon::I_SLICE ? 0 : iModel + 1; + int32_t iQp = pEncCtx->iGlobalQp; + memcpy (pCbCtx->m_sStateCtx, pEncCtx->sWelsCabacContexts[iIdx][iQp], + WELS_CONTEXT_COUNT * sizeof (SStateCtx)); +} + +void WelsCabacEncodeInit (SCabacCtx* pCbCtx, uint8_t* pBuf, uint8_t* pEnd) { + pCbCtx->m_uiLow = 0; + pCbCtx->m_iLowBitCnt = 9; + pCbCtx->m_iRenormCnt = 0; + pCbCtx->m_uiRange = 510; + pCbCtx->m_pBufStart = pBuf; + pCbCtx->m_pBufEnd = pEnd; + pCbCtx->m_pBufCur = pBuf; +} + +void WelsCabacEncodeUpdateLowNontrivial_ (SCabacCtx* pCbCtx) { + int32_t iLowBitCnt = pCbCtx->m_iLowBitCnt; + int32_t iRenormCnt = pCbCtx->m_iRenormCnt; + cabac_low_t uiLow = pCbCtx->m_uiLow; + + do { + uint8_t* pBufCur = pCbCtx->m_pBufCur; + const int32_t kiInc = CABAC_LOW_WIDTH - 1 - iLowBitCnt; + + uiLow <<= kiInc; + if (uiLow & cabac_low_t (1) << (CABAC_LOW_WIDTH - 1)) + PropagateCarry (pBufCur, pCbCtx->m_pBufStart); + + if (CABAC_LOW_WIDTH > 32) { + WRITE_BE_32 (pBufCur, (uint32_t) (uiLow >> 31)); + pBufCur += 4; + } + *pBufCur++ = (uint8_t) (uiLow >> 23); + *pBufCur++ = (uint8_t) (uiLow >> 15); + iRenormCnt -= 
kiInc; + iLowBitCnt = 15; + uiLow &= (1u << iLowBitCnt) - 1; + pCbCtx->m_pBufCur = pBufCur; + } while (iLowBitCnt + iRenormCnt > CABAC_LOW_WIDTH - 1); + + pCbCtx->m_iLowBitCnt = iLowBitCnt + iRenormCnt; + pCbCtx->m_uiLow = uiLow << iRenormCnt; +} + +void WelsCabacEncodeDecisionLps_ (SCabacCtx* pCbCtx, int32_t iCtx) { + const int32_t kiState = pCbCtx->m_sStateCtx[iCtx].State(); + uint32_t uiRange = pCbCtx->m_uiRange; + uint32_t uiRangeLps = g_kuiCabacRangeLps[kiState][ (uiRange & 0xff) >> 6]; + uiRange -= uiRangeLps; + pCbCtx->m_sStateCtx[iCtx].Set (g_kuiStateTransTable[kiState][0], + pCbCtx->m_sStateCtx[iCtx].Mps() ^ (kiState == 0)); + + WelsCabacEncodeUpdateLow_ (pCbCtx); + pCbCtx->m_uiLow += uiRange; + + const int32_t kiRenormAmount = g_kiClz5Table[uiRangeLps >> 3]; + pCbCtx->m_uiRange = uiRangeLps << kiRenormAmount; + pCbCtx->m_iRenormCnt = kiRenormAmount; +} + +void WelsCabacEncodeTerminate (SCabacCtx* pCbCtx, uint32_t uiBin) { + pCbCtx->m_uiRange -= 2; + if (uiBin) { + WelsCabacEncodeUpdateLow_ (pCbCtx); + pCbCtx->m_uiLow += pCbCtx->m_uiRange; + + const int32_t kiRenormAmount = 7; + pCbCtx->m_uiRange = 2 << kiRenormAmount; + pCbCtx->m_iRenormCnt = kiRenormAmount; + + WelsCabacEncodeUpdateLow_ (pCbCtx); + pCbCtx->m_uiLow |= 0x80; + } else { + const int32_t kiRenormAmount = pCbCtx->m_uiRange >> 8 ^ 1; + pCbCtx->m_uiRange = pCbCtx->m_uiRange << kiRenormAmount; + pCbCtx->m_iRenormCnt += kiRenormAmount; + } +} +void WelsCabacEncodeUeBypass (SCabacCtx* pCbCtx, int32_t iExpBits, uint32_t uiVal) { + int32_t iSufS = uiVal; + int32_t iStopLoop = 0; + int32_t k = iExpBits; + do { + if (iSufS >= (1 << k)) { + WelsCabacEncodeBypassOne (pCbCtx, 1); + iSufS = iSufS - (1 << k); + k++; + } else { + WelsCabacEncodeBypassOne (pCbCtx, 0); + while (k--) + WelsCabacEncodeBypassOne (pCbCtx, (iSufS >> k) & 1); + iStopLoop = 1; + } + } while (!iStopLoop); +} + +void WelsCabacEncodeFlush (SCabacCtx* pCbCtx) { + WelsCabacEncodeTerminate (pCbCtx, 1); + + cabac_low_t uiLow = 
pCbCtx->m_uiLow; + int32_t iLowBitCnt = pCbCtx->m_iLowBitCnt; + uint8_t* pBufCur = pCbCtx->m_pBufCur; + + uiLow <<= CABAC_LOW_WIDTH - 1 - iLowBitCnt; + if (uiLow & cabac_low_t (1) << (CABAC_LOW_WIDTH - 1)) + PropagateCarry (pBufCur, pCbCtx->m_pBufStart); + for (; (iLowBitCnt -= 8) >= 0; uiLow <<= 8) + * pBufCur++ = (uint8_t) (uiLow >> (CABAC_LOW_WIDTH - 9)); + + pCbCtx->m_pBufCur = pBufCur; +} + +uint8_t* WelsCabacEncodeGetPtr (SCabacCtx* pCbCtx) { + return pCbCtx->m_pBufCur; +} +} diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/set_mb_syn_cavlc.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/set_mb_syn_cavlc.cpp new file mode 100644 index 000000000..6231624db --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/set_mb_syn_cavlc.cpp @@ -0,0 +1,318 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file set_mb_syn_cavlc.h + * + * \brief Seting all syntax elements of mb and decoding residual with cavlc + * + * \date 05/19/2009 Created + * + ************************************************************************************* + */ + +#include "svc_set_mb_syn.h" +#include "vlc_encoder.h" +#include "cpu_core.h" +#include "wels_const.h" + +namespace WelsEnc { + +const ALIGNED_DECLARE (uint8_t, g_kuiZeroLeftMap[16], 16) = { + 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7 +}; + + +/* + * Exponential Golomb codes encoding routines + */ + +#define CAVLC_BS_INIT( pBs ) \ + uint8_t * pBufPtr = pBs->pCurBuf; \ + uint32_t uiCurBits = pBs->uiCurBits; \ + int32_t iLeftBits = pBs->iLeftBits; + +#define CAVLC_BS_UNINIT( pBs ) \ + pBs->pCurBuf = pBufPtr; \ + pBs->uiCurBits = uiCurBits; \ + pBs->iLeftBits = iLeftBits; + +#define CAVLC_BS_WRITE( n, v ) \ + { \ + if ( (n) < iLeftBits ) {\ + uiCurBits = (uiCurBits<<(n))|(v);\ + iLeftBits -= (n);\ + }\ + else {\ + (n) -= iLeftBits;\ + uiCurBits = (uiCurBits<>(n));\ + WRITE_BE_32(pBufPtr, uiCurBits);\ + pBufPtr += 4;\ + uiCurBits = (v) & ((1<<(n))-1);\ + iLeftBits = 32 - (n);\ + }\ + } ; + + +int32_t CavlcParamCal_c (int16_t* pCoffLevel, uint8_t* pRun, int16_t* pLevel, int32_t* pTotalCoeff , + int32_t iLastIndex) { + int32_t iTotalZeros = 0; + int32_t iTotalCoeffs = 0; + + while (iLastIndex >= 0 && pCoffLevel[iLastIndex] == 0) { + -- iLastIndex; + } + + while (iLastIndex >= 0) { + 
int32_t iCountZero = 0; + pLevel[iTotalCoeffs] = pCoffLevel[iLastIndex--]; + + while (iLastIndex >= 0 && pCoffLevel[iLastIndex] == 0) { + ++ iCountZero; + -- iLastIndex; + } + iTotalZeros += iCountZero; + pRun[iTotalCoeffs++] = iCountZero; + } + *pTotalCoeff = iTotalCoeffs; + return iTotalZeros; +} + +int32_t WriteBlockResidualCavlc (SWelsFuncPtrList* pFuncList, int16_t* pCoffLevel, int32_t iEndIdx, + int32_t iCalRunLevelFlag, + int32_t iResidualProperty, int8_t iNC, SBitStringAux* pBs) { + ENFORCE_STACK_ALIGN_1D (int16_t, iLevel, 16, 16) + ENFORCE_STACK_ALIGN_1D (uint8_t, uiRun, 16, 16) + + int32_t iTotalCoeffs = 0; + int32_t iTrailingOnes = 0; + int32_t iTotalZeros = 0, iZerosLeft = 0; + uint32_t uiSign = 0; + int32_t iLevelCode = 0, iLevelPrefix = 0, iLevelSuffix = 0, uiSuffixLength = 0, iLevelSuffixSize = 0; + int32_t iValue = 0, iThreshold, iZeroLeft; + int32_t n = 0; + int32_t i = 0; + + + CAVLC_BS_INIT (pBs); + + /*Step 1: calculate iLevel and iRun and total */ + + if (iCalRunLevelFlag) { + int32_t iCount = 0; + iTotalZeros = pFuncList->pfCavlcParamCal (pCoffLevel, uiRun, iLevel, &iTotalCoeffs, iEndIdx); + iCount = (iTotalCoeffs > 3) ? 3 : iTotalCoeffs; + for (i = 0; i < iCount ; i++) { + if (WELS_ABS (iLevel[i]) == 1) { + iTrailingOnes ++; + uiSign <<= 1; + if (iLevel[i] < 0) + uiSign |= 1; + } else { + break; + + } + } + } + /*Step 3: coeff token */ + const uint8_t* upCoeffToken = &g_kuiVlcCoeffToken[g_kuiEncNcMapTable[iNC]][iTotalCoeffs][iTrailingOnes][0]; + iValue = upCoeffToken[0]; + n = upCoeffToken[1]; + + if (iTotalCoeffs == 0) { + CAVLC_BS_WRITE (n, iValue); + + CAVLC_BS_UNINIT (pBs); + return ENC_RETURN_SUCCESS; + } + + /* Step 4: */ + /* trailing */ + n += iTrailingOnes; + iValue = (iValue << iTrailingOnes) + uiSign; + CAVLC_BS_WRITE (n, iValue); + + /* levels */ + uiSuffixLength = (iTotalCoeffs > 10 && iTrailingOnes < 3) ? 
1 : 0; + + for (i = iTrailingOnes; i < iTotalCoeffs; i++) { + int32_t iVal = iLevel[i]; + + iLevelCode = (iVal - 1) * (1 << 1); + uiSign = (iLevelCode >> 31); + iLevelCode = (iLevelCode ^ uiSign) + (uiSign << 1); + iLevelCode -= ((i == iTrailingOnes) && (iTrailingOnes < 3)) << 1; + + iLevelPrefix = iLevelCode >> uiSuffixLength; + iLevelSuffixSize = uiSuffixLength; + iLevelSuffix = iLevelCode - (iLevelPrefix << uiSuffixLength); + + if (iLevelPrefix >= 14 && iLevelPrefix < 30 && uiSuffixLength == 0) { + iLevelPrefix = 14; + iLevelSuffix = iLevelCode - iLevelPrefix; + iLevelSuffixSize = 4; + } else if (iLevelPrefix >= 15) { + iLevelPrefix = 15; + iLevelSuffix = iLevelCode - (iLevelPrefix << uiSuffixLength); + //for baseline profile,overflow when the length of iLevelSuffix is larger than 11. + if (iLevelSuffix >> 11) + return ENC_RETURN_VLCOVERFLOWFOUND; + if (uiSuffixLength == 0) { + iLevelSuffix -= 15; + } + iLevelSuffixSize = 12; + } + + n = iLevelPrefix + 1 + iLevelSuffixSize; + iValue = ((1 << iLevelSuffixSize) | iLevelSuffix); + CAVLC_BS_WRITE (n, iValue); + + uiSuffixLength += !uiSuffixLength; + iThreshold = 3 << (uiSuffixLength - 1); + uiSuffixLength += ((iVal > iThreshold) || (iVal < -iThreshold)) && (uiSuffixLength < 6); + + } + + /* Step 5: total zeros */ + + if (iTotalCoeffs < iEndIdx + 1) { + if (CHROMA_DC != iResidualProperty) { + const uint8_t* upTotalZeros = &g_kuiVlcTotalZeros[iTotalCoeffs][iTotalZeros][0]; + n = upTotalZeros[1]; + iValue = upTotalZeros[0]; + CAVLC_BS_WRITE (n, iValue); + } else { + const uint8_t* upTotalZeros = &g_kuiVlcTotalZerosChromaDc[iTotalCoeffs][iTotalZeros][0]; + n = upTotalZeros[1]; + iValue = upTotalZeros[0]; + CAVLC_BS_WRITE (n, iValue); + } + } + + /* Step 6: pRun before */ + iZerosLeft = iTotalZeros; + for (i = 0; i + 1 < iTotalCoeffs && iZerosLeft > 0; ++ i) { + const uint8_t uirun = uiRun[i]; + iZeroLeft = g_kuiZeroLeftMap[iZerosLeft]; + n = g_kuiVlcRunBefore[iZeroLeft][uirun][1]; + iValue = 
g_kuiVlcRunBefore[iZeroLeft][uirun][0]; + CAVLC_BS_WRITE (n, iValue); + iZerosLeft -= uirun; + } + + CAVLC_BS_UNINIT (pBs); + return ENC_RETURN_SUCCESS; +} + +void StashMBStatusCavlc (SDynamicSlicingStack* pDss, SSlice* pSlice, int32_t iMbSkipRun) { + SBitStringAux* pBs = pSlice->pSliceBsa; + pDss->pBsStackBufPtr = pBs->pCurBuf; + pDss->uiBsStackCurBits = pBs->uiCurBits; + pDss->iBsStackLeftBits = pBs->iLeftBits; + pDss->uiLastMbQp = pSlice->uiLastMbQp; + pDss->iMbSkipRunStack = iMbSkipRun; +} +int32_t StashPopMBStatusCavlc (SDynamicSlicingStack* pDss, SSlice* pSlice) { + SBitStringAux* pBs = pSlice->pSliceBsa; + pBs->pCurBuf = pDss->pBsStackBufPtr; + pBs->uiCurBits = pDss->uiBsStackCurBits; + pBs->iLeftBits = pDss->iBsStackLeftBits; + pSlice->uiLastMbQp = pDss->uiLastMbQp; + return pDss->iMbSkipRunStack; +} +void StashMBStatusCabac (SDynamicSlicingStack* pDss, SSlice* pSlice, int32_t iMbSkipRun) { + SCabacCtx* pCtx = &pSlice->sCabacCtx; + memcpy (&pDss->sStoredCabac, pCtx, sizeof (SCabacCtx)); + if (pDss->pRestoreBuffer) { + int32_t iPosBitOffset = GetBsPosCabac (pSlice) - pDss->iStartPos; + int32_t iLen = ((iPosBitOffset >> 3) + ((iPosBitOffset & 0x07) ? 1 : 0)); + memcpy (pDss->pRestoreBuffer, pCtx->m_pBufStart, iLen); + } + pDss->uiLastMbQp = pSlice->uiLastMbQp; + pDss->iMbSkipRunStack = iMbSkipRun; +} +int32_t StashPopMBStatusCabac (SDynamicSlicingStack* pDss, SSlice* pSlice) { + SCabacCtx* pCtx = &pSlice->sCabacCtx; + memcpy (pCtx, &pDss->sStoredCabac, sizeof (SCabacCtx)); + if (pDss->pRestoreBuffer) { + int32_t iPosBitOffset = GetBsPosCabac (pSlice) - pDss->iStartPos; + int32_t iLen = ((iPosBitOffset >> 3) + ((iPosBitOffset & 0x07) ? 
1 : 0)); + memcpy (pCtx->m_pBufStart, pDss->pRestoreBuffer, iLen); + } + pSlice->uiLastMbQp = pDss->uiLastMbQp; + return pDss->iMbSkipRunStack; +} +int32_t GetBsPosCavlc (SSlice* pSlice) { + return BsGetBitsPos (pSlice->pSliceBsa); +} +int32_t GetBsPosCabac (SSlice* pSlice) { + return (int32_t) ((pSlice->sCabacCtx.m_pBufCur - pSlice->sCabacCtx.m_pBufStart) << 3) + + (pSlice->sCabacCtx.m_iLowBitCnt - 9); +} +void WelsWriteSliceEndSyn (SSlice* pSlice, bool bEntropyCodingModeFlag) { + SBitStringAux* pBs = pSlice->pSliceBsa; + if (bEntropyCodingModeFlag) { + WelsCabacEncodeFlush (&pSlice->sCabacCtx); + pBs->pCurBuf = WelsCabacEncodeGetPtr (&pSlice->sCabacCtx); + + } else { + BsRbspTrailingBits (pBs); + BsFlush (pBs); + } +} +void InitCoeffFunc (SWelsFuncPtrList* pFuncList, const uint32_t uiCpuFlag, int32_t iEntropyCodingModeFlag) { + pFuncList->pfCavlcParamCal = CavlcParamCal_c; + +#if defined(X86_32_ASM) + if (uiCpuFlag & WELS_CPU_SSE2) { + pFuncList->pfCavlcParamCal = CavlcParamCal_sse2; + } +#endif + +#ifdef X86_ASM + if (uiCpuFlag & WELS_CPU_SSE42) { + pFuncList->pfCavlcParamCal = CavlcParamCal_sse42; + } +#endif + if (iEntropyCodingModeFlag) { + pFuncList->pfStashMBStatus = StashMBStatusCabac; + pFuncList->pfStashPopMBStatus = StashPopMBStatusCabac; + pFuncList->pfWelsSpatialWriteMbSyn = WelsSpatialWriteMbSynCabac; + pFuncList->pfGetBsPosition = GetBsPosCabac; + } else { + pFuncList->pfStashMBStatus = StashMBStatusCavlc; + pFuncList->pfStashPopMBStatus = StashPopMBStatusCavlc; + pFuncList->pfWelsSpatialWriteMbSyn = WelsSpatialWriteMbSyn; + pFuncList->pfGetBsPosition = GetBsPosCavlc; + } +} + + +} // namespace WelsEnc diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/slice_multi_threading.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/slice_multi_threading.cpp new file mode 100644 index 000000000..da4fe3d61 --- /dev/null +++ 
b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/slice_multi_threading.cpp @@ -0,0 +1,640 @@ +/*! + * \copy + * Copyright (c) 2010-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file slice_multi_threading.h + * + * \brief pSlice based multiple threading + * + * \date 04/16/2010 Created + * + ************************************************************************************* + */ + + +#include +#if !defined(_WIN32) +#include +#include +#endif//!_WIN32 +#ifndef SEM_NAME_MAX +// length of semaphore name should be system constrained at least on mac 10.7 +#define SEM_NAME_MAX 32 +#endif//SEM_NAME_MAX +#include "slice_multi_threading.h" +#include "mt_defs.h" +#include "nal_encap.h" +#include "utils.h" +#include "encoder.h" +#include "svc_encode_slice.h" +#include "deblocking.h" +#include "svc_enc_golomb.h" +#include "crt_util_safe_x.h" // for safe crt like calls +#include "rc.h" + +#include "cpu.h" + +#include "measure_time.h" +#include "wels_task_management.h" + +#if defined(ENABLE_TRACE_MT) +#define MT_TRACE_LOG(pLog, x, ...) WelsLog(pLog, x, __VA_ARGS__) +#else +#define MT_TRACE_LOG(x, ...) +#endif + +namespace WelsEnc { +void UpdateMbListNeighborParallel (SDqLayer* pCurDq, + SMB* pMbList, + const int32_t uiSliceIdc) { + SSliceCtx* pSliceCtx = &pCurDq->sSliceEncCtx; + const int32_t kiMbWidth = pSliceCtx->iMbWidth; + int32_t iIdx = pCurDq->pFirstMbIdxOfSlice[uiSliceIdc]; + const int32_t kiEndMbInSlice = iIdx + pCurDq->pCountMbNumInSlice[uiSliceIdc] - 1; + + do { + UpdateMbNeighbor (pCurDq, &pMbList[iIdx], kiMbWidth, uiSliceIdc); + ++ iIdx; + } while (iIdx <= kiEndMbInSlice); +} + +void CalcSliceComplexRatio (SDqLayer* pCurDq) { + SSliceCtx* pSliceCtx = &pCurDq->sSliceEncCtx; + SSlice** ppSliceInLayer = pCurDq->ppSliceInLayer; + int32_t iSumAv = 0; + const int32_t kiSliceCount = pSliceCtx->iSliceNumInFrame; + int32_t iSliceIdx = 0; + int32_t iAvI[MAX_SLICES_NUM]; + + assert (kiSliceCount <= MAX_SLICES_NUM); + WelsEmms(); + + while (iSliceIdx < kiSliceCount) { + iAvI[iSliceIdx] = WELS_DIV_ROUND (INT_MULTIPLY * ppSliceInLayer[iSliceIdx]->iCountMbNumInSlice, + ppSliceInLayer[iSliceIdx]->uiSliceConsumeTime); + MT_TRACE_LOG (NULL, 
WELS_LOG_DEBUG, "[MT] CalcSliceComplexRatio(), uiSliceConsumeTime[%d]= %d us, slice_run= %d", + iSliceIdx, + ppSliceInLayer[iSliceIdx]->uiSliceConsumeTime, ppSliceInLayer[iSliceIdx]->iCountMbNumInSlice); + iSumAv += iAvI[iSliceIdx]; + + ++ iSliceIdx; + } + while (-- iSliceIdx >= 0) { + ppSliceInLayer[iSliceIdx]->iSliceComplexRatio = WELS_DIV_ROUND (INT_MULTIPLY * iAvI[iSliceIdx], iSumAv); + } +} + +int32_t NeedDynamicAdjust (SSlice** ppSliceInLayer, const int32_t iSliceNum) { + if (NULL == ppSliceInLayer) { + return false; + } + + uint32_t uiTotalConsume = 0; + int32_t iSliceIdx = 0; + int32_t iNeedAdj = false; + + WelsEmms(); + + while (iSliceIdx < iSliceNum) { + if (NULL == ppSliceInLayer[iSliceIdx]) { + return false; + } + + uiTotalConsume += ppSliceInLayer[iSliceIdx]->uiSliceConsumeTime; + iSliceIdx ++; + } + if (uiTotalConsume == 0) { + MT_TRACE_LOG (NULL, WELS_LOG_DEBUG, + "[MT] NeedDynamicAdjust(), herein do no adjust due first picture, iCountSliceNum= %d", + iSliceNum); + return false; + } + + iSliceIdx = 0; + float fThr = EPSN; // threshold for various cores cases + float fRmse = .0f; // root mean square error of pSlice consume ratios + const float kfMeanRatio = 1.0f / iSliceNum; + do { + const float fRatio = 1.0f * ppSliceInLayer[iSliceIdx]->uiSliceConsumeTime / uiTotalConsume; + const float fDiffRatio = fRatio - kfMeanRatio; + fRmse += (fDiffRatio * fDiffRatio); + ++ iSliceIdx; + } while (iSliceIdx + 1 < iSliceNum); + fRmse = sqrtf (fRmse / iSliceNum); + if (iSliceNum >= 8) { + fThr += THRESHOLD_RMSE_CORE8; + } else if (iSliceNum >= 4) { + fThr += THRESHOLD_RMSE_CORE4; + } else if (iSliceNum >= 2) { + fThr += THRESHOLD_RMSE_CORE2; + } else + fThr = 1.0f; + if (fRmse > fThr) + iNeedAdj = true; + MT_TRACE_LOG (NULL, WELS_LOG_DEBUG, + "[MT] NeedDynamicAdjust(), herein adjustment decision is made (iNeedAdj= %d) by: fRmse of pSlice complexity ratios %.6f, the corresponding threshold %.6f, iCountSliceNum %d", + iNeedAdj, fRmse, fThr, iSliceNum); + + return 
iNeedAdj; +} + +void DynamicAdjustSlicing (sWelsEncCtx* pCtx, + SDqLayer* pCurDqLayer, + int32_t iCurDid) { + SSliceCtx* pSliceCtx = &pCurDqLayer->sSliceEncCtx; + SSlice** ppSliceInLayer = pCurDqLayer->ppSliceInLayer; + const int32_t kiCountSliceNum = pSliceCtx->iSliceNumInFrame; + const int32_t kiCountNumMb = pSliceCtx->iMbNumInFrame; + int32_t iMinimalMbNum = + pSliceCtx->iMbWidth; // in theory we need only 1 SMB, here let it as one SMB row required + int32_t iMaximalMbNum = 0; // dynamically assign later + int32_t iMbNumLeft = kiCountNumMb; + int32_t iRunLen[MAX_THREADS_NUM] = {0}; + int32_t iSliceIdx = 0; + + int32_t iNumMbInEachGom = 0; + SWelsSvcRc* pWelsSvcRc = &pCtx->pWelsSvcRc[iCurDid]; + if (pCtx->pSvcParam->iRCMode != RC_OFF_MODE) { + iNumMbInEachGom = pWelsSvcRc->iNumberMbGom; + + if (iNumMbInEachGom <= 0) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, + "[MT] DynamicAdjustSlicing(), invalid iNumMbInEachGom= %d from RC, iDid= %d, iCountNumMb= %d", iNumMbInEachGom, + iCurDid, kiCountNumMb); + return; + } + + // do not adjust in case no extra iNumMbInEachGom based left for slicing adjustment, + // extra MB of non integrated GOM assigned at the last pSlice in default, keep up on early initial result. 
+ if (iNumMbInEachGom * kiCountSliceNum >= kiCountNumMb) { + return; + } + iMinimalMbNum = iNumMbInEachGom; + } + + if (kiCountSliceNum < 2 || (kiCountSliceNum & 0x01)) // we need suppose uiSliceNum is even for multiple threading + return; + + iMaximalMbNum = kiCountNumMb - (kiCountSliceNum - 1) * iMinimalMbNum; + + WelsEmms(); + + MT_TRACE_LOG (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "[MT] DynamicAdjustSlicing(), iDid= %d, iCountNumMb= %d", iCurDid, + kiCountNumMb); + + iSliceIdx = 0; + while (iSliceIdx + 1 < kiCountSliceNum) { + int32_t iNumMbAssigning = WELS_DIV_ROUND (kiCountNumMb * ppSliceInLayer[iSliceIdx]->iSliceComplexRatio, INT_MULTIPLY); + + // GOM boundary aligned + if (pCtx->pSvcParam->iRCMode != RC_OFF_MODE) { + iNumMbAssigning = iNumMbAssigning / iNumMbInEachGom * iNumMbInEachGom; + } + + // make sure one GOM at least in each pSlice for safe + if (iNumMbAssigning < iMinimalMbNum) + iNumMbAssigning = iMinimalMbNum; + else if (iNumMbAssigning > iMaximalMbNum) + iNumMbAssigning = iMaximalMbNum; + + assert (iNumMbAssigning > 0); + + iMbNumLeft -= iNumMbAssigning; + if (iMbNumLeft <= 0) { // error due to we can not support slice_skip now yet, do not adjust this time + assert (0); + return; + } + iRunLen[iSliceIdx] = iNumMbAssigning; + MT_TRACE_LOG (& (pCtx->sLogCtx), WELS_LOG_DEBUG, + "[MT] DynamicAdjustSlicing(), iSliceIdx= %d, iSliceComplexRatio= %.2f, slice_run_org= %d, slice_run_adj= %d", + iSliceIdx, ppSliceInLayer[iSliceIdx]->iSliceComplexRatio * 1.0f / INT_MULTIPLY, + ppSliceInLayer[iSliceIdx]->iCountMbNumInSlice, + iNumMbAssigning); + ++ iSliceIdx; + iMaximalMbNum = iMbNumLeft - (kiCountSliceNum - iSliceIdx - 1) * iMinimalMbNum; // get maximal num_mb in left parts + } + iRunLen[iSliceIdx] = iMbNumLeft; + MT_TRACE_LOG (& (pCtx->sLogCtx), WELS_LOG_DEBUG, + "[MT] DynamicAdjustSlicing(), iSliceIdx= %d, pSliceComplexRatio= %.2f, slice_run_org= %d, slice_run_adj= %d", + iSliceIdx, ppSliceInLayer[iSliceIdx]->iSliceComplexRatio * 1.0f / INT_MULTIPLY, + 
ppSliceInLayer[iSliceIdx]->iCountMbNumInSlice, iMbNumLeft); + pCurDqLayer->bNeedAdjustingSlicing = !DynamicAdjustSlicePEncCtxAll (pCurDqLayer, iRunLen); +} + +int32_t RequestMtResource (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pCodingParam, const int32_t iCountBsLen, + const int32_t iMaxSliceBufferSize, bool bDynamicSlice) { + CMemoryAlign* pMa = NULL; + SWelsSvcCodingParam* pPara = NULL; + SSliceThreading* pSmt = NULL; + int32_t iNumSpatialLayers = 0; + int32_t iThreadNum = 0; + int32_t iIdx = 0; + int32_t iReturn = ENC_RETURN_SUCCESS; + + if (NULL == ppCtx || NULL == pCodingParam || NULL == *ppCtx || iCountBsLen <= 0) + return 1; +#if defined(ENABLE_TRACE_MT) + SLogContext* pLogCtx = & ((*ppCtx)->sLogCtx); +#endif + pMa = (*ppCtx)->pMemAlign; + pPara = pCodingParam; + iNumSpatialLayers = pPara->iSpatialLayerNum; + iThreadNum = pPara->iMultipleThreadIdc; + + assert (iThreadNum > 0); + + pSmt = (SSliceThreading*)pMa->WelsMalloc (sizeof (SSliceThreading), "SSliceThreading"); + WELS_VERIFY_RETURN_IF (1, (NULL == pSmt)) + memset (pSmt, 0, sizeof (SSliceThreading)); + (*ppCtx)->pSliceThreading = pSmt; + pSmt->pThreadPEncCtx = (SSliceThreadPrivateData*)pMa->WelsMalloc (sizeof (SSliceThreadPrivateData) * iThreadNum, + "pThreadPEncCtx"); + WELS_VERIFY_RETURN_IF (1, (NULL == pSmt->pThreadPEncCtx)) + +#ifdef _WIN32 + // Dummy event namespace, the windows events don't actually use this + WelsSnprintf (pSmt->eventNamespace, sizeof (pSmt->eventNamespace), "%p", (void*) *ppCtx); +#else + WelsSnprintf (pSmt->eventNamespace, sizeof (pSmt->eventNamespace), "%p%x", (void*) *ppCtx, getpid()); +#endif//!_WIN32 + +#ifdef MT_DEBUG + // file handle for MT debug + pSmt->pFSliceDiff = NULL; + + if (pSmt->pFSliceDiff) { + fclose (pSmt->pFSliceDiff); + pSmt->pFSliceDiff = NULL; + } + pSmt->pFSliceDiff = fopen ("slice_time.txt", "wt+"); +#endif//MT_DEBUG + + MT_TRACE_LOG (pLogCtx, WELS_LOG_INFO, "encpEncCtx= 0x%p", (void*) *ppCtx); + + char name[SEM_NAME_MAX] = {0}; + WELS_GCC_UNUSED 
WELS_THREAD_ERROR_CODE err = 0; + + iIdx = 0; + while (iIdx < iThreadNum) { + pSmt->pThreadPEncCtx[iIdx].pWelsPEncCtx = (void*) *ppCtx; + pSmt->pThreadPEncCtx[iIdx].iSliceIndex = iIdx; + pSmt->pThreadPEncCtx[iIdx].iThreadIndex = iIdx; + pSmt->pThreadHandles[iIdx] = 0; + + // length of semaphore name should be system constrained at least on mac 10.7 + WelsSnprintf (name, SEM_NAME_MAX, "ud%d%s", iIdx, pSmt->eventNamespace); + err = WelsEventOpen (&pSmt->pUpdateMbListEvent[iIdx], name); + MT_TRACE_LOG (pLogCtx, WELS_LOG_INFO, "[MT] Open pUpdateMbListEvent%d named(%s) ret%d err%d", iIdx, name, err, errno); + WelsSnprintf (name, SEM_NAME_MAX, "fu%d%s", iIdx, pSmt->eventNamespace); + err = WelsEventOpen (&pSmt->pFinUpdateMbListEvent[iIdx], name); + MT_TRACE_LOG (pLogCtx, WELS_LOG_INFO, "[MT] Open pFinUpdateMbListEvent%d named(%s) ret%d err%d", iIdx, name, err, + errno); + WelsSnprintf (name, SEM_NAME_MAX, "sc%d%s", iIdx, pSmt->eventNamespace); + err = WelsEventOpen (&pSmt->pSliceCodedEvent[iIdx], name); + MT_TRACE_LOG (pLogCtx, WELS_LOG_INFO, "[MT] Open pSliceCodedEvent%d named(%s) ret%d err%d", iIdx, name, err, errno); + WelsSnprintf (name, SEM_NAME_MAX, "rc%d%s", iIdx, pSmt->eventNamespace); + err = WelsEventOpen (&pSmt->pReadySliceCodingEvent[iIdx], name); + MT_TRACE_LOG (pLogCtx, WELS_LOG_INFO, "[MT] Open pReadySliceCodingEvent%d = 0x%p named(%s) ret%d err%d", iIdx, + (void*)pSmt->pReadySliceCodingEvent[iIdx], name, err, errno); + ++ iIdx; + } + + WelsSnprintf (name, SEM_NAME_MAX, "scm%s", pSmt->eventNamespace); + err = WelsEventOpen (&pSmt->pSliceCodedMasterEvent, name); + MT_TRACE_LOG (pLogCtx, WELS_LOG_INFO, "[MT] Open pSliceCodedMasterEvent named(%s) ret%d err%d", name, err, errno); + + iReturn = WelsMutexInit (&pSmt->mutexSliceNumUpdate); + WELS_VERIFY_RETURN_IF (1, (WELS_THREAD_ERROR_OK != iReturn)) + + (*ppCtx)->pTaskManage = IWelsTaskManage::CreateTaskManage (*ppCtx, iNumSpatialLayers, bDynamicSlice); + WELS_VERIFY_RETURN_IF (1, (NULL == 
(*ppCtx)->pTaskManage)) + + int32_t iThreadBufferNum = WELS_MIN ((*ppCtx)->pTaskManage->GetThreadPoolThreadNum(), MAX_THREADS_NUM); + + for (iIdx = 0; iIdx < iThreadBufferNum; iIdx++) { + pSmt->pThreadBsBuffer[iIdx] = (uint8_t*)pMa->WelsMallocz (iCountBsLen, "pSmt->pThreadBsBuffer"); + WELS_VERIFY_RETURN_IF (1, (NULL == pSmt->pThreadBsBuffer[iIdx])) + } + iReturn = WelsMutexInit (&pSmt->mutexThreadBsBufferUsage); + WELS_VERIFY_RETURN_PROC_IF (1, (WELS_THREAD_ERROR_OK != iReturn), FreeMemorySvc (ppCtx)) + + iReturn = WelsMutexInit (&pSmt->mutexEvent); + WELS_VERIFY_RETURN_PROC_IF (1, (WELS_THREAD_ERROR_OK != iReturn), FreeMemorySvc (ppCtx)); + + iReturn = WelsMutexInit (&pSmt->mutexThreadSlcBuffReallocate); + WELS_VERIFY_RETURN_PROC_IF (1, (WELS_THREAD_ERROR_OK != iReturn), FreeMemorySvc (ppCtx)) + + iReturn = WelsMutexInit (& (*ppCtx)->mutexEncoderError); + WELS_VERIFY_RETURN_IF (1, (WELS_THREAD_ERROR_OK != iReturn)) + + MT_TRACE_LOG (pLogCtx, WELS_LOG_INFO, "RequestMtResource(), iThreadNum=%d, iMultipleThreadIdc= %d", + pPara->iMultipleThreadIdc, + (*ppCtx)->iMaxSliceCount); + return 0; +} + +void ReleaseMtResource (sWelsEncCtx** ppCtx) { + SSliceThreading* pSmt = NULL; + CMemoryAlign* pMa = NULL; + int32_t iIdx = 0; + int32_t iThreadNum = 0; + + if (NULL == ppCtx || NULL == *ppCtx) + return; + + pMa = (*ppCtx)->pMemAlign; + iThreadNum = (*ppCtx)->pSvcParam->iMultipleThreadIdc; + pSmt = (*ppCtx)->pSliceThreading; + + if (NULL == pSmt) + return; + + char ename[SEM_NAME_MAX] = {0}; + while (iIdx < iThreadNum) { + // length of semaphore name should be system constrained at least on mac 10.7 + WelsSnprintf (ename, SEM_NAME_MAX, "sc%d%s", iIdx, pSmt->eventNamespace); + WelsEventClose (&pSmt->pSliceCodedEvent[iIdx], ename); + WelsSnprintf (ename, SEM_NAME_MAX, "rc%d%s", iIdx, pSmt->eventNamespace); + WelsEventClose (&pSmt->pReadySliceCodingEvent[iIdx], ename); + WelsSnprintf (ename, SEM_NAME_MAX, "ud%d%s", iIdx, pSmt->eventNamespace); + WelsEventClose 
(&pSmt->pUpdateMbListEvent[iIdx], ename); + WelsSnprintf (ename, SEM_NAME_MAX, "fu%d%s", iIdx, pSmt->eventNamespace); + WelsEventClose (&pSmt->pFinUpdateMbListEvent[iIdx], ename); + + ++ iIdx; + } + WelsSnprintf (ename, SEM_NAME_MAX, "scm%s", pSmt->eventNamespace); + WelsEventClose (&pSmt->pSliceCodedMasterEvent, ename); + + WelsMutexDestroy (&pSmt->mutexSliceNumUpdate); + WelsMutexDestroy (&pSmt->mutexThreadBsBufferUsage); + WelsMutexDestroy (&pSmt->mutexThreadSlcBuffReallocate); + WelsMutexDestroy (& ((*ppCtx)->mutexEncoderError)); + WelsMutexDestroy (&pSmt->mutexEvent); + if (pSmt->pThreadPEncCtx != NULL) { + pMa->WelsFree (pSmt->pThreadPEncCtx, "pThreadPEncCtx"); + pSmt->pThreadPEncCtx = NULL; + } + + for (int i = 0; i < MAX_THREADS_NUM; i++) { + if (pSmt->pThreadBsBuffer[i]) { + pMa->WelsFree (pSmt->pThreadBsBuffer[i], "pSmt->pThreadBsBuffer"); + pSmt->pThreadBsBuffer[i] = NULL; + } + } + memset (&pSmt->bThreadBsBufferUsage, 0, MAX_THREADS_NUM * sizeof (bool)); + + if ((*ppCtx)->pTaskManage != NULL) { + WELS_DELETE_OP ((*ppCtx)->pTaskManage); + } + +#ifdef MT_DEBUG + // file handle for debug + if (pSmt->pFSliceDiff) { + fclose (pSmt->pFSliceDiff); + pSmt->pFSliceDiff = NULL; + } +#endif//MT_DEBUG + pMa->WelsFree ((*ppCtx)->pSliceThreading, "SSliceThreading"); + (*ppCtx)->pSliceThreading = NULL; +} + +int32_t AppendSliceToFrameBs (sWelsEncCtx* pCtx, SLayerBSInfo* pLbi, const int32_t iSliceCount) { + SSlice** ppSliceInlayer = pCtx->pCurDqLayer->ppSliceInLayer; + SWelsSliceBs* pSliceBs = NULL; + int32_t iLayerSize = 0; + int32_t iNalIdxBase = pLbi->iNalCount; + int32_t iSliceIdx = 0; + + iNalIdxBase = pLbi->iNalCount = 0; + while (iSliceIdx < iSliceCount) { + pSliceBs = &ppSliceInlayer[iSliceIdx]->sSliceBs; + if (pSliceBs != NULL && pSliceBs->uiBsPos > 0) { + int32_t iNalIdx = 0; + const int32_t iCountNal = pSliceBs->iNalIndex; + +#if MT_DEBUG_BS_WR + assert (pSliceBs->bSliceCodedFlag); +#endif//MT_DEBUG_BS_WR + + memmove (pCtx->pFrameBs + pCtx->iPosBsBuffer, 
pSliceBs->pBs, pSliceBs->uiBsPos); // confirmed_safe_unsafe_usage + pCtx->iPosBsBuffer += pSliceBs->uiBsPos; + + iLayerSize += pSliceBs->uiBsPos; + + while (iNalIdx < iCountNal) { + pLbi->pNalLengthInByte[iNalIdxBase + iNalIdx] = pSliceBs->iNalLen[iNalIdx]; + ++ iNalIdx; + } + pLbi->iNalCount += iCountNal; + iNalIdxBase += iCountNal; + } + ++ iSliceIdx; + } + + return iLayerSize; +} + +int32_t WriteSliceBs (sWelsEncCtx* pCtx, SWelsSliceBs* pSliceBs, const int32_t iSliceIdx, int32_t& iSliceSize) { + const int32_t kiNalCnt = pSliceBs->iNalIndex; + int32_t iNalIdx = 0; + int32_t iNalSize = 0; + int32_t iReturn = ENC_RETURN_SUCCESS; + int32_t iTotalLeftLength = pSliceBs->uiSize - pSliceBs->uiBsPos; + SNalUnitHeaderExt* pNalHdrExt = &pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt; + uint8_t* pDst = pSliceBs->pBs; + + assert (kiNalCnt <= 2); + if (kiNalCnt > 2) + return 0; + + iSliceSize = 0; + while (iNalIdx < kiNalCnt) { + iNalSize = 0; + iReturn = WelsEncodeNal (&pSliceBs->sNalList[iNalIdx], pNalHdrExt, iTotalLeftLength - iSliceSize, + pDst, &iNalSize); + WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS) + + pSliceBs->iNalLen[iNalIdx] = iNalSize; + iSliceSize += iNalSize; + pDst += iNalSize; + ++ iNalIdx; + } + pSliceBs->uiBsPos = iSliceSize; + + return iReturn; +} + +// thread process for coding one pSlice +int32_t DynamicDetectCpuCores() { + WelsLogicalProcessInfo info; + WelsQueryLogicalProcessInfo (&info); + return info.ProcessorCount; +} + +int32_t AdjustBaseLayer (sWelsEncCtx* pCtx) { + SDqLayer* pCurDq = pCtx->ppDqLayerList[0]; + int32_t iNeedAdj = 1; +#ifdef MT_DEBUG + int64_t iT0 = WelsTime(); +#endif//MT_DEBUG + + pCtx->pCurDqLayer = pCurDq; + + // do not need adjust due to not different at both slices of consumed time + iNeedAdj = NeedDynamicAdjust (pCtx->ppDqLayerList[0]->ppSliceInLayer + , pCurDq->sSliceEncCtx.iSliceNumInFrame); + if (iNeedAdj) + DynamicAdjustSlicing (pCtx, + pCurDq, + 0); +#ifdef MT_DEBUG + iT0 = WelsTime() - iT0; + if 
(pCtx->pSliceThreading->pFSliceDiff) { + fprintf (pCtx->pSliceThreading->pFSliceDiff, + "%6" PRId64" us adjust time at base spatial layer, iNeedAdj %d, DynamicAdjustSlicing()\n", + iT0, iNeedAdj); + } +#endif//MT_DEBUG + + return iNeedAdj; +} + +int32_t AdjustEnhanceLayer (sWelsEncCtx* pCtx, int32_t iCurDid) { +#ifdef MT_DEBUG + int64_t iT1 = WelsTime(); +#endif//MT_DEBUG + int32_t iNeedAdj = 1; + // uiSliceMode of referencing spatial should be SM_FIXEDSLCNUM_SLICE + // if using spatial base layer for complexity estimation + + const bool kbModelingFromSpatial = (pCtx->pCurDqLayer->pRefLayer != NULL && iCurDid > 0) + && (pCtx->pSvcParam->sSpatialLayers[iCurDid - 1].sSliceArgument.uiSliceMode == SM_FIXEDSLCNUM_SLICE + && pCtx->pSvcParam->iMultipleThreadIdc >= pCtx->pSvcParam->sSpatialLayers[iCurDid - + 1].sSliceArgument.uiSliceNum); + + if (kbModelingFromSpatial) { // using spatial base layer for complexity estimation + // do not need adjust due to not different at both slices of consumed time + iNeedAdj = NeedDynamicAdjust (pCtx->ppDqLayerList[iCurDid - 1]->ppSliceInLayer, + pCtx->pCurDqLayer->sSliceEncCtx.iSliceNumInFrame); + if (iNeedAdj) + DynamicAdjustSlicing (pCtx, + pCtx->pCurDqLayer, + iCurDid + ); + } else { // use temporal layer for complexity estimation + // do not need adjust due to not different at both slices of consumed time + iNeedAdj = NeedDynamicAdjust (pCtx->ppDqLayerList[iCurDid]->ppSliceInLayer, + pCtx->pCurDqLayer->sSliceEncCtx.iSliceNumInFrame); + if (iNeedAdj) + DynamicAdjustSlicing (pCtx, + pCtx->pCurDqLayer, + iCurDid + ); + } + +#ifdef MT_DEBUG + iT1 = WelsTime() - iT1; + if (pCtx->pSliceThreading->pFSliceDiff) { + fprintf (pCtx->pSliceThreading->pFSliceDiff, + "%6" PRId64" us adjust time at spatial layer %d, iNeedAdj %d, DynamicAdjustSlicing()\n", + iT1, iCurDid, iNeedAdj); + } +#endif//MT_DEBUG + + return iNeedAdj; +} + + + +#if defined(MT_DEBUG) +void TrackSliceComplexities (sWelsEncCtx* pCtx, const int32_t iCurDid) { + const int32_t 
kiCountSliceNum = pCtx->pCurDqLayer->sSliceEncCtx.iSliceNumInFrame; + SSlice** ppSliceInLayer = pCtx->pCurDqLayer->ppSliceInLayer; + if (kiCountSliceNum > 0) { + int32_t iSliceIdx = 0; + do { + fprintf (pCtx->pSliceThreading->pFSliceDiff, "%6.3f complexity pRatio at iDid %d pSlice %d\n", + ppSliceInLayer[iSliceIdx]->iSliceComplexRatio, iCurDid, iSliceIdx); + ++ iSliceIdx; + } while (iSliceIdx < kiCountSliceNum); + } +} +#endif + +#if defined(MT_DEBUG) +void TrackSliceConsumeTime (sWelsEncCtx* pCtx, int32_t* pDidList, const int32_t iSpatialNum) { + SWelsSvcCodingParam* pPara = NULL; + int32_t iSpatialIdx = 0; + + if (iSpatialNum > MAX_DEPENDENCY_LAYER) + return; + + pPara = pCtx->pSvcParam; + while (iSpatialIdx < iSpatialNum) { + const int32_t kiDid = pDidList[iSpatialIdx]; + SSliceArgument* pSliceArgument = &pPara->sSpatialLayers[kiDid].sSliceArgument; + SDqLayer* pCurDq = pCtx->ppDqLayerList[kiDid]; + SSlice** ppSliceInLayer = pCurDq->ppSliceInLayer; + SSliceCtx* pSliceCtx = &pCurDq->sSliceEncCtx; + const uint32_t kuiCountSliceNum = pSliceCtx->iSliceNumInFrame; + if (pCtx->pSliceThreading) { + if (pCtx->pSliceThreading->pFSliceDiff + && ((pSliceArgument->uiSliceMode == SM_FIXEDSLCNUM_SLICE) || (pSliceArgument->uiSliceMode == SM_SIZELIMITED_SLICE)) + && pPara->iMultipleThreadIdc > 1 + && pPara->iMultipleThreadIdc >= kuiCountSliceNum) { + uint32_t i = 0; + uint32_t uiMaxT = 0; + int32_t iMaxI = 0; + while (i < kuiCountSliceNum) { + fprintf (pCtx->pSliceThreading->pFSliceDiff, "%6d us consume_time coding_idx %d iDid %d pSlice %d\n", + ppSliceInLayer[i]->uiSliceConsumeTime, pCtx->iCodingIndex, kiDid, i /*/ 1000*/); + if (ppSliceInLayer[i]->uiSliceConsumeTime > uiMaxT) { + uiMaxT = ppSliceInLayer[i]->uiSliceConsumeTime; + iMaxI = i; + } + ++ i; + } + fprintf (pCtx->pSliceThreading->pFSliceDiff, "%6d us consume_time_max coding_idx %d iDid %d pSlice %d\n", uiMaxT, + pCtx->iCodingIndex, kiDid, iMaxI /*/ 1000*/); + } + } + ++ iSpatialIdx; + } +} +#endif//#if 
defined(MT_DEBUG) + +void SetOneSliceBsBufferUnderMultithread (sWelsEncCtx* pCtx, const int32_t kiThreadIdx, SSlice* pSlice) { + SWelsSliceBs* pSliceBs = &pSlice->sSliceBs; + pSliceBs->pBsBuffer = pCtx->pSliceThreading->pThreadBsBuffer[kiThreadIdx]; + pSliceBs->uiBsPos = 0; +} +} + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/svc_base_layer_md.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/svc_base_layer_md.cpp new file mode 100644 index 000000000..5acc2d922 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/svc_base_layer_md.cpp @@ -0,0 +1,2041 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file svc_base_layer_md.c + * + * \brief mode decision + * + * \date 2009.08.10 Created + * + ************************************************************************************* + */ +#include "ls_defines.h" +#include "mv_pred.h" +#include "svc_enc_golomb.h" +#include "svc_base_layer_md.h" +#include "encoder.h" +#include "svc_encode_mb.h" +#include "svc_encode_slice.h" +namespace WelsEnc { +static const ALIGNED_DECLARE (int8_t, g_kiIntra16AvaliMode[8][5], 16) = { + { I16_PRED_DC_128, I16_PRED_INVALID, I16_PRED_INVALID, I16_PRED_INVALID, 1 }, + { I16_PRED_DC_L, I16_PRED_H, I16_PRED_INVALID, I16_PRED_INVALID, 2 }, + { I16_PRED_DC_T, I16_PRED_V, I16_PRED_INVALID, I16_PRED_INVALID, 2 }, + { I16_PRED_V, I16_PRED_H, I16_PRED_DC, I16_PRED_INVALID, 3 }, + { I16_PRED_DC_128, I16_PRED_INVALID, I16_PRED_INVALID, I16_PRED_INVALID, 1 }, + { I16_PRED_DC_L, I16_PRED_H, I16_PRED_INVALID, I16_PRED_INVALID, 2 }, + { I16_PRED_DC_T, I16_PRED_V, I16_PRED_INVALID, I16_PRED_INVALID, 2 }, + { I16_PRED_V, I16_PRED_H, I16_PRED_DC, I16_PRED_P, 4 } +}; + +static const ALIGNED_DECLARE (uint8_t, g_kiIntra4AvailCount[16], 16) = { +#ifndef I4_PRED_MODE_EXTEND + 1, 3, 2, 4, 1, 3, 2, 7, 1, 3, 4, 6, 1, 3, 4, 9 +#else + 1, 3, 4, 4, 1, 3, 4, 7, 1, 3, 4, 6, 1, 3, 4, 9 +#endif //I4_PRED_MODE_EXTEND +}; + +//left_avail | (top_avail<<1) | (left_top_avail<<2) | (right_top_avail<<3); +static const ALIGNED_DECLARE (uint8_t, g_kiIntra4AvailMode[16][16], 16) = 
{ + { + I4_PRED_DC_128, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID + }, // 0000 + + { + I4_PRED_DC_L, I4_PRED_H, I4_PRED_HU, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID + }, // 0001 + +#ifndef I4_PRED_MODE_EXTEND + { + I4_PRED_DC_T, I4_PRED_V, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID + }, // 0010 +#else + { + I4_PRED_DC_T, I4_PRED_V, I4_PRED_DDL_TOP, I4_PRED_VL_TOP, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID + }, // 0010 +#endif //I4_PRED_MODE_EXTEND + + { + I4_PRED_DC, I4_PRED_H, I4_PRED_V, I4_PRED_HU, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID + }, // 0011 + + { + I4_PRED_DC_128, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID + }, // 0100 + + { + I4_PRED_DC_L, I4_PRED_H, I4_PRED_HU, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + 
I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID + }, // 0101 + +#ifndef I4_PRED_MODE_EXTEND + { + I4_PRED_DC_T, I4_PRED_V, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID + }, // 0110 +#else + { + I4_PRED_DC_T, I4_PRED_V, I4_PRED_DDL_TOP, I4_PRED_VL_TOP, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID + }, // 0110 +#endif //I4_PRED_MODE_EXTEND + + { + I4_PRED_DC, I4_PRED_H, I4_PRED_V, I4_PRED_HU, + I4_PRED_DDR, I4_PRED_VR, I4_PRED_HD, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID + }, // 0111 + + { + I4_PRED_DC_128, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID + }, // 1000 + + { + I4_PRED_DC_L, I4_PRED_H, I4_PRED_HU, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID + }, // 1001 + + { + I4_PRED_DC_T, I4_PRED_V, I4_PRED_DDL, I4_PRED_VL, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID + }, // 1010 + + { + I4_PRED_DC, I4_PRED_H, I4_PRED_V, I4_PRED_HU, + I4_PRED_DDL, I4_PRED_VL, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, 
I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID + }, // 1011 + + { + I4_PRED_DC_128, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID + }, // 1100 + + { + I4_PRED_DC_L, I4_PRED_H, I4_PRED_HU, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID + }, // 1101 + + { + I4_PRED_DC_T, I4_PRED_V, I4_PRED_DDL, I4_PRED_VL, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID + }, // 1110 + + { + I4_PRED_DC, I4_PRED_H, I4_PRED_V, I4_PRED_HU, + I4_PRED_DDL, I4_PRED_VL, I4_PRED_DDR, I4_PRED_VR, + I4_PRED_HD, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, + I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID, I4_PRED_INVALID + } // 1111 + +}; +static const ALIGNED_DECLARE (int8_t, g_kiIntraChromaAvailMode[8][5], 16) = { + { C_PRED_DC_128, C_PRED_INVALID, C_PRED_INVALID, C_PRED_INVALID, 1 }, + { C_PRED_DC_L, C_PRED_H, C_PRED_INVALID, C_PRED_INVALID, 2 }, + { C_PRED_DC_T, C_PRED_V, C_PRED_INVALID, C_PRED_INVALID, 2 }, + { C_PRED_V, C_PRED_H, C_PRED_DC, C_PRED_INVALID, 3 }, + { C_PRED_DC_128, C_PRED_INVALID, C_PRED_INVALID, C_PRED_INVALID, 1 }, + { C_PRED_DC_L, C_PRED_H, C_PRED_INVALID, C_PRED_INVALID, 2 }, + { C_PRED_DC_T, C_PRED_V, C_PRED_INVALID, C_PRED_INVALID, 2 }, + { C_PRED_V, C_PRED_H, C_PRED_DC, C_PRED_P, 4 } +}; + +// for cache hit, two table are total sizeof 64 Bytes +const int8_t g_kiCoordinateIdx4x4X[16] = { 0, 4, 0, 4, + 8, 12, 8, 12, + 0, 4, 0, 4, + 8, 12, 8, 12 + }; + +const 
int8_t g_kiCoordinateIdx4x4Y[16] = { 0, 0, 4, 4, + 0, 0, 4, 4, + 8, 8, 12, 12, + 8, 8, 12, 12 + }; +static const ALIGNED_DECLARE (int8_t, g_kiNeighborIntraToI4x4[16][16], 16) = { + { 0, 1, 10, 7, 1, 1, 15, 7, 10, 15, 10, 7, 15, 7, 15, 7}, + { 1, 1, 15, 7, 1, 1, 15, 7, 15, 15, 15, 7, 15, 7, 15, 7}, + { 10, 15, 10, 7, 15, 7, 15, 7, 10, 15, 10, 7, 15, 7, 15, 7}, + { 11, 15, 15, 7, 15, 7, 15, 7, 15, 15, 15, 7, 15, 7, 15, 7}, + { 4, 1, 10, 7, 1, 1, 15, 7, 10, 15, 10, 7, 15, 7, 15, 7}, + { 5, 1, 15, 7, 1, 1, 15, 7, 15, 15, 15, 7, 15, 7, 15, 7}, + { 14, 15, 10, 7, 15, 7, 15, 7, 10, 15, 10, 7, 15, 7, 15, 7}, + { 15, 15, 15, 7, 15, 7, 15, 7, 15, 15, 15, 7, 15, 7, 15, 7}, + { 0, 1, 10, 7, 1, 9, 15, 7, 10, 15, 10, 7, 15, 7, 15, 7}, + { 1, 1, 15, 7, 1, 9, 15, 7, 15, 15, 15, 7, 15, 7, 15, 7}, + { 10, 15, 10, 7, 15, 15, 15, 7, 10, 15, 10, 7, 15, 7, 15, 7}, + { 11, 15, 15, 7, 15, 15, 15, 7, 15, 15, 15, 7, 15, 7, 15, 7}, + { 4, 1, 10, 7, 1, 9, 15, 7, 10, 15, 10, 7, 15, 7, 15, 7}, + { 5, 1, 15, 7, 1, 9, 15, 7, 15, 15, 15, 7, 15, 7, 15, 7}, + { 14, 15, 10, 7, 15, 15, 15, 7, 10, 15, 10, 7, 15, 7, 15, 7}, + { 15, 15, 15, 7, 15, 15, 15, 7, 15, 15, 15, 7, 15, 7, 15, 7}, +}; + +ALIGNED_DECLARE (const int8_t, g_kiMapModeI4x4[14], 16) = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 2, 2, 2, 3, 7 +}; + +int32_t PredIntra4x4Mode (int8_t* pIntraPredMode, int32_t iIdx4) { + int8_t iTopMode = pIntraPredMode[iIdx4 - 8]; + int8_t iLeftMode = pIntraPredMode[iIdx4 - 1]; + int8_t iBestMode; + + if (-1 == iLeftMode || -1 == iTopMode) { + iBestMode = 2; + } else { + iBestMode = WELS_MIN (iLeftMode, iTopMode); + } + return iBestMode; +} + +void WelsMdIntraInit (sWelsEncCtx* pEncCtx, SMB* pCurMb, SMbCache* pMbCache, const int32_t iSliceFirstMbXY) { + SDqLayer* pCurLayer = pEncCtx->pCurDqLayer; + + const int32_t kiMbX = pCurMb->iMbX; + const int32_t kiMbY = pCurMb->iMbY; + const int32_t kiMbXY = pCurMb->iMbXY; + + // step 3. 
locating current pEnc and pDec + // unroll loops here + if (0 == kiMbX || iSliceFirstMbXY == kiMbXY) { + int32_t iStrideY, iStrideUV; + int32_t iOffsetY, iOffsetUV; + + iStrideY = pCurLayer->iEncStride[0]; + iStrideUV = pCurLayer->iEncStride[1]; + iOffsetY = (kiMbX + kiMbY * iStrideY) << 4; + iOffsetUV = (kiMbX + kiMbY * iStrideUV) << 3; + pMbCache->SPicData.pEncMb[0] = pCurLayer->pEncData[0] + iOffsetY; + pMbCache->SPicData.pEncMb[1] = pCurLayer->pEncData[1] + iOffsetUV; + pMbCache->SPicData.pEncMb[2] = pCurLayer->pEncData[2] + iOffsetUV; + + iStrideY = pCurLayer->iCsStride[0]; + iStrideUV = pCurLayer->iCsStride[1]; + iOffsetY = (kiMbX + kiMbY * iStrideY) << 4; + iOffsetUV = (kiMbX + kiMbY * iStrideUV) << 3; + pMbCache->SPicData.pCsMb[0] = pCurLayer->pCsData[0] + iOffsetY; + pMbCache->SPicData.pCsMb[1] = pCurLayer->pCsData[1] + iOffsetUV; + pMbCache->SPicData.pCsMb[2] = pCurLayer->pCsData[2] + iOffsetUV; + + iStrideY = pCurLayer->pDecPic->iLineSize[0]; + iStrideUV = pCurLayer->pDecPic->iLineSize[1]; + iOffsetY = (kiMbX + kiMbY * iStrideY) << 4; + iOffsetUV = (kiMbX + kiMbY * iStrideUV) << 3; + pMbCache->SPicData.pDecMb[0] = pCurLayer->pDecPic->pData[0] + iOffsetY; + pMbCache->SPicData.pDecMb[1] = pCurLayer->pDecPic->pData[1] + iOffsetUV; + pMbCache->SPicData.pDecMb[2] = pCurLayer->pDecPic->pData[2] + iOffsetUV; + } else { + pMbCache->SPicData.pEncMb[0] += MB_WIDTH_LUMA; + pMbCache->SPicData.pEncMb[1] += MB_WIDTH_CHROMA; + pMbCache->SPicData.pEncMb[2] += MB_WIDTH_CHROMA; + + pMbCache->SPicData.pDecMb[0] += MB_WIDTH_LUMA; + pMbCache->SPicData.pDecMb[1] += MB_WIDTH_CHROMA; + pMbCache->SPicData.pDecMb[2] += MB_WIDTH_CHROMA; + + pMbCache->SPicData.pCsMb[0] += MB_WIDTH_LUMA; + pMbCache->SPicData.pCsMb[1] += MB_WIDTH_CHROMA; + pMbCache->SPicData.pCsMb[2] += MB_WIDTH_CHROMA; + } + + //step 2. initial pWelsMd + pCurMb->uiCbp = 0; + + //step 4: locating scaled_tcoeff + + //step 1. 
load neighbor cache + FillNeighborCacheIntra (pMbCache, pCurMb, pCurLayer->iMbWidth); + pMbCache->pMemPredLuma = pMbCache->pMemPredMb;// in WelsMdI16x16() will be changed, so re-init here! + pMbCache->pMemPredChroma = pMbCache->pMemPredMb + + 256;// Init with default, maybe change in WelsMdI16x16 and svc_md_i16x16_sad +} + +void WelsMdInterInit (sWelsEncCtx* pEncCtx, SSlice* pSlice, SMB* pCurMb, const int32_t iSliceFirstMbXY) { + SDqLayer* pCurLayer = pEncCtx->pCurDqLayer; + SMbCache* pMbCache = &pSlice->sMbCacheInfo; + const int32_t kiMbX = pCurMb->iMbX; + const int32_t kiMbY = pCurMb->iMbY; + const int32_t kiMbXY = pCurMb->iMbXY; + const int32_t kiMbWidth = pCurLayer->iMbWidth; + const int32_t kiMbHeight = pCurLayer->iMbHeight; + + pMbCache->pEncSad = &pCurLayer->pDecPic->pMbSkipSad[kiMbXY]; + + //step 1. load neighbor cache + pEncCtx->pFuncList->pfFillInterNeighborCache (pMbCache, pCurMb, kiMbWidth, + pEncCtx->pVaa->pVaaBackgroundMbFlag + kiMbXY); //BGD spatial pFunc + + //step 3: initial cost + + //step 4. 
locating current p_ref + // merge loops + if (0 == kiMbX || iSliceFirstMbXY == kiMbXY) { + const int32_t kiRefStrideY = pCurLayer->pRefPic->iLineSize[0]; + const int32_t kiRefStrideUV = pCurLayer->pRefPic->iLineSize[1]; + const int32_t kiCurStrideY = (kiMbX + kiMbY * kiRefStrideY) << 4; + const int32_t kiCurStrideUV = (kiMbX + kiMbY * kiRefStrideUV) << 3; + pMbCache->SPicData.pRefMb[0] = pCurLayer->pRefPic->pData[0] + kiCurStrideY; + pMbCache->SPicData.pRefMb[1] = pCurLayer->pRefPic->pData[1] + kiCurStrideUV; + pMbCache->SPicData.pRefMb[2] = pCurLayer->pRefPic->pData[2] + kiCurStrideUV; + } else { + pMbCache->SPicData.pRefMb[0] += MB_WIDTH_LUMA; + pMbCache->SPicData.pRefMb[1] += MB_WIDTH_CHROMA; + pMbCache->SPicData.pRefMb[2] += MB_WIDTH_CHROMA; + } + + pMbCache->uiRefMbType = pCurLayer->pRefPic->uiRefMbType[kiMbXY]; + pMbCache->bCollocatedPredFlag = false; + + //comment: sometimes, mode decision process may skip the md_p16x16 and md_pskip function, + ST32 (&pCurMb->sP16x16Mv, 0); + ST32 (&pCurLayer->pDecPic->sMvList[kiMbXY], 0); + + SetMvWithinIntegerMvRange (kiMbWidth, kiMbHeight, kiMbX, kiMbY, pEncCtx->iMvRange, & (pSlice->sMvStartMin), + & (pSlice->sMvStartMax)); +} + +int32_t WelsMdI16x16 (SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SMbCache* pMbCache, int32_t iLambda) { + const int8_t* kpAvailMode; + int32_t iAvailCount; + int32_t iIdx = 0; + uint8_t* pPredI16x16[2] = {pMbCache->pMemPredMb, pMbCache->pMemPredMb + 256}; + uint8_t* pDst = pPredI16x16[0]; + uint8_t* pDec = pMbCache->SPicData.pCsMb[0]; + uint8_t* pEnc = pMbCache->SPicData.pEncMb[0]; + int32_t iLineSizeDec = pCurDqLayer->iCsStride[0]; + int32_t iLineSizeEnc = pCurDqLayer->iEncStride[0]; + int32_t i, iCurCost, iCurMode, iBestMode, iBestCost = INT_MAX; + + int32_t iOffset = pMbCache->uiNeighborIntra & 0x07; + iAvailCount = g_kiIntra16AvaliMode[iOffset][4]; + kpAvailMode = g_kiIntra16AvaliMode[iOffset]; + if (iAvailCount > 3 && pFunc->sSampleDealingFuncs.pfIntra16x16Combined3) { + iBestCost = 
pFunc->sSampleDealingFuncs.pfIntra16x16Combined3 (pDec, iLineSizeDec, pEnc, iLineSizeEnc, &iBestMode, + iLambda, pDst/*temp*/); + iCurMode = kpAvailMode[3]; + pFunc->pfGetLumaI16x16Pred[iCurMode] (pDst, pDec, iLineSizeDec); + iCurCost = pFunc->sSampleDealingFuncs.pfMdCost[BLOCK_16x16] (pDst, 16, pEnc, iLineSizeEnc) + iLambda * 4 ; + if (iCurCost < iBestCost) { + iBestMode = iCurMode; + iBestCost = iCurCost; + } else { + pFunc->pfGetLumaI16x16Pred[iBestMode] (pDst, pDec, iLineSizeDec); + } + iIdx = 1; + iBestCost += iLambda; + } else { + iBestMode = kpAvailMode[0]; + for (i = 0; i < iAvailCount; ++ i) { + iCurMode = kpAvailMode[i]; + + assert (iCurMode >= 0 && iCurMode < 7); + + pFunc->pfGetLumaI16x16Pred[iCurMode] (pDst, pDec, iLineSizeDec); + iCurCost = pFunc->sSampleDealingFuncs.pfMdCost[BLOCK_16x16] (pDst, 16, pEnc, iLineSizeEnc); + iCurCost += iLambda * (BsSizeUE (g_kiMapModeI16x16[iCurMode])); + if (iCurCost < iBestCost) { + iBestMode = iCurMode; + iBestCost = iCurCost; + iIdx = iIdx ^ 0x01; + pDst = pPredI16x16[iIdx]; + } + } + } + pMbCache->pMemPredChroma = pPredI16x16[iIdx]; + + pMbCache->pMemPredLuma = pPredI16x16[iIdx ^ 0x01]; + pMbCache->uiLumaI16x16Mode = iBestMode; + return iBestCost; +} +int32_t WelsMdI4x4 (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurMb, SMbCache* pMbCache) { + SWelsFuncPtrList* pFunc = pEncCtx->pFuncList; + SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer; + int32_t iLambda = pWelsMd->iLambda; + int32_t iBestCostLuma = pWelsMd->iCostLuma; + uint8_t* pEncMb = pMbCache->SPicData.pEncMb[0]; + uint8_t* pDecMb = pMbCache->SPicData.pCsMb[0]; + const int32_t kiLineSizeEnc = pCurDqLayer->iEncStride[0]; + const int32_t kiLineSizeDec = pCurDqLayer->iCsStride[0]; + + uint8_t* pCurEnc, *pCurDec, *pDst; + + int32_t iPredMode, iCurMode, iBestMode, iFinalMode; + int32_t iCurCost, iBestCost; + int32_t iAvailCount; + const uint8_t* kpAvailMode; + int32_t i, j, iCoordinateX, iCoordinateY, iIdxStrideEnc, iIdxStrideDec; + int32_t lambda[2] = {iLambda 
<< 2, iLambda}; + bool* pPrevIntra4x4PredModeFlag = pMbCache->pPrevIntra4x4PredModeFlag; + int8_t* pRemIntra4x4PredModeFlag = pMbCache->pRemIntra4x4PredModeFlag; + const uint8_t* kpIntra4x4AvailCount = &g_kiIntra4AvailCount[0]; + const uint8_t* kpCache48CountScan4 = &g_kuiCache48CountScan4Idx[0]; + const int8_t* kpNeighborIntraToI4x4 = g_kiNeighborIntraToI4x4[pMbCache->uiNeighborIntra]; + const int8_t* kpCoordinateIdxX = &g_kiCoordinateIdx4x4X[0]; + const int8_t* kpCoordinateIdxY = &g_kiCoordinateIdx4x4Y[0]; + int32_t iBestPredBufferNum = 0; + int32_t iCosti4x4 = 0; + +#if defined(X86_ASM) + WelsPrefetchZero_mmx (g_kiMapModeI4x4); + WelsPrefetchZero_mmx ((int8_t*)&pFunc->pfGetLumaI4x4Pred); +#endif//X86_ASM + + for (i = 0; i < 16; i++) { + const int32_t kiOffset = kpNeighborIntraToI4x4[i]; + + //step 1: locating current 4x4 block position in pEnc and pDecMb + iCoordinateX = kpCoordinateIdxX[i]; + iCoordinateY = kpCoordinateIdxY[i]; + + iIdxStrideEnc = (iCoordinateY * kiLineSizeEnc) + iCoordinateX; + pCurEnc = pEncMb + iIdxStrideEnc; + iIdxStrideDec = (iCoordinateY * kiLineSizeDec) + iCoordinateX; + pCurDec = pDecMb + iIdxStrideDec; + + //step 2: get predicted mode from neighbor + iPredMode = PredIntra4x4Mode (pMbCache->iIntraPredMode, kpCache48CountScan4[i]); + + //step 3: collect candidates of iPredMode + iAvailCount = kpIntra4x4AvailCount[kiOffset]; + kpAvailMode = g_kiIntra4AvailMode[kiOffset]; + + //step 4: gain the best pred mode + iBestCost = INT_MAX; + iBestMode = kpAvailMode[0]; + + if (pFunc->sSampleDealingFuncs.pfIntra4x4Combined3 && (iAvailCount >= 6)) { + pDst = &pMbCache->pMemPredBlk4[iBestPredBufferNum << 4]; + + iBestCost = pFunc->sSampleDealingFuncs.pfIntra4x4Combined3 (pCurDec, kiLineSizeDec, pCurEnc, kiLineSizeEnc, pDst, + &iBestMode, + lambda[iPredMode == 2], lambda[iPredMode == 1], lambda[iPredMode == 0]); + // ST64(&pMbCache->pMemPredBlk4[iBestMode<<4], LD64(mem_pred_blk4_temp)); + // ST64(&pMbCache->pMemPredBlk4[8+(iBestMode<<4)], 
LD64(mem_pred_blk4_temp+8)); + + for (j = 3; j < iAvailCount; ++ j) { + iCurMode = kpAvailMode[j]; + + assert (iCurMode >= 0 && iCurMode < 14); + + pDst = &pMbCache->pMemPredBlk4[ (1 - iBestPredBufferNum) << 4]; + + pFunc->pfGetLumaI4x4Pred[iCurMode] (pDst, pCurDec, kiLineSizeDec); + iCurCost = pFunc->sSampleDealingFuncs.pfSampleSatd[BLOCK_4x4] (pDst, 4, pCurEnc, kiLineSizeEnc) + + lambda[iPredMode == g_kiMapModeI4x4[iCurMode]]; + + if (iCurCost < iBestCost) { + iBestMode = iCurMode; + iBestCost = iCurCost; + iBestPredBufferNum = 1 - iBestPredBufferNum; + } + } + } else { + for (j = 0; j < iAvailCount; ++ j) { + iCurMode = kpAvailMode[j]; + + assert (iCurMode >= 0 && iCurMode < 14); + + pDst = &pMbCache->pMemPredBlk4[ (1 - iBestPredBufferNum) << 4]; + + pFunc->pfGetLumaI4x4Pred[iCurMode] (pDst, pCurDec, kiLineSizeDec); + iCurCost = pFunc->sSampleDealingFuncs.pfSampleSatd[BLOCK_4x4] (pDst, 4, pCurEnc, kiLineSizeEnc) + + lambda[iPredMode == g_kiMapModeI4x4[iCurMode]]; + + if (iCurCost < iBestCost) { + iBestMode = iCurMode; + iBestCost = iCurCost; + iBestPredBufferNum = 1 - iBestPredBufferNum; + } + } + } + pMbCache->pBestPredI4x4Blk4 = &pMbCache->pMemPredBlk4[iBestPredBufferNum << 4]; + iCosti4x4 += iBestCost; + if (iCosti4x4 >= iBestCostLuma) { + break; + } + + //step 5: update pred mode and sample avail cache + iFinalMode = g_kiMapModeI4x4[iBestMode]; + if (iPredMode == iFinalMode) { + *pPrevIntra4x4PredModeFlag++ = true; + } else { + *pPrevIntra4x4PredModeFlag++ = false; + *pRemIntra4x4PredModeFlag = (iFinalMode < iPredMode ? 
iFinalMode : (iFinalMode - 1)); + } + pRemIntra4x4PredModeFlag++; + // pCurMb->pIntra4x4PredMode[g_kuiMbCountScan4Idx[i]] = iFinalMode; + pMbCache->iIntraPredMode[kpCache48CountScan4[i]] = iFinalMode; + + //step 6: encoding I_4x4 + WelsEncRecI4x4Y (pEncCtx, pCurMb, pMbCache, i); + } + ST32 (pCurMb->pIntra4x4PredMode, LD32 (&pMbCache->iIntraPredMode[33])); + pCurMb->pIntra4x4PredMode[4] = pMbCache->iIntraPredMode[12]; + pCurMb->pIntra4x4PredMode[5] = pMbCache->iIntraPredMode[20]; + pCurMb->pIntra4x4PredMode[6] = pMbCache->iIntraPredMode[28]; + iCosti4x4 += (iLambda << 4) + (iLambda << 3); //4*6*lambda from JVT SATD0 + return iCosti4x4; +} + +int32_t WelsMdI4x4Fast (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurMb, SMbCache* pMbCache) { + SWelsFuncPtrList* pFunc = pEncCtx->pFuncList; + SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer; + int32_t iLambda = pWelsMd->iLambda; + int32_t iBestCostLuma = pWelsMd->iCostLuma; + uint8_t* pEncMb = pMbCache->SPicData.pEncMb[0]; + uint8_t* pDecMb = pMbCache->SPicData.pCsMb[0]; + const int32_t kiLineSizeEnc = pCurDqLayer->iEncStride[0]; + const int32_t kiLineSizeDec = pCurDqLayer->iCsStride[0]; + + uint8_t* pCurEnc, *pCurDec, *pDst; + int8_t iPredMode, iCurMode, iBestMode, iFinalMode; + int32_t iCurCost, iBestCost; + int32_t iAvailCount; + const uint8_t* kpAvailMode; + int32_t i, j, iCoordinateX, iCoordinateY, iIdxStrideEnc, iIdxStrideDec; + int32_t iCostH, iCostV, iCostVR, iCostHD, iCostVL, iCostHU, iBestModeFake; + int32_t lambda[2] = {iLambda << 2, iLambda}; + bool* pPrevIntra4x4PredModeFlag = pMbCache->pPrevIntra4x4PredModeFlag; + int8_t* pRemIntra4x4PredModeFlag = pMbCache->pRemIntra4x4PredModeFlag; + const uint8_t* kpIntra4x4AvailCount = &g_kiIntra4AvailCount[0]; + const uint8_t* kpCache48CountScan4 = &g_kuiCache48CountScan4Idx[0]; + const int8_t* kpNeighborIntraToI4x4 = g_kiNeighborIntraToI4x4[pMbCache->uiNeighborIntra]; + const int8_t* kpCoordinateIdxX = &g_kiCoordinateIdx4x4X[0]; + const int8_t* kpCoordinateIdxY = 
&g_kiCoordinateIdx4x4Y[0]; + int32_t iBestPredBufferNum = 0; + int32_t iCosti4x4 = 0; +#if defined(X86_ASM) + WelsPrefetchZero_mmx (g_kiMapModeI4x4); + WelsPrefetchZero_mmx ((int8_t*)&pFunc->pfGetLumaI4x4Pred); +#endif//X86_ASM + + for (i = 0; i < 16; i++) { + const int32_t kiOffset = kpNeighborIntraToI4x4[i]; +// const int32_t i_next = (1+i) & 15; // next loop +// const uint8_t dummy_byte= pIntra4x4AvailCount[pNeighborIntraToI4x4[i_next]]; // prefetch pIntra4x4AvailCount of next loop to avoid cache missed + + //step 1: locating current 4x4 block position in pEnc and pDecMb + iCoordinateX = kpCoordinateIdxX[i]; + iCoordinateY = kpCoordinateIdxY[i]; + + iIdxStrideEnc = (iCoordinateY * kiLineSizeEnc) + iCoordinateX; + pCurEnc = pEncMb + iIdxStrideEnc; + iIdxStrideDec = (iCoordinateY * kiLineSizeDec) + iCoordinateX; + pCurDec = pDecMb + iIdxStrideDec; + + //step 2: get predicted mode from neighbor + iPredMode = PredIntra4x4Mode (pMbCache->iIntraPredMode, kpCache48CountScan4[i]); + //step 3: collect candidates of iPredMode + iAvailCount = kpIntra4x4AvailCount[kiOffset]; + kpAvailMode = g_kiIntra4AvailMode[kiOffset]; + + if (iAvailCount == 9 || iAvailCount == 7) { + //I4_PRED_DC(2) + + iBestMode = I4_PRED_DC; + + pDst = &pMbCache->pMemPredBlk4[iBestPredBufferNum << 4]; + + pFunc->pfGetLumaI4x4Pred[I4_PRED_DC] (pDst, pCurDec, kiLineSizeDec); + iBestCost = pFunc->sSampleDealingFuncs.pfMdCost[BLOCK_4x4] (pDst, 4, pCurEnc, kiLineSizeEnc) + + lambda[iPredMode == g_kiMapModeI4x4[iBestMode]]; + + //I4_PRED_H(1) + iCurMode = I4_PRED_H; + + pDst = &pMbCache->pMemPredBlk4[ (1 - iBestPredBufferNum) << 4]; + + pFunc->pfGetLumaI4x4Pred[iCurMode] (pDst, pCurDec, kiLineSizeDec); + iCostH = iCurCost = pFunc->sSampleDealingFuncs.pfMdCost[BLOCK_4x4] (pDst, 4, pCurEnc, kiLineSizeEnc) + + lambda[iPredMode == g_kiMapModeI4x4[iCurMode]]; + + if (iCurCost < iBestCost) { + iBestMode = iCurMode; + iBestCost = iCurCost; + iBestPredBufferNum = 1 - iBestPredBufferNum; + } + + //I4_PRED_V(0) + 
iCurMode = I4_PRED_V; + + pDst = &pMbCache->pMemPredBlk4[ (1 - iBestPredBufferNum) << 4]; + + pFunc->pfGetLumaI4x4Pred[iCurMode] (pDst, pCurDec, kiLineSizeDec); + iCostV = iCurCost = pFunc->sSampleDealingFuncs.pfMdCost[BLOCK_4x4] (pDst, 4, pCurEnc, kiLineSizeEnc) + + lambda[iPredMode == g_kiMapModeI4x4[iCurMode]]; + + if (iCurCost < iBestCost) { + iBestMode = iCurMode; + iBestCost = iCurCost; + iBestPredBufferNum = 1 - iBestPredBufferNum; + } + if (iCostV < iCostH) { + if (iAvailCount == 9) { + iBestModeFake = true; //indicating whether V is the best fake mode + + //I4_PRED_VR(5) and I4_PRED_VL(7) + iCurMode = I4_PRED_VR; + + pDst = &pMbCache->pMemPredBlk4[ (1 - iBestPredBufferNum) << 4]; + + pFunc->pfGetLumaI4x4Pred[iCurMode] (pDst, pCurDec, kiLineSizeDec); + iCostVR = iCurCost = pFunc->sSampleDealingFuncs.pfMdCost[BLOCK_4x4] (pDst, 4, pCurEnc, kiLineSizeEnc) + + lambda[iPredMode == g_kiMapModeI4x4[iCurMode]]; + + if (iCurCost < iBestCost) { + iBestMode = iCurMode; + iBestCost = iCurCost; + iBestPredBufferNum = 1 - iBestPredBufferNum; + } + + if (iCurCost < iCostV) + iBestModeFake = false; + + iCurMode = I4_PRED_VL; + + pDst = &pMbCache->pMemPredBlk4[ (1 - iBestPredBufferNum) << 4]; + + pFunc->pfGetLumaI4x4Pred[iCurMode] (pDst, pCurDec, kiLineSizeDec); + iCostVL = iCurCost = pFunc->sSampleDealingFuncs.pfMdCost[BLOCK_4x4] (pDst, 4, pCurEnc, kiLineSizeEnc) + + lambda[iPredMode == g_kiMapModeI4x4[iCurMode]]; + + if (iCurCost < iBestCost) { + iBestMode = iCurMode; + iBestCost = iCurCost; + iBestPredBufferNum = 1 - iBestPredBufferNum; + } + + if (iCurCost < iCostV) + iBestModeFake = false; + + //Vertical Early Determination + if (!iBestModeFake) { //Vertical is not the best, go on checking... 
+ //select the best one from VL and VR + if (iCostVR < iCostVL) { + //I4_PRED_DDR(4) + iCurMode = I4_PRED_DDR; + + pDst = &pMbCache->pMemPredBlk4[ (1 - iBestPredBufferNum) << 4]; + + pFunc->pfGetLumaI4x4Pred[iCurMode] (pDst, pCurDec, kiLineSizeDec); + + iCurCost = pFunc->sSampleDealingFuncs.pfMdCost[BLOCK_4x4] (pDst, 4, pCurEnc, kiLineSizeEnc) + + lambda[iPredMode == g_kiMapModeI4x4[iCurMode]]; + + if (iCurCost < iBestCost) { + iBestMode = iCurMode; + iBestCost = iCurCost; + iBestPredBufferNum = 1 - iBestPredBufferNum; + } + } else { + //I4_PRED_DDL(3) + iCurMode = I4_PRED_DDL; + + pDst = &pMbCache->pMemPredBlk4[ (1 - iBestPredBufferNum) << 4]; + + pFunc->pfGetLumaI4x4Pred[iCurMode] (pDst, pCurDec, kiLineSizeDec); + + iCurCost = pFunc->sSampleDealingFuncs.pfMdCost[BLOCK_4x4] (pDst, 4, pCurEnc, kiLineSizeEnc) + + lambda[iPredMode == g_kiMapModeI4x4[iCurMode]]; + + if (iCurCost < iBestCost) { + iBestMode = iCurMode; + iBestCost = iCurCost; + iBestPredBufferNum = 1 - iBestPredBufferNum; + } + } + } + } else if (iAvailCount == 7) { + iCurMode = I4_PRED_DDR; + + pDst = &pMbCache->pMemPredBlk4[ (1 - iBestPredBufferNum) << 4]; + + pFunc->pfGetLumaI4x4Pred[iCurMode] (pDst, pCurDec, kiLineSizeDec); + iCurCost = pFunc->sSampleDealingFuncs.pfMdCost[BLOCK_4x4] (pDst, 4, pCurEnc, kiLineSizeEnc) + + lambda[iPredMode == g_kiMapModeI4x4[iCurMode]]; + + if (iCurCost < iBestCost) { + iBestMode = iCurMode; + iBestCost = iCurCost; + iBestPredBufferNum = 1 - iBestPredBufferNum; + } + + iCurMode = I4_PRED_VR; + + pDst = &pMbCache->pMemPredBlk4[ (1 - iBestPredBufferNum) << 4]; + + pFunc->pfGetLumaI4x4Pred[iCurMode] (pDst, pCurDec, kiLineSizeDec); + + iCurCost = pFunc->sSampleDealingFuncs.pfMdCost[BLOCK_4x4] (pDst, 4, pCurEnc, kiLineSizeEnc) + + lambda[iPredMode == g_kiMapModeI4x4[iCurMode]]; + + if (iCurCost < iBestCost) { + iBestMode = iCurMode; + iBestCost = iCurCost; + iBestPredBufferNum = 1 - iBestPredBufferNum; + } + } + } else { + iBestModeFake = true; //indicating whether H is the 
best fake mode + //I4_PRED_HD(6) and I4_PRED_HU(8) + iCurMode = I4_PRED_HD; + + pDst = &pMbCache->pMemPredBlk4[ (1 - iBestPredBufferNum) << 4]; + + pFunc->pfGetLumaI4x4Pred[iCurMode] (pDst, pCurDec, kiLineSizeDec); + iCostHD = iCurCost = pFunc->sSampleDealingFuncs.pfMdCost[BLOCK_4x4] (pDst, 4, pCurEnc, kiLineSizeEnc) + + lambda[iPredMode == g_kiMapModeI4x4[iCurMode]]; + + if (iCurCost < iBestCost) { + iBestMode = iCurMode; + iBestCost = iCurCost; + iBestPredBufferNum = 1 - iBestPredBufferNum; + } + + if (iCurCost < iCostH) + iBestModeFake = false; + + iCurMode = I4_PRED_HU; + + pDst = &pMbCache->pMemPredBlk4[ (1 - iBestPredBufferNum) << 4]; + + pFunc->pfGetLumaI4x4Pred[iCurMode] (pDst, pCurDec, kiLineSizeDec); + iCostHU = iCurCost = pFunc->sSampleDealingFuncs.pfMdCost[BLOCK_4x4] (pDst, 4, pCurEnc, kiLineSizeEnc) + + lambda[iPredMode == g_kiMapModeI4x4[iCurMode]]; + + if (iCurCost < iBestCost) { + iBestMode = iCurMode; + iBestCost = iCurCost; + iBestPredBufferNum = 1 - iBestPredBufferNum; + } + + if (iCurCost < iCostH) + iBestModeFake = false; + + if (!iBestModeFake) { //Horizontal is not the best, go on checking... 
+ //select the best one from VL and VR + if (iCostHD < iCostHU) { + //I4_PRED_DDR(4) + iCurMode = I4_PRED_DDR; + + pDst = &pMbCache->pMemPredBlk4[ (1 - iBestPredBufferNum) << 4]; + + pFunc->pfGetLumaI4x4Pred[iCurMode] (pDst, pCurDec, kiLineSizeDec); + iCurCost = pFunc->sSampleDealingFuncs.pfMdCost[BLOCK_4x4] (pDst, 4, pCurEnc, kiLineSizeEnc) + + lambda[iPredMode == g_kiMapModeI4x4[iCurMode]]; + + if (iCurCost < iBestCost) { + iBestMode = iCurMode; + iBestCost = iCurCost; + iBestPredBufferNum = 1 - iBestPredBufferNum; + } + } else if (iAvailCount == 9) { + //I4_PRED_DDL(3) + iCurMode = I4_PRED_DDL; + + pDst = &pMbCache->pMemPredBlk4[ (1 - iBestPredBufferNum) << 4]; + pFunc->pfGetLumaI4x4Pred[iCurMode] (pDst, pCurDec, kiLineSizeDec); + + iCurCost = pFunc->sSampleDealingFuncs.pfMdCost[BLOCK_4x4] (pDst, 4, pCurEnc, kiLineSizeEnc) + + lambda[iPredMode == g_kiMapModeI4x4[iCurMode]]; + + if (iCurCost < iBestCost) { + iBestMode = iCurMode; + iBestCost = iCurCost; + iBestPredBufferNum = 1 - iBestPredBufferNum; + } + + } + } + } + } else { + iBestCost = INT_MAX; + iBestMode = I4_PRED_INVALID; + for (j = 0; j < iAvailCount; j++) { + // I4x4_MODE_CHECK(pAvailMode[j], iCurCost); + iCurMode = kpAvailMode[j]; + + pDst = &pMbCache->pMemPredBlk4[ (1 - iBestPredBufferNum) << 4]; + + pFunc->pfGetLumaI4x4Pred[iCurMode] (pDst, pCurDec, kiLineSizeDec); + iCurCost = pFunc->sSampleDealingFuncs.pfMdCost[BLOCK_4x4] (pDst, 4, pCurEnc, kiLineSizeEnc) + + lambda[iPredMode == g_kiMapModeI4x4[iCurMode]]; + + if (iCurCost < iBestCost) { + iBestMode = iCurMode; + iBestCost = iCurCost; + iBestPredBufferNum = 1 - iBestPredBufferNum; + } + } + } + pMbCache->pBestPredI4x4Blk4 = &pMbCache->pMemPredBlk4[iBestPredBufferNum << 4]; + iCosti4x4 += iBestCost; + if (iCosti4x4 >= iBestCostLuma) { + break; + } + + //step 5: update pred mode and sample avail cache + iFinalMode = g_kiMapModeI4x4[iBestMode]; + if (iPredMode == iFinalMode) { + *pPrevIntra4x4PredModeFlag++ = true; + } else { + 
*pPrevIntra4x4PredModeFlag++ = false; + *pRemIntra4x4PredModeFlag = (iFinalMode < iPredMode ? iFinalMode : (iFinalMode - 1)); + } + pRemIntra4x4PredModeFlag++; + // pCurMb->pIntra4x4PredMode[scan4[i]] = iFinalMode; + pMbCache->iIntraPredMode[kpCache48CountScan4[i]] = iFinalMode; + //step 6: encoding I_4x4 + WelsEncRecI4x4Y (pEncCtx, pCurMb, pMbCache, i); + } + ST32 (pCurMb->pIntra4x4PredMode, LD32 (&pMbCache->iIntraPredMode[33])); + pCurMb->pIntra4x4PredMode[4] = pMbCache->iIntraPredMode[12]; + pCurMb->pIntra4x4PredMode[5] = pMbCache->iIntraPredMode[20]; + pCurMb->pIntra4x4PredMode[6] = pMbCache->iIntraPredMode[28]; + iCosti4x4 += (iLambda << 4) + (iLambda << 3); //4*6*lambda from JVT SATD0 + return iCosti4x4; +} + +int32_t WelsMdIntraChroma (SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SMbCache* pMbCache, int32_t iLambda) { + const int8_t* kpAvailMode; + int32_t iAvailCount = 0; + int32_t iChmaIdx = 0; + uint8_t* pPredIntraChma[2] = {pMbCache->pMemPredChroma, pMbCache->pMemPredChroma + 128}; + uint8_t* pDstChma = pPredIntraChma[0]; + uint8_t* pEncCb = pMbCache->SPicData.pEncMb[1]; + uint8_t* pEncCr = pMbCache->SPicData.pEncMb[2]; + uint8_t* pDecCb = pMbCache->SPicData.pCsMb[1];//pMbCache->SPicData.pDecMb[1]; + uint8_t* pDecCr = pMbCache->SPicData.pCsMb[2];//pMbCache->SPicData.pDecMb[2]; + const int32_t kiLineSizeEnc = pCurDqLayer->iEncStride[1]; + const int32_t kiLineSizeDec = pCurDqLayer->iCsStride[1];//pMbCache->SPicData.i_stride_dec[1]; + + int32_t i, iCurMode, iCurCost, iBestMode, iBestCost = INT_MAX; + + int32_t iOffset = pMbCache->uiNeighborIntra & 0x07; + iAvailCount = g_kiIntraChromaAvailMode[iOffset][4]; + kpAvailMode = g_kiIntraChromaAvailMode[iOffset]; + if (iAvailCount > 3 && pFunc->sSampleDealingFuncs.pfIntra8x8Combined3) { + iBestCost = pFunc->sSampleDealingFuncs.pfIntra8x8Combined3 (pDecCb, kiLineSizeDec, pEncCb, kiLineSizeEnc, &iBestMode, + iLambda, pDstChma, pDecCr, pEncCr); + iCurMode = kpAvailMode[3]; + pFunc->pfGetChromaPred[iCurMode] 
(pDstChma, pDecCb, kiLineSizeDec); //Cb + pFunc->pfGetChromaPred[iCurMode] (pDstChma + 64, pDecCr, kiLineSizeDec); //Cr + + iCurCost = pFunc->sSampleDealingFuncs.pfMdCost[BLOCK_8x8] (pDstChma, 8, pEncCb, kiLineSizeEnc) + + pFunc->sSampleDealingFuncs.pfMdCost[BLOCK_8x8] (pDstChma + 64, 8, pEncCr, kiLineSizeEnc) + + iLambda * 4; + if (iCurCost < iBestCost) { + iBestMode = iCurMode; + iBestCost = iCurCost; + } else { + pFunc->pfGetChromaPred[iBestMode] (pDstChma, pDecCb, kiLineSizeDec); //Cb + pFunc->pfGetChromaPred[iBestMode] (pDstChma + 64, pDecCr, kiLineSizeDec); //Cr + } + iBestCost += iLambda; + iChmaIdx = 1; + } else { + iBestMode = kpAvailMode[0]; + for (i = 0; i < iAvailCount; ++ i) { + iCurMode = kpAvailMode[i]; + + assert (iCurMode >= 0 && iCurMode < 7); + + // pDstCb = &pMbCache->mem_pred_intra_cb[iCurMode<<6]; + // pDstCr = &pMbCache->mem_pred_intra_cr[iCurMode<<6]; + pFunc->pfGetChromaPred[iCurMode] (pDstChma, pDecCb, kiLineSizeDec); //Cb + iCurCost = pFunc->sSampleDealingFuncs.pfMdCost[BLOCK_8x8] (pDstChma, 8, pEncCb, kiLineSizeEnc); + + pFunc->pfGetChromaPred[iCurMode] (pDstChma + 64, pDecCr, kiLineSizeDec); //Cr + iCurCost += pFunc->sSampleDealingFuncs.pfMdCost[BLOCK_8x8] (pDstChma + 64, 8, pEncCr, kiLineSizeEnc) + + iLambda * BsSizeUE (g_kiMapModeIntraChroma[iCurMode]); + if (iCurCost < iBestCost) { + iBestMode = iCurMode; + iBestCost = iCurCost; + iChmaIdx = iChmaIdx ^ 0x01; + pDstChma = pPredIntraChma[iChmaIdx]; + } + } + } + + pMbCache->pBestPredIntraChroma = pPredIntraChma[iChmaIdx ^ 0x01]; + pMbCache->uiChmaI8x8Mode = iBestMode; + return iBestCost; +} +int32_t WelsMdIntraFinePartition (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurMb, SMbCache* pMbCache) { + int32_t iCosti4x4 = WelsMdI4x4 (pEncCtx, pWelsMd, pCurMb, pMbCache); + + if (iCosti4x4 < pWelsMd->iCostLuma) { + pCurMb->uiMbType = MB_TYPE_INTRA4x4; + pWelsMd->iCostLuma = iCosti4x4; + } + return pWelsMd->iCostLuma; +} + +int32_t WelsMdIntraFinePartitionVaa (sWelsEncCtx* pEncCtx, SWelsMD* 
pWelsMd, SMB* pCurMb, SMbCache* pMbCache) { + + if (MdIntraAnalysisVaaInfo (pEncCtx, pMbCache->SPicData.pEncMb[0])) { + int32_t iCosti4x4 = WelsMdI4x4Fast (pEncCtx, pWelsMd, pCurMb, pMbCache); + + if (iCosti4x4 < pWelsMd->iCostLuma) { + pCurMb->uiMbType = MB_TYPE_INTRA4x4; + pWelsMd->iCostLuma = iCosti4x4; + } + } + + return pWelsMd->iCostLuma; +} + +void WelsMdIntraMb (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurMb, SMbCache* pMbCache) { + //initial prediction memory for I_16x16 + pWelsMd->iCostLuma = WelsMdI16x16 (pEncCtx->pFuncList, pEncCtx->pCurDqLayer, pMbCache, pWelsMd->iLambda); + pCurMb->uiMbType = MB_TYPE_INTRA16x16; + + WelsMdIntraSecondaryModesEnc (pEncCtx, pWelsMd, pCurMb, pMbCache); +} + +static inline void InitMe (const SWelsMD& sWelsMd, const int32_t iBlockSize, uint8_t* pEnc, uint8_t* pRef, + SScreenBlockFeatureStorage* pRefFeatureStorage, + SWelsME& sWelsMe) { + sWelsMe.iCurMeBlockPixX = sWelsMd.iMbPixX; + sWelsMe.iCurMeBlockPixY = sWelsMd.iMbPixY; + sWelsMe.uiBlockSize = iBlockSize; + sWelsMe.pMvdCost = sWelsMd.pMvdCost; + + sWelsMe.pEncMb = pEnc; + sWelsMe.pRefMb = sWelsMe.pColoRefMb = pRef; + + sWelsMe.pRefFeatureStorage = pRefFeatureStorage; +} + +int32_t WelsMdP16x16 (SWelsFuncPtrList* pFunc, SDqLayer* pCurLayer, SWelsMD* pWelsMd, SSlice* pSlice, SMB* pCurMb) { + SMbCache* pMbCache = &pSlice->sMbCacheInfo; + SWelsME* pMe16x16 = &pWelsMd->sMe.sMe16x16; + uint32_t uiNeighborAvail = pCurMb->uiNeighborAvail; + const int32_t kiMbWidth = pCurLayer->iMbWidth; // for assign once + const int32_t kiMbHeight = pCurLayer->iMbHeight; + InitMe (*pWelsMd, BLOCK_16x16, pMbCache->SPicData.pEncMb[0], pMbCache->SPicData.pRefMb[0], + pCurLayer->pRefPic->pScreenBlockFeatureStorage, + *pMe16x16); + //not putting the line below into InitMe to avoid judging mode in InitMe + pMe16x16->uSadPredISatd.uiSadPred = pWelsMd->iSadPredMb; + + pSlice->uiMvcNum = 0; + pSlice->sMvc[pSlice->uiMvcNum++] = pMe16x16->sMvBase; + //spatial motion vector predictors + if 
(uiNeighborAvail & LEFT_MB_POS) { //left available + pSlice->sMvc[pSlice->uiMvcNum++] = (pCurMb - 1)->sP16x16Mv; + } + if (uiNeighborAvail & TOP_MB_POS) { //top available + pSlice->sMvc[pSlice->uiMvcNum++] = (pCurMb - kiMbWidth)->sP16x16Mv; + } + //temporal motion vector predictors + if (pCurLayer->pRefPic->iPictureType == P_SLICE) { + if (pCurMb->iMbX < kiMbWidth - 1) { + SMVUnitXY sTempMv = pCurLayer->pRefPic->sMvList[pCurMb->iMbXY + 1]; + pSlice->sMvc[pSlice->uiMvcNum].iMvX = sTempMv.iMvX >> pSlice->sScaleShift; + pSlice->sMvc[pSlice->uiMvcNum].iMvY = sTempMv.iMvY >> pSlice->sScaleShift; + ++ pSlice->uiMvcNum; + } + if (pCurMb->iMbY < kiMbHeight - 1) { + SMVUnitXY sTempMv = pCurLayer->pRefPic->sMvList[pCurMb->iMbXY + kiMbWidth]; + pSlice->sMvc[pSlice->uiMvcNum].iMvX = sTempMv.iMvX >> pSlice->sScaleShift; + pSlice->sMvc[pSlice->uiMvcNum].iMvY = sTempMv.iMvY >> pSlice->sScaleShift; + ++ pSlice->uiMvcNum; + } + } + + PredMv (&pMbCache->sMvComponents, 0, 4, 0, & (pMe16x16->sMvp)); + pFunc->pfMotionSearch[0] (pFunc, pCurLayer, pMe16x16, pSlice); + + pCurMb->sP16x16Mv = pMe16x16->sMv; + pCurLayer->pDecPic->sMvList[pCurMb->iMbXY] = pMe16x16->sMv; + + return pMe16x16->uiSatdCost; +} +int32_t WelsMdP16x8 (SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SWelsMD* pWelsMd, SSlice* pSlice) { + SMbCache* pMbCache = &pSlice->sMbCacheInfo; + int32_t iStrideEnc = pCurDqLayer->iEncStride[0]; + int32_t iStrideRef = pCurDqLayer->pRefPic->iLineSize[0]; + SWelsME* sMe16x8; + int32_t i = 0, iPixelY; + int32_t iCostP16x8 = 0; + do { + sMe16x8 = &pWelsMd->sMe.sMe16x8[i]; + iPixelY = (i << 3); + InitMe (*pWelsMd, BLOCK_16x8, + pMbCache->SPicData.pEncMb[0] + (iPixelY * iStrideEnc), + pMbCache->SPicData.pRefMb[0] + (iPixelY * iStrideRef), + pCurDqLayer->pRefPic->pScreenBlockFeatureStorage, + *sMe16x8); + //not putting the lines below into InitMe to avoid judging mode in InitMe + sMe16x8->iCurMeBlockPixY = pWelsMd->iMbPixY + iPixelY; + sMe16x8->uSadPredISatd.uiSadPred = pWelsMd->iSadPredMb 
>> 1; + + pSlice->sMvc[0] = sMe16x8->sMvBase; + pSlice->uiMvcNum = 1; + + PredInter16x8Mv (pMbCache, i << 3, 0, & (sMe16x8->sMvp)); + pFunc->pfMotionSearch[0] (pFunc, pCurDqLayer, sMe16x8, pSlice); + UpdateP16x8Motion2Cache (pMbCache, i << 3, pWelsMd->uiRef, & (sMe16x8->sMv)); + iCostP16x8 += sMe16x8->uiSatdCost; + ++i; + } while (i < 2); + return iCostP16x8; +} +int32_t WelsMdP8x16 (SWelsFuncPtrList* pFunc, SDqLayer* pCurLayer, SWelsMD* pWelsMd, SSlice* pSlice) { + SMbCache* pMbCache = &pSlice->sMbCacheInfo; + SWelsME* sMe8x16; + int32_t i = 0, iPixelX; + int32_t iCostP8x16 = 0; + do { + iPixelX = (i << 3); + sMe8x16 = &pWelsMd->sMe.sMe8x16[i]; + InitMe (*pWelsMd, BLOCK_8x16, + pMbCache->SPicData.pEncMb[0] + iPixelX, + pMbCache->SPicData.pRefMb[0] + iPixelX, + pCurLayer->pRefPic->pScreenBlockFeatureStorage, + *sMe8x16); + //not putting the lines below into InitMe to avoid judging mode in InitMe + sMe8x16->iCurMeBlockPixX = pWelsMd->iMbPixX + iPixelX; + sMe8x16->uSadPredISatd.uiSadPred = pWelsMd->iSadPredMb >> 1; + + pSlice->sMvc[0] = sMe8x16->sMvBase; + pSlice->uiMvcNum = 1; + + PredInter8x16Mv (pMbCache, i << 2, 0, & (sMe8x16->sMvp)); + pFunc->pfMotionSearch[0] (pFunc, pCurLayer, sMe8x16, pSlice); + UpdateP8x16Motion2Cache (pMbCache, i << 2, pWelsMd->uiRef, & (sMe8x16->sMv)); + iCostP8x16 += sMe8x16->uiSatdCost; + ++i; + } while (i < 2); + return iCostP8x16; +} +int32_t WelsMdP8x8 (SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SWelsMD* pWelsMd, SSlice* pSlice) { + SMbCache* pMbCache = &pSlice->sMbCacheInfo; + int32_t iLineSizeEnc = pCurDqLayer->iEncStride[0]; + int32_t iLineSizeRef = pCurDqLayer->pRefPic->iLineSize[0]; + SWelsME* sMe8x8; + int32_t i, iIdxX, iIdxY, iPixelX, iPixelY, iStrideEnc, iStrideRef; + int32_t iCostP8x8 = 0; + for (i = 0; i < 4; i++) { + iIdxX = i & 1; + iIdxY = i >> 1; + iPixelX = (iIdxX << 3); + iPixelY = (iIdxY << 3); + iStrideEnc = iPixelX + (iPixelY * iLineSizeEnc); + iStrideRef = iPixelX + (iPixelY * iLineSizeRef); + + sMe8x8 = 
&pWelsMd->sMe.sMe8x8[i]; + InitMe (*pWelsMd, BLOCK_8x8, + pMbCache->SPicData.pEncMb[0] + iStrideEnc, + pMbCache->SPicData.pRefMb[0] + iStrideRef, + pCurDqLayer->pRefPic->pScreenBlockFeatureStorage, + *sMe8x8); + //not putting these three lines below into InitMe to avoid judging mode in InitMe + sMe8x8->iCurMeBlockPixX = pWelsMd->iMbPixX + iPixelX; + sMe8x8->iCurMeBlockPixY = pWelsMd->iMbPixY + iPixelY; + sMe8x8->uSadPredISatd.uiSadPred = pWelsMd->iSadPredMb >> 2; + + + pSlice->sMvc[0] = sMe8x8->sMvBase; + pSlice->uiMvcNum = 1; + + PredMv (&pMbCache->sMvComponents, i << 2, 2, pWelsMd->uiRef, & (sMe8x8->sMvp)); + pFunc->pfMotionSearch[pWelsMd->iBlock8x8StaticIdc[i]] (pFunc, pCurDqLayer, sMe8x8, pSlice); + UpdateP8x8Motion2Cache (pMbCache, i << 2, pWelsMd->uiRef, & (sMe8x8->sMv)); + iCostP8x8 += sMe8x8->uiSatdCost; +// sMe8x8++; + } + return iCostP8x8; +} + +int32_t WelsMdP4x4 (SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SWelsMD* pWelsMd, SSlice* pSlice, + const int32_t ki8x8Idx) { + SMbCache* pMbCache = &pSlice->sMbCacheInfo; + int32_t iLineSizeEnc = pCurDqLayer->iEncStride[0]; + int32_t iLineSizeRef = pCurDqLayer->pRefPic->iLineSize[0]; + SWelsME* sMe4x4; + int32_t i4x4Idx, iIdxX, iIdxY, iPixelX, iPixelY, iStrideEnc, iStrideRef; + int32_t iCostP4x4 = 0; + for (i4x4Idx = 0; i4x4Idx < 4; ++i4x4Idx) { + int32_t iPartIdx = (ki8x8Idx << 2) + i4x4Idx; + iIdxX = ((ki8x8Idx & 1) << 1) + (i4x4Idx & 1); + iIdxY = ((ki8x8Idx >> 1) << 1) + (i4x4Idx >> 1); + iPixelX = (iIdxX << 2); + iPixelY = (iIdxY << 2); + iStrideEnc = iPixelX + (iPixelY * iLineSizeEnc); + iStrideRef = iPixelX + (iPixelY * iLineSizeRef); + + sMe4x4 = &pWelsMd->sMe.sMe4x4[ki8x8Idx][i4x4Idx]; + InitMe (*pWelsMd, BLOCK_4x4, + pMbCache->SPicData.pEncMb[0] + iStrideEnc, + pMbCache->SPicData.pRefMb[0] + iStrideRef, + pCurDqLayer->pRefPic->pScreenBlockFeatureStorage, + *sMe4x4); + //not putting these three lines below into InitMe to avoid judging mode in InitMe + sMe4x4->iCurMeBlockPixX = pWelsMd->iMbPixX + 
iPixelX; + sMe4x4->iCurMeBlockPixY = pWelsMd->iMbPixY + iPixelY; + sMe4x4->uSadPredISatd.uiSadPred = pWelsMd->iSadPredMb >> 2; + + pSlice->sMvc[0] = sMe4x4->sMvBase; + pSlice->uiMvcNum = 1; + + PredMv (&pMbCache->sMvComponents, iPartIdx, 1, pWelsMd->uiRef, & (sMe4x4->sMvp)); + pFunc->pfMotionSearch[0] (pFunc, pCurDqLayer, sMe4x4, pSlice); + UpdateP4x4Motion2Cache (pMbCache, iPartIdx, pWelsMd->uiRef, & (sMe4x4->sMv)); + iCostP4x4 += sMe4x4->uiSatdCost; + } + return iCostP4x4; +} + +int32_t WelsMdP8x4 (SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SWelsMD* pWelsMd, SSlice* pSlice, + const int32_t ki8x8Idx) { + SMbCache* pMbCache = &pSlice->sMbCacheInfo; + int32_t iLineSizeEnc = pCurDqLayer->iEncStride[0]; + int32_t iLineSizeRef = pCurDqLayer->pRefPic->iLineSize[0]; + SWelsME* sMe8x4; + int32_t i8x4Idx, iIdxX, iIdxY, iPixelX, iPixelY, iStrideEnc, iStrideRef; + int32_t iCostP8x4 = 0; + for (i8x4Idx = 0; i8x4Idx < 2; ++i8x4Idx) { + int32_t iPartIdx = (ki8x8Idx << 2) + (i8x4Idx << 1); + iIdxX = ((ki8x8Idx & 1) << 1); + iIdxY = ((ki8x8Idx >> 1) << 1) + i8x4Idx; + iPixelX = (iIdxX << 2); + iPixelY = (iIdxY << 2); + iStrideEnc = iPixelX + (iPixelY * iLineSizeEnc); + iStrideRef = iPixelX + (iPixelY * iLineSizeRef); + + sMe8x4 = &pWelsMd->sMe.sMe8x4[ki8x8Idx][i8x4Idx]; + InitMe (*pWelsMd, BLOCK_8x4, + pMbCache->SPicData.pEncMb[0] + iStrideEnc, + pMbCache->SPicData.pRefMb[0] + iStrideRef, + pCurDqLayer->pRefPic->pScreenBlockFeatureStorage, + *sMe8x4); + //not putting these three lines below into InitMe to avoid judging mode in InitMe + sMe8x4->iCurMeBlockPixX = pWelsMd->iMbPixX + iPixelX; + sMe8x4->iCurMeBlockPixY = pWelsMd->iMbPixY + iPixelY; + sMe8x4->uSadPredISatd.uiSadPred = pWelsMd->iSadPredMb >> 2; + + pSlice->sMvc[0] = sMe8x4->sMvBase; + pSlice->uiMvcNum = 1; + + PredMv (&pMbCache->sMvComponents, iPartIdx, 2, pWelsMd->uiRef, & (sMe8x4->sMvp)); + pFunc->pfMotionSearch[0] (pFunc, pCurDqLayer, sMe8x4, pSlice); + UpdateP8x4Motion2Cache (pMbCache, iPartIdx, pWelsMd->uiRef, 
& (sMe8x4->sMv)); + iCostP8x4 += sMe8x4->uiSatdCost; + } + return iCostP8x4; +} + +int32_t WelsMdP4x8 (SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SWelsMD* pWelsMd, SSlice* pSlice, + const int32_t ki8x8Idx) { + //Wayne, to be modified + SMbCache* pMbCache = &pSlice->sMbCacheInfo; + int32_t iLineSizeEnc = pCurDqLayer->iEncStride[0]; + int32_t iLineSizeRef = pCurDqLayer->pRefPic->iLineSize[0]; + SWelsME* sMe4x8; + int32_t i4x8Idx, iIdxX, iIdxY, iPixelX, iPixelY, iStrideEnc, iStrideRef; + int32_t iCostP4x8 = 0; + for (i4x8Idx = 0; i4x8Idx < 2; ++i4x8Idx) { + int32_t iPartIdx = (ki8x8Idx << 2) + i4x8Idx; + iIdxX = ((ki8x8Idx & 1) << 1) + i4x8Idx; + iIdxY = ((ki8x8Idx >> 1) << 1); + iPixelX = (iIdxX << 2); + iPixelY = (iIdxY << 2); + iStrideEnc = iPixelX + (iPixelY * iLineSizeEnc); + iStrideRef = iPixelX + (iPixelY * iLineSizeRef); + + sMe4x8 = &pWelsMd->sMe.sMe4x8[ki8x8Idx][i4x8Idx]; + InitMe (*pWelsMd, BLOCK_4x8, + pMbCache->SPicData.pEncMb[0] + iStrideEnc, + pMbCache->SPicData.pRefMb[0] + iStrideRef, + pCurDqLayer->pRefPic->pScreenBlockFeatureStorage, + *sMe4x8); + //not putting these three lines below into InitMe to avoid judging mode in InitMe + sMe4x8->iCurMeBlockPixX = pWelsMd->iMbPixX + iPixelX; + sMe4x8->iCurMeBlockPixY = pWelsMd->iMbPixY + iPixelY; + sMe4x8->uSadPredISatd.uiSadPred = pWelsMd->iSadPredMb >> 2; + + pSlice->sMvc[0] = sMe4x8->sMvBase; + pSlice->uiMvcNum = 1; + + PredMv (&pMbCache->sMvComponents, iPartIdx, 1, pWelsMd->uiRef, & (sMe4x8->sMvp)); + pFunc->pfMotionSearch[0] (pFunc, pCurDqLayer, sMe4x8, pSlice); + UpdateP4x8Motion2Cache (pMbCache, iPartIdx, pWelsMd->uiRef, & (sMe4x8->sMv)); + iCostP4x8 += sMe4x8->uiSatdCost; + } + return iCostP4x8; +} + +void WelsMdInterFinePartition (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice* pSlice, SMB* pCurMb, int32_t iBestCost) { + SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer; +// SMbCache *pMbCache = &pSlice->sMbCacheInfo; + int32_t iCost = 0; + +// WelsLog( pEncCtx, WELS_LOG_INFO, "WelsMdP8x8, p_ref[0]= 
0x%p", pMbCache->SPicData.pRefMb[0]); + + iCost = WelsMdP8x8 (pEncCtx->pFuncList, pCurDqLayer, pWelsMd, pSlice); + + if (iCost < iBestCost) { + int32_t iCostPart; + pCurMb->uiMbType = MB_TYPE_8x8; + memset (pCurMb->uiSubMbType, SUB_MB_TYPE_8x8, 4); + +// WelsLog( pEncCtx, WELS_LOG_INFO, "WelsMdP16x8, p_ref[0]= 0x%p", pMbCache->SPicData.pRefMb[0]); + iCostPart = WelsMdP16x8 (pEncCtx->pFuncList, pCurDqLayer, pWelsMd, pSlice); + if (iCostPart <= iCost) { + iCost = iCostPart; + pCurMb->uiMbType = MB_TYPE_16x8; + //pCurMb->mb_partition = 2; + } + +// WelsLog( pEncCtx, WELS_LOG_INFO, "WelsMdP8x16, p_ref[0]= 0x%p", pMbCache->SPicData.pRefMb[0]); + iCostPart = WelsMdP8x16 (pEncCtx->pFuncList, pCurDqLayer, pWelsMd, pSlice); + if (iCostPart <= iCost) { + iCost = iCostPart; + pCurMb->uiMbType = MB_TYPE_8x16; + //pCurMb->mb_partition = 2; + } + } +} + +void WelsMdInterFinePartitionVaa (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice* pSlice, SMB* pCurMb, + int32_t iBestCost) { + SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer; +// SMbCache *pMbCache = &pSlice->sMbCacheInfo; + int32_t iCostP8x16, iCostP16x8, iCostP8x8; + uint8_t uiMbSign = pEncCtx->pFuncList->pfGetMbSignFromInterVaa (&pEncCtx->pVaa->sVaaCalcInfo.pSad8x8[pCurMb->iMbXY][0]); + + if (uiMbSign == 15) { + return; + } + +// iCost = pWelsMd->sMe16x16.uiSatdCost; + + switch (uiMbSign) { + case 3: + case 12: +// WelsLog( pEncCtx, WELS_LOG_INFO, "WelsMdP16x8, p_ref[0]= 0x%p", pMbCache->SPicData.pRefMb[0]); + iCostP16x8 = WelsMdP16x8 (pEncCtx->pFuncList, pCurDqLayer, pWelsMd, pSlice); + if (iCostP16x8 < iBestCost) { + iBestCost = iCostP16x8; + pCurMb->uiMbType = MB_TYPE_16x8; + //pCurMb->mb_partition = 2; + } + break; + + case 5: + case 10: +// WelsLog( pEncCtx, WELS_LOG_INFO, "WelsMdP8x16, p_ref[0]= 0x%p", pMbCache->SPicData.pRefMb[0]); + iCostP8x16 = WelsMdP8x16 (pEncCtx->pFuncList, pCurDqLayer, pWelsMd, pSlice); + if (iCostP8x16 < iBestCost) { + iBestCost = iCostP8x16; + pCurMb->uiMbType = MB_TYPE_8x16; + 
//pCurMb->mb_partition = 2; + } + break; + + case 6: + case 9: + iCostP8x8 = WelsMdP8x8 (pEncCtx->pFuncList, pCurDqLayer, pWelsMd, pSlice); + if (iCostP8x8 < iBestCost) { + iBestCost = iCostP8x8; + pCurMb->uiMbType = MB_TYPE_8x8; + memset (pCurMb->uiSubMbType, SUB_MB_TYPE_8x8, 4); + } + break; + + default: + iCostP8x8 = WelsMdP8x8 (pEncCtx->pFuncList, pCurDqLayer, pWelsMd, pSlice); + if (iCostP8x8 < iBestCost) { + iBestCost = iCostP8x8; + pCurMb->uiMbType = MB_TYPE_8x8; + memset (pCurMb->uiSubMbType, SUB_MB_TYPE_8x8, 4); + + iCostP16x8 = WelsMdP16x8 (pEncCtx->pFuncList, pCurDqLayer, pWelsMd, pSlice); + if (iCostP16x8 <= iBestCost) { + iBestCost = iCostP16x8; + pCurMb->uiMbType = MB_TYPE_16x8; + } + + iCostP8x16 = WelsMdP8x16 (pEncCtx->pFuncList, pCurDqLayer, pWelsMd, pSlice); + if (iCostP8x16 <= iBestCost) { + iBestCost = iCostP8x16; + pCurMb->uiMbType = MB_TYPE_8x16; + } + } + break; + } + pWelsMd->iCostLuma = iBestCost; +} + + +inline void VaaBackgroundMbDataUpdate (SWelsFuncPtrList* pFunc, SVAAFrameInfo* pVaaInfo, SMB* pCurMb) { + const int32_t kiPicStride = pVaaInfo->iPicStride; + const int32_t kiPicStrideUV = pVaaInfo->iPicStrideUV; + const int32_t kiOffsetY = (pCurMb->iMbY * kiPicStride + pCurMb->iMbX) << 4; + const int32_t kiOffsetUV = (pCurMb->iMbY * kiPicStrideUV + pCurMb->iMbX) << 3; + + pFunc->pfCopy16x16Aligned (pVaaInfo->pCurY + kiOffsetY, kiPicStride, pVaaInfo->pRefY + kiOffsetY, kiPicStride); + pFunc->pfCopy8x8Aligned (pVaaInfo->pCurU + kiOffsetUV, kiPicStrideUV, pVaaInfo->pRefU + kiOffsetUV, kiPicStrideUV); + pFunc->pfCopy8x8Aligned (pVaaInfo->pCurV + kiOffsetUV, kiPicStrideUV, pVaaInfo->pRefV + kiOffsetUV, kiPicStrideUV); +} + +void WelsMdBackgroundMbEnc (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurMb, SMbCache* pMbCache, SSlice* pSlice, + bool bSkipMbFlag) { + SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer; + SWelsFuncPtrList* pFunc = pEncCtx->pFuncList; + SMVUnitXY sMvp = { 0 }; + uint8_t* pRefLuma = pMbCache->SPicData.pRefMb[0]; + uint8_t* 
pRefCb = pMbCache->SPicData.pRefMb[1]; + uint8_t* pRefCr = pMbCache->SPicData.pRefMb[2]; + int32_t iLineSizeY = pCurDqLayer->pRefPic->iLineSize[0]; + int32_t iLineSizeUV = pCurDqLayer->pRefPic->iLineSize[1]; + uint8_t* pDstLuma = pMbCache->pSkipMb; + uint8_t* pDstCb = pMbCache->pSkipMb + 256; + uint8_t* pDstCr = pMbCache->pSkipMb + 256 + 64; + + if (!bSkipMbFlag) { + pDstLuma = pMbCache->pMemPredLuma; + pDstCb = pMbCache->pMemPredChroma; + pDstCr = pMbCache->pMemPredChroma + 64; + } + //MC + pFunc->sMcFuncs.pMcLumaFunc (pRefLuma, iLineSizeY, pDstLuma, 16, 0, 0, 16, 16); + pFunc->sMcFuncs.pMcChromaFunc (pRefCb, iLineSizeUV, pDstCb, 8, sMvp.iMvX, sMvp.iMvY, 8, 8); //Cb + pFunc->sMcFuncs.pMcChromaFunc (pRefCr, iLineSizeUV, pDstCr, 8, sMvp.iMvX, sMvp.iMvY, 8, 8); //Cr + + pCurMb->uiCbp = 0; + pMbCache->bCollocatedPredFlag = true; + pWelsMd->iCostLuma = 0;//BGD&RC integration + pCurMb->pSadCost[0] = pFunc->sSampleDealingFuncs.pfSampleSad[BLOCK_16x16] (pMbCache->SPicData.pEncMb[0], + pCurDqLayer->iEncStride[0], pRefLuma, iLineSizeY); + ST32 (&pCurMb->sP16x16Mv, 0); + ST32 (&pCurDqLayer->pDecPic->sMvList[pCurMb->iMbXY], 0); + + if (bSkipMbFlag) { + pCurMb->uiMbType = MB_TYPE_BACKGROUND; + + //update motion info to current MB + ST32 (pCurMb->pRefIndex, 0); + pFunc->pfUpdateMbMv (pCurMb->sMv, sMvp); + + pCurMb->uiLumaQp = pSlice->uiLastMbQp; + pCurMb->uiChromaQp = g_kuiChromaQpTable[CLIP3_QP_0_51 (pCurMb->uiLumaQp + + pCurDqLayer->sLayerInfo.pPpsP->uiChromaQpIndexOffset)]; + + WelsRecPskip (pCurDqLayer, pEncCtx->pFuncList, pCurMb, pMbCache); + VaaBackgroundMbDataUpdate (pEncCtx->pFuncList, pEncCtx->pVaa, pCurMb); + return; + } + + pCurMb->uiMbType = MB_TYPE_16x16; + + pWelsMd->sMe.sMe16x16.sMv.iMvX = 0; + pWelsMd->sMe.sMe16x16.sMv.iMvY = 0; + PredMv (&pMbCache->sMvComponents, 0, 4, pWelsMd->uiRef, &pWelsMd->sMe.sMe16x16.sMvp); + pMbCache->sMbMvp[0] = pWelsMd->sMe.sMe16x16.sMvp; + + UpdateP16x16MotionInfo (pMbCache, pCurMb, pWelsMd->uiRef, &pWelsMd->sMe.sMe16x16.sMv); + + if 
(pWelsMd->bMdUsingSad) + pWelsMd->iCostLuma = pCurMb->pSadCost[0]; + else + pWelsMd->iCostLuma = pFunc->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x16] (pMbCache->SPicData.pEncMb[0], + pCurDqLayer->iEncStride[0], pRefLuma, iLineSizeY); + + WelsInterMbEncode (pEncCtx, pSlice, pCurMb); + WelsPMbChromaEncode (pEncCtx, pSlice, pCurMb); + + pFunc->pfCopy16x16Aligned (pMbCache->SPicData.pCsMb[0], pCurDqLayer->iCsStride[0], pMbCache->pMemPredLuma, 16); + pFunc->pfCopy8x8Aligned (pMbCache->SPicData.pCsMb[1], pCurDqLayer->iCsStride[1], pMbCache->pMemPredChroma, 8); + pFunc->pfCopy8x8Aligned (pMbCache->SPicData.pCsMb[2], pCurDqLayer->iCsStride[1], pMbCache->pMemPredChroma + 64, 8); +} + +bool WelsMdPSkipEnc (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurMb, SMbCache* pMbCache) { + SDqLayer* pCurLayer = pEncCtx->pCurDqLayer; + SWelsFuncPtrList* pFunc = pEncCtx->pFuncList; + + uint8_t* pRefLuma = pMbCache->SPicData.pRefMb[0]; + uint8_t* pRefCb = pMbCache->SPicData.pRefMb[1]; + uint8_t* pRefCr = pMbCache->SPicData.pRefMb[2]; + int32_t iLineSizeY = pCurLayer->pRefPic->iLineSize[0]; + int32_t iLineSizeUV = pCurLayer->pRefPic->iLineSize[1]; + + uint8_t* pDstLuma = pMbCache->pSkipMb; + uint8_t* pDstCb = pMbCache->pSkipMb + 256; + uint8_t* pDstCr = pMbCache->pSkipMb + 256 + 64; + + SMVUnitXY sMvp = { 0 }; + int32_t n; + + int32_t iEncStride = pCurLayer->iEncStride[0]; + uint8_t* pEncMb = pMbCache->SPicData.pEncMb[0]; + int32_t* pStrideEncBlockOffset = pEncCtx->pStrideTab->pStrideEncBlockOffset[pEncCtx->uiDependencyId]; + int32_t* pEncBlockOffset; + + int32_t iSadCostLuma = 0; + int32_t iSadCostChroma = 0; + int32_t iSadCostMb = 0; + + PredSkipMv (pMbCache, &sMvp); + + // Special case, need to clip the vector // + SMVUnitXY sQpelMvp = { static_cast (sMvp.iMvX >> 2), static_cast (sMvp.iMvY >> 2) }; + n = (pCurMb->iMbX << 4) + sQpelMvp.iMvX; + if (n < -29) + return false; + else if (n > (int32_t) ((pCurLayer->iMbWidth << 4) + 12)) + return false; + + n = (pCurMb->iMbY << 4) + 
sQpelMvp.iMvY; + if (n < -29) + return false; + else if (n > (int32_t) ((pCurLayer->iMbHeight << 4) + 12)) + return false; + + //luma + pRefLuma += sQpelMvp.iMvY * iLineSizeY + sQpelMvp.iMvX; + pFunc->sMcFuncs.pMcLumaFunc (pRefLuma, iLineSizeY, pDstLuma, 16, sMvp.iMvX, sMvp.iMvY, 16, 16); + iSadCostLuma = pFunc->sSampleDealingFuncs.pfSampleSad[BLOCK_16x16] (pMbCache->SPicData.pEncMb[0], + pCurLayer->iEncStride[0], pDstLuma, 16); + + const int32_t iStrideUV = (sQpelMvp.iMvY >> 1) * iLineSizeUV + (sQpelMvp.iMvX >> 1); + pRefCb += iStrideUV; + pFunc->sMcFuncs.pMcChromaFunc (pRefCb, iLineSizeUV, pDstCb, 8, sMvp.iMvX, sMvp.iMvY, 8, 8); //Cb + iSadCostChroma = pFunc->sSampleDealingFuncs.pfSampleSad[BLOCK_8x8] (pMbCache->SPicData.pEncMb[1], + pCurLayer->iEncStride[1], pDstCb, 8); + + pRefCr += iStrideUV; + pFunc->sMcFuncs.pMcChromaFunc (pRefCr, iLineSizeUV, pDstCr, 8, sMvp.iMvX, sMvp.iMvY, 8, 8); //Cr + iSadCostChroma += pFunc->sSampleDealingFuncs.pfSampleSad[BLOCK_8x8] (pMbCache->SPicData.pEncMb[2], + pCurLayer->iEncStride[2], pDstCr, 8); + + iSadCostMb = iSadCostLuma + iSadCostChroma; + + if (iSadCostMb == 0 || + iSadCostMb < pWelsMd->iSadPredSkip || + (pCurLayer->pRefPic->iPictureType == P_SLICE && + pMbCache->uiRefMbType == MB_TYPE_SKIP && + iSadCostMb < pCurLayer->pRefPic->pMbSkipSad[pCurMb->iMbXY])) { + //update motion info to current MB + ST32 (pCurMb->pRefIndex, 0); + pFunc->pfUpdateMbMv (pCurMb->sMv, sMvp); + + if (pWelsMd->bMdUsingSad) { + pCurMb->pSadCost[0] = iSadCostLuma; + pWelsMd->iCostLuma = pCurMb->pSadCost[0]; + } else + pWelsMd->iCostLuma = pFunc->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x16] (pMbCache->SPicData.pEncMb[0], + pCurLayer->iEncStride[0], pDstLuma, 16); + + pWelsMd->iCostSkipMb = iSadCostMb; + + pCurMb->sP16x16Mv = sMvp; + pCurLayer->pDecPic->sMvList[pCurMb->iMbXY] = sMvp; + + return true; + } + + WelsDctMb (pMbCache->pCoeffLevel, pEncMb, iEncStride, pDstLuma, pEncCtx->pFuncList->pfDctFourT4); + + if (WelsTryPYskip (pEncCtx, pCurMb, 
pMbCache)) { + iEncStride = pEncCtx->pCurDqLayer->iEncStride[1]; + pEncMb = pMbCache->SPicData.pEncMb[1]; + pEncBlockOffset = pStrideEncBlockOffset + 16; + pFunc->pfDctFourT4 (pMbCache->pCoeffLevel + 256, & (pEncMb[*pEncBlockOffset]), iEncStride, pMbCache->pSkipMb + 256, 8); + if (WelsTryPUVskip (pEncCtx, pCurMb, pMbCache, 1)) { + pEncMb = pMbCache->SPicData.pEncMb[2]; + pEncBlockOffset = pStrideEncBlockOffset + 20; + pFunc->pfDctFourT4 (pMbCache->pCoeffLevel + 320, & (pEncMb[*pEncBlockOffset]), iEncStride, pMbCache->pSkipMb + 320, 8); + if (WelsTryPUVskip (pEncCtx, pCurMb, pMbCache, 2)) { + //update motion info to current MB + ST32 (pCurMb->pRefIndex, 0); + pFunc->pfUpdateMbMv (pCurMb->sMv, sMvp); + + if (pWelsMd->bMdUsingSad) { + pCurMb->pSadCost[0] = iSadCostLuma; + pWelsMd->iCostLuma = pCurMb->pSadCost[0]; + } else + pWelsMd->iCostLuma = pFunc->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x16] (pMbCache->SPicData.pEncMb[0], + pCurLayer->iEncStride[0], pDstLuma, 16); + + pWelsMd->iCostSkipMb = iSadCostMb; + + pCurMb->sP16x16Mv = sMvp; + pCurLayer->pDecPic->sMvList[pCurMb->iMbXY] = sMvp; + + return true; + } + } + } + return false; +} + +const int32_t g_kiPixStrideIdx8x8[4] = { 0, ME_REFINE_BUF_WIDTH_BLK8, + ME_REFINE_BUF_STRIDE_BLK8, ME_REFINE_BUF_STRIDE_BLK8 + ME_REFINE_BUF_WIDTH_BLK8 + }; +const int32_t g_kiPixStrideIdx4x4[4][4] = { + { + 0, + 0 + ME_REFINE_BUF_WIDTH_BLK4, + 0 + ME_REFINE_BUF_STRIDE_BLK4, + 0 + ME_REFINE_BUF_WIDTH_BLK4 + ME_REFINE_BUF_STRIDE_BLK4 + }, //[0][] + { + ME_REFINE_BUF_WIDTH_BLK8, + ME_REFINE_BUF_WIDTH_BLK8 + ME_REFINE_BUF_WIDTH_BLK4, + ME_REFINE_BUF_WIDTH_BLK8 + ME_REFINE_BUF_STRIDE_BLK4, + ME_REFINE_BUF_WIDTH_BLK8 + ME_REFINE_BUF_WIDTH_BLK4 + ME_REFINE_BUF_STRIDE_BLK4 + }, //[1][] + { + ME_REFINE_BUF_STRIDE_BLK8, + ME_REFINE_BUF_STRIDE_BLK8 + ME_REFINE_BUF_WIDTH_BLK4, + ME_REFINE_BUF_STRIDE_BLK8 + ME_REFINE_BUF_STRIDE_BLK4, + ME_REFINE_BUF_STRIDE_BLK8 + ME_REFINE_BUF_WIDTH_BLK4 + ME_REFINE_BUF_STRIDE_BLK4 + }, //[2][] + { + 
ME_REFINE_BUF_STRIDE_BLK8 + ME_REFINE_BUF_WIDTH_BLK8, + ME_REFINE_BUF_STRIDE_BLK8 + ME_REFINE_BUF_WIDTH_BLK8 + ME_REFINE_BUF_WIDTH_BLK4, + ME_REFINE_BUF_STRIDE_BLK8 + ME_REFINE_BUF_WIDTH_BLK8 + ME_REFINE_BUF_STRIDE_BLK4, + ME_REFINE_BUF_STRIDE_BLK8 + ME_REFINE_BUF_WIDTH_BLK8 + ME_REFINE_BUF_WIDTH_BLK4 + ME_REFINE_BUF_STRIDE_BLK4 + } //[3][] +}; + +void WelsMdInterMbRefinement (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurMb, SMbCache* pMbCache) { + SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer; + SWelsFuncPtrList* pFunc = pEncCtx->pFuncList; + uint8_t* pTmpRefCb, *pTmpRefCr, *pTmpDstCb, *pTmpDstCr; + int32_t iMvStride, iRefBlk4Stride, iDstBlk4Stride; + SMVUnitXY* pMv; + int32_t iBestSadCost = 0, iBestSatdCost = 0; + SMeRefinePointer sMeRefine; + + int32_t i, j, iIdx, iPixStride; + + uint8_t* pRefCb = pMbCache->SPicData.pRefMb[1]; + uint8_t* pRefCr = pMbCache->SPicData.pRefMb[2]; + uint8_t* pDstCb = pMbCache->pMemPredChroma; + uint8_t* pDstCr = pMbCache->pMemPredChroma + 64; + uint8_t* pDstLuma = pMbCache->pMemPredLuma; + + int32_t iLineSizeRefUV = pCurDqLayer->pRefPic->iLineSize[1]; + + switch (pCurMb->uiMbType) { + case MB_TYPE_16x16: + //luma + InitMeRefinePointer (&sMeRefine, pMbCache, 0); + sMeRefine.pfCopyBlockByMode = + pFunc->pfCopy16x16NotAligned; // dst can be align with 16 bytes, but not sure at pSrc, 12/29/2011 + MeRefineFracPixel (pEncCtx, pDstLuma, &pWelsMd->sMe.sMe16x16, &sMeRefine, 16, 16); + UpdateP16x16MotionInfo (pMbCache, pCurMb, pWelsMd->uiRef, &pWelsMd->sMe.sMe16x16.sMv); + + pMbCache->sMbMvp[0] = pWelsMd->sMe.sMe16x16.sMvp; + //save the best cost of final mode + iBestSadCost = pWelsMd->sMe.sMe16x16.uiSadCost; + iBestSatdCost = pWelsMd->sMe.sMe16x16.uiSatdCost; + + //chroma + pMv = &pWelsMd->sMe.sMe16x16.sMv; + iMvStride = (pMv->iMvY >> 3) * iLineSizeRefUV + (pMv->iMvX >> 3); + pTmpRefCb = pRefCb + iMvStride; + pTmpRefCr = pRefCr + iMvStride; + pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCb, iLineSizeRefUV, pDstCb, 8, pMv->iMvX, 
pMv->iMvY, 8, 8); //Cb + pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCr, iLineSizeRefUV, pDstCr, 8, pMv->iMvX, pMv->iMvY, 8, 8); //Cr + + pWelsMd->iCostSkipMb = pEncCtx->pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_16x16] (pMbCache->SPicData.pEncMb[0], + pCurDqLayer->iEncStride[0], pDstLuma, 16); + pWelsMd->iCostSkipMb += pEncCtx->pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_8x8] (pMbCache->SPicData.pEncMb[1], + pCurDqLayer->iEncStride[1], pDstCb, 8); + pWelsMd->iCostSkipMb += pEncCtx->pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_8x8] (pMbCache->SPicData.pEncMb[2], + pCurDqLayer->iEncStride[2], pDstCr, 8); + break; + + case MB_TYPE_16x8: + iPixStride = 0; + sMeRefine.pfCopyBlockByMode = + pFunc->pfCopy16x8NotAligned; // dst can be align with 16 bytes, but not sure at pSrc, 12/29/2011 + for (i = 0; i < 2; i++) { + //luma + iIdx = i << 3; + InitMeRefinePointer (&sMeRefine, pMbCache, iPixStride); + iPixStride += ME_REFINE_BUF_STRIDE_BLK8; + PredInter16x8Mv (pMbCache, iIdx, pWelsMd->uiRef, &pWelsMd->sMe.sMe16x8[i].sMvp); + MeRefineFracPixel (pEncCtx, pDstLuma + g_kuiSmb4AddrIn256[iIdx], &pWelsMd->sMe.sMe16x8[i], &sMeRefine, 16, 8); + UpdateP16x8MotionInfo (pMbCache, pCurMb, iIdx, pWelsMd->uiRef, &pWelsMd->sMe.sMe16x8[i].sMv); + pMbCache->sMbMvp[i] = pWelsMd->sMe.sMe16x8[i].sMvp; + //save the best cost of final mode + iBestSadCost += pWelsMd->sMe.sMe16x8[i].uiSadCost; + iBestSatdCost += pWelsMd->sMe.sMe16x8[i].uiSatdCost; + + //chroma + iRefBlk4Stride = (i << 2) * iLineSizeRefUV; + iDstBlk4Stride = i << 5; // 4*8 + pMv = &pWelsMd->sMe.sMe16x8[i].sMv; + iMvStride = (pMv->iMvY >> 3) * iLineSizeRefUV + (pMv->iMvX >> 3); + pTmpRefCb = pRefCb + iRefBlk4Stride + iMvStride; + pTmpRefCr = pRefCr + iRefBlk4Stride + iMvStride; + pTmpDstCb = pDstCb + iDstBlk4Stride; + pTmpDstCr = pDstCr + iDstBlk4Stride; + pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCb, iLineSizeRefUV, pTmpDstCb, 8, pMv->iMvX, pMv->iMvY, 8, 4); //Cb + 
pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCr, iLineSizeRefUV, pTmpDstCr, 8, pMv->iMvX, pMv->iMvY, 8, 4); //Cr + } + break; + + case MB_TYPE_8x16: + iPixStride = 0; + sMeRefine.pfCopyBlockByMode = pFunc->pfCopy8x16Aligned; + for (i = 0; i < 2; i++) { + //luma + iIdx = i << 2; + InitMeRefinePointer (&sMeRefine, pMbCache, iPixStride); + iPixStride += ME_REFINE_BUF_WIDTH_BLK8; + PredInter8x16Mv (pMbCache, iIdx, pWelsMd->uiRef, &pWelsMd->sMe.sMe8x16[i].sMvp); + MeRefineFracPixel (pEncCtx, pDstLuma + g_kuiSmb4AddrIn256[iIdx], &pWelsMd->sMe.sMe8x16[i], &sMeRefine, 8, 16); + update_P8x16_motion_info (pMbCache, pCurMb, iIdx, pWelsMd->uiRef, &pWelsMd->sMe.sMe8x16[i].sMv); + pMbCache->sMbMvp[i] = pWelsMd->sMe.sMe8x16[i].sMvp; + //save the best cost of final mode + iBestSadCost += pWelsMd->sMe.sMe8x16[i].uiSadCost; + iBestSatdCost += pWelsMd->sMe.sMe8x16[i].uiSatdCost; + + //chroma + iRefBlk4Stride = iIdx; //4 + pMv = &pWelsMd->sMe.sMe8x16[i].sMv; + iMvStride = (pMv->iMvY >> 3) * iLineSizeRefUV + (pMv->iMvX >> 3); + pTmpRefCb = pRefCb + iRefBlk4Stride + iMvStride; + pTmpRefCr = pRefCr + iRefBlk4Stride + iMvStride; + pTmpDstCb = pDstCb + iRefBlk4Stride; + pTmpDstCr = pDstCr + iRefBlk4Stride; + pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCb, iLineSizeRefUV, pTmpDstCb, 8, pMv->iMvX, pMv->iMvY, 4, 8); //Cb + pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCr, iLineSizeRefUV, pTmpDstCr, 8, pMv->iMvX, pMv->iMvY, 4, 8); //Cr + } + break; + case MB_TYPE_8x8: + pMbCache->sMvComponents.iRefIndexCache [9] = pMbCache->sMvComponents.iRefIndexCache [21] = REF_NOT_AVAIL; + for (i = 0; i < 4; i++) { + int32_t iBlk8Idx = i << 2; //0, 4, 8, 12 + int32_t iBlk4X, iBlk4Y, iBlk4x4Idx; + + pCurMb->pRefIndex[i] = pWelsMd->uiRef; + switch (pCurMb->uiSubMbType[i]) { + case SUB_MB_TYPE_8x8: + sMeRefine.pfCopyBlockByMode = pFunc->pfCopy8x8Aligned; + //luma + InitMeRefinePointer (&sMeRefine, pMbCache, g_kiPixStrideIdx8x8[i]); + PredMv (&pMbCache->sMvComponents, iBlk8Idx, 2, 
pWelsMd->uiRef, &pWelsMd->sMe.sMe8x8[i].sMvp); + MeRefineFracPixel (pEncCtx, pDstLuma + g_kuiSmb4AddrIn256[iBlk8Idx], &pWelsMd->sMe.sMe8x8[i], &sMeRefine, 8, 8); + UpdateP8x8MotionInfo (pMbCache, pCurMb, iBlk8Idx, pWelsMd->uiRef, &pWelsMd->sMe.sMe8x8[i].sMv); + pMbCache->sMbMvp[g_kuiMbCountScan4Idx[iBlk8Idx]] = pWelsMd->sMe.sMe8x8[i].sMvp; + iBestSadCost += pWelsMd->sMe.sMe8x8[i].uiSadCost; + iBestSatdCost += pWelsMd->sMe.sMe8x8[i].uiSatdCost; + + //chroma + pMv = &pWelsMd->sMe.sMe8x8[i].sMv; + iMvStride = (pMv->iMvY >> 3) * iLineSizeRefUV + (pMv->iMvX >> 3); + + iBlk4X = (i & 1) << 2; + iBlk4Y = (i >> 1) << 2; + iRefBlk4Stride = iBlk4Y * iLineSizeRefUV + iBlk4X; + iDstBlk4Stride = (iBlk4Y << 3) + iBlk4X; + + pTmpRefCb = pRefCb + iRefBlk4Stride; + pTmpDstCb = pDstCb + iDstBlk4Stride; + pTmpRefCr = pRefCr + iRefBlk4Stride; + pTmpDstCr = pDstCr + iDstBlk4Stride; + pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCb + iMvStride, iLineSizeRefUV, pTmpDstCb, 8, pMv->iMvX, pMv->iMvY, + 4, 4); //Cb + pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCr + iMvStride, iLineSizeRefUV, pTmpDstCr, 8, pMv->iMvX, pMv->iMvY, + 4, 4); //Cr + break; + case SUB_MB_TYPE_4x4: + sMeRefine.pfCopyBlockByMode = pFunc->pfCopy4x4; + //luma + for (j = 0; j < 4; ++j) { + iBlk4x4Idx = iBlk8Idx + j; + InitMeRefinePointer (&sMeRefine, pMbCache, g_kiPixStrideIdx4x4[i][j]); + PredMv (&pMbCache->sMvComponents, iBlk4x4Idx, 1, pWelsMd->uiRef, &pWelsMd->sMe.sMe4x4[i][j].sMvp); + MeRefineFracPixel (pEncCtx, pDstLuma + g_kuiSmb4AddrIn256[iBlk4x4Idx], &pWelsMd->sMe.sMe4x4[i][j], &sMeRefine, 4, 4); + UpdateP4x4MotionInfo (pMbCache, pCurMb, iBlk4x4Idx, pWelsMd->uiRef, &pWelsMd->sMe.sMe4x4[i][j].sMv); + pMbCache->sMbMvp[g_kuiMbCountScan4Idx[iBlk4x4Idx]] = pWelsMd->sMe.sMe4x4[i][j].sMvp; + iBestSadCost += pWelsMd->sMe.sMe4x4[i][j].uiSadCost; + iBestSatdCost += pWelsMd->sMe.sMe4x4[i][j].uiSatdCost; + + //chroma + pMv = &pWelsMd->sMe.sMe4x4[i][j].sMv; + iMvStride = (pMv->iMvY >> 3) * iLineSizeRefUV + 
(pMv->iMvX >> 3); + + iBlk4X = (((i & 1) << 1) + (j & 1)) << 1; + iBlk4Y = (((i >> 1) << 1) + (j >> 1)) << 1; + iRefBlk4Stride = iBlk4Y * iLineSizeRefUV + iBlk4X; + iDstBlk4Stride = (iBlk4Y << 3) + iBlk4X; + + pTmpRefCb = pRefCb + iRefBlk4Stride; + pTmpDstCb = pDstCb + iDstBlk4Stride; + pTmpRefCr = pRefCr + iRefBlk4Stride; + pTmpDstCr = pDstCr + iDstBlk4Stride; + pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCb + iMvStride, iLineSizeRefUV, pTmpDstCb, 8, pMv->iMvX, pMv->iMvY, + 2, 2); //Cb + pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCr + iMvStride, iLineSizeRefUV, pTmpDstCr, 8, pMv->iMvX, pMv->iMvY, + 2, 2); //Cr + } + break; + case SUB_MB_TYPE_8x4: + sMeRefine.pfCopyBlockByMode = pFunc->pfCopy8x4; + //luma + for (j = 0; j < 2; ++j) { + iBlk4x4Idx = iBlk8Idx + (j << 1); + InitMeRefinePointer (&sMeRefine, pMbCache, g_kiPixStrideIdx4x4[i][j << 1]); + PredMv (&pMbCache->sMvComponents, iBlk4x4Idx, 2, pWelsMd->uiRef, &pWelsMd->sMe.sMe8x4[i][j].sMvp); + MeRefineFracPixel (pEncCtx, pDstLuma + g_kuiSmb4AddrIn256[iBlk4x4Idx], &pWelsMd->sMe.sMe8x4[i][j], &sMeRefine, 8, 4); + UpdateP8x4MotionInfo (pMbCache, pCurMb, iBlk4x4Idx, pWelsMd->uiRef, &pWelsMd->sMe.sMe8x4[i][j].sMv); + pMbCache->sMbMvp[g_kuiMbCountScan4Idx[ iBlk4x4Idx]] = pWelsMd->sMe.sMe8x4[i][j].sMvp; + //pMbCache->sMbMvp[g_kuiMbCountScan4Idx[1 + iBlk4x4Idx]] = pWelsMd->sMe.sMe8x4[i][j].sMvp; + iBestSadCost += pWelsMd->sMe.sMe8x4[i][j].uiSadCost; + iBestSatdCost += pWelsMd->sMe.sMe8x4[i][j].uiSatdCost; + + //chroma + pMv = &pWelsMd->sMe.sMe8x4[i][j].sMv; + iMvStride = (pMv->iMvY >> 3) * iLineSizeRefUV + (pMv->iMvX >> 3); + + iBlk4X = ((i & 1) << 1) << 1; + iBlk4Y = (((i >> 1) << 1) + j) << 1; + iRefBlk4Stride = iBlk4Y * iLineSizeRefUV + iBlk4X; + iDstBlk4Stride = (iBlk4Y << 3) + iBlk4X; + + pTmpRefCb = pRefCb + iRefBlk4Stride; + pTmpDstCb = pDstCb + iDstBlk4Stride; + pTmpRefCr = pRefCr + iRefBlk4Stride; + pTmpDstCr = pDstCr + iDstBlk4Stride; + pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCb + 
iMvStride, iLineSizeRefUV, pTmpDstCb, 8, pMv->iMvX, pMv->iMvY, + 4, 2); //Cb + pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCr + iMvStride, iLineSizeRefUV, pTmpDstCr, 8, pMv->iMvX, pMv->iMvY, + 4, 2); //Cr + } + break; + case SUB_MB_TYPE_4x8: + sMeRefine.pfCopyBlockByMode = pFunc->pfCopy4x8; + //luma + for (j = 0; j < 2; ++j) { + iBlk4x4Idx = iBlk8Idx + j; + InitMeRefinePointer (&sMeRefine, pMbCache, g_kiPixStrideIdx4x4[i][j]); + PredMv (&pMbCache->sMvComponents, iBlk4x4Idx, 1, pWelsMd->uiRef, &pWelsMd->sMe.sMe4x8[i][j].sMvp); + MeRefineFracPixel (pEncCtx, pDstLuma + g_kuiSmb4AddrIn256[iBlk4x4Idx], &pWelsMd->sMe.sMe4x8[i][j], &sMeRefine, 4, 8); + UpdateP4x8MotionInfo (pMbCache, pCurMb, iBlk4x4Idx, pWelsMd->uiRef, &pWelsMd->sMe.sMe4x8[i][j].sMv); + pMbCache->sMbMvp[g_kuiMbCountScan4Idx[ iBlk4x4Idx]] = pWelsMd->sMe.sMe4x8[i][j].sMvp; + //pMbCache->sMbMvp[g_kuiMbCountScan4Idx[4 + iBlk4x4Idx]] = pWelsMd->sMe.sMe8x4[i][j].sMvp; + iBestSadCost += pWelsMd->sMe.sMe4x8[i][j].uiSadCost; + iBestSatdCost += pWelsMd->sMe.sMe4x8[i][j].uiSatdCost; + + //chroma + pMv = &pWelsMd->sMe.sMe4x8[i][j].sMv; + iMvStride = (pMv->iMvY >> 3) * iLineSizeRefUV + (pMv->iMvX >> 3); + + iBlk4X = (((i & 1) << 1) + j) << 1; + iBlk4Y = (((i >> 1) << 1)) << 1; + iRefBlk4Stride = iBlk4Y * iLineSizeRefUV + iBlk4X; + iDstBlk4Stride = (iBlk4Y << 3) + iBlk4X; + + pTmpRefCb = pRefCb + iRefBlk4Stride; + pTmpDstCb = pDstCb + iDstBlk4Stride; + pTmpRefCr = pRefCr + iRefBlk4Stride; + pTmpDstCr = pDstCr + iDstBlk4Stride; + pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCb + iMvStride, iLineSizeRefUV, pTmpDstCb, 8, pMv->iMvX, pMv->iMvY, + 2, 4); //Cb + pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCr + iMvStride, iLineSizeRefUV, pTmpDstCr, 8, pMv->iMvX, pMv->iMvY, + 2, 4); //Cr + } + break; + } + } + break; + default: + break; + } + pCurMb->pSadCost[0] = iBestSadCost; + if (pWelsMd->bMdUsingSad) + pWelsMd->iCostLuma = iBestSadCost; + else + pWelsMd->iCostLuma = iBestSatdCost; + +} +bool 
WelsMdFirstIntraMode (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurMb, SMbCache* pMbCache) { + SWelsFuncPtrList* pFunc = pEncCtx->pFuncList; + + int32_t iCostI16x16 = WelsMdI16x16 (pFunc, pEncCtx->pCurDqLayer, pMbCache, pWelsMd->iLambda); + + //compare cost_p16x16 with cost_i16x16 + if (iCostI16x16 < pWelsMd->iCostLuma) { + pCurMb->uiMbType = MB_TYPE_INTRA16x16; + pWelsMd->iCostLuma = iCostI16x16; + + pFunc->pfIntraFineMd (pEncCtx, pWelsMd, pCurMb, pMbCache); + + //add pEnc&rec to MD--2010.3.15 + if (IS_INTRA16x16 (pCurMb->uiMbType)) { + pCurMb->uiCbp = 0; + WelsEncRecI16x16Y (pEncCtx, pCurMb, pMbCache); + } + + //chroma + pWelsMd->iCostChroma = WelsMdIntraChroma (pFunc, pEncCtx->pCurDqLayer, pMbCache, pWelsMd->iLambda); + WelsIMbChromaEncode (pEncCtx, pCurMb, pMbCache); //add pEnc&rec to MD--2010.3.15 + pCurMb->uiChromPredMode = pMbCache->uiChmaI8x8Mode; + pCurMb->pSadCost[0] = 0; + return true; //intra_mb_type is best + } + + return false; +} + +void WelsMdInterMb (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice* pSlice, SMB* pCurMb, SMbCache* pUnused) { + SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer; + SMbCache* pMbCache = &pSlice->sMbCacheInfo; + const uint32_t kuiNeighborAvail = pCurMb->uiNeighborAvail; + const int32_t kiMbWidth = pCurDqLayer->iMbWidth; + const SMB* top_mb = pCurMb - kiMbWidth; + const bool bMbLeftAvailPskip = ((kuiNeighborAvail & LEFT_MB_POS) ? IS_SKIP ((pCurMb - 1)->uiMbType) : false); + const bool bMbTopAvailPskip = ((kuiNeighborAvail & TOP_MB_POS) ? IS_SKIP (top_mb->uiMbType) : false); + const bool bMbTopLeftAvailPskip = ((kuiNeighborAvail & TOPLEFT_MB_POS) ? IS_SKIP ((top_mb - 1)->uiMbType) : false); + const bool bMbTopRightAvailPskip = ((kuiNeighborAvail & TOPRIGHT_MB_POS) ? 
IS_SKIP ((top_mb + 1)->uiMbType) : false); + bool bTrySkip = bMbLeftAvailPskip || bMbTopAvailPskip || bMbTopLeftAvailPskip || bMbTopRightAvailPskip; + bool bKeepSkip = bMbLeftAvailPskip && bMbTopAvailPskip && bMbTopRightAvailPskip; + bool bSkip = false; + + //try BGD skip + if (pEncCtx->pFuncList->pfInterMdBackgroundDecision (pEncCtx, pWelsMd, pSlice, pCurMb, pMbCache, &bKeepSkip)) { + return; + } + + //try static or scrolled Pskip + if (pEncCtx->pFuncList->pfSCDPSkipDecision (pEncCtx, pWelsMd, pSlice, pCurMb, pMbCache)) { + return; + } + + //step 1: try SKIP + bSkip = WelsMdInterJudgePskip (pEncCtx, pWelsMd, pSlice, pCurMb, pMbCache, bTrySkip); + + if (bSkip) { + if (bKeepSkip) { + WelsMdInterDecidedPskip (pEncCtx, pSlice, pCurMb, pMbCache); + return; + } + } else { + PredictSad (pMbCache->sMvComponents.iRefIndexCache, pMbCache->iSadCost, 0, &pWelsMd->iSadPredMb); + + //step 2: P_16x16 + pWelsMd->iCostLuma = WelsMdP16x16 (pEncCtx->pFuncList, pCurDqLayer, pWelsMd, pSlice, pCurMb); + pCurMb->uiMbType = MB_TYPE_16x16; + } + + WelsMdInterSecondaryModesEnc (pEncCtx, pWelsMd, pSlice, pCurMb, pMbCache, bSkip); +} + + + +////// +// try the ordinary Pskip +////// +bool WelsMdInterJudgePskip (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice* pSlice, SMB* pCurMb, SMbCache* pMbCache, + bool bTrySkip) { + bool bRet = true; + if (((pEncCtx->pRefPic->iPictureType == P_SLICE) && (pMbCache->uiRefMbType == MB_TYPE_SKIP + || pMbCache->uiRefMbType == MB_TYPE_BACKGROUND)) || + bTrySkip) { + PredictSadSkip (pMbCache->sMvComponents.iRefIndexCache, pMbCache->bMbTypeSkip, pMbCache->iSadCostSkip, 0, + & (pWelsMd->iSadPredSkip)); + bRet = WelsMdPSkipEnc (pEncCtx, pWelsMd, pCurMb, pMbCache) ? 
true : false; + return bRet; + } + + return false; +} + +////// +// try the ordinary Pskip +////// +void WelsMdInterUpdatePskip (SDqLayer* pCurDqLayer, SSlice* pSlice, SMB* pCurMb, SMbCache* pMbCache) { + //add pEnc&rec to MD--2010.3.15 + pCurMb->uiCbp = 0; + pCurMb->uiLumaQp = pSlice->uiLastMbQp; + pCurMb->uiChromaQp = g_kuiChromaQpTable[CLIP3_QP_0_51 (pCurMb->uiLumaQp + + pCurDqLayer->sLayerInfo.pPpsP->uiChromaQpIndexOffset)]; + pMbCache->bCollocatedPredFlag = (LD32 (&pCurMb->sMv[0]) == 0); +} + + +////// +// doublecheck if current MBTYPE is Pskip +////// +void WelsMdInterDoubleCheckPskip (SMB* pCurMb, SMbCache* pMbCache) { + if (MB_TYPE_16x16 == pCurMb->uiMbType && 0 == pCurMb->uiCbp) { + if (0 == pCurMb->pRefIndex[0]) { + SMVUnitXY sMvp = { 0 }; + + PredSkipMv (pMbCache, &sMvp); + if (LD32 (&sMvp) == LD32 (&pCurMb->sMv[0])) { + pCurMb->uiMbType = MB_TYPE_SKIP; + } + } + pMbCache->bCollocatedPredFlag = (LD32 (&pCurMb->sMv[0]) == 0); + } +} + +////// +// Pskip mb encode +////// +void WelsMdInterDecidedPskip (sWelsEncCtx* pEncCtx, SSlice* pSlice, SMB* pCurMb, SMbCache* pMbCache) { + SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer; + pCurMb->uiMbType = MB_TYPE_SKIP; + WelsRecPskip (pCurDqLayer, pEncCtx->pFuncList, pCurMb, pMbCache); + WelsMdInterUpdatePskip (pCurDqLayer, pSlice, pCurMb, pMbCache); +} + +////// +// inter mb encode +////// +void WelsMdInterEncode (sWelsEncCtx* pEncCtx, SSlice* pSlice, SMB* pCurMb, SMbCache* pMbCache) { + SWelsFuncPtrList* pFunc = pEncCtx->pFuncList; + SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer; + + //add pEnc&rec to MD--2010.3.15 + const int32_t kiCsStrideY = pCurDqLayer->iCsStride[0]; + const int32_t kiCsStrideUV = pCurDqLayer->iCsStride[1]; + + //add pEnc&rec to MD--2010.3.15 + pCurMb->uiCbp = 0; + WelsInterMbEncode (pEncCtx, pSlice, pCurMb); + WelsPMbChromaEncode (pEncCtx, pSlice, pCurMb); + + pFunc->pfCopy16x16Aligned (pMbCache->SPicData.pCsMb[0], kiCsStrideY, pMbCache->pMemPredLuma, 16); + pFunc->pfCopy8x8Aligned 
(pMbCache->SPicData.pCsMb[1], kiCsStrideUV, pMbCache->pMemPredChroma, 8); + pFunc->pfCopy8x8Aligned (pMbCache->SPicData.pCsMb[2], kiCsStrideUV, pMbCache->pMemPredChroma + 64, 8); +} + + + +// +// +// +void WelsMdInterSaveSadAndRefMbType (Mb_Type* pRefMbtypeList, SMbCache* pMbCache, const SMB* pCurMb, + const SWelsMD* pMd) { + const Mb_Type kmtCurMbtype = pCurMb->uiMbType; + + //sad + pMbCache->pEncSad[0] = (kmtCurMbtype == MB_TYPE_SKIP) ? pMd->iCostSkipMb : 0; + //uiMbType + pRefMbtypeList[pCurMb->iMbXY] = kmtCurMbtype; +} + +void WelsMdInterSecondaryModesEnc (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice* pSlice, SMB* pCurMb, + SMbCache* pMbCache, const bool bSkip) { + //step 2: Intra + const bool kbTrySkip = pEncCtx->pFuncList->pfFirstIntraMode (pEncCtx, pWelsMd, pCurMb, pMbCache); + if (kbTrySkip) + return; + + if (bSkip) { + WelsMdInterDecidedPskip (pEncCtx, pSlice, pCurMb, pMbCache); + } else { + //Step 3: SubP16 MD + pEncCtx->pFuncList->pfSetScrollingMv (pEncCtx->pVaa, pWelsMd); //SCC + pEncCtx->pFuncList->pfInterFineMd (pEncCtx, pWelsMd, pSlice, pCurMb, pWelsMd->iCostLuma); + + //refinement for inter type + WelsMdInterMbRefinement (pEncCtx, pWelsMd, pCurMb, pMbCache); + + //step 7: invoke encoding + WelsMdInterEncode (pEncCtx, pSlice, pCurMb, pMbCache); + + //step 8: double check Pskip + WelsMdInterDoubleCheckPskip (pCurMb, pMbCache); + } +} + + +void WelsMdIntraSecondaryModesEnc (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurMb, SMbCache* pMbCache) { + SWelsFuncPtrList* pFunc = pEncCtx->pFuncList; + //initial prediction memory for I_4x4 + pFunc->pfIntraFineMd (pEncCtx, pWelsMd, pCurMb, pMbCache); //WelsMdIntraFinePartitionVaa + + //add pEnc&rec to MD--2010.3.15 + if (IS_INTRA16x16 (pCurMb->uiMbType)) { + pCurMb->uiCbp = 0; + WelsEncRecI16x16Y (pEncCtx, pCurMb, pMbCache); + } + + //chroma + pWelsMd->iCostChroma = WelsMdIntraChroma (pFunc, pEncCtx->pCurDqLayer, pMbCache, pWelsMd->iLambda); + WelsIMbChromaEncode (pEncCtx, pCurMb, pMbCache); //add pEnc&rec 
to MD--2010.3.15 + pCurMb->uiChromPredMode = pMbCache->uiChmaI8x8Mode; + pCurMb->pSadCost[0] = 0; +} + +} // namespace WelsEnc diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/svc_enc_slice_segment.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/svc_enc_slice_segment.cpp new file mode 100644 index 000000000..5a9bf739e --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/svc_enc_slice_segment.cpp @@ -0,0 +1,679 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file slice_segment.c + * + * \brief SSlice segment routine (Single slice/multiple slice/fmo arrangement exclusive) + * + * \date 2/4/2009 Created + * + ************************************************************************************* + */ +#include +#include "rc.h" +#include "svc_enc_frame.h" + +namespace WelsEnc { +/*! + * \brief Assign MB map for single slice segment + * + * \param pMbMap overall MB map + * \param iCountMbNum count number of MB + * + * \return 0 - successful; none 0 - failed + */ +int32_t AssignMbMapSingleSlice (void* pMbMap, const int32_t kiCountMbNum, const int32_t kiMapUnitSize) { + if (NULL == pMbMap || kiCountMbNum <= 0) + return 1; + + memset (pMbMap, 0, kiCountMbNum * kiMapUnitSize); + + return 0; +} + +/*! + * \brief Assign MB map for multiple slice(s) segment + * + * \param pCurDq current layer which its MB map will be assigned + * \param kpSliceArgument slice argument for current layer + * + * \return 0 - successful; none 0 - failed + */ +int32_t AssignMbMapMultipleSlices (SDqLayer* pCurDq, const SSliceArgument* kpSliceArgument) { + SSliceCtx* pSliceSeg = &pCurDq->sSliceEncCtx; + int32_t iSliceIdx = 0; + if (NULL == pSliceSeg || SM_SINGLE_SLICE == pSliceSeg->uiSliceMode) + return 1; + + if ((SM_RASTER_SLICE == pSliceSeg->uiSliceMode) && (0 == kpSliceArgument->uiSliceMbNum[0])) { + const int32_t kiMbWidth = pSliceSeg->iMbWidth; + int32_t iSliceNum = pSliceSeg->iSliceNumInFrame; + + iSliceIdx = 0; + while (iSliceIdx < iSliceNum) { + const int32_t kiFirstMb = iSliceIdx * kiMbWidth; + WelsSetMemMultiplebytes_c (pSliceSeg->pOverallMbMap + kiFirstMb, iSliceIdx, + kiMbWidth, sizeof (uint16_t)); + ++ iSliceIdx; + } + + return 0; + } else if (SM_RASTER_SLICE == pSliceSeg->uiSliceMode || + SM_FIXEDSLCNUM_SLICE == pSliceSeg->uiSliceMode) { + const int32_t* kpSlicesAssignList = (int32_t*) & (kpSliceArgument->uiSliceMbNum[0]); + const int32_t kiCountNumMbInFrame = pSliceSeg->iMbNumInFrame; + const int32_t kiCountSliceNumInFrame = 
pSliceSeg->iSliceNumInFrame; + int32_t iMbIdx = 0; + + iSliceIdx = 0; + do { + const int32_t kiCurRunLength = kpSlicesAssignList[iSliceIdx]; + int32_t iRunIdx = 0; + + // due here need check validate mb_assign_map for input pData, can not use memset + do { + pSliceSeg->pOverallMbMap[iMbIdx + iRunIdx] = iSliceIdx; + ++ iRunIdx; + } while (iRunIdx < kiCurRunLength && iMbIdx + iRunIdx < kiCountNumMbInFrame); + + iMbIdx += kiCurRunLength; + ++ iSliceIdx; + } while (iSliceIdx < kiCountSliceNumInFrame && iMbIdx < kiCountNumMbInFrame); + } else if (SM_SIZELIMITED_SLICE == pSliceSeg->uiSliceMode) { + // do nothing,pSliceSeg->pOverallMbMap will be initial later + } else { // any else uiSliceMode? + assert (0); + } + + // extention for other multiple slice type in the future + return 1; +} + +/*! + * Check slices assignment settings on MST_INTERLEAVE type + */ + +//slice parameter check for SM_FIXEDSLCNUM_SLICE +bool CheckFixedSliceNumMultiSliceSetting (const int32_t kiMbNumInFrame, SSliceArgument* pSliceArg) { + int32_t* pSlicesAssignList = (int32_t*) & (pSliceArg->uiSliceMbNum[0]); + const uint32_t kuiSliceNum = pSliceArg->uiSliceNum; + uint32_t uiSliceIdx = 0; + const int32_t kiMbNumPerSlice = kiMbNumInFrame / kuiSliceNum; + int32_t iNumMbLeft = kiMbNumInFrame; + + if (NULL == pSlicesAssignList) + return false; + + for (; uiSliceIdx + 1 < kuiSliceNum; ++ uiSliceIdx) { + pSlicesAssignList[uiSliceIdx] = kiMbNumPerSlice; + iNumMbLeft -= kiMbNumPerSlice; + } + + pSlicesAssignList[uiSliceIdx] = iNumMbLeft; + + if (iNumMbLeft <= 0 || kiMbNumPerSlice <= 0) { + return false; + } + + return true; +} + +//slice parameter check for SM_ROWMB_SLICE +bool CheckRowMbMultiSliceSetting (const int32_t kiMbWidth, SSliceArgument* pSliceArg) { + int32_t* pSlicesAssignList = (int32_t*) & (pSliceArg->uiSliceMbNum[0]); + const uint32_t kuiSliceNum = pSliceArg->uiSliceNum; + uint32_t uiSliceIdx = 0; + + if (NULL == pSlicesAssignList) + return false; + + while (uiSliceIdx < kuiSliceNum) { + 
pSlicesAssignList[uiSliceIdx] = kiMbWidth; + ++ uiSliceIdx; + } + return true; +} + +//slice parameter check for SM_RASTER_SLICE +bool CheckRasterMultiSliceSetting (const int32_t kiMbNumInFrame, SSliceArgument* pSliceArg) { + int32_t* pSlicesAssignList = (int32_t*) & (pSliceArg->uiSliceMbNum[0]); + int32_t iActualSliceCount = 0; + + //check mb_num setting + uint32_t uiSliceIdx = 0; + int32_t iCountMb = 0; + + if (NULL == pSlicesAssignList) + return false; + + while ((uiSliceIdx < MAX_SLICES_NUM) && (0 < pSlicesAssignList[uiSliceIdx])) { + iCountMb += pSlicesAssignList[uiSliceIdx]; + iActualSliceCount = uiSliceIdx + 1; + + if (iCountMb >= kiMbNumInFrame) { + break; + } + + ++ uiSliceIdx; + } + //break condition above makes, after the while + // here must have (iActualSliceCount <= MAX_SLICES_NUM) + + //correction if needed + if (iCountMb == kiMbNumInFrame) { + ; + } else if (iCountMb > kiMbNumInFrame) { + //need correction: + //setting is more than iMbNumInFrame, + //cut the last uiSliceMbNum; adjust iCountMb + pSlicesAssignList[iActualSliceCount - 1] -= (iCountMb - kiMbNumInFrame); + iCountMb = kiMbNumInFrame; + } else if (iActualSliceCount < MAX_SLICES_NUM) { + //where ( iCountMb < iMbNumInFrame ) + //can do correction: + // make the last uiSliceMbNum the left num + pSlicesAssignList[iActualSliceCount] = kiMbNumInFrame - iCountMb; + iActualSliceCount += 1; + } else { + //here ( iCountMb < iMbNumInFrame ) && ( iActualSliceCount == MAX_SLICES_NUM ) + //no more slice can be added + return false; + } + + pSliceArg->uiSliceNum = iActualSliceCount; + return true; + +} + + +// GOM based RC related for uiSliceNum decision, only used at SM_FIXEDSLCNUM_SLICE +bool GomValidCheckSliceNum (const int32_t kiMbWidth, const int32_t kiMbHeight, uint32_t* pSliceNum) { + const int32_t kiCountNumMb = kiMbWidth * kiMbHeight; + uint32_t iSliceNum = *pSliceNum; + int32_t iGomSize; + + //The default RC is Bit-rate mode[Yi], but need consider as below: + // Tuned to use max of mode0 and 
mode1 due can not refresh on this from rc mode changed outside, 8/16/2011 + // NOTE: GOM_ROW_MODE0_?P is integer multipler of GOM_ROW_MODE1_?P, which predefined at rc.h there, so GOM_ROM take MODE0 as the initial + if (kiMbWidth <= MB_WIDTH_THRESHOLD_90P) + iGomSize = kiMbWidth * GOM_ROW_MODE0_90P; + else if (kiMbWidth <= MB_WIDTH_THRESHOLD_180P) + iGomSize = kiMbWidth * GOM_ROW_MODE0_180P; + else if (kiMbWidth <= MB_WIDTH_THRESHOLD_360P) + iGomSize = kiMbWidth * GOM_ROW_MODE0_360P; + else + iGomSize = kiMbWidth * GOM_ROW_MODE0_720P; + + while (true) { + if (kiCountNumMb < iGomSize * (int32_t) iSliceNum) { + -- iSliceNum; + iSliceNum = iSliceNum - (iSliceNum & 0x01); // verfiy even num for multiple slices case + if (iSliceNum < 2) // for safe + break; + continue; + } + break; + } + + if (*pSliceNum != iSliceNum) { + *pSliceNum = (0 != iSliceNum) ? iSliceNum : 1; + return false; + } + return true; +} + + +// GOM based RC related for uiSliceMbNum decision, only used at SM_FIXEDSLCNUM_SLICE +bool GomValidCheckSliceMbNum (const int32_t kiMbWidth, const int32_t kiMbHeight, SSliceArgument* pSliceArg) { + uint32_t* pSlicesAssignList = & (pSliceArg->uiSliceMbNum[0]); + const uint32_t kuiSliceNum = pSliceArg->uiSliceNum; + const int32_t kiMbNumInFrame = kiMbWidth * kiMbHeight; + const int32_t kiMbNumPerSlice = kiMbNumInFrame / kuiSliceNum; + int32_t iNumMbLeft = kiMbNumInFrame; + + int32_t iMinimalMbNum = kiMbWidth; // in theory we need only 1 SMB, here let it as one SMB row required + int32_t iMaximalMbNum = 0; // dynamically assign later + int32_t iGomSize; + + uint32_t uiSliceIdx = 0; // for test + + // The default RC is Bit-rate mode [Yi], but need consider as below: + // Tuned to use max of mode0 and mode1 due can not refresh on this from rc mode changed outside, 8/16/2011 + // NOTE: GOM_ROW_MODE0_?P is integer multipler of GOM_ROW_MODE1_?P, which predefined at rc.h there, so GOM_ROM take MODE0 as the initial + if (kiMbWidth <= MB_WIDTH_THRESHOLD_90P) + iGomSize = 
kiMbWidth * GOM_ROW_MODE0_90P; + else if (kiMbWidth <= MB_WIDTH_THRESHOLD_180P) + iGomSize = kiMbWidth * GOM_ROW_MODE0_180P; + else if (kiMbWidth <= MB_WIDTH_THRESHOLD_360P) + iGomSize = kiMbWidth * GOM_ROW_MODE0_360P; + else + iGomSize = kiMbWidth * GOM_ROW_MODE0_720P; + // GOM boundary aligned + int32_t iNumMbAssigning = WELS_DIV_ROUND (INT_MULTIPLY * kiMbNumPerSlice, iGomSize * INT_MULTIPLY) * iGomSize; + int32_t iCurNumMbAssigning = 0; + + iMinimalMbNum = iGomSize; + while (uiSliceIdx + 1 < kuiSliceNum) { + iMaximalMbNum = iNumMbLeft - (kuiSliceNum - uiSliceIdx - 1) * iMinimalMbNum; // get maximal num_mb in left parts + // make sure one GOM at least in each slice for safe + if (iNumMbAssigning < iMinimalMbNum) + iCurNumMbAssigning = iMinimalMbNum; + else if (iNumMbAssigning > iMaximalMbNum) + iCurNumMbAssigning = (iMaximalMbNum / iGomSize) * iGomSize; + else + iCurNumMbAssigning = iNumMbAssigning; + + if (iCurNumMbAssigning <= 0) { + return false; + } + + iNumMbLeft -= iCurNumMbAssigning; + if (iNumMbLeft <= 0) { + return false; + } + + pSlicesAssignList[uiSliceIdx] = iCurNumMbAssigning; + ++ uiSliceIdx; + } + pSlicesAssignList[uiSliceIdx] = iNumMbLeft; + if (iNumMbLeft < iMinimalMbNum) { + return false; + } + + return true; +} + + +/*! + * Get slice count for multiple slice segment + * + */ +int32_t GetInitialSliceNum (SSliceArgument* pSliceArgument) { + if (NULL == pSliceArgument) + return -1; + + switch (pSliceArgument->uiSliceMode) { + case SM_SINGLE_SLICE: + case SM_FIXEDSLCNUM_SLICE: + case SM_RASTER_SLICE: { + return pSliceArgument->uiSliceNum; + } + case SM_SIZELIMITED_SLICE: { + return AVERSLICENUM_CONSTRAINT;//at the beginning of dynamic slicing, set the uiSliceNum to be 1 + } + case SM_RESERVED: + default: { + return -1; + } + } + + return -1; +} + +/*! 
+ * \brief Initialize slice segment (Single/multiple slices) + * + * \param pCurDq current layer which its SSlice segment will be initialized + * \param uiSliceMode SSlice mode + * \param multi_slice_argv Multiple slices argument + * \param iMbWidth MB width + * \param iMbHeight MB height + * + * \return 0 - successful; none 0 - failed; + */ +int32_t InitSliceSegment (SDqLayer* pCurDq, + CMemoryAlign* pMa, + SSliceArgument* pSliceArgument, + const int32_t kiMbWidth, + const int32_t kiMbHeight) { + SSliceCtx* pSliceSeg = &pCurDq->sSliceEncCtx; + const int32_t kiCountMbNum = kiMbWidth * kiMbHeight; + SliceModeEnum uiSliceMode = SM_SINGLE_SLICE; + + if (NULL == pSliceSeg || NULL == pSliceArgument || kiMbWidth == 0 || kiMbHeight == 0) + return 1; + + uiSliceMode = pSliceArgument->uiSliceMode; + if (pSliceSeg->iMbNumInFrame == kiCountMbNum && pSliceSeg->iMbWidth == kiMbWidth + && pSliceSeg->iMbHeight == kiMbHeight && pSliceSeg->uiSliceMode == uiSliceMode && pSliceSeg->pOverallMbMap != NULL) + return 0; + else if (pSliceSeg->iMbNumInFrame != kiCountMbNum) { + if (NULL != pSliceSeg->pOverallMbMap) { + pMa->WelsFree (pSliceSeg->pOverallMbMap, "pSliceSeg->pOverallMbMap"); + + pSliceSeg->pOverallMbMap = NULL; + } + + // just for safe + pSliceSeg->iSliceNumInFrame = 0; + pSliceSeg->iMbNumInFrame = 0; + pSliceSeg->iMbWidth = 0; + pSliceSeg->iMbHeight = 0; + pSliceSeg->uiSliceMode = SM_SINGLE_SLICE; // sigle in default + } + + if (SM_SINGLE_SLICE == uiSliceMode) { + pSliceSeg->pOverallMbMap = (uint16_t*)pMa->WelsMallocz (kiCountMbNum * sizeof (uint16_t), "pSliceSeg->pOverallMbMap"); + + WELS_VERIFY_RETURN_IF (1, NULL == pSliceSeg->pOverallMbMap) + pSliceSeg->iSliceNumInFrame = 1; + + pSliceSeg->uiSliceMode = uiSliceMode; + pSliceSeg->iMbWidth = kiMbWidth; + pSliceSeg->iMbHeight = kiMbHeight; + pSliceSeg->iMbNumInFrame = kiCountMbNum; + + return AssignMbMapSingleSlice (pSliceSeg->pOverallMbMap, kiCountMbNum, sizeof (pSliceSeg->pOverallMbMap[0])); + } else { //if ( 
SM_MULTIPLE_SLICE == uiSliceMode ) + if (uiSliceMode != SM_FIXEDSLCNUM_SLICE && uiSliceMode != SM_RASTER_SLICE + && uiSliceMode != SM_SIZELIMITED_SLICE) + return 1; + + pSliceSeg->pOverallMbMap = (uint16_t*)pMa->WelsMallocz (kiCountMbNum * sizeof (uint16_t), "pSliceSeg->pOverallMbMap"); + WELS_VERIFY_RETURN_IF (1, NULL == pSliceSeg->pOverallMbMap) + + WelsSetMemMultiplebytes_c (pSliceSeg->pOverallMbMap, 0, kiCountMbNum, sizeof (uint16_t)); + + //SM_SIZELIMITED_SLICE: init, set pSliceSeg->iSliceNumInFrame = 1; + pSliceSeg->iSliceNumInFrame = GetInitialSliceNum (pSliceArgument); + if (-1 == pSliceSeg->iSliceNumInFrame) + return 1; + + pSliceSeg->uiSliceMode = pSliceArgument->uiSliceMode; + + pSliceSeg->iMbWidth = kiMbWidth; + pSliceSeg->iMbHeight = kiMbHeight; + pSliceSeg->iMbNumInFrame = kiCountMbNum; + if (SM_SIZELIMITED_SLICE == pSliceArgument->uiSliceMode) { + if (0 < pSliceArgument->uiSliceSizeConstraint) { + pSliceSeg->uiSliceSizeConstraint = pSliceArgument->uiSliceSizeConstraint; + } else { + return 1; + } + } else { + pSliceSeg->uiSliceSizeConstraint = DEFAULT_MAXPACKETSIZE_CONSTRAINT; + } + // about "iMaxSliceNumConstraint" + //only used in SM_SIZELIMITED_SLICE mode so far, + //now follows NAL_UNIT_CONSTRAINT, (see definition) + //will be adjusted under MT if there is limitation on iLayerNum + pSliceSeg->iMaxSliceNumConstraint = MAX_SLICES_NUM; + + + return AssignMbMapMultipleSlices (pCurDq, pSliceArgument); + } + return 0; +} + +/*! 
+ * \brief Uninitialize slice segment (Single/multiple slices) + * + * \param pCurDq current layer which its SSlice segment will be uninitialized + * + * \return none; + */ +void UninitSliceSegment (SDqLayer* pCurDq, CMemoryAlign* pMa) { + SSliceCtx* pSliceSeg = &pCurDq->sSliceEncCtx; + if (NULL != pSliceSeg) { + if (NULL != pSliceSeg->pOverallMbMap) { + pMa->WelsFree (pSliceSeg->pOverallMbMap, "pSliceSeg->pOverallMbMap"); + + pSliceSeg->pOverallMbMap = NULL; + } + + pSliceSeg->uiSliceMode = SM_SINGLE_SLICE; // single in default + pSliceSeg->iMbWidth = 0; + pSliceSeg->iMbHeight = 0; + pSliceSeg->iSliceNumInFrame = 0; + pSliceSeg->iMbNumInFrame = 0; + pSliceSeg->uiSliceSizeConstraint = 0; + pSliceSeg->iMaxSliceNumConstraint = 0; + } +} + + +/*! + * \brief Initialize Wels SSlice context (Single/multiple slices and FMO) + * + * \param pCurDq current layer which its SSlice context will be initialized + * \param bFmoUseFlag flag of using fmo + * \param iMbWidth MB width + * \param iMbHeight MB height + * \param uiSliceMode slice mode + * \param mul_slice_arg argument for multiple slice if it is applicable + * \param pPpsArg argument for pPps parameter + * + * \return 0 - successful; none 0 - failed; + */ +int32_t InitSlicePEncCtx (SDqLayer* pCurDq, + CMemoryAlign* pMa, + bool bFmoUseFlag, + int32_t iMbWidth, + int32_t iMbHeight, + SSliceArgument* pSliceArgument, + void* pPpsArg) { + if (NULL == pCurDq) + return 1; + + InitSliceSegment (pCurDq, + pMa, + pSliceArgument, + iMbWidth, + iMbHeight); + return 0; +} + + +/*! + * \brief Uninitialize Wels SSlice context (Single/multiple slices and FMO) + * + * \param pCurDq current layer which its SSlice context will be initialized + * + * \return NONE; + */ +void UninitSlicePEncCtx (SDqLayer* pCurDq, CMemoryAlign* pMa) { + if (NULL != pCurDq) { + UninitSliceSegment (pCurDq, pMa); + } +} + +/*! 
+ * \brief Get slice idc for given iMbXY (apply in Single/multiple slices and FMO) + * + * \param pCurDq current layer info + * \param kiMbXY MB xy index + * + * \return uiSliceIdc - successful; -1 - failed; + */ +uint16_t WelsMbToSliceIdc (SDqLayer* pCurDq, const int32_t kiMbXY) { + if (NULL == pCurDq) + return (uint16_t) (-1); + + SSliceCtx* pSliceCtx = &pCurDq->sSliceEncCtx; + if (NULL != pSliceCtx && kiMbXY < pSliceCtx->iMbNumInFrame && kiMbXY >= 0) + return pSliceCtx->pOverallMbMap[ kiMbXY ]; + return (uint16_t) (-1); +} + +/*! + * \brief Get first mb in slice/slice_group: uiSliceIdc (apply in Single/multiple slices and FMO) + * + * \param pCurLayer current layer + * \param kuiSliceIdc slice idc + * + * \return iFirstMb - successful; -1 - failed; + */ +int32_t WelsGetFirstMbOfSlice (SDqLayer* pCurLayer, const int32_t kuiSliceIdc) { + if (NULL == pCurLayer || NULL == pCurLayer->pFirstMbIdxOfSlice) { + return -1; + } + + return pCurLayer->pFirstMbIdxOfSlice[kuiSliceIdc]; +} + +/*! + * \brief Get successive mb to be processed in slice/slice_group: uiSliceIdc (apply in Single/multiple slices and FMO) + * + * \param pCurDq current layer info + * \param kiMbXY MB xy index + * + * \return next_mb - successful; -1 - failed; + */ +int32_t WelsGetNextMbOfSlice (SDqLayer* pCurDq, const int32_t kiMbXY) { + if (NULL != pCurDq) { + SSliceCtx* pSliceSeg = &pCurDq->sSliceEncCtx; + if (NULL == pSliceSeg || kiMbXY < 0 || kiMbXY >= pSliceSeg->iMbNumInFrame) + return -1; + if (SM_SINGLE_SLICE == pSliceSeg->uiSliceMode) { + int32_t iNextMbIdx = kiMbXY; + ++ iNextMbIdx; + if (iNextMbIdx >= pSliceSeg->iMbNumInFrame) + iNextMbIdx = -1; + return iNextMbIdx; + } else { /*if ( SM_MULTIPLE_SLICE == pSliceSeg->uiSliceMode )*/ + if (SM_RESERVED != pSliceSeg->uiSliceMode) { + int32_t iNextMbIdx = kiMbXY; + ++ iNextMbIdx; + if (iNextMbIdx < pSliceSeg->iMbNumInFrame && pSliceSeg->pOverallMbMap != NULL + && pSliceSeg->pOverallMbMap[iNextMbIdx] == pSliceSeg->pOverallMbMap[ kiMbXY ]) + return 
iNextMbIdx; + return -1; + } else + return -1; // reserved here for other multiple slice type + } + } else + return -1; +} + +/*! + * \brief Get previous mb to be processed in slice/slice_group: uiSliceIdc (apply in Single/multiple slices and FMO) + * + * \param pCurDq current layer info + * \param kiMbXY MB xy index + * + * \return prev_mb - successful; -1 - failed; + */ +int32_t WelsGetPrevMbOfSlice (SDqLayer* pCurDq, const int32_t kiMbXY) { + if (NULL != pCurDq) { + SSliceCtx* pSliceSeg = &pCurDq->sSliceEncCtx; + if (NULL == pSliceSeg || kiMbXY < 0 || kiMbXY >= pSliceSeg->iMbNumInFrame) + return -1; + if (pSliceSeg->uiSliceMode == SM_SINGLE_SLICE) + return (-1 + kiMbXY); + else { /* if ( pSliceSeg->uiSliceMode == SM_MULTIPLE_SLICE )*/ + if (SM_RESERVED == pSliceSeg->uiSliceMode) { + int32_t iPrevMbIdx = kiMbXY; + -- iPrevMbIdx; + if (iPrevMbIdx >= 0 && iPrevMbIdx < pSliceSeg->iMbNumInFrame && NULL != pSliceSeg->pOverallMbMap + && pSliceSeg->pOverallMbMap[ kiMbXY ] == pSliceSeg->pOverallMbMap[ iPrevMbIdx ]) + return iPrevMbIdx; + return -1; + } else + return -1; + } + } else + return -1; +} + +/*! 
+ * \brief Get number of mb in slice/slice_group: uiSliceIdc (apply in Single/multiple slices and FMO) + * + * \param pSliceCtx SSlice context + * \param pSlice slice which request slice num + * \param kuiSliceIdc slice/slice_group idc + * + * \return count_num_of_mb - successful; -1 - failed; + */ +int32_t WelsGetNumMbInSlice (SDqLayer* pCurDq, SSlice* pSlice, const int32_t kuiSliceIdc) { + SSliceCtx* pSliceCtx = &pCurDq->sSliceEncCtx; + bool bInValidFlag = false; + + if (NULL == pSliceCtx || NULL == pSlice || kuiSliceIdc < 0) { + return -1; + } + + bInValidFlag = ((SM_SINGLE_SLICE != pSliceCtx->uiSliceMode) && (kuiSliceIdc >= pSliceCtx->iSliceNumInFrame)) + || ((SM_SINGLE_SLICE == pSliceCtx->uiSliceMode) && (kuiSliceIdc > 0)); + if (bInValidFlag) { + return -1; + } + + return pSlice->iCountMbNumInSlice; +} + +int32_t GetCurrentSliceNum (const SDqLayer* pCurDq) { + const SSliceCtx* kpSliceCtx = &pCurDq->sSliceEncCtx; + return (kpSliceCtx != NULL) ? (kpSliceCtx->iSliceNumInFrame) : (-1); +} +int32_t DynamicAdjustSlicePEncCtxAll (SDqLayer* pCurDq, + int32_t* pRunLength) { + SSliceCtx* pSliceCtx = &pCurDq->sSliceEncCtx; + const int32_t iCountNumMbInFrame = pSliceCtx->iMbNumInFrame; + const int32_t iCountSliceNumInFrame = pSliceCtx->iSliceNumInFrame; + int32_t iSameRunLenFlag = 1; + int32_t iFirstMbIdx = 0; + int32_t iSliceIdx = 0; + + assert (iCountSliceNumInFrame <= MAX_THREADS_NUM); + + while (iSliceIdx < iCountSliceNumInFrame) { + if (pRunLength[iSliceIdx] != pCurDq->pFirstMbIdxOfSlice[iSliceIdx]) { + iSameRunLenFlag = 0; + break; + } + ++ iSliceIdx; + } + if (iSameRunLenFlag) { + return 1; // do not need adjust it due to same running length as before to save complexity + } + + iSliceIdx = 0; + do { + const int32_t kiSliceRun = pRunLength[iSliceIdx]; + pCurDq->pFirstMbIdxOfSlice[iSliceIdx] = iFirstMbIdx; + pCurDq->pCountMbNumInSlice[iSliceIdx] = kiSliceRun; + + WelsSetMemMultiplebytes_c (pSliceCtx->pOverallMbMap + iFirstMbIdx, iSliceIdx, + kiSliceRun, sizeof 
/*!
 * \brief   Compute how many more slices fit into the remaining budget
 *
 * Evaluates (uiMaximumNum - iConsumedNum - 1) / iDulplicateTimes without letting
 * the unsigned subtraction wrap around.
 *
 * \param   uiMaximumNum      total budget
 * \param   iConsumedNum      amount already consumed
 * \param   iDulplicateTimes  divisor applied to the remaining budget
 *
 * \return  remaining count; 0 when nothing is left or iDulplicateTimes is 0;
 */
int32_t DynamicMaxSliceNumConstraint (uint32_t uiMaximumNum, int32_t iConsumedNum, uint32_t iDulplicateTimes) {
  if (0 == iDulplicateTimes)
    return 0; // avoid division by zero

  // Do the subtraction in a wider signed type: in 32-bit unsigned arithmetic an
  // exhausted budget would wrap to a huge positive value.
  const int64_t kiRemaining = static_cast<int64_t> (uiMaximumNum) - iConsumedNum - 1;
  if (kiRemaining <= 0)
    return 0;

  const int64_t kiInt32Max = 0x7FFFFFFF;
  const int64_t kiResult   = kiRemaining / static_cast<int64_t> (iDulplicateTimes);
  return static_cast<int32_t> (kiResult > kiInt32Max ? kiInt32Max : kiResult);
}
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file encode_mb.c + * + * \brief Implementaion for pCurMb encoding + * + * \date 05/19/2009 Created + ************************************************************************************* + */ + + +#include "svc_encode_mb.h" +#include "encode_mb_aux.h" +#include "decode_mb_aux.h" +#include "ls_defines.h" + +namespace WelsEnc { +void WelsDctMb (int16_t* pRes, uint8_t* pEncMb, int32_t iEncStride, uint8_t* pBestPred, PDctFunc pfDctFourT4) { + pfDctFourT4 (pRes, pEncMb, iEncStride, pBestPred, 16); + pfDctFourT4 (pRes + 64, pEncMb + 8, iEncStride, pBestPred + 8, 16); + pfDctFourT4 (pRes + 128, pEncMb + 8 * iEncStride, iEncStride, pBestPred + 128, 16); + pfDctFourT4 (pRes + 192, pEncMb + 8 * iEncStride + 8, iEncStride, pBestPred + 136, 16); +} + +void WelsEncRecI16x16Y (sWelsEncCtx* pEncCtx, SMB* pCurMb, SMbCache* pMbCache) { + ENFORCE_STACK_ALIGN_1D (int16_t, aDctT4Dc, 16, 16) + SWelsFuncPtrList* pFuncList = pEncCtx->pFuncList; + SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer; + const int32_t kiEncStride = pCurDqLayer->iEncStride[0]; + int16_t* pRes = pMbCache->pCoeffLevel; + uint8_t* pPred = pMbCache->SPicData.pCsMb[0]; + const int32_t kiRecStride = pCurDqLayer->iCsStride[0]; + int16_t* pBlock = pMbCache->pDct->iLumaBlock[0]; + uint8_t* pBestPred = pMbCache->pMemPredLuma; + const uint8_t* kpNoneZeroCountIdx = &g_kuiMbCountScan4Idx[0]; + uint8_t i, uiQp = pCurMb->uiLumaQp; + uint32_t uiNoneZeroCount, uiNoneZeroCountMbAc = 
0, uiCountI16x16Dc; + + const int16_t* pMF = g_kiQuantMF[uiQp]; + const int16_t* pFF = g_iQuantIntraFF[uiQp]; + + WelsDctMb (pRes, pMbCache->SPicData.pEncMb[0], kiEncStride, pBestPred, pEncCtx->pFuncList->pfDctFourT4); + + pFuncList->pfTransformHadamard4x4Dc (aDctT4Dc, pRes); + pFuncList->pfQuantizationDc4x4 (aDctT4Dc, pFF[0] << 1, pMF[0]>>1); + pFuncList->pfScan4x4 (pMbCache->pDct->iLumaI16x16Dc, aDctT4Dc); + uiCountI16x16Dc = pFuncList->pfGetNoneZeroCount (pMbCache->pDct->iLumaI16x16Dc); + + for (i = 0; i < 4; i++) { + pFuncList->pfQuantizationFour4x4 (pRes, pFF, pMF); + pFuncList->pfScan4x4Ac (pBlock, pRes); + pFuncList->pfScan4x4Ac (pBlock + 16, pRes + 16); + pFuncList->pfScan4x4Ac (pBlock + 32, pRes + 32); + pFuncList->pfScan4x4Ac (pBlock + 48, pRes + 48); + pRes += 64; + pBlock += 64; + } + pRes -= 256; + pBlock -= 256; + + for (i = 0; i < 16; i++) { + uiNoneZeroCount = pFuncList->pfGetNoneZeroCount (pBlock); + pCurMb->pNonZeroCount[*kpNoneZeroCountIdx++] = uiNoneZeroCount; + uiNoneZeroCountMbAc += uiNoneZeroCount; + pBlock += 16; + } + + if (uiCountI16x16Dc > 0) { + if (uiQp < 12) { + WelsIHadamard4x4Dc (aDctT4Dc); + WelsDequantLumaDc4x4 (aDctT4Dc, uiQp); + } else + pFuncList->pfDequantizationIHadamard4x4 (aDctT4Dc, g_kuiDequantCoeff[uiQp][0] >> 2); + } + + if (uiNoneZeroCountMbAc > 0) { + pCurMb->uiCbp = 15; + pFuncList->pfDequantizationFour4x4 (pRes, g_kuiDequantCoeff[uiQp]); + pFuncList->pfDequantizationFour4x4 (pRes + 64, g_kuiDequantCoeff[uiQp]); + pFuncList->pfDequantizationFour4x4 (pRes + 128, g_kuiDequantCoeff[uiQp]); + pFuncList->pfDequantizationFour4x4 (pRes + 192, g_kuiDequantCoeff[uiQp]); + + pRes[0] = aDctT4Dc[0]; + pRes[16] = aDctT4Dc[1]; + pRes[32] = aDctT4Dc[4]; + pRes[48] = aDctT4Dc[5]; + pRes[64] = aDctT4Dc[2]; + pRes[80] = aDctT4Dc[3]; + pRes[96] = aDctT4Dc[6]; + pRes[112] = aDctT4Dc[7]; + pRes[128] = aDctT4Dc[8]; + pRes[144] = aDctT4Dc[9]; + pRes[160] = aDctT4Dc[12]; + pRes[176] = aDctT4Dc[13]; + pRes[192] = aDctT4Dc[10]; + pRes[208] = 
aDctT4Dc[11]; + pRes[224] = aDctT4Dc[14]; + pRes[240] = aDctT4Dc[15]; + + pFuncList->pfIDctFourT4 (pPred, kiRecStride, pBestPred, 16, pRes); + pFuncList->pfIDctFourT4 (pPred + 8, kiRecStride, pBestPred + 8, 16, pRes + 64); + pFuncList->pfIDctFourT4 (pPred + kiRecStride * 8, kiRecStride, pBestPred + 128, 16, pRes + 128); + pFuncList->pfIDctFourT4 (pPred + kiRecStride * 8 + 8, kiRecStride, pBestPred + 136, 16, pRes + 192); + } else if (uiCountI16x16Dc > 0) { + pFuncList->pfIDctI16x16Dc (pPred, kiRecStride, pBestPred, 16, aDctT4Dc); + } else { + pFuncList->pfCopy16x16Aligned (pPred, kiRecStride, pBestPred, 16); + } +} +void WelsEncRecI4x4Y (sWelsEncCtx* pEncCtx, SMB* pCurMb, SMbCache* pMbCache, uint8_t uiI4x4Idx) { + SWelsFuncPtrList* pFuncList = pEncCtx->pFuncList; + SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer; + int32_t iEncStride = pCurDqLayer->iEncStride[0]; + uint8_t uiQp = pCurMb->uiLumaQp; + + int16_t* pResI4x4 = pMbCache->pCoeffLevel; + uint8_t* pPredI4x4; + + uint8_t* pPred = pMbCache->SPicData.pCsMb[0]; + int32_t iRecStride = pCurDqLayer->iCsStride[0]; + + uint32_t uiOffset = g_kuiMbCountScan4Idx[uiI4x4Idx]; + uint8_t* pEncMb = pMbCache->SPicData.pEncMb[0]; + uint8_t* pBestPred = pMbCache->pBestPredI4x4Blk4; + int16_t* pBlock = pMbCache->pDct->iLumaBlock[uiI4x4Idx]; + + const int16_t* pMF = g_kiQuantMF[uiQp]; + const int16_t* pFF = g_iQuantIntraFF[uiQp]; + + int32_t* pStrideEncBlockOffset = pEncCtx->pStrideTab->pStrideEncBlockOffset[pEncCtx->uiDependencyId]; + int32_t* pStrideDecBlockOffset = pEncCtx->pStrideTab->pStrideDecBlockOffset[pEncCtx->uiDependencyId][0 == + pEncCtx->uiTemporalId]; + int32_t iNoneZeroCount = 0; + + pFuncList->pfDctT4 (pResI4x4, & (pEncMb[pStrideEncBlockOffset[uiI4x4Idx]]), iEncStride, pBestPred, 4); + pFuncList->pfQuantization4x4 (pResI4x4, pFF, pMF); + pFuncList->pfScan4x4 (pBlock, pResI4x4); + + iNoneZeroCount = pFuncList->pfGetNoneZeroCount (pBlock); + pCurMb->pNonZeroCount[uiOffset] = iNoneZeroCount; + + pPredI4x4 = pPred + 
pStrideDecBlockOffset[uiI4x4Idx]; + if (iNoneZeroCount > 0) { + pCurMb->uiCbp |= 1 << (uiI4x4Idx >> 2); + pFuncList->pfDequantization4x4 (pResI4x4, g_kuiDequantCoeff[uiQp]); + pFuncList->pfIDctT4 (pPredI4x4, iRecStride, pBestPred, 4, pResI4x4); + } else + pFuncList->pfCopy4x4 (pPredI4x4, iRecStride, pBestPred, 4); +} + +void WelsEncInterY (SWelsFuncPtrList* pFuncList, SMB* pCurMb, SMbCache* pMbCache) { + PQuantizationMaxFunc pfQuantizationFour4x4Max = pFuncList->pfQuantizationFour4x4Max; + PSetMemoryZero pfSetMemZeroSize8 = pFuncList->pfSetMemZeroSize8; + PSetMemoryZero pfSetMemZeroSize64 = pFuncList->pfSetMemZeroSize64; + PScanFunc pfScan4x4 = pFuncList->pfScan4x4; + PCalculateSingleCtrFunc pfCalculateSingleCtr4x4 = pFuncList->pfCalculateSingleCtr4x4; + PGetNoneZeroCountFunc pfGetNoneZeroCount = pFuncList->pfGetNoneZeroCount; + PDeQuantizationFunc pfDequantizationFour4x4 = pFuncList->pfDequantizationFour4x4; + int16_t* pRes = pMbCache->pCoeffLevel; + int32_t iSingleCtrMb = 0, iSingleCtr8x8[4]; + int16_t* pBlock = pMbCache->pDct->iLumaBlock[0]; + uint8_t uiQp = pCurMb->uiLumaQp; + const int16_t* pMF = g_kiQuantMF[uiQp]; + const int16_t* pFF = g_kiQuantInterFF[uiQp]; + int16_t aMax[16]; + int32_t i, j, iNoneZeroCountMbDcAc = 0, iNoneZeroCount = 0; + + for (i = 0; i < 4; i++) { + pfQuantizationFour4x4Max (pRes, pFF, pMF, aMax + (i << 2)); + iSingleCtr8x8[i] = 0; + for (j = 0; j < 4; j++) { + if (aMax[ (i << 2) + j] == 0) + pfSetMemZeroSize8 (pBlock, 32); + else { + pfScan4x4 (pBlock, pRes); + if (aMax[ (i << 2) + j] > 1) + iSingleCtr8x8[i] += 9; + else if (iSingleCtr8x8[i] < 6) + iSingleCtr8x8[i] += pfCalculateSingleCtr4x4 (pBlock); + } + pRes += 16; + pBlock += 16; + } + iSingleCtrMb += iSingleCtr8x8[i]; + } + pBlock -= 256; + pRes -= 256; + + memset (pCurMb->pNonZeroCount, 0, 16); + + + if (iSingleCtrMb < 6) { //from JVT-O079 + iNoneZeroCountMbDcAc = 0; + pfSetMemZeroSize64 (pRes, 768); // confirmed_safe_unsafe_usage + } else { + const uint8_t* kpNoneZeroCountIdx = 
g_kuiMbCountScan4Idx; + for (i = 0; i < 4; i++) { + if (iSingleCtr8x8[i] >= 4) { + for (j = 0; j < 4; j++) { + iNoneZeroCount = pfGetNoneZeroCount (pBlock); + pCurMb->pNonZeroCount[*kpNoneZeroCountIdx++] = iNoneZeroCount; + iNoneZeroCountMbDcAc += iNoneZeroCount; + pBlock += 16; + } + pfDequantizationFour4x4 (pRes, g_kuiDequantCoeff[uiQp]); + pCurMb->uiCbp |= 1 << i; + } else { // set zero for an 8x8 pBlock + pfSetMemZeroSize64 (pRes, 128); // confirmed_safe_unsafe_usage + kpNoneZeroCountIdx += 4; + pBlock += 64; + } + pRes += 64; + } + } +} + +void WelsEncRecUV (SWelsFuncPtrList* pFuncList, SMB* pCurMb, SMbCache* pMbCache, int16_t* pRes, int32_t iUV) { + PQuantizationHadamardFunc pfQuantizationHadamard2x2 = pFuncList->pfQuantizationHadamard2x2; + PQuantizationMaxFunc pfQuantizationFour4x4Max = pFuncList->pfQuantizationFour4x4Max; + PSetMemoryZero pfSetMemZeroSize8 = pFuncList->pfSetMemZeroSize8; + PSetMemoryZero pfSetMemZeroSize64 = pFuncList->pfSetMemZeroSize64; + PScanFunc pfScan4x4Ac = pFuncList->pfScan4x4Ac; + PCalculateSingleCtrFunc pfCalculateSingleCtr4x4 = pFuncList->pfCalculateSingleCtr4x4; + PGetNoneZeroCountFunc pfGetNoneZeroCount = pFuncList->pfGetNoneZeroCount; + PDeQuantizationFunc pfDequantizationFour4x4 = pFuncList->pfDequantizationFour4x4; + const int32_t kiInterFlag = !IS_INTRA (pCurMb->uiMbType); + const uint8_t kiQp = pCurMb->uiChromaQp; + uint8_t i, uiNoneZeroCount, uiNoneZeroCountMbAc = 0, uiNoneZeroCountMbDc = 0; + uint8_t uiNoneZeroCountOffset = (iUV - 1) << 1; //UV==1 or 2 + uint8_t uiSubMbIdx = 16 + ((iUV - 1) << 2); //uiSubMbIdx == 16 or 20 + int16_t* iChromaDc = pMbCache->pDct->iChromaDc[iUV - 1], *pBlock = pMbCache->pDct->iChromaBlock[ (iUV - 1) << 2]; + int16_t aDct2x2[4], j, aMax[4]; + int32_t iSingleCtr8x8 = 0; + const int16_t* pMF = g_kiQuantMF[kiQp]; + const int16_t* pFF = g_kiQuantInterFF[ (!kiInterFlag) * 6 + kiQp]; + + uiNoneZeroCountMbDc = pfQuantizationHadamard2x2 (pRes, pFF[0] << 1, pMF[0]>>1, aDct2x2, iChromaDc); + + 
pfQuantizationFour4x4Max (pRes, pFF, pMF, aMax); + + for (j = 0; j < 4; j++) { + if (aMax[j] == 0) + pfSetMemZeroSize8 (pBlock, 32); + else { + pfScan4x4Ac (pBlock, pRes); + if (kiInterFlag) { + if (aMax[j] > 1) + iSingleCtr8x8 += 9; + else if (iSingleCtr8x8 < 7) + iSingleCtr8x8 += pfCalculateSingleCtr4x4 (pBlock); + } else + iSingleCtr8x8 = INT_MAX; + } + pRes += 16; + pBlock += 16; + } + pRes -= 64; + + if (iSingleCtr8x8 < 7) { //from JVT-O079 + pfSetMemZeroSize64 (pRes, 128); // confirmed_safe_unsafe_usage + ST16 (&pCurMb->pNonZeroCount[16 + uiNoneZeroCountOffset], 0); + ST16 (&pCurMb->pNonZeroCount[20 + uiNoneZeroCountOffset], 0); + } else { + const uint8_t* kpNoneZeroCountIdx = &g_kuiMbCountScan4Idx[uiSubMbIdx]; + pBlock -= 64; + for (i = 0; i < 4; i++) { + uiNoneZeroCount = pfGetNoneZeroCount (pBlock); + pCurMb->pNonZeroCount[*kpNoneZeroCountIdx++] = uiNoneZeroCount; + uiNoneZeroCountMbAc += uiNoneZeroCount; + pBlock += 16; + } + pfDequantizationFour4x4 (pRes, g_kuiDequantCoeff[pCurMb->uiChromaQp]); + pCurMb->uiCbp &= 0x0F; + pCurMb->uiCbp |= 0x20; + } + + if (uiNoneZeroCountMbDc > 0) { + WelsDequantIHadamard2x2Dc (aDct2x2, g_kuiDequantCoeff[kiQp][0]); + if (2 != (pCurMb->uiCbp >> 4)) + pCurMb->uiCbp |= (0x01 << 4) ; + pRes[0] = aDct2x2[0]; + pRes[16] = aDct2x2[1]; + pRes[32] = aDct2x2[2]; + pRes[48] = aDct2x2[3]; + } +} + + +void WelsRecPskip (SDqLayer* pCurLayer, SWelsFuncPtrList* pFuncList, SMB* pCurMb, SMbCache* pMbCache) { + int32_t* iRecStride = pCurLayer->iCsStride; + uint8_t** pCsMb = &pMbCache->SPicData.pCsMb[0]; + + pFuncList->pfCopy16x16Aligned (pCsMb[0], *iRecStride++, pMbCache->pSkipMb, 16); + pFuncList->pfCopy8x8Aligned (pCsMb[1], *iRecStride++, pMbCache->pSkipMb + 256, 8); + pFuncList->pfCopy8x8Aligned (pCsMb[2], *iRecStride, pMbCache->pSkipMb + 320, 8); + pFuncList->pfSetMemZeroSize8 (pCurMb->pNonZeroCount, 24); +} + +bool WelsTryPYskip (sWelsEncCtx* pEncCtx, SMB* pCurMb, SMbCache* pMbCache) { + int32_t iSingleCtrMb = 0; + int16_t* pRes = 
pMbCache->pCoeffLevel; + const uint8_t kuiQp = pCurMb->uiLumaQp; + + int16_t* pBlock = pMbCache->pDct->iLumaBlock[0]; + uint16_t aMax[4], i, j; + const int16_t* pMF = g_kiQuantMF[kuiQp]; + const int16_t* pFF = g_kiQuantInterFF[kuiQp]; + + for (i = 0; i < 4; i++) { + pEncCtx->pFuncList->pfQuantizationFour4x4Max (pRes, pFF, pMF, (int16_t*)aMax); + + for (j = 0; j < 4; j++) { + if (aMax[j] > 1) return false; // iSingleCtrMb += 9, can't be P_SKIP + else if (aMax[j] == 1) { + pEncCtx->pFuncList->pfScan4x4 (pBlock, pRes); // + iSingleCtrMb += pEncCtx->pFuncList->pfCalculateSingleCtr4x4 (pBlock); + } + if (iSingleCtrMb >= 6) return false; //from JVT-O079 + pRes += 16; + pBlock += 16; + } + } + return true; +} + +bool WelsTryPUVskip (sWelsEncCtx* pEncCtx, SMB* pCurMb, SMbCache* pMbCache, int32_t iUV) { + int16_t* pRes = ((iUV == 1) ? & (pMbCache->pCoeffLevel[256]) : & (pMbCache->pCoeffLevel[256 + 64])); + + const uint8_t kuiQp = g_kuiChromaQpTable[CLIP3_QP_0_51 (pCurMb->uiLumaQp + + pEncCtx->pCurDqLayer->sLayerInfo.pPpsP->uiChromaQpIndexOffset)]; + + const int16_t* pMF = g_kiQuantMF[kuiQp]; + const int16_t* pFF = g_kiQuantInterFF[kuiQp]; + + if (pEncCtx->pFuncList->pfQuantizationHadamard2x2Skip (pRes, pFF[0] << 1, pMF[0]>>1)) + return false; + else { + uint16_t aMax[4], j; + int32_t iSingleCtrMb = 0; + int16_t* pBlock = pMbCache->pDct->iChromaBlock[ (iUV - 1) << 2]; + pEncCtx->pFuncList->pfQuantizationFour4x4Max (pRes, pFF, pMF, (int16_t*)aMax); + + for (j = 0; j < 4; j++) { + if (aMax[j] > 1) return false; // iSingleCtrMb += 9, can't be P_SKIP + else if (aMax[j] == 1) { + pEncCtx->pFuncList->pfScan4x4Ac (pBlock, pRes); + iSingleCtrMb += pEncCtx->pFuncList->pfCalculateSingleCtr4x4 (pBlock); + } + if (iSingleCtrMb >= 7) return false; //from JVT-O079 + pRes += 16; + pBlock += 16; + } + return true; + } +} + +} // namespace WelsEnc diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/svc_encode_slice.cpp 
b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/svc_encode_slice.cpp new file mode 100644 index 000000000..90139136f --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/svc_encode_slice.cpp @@ -0,0 +1,2009 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file svc_encode_slice.c + * + * \brief svc encoding slice + * + * \date 2009.07.27 Created + * + ************************************************************************************* + */ + +#include "ls_defines.h" +#include "svc_encode_slice.h" +#include "svc_enc_golomb.h" +#include "svc_base_layer_md.h" +#include "svc_encode_mb.h" +#include "svc_set_mb_syn.h" +#include "decode_mb_aux.h" +#include "svc_mode_decision.h" + +namespace WelsEnc { +//#define ENC_TRACE + +typedef int32_t (*PWelsCodingSliceFunc) (sWelsEncCtx* pCtx, SSlice* pSlice); +typedef void (*PWelsSliceHeaderWriteFunc) (sWelsEncCtx* pCtx, SBitStringAux* pBs, SDqLayer* pCurLayer, SSlice* pSlice, + IWelsParametersetStrategy* pParametersetStrategy); + +void UpdateNonZeroCountCache (SMB* pMb, SMbCache* pMbCache) { + ST32 (&pMbCache->iNonZeroCoeffCount[9], LD32 (&pMb->pNonZeroCount[ 0])); + ST32 (&pMbCache->iNonZeroCoeffCount[17], LD32 (&pMb->pNonZeroCount[ 4])); + ST32 (&pMbCache->iNonZeroCoeffCount[25], LD32 (&pMb->pNonZeroCount[ 8])); + ST32 (&pMbCache->iNonZeroCoeffCount[33], LD32 (&pMb->pNonZeroCount[12])); + + ST16 (&pMbCache->iNonZeroCoeffCount[14], LD16 (&pMb->pNonZeroCount[16])); + ST16 (&pMbCache->iNonZeroCoeffCount[38], LD16 (&pMb->pNonZeroCount[18])); + ST16 (&pMbCache->iNonZeroCoeffCount[22], LD16 (&pMb->pNonZeroCount[20])); + ST16 (&pMbCache->iNonZeroCoeffCount[46], LD16 (&pMb->pNonZeroCount[22])); +} + +void WelsSliceHeaderScalExtInit (SDqLayer* pCurLayer, SSlice* pSlice) { + SSliceHeaderExt* pSliceHeadExt = &pSlice->sSliceHeaderExt; + SNalUnitHeaderExt* pNalHeadExt = &pCurLayer->sLayerInfo.sNalHeaderExt; + + uint8_t uiDependencyId = pNalHeadExt->uiDependencyId; + + pSliceHeadExt->bSliceSkipFlag = false; + + if (uiDependencyId > 0) { //spatial EL + //bothe adaptive and default flags should equal to 0. 
+ pSliceHeadExt->bAdaptiveBaseModeFlag = + pSliceHeadExt->bAdaptiveMotionPredFlag = + pSliceHeadExt->bAdaptiveResidualPredFlag = false; + + pSliceHeadExt->bDefaultBaseModeFlag = + pSliceHeadExt->bDefaultMotionPredFlag = + pSliceHeadExt->bDefaultResidualPredFlag = false; + } +} + +void WelsSliceHeaderExtInit (sWelsEncCtx* pEncCtx, SDqLayer* pCurLayer, SSlice* pSlice) { + SSliceHeaderExt* pCurSliceExt = &pSlice->sSliceHeaderExt; + SSliceHeader* pCurSliceHeader = &pCurSliceExt->sSliceHeader; + SSpatialLayerInternal* pParamInternal = &pEncCtx->pSvcParam->sDependencyLayers[pEncCtx->uiDependencyId]; + pCurSliceHeader->eSliceType = pEncCtx->eSliceType; + + pCurSliceExt->bStoreRefBasePicFlag = false; + + pCurSliceHeader->iFrameNum = pParamInternal->iFrameNum; + pCurSliceHeader->uiIdrPicId = pParamInternal->uiIdrPicId; + pCurSliceHeader->iPicOrderCntLsb = pEncCtx->pEncPic->iFramePoc; // 0 + + if (P_SLICE == pEncCtx->eSliceType) { + pCurSliceHeader->uiNumRefIdxL0Active = 1; + if (pCurSliceHeader->uiRefCount > 0 && + pCurSliceHeader->uiRefCount <= pCurLayer->sLayerInfo.pSpsP->iNumRefFrames) { + pCurSliceHeader->bNumRefIdxActiveOverrideFlag = true; + pCurSliceHeader->uiNumRefIdxL0Active = pCurSliceHeader->uiRefCount; + } + //to solve mismatch between debug&release + else { + pCurSliceHeader->bNumRefIdxActiveOverrideFlag = false; + } + } + + pCurSliceHeader->iSliceQpDelta = pEncCtx->iGlobalQp - pCurLayer->sLayerInfo.pPpsP->iPicInitQp; + + //for deblocking initial + pCurSliceHeader->uiDisableDeblockingFilterIdc = pCurLayer->iLoopFilterDisableIdc; + pCurSliceHeader->iSliceAlphaC0Offset = + pCurLayer->iLoopFilterAlphaC0Offset; // need update iSliceAlphaC0Offset & iSliceBetaOffset for pSlice-header if loop_filter_idc != 1 + pCurSliceHeader->iSliceBetaOffset = pCurLayer->iLoopFilterBetaOffset; + pCurSliceExt->uiDisableInterLayerDeblockingFilterIdc = pCurLayer->uiDisableInterLayerDeblockingFilterIdc; + + if (pSlice->bSliceHeaderExtFlag) { + WelsSliceHeaderScalExtInit (pCurLayer, 
pSlice); + } else { + //both adaptive and default flags should equal to 0. + pCurSliceExt->bAdaptiveBaseModeFlag = + pCurSliceExt->bAdaptiveMotionPredFlag = + pCurSliceExt->bAdaptiveResidualPredFlag = false; + + pCurSliceExt->bDefaultBaseModeFlag = + pCurSliceExt->bDefaultMotionPredFlag = + pCurSliceExt->bDefaultResidualPredFlag = false; + } +} + + +void UpdateMbNeighbor (SDqLayer* pCurDq, SMB* pMb, const int32_t kiMbWidth, uint16_t uiSliceIdc) { + uint32_t uiNeighborAvailFlag = 0; + const int32_t kiMbXY = pMb->iMbXY; + const int32_t kiMbX = pMb->iMbX; + const int32_t kiMbY = pMb->iMbY; + bool bLeft; + bool bTop; + bool bLeftTop; + bool bRightTop; + int32_t iLeftXY, iTopXY, iLeftTopXY, iRightTopXY; + + pMb->uiSliceIdc = uiSliceIdc; + iLeftXY = kiMbXY - 1; + iTopXY = kiMbXY - kiMbWidth; + iLeftTopXY = iTopXY - 1; + iRightTopXY = iTopXY + 1; + + bLeft = (kiMbX > 0) && (uiSliceIdc == WelsMbToSliceIdc (pCurDq, iLeftXY)); + bTop = (kiMbY > 0) && (uiSliceIdc == WelsMbToSliceIdc (pCurDq, iTopXY)); + bLeftTop = (kiMbX > 0) && (kiMbY > 0) && (uiSliceIdc == WelsMbToSliceIdc (pCurDq, iLeftTopXY)); + bRightTop = (kiMbX < (kiMbWidth - 1)) && (kiMbY > 0) && (uiSliceIdc == WelsMbToSliceIdc (pCurDq, iRightTopXY)); + + if (bLeft) { + uiNeighborAvailFlag |= LEFT_MB_POS; + } + if (bTop) { + uiNeighborAvailFlag |= TOP_MB_POS; + } + if (bLeftTop) { + uiNeighborAvailFlag |= TOPLEFT_MB_POS; + } + if (bRightTop) { + uiNeighborAvailFlag |= TOPRIGHT_MB_POS; + } + pMb->uiNeighborAvail = (uint8_t)uiNeighborAvailFlag; +} + +/* count MB types if enabled FRAME_INFO_OUTPUT*/ +#if defined(MB_TYPES_CHECK) +void WelsCountMbType (int32_t (*iMbCount)[18], const EWelsSliceType keSt, const SMB* kpMb) { + if (NULL == iMbCount) + return; + + switch (kpMb->uiMbType) { + case MB_TYPE_INTRA4x4: + ++ iMbCount[keSt][Intra4x4]; + break; + case MB_TYPE_INTRA16x16: + ++ iMbCount[keSt][Intra16x16]; + break; + case MB_TYPE_SKIP: + ++ iMbCount[keSt][PSkip]; + break; + case MB_TYPE_16x16: + ++ 
iMbCount[keSt][Inter16x16]; + break; + case MB_TYPE_16x8: + ++ iMbCount[keSt][Inter16x8]; + break; + case MB_TYPE_8x16: + ++ iMbCount[keSt][Inter8x16]; + break; + case MB_TYPE_8x8: + ++ iMbCount[keSt][Inter8x8]; + break; + case MB_TYPE_INTRA_BL: + ++ iMbCount[keSt][7]; + break; + default: + break; + } +} +#endif//MB_TYPES_CHECK + +/*! +* \brief write reference picture list on reordering syntax in Slice header +*/ +void WriteReferenceReorder (SBitStringAux* pBs, SSliceHeader* sSliceHeader) { + SRefPicListReorderSyntax* pRefOrdering = &sSliceHeader->sRefReordering; + uint8_t eSliceType = sSliceHeader->eSliceType % 5; + int16_t n = 0; + + if (I_SLICE != eSliceType && SI_SLICE != eSliceType) { // !I && !SI + BsWriteOneBit (pBs, true); +// { + uint16_t uiReorderingOfPicNumsIdc; + do { + uiReorderingOfPicNumsIdc = pRefOrdering->SReorderingSyntax[n].uiReorderingOfPicNumsIdc; + BsWriteUE (pBs, uiReorderingOfPicNumsIdc); + if (0 == uiReorderingOfPicNumsIdc || 1 == uiReorderingOfPicNumsIdc) + BsWriteUE (pBs, pRefOrdering->SReorderingSyntax[n].uiAbsDiffPicNumMinus1); + else if (2 == uiReorderingOfPicNumsIdc) + BsWriteUE (pBs, pRefOrdering->SReorderingSyntax[n].iLongTermPicNum); + + n ++; + } while (3 != uiReorderingOfPicNumsIdc); +// } + } +} + +/*! 
+* \brief write reference picture marking syntax in pSlice header +*/ +void WriteRefPicMarking (SBitStringAux* pBs, SSliceHeader* pSliceHeader, SNalUnitHeaderExt* pNalHdrExt) { + SRefPicMarking* sRefMarking = &pSliceHeader->sRefMarking; + int16_t n = 0; + + if (pNalHdrExt->bIdrFlag) { + BsWriteOneBit (pBs, sRefMarking->bNoOutputOfPriorPicsFlag); + BsWriteOneBit (pBs, sRefMarking->bLongTermRefFlag); + } else { + BsWriteOneBit (pBs, sRefMarking->bAdaptiveRefPicMarkingModeFlag); + + if (sRefMarking->bAdaptiveRefPicMarkingModeFlag) { + int32_t iMmcoType; + do { + iMmcoType = sRefMarking->SMmcoRef[n].iMmcoType; + BsWriteUE (pBs, iMmcoType); + if (1 == iMmcoType || 3 == iMmcoType) + BsWriteUE (pBs, sRefMarking->SMmcoRef[n].iDiffOfPicNum - 1); + + if (2 == iMmcoType) + BsWriteUE (pBs, sRefMarking->SMmcoRef[n].iLongTermPicNum); + + if (3 == iMmcoType || 6 == iMmcoType) + BsWriteUE (pBs, sRefMarking->SMmcoRef[n].iLongTermFrameIdx); + + if (4 == iMmcoType) + BsWriteUE (pBs, sRefMarking->SMmcoRef[n].iMaxLongTermFrameIdx + 1); + + n ++; + } while (0 != iMmcoType); + } + + } +} + +void WelsSliceHeaderWrite (sWelsEncCtx* pCtx, SBitStringAux* pBs, SDqLayer* pCurLayer, SSlice* pSlice, + IWelsParametersetStrategy* pParametersetStrategy) { + SWelsSPS* pSps = pCurLayer->sLayerInfo.pSpsP; + SWelsPPS* pPps = pCurLayer->sLayerInfo.pPpsP; + SSliceHeader* pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader; + SNalUnitHeaderExt* pNalHead = &pCurLayer->sLayerInfo.sNalHeaderExt; + + BsWriteUE (pBs, pSliceHeader->iFirstMbInSlice); + BsWriteUE (pBs, pSliceHeader->eSliceType); /* same type things */ + + BsWriteUE (pBs, pSliceHeader->pPps->iPpsId + pParametersetStrategy->GetPpsIdOffset (pSliceHeader->pPps->iPpsId)); + + BsWriteBits (pBs, pSps->uiLog2MaxFrameNum, pSliceHeader->iFrameNum); + + if (pNalHead->bIdrFlag) { /* NAL IDR */ + BsWriteUE (pBs, pSliceHeader->uiIdrPicId); + } + + BsWriteBits (pBs, pSps->iLog2MaxPocLsb, pSliceHeader->iPicOrderCntLsb); + + if (P_SLICE == 
pSliceHeader->eSliceType) { + BsWriteOneBit (pBs, pSliceHeader->bNumRefIdxActiveOverrideFlag); + if (pSliceHeader->bNumRefIdxActiveOverrideFlag) { + BsWriteUE (pBs, WELS_CLIP3 (pSliceHeader->uiNumRefIdxL0Active - 1, 0, MAX_REF_PIC_COUNT)); + } + } + + if (!pNalHead->bIdrFlag) + WriteReferenceReorder (pBs, pSliceHeader); + + if (pNalHead->sNalUnitHeader.uiNalRefIdc) { + WriteRefPicMarking (pBs, pSliceHeader, pNalHead); + } + + if (pPps->bEntropyCodingModeFlag && pSliceHeader->eSliceType != I_SLICE) { + BsWriteUE (pBs, pSlice->iCabacInitIdc); + } + BsWriteSE (pBs, pSliceHeader->iSliceQpDelta); /* pSlice qp delta */ + + if (pPps->bDeblockingFilterControlPresentFlag) { + switch (pSliceHeader->uiDisableDeblockingFilterIdc) { + case 0: + case 3: + case 4: + case 6: + BsWriteUE (pBs, 0); + break; + case 1: + BsWriteUE (pBs, 1); + break; + case 2: + case 5: + BsWriteUE (pBs, 2); + break; + default: + WelsLog (&pCtx->sLogCtx, WELS_LOG_ERROR, "Invalid uiDisableDeblockingFilterIdc %d", + pSliceHeader->uiDisableDeblockingFilterIdc); + break; + } + if (1 != pSliceHeader->uiDisableDeblockingFilterIdc) { + BsWriteSE (pBs, pSliceHeader->iSliceAlphaC0Offset >> 1); + BsWriteSE (pBs, pSliceHeader->iSliceBetaOffset >> 1); + } + } +} + +void WelsSliceHeaderExtWrite (sWelsEncCtx* pCtx, SBitStringAux* pBs, SDqLayer* pCurLayer, SSlice* pSlice, + IWelsParametersetStrategy* pParametersetStrategy) { + SWelsSPS* pSps = pCurLayer->sLayerInfo.pSpsP; + SWelsPPS* pPps = pCurLayer->sLayerInfo.pPpsP; + SSubsetSps* pSubSps = pCurLayer->sLayerInfo.pSubsetSpsP; + SSliceHeaderExt* pSliceHeadExt = &pSlice->sSliceHeaderExt; + SSliceHeader* pSliceHeader = &pSliceHeadExt->sSliceHeader; + SNalUnitHeaderExt* pNalHead = &pCurLayer->sLayerInfo.sNalHeaderExt; + + BsWriteUE (pBs, pSliceHeader->iFirstMbInSlice); + BsWriteUE (pBs, pSliceHeader->eSliceType); /* same type things */ + + BsWriteUE (pBs, pSliceHeader->pPps->iPpsId + + pParametersetStrategy->GetPpsIdOffset (pSliceHeader->pPps->iPpsId)); + + BsWriteBits 
(pBs, pSps->uiLog2MaxFrameNum, pSliceHeader->iFrameNum); + + if (pNalHead->bIdrFlag) { /* NAL IDR */ + BsWriteUE (pBs, pSliceHeader->uiIdrPicId); + } + + BsWriteBits (pBs, pSps->iLog2MaxPocLsb, pSliceHeader->iPicOrderCntLsb); +// { + if (P_SLICE == pSliceHeader->eSliceType) { + BsWriteOneBit (pBs, pSliceHeader->bNumRefIdxActiveOverrideFlag); + if (pSliceHeader->bNumRefIdxActiveOverrideFlag) { + BsWriteUE (pBs, WELS_CLIP3 (pSliceHeader->uiNumRefIdxL0Active - 1, 0, MAX_REF_PIC_COUNT)); + } + } + + if (!pNalHead->bIdrFlag) + WriteReferenceReorder (pBs, pSliceHeader); + + if (pNalHead->sNalUnitHeader.uiNalRefIdc) { + WriteRefPicMarking (pBs, pSliceHeader, pNalHead); + + if (!pSubSps->sSpsSvcExt.bSliceHeaderRestrictionFlag) { + BsWriteOneBit (pBs, pSliceHeadExt->bStoreRefBasePicFlag); + } + } +// } + + + if (pPps->bEntropyCodingModeFlag && pSliceHeader->eSliceType != I_SLICE) { + BsWriteUE (pBs, pSlice->iCabacInitIdc); + } + + BsWriteSE (pBs, pSliceHeader->iSliceQpDelta); /* pSlice qp delta */ + + if (pPps->bDeblockingFilterControlPresentFlag) { + BsWriteUE (pBs, pSliceHeader->uiDisableDeblockingFilterIdc); + if (1 != pSliceHeader->uiDisableDeblockingFilterIdc) { + BsWriteSE (pBs, pSliceHeader->iSliceAlphaC0Offset >> 1); + BsWriteSE (pBs, pSliceHeader->iSliceBetaOffset >> 1); + } + } + +#if !defined(DISABLE_FMO_FEATURE) + if (pPps->uiNumSliceGroups > 1 && + pPps->uiSliceGroupMapType >= 3 && + pPps->uiSliceGroupMapType <= 5) { + int32_t iNumBits; + if (pPps->uiSliceGroupChangeRate) { + iNumBits = WELS_CEILLOG2 (1 + pPps->uiPicSizeInMapUnits / pPps->uiSliceGroupChangeRate); + BsWriteBits (pBs, iNumBits, pSliceHeader->iSliceGroupChangeCycle); + } + } +#endif//!DISABLE_FMO_FEATURE + + if (false) { + BsWriteOneBit (pBs, pSliceHeadExt->bSliceSkipFlag); + if (pSliceHeadExt->bSliceSkipFlag) { + BsWriteUE (pBs, pSliceHeadExt->uiNumMbsInSlice - 1); + } else { + BsWriteOneBit (pBs, pSliceHeadExt->bAdaptiveBaseModeFlag); + if (!pSliceHeadExt->bAdaptiveBaseModeFlag) { + 
BsWriteOneBit (pBs, pSliceHeadExt->bDefaultBaseModeFlag); + } + + if (!pSliceHeadExt->bDefaultBaseModeFlag) { + BsWriteOneBit (pBs, 0); + BsWriteOneBit (pBs, 0); + } + + BsWriteOneBit (pBs, pSliceHeadExt->bAdaptiveResidualPredFlag); + if (!pSliceHeadExt->bAdaptiveResidualPredFlag) { + BsWriteOneBit (pBs, 0); + } + } + if (1 == pSubSps->sSpsSvcExt.bAdaptiveTcoeffLevelPredFlag) { + BsWriteOneBit (pBs, pSliceHeadExt->bTcoeffLevelPredFlag); + } + + } + + if (!pSubSps->sSpsSvcExt.bSliceHeaderRestrictionFlag) { + BsWriteBits (pBs, 4, 0); + BsWriteBits (pBs, 4, 15); + } +} + +//only BaseLayer inter MB and SpatialLayer (uiQualityId = 0) inter MB calling this pFunc. +//only for inter part +void WelsInterMbEncode (sWelsEncCtx* pEncCtx, SSlice* pSlice, SMB* pCurMb) { + SMbCache* pMbCache = &pSlice->sMbCacheInfo; + + WelsDctMb (pMbCache->pCoeffLevel, pMbCache->SPicData.pEncMb[0], pEncCtx->pCurDqLayer->iEncStride[0], + pMbCache->pMemPredLuma, pEncCtx->pFuncList->pfDctFourT4); + WelsEncInterY (pEncCtx->pFuncList, pCurMb, pMbCache); +} + + +//only BaseLayer inter MB and SpatialLayer (uiQualityId = 0) inter MB calling this pFunc. 
+//only for I SSlice +void WelsIMbChromaEncode (sWelsEncCtx* pEncCtx, SMB* pCurMb, SMbCache* pMbCache) { + SWelsFuncPtrList* pFunc = pEncCtx->pFuncList; + SDqLayer* pCurLayer = pEncCtx->pCurDqLayer; + const int32_t kiEncStride = pCurLayer->iEncStride[1]; + const int32_t kiCsStride = pCurLayer->iCsStride[1]; + int16_t* pCurRS = pMbCache->pCoeffLevel; + uint8_t* pBestPred = pMbCache->pBestPredIntraChroma; + uint8_t* pCsCb = pMbCache->SPicData.pCsMb[1]; + uint8_t* pCsCr = pMbCache->SPicData.pCsMb[2]; + + //cb + pFunc->pfDctFourT4 (pCurRS, pMbCache->SPicData.pEncMb[1], kiEncStride, pBestPred, 8); + WelsEncRecUV (pFunc, pCurMb, pMbCache, pCurRS, 1); + pFunc->pfIDctFourT4 (pCsCb, kiCsStride, pBestPred, 8, pCurRS); + + //cr + pFunc->pfDctFourT4 (pCurRS + 64, pMbCache->SPicData.pEncMb[2], kiEncStride, pBestPred + 64, 8); + WelsEncRecUV (pFunc, pCurMb, pMbCache, pCurRS + 64, 2); + pFunc->pfIDctFourT4 (pCsCr, kiCsStride, pBestPred + 64, 8, pCurRS + 64); +} + + +//only BaseLayer inter MB and SpatialLayer (uiQualityId = 0) inter MB calling this pFunc. 
+//for P SSlice (intra part + inter part) +void WelsPMbChromaEncode (sWelsEncCtx* pEncCtx, SSlice* pSlice, SMB* pCurMb) { + SWelsFuncPtrList* pFunc = pEncCtx->pFuncList; + SDqLayer* pCurLayer = pEncCtx->pCurDqLayer; + const int32_t kiEncStride = pCurLayer->iEncStride[1]; + SMbCache* pMbCache = &pSlice->sMbCacheInfo; + int16_t* pCurRS = pMbCache->pCoeffLevel + 256; + uint8_t* pBestPred = pMbCache->pMemPredChroma; + + pFunc->pfDctFourT4 (pCurRS, pMbCache->SPicData.pEncMb[1], kiEncStride, pBestPred, 8); + pFunc->pfDctFourT4 (pCurRS + 64, pMbCache->SPicData.pEncMb[2], kiEncStride, pBestPred + 64, 8); + + WelsEncRecUV (pFunc, pCurMb, pMbCache, pCurRS, 1); + WelsEncRecUV (pFunc, pCurMb, pMbCache, pCurRS + 64, 2); +} + +void OutputPMbWithoutConstructCsRsNoCopy (sWelsEncCtx* pCtx, SDqLayer* pDq, SSlice* pSlice, SMB* pMb) { + if ((IS_INTER (pMb->uiMbType) && !IS_SKIP (pMb->uiMbType)) + || IS_I_BL (pMb->uiMbType)) { //intra have been reconstructed, NO COPY from CS to pDecPic-- + SMbCache* pMbCache = &pSlice->sMbCacheInfo; + uint8_t* pDecY = pMbCache->SPicData.pDecMb[0]; + uint8_t* pDecU = pMbCache->SPicData.pDecMb[1]; + uint8_t* pDecV = pMbCache->SPicData.pDecMb[2]; + int16_t* pScaledTcoeff = pMbCache->pCoeffLevel; + const int32_t kiDecStrideLuma = pDq->pDecPic->iLineSize[0]; + const int32_t kiDecStrideChroma = pDq->pDecPic->iLineSize[1]; + PIDctFunc pfIdctFour4x4 = pCtx->pFuncList->pfIDctFourT4; + + WelsIDctT4RecOnMb (pDecY, kiDecStrideLuma, pDecY, kiDecStrideLuma, pScaledTcoeff, pfIdctFour4x4); + pfIdctFour4x4 (pDecU, kiDecStrideChroma, pDecU, kiDecStrideChroma, pScaledTcoeff + 256); + pfIdctFour4x4 (pDecV, kiDecStrideChroma, pDecV, kiDecStrideChroma, pScaledTcoeff + 320); + } +} + +void UpdateQpForOverflow (SMB* pCurMb, uint8_t kuiChromaQpIndexOffset) { + pCurMb->uiLumaQp += DELTA_QP; + pCurMb->uiChromaQp = g_kuiChromaQpTable[CLIP3_QP_0_51 (pCurMb->uiLumaQp + kuiChromaQpIndexOffset)]; +} +// for intra non-dynamic pSlice +//encapsulate two kinds of reconstruction: 
+//first. store base or highest Dependency Layer with only one quality (without CS RS reconstruction) +//second. lower than highest Dependency Layer, and for every Dependency Layer with one quality layer(single layer) +int32_t WelsISliceMdEnc (sWelsEncCtx* pEncCtx, SSlice* pSlice) { //pMd + encoding + SDqLayer* pCurLayer = pEncCtx->pCurDqLayer; + SMbCache* pMbCache = &pSlice->sMbCacheInfo; + SSliceHeaderExt* pSliceHdExt = &pSlice->sSliceHeaderExt; + SMB* pMbList = pCurLayer->sMbDataP; + SMB* pCurMb = NULL; + const int32_t kiSliceFirstMbXY = pSliceHdExt->sSliceHeader.iFirstMbInSlice; + int32_t iNextMbIdx = kiSliceFirstMbXY; + const int32_t kiTotalNumMb = pCurLayer->iMbWidth * pCurLayer->iMbHeight; + int32_t iCurMbIdx = 0, iNumMbCoded = 0; + const int32_t kiSliceIdx = pSlice->iSliceIdx; + const uint8_t kuiChromaQpIndexOffset = pCurLayer->sLayerInfo.pPpsP->uiChromaQpIndexOffset; + + SWelsMD sMd; + int32_t iEncReturn = ENC_RETURN_SUCCESS; + SDynamicSlicingStack sDss; + if (pEncCtx->pSvcParam->iEntropyCodingModeFlag) { + WelsInitSliceCabac (pEncCtx, pSlice); + sDss.pRestoreBuffer = NULL; + sDss.iStartPos = sDss.iCurrentPos = 0; + } + for (; ;) { + if (!pEncCtx->pSvcParam->iEntropyCodingModeFlag) + pEncCtx->pFuncList->pfStashMBStatus (&sDss, pSlice, 0); + iCurMbIdx = iNextMbIdx; + pCurMb = &pMbList[ iCurMbIdx ]; + + pEncCtx->pFuncList->pfRc.pfWelsRcMbInit (pEncCtx, pCurMb, pSlice); + WelsMdIntraInit (pEncCtx, pCurMb, pMbCache, kiSliceFirstMbXY); + +TRY_REENCODING: + sMd.iLambda = g_kiQpCostTable[pCurMb->uiLumaQp]; + WelsMdIntraMb (pEncCtx, &sMd, pCurMb, pMbCache); + UpdateNonZeroCountCache (pCurMb, pMbCache); + + + iEncReturn = pEncCtx->pFuncList->pfWelsSpatialWriteMbSyn (pEncCtx, pSlice, pCurMb); + if (!pEncCtx->pSvcParam->iEntropyCodingModeFlag) { + if ((iEncReturn == ENC_RETURN_VLCOVERFLOWFOUND) && (pCurMb->uiLumaQp < 50)) { + pEncCtx->pFuncList->pfStashPopMBStatus (&sDss, pSlice); + UpdateQpForOverflow (pCurMb, kuiChromaQpIndexOffset); + goto TRY_REENCODING; + } + } 
+ if (ENC_RETURN_SUCCESS != iEncReturn) + return iEncReturn; + + pCurMb->uiSliceIdc = kiSliceIdx; + +#if defined(MB_TYPES_CHECK) + WelsCountMbType (pEncCtx->sPerInfo.iMbCount, I_SLICE, pCurMb); +#endif//MB_TYPES_CHECK + + pEncCtx->pFuncList->pfMdBackgroundInfoUpdate (pCurLayer, pCurMb, pMbCache->bCollocatedPredFlag, I_SLICE); + pEncCtx->pFuncList->pfRc.pfWelsRcMbInfoUpdate (pEncCtx, pCurMb, sMd.iCostLuma, pSlice); + + ++iNumMbCoded; + iNextMbIdx = WelsGetNextMbOfSlice (pCurLayer, iCurMbIdx); + if (iNextMbIdx == -1 || iNextMbIdx >= kiTotalNumMb || iNumMbCoded >= kiTotalNumMb) { + break; + } + } + + return ENC_RETURN_SUCCESS; +} + +// Only for intra dynamic slicing +int32_t WelsISliceMdEncDynamic (sWelsEncCtx* pEncCtx, SSlice* pSlice) { //pMd + encoding + SBitStringAux* pBs = pSlice->pSliceBsa; + SDqLayer* pCurLayer = pEncCtx->pCurDqLayer; + SSliceCtx* pSliceCtx = &pCurLayer->sSliceEncCtx; + SMbCache* pMbCache = &pSlice->sMbCacheInfo; + SSliceHeaderExt* pSliceHdExt = &pSlice->sSliceHeaderExt; + SMB* pMbList = pCurLayer->sMbDataP; + SMB* pCurMb = NULL; + const int32_t kiSliceFirstMbXY = pSliceHdExt->sSliceHeader.iFirstMbInSlice; + int32_t iNextMbIdx = kiSliceFirstMbXY; + const int32_t kiTotalNumMb = pCurLayer->iMbWidth * pCurLayer->iMbHeight; + int32_t iCurMbIdx = 0, iNumMbCoded = 0; + const int32_t kiSliceIdx = pSlice->iSliceIdx; + const int32_t kiPartitionId = (kiSliceIdx % pEncCtx->iActiveThreadsNum); + const uint8_t kuiChromaQpIndexOffset = pCurLayer->sLayerInfo.pPpsP->uiChromaQpIndexOffset; + int32_t iEncReturn = ENC_RETURN_SUCCESS; + + SWelsMD sMd; + SDynamicSlicingStack sDss; + if (pEncCtx->pSvcParam->iEntropyCodingModeFlag) { + WelsInitSliceCabac (pEncCtx, pSlice); + sDss.pRestoreBuffer = pEncCtx->pDynamicBsBuffer[kiPartitionId]; + sDss.iStartPos = sDss.iCurrentPos = 0; + } else { + sDss.iStartPos = BsGetBitsPos (pBs); + } + for (; ;) { + iCurMbIdx = iNextMbIdx; + pCurMb = &pMbList[ iCurMbIdx ]; + + pEncCtx->pFuncList->pfStashMBStatus (&sDss, pSlice, 0); + 
pEncCtx->pFuncList->pfRc.pfWelsRcMbInit (pEncCtx, pCurMb, pSlice); + // if already reaches the largest number of slices, set QPs to the upper bound + if (pSlice->bDynamicSlicingSliceSizeCtrlFlag) { + pCurMb->uiLumaQp = pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId].iMaxQp; + pCurMb->uiChromaQp = g_kuiChromaQpTable[CLIP3_QP_0_51 (pCurMb->uiLumaQp + kuiChromaQpIndexOffset)]; + } + WelsMdIntraInit (pEncCtx, pCurMb, pMbCache, kiSliceFirstMbXY); + +TRY_REENCODING: + sMd.iLambda = g_kiQpCostTable[pCurMb->uiLumaQp]; + WelsMdIntraMb (pEncCtx, &sMd, pCurMb, pMbCache); + UpdateNonZeroCountCache (pCurMb, pMbCache); + + iEncReturn = pEncCtx->pFuncList->pfWelsSpatialWriteMbSyn (pEncCtx, pSlice, pCurMb); + if (iEncReturn == ENC_RETURN_VLCOVERFLOWFOUND && (pCurMb->uiLumaQp < 50)) { + pEncCtx->pFuncList->pfStashPopMBStatus (&sDss, pSlice); + UpdateQpForOverflow (pCurMb, kuiChromaQpIndexOffset); + goto TRY_REENCODING; + } + if (ENC_RETURN_SUCCESS != iEncReturn) + return iEncReturn; + + sDss.iCurrentPos = pEncCtx->pFuncList->pfGetBsPosition (pSlice); + + if (DynSlcJudgeSliceBoundaryStepBack (pEncCtx, pSlice, pSliceCtx, pCurMb, &sDss)) { //islice + pEncCtx->pFuncList->pfStashPopMBStatus (&sDss, pSlice); + pCurLayer->LastCodedMbIdxOfPartition[kiPartitionId] = iCurMbIdx - + 1; // update LastCodedMbIdxOfPartition, need to -1 due to stepping back + ++ pCurLayer->NumSliceCodedOfPartition[kiPartitionId]; + + break; + } + + + pCurMb->uiSliceIdc = kiSliceIdx; + +#if defined(MB_TYPES_CHECK) + WelsCountMbType (pEncCtx->sPerInfo.iMbCount, I_SLICE, pCurMb); +#endif//MB_TYPES_CHECK + + pEncCtx->pFuncList->pfRc.pfWelsRcMbInfoUpdate (pEncCtx, pCurMb, sMd.iCostLuma, pSlice); + + ++iNumMbCoded; + + iNextMbIdx = WelsGetNextMbOfSlice (pCurLayer, iCurMbIdx); + //whether all of MB in current pSlice encoded or not + if (iNextMbIdx == -1 || iNextMbIdx >= kiTotalNumMb || iNumMbCoded >= kiTotalNumMb) { + pSlice->iCountMbNumInSlice = iCurMbIdx - pCurLayer->LastCodedMbIdxOfPartition[kiPartitionId]; + 
pCurLayer->LastCodedMbIdxOfPartition[kiPartitionId] = iCurMbIdx; + ++ pCurLayer->NumSliceCodedOfPartition[kiPartitionId]; + + break; + } + } + return iEncReturn; +} + +//encapsulate two kinds of reconstruction: +// first. store base or highest Dependency Layer with only one quality (without CS RS reconstruction) +// second. lower than highest Dependency Layer, and for every Dependency Layer with one quality layer(single layer) +int32_t WelsPSliceMdEnc (sWelsEncCtx* pEncCtx, SSlice* pSlice, const bool kbIsHighestDlayerFlag) { //pMd + encoding + const SSliceHeaderExt* kpShExt = &pSlice->sSliceHeaderExt; + const SSliceHeader* kpSh = &kpShExt->sSliceHeader; + const int32_t kiSliceFirstMbXY = kpSh->iFirstMbInSlice; + SWelsMD sMd; + + sMd.uiRef = kpSh->uiRefIndex; + sMd.bMdUsingSad = (pEncCtx->pSvcParam->iComplexityMode == LOW_COMPLEXITY); + if (!pEncCtx->pCurDqLayer->bBaseLayerAvailableFlag || !kbIsHighestDlayerFlag) + memset (&sMd.sMe, 0, sizeof (sMd.sMe)); + + //pMb loop + return WelsMdInterMbLoop (pEncCtx, pSlice, &sMd, kiSliceFirstMbXY); +} + +int32_t WelsPSliceMdEncDynamic (sWelsEncCtx* pEncCtx, SSlice* pSlice, const bool kbIsHighestDlayerFlag) { + const SSliceHeaderExt* kpShExt = &pSlice->sSliceHeaderExt; + const SSliceHeader* kpSh = &kpShExt->sSliceHeader; + const int32_t kiSliceFirstMbXY = kpSh->iFirstMbInSlice; + SWelsMD sMd; + + sMd.uiRef = kpSh->uiRefIndex; + sMd.bMdUsingSad = (pEncCtx->pSvcParam->iComplexityMode == LOW_COMPLEXITY); + if (!pEncCtx->pCurDqLayer->bBaseLayerAvailableFlag || !kbIsHighestDlayerFlag) + memset (&sMd.sMe, 0, sizeof (sMd.sMe)); + + //mb loop + return WelsMdInterMbLoopOverDynamicSlice (pEncCtx, pSlice, &sMd, kiSliceFirstMbXY); +} + +int32_t WelsCodePSlice (sWelsEncCtx* pEncCtx, SSlice* pSlice) { + //pSlice-level init should be outside and before this function + SDqLayer* pCurLayer = pEncCtx->pCurDqLayer; + + const bool kbBaseAvail = pCurLayer->bBaseLayerAvailableFlag; + const bool kbHighestSpatial = pEncCtx->pSvcParam->iSpatialLayerNum 
== + (pCurLayer->sLayerInfo.sNalHeaderExt.uiDependencyId + 1); + + //MD switch + if (kbBaseAvail && kbHighestSpatial) { + //initial pMd pointer + pEncCtx->pFuncList->pfInterMd = WelsMdInterMbEnhancelayer; + } else { + //initial pMd pointer + pEncCtx->pFuncList->pfInterMd = WelsMdInterMb; + } + return WelsPSliceMdEnc (pEncCtx, pSlice, kbHighestSpatial); +} + +int32_t WelsCodePOverDynamicSlice (sWelsEncCtx* pEncCtx, SSlice* pSlice) { + //pSlice-level init should be outside and before this function + SDqLayer* pCurLayer = pEncCtx->pCurDqLayer; + + const bool kbBaseAvail = pCurLayer->bBaseLayerAvailableFlag; + const bool kbHighestSpatial = pEncCtx->pSvcParam->iSpatialLayerNum == + (pCurLayer->sLayerInfo.sNalHeaderExt.uiDependencyId + 1); + + //MD switch + if (kbBaseAvail && kbHighestSpatial) { + //initial pMd pointer + pEncCtx->pFuncList->pfInterMd = WelsMdInterMbEnhancelayer; + } else { + //initial pMd pointer + pEncCtx->pFuncList->pfInterMd = WelsMdInterMb; + } + return WelsPSliceMdEncDynamic (pEncCtx, pSlice, kbHighestSpatial); +} + +// 1st index: 0: for P pSlice; 1: for I pSlice; +// 2nd index: 0: for non-dynamic pSlice; 1: for dynamic I pSlice; +static const PWelsCodingSliceFunc g_pWelsSliceCoding[2][2] = { + { WelsCodePSlice, WelsCodePOverDynamicSlice }, // P SSlice + { WelsISliceMdEnc, WelsISliceMdEncDynamic } // I SSlice +}; +static const PWelsSliceHeaderWriteFunc g_pWelsWriteSliceHeader[2] = { // 0: for base; 1: for ext; + WelsSliceHeaderWrite, + WelsSliceHeaderExtWrite +}; + +//Allocate slice's MB cache buffer +int32_t AllocMbCacheAligned (SMbCache* pMbCache, CMemoryAlign* pMa) { + pMbCache->pMemPredMb = (uint8_t*)pMa->WelsMallocz (2 * 256 * sizeof (uint8_t), "pMbCache->pMemPredMb"); + WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pMemPredMb)); + + pMbCache->pCoeffLevel = (int16_t*)pMa->WelsMallocz (MB_COEFF_LIST_SIZE * sizeof (int16_t), "pMbCache->pCoeffLevel"); + WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pCoeffLevel)); + pMbCache->pSkipMb = 
(uint8_t*)pMa->WelsMallocz (384 * sizeof (uint8_t), "pMbCache->pSkipMb"); + WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pSkipMb)); + pMbCache->pMemPredBlk4 = (uint8_t*)pMa->WelsMallocz (2 * 16 * sizeof (uint8_t), "pMbCache->pMemPredBlk4"); + WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pMemPredBlk4)); + pMbCache->pBufferInterPredMe = (uint8_t*)pMa->WelsMallocz (4 * 640 * sizeof (uint8_t), "pMbCache->pBufferInterPredMe"); + WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pBufferInterPredMe)); + pMbCache->pPrevIntra4x4PredModeFlag = (bool*)pMa->WelsMallocz (16 * sizeof (bool), + "pMbCache->pPrevIntra4x4PredModeFlag"); + WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pPrevIntra4x4PredModeFlag)); + pMbCache->pRemIntra4x4PredModeFlag = (int8_t*)pMa->WelsMallocz (16 * sizeof (int8_t), + "pMbCache->pRemIntra4x4PredModeFlag"); + WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pRemIntra4x4PredModeFlag)); + pMbCache->pDct = (SDCTCoeff*)pMa->WelsMallocz (sizeof (SDCTCoeff), "pMbCache->pDct"); + WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pDct)); + + return 0; +} + +// Free slice's MB cache buffer +void FreeMbCache (SMbCache* pMbCache, CMemoryAlign* pMa) { + if (NULL != pMbCache->pCoeffLevel) { + pMa->WelsFree (pMbCache->pCoeffLevel, "pMbCache->pCoeffLevel"); + pMbCache->pCoeffLevel = NULL; + } + if (NULL != pMbCache->pMemPredMb) { + pMa->WelsFree (pMbCache->pMemPredMb, "pMbCache->pMemPredMb"); + pMbCache->pMemPredMb = NULL; + } + if (NULL != pMbCache->pSkipMb) { + pMa->WelsFree (pMbCache->pSkipMb, "pMbCache->pSkipMb"); + pMbCache->pSkipMb = NULL; + } + if (NULL != pMbCache->pMemPredBlk4) { + pMa->WelsFree (pMbCache->pMemPredBlk4, "pMbCache->pMemPredBlk4"); + pMbCache->pMemPredBlk4 = NULL; + } + if (NULL != pMbCache->pBufferInterPredMe) { + pMa->WelsFree (pMbCache->pBufferInterPredMe, "pMbCache->pBufferInterPredMe"); + pMbCache->pBufferInterPredMe = NULL; + } + if (NULL != pMbCache->pPrevIntra4x4PredModeFlag) { + pMa->WelsFree (pMbCache->pPrevIntra4x4PredModeFlag, 
"pMbCache->pPrevIntra4x4PredModeFlag"); + pMbCache->pPrevIntra4x4PredModeFlag = NULL; + } + if (NULL != pMbCache->pRemIntra4x4PredModeFlag) { + pMa->WelsFree (pMbCache->pRemIntra4x4PredModeFlag, "pMbCache->pRemIntra4x4PredModeFlag"); + pMbCache->pRemIntra4x4PredModeFlag = NULL; + } + if (NULL != pMbCache->pDct) { + pMa->WelsFree (pMbCache->pDct, "pMbCache->pDct"); + pMbCache->pDct = NULL; + } +} + +//Initialize slice's boundary info) +int32_t InitSliceBoundaryInfo (SDqLayer* pCurLayer, + SSliceArgument* pSliceArgument, + const int32_t kiSliceNumInFrame) { + const int32_t* kpSlicesAssignList = (int32_t*) & (pSliceArgument->uiSliceMbNum[0]); + const int32_t kiMBWidth = pCurLayer->iMbWidth; + const int32_t kiMBHeight = pCurLayer->iMbHeight; + const int32_t kiCountNumMbInFrame = kiMBWidth * kiMBHeight; + int32_t iSliceIdx = 0; + int32_t iFirstMBInSlice = 0; + int32_t iMbNumInSlice = 0; + + for (; iSliceIdx < kiSliceNumInFrame; iSliceIdx++) { + if (SM_SINGLE_SLICE == pSliceArgument->uiSliceMode) { + iFirstMBInSlice = 0; + iMbNumInSlice = kiCountNumMbInFrame; + + } else if ((SM_RASTER_SLICE == pSliceArgument->uiSliceMode) && (0 == pSliceArgument->uiSliceMbNum[0])) { + iFirstMBInSlice = iSliceIdx * kiMBWidth; + iMbNumInSlice = kiMBWidth; + } else if (SM_RASTER_SLICE == pSliceArgument->uiSliceMode || + SM_FIXEDSLCNUM_SLICE == pSliceArgument->uiSliceMode) { + int32_t iMbIdx = 0; + for (int i = 0; i < iSliceIdx; i++) { + iMbIdx += kpSlicesAssignList[i]; + } + + if (iMbIdx >= kiCountNumMbInFrame) { + return ENC_RETURN_UNEXPECTED; + } + + iFirstMBInSlice = iMbIdx; + iMbNumInSlice = kpSlicesAssignList[iSliceIdx]; + + } else if (SM_SIZELIMITED_SLICE == pSliceArgument->uiSliceMode) { + iFirstMBInSlice = 0; + iMbNumInSlice = kiCountNumMbInFrame; + + } else { // any else uiSliceMode? 
+ assert (0); + } + + pCurLayer->pCountMbNumInSlice[iSliceIdx] = iMbNumInSlice; + pCurLayer->pFirstMbIdxOfSlice[iSliceIdx] = iFirstMBInSlice; + } + + return ENC_RETURN_SUCCESS; +} + +int32_t SetSliceBoundaryInfo (SDqLayer* pCurLayer, SSlice* pSlice, const int32_t kiSliceIdx) { + if (NULL == pCurLayer || NULL == pSlice || + NULL == pCurLayer->pFirstMbIdxOfSlice || + NULL == pCurLayer->pCountMbNumInSlice) { + + return ENC_RETURN_UNEXPECTED; + } + + pSlice->sSliceHeaderExt.sSliceHeader.iFirstMbInSlice = pCurLayer->pFirstMbIdxOfSlice[kiSliceIdx]; + pSlice->iCountMbNumInSlice = pCurLayer->pCountMbNumInSlice[kiSliceIdx]; + + return ENC_RETURN_SUCCESS; +} + +//Allocate slice's MB info buffer +int32_t AllocateSliceMBBuffer (SSlice* pSlice, CMemoryAlign* pMa) { + if (AllocMbCacheAligned (&pSlice->sMbCacheInfo, pMa)) { + return ENC_RETURN_MEMALLOCERR; + } + + return ENC_RETURN_SUCCESS; +} + +// Initialize slice bs buffer info +int32_t InitSliceBsBuffer (SSlice* pSlice, + SBitStringAux* pBsWrite, + bool bIndependenceBsBuffer, + const int32_t iMaxSliceBufferSize, + CMemoryAlign* pMa) { + pSlice->sSliceBs.uiSize = iMaxSliceBufferSize; + pSlice->sSliceBs.uiBsPos = 0; + + if (bIndependenceBsBuffer) { + pSlice->pSliceBsa = &pSlice->sSliceBs.sBsWrite; + pSlice->sSliceBs.pBs = (uint8_t*)pMa->WelsMallocz (iMaxSliceBufferSize, "sSliceBs.pBs"); + if (NULL == pSlice->sSliceBs.pBs) { + return ENC_RETURN_MEMALLOCERR; + } + } else { + pSlice->pSliceBsa = pBsWrite; + pSlice->sSliceBs.pBs = NULL; + } + return ENC_RETURN_SUCCESS; +} + +//free slice bs buffer +void FreeSliceBuffer (SSlice*& pSliceList, const int32_t kiMaxSliceNum, CMemoryAlign* pMa, const char* kpTag) { + if (NULL != pSliceList) { + int32_t iSliceIdx = 0; + while (iSliceIdx < kiMaxSliceNum) { + SSlice* pSlice = &pSliceList[iSliceIdx]; + FreeMbCache (&pSlice->sMbCacheInfo, pMa); + + //slice bs buffer + if (NULL != pSlice->sSliceBs.pBs) { + pMa->WelsFree (pSlice->sSliceBs.pBs, "sSliceBs.pBs"); + pSlice->sSliceBs.pBs = NULL; + } 
+ ++ iSliceIdx; + } + pMa->WelsFree (pSliceList, kpTag); + pSliceList = NULL; + } +} + +int32_t InitSliceList (SSlice*& pSliceList, + SBitStringAux* pBsWrite, + const int32_t kiMaxSliceNum, + const int32_t kiMaxSliceBufferSize, + const bool bIndependenceBsBuffer, + CMemoryAlign* pMa) { + int32_t iSliceIdx = 0; + int32_t iRet = 0; + + if (kiMaxSliceBufferSize <= 0) { + return ENC_RETURN_UNEXPECTED; + } + + while (iSliceIdx < kiMaxSliceNum) { + SSlice* pSlice = pSliceList + iSliceIdx; + if (NULL == pSlice) { + return ENC_RETURN_MEMALLOCERR; + } + + pSlice->iSliceIdx = iSliceIdx; + pSlice->uiBufferIdx = 0; + pSlice->iCountMbNumInSlice = 0; + pSlice->sSliceHeaderExt.sSliceHeader.iFirstMbInSlice = 0; + + iRet = InitSliceBsBuffer (pSlice, + pBsWrite, + bIndependenceBsBuffer, + kiMaxSliceBufferSize, + pMa); + if (ENC_RETURN_SUCCESS != iRet) { + return iRet; + } + + iRet = AllocateSliceMBBuffer (pSlice, pMa); + + if (ENC_RETURN_SUCCESS != iRet) { + return iRet; + } + ++ iSliceIdx; + } + return ENC_RETURN_SUCCESS; +} + +int32_t InitAllSlicesInThread (sWelsEncCtx* pCtx) { + SDqLayer* pCurDqLayer = pCtx->pCurDqLayer; + int32_t iSliceIdx = 0; + int32_t iSlcBuffIdx = 0; + + for (; iSliceIdx < pCurDqLayer->iMaxSliceNum; iSliceIdx++) { + if (NULL == pCurDqLayer->ppSliceInLayer[iSliceIdx]) { + return ENC_RETURN_UNEXPECTED; + } + + pCurDqLayer->ppSliceInLayer[iSliceIdx]->iSliceIdx = -1; + } + + for (; iSlcBuffIdx < pCtx->iActiveThreadsNum; iSlcBuffIdx++) { + pCurDqLayer->sSliceBufferInfo[iSlcBuffIdx].iCodedSliceNum = 0; + } + + return ENC_RETURN_SUCCESS; +} + +int32_t InitOneSliceInThread (sWelsEncCtx* pCtx, + SSlice*& pSlice, + const int32_t kiSlcBuffIdx, + const int32_t kiDlayerIdx, + const int32_t kiSliceIdx) { + + if (pCtx->pCurDqLayer->bThreadSlcBufferFlag) { + const int32_t kiCodedNumInThread = pCtx->pCurDqLayer->sSliceBufferInfo[kiSlcBuffIdx].iCodedSliceNum; + assert (kiCodedNumInThread <= pCtx->pCurDqLayer->sSliceBufferInfo[kiSlcBuffIdx].iMaxSliceNum - 1); + pSlice = 
&pCtx->pCurDqLayer->sSliceBufferInfo [kiSlcBuffIdx].pSliceBuffer[kiCodedNumInThread]; + } else { + pSlice = &pCtx->pCurDqLayer->sSliceBufferInfo [0].pSliceBuffer[kiSliceIdx]; + } + pSlice->iSliceIdx = kiSliceIdx; + pSlice->uiBufferIdx = kiSlcBuffIdx; + + // Initialize slice bs buffer info + pSlice->sSliceBs.uiBsPos = 0; + pSlice->sSliceBs.iNalIndex = 0; + pSlice->sSliceBs.pBsBuffer = pCtx->pSliceThreading->pThreadBsBuffer[kiSlcBuffIdx]; + + return ENC_RETURN_SUCCESS; +} + +int32_t InitSliceThreadInfo (sWelsEncCtx* pCtx, + SDqLayer* pDqLayer, + const int32_t kiDlayerIndex, + CMemoryAlign* pMa) { + int32_t iThreadNum = pCtx->pSvcParam->iMultipleThreadIdc; + int32_t iMaxSliceNum = 0; + int32_t iSlcBufferNum = 0; + int32_t iIdx = 0; + int32_t iRet = 0; + + assert (iThreadNum > 0); + + //for fixed slice num case, no need to reallocate, so one slice buffer for all thread + if (pDqLayer->bThreadSlcBufferFlag) { + iMaxSliceNum = pDqLayer->iMaxSliceNum / iThreadNum + 1; + iSlcBufferNum = iThreadNum; + } else { + iMaxSliceNum = pDqLayer->iMaxSliceNum; + iSlcBufferNum = 1; + } + + while (iIdx < iSlcBufferNum) { + pDqLayer->sSliceBufferInfo[iIdx].iMaxSliceNum = iMaxSliceNum; + pDqLayer->sSliceBufferInfo[iIdx].iCodedSliceNum = 0; + pDqLayer->sSliceBufferInfo[iIdx].pSliceBuffer = (SSlice*)pMa->WelsMallocz (sizeof (SSlice) * iMaxSliceNum, + "pSliceBuffer"); + if (NULL == pDqLayer->sSliceBufferInfo[iIdx].pSliceBuffer) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, + "CWelsH264SVCEncoder::InitSliceThreadInfo: pSliceThreadInfo->pSliceBuffer[iIdx] is NULL"); + return ENC_RETURN_MEMALLOCERR; + } + iRet = InitSliceList (pDqLayer->sSliceBufferInfo[iIdx].pSliceBuffer, + &pCtx->pOut->sBsWrite, + iMaxSliceNum, + pCtx->iSliceBufferSize[kiDlayerIndex], + pDqLayer->bSliceBsBufferFlag, + pMa); + if (ENC_RETURN_SUCCESS != iRet) { + return iRet; + } + iIdx++; + } + + for (; iIdx < MAX_THREADS_NUM; iIdx++) { + pDqLayer->sSliceBufferInfo[iIdx].iMaxSliceNum = 0; + 
pDqLayer->sSliceBufferInfo[iIdx].iCodedSliceNum = 0; + pDqLayer->sSliceBufferInfo[iIdx].pSliceBuffer = NULL; + } + + return ENC_RETURN_SUCCESS; +} + +int32_t InitSliceInLayer (sWelsEncCtx* pCtx, + SDqLayer* pDqLayer, + const int32_t kiDlayerIndex, + CMemoryAlign* pMa) { + int32_t iRet = 0; + int32_t iSliceIdx = 0; + int32_t iSlcBuffIdx = 0; + int32_t iStartIdx = 0; + int32_t iMaxSliceNum = pDqLayer->iMaxSliceNum; + SSliceArgument* pSliceArgument = & pCtx->pSvcParam->sSpatialLayers[kiDlayerIndex].sSliceArgument; + + //SM_SINGLE_SLICE mode using single-thread bs writer pOut->sBsWrite + //even though multi-thread is on for other layers + pDqLayer->bSliceBsBufferFlag = (pCtx->pSvcParam->iMultipleThreadIdc > 1 && + SM_SINGLE_SLICE != pSliceArgument->uiSliceMode) ? true : false; + + pDqLayer->bThreadSlcBufferFlag = (pCtx->pSvcParam->iMultipleThreadIdc > 1 && + SM_SIZELIMITED_SLICE == pSliceArgument->uiSliceMode) ? true : false; + + iRet = InitSliceThreadInfo (pCtx, + pDqLayer, + kiDlayerIndex, + pMa); + if (ENC_RETURN_SUCCESS != iRet) { + return ENC_RETURN_MEMALLOCERR; + } + + pDqLayer->iMaxSliceNum = 0; + for (iSlcBuffIdx = 0; iSlcBuffIdx < pCtx->iActiveThreadsNum; iSlcBuffIdx++) { + pDqLayer->iMaxSliceNum += pDqLayer->sSliceBufferInfo[iSlcBuffIdx].iMaxSliceNum; + } + + pDqLayer->ppSliceInLayer = (SSlice**)pMa->WelsMallocz (sizeof (SSlice*) * pDqLayer->iMaxSliceNum, "ppSliceInLayer"); + if (NULL == pDqLayer->ppSliceInLayer) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::InitSliceInLayer() pDqLayer->ppSliceInLayer is NULL"); + return ENC_RETURN_MEMALLOCERR; + } + + pDqLayer->pFirstMbIdxOfSlice = (int32_t*)pMa->WelsMallocz (sizeof (int32_t*) * pDqLayer->iMaxSliceNum, + "pFirstMbIdxOfSlice"); + if (NULL == pDqLayer->pFirstMbIdxOfSlice) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, + "CWelsH264SVCEncoder::InitSliceInLayer() pDqLayer->pFirstMbIdxOfSlice is NULL"); + return ENC_RETURN_MEMALLOCERR; + } + + pDqLayer->pCountMbNumInSlice = 
(int32_t*)pMa->WelsMallocz (sizeof (int32_t*) * pDqLayer->iMaxSliceNum, + "pCountMbNumInSlice"); + if (NULL == pDqLayer->pCountMbNumInSlice) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, + "CWelsH264SVCEncoder::InitSliceInLayer() pDqLayer->pCountMbNumInSlice is NULL"); + return ENC_RETURN_MEMALLOCERR; + } + + iRet = InitSliceBoundaryInfo (pDqLayer, pSliceArgument, iMaxSliceNum); + if (ENC_RETURN_SUCCESS != iRet) { + return iRet; + } + + iStartIdx = 0; + for (iSlcBuffIdx = 0; iSlcBuffIdx < pCtx->iActiveThreadsNum; iSlcBuffIdx++) { + for (iSliceIdx = 0; iSliceIdx < pDqLayer->sSliceBufferInfo[iSlcBuffIdx].iMaxSliceNum; iSliceIdx++) { + pDqLayer->ppSliceInLayer[iStartIdx + iSliceIdx] = pDqLayer->sSliceBufferInfo[iSlcBuffIdx].pSliceBuffer + iSliceIdx; + } + + iStartIdx += pDqLayer->sSliceBufferInfo[iSlcBuffIdx].iMaxSliceNum; + } + + return ENC_RETURN_SUCCESS; +} + +void InitSliceHeadWithBase (SSlice* pSlice, SSlice* pBaseSlice) { + if (NULL == pSlice || NULL == pBaseSlice) { + return; + } + + SSliceHeaderExt* pBaseSHExt = &pBaseSlice->sSliceHeaderExt; + SSliceHeaderExt* pSHExt = &pSlice->sSliceHeaderExt; + + pSlice->bSliceHeaderExtFlag = pBaseSlice->bSliceHeaderExtFlag; + pSHExt->sSliceHeader.iPpsId = pBaseSHExt->sSliceHeader.iPpsId; + pSHExt->sSliceHeader.pPps = pBaseSHExt->sSliceHeader.pPps; + pSHExt->sSliceHeader.iSpsId = pBaseSHExt->sSliceHeader.iSpsId; + pSHExt->sSliceHeader.pSps = pBaseSHExt->sSliceHeader.pSps; +} + +void InitSliceRefInfoWithBase (SSlice* pSlice, SSlice* pBaseSlice, const uint8_t kuiRefCount) { + if (NULL == pSlice || NULL == pBaseSlice) { + return; + } + + SSliceHeaderExt* pBaseSHExt = &pBaseSlice->sSliceHeaderExt; + SSliceHeaderExt* pSHExt = &pSlice->sSliceHeaderExt; + + pSHExt->sSliceHeader.uiRefCount = kuiRefCount; + memcpy (&pSHExt->sSliceHeader.sRefMarking, &pBaseSHExt->sSliceHeader.sRefMarking, sizeof (SRefPicMarking)); + memcpy (&pSHExt->sSliceHeader.sRefReordering, &pBaseSHExt->sSliceHeader.sRefReordering, + sizeof 
(SRefPicListReorderSyntax)); + +} + +static inline int32_t InitSliceRC (SSlice* pSlice, const int32_t kiGlobalQp) { + + if (NULL == pSlice || kiGlobalQp < 0) + return ENC_RETURN_INVALIDINPUT; + + pSlice->sSlicingOverRc.iComplexityIndexSlice = 0; + pSlice->sSlicingOverRc.iCalculatedQpSlice = kiGlobalQp; + pSlice->sSlicingOverRc.iTotalQpSlice = 0; + pSlice->sSlicingOverRc.iTotalMbSlice = 0; + pSlice->sSlicingOverRc.iTargetBitsSlice = 0; + pSlice->sSlicingOverRc.iFrameBitsSlice = 0; + pSlice->sSlicingOverRc.iGomBitsSlice = 0; + + return ENC_RETURN_SUCCESS; +} + +int32_t ReallocateSliceList (sWelsEncCtx* pCtx, + SSliceArgument* pSliceArgument, + SSlice*& pSliceList, + const int32_t kiMaxSliceNumOld, + const int32_t kiMaxSliceNumNew) { + CMemoryAlign* pMA = pCtx->pMemAlign; + SSlice* pBaseSlice = NULL; + SSlice* pNewSliceList = NULL; + SSlice* pSlice = NULL; + int32_t iSliceIdx = 0; + int32_t iRet = 0; + const int32_t kiCurDid = pCtx->uiDependencyId; + int32_t iMaxSliceBufferSize = (pCtx)->iSliceBufferSize[kiCurDid]; + + if (NULL == pSliceList || NULL == pSliceArgument) { + return ENC_RETURN_INVALIDINPUT; + } + + bool bIndependenceBsBuffer = (pCtx->pSvcParam->iMultipleThreadIdc > 1 && + SM_SINGLE_SLICE != pSliceArgument->uiSliceMode) ? 
true : false; + + pNewSliceList = (SSlice*)pMA->WelsMallocz (sizeof (SSlice) * kiMaxSliceNumNew, "pSliceBuffer"); + if (NULL == pNewSliceList) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::ReallocateSliceList: pNewSliceList is NULL"); + return ENC_RETURN_MEMALLOCERR; + } + + memcpy (pNewSliceList, pSliceList, sizeof (SSlice) * kiMaxSliceNumOld); + + //update Bs writer + for (iSliceIdx = 0; iSliceIdx < kiMaxSliceNumOld; iSliceIdx++) { + pSlice = pNewSliceList + iSliceIdx; + if (NULL == pSlice) { + FreeSliceBuffer (pNewSliceList, kiMaxSliceNumNew, pMA, "pSliceBuffer"); + return ENC_RETURN_MEMALLOCERR; + } + + if (bIndependenceBsBuffer) { + pSlice->pSliceBsa = &pSlice->sSliceBs.sBsWrite; + } + } + + pBaseSlice = &pSliceList[0]; + if (NULL == pBaseSlice) { + FreeSliceBuffer (pNewSliceList, kiMaxSliceNumNew, pMA, "ReallocateSliceList()::InitSliceBsBuffer()"); + return ENC_RETURN_MEMALLOCERR; + } + + for (iSliceIdx = kiMaxSliceNumOld; iSliceIdx < kiMaxSliceNumNew; iSliceIdx++) { + pSlice = pNewSliceList + iSliceIdx; + if (NULL == pSlice) { + FreeSliceBuffer (pNewSliceList, kiMaxSliceNumNew, pMA, "pSliceBuffer"); + return ENC_RETURN_MEMALLOCERR; + } + + pSlice->iSliceIdx = -1; + pSlice->uiBufferIdx = 0; + pSlice->iCountMbNumInSlice = 0; + pSlice->sSliceHeaderExt.sSliceHeader.iFirstMbInSlice = 0; + + iRet = InitSliceBsBuffer (pSlice, + & pCtx->pOut->sBsWrite, + bIndependenceBsBuffer, + iMaxSliceBufferSize, + pMA); + if (ENC_RETURN_SUCCESS != iRet) { + FreeSliceBuffer (pNewSliceList, kiMaxSliceNumNew, pMA, "pSliceBuffer"); + return iRet; + } + + iRet = AllocateSliceMBBuffer (pSlice, pMA); + if (ENC_RETURN_SUCCESS != iRet) { + FreeSliceBuffer (pNewSliceList, kiMaxSliceNumNew, pMA, "pSliceBuffer"); + return iRet; + } + + InitSliceHeadWithBase (pSlice, pBaseSlice); + InitSliceRefInfoWithBase (pSlice, pBaseSlice, pCtx->iNumRef0); + + iRet = InitSliceRC (pSlice, pCtx->iGlobalQp); + if (ENC_RETURN_SUCCESS != iRet) { + FreeSliceBuffer (pNewSliceList, 
kiMaxSliceNumNew, pMA, "pSliceBuffer"); + return iRet; + } + } + + pMA->WelsFree (pSliceList, "pSliceBuffer"); + pSliceList = pNewSliceList; + + return ENC_RETURN_SUCCESS; +} + +int32_t CalculateNewSliceNum (sWelsEncCtx* pCtx, + SSlice* pLastCodedSlice, + const int32_t iMaxSliceNumOld, + int32_t& iMaxSliceNumNew) { + if (NULL == pCtx || NULL == pLastCodedSlice || 0 == iMaxSliceNumOld) { + return ENC_RETURN_INVALIDINPUT; + } + + if (1 == pCtx->iActiveThreadsNum) { + iMaxSliceNumNew = iMaxSliceNumOld * SLICE_NUM_EXPAND_COEF; + return ENC_RETURN_SUCCESS; + } + + int32_t iPartitionID = pLastCodedSlice->iSliceIdx % pCtx->iActiveThreadsNum; + int32_t iMBNumInPatition = pCtx->pCurDqLayer->EndMbIdxOfPartition[iPartitionID] + - pCtx->pCurDqLayer->FirstMbIdxOfPartition[iPartitionID] + 1; + int32_t iLeftMBNum = pCtx->pCurDqLayer->EndMbIdxOfPartition[iPartitionID] + - pCtx->pCurDqLayer->LastCodedMbIdxOfPartition[iPartitionID] + 1; + int32_t iIncreaseSlicNum = (iLeftMBNum * INT_MULTIPLY / iMBNumInPatition) * iMaxSliceNumOld; + + iIncreaseSlicNum = (0 == (iIncreaseSlicNum / INT_MULTIPLY)) ? 1 : (iIncreaseSlicNum / INT_MULTIPLY); + iIncreaseSlicNum = (iIncreaseSlicNum < iMaxSliceNumOld / 2) ? 
(iMaxSliceNumOld / 2) : iIncreaseSlicNum; + iMaxSliceNumNew = iMaxSliceNumOld + iIncreaseSlicNum; + + return ENC_RETURN_SUCCESS; +} + +int32_t ReallocateSliceInThread (sWelsEncCtx* pCtx, + SDqLayer* pDqLayer, + const int32_t kiDlayerIdx, + const int32_t KiSlcBuffIdx) { + int32_t iMaxSliceNum = pDqLayer->sSliceBufferInfo[KiSlcBuffIdx].iMaxSliceNum; + int32_t iCodedSliceNum = pDqLayer->sSliceBufferInfo[KiSlcBuffIdx].iCodedSliceNum; + int32_t iMaxSliceNumNew = 0; + int32_t iRet = 0; + SSlice* pLastCodedSlice = &pDqLayer->sSliceBufferInfo[KiSlcBuffIdx].pSliceBuffer [iCodedSliceNum - 1]; + SSliceArgument* pSliceArgument = & pCtx->pSvcParam->sSpatialLayers[kiDlayerIdx].sSliceArgument; + + iRet = CalculateNewSliceNum (pCtx, + pLastCodedSlice, + iMaxSliceNum, + iMaxSliceNumNew); + if (ENC_RETURN_SUCCESS != iRet) { + return iRet; + } + + iRet = ReallocateSliceList (pCtx, + pSliceArgument, + pDqLayer->sSliceBufferInfo[KiSlcBuffIdx].pSliceBuffer, + iMaxSliceNum, + iMaxSliceNumNew); + if (ENC_RETURN_SUCCESS != iRet) { + return iRet; + } + + pDqLayer->sSliceBufferInfo[KiSlcBuffIdx].iMaxSliceNum = iMaxSliceNumNew; + + return ENC_RETURN_SUCCESS; +} + +int32_t ExtendLayerBuffer (sWelsEncCtx* pCtx, + const int32_t kiMaxSliceNumOld, + const int32_t kiMaxSliceNumNew) { + CMemoryAlign* pMA = pCtx->pMemAlign; + SDqLayer* pCurLayer = pCtx->pCurDqLayer; + SSlice** ppSlice = NULL; + int32_t* pFirstMbIdxOfSlice = NULL; + int32_t* pCountMbNumInSlice = NULL; + + // update for ppsliceInlayer + ppSlice = (SSlice**)pMA->WelsMallocz (sizeof (SSlice*) * kiMaxSliceNumNew, "ppSliceInLayer"); + if (NULL == ppSlice) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::ExtendLayerBuffer: ppSlice is NULL"); + return ENC_RETURN_MEMALLOCERR; + } + pMA->WelsFree (pCurLayer->ppSliceInLayer, "ppSliceInLayer"); + pCurLayer->ppSliceInLayer = ppSlice; + + // update for pFirstMbIdxInSlice + pFirstMbIdxOfSlice = (int32_t*)pMA->WelsMallocz (sizeof (int32_t*) * kiMaxSliceNumNew, 
"pFirstMbIdxOfSlice"); + if (NULL == pFirstMbIdxOfSlice) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::ExtendLayerBuffer: pFirstMbIdxOfSlice is NULL"); + return ENC_RETURN_MEMALLOCERR; + } + memset (pFirstMbIdxOfSlice, 0, sizeof (int32_t) * kiMaxSliceNumNew); + memcpy (pFirstMbIdxOfSlice, pCurLayer->pFirstMbIdxOfSlice, sizeof (int32_t) * kiMaxSliceNumOld); + pMA->WelsFree (pCurLayer->pFirstMbIdxOfSlice, "pFirstMbIdxOfSlice"); + pCurLayer->pFirstMbIdxOfSlice = pFirstMbIdxOfSlice; + + // update for pCountMbNumInSlice + pCountMbNumInSlice = (int32_t*)pMA->WelsMallocz (sizeof (int32_t*) * kiMaxSliceNumNew, "pCountMbNumInSlice"); + if (NULL == pCountMbNumInSlice) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::ExtendLayerBuffer: pCountMbNumInSlice is NULL"); + return ENC_RETURN_MEMALLOCERR; + } + memset (pCountMbNumInSlice, 0, sizeof (int32_t) * kiMaxSliceNumNew); + memcpy (pCountMbNumInSlice, pCurLayer->pCountMbNumInSlice, sizeof (int32_t) * kiMaxSliceNumOld); + pMA->WelsFree (pCurLayer->pCountMbNumInSlice, "pCountMbNumInSlice"); + pCurLayer->pCountMbNumInSlice = pCountMbNumInSlice; + + return ENC_RETURN_SUCCESS; +} + +int32_t ReallocSliceBuffer (sWelsEncCtx* pCtx) { + + SDqLayer* pCurLayer = pCtx->pCurDqLayer; + int32_t iMaxSliceNumOld = pCurLayer->sSliceBufferInfo[0].iMaxSliceNum; + int32_t iMaxSliceNumNew = 0; + int32_t iRet = 0; + int32_t iSliceIdx = 0; + int32_t iSlcBuffIdx = 0; + int32_t iStartIdx = 0; + const int32_t kiCurDid = pCtx->uiDependencyId; + SSlice* pLastCodedSlice = pCurLayer->sSliceBufferInfo[0].pSliceBuffer + (iMaxSliceNumOld - 1); + SSliceArgument* pSliceArgument = & pCtx->pSvcParam->sSpatialLayers[kiCurDid].sSliceArgument; + iRet = CalculateNewSliceNum (pCtx, + pLastCodedSlice, + iMaxSliceNumOld, + iMaxSliceNumNew); + + if (ENC_RETURN_SUCCESS != iRet) { + return iRet; + } + + iRet = ReallocateSliceList (pCtx, + pSliceArgument, + pCurLayer->sSliceBufferInfo[0].pSliceBuffer, + iMaxSliceNumOld, + 
iMaxSliceNumNew); + if (ENC_RETURN_SUCCESS != iRet) { + return iRet; + } + + pCurLayer->sSliceBufferInfo[0].iMaxSliceNum = iMaxSliceNumNew; + + iMaxSliceNumNew = 0; + for (iSlcBuffIdx = 0; iSlcBuffIdx < pCtx->iActiveThreadsNum; iSlcBuffIdx++) { + iMaxSliceNumNew += pCurLayer->sSliceBufferInfo[iSlcBuffIdx].iMaxSliceNum; + } + + iRet = ExtendLayerBuffer (pCtx, pCurLayer->iMaxSliceNum, iMaxSliceNumNew); + if (ENC_RETURN_SUCCESS != iRet) { + return iRet; + } + + for (iSlcBuffIdx = 0; iSlcBuffIdx < pCtx->iActiveThreadsNum; iSlcBuffIdx++) { + for (iSliceIdx = 0; iSliceIdx < pCurLayer->sSliceBufferInfo[iSlcBuffIdx].iMaxSliceNum; iSliceIdx++) { + pCurLayer->ppSliceInLayer[iStartIdx + iSliceIdx] = pCurLayer->sSliceBufferInfo[iSlcBuffIdx].pSliceBuffer + iSliceIdx; + } + iStartIdx += pCurLayer->sSliceBufferInfo[iSlcBuffIdx].iMaxSliceNum; + } + + pCurLayer->iMaxSliceNum = iMaxSliceNumNew; + + return ENC_RETURN_SUCCESS; +} + +static inline int32_t CheckAllSliceBuffer (SDqLayer* pCurLayer, const int32_t kiCodedSliceNum) { + int32_t iSliceIdx = 0; + for (; iSliceIdx < kiCodedSliceNum ; iSliceIdx ++) { + if (NULL == pCurLayer->ppSliceInLayer[iSliceIdx]) { + return ENC_RETURN_UNEXPECTED; + } + + if (iSliceIdx != pCurLayer->ppSliceInLayer[iSliceIdx]->iSliceIdx) { + return ENC_RETURN_UNEXPECTED; + } + } + + return ENC_RETURN_SUCCESS; +} + +int32_t ReOrderSliceInLayer (sWelsEncCtx* pCtx, + const SliceModeEnum kuiSliceMode, + const int32_t kiThreadNum) { + SDqLayer* pCurLayer = pCtx->pCurDqLayer; + SSlice* pSliceBuffer = NULL; + int32_t iSlcBuffIdx = 0; + int32_t iPartitionIdx = 0; + int32_t iPartitionID = 0; + int32_t iSliceIdx = 0; + int32_t iSliceNumInThread = 0; + int32_t iEncodeSliceNum = 0; + int32_t iActualSliceIdx = 0; + int32_t iNonUsedBufferNum = 0; + int32_t iUsedSliceNum = 0; + + int32_t iPartitionNum = 0; + int32_t aiPartitionOffset[MAX_THREADS_NUM] = {0}; + + //for non-dynamic slice mode, iPartitionNum = 1, iPartitionOffset = 0 + iPartitionNum = (SM_SIZELIMITED_SLICE == 
kuiSliceMode) ? kiThreadNum : 1; + for (iPartitionIdx = 0; iPartitionIdx < iPartitionNum; iPartitionIdx++) { + aiPartitionOffset[iPartitionIdx] = iEncodeSliceNum; + if (SM_SIZELIMITED_SLICE == kuiSliceMode) { + iEncodeSliceNum += pCurLayer->NumSliceCodedOfPartition[iPartitionIdx]; + } else { + iEncodeSliceNum = pCurLayer->sSliceEncCtx.iSliceNumInFrame; + } + } + + if (iEncodeSliceNum != pCurLayer->sSliceEncCtx.iSliceNumInFrame) { + return ENC_RETURN_UNEXPECTED; + } + + //before encode all slices in layer, slices' index are init with -1 + //pSliceBuffer->iSliceIdx will be set to actual slice index when encode one slice + for (iSlcBuffIdx = 0; iSlcBuffIdx < kiThreadNum; iSlcBuffIdx++) { + iSliceNumInThread = pCurLayer->sSliceBufferInfo[iSlcBuffIdx].iMaxSliceNum; + + for (iSliceIdx = 0; iSliceIdx < iSliceNumInThread; iSliceIdx++) { + pSliceBuffer = pCurLayer->sSliceBufferInfo[iSlcBuffIdx].pSliceBuffer + iSliceIdx; + if (NULL == pSliceBuffer) { + return ENC_RETURN_UNEXPECTED; + } + + if (-1 != pSliceBuffer->iSliceIdx) { + iPartitionID = pSliceBuffer->iSliceIdx % iPartitionNum; + iActualSliceIdx = aiPartitionOffset[iPartitionID] + pSliceBuffer->iSliceIdx / iPartitionNum; + pSliceBuffer->iSliceIdx = iActualSliceIdx; + pCurLayer->ppSliceInLayer[iActualSliceIdx] = pSliceBuffer; + iUsedSliceNum ++; + } else { + pCurLayer->ppSliceInLayer[iEncodeSliceNum + iNonUsedBufferNum] = pSliceBuffer; + iNonUsedBufferNum ++; + } + } + } + + if (iUsedSliceNum != iEncodeSliceNum || + pCurLayer->iMaxSliceNum != (iNonUsedBufferNum + iUsedSliceNum)) { + return ENC_RETURN_UNEXPECTED; + } + + if (ENC_RETURN_SUCCESS != CheckAllSliceBuffer (pCurLayer, iEncodeSliceNum)) { + return ENC_RETURN_UNEXPECTED; + } + + return ENC_RETURN_SUCCESS; +} + +int32_t GetCurLayerNalCount (const SDqLayer* pCurDq, const int32_t kiCodedSliceNum) { + int32_t iTotalNalCount = 0; + int32_t iSliceIdx = 0; + SWelsSliceBs* pSliceBs = NULL; + for (; iSliceIdx < kiCodedSliceNum; iSliceIdx++) { + pSliceBs = 
&pCurDq->ppSliceInLayer[iSliceIdx]->sSliceBs; + if (pSliceBs != NULL && pSliceBs->uiBsPos > 0) { + iTotalNalCount += pSliceBs->iNalIndex; + } + } + + return iTotalNalCount; +} + +int32_t GetTotalCodedNalCount (SFrameBSInfo* pFbi) { + int32_t iTotalCodedNalCount = 0; + for (int32_t iNalIdx = 0; iNalIdx < MAX_LAYER_NUM_OF_FRAME; iNalIdx++) { + iTotalCodedNalCount += pFbi->sLayerInfo[iNalIdx].iNalCount; + } + + return iTotalCodedNalCount; +} + +int32_t FrameBsRealloc (sWelsEncCtx* pCtx, + SFrameBSInfo* pFrameBsInfo, + SLayerBSInfo* pLayerBsInfo, + const int32_t kiMaxSliceNumOld) { + CMemoryAlign* pMA = pCtx->pMemAlign; + int32_t iCountNals = pCtx->pOut->iCountNals; + iCountNals += kiMaxSliceNumOld * (pCtx->pSvcParam->iSpatialLayerNum + pCtx->bNeedPrefixNalFlag); + + SWelsNalRaw* pNalList = (SWelsNalRaw*)pMA->WelsMallocz (iCountNals * sizeof (SWelsNalRaw), "pOut->sNalList"); + if (NULL == pNalList) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::FrameBsRealloc: pNalList is NULL"); + return ENC_RETURN_MEMALLOCERR; + } + memcpy (pNalList, pCtx->pOut->sNalList, sizeof (SWelsNalRaw) * pCtx->pOut->iCountNals); + pMA->WelsFree (pCtx->pOut->sNalList, "pOut->sNalList"); + pCtx->pOut->sNalList = pNalList; + + int32_t* pNalLen = (int32_t*)pMA->WelsMallocz (iCountNals * sizeof (int32_t), "pOut->pNalLen"); + if (NULL == pNalLen) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::FrameBsRealloc: pNalLen is NULL"); + return ENC_RETURN_MEMALLOCERR; + } + memcpy (pNalLen, pCtx->pOut->pNalLen, sizeof (int32_t) * pCtx->pOut->iCountNals); + pMA->WelsFree (pCtx->pOut->pNalLen, "pOut->pNalLen"); + pCtx->pOut->pNalLen = pNalLen; + + pCtx->pOut->iCountNals = iCountNals; + SLayerBSInfo* pLBI1, *pLBI2; + pLBI1 = &pFrameBsInfo->sLayerInfo[0]; + pLBI1->pNalLengthInByte = pCtx->pOut->pNalLen; + while (pLBI1 != pLayerBsInfo) { + pLBI2 = pLBI1; + ++ pLBI1; + pLBI1->pNalLengthInByte = pLBI2->pNalLengthInByte + pLBI2->iNalCount; + } + + return 
ENC_RETURN_SUCCESS; +} + +int32_t SliceLayerInfoUpdate (sWelsEncCtx* pCtx, + SFrameBSInfo* pFrameBsInfo, + SLayerBSInfo* pLayerBsInfo, + const SliceModeEnum kuiSliceMode) { + int32_t iMaxSliceNum = 0; + int32_t iCodedSliceNum = 0; + int32_t iCodedNalCount = 0; + int32_t iRet = 0; + + for (int32_t iSlcBuffIdx = 0; iSlcBuffIdx < pCtx->iActiveThreadsNum; iSlcBuffIdx++) { + iMaxSliceNum += pCtx->pCurDqLayer->sSliceBufferInfo[iSlcBuffIdx].iMaxSliceNum; + } + + //reallocate ppSliceInLayer if total encoded slice num exceed max slice num + if (iMaxSliceNum > pCtx->pCurDqLayer->iMaxSliceNum) { + iRet = ExtendLayerBuffer (pCtx, pCtx->pCurDqLayer->iMaxSliceNum, iMaxSliceNum); + if (ENC_RETURN_SUCCESS != iRet) { + return iRet; + } + pCtx->pCurDqLayer->iMaxSliceNum = iMaxSliceNum; + } + + //update ppSliceInLayer based on pSliceBuffer, reordering based on slice index + iRet = ReOrderSliceInLayer (pCtx, kuiSliceMode, pCtx->iActiveThreadsNum); + if (ENC_RETURN_SUCCESS != iRet) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, + "CWelsH264SVCEncoder::SliceLayerInfoUpdate: ReOrderSliceInLayer failed"); + return iRet; + } + + //Extend NalList buffer if exceed + iCodedSliceNum = GetCurrentSliceNum (pCtx->pCurDqLayer); + pLayerBsInfo->iNalCount = GetCurLayerNalCount (pCtx->pCurDqLayer, iCodedSliceNum); + iCodedNalCount = GetTotalCodedNalCount (pFrameBsInfo); + + if (iCodedNalCount > pCtx->pOut->iCountNals) { + iRet = FrameBsRealloc (pCtx, pFrameBsInfo, pLayerBsInfo, pCtx->pCurDqLayer->iMaxSliceNum); + if (ENC_RETURN_SUCCESS != iRet) { + return iRet; + } + } + + return ENC_RETURN_SUCCESS; +} + +int32_t WelsCodeOneSlice (sWelsEncCtx* pEncCtx, SSlice* pCurSlice, const int32_t kiNalType) { + SDqLayer* pCurLayer = pEncCtx->pCurDqLayer; + SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId]; + SNalUnitHeaderExt* pNalHeadExt = &pCurLayer->sLayerInfo.sNalHeaderExt; + SBitStringAux* pBs = pCurSlice->pSliceBsa; + const int32_t kiDynamicSliceFlag = + 
(pEncCtx->pSvcParam->sSpatialLayers[pEncCtx->uiDependencyId].sSliceArgument.uiSliceMode + == SM_SIZELIMITED_SLICE); + if (I_SLICE == pEncCtx->eSliceType) { + pNalHeadExt->bIdrFlag = 1; + pCurSlice->sScaleShift = 0; + } else { + const uint32_t kuiTemporalId = pNalHeadExt->uiTemporalId; + pCurSlice->sScaleShift = kuiTemporalId ? (kuiTemporalId - pEncCtx->pRefPic->uiTemporalId) : 0; + } + + WelsSliceHeaderExtInit (pEncCtx, pCurLayer, pCurSlice); + + //RomRC init slice by slice + if (pWelsSvcRc->bGomRC) { + GomRCInitForOneSlice (pCurSlice, pWelsSvcRc->iBitsPerMb); + } + + g_pWelsWriteSliceHeader[pCurSlice->bSliceHeaderExtFlag] (pEncCtx, pBs, pCurLayer, pCurSlice, + pEncCtx->pFuncList->pParametersetStrategy); + + pCurSlice->uiLastMbQp = pCurLayer->sLayerInfo.pPpsP->iPicInitQp + pCurSlice->sSliceHeaderExt.sSliceHeader.iSliceQpDelta; + + int32_t iEncReturn = g_pWelsSliceCoding[pNalHeadExt->bIdrFlag][kiDynamicSliceFlag] (pEncCtx, pCurSlice); + if (ENC_RETURN_SUCCESS != iEncReturn) + return iEncReturn; + + WelsWriteSliceEndSyn (pCurSlice, pEncCtx->pSvcParam->iEntropyCodingModeFlag != 0); + + return ENC_RETURN_SUCCESS; +} + +//pFunc: UpdateMbNeighbourInfoForNextSlice() +void UpdateMbNeighbourInfoForNextSlice (SDqLayer* pCurDq, + SMB* pMbList, + const int32_t kiFirstMbIdxOfNextSlice, + const int32_t kiLastMbIdxInPartition) { + SSliceCtx* pSliceCtx = &pCurDq->sSliceEncCtx; + const int32_t kiMbWidth = pSliceCtx->iMbWidth; + int32_t iIdx = kiFirstMbIdxOfNextSlice; + int32_t iNextSliceFirstMbIdxRowStart = ((kiFirstMbIdxOfNextSlice % kiMbWidth) ? 
1 : 0); + int32_t iCountMbUpdate = kiMbWidth + + iNextSliceFirstMbIdxRowStart; //need to update MB(iMbXY+1) to MB(iMbXY+1+row) in common case + const int32_t kiEndMbNeedUpdate = kiFirstMbIdxOfNextSlice + iCountMbUpdate; + SMB* pMb = &pMbList[iIdx]; + + do { + UpdateMbNeighbor (pCurDq, pMb, kiMbWidth, WelsMbToSliceIdc (pCurDq, pMb->iMbXY)); + ++ pMb; + ++ iIdx; + } while ((iIdx < kiEndMbNeedUpdate) && + (iIdx <= kiLastMbIdxInPartition)); +} + +void AddSliceBoundary (sWelsEncCtx* pEncCtx, SSlice* pCurSlice, SSliceCtx* pSliceCtx, SMB* pCurMb, + int32_t iFirstMbIdxOfNextSlice, const int32_t kiLastMbIdxInPartition) { + SDqLayer* pCurLayer = pEncCtx->pCurDqLayer; + SSlice* pSliceBuffer = pCurLayer->sSliceBufferInfo[pCurSlice->uiBufferIdx].pSliceBuffer; + int32_t iCodedSliceNum = pCurLayer->sSliceBufferInfo[pCurSlice->uiBufferIdx].iCodedSliceNum; + int32_t iCurMbIdx = pCurMb->iMbXY; + uint16_t iCurSliceIdc = pSliceCtx->pOverallMbMap[ iCurMbIdx ]; + const int32_t kiSliceIdxStep = pEncCtx->iActiveThreadsNum; + uint16_t iNextSliceIdc = iCurSliceIdc + kiSliceIdxStep; + SSlice* pNextSlice = NULL; + + SMB* pMbList = pCurLayer->sMbDataP; + + //update cur pSlice info + pCurSlice->sSliceHeaderExt.uiNumMbsInSlice = 1 + iCurMbIdx - pCurSlice->sSliceHeaderExt.sSliceHeader.iFirstMbInSlice; + + //pNextSlice pointer/initialization + if (pEncCtx->iActiveThreadsNum > 1) { + pNextSlice = &pSliceBuffer[ iCodedSliceNum + 1 ]; + } else { + pNextSlice = &pSliceBuffer[ iNextSliceIdc ]; + } + +#if _DEBUG + assert (NULL != pNextSlice); +#endif + + //init next pSlice info + pNextSlice->bSliceHeaderExtFlag = + (NAL_UNIT_CODED_SLICE_EXT == pCurLayer->sLayerInfo.sNalHeaderExt.sNalUnitHeader.eNalUnitType); + memcpy (&pNextSlice->sSliceHeaderExt, &pCurSlice->sSliceHeaderExt, + sizeof (SSliceHeaderExt)); // confirmed_safe_unsafe_usage + pNextSlice->sSliceHeaderExt.sSliceHeader.iFirstMbInSlice = iFirstMbIdxOfNextSlice; + WelsSetMemMultiplebytes_c (pSliceCtx->pOverallMbMap + iFirstMbIdxOfNextSlice, 
iNextSliceIdc, + (kiLastMbIdxInPartition - iFirstMbIdxOfNextSlice + 1), sizeof (uint16_t)); + + //DYNAMIC_SLICING_ONE_THREAD: update pMbList slice_neighbor_info + UpdateMbNeighbourInfoForNextSlice (pCurLayer, pMbList, iFirstMbIdxOfNextSlice, kiLastMbIdxInPartition); +} + +bool DynSlcJudgeSliceBoundaryStepBack (void* pCtx, void* pSlice, SSliceCtx* pSliceCtx, SMB* pCurMb, + SDynamicSlicingStack* pDss) { + sWelsEncCtx* pEncCtx = (sWelsEncCtx*)pCtx; + SSlice* pCurSlice = (SSlice*)pSlice; + int32_t iCurMbIdx = pCurMb->iMbXY; + uint32_t uiLen = 0; + int32_t iPosBitOffset = 0; + const int32_t kiActiveThreadsNum = pEncCtx->iActiveThreadsNum; + const int32_t kiPartitaionId = pCurSlice->iSliceIdx % kiActiveThreadsNum; + const int32_t kiEndMbIdxOfPartition = pEncCtx->pCurDqLayer->EndMbIdxOfPartition[kiPartitaionId]; + const bool kbCurMbNotFirstMbOfCurSlice = ((iCurMbIdx > 0) && (pSliceCtx->pOverallMbMap[iCurMbIdx] == + pSliceCtx->pOverallMbMap[iCurMbIdx - 1])); + const bool kbCurMbNotLastMbOfCurPartition = iCurMbIdx < kiEndMbIdxOfPartition; + + if (pCurSlice->bDynamicSlicingSliceSizeCtrlFlag) + return false; + + iPosBitOffset = (pDss->iCurrentPos - pDss->iStartPos); +#if _DEBUG + assert (iPosBitOffset >= 0); +#endif + uiLen = ((iPosBitOffset >> 3) + ((iPosBitOffset & 0x07) ? 
1 : 0)); + + if ((kbCurMbNotFirstMbOfCurSlice + && JUMPPACKETSIZE_JUDGE (uiLen, iCurMbIdx, pSliceCtx->uiSliceSizeConstraint)) /*jump_avoiding_pack_exceed*/ + && kbCurMbNotLastMbOfCurPartition) { //decide to add new pSlice + + WelsLog (&pEncCtx->sLogCtx, WELS_LOG_DETAIL, + "DynSlcJudgeSliceBoundaryStepBack: AddSliceBoundary: iCurMbIdx=%d, uiLen=%d, iSliceIdx=%d", iCurMbIdx, uiLen, + pCurSlice->iSliceIdx); + + if (pEncCtx->pSvcParam->iMultipleThreadIdc > 1) { + WelsMutexLock (&pEncCtx->pSliceThreading->mutexSliceNumUpdate); + //lock the acessing to this variable: pSliceCtx->iSliceNumInFrame + } + //tmp choice to avoid complex memory operation, 100520, to be modify + //TODO: pSliceCtx->iSliceNumInFrame should match max slice num limitation in given profile based on standard + // current change is tmp solution which equal to origin design, + // as iMaxSliceNum is always equal to iMaxSliceNumConstraint in origin design + // and will also extend when reallocated, + // tmp change is: iMaxSliceNumConstraint is alway set to be MAXSLICENUM, will not change even reallocate + AddSliceBoundary (pEncCtx, pCurSlice, pSliceCtx, pCurMb, iCurMbIdx, kiEndMbIdxOfPartition); + ++ pSliceCtx->iSliceNumInFrame; + + if (pEncCtx->pSvcParam->iMultipleThreadIdc > 1) { + WelsMutexUnlock (&pEncCtx->pSliceThreading->mutexSliceNumUpdate); + } + + return true; + } + + return false; +} + +/////////////// +// pMb loop +/////////////// +inline void WelsInitInterMDStruc (const SMB* pCurMb, uint16_t* pMvdCostTable, const int32_t kiMvdInterTableStride, + SWelsMD* pMd) { + pMd->iLambda = g_kiQpCostTable[pCurMb->uiLumaQp]; + pMd->pMvdCost = &pMvdCostTable[pCurMb->uiLumaQp * kiMvdInterTableStride]; + pMd-> iMbPixX = (pCurMb->iMbX << 4); + pMd-> iMbPixY = (pCurMb->iMbY << 4); + memset (&pMd->iBlock8x8StaticIdc[0], 0, sizeof (pMd->iBlock8x8StaticIdc)); +} +// for inter non-dynamic pSlice +int32_t WelsMdInterMbLoop (sWelsEncCtx* pEncCtx, SSlice* pSlice, void* pWelsMd, const int32_t kiSliceFirstMbXY) { + 
SWelsMD* pMd = (SWelsMD*)pWelsMd; + SBitStringAux* pBs = pSlice->pSliceBsa; + SDqLayer* pCurLayer = pEncCtx->pCurDqLayer; + SMbCache* pMbCache = &pSlice->sMbCacheInfo; + SMB* pMbList = pCurLayer->sMbDataP; + SMB* pCurMb = NULL; + int32_t iNumMbCoded = 0; + int32_t iNextMbIdx = kiSliceFirstMbXY; + int32_t iCurMbIdx = -1; + const int32_t kiTotalNumMb = pCurLayer->iMbWidth * pCurLayer->iMbHeight; + const int32_t kiMvdInterTableStride = pEncCtx->iMvdCostTableStride; + uint16_t* pMvdCostTable = &pEncCtx->pMvdCostTable[pEncCtx->iMvdCostTableSize]; + const int32_t kiSliceIdx = pSlice->iSliceIdx; + const uint8_t kuiChromaQpIndexOffset = pCurLayer->sLayerInfo.pPpsP->uiChromaQpIndexOffset; + int32_t iEncReturn = ENC_RETURN_SUCCESS; + SDynamicSlicingStack sDss; + if (pEncCtx->pSvcParam->iEntropyCodingModeFlag) { + WelsInitSliceCabac (pEncCtx, pSlice); + sDss.pRestoreBuffer = NULL; + sDss.iStartPos = sDss.iCurrentPos = 0; + } + pSlice->iMbSkipRun = 0; + for (;;) { + if (!pEncCtx->pSvcParam->iEntropyCodingModeFlag) + pEncCtx->pFuncList->pfStashMBStatus (&sDss, pSlice, pSlice->iMbSkipRun); + //point to current pMb + iCurMbIdx = iNextMbIdx; + pCurMb = &pMbList[ iCurMbIdx ]; + + + //step(1): set QP for the current MB + pEncCtx->pFuncList->pfRc.pfWelsRcMbInit (pEncCtx, pCurMb, pSlice); + + //step (2). 
save some vale for future use, initial pWelsMd + WelsMdIntraInit (pEncCtx, pCurMb, pMbCache, kiSliceFirstMbXY); + WelsMdInterInit (pEncCtx, pSlice, pCurMb, kiSliceFirstMbXY); + +TRY_REENCODING: + WelsInitInterMDStruc (pCurMb, pMvdCostTable, kiMvdInterTableStride, pMd); + pEncCtx->pFuncList->pfInterMd (pEncCtx, pMd, pSlice, pCurMb, pMbCache); + //mb_qp + + //step (4): save from the MD process from future use + WelsMdInterSaveSadAndRefMbType ((pCurLayer->pDecPic->uiRefMbType), pMbCache, pCurMb, pMd); + + pEncCtx->pFuncList->pfMdBackgroundInfoUpdate (pCurLayer, pCurMb, pMbCache->bCollocatedPredFlag, + pEncCtx->pRefPic->iPictureType); + + //step (5): update cache + UpdateNonZeroCountCache (pCurMb, pMbCache); + + //step (6): begin to write bit stream; if the pSlice size is controlled, the writing may be skipped + + iEncReturn = pEncCtx->pFuncList->pfWelsSpatialWriteMbSyn (pEncCtx, pSlice, pCurMb); + if (!pEncCtx->pSvcParam->iEntropyCodingModeFlag) { + if (iEncReturn == ENC_RETURN_VLCOVERFLOWFOUND && (pCurMb->uiLumaQp < 50)) { + pSlice->iMbSkipRun = pEncCtx->pFuncList->pfStashPopMBStatus (&sDss, pSlice); + UpdateQpForOverflow (pCurMb, kuiChromaQpIndexOffset); + goto TRY_REENCODING; + } + } + if (ENC_RETURN_SUCCESS != iEncReturn) + return iEncReturn; + + + //step (7): reconstruct current MB + pCurMb->uiSliceIdc = kiSliceIdx; + OutputPMbWithoutConstructCsRsNoCopy (pEncCtx, pCurLayer, pSlice, pCurMb); + +#if defined(MB_TYPES_CHECK) + WelsCountMbType (pEncCtx->sPerInfo.iMbCount, P_SLICE, pCurMb); +#endif//MB_TYPES_CHECK + + //step (8): update status and other parameters + pEncCtx->pFuncList->pfRc.pfWelsRcMbInfoUpdate (pEncCtx, pCurMb, pMd->iCostLuma, pSlice); + + /*judge if all pMb in cur pSlice has been encoded*/ + ++ iNumMbCoded; + iNextMbIdx = WelsGetNextMbOfSlice (pCurLayer, iCurMbIdx); + //whether all of MB in current pSlice encoded or not + if (iNextMbIdx == -1 || iNextMbIdx >= kiTotalNumMb || iNumMbCoded >= kiTotalNumMb) { + break; + } + } + + if (pSlice->iMbSkipRun) 
{ + BsWriteUE (pBs, pSlice->iMbSkipRun); + } + + return iEncReturn; +} + +// Only for inter dynamic slicing +int32_t WelsMdInterMbLoopOverDynamicSlice (sWelsEncCtx* pEncCtx, SSlice* pSlice, void* pWelsMd, + const int32_t kiSliceFirstMbXY) { + SWelsMD* pMd = (SWelsMD*)pWelsMd; + SBitStringAux* pBs = pSlice->pSliceBsa; + SDqLayer* pCurLayer = pEncCtx->pCurDqLayer; + SSliceCtx* pSliceCtx = &pCurLayer->sSliceEncCtx; + SMbCache* pMbCache = &pSlice->sMbCacheInfo; + SMB* pMbList = pCurLayer->sMbDataP; + SMB* pCurMb = NULL; + int32_t iNumMbCoded = 0; + const int32_t kiTotalNumMb = pCurLayer->iMbWidth * pCurLayer->iMbHeight; + int32_t iNextMbIdx = kiSliceFirstMbXY; + int32_t iCurMbIdx = -1; + const int32_t kiMvdInterTableStride = pEncCtx->iMvdCostTableStride; + uint16_t* pMvdCostTable = &pEncCtx->pMvdCostTable[pEncCtx->iMvdCostTableSize]; + const int32_t kiSliceIdx = pSlice->iSliceIdx; + const int32_t kiPartitionId = (kiSliceIdx % pEncCtx->iActiveThreadsNum); + const uint8_t kuiChromaQpIndexOffset = pCurLayer->sLayerInfo.pPpsP->uiChromaQpIndexOffset; + int32_t iEncReturn = ENC_RETURN_SUCCESS; + + SDynamicSlicingStack sDss; + if (pEncCtx->pSvcParam->iEntropyCodingModeFlag) { + WelsInitSliceCabac (pEncCtx, pSlice); + sDss.iStartPos = sDss.iCurrentPos = 0; + sDss.pRestoreBuffer = pEncCtx->pDynamicBsBuffer[kiPartitionId]; + } else { + sDss.iStartPos = BsGetBitsPos (pBs); + } + pSlice->iMbSkipRun = 0; + for (;;) { + //DYNAMIC_SLICING_ONE_THREAD - MultiD + //stack pBs pointer + pEncCtx->pFuncList->pfStashMBStatus (&sDss, pSlice, pSlice->iMbSkipRun); + + //point to current pMb + iCurMbIdx = iNextMbIdx; + pCurMb = &pMbList[ iCurMbIdx ]; + + //step(1): set QP for the current MB + pEncCtx->pFuncList->pfRc.pfWelsRcMbInit (pEncCtx, pCurMb, pSlice); + // if already reaches the largest number of slices, set QPs to the upper bound + if (pSlice->bDynamicSlicingSliceSizeCtrlFlag) { + //a clearer logic may be: + //if there is no need from size control from the pSlice size, the QP will be 
decided by RC; else it will be set to the max QP + // however, there are some parameter updating in the rc_mb_init() function, so it cannot be skipped? + pCurMb->uiLumaQp = pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId].iMaxQp; + pCurMb->uiChromaQp = g_kuiChromaQpTable[CLIP3_QP_0_51 (pCurMb->uiLumaQp + kuiChromaQpIndexOffset)]; + } + + //step (2). save some vale for future use, initial pWelsMd + WelsMdIntraInit (pEncCtx, pCurMb, pMbCache, kiSliceFirstMbXY); + WelsMdInterInit (pEncCtx, pSlice, pCurMb, kiSliceFirstMbXY); + +TRY_REENCODING: + WelsInitInterMDStruc (pCurMb, pMvdCostTable, kiMvdInterTableStride, pMd); + pEncCtx->pFuncList->pfInterMd (pEncCtx, pMd, pSlice, pCurMb, pMbCache); + //mb_qp + + //step (4): save from the MD process from future use + WelsMdInterSaveSadAndRefMbType ((pCurLayer->pDecPic->uiRefMbType), pMbCache, pCurMb, pMd); + + pEncCtx->pFuncList->pfMdBackgroundInfoUpdate (pCurLayer, pCurMb, pMbCache->bCollocatedPredFlag, + pEncCtx->pRefPic->iPictureType); + + //step (5): update cache + UpdateNonZeroCountCache (pCurMb, pMbCache); + + //step (6): begin to write bit stream; if the pSlice size is controlled, the writing may be skipped + + + + iEncReturn = pEncCtx->pFuncList->pfWelsSpatialWriteMbSyn (pEncCtx, pSlice, pCurMb); + if (iEncReturn == ENC_RETURN_VLCOVERFLOWFOUND && (pCurMb->uiLumaQp < 50)) { + pSlice->iMbSkipRun = pEncCtx->pFuncList->pfStashPopMBStatus (&sDss, pSlice); + UpdateQpForOverflow (pCurMb, kuiChromaQpIndexOffset); + goto TRY_REENCODING; + } + if (ENC_RETURN_SUCCESS != iEncReturn) + return iEncReturn; + + + //DYNAMIC_SLICING_ONE_THREAD - MultiD + sDss.iCurrentPos = pEncCtx->pFuncList->pfGetBsPosition (pSlice); + if (DynSlcJudgeSliceBoundaryStepBack (pEncCtx, pSlice, pSliceCtx, pCurMb, &sDss)) { + pSlice->iMbSkipRun = pEncCtx->pFuncList->pfStashPopMBStatus (&sDss, pSlice); + pCurLayer->LastCodedMbIdxOfPartition[kiPartitionId] = iCurMbIdx - + 1; // update LastCodedMbIdxOfPartition, need to -1 due to stepping back + ++ 
pCurLayer->NumSliceCodedOfPartition[kiPartitionId]; + + break; + } + + //step (7): reconstruct current MB + pCurMb->uiSliceIdc = kiSliceIdx; + OutputPMbWithoutConstructCsRsNoCopy (pEncCtx, pCurLayer, pSlice, pCurMb); + +#if defined(MB_TYPES_CHECK) + WelsCountMbType (pEncCtx->sPerInfo.iMbCount, P_SLICE, pCurMb); +#endif//MB_TYPES_CHECK + + //step (8): update status and other parameters + pEncCtx->pFuncList->pfRc.pfWelsRcMbInfoUpdate (pEncCtx, pCurMb, pMd->iCostLuma, pSlice); + + /*judge if all pMb in cur pSlice has been encoded*/ + ++ iNumMbCoded; + iNextMbIdx = WelsGetNextMbOfSlice (pCurLayer, iCurMbIdx); + //whether all of MB in current pSlice encoded or not + if (iNextMbIdx == -1 || iNextMbIdx >= kiTotalNumMb || iNumMbCoded >= kiTotalNumMb) { + pCurLayer->LastCodedMbIdxOfPartition[kiPartitionId] = iCurMbIdx; + ++ pCurLayer->NumSliceCodedOfPartition[kiPartitionId]; + + break; + } + } + + if (pSlice->iMbSkipRun) { + BsWriteUE (pBs, pSlice->iMbSkipRun); + } + + return iEncReturn; +} + +}//namespace WelsEnc diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/svc_mode_decision.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/svc_mode_decision.cpp new file mode 100644 index 000000000..5b4793561 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/svc_mode_decision.cpp @@ -0,0 +1,686 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file svc_mode_decision.c + * + * \brief Algorithmetic MD for: + * - multi-spatial Enhancement Layer MD; + * - Scrolling PSkip Decision for screen content + * + * \date 2009.7.29 + * + + ************************************************************************************** + */ +#include "mv_pred.h" +#include "ls_defines.h" +#include "svc_base_layer_md.h" +#include "svc_mode_decision.h" + +namespace WelsEnc { + +////////////// +// MD for enhancement layers +////////////// +void WelsMdSpatialelInterMbIlfmdNoilp (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice* pSlice, + SMB* pCurMb, const Mb_Type kuiRefMbType) { + SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer; + SMbCache* pMbCache = &pSlice->sMbCacheInfo; + + const uint32_t kuiNeighborAvail = pCurMb->uiNeighborAvail; + const int32_t kiMbWidth = pCurDqLayer->iMbWidth; + const SMB* kpTopMb = pCurMb - kiMbWidth; + const bool kbMbLeftAvailPskip = ((kuiNeighborAvail & LEFT_MB_POS) ? IS_SKIP ((pCurMb - 1)->uiMbType) : false); + const bool kbMbTopAvailPskip = ((kuiNeighborAvail & TOP_MB_POS) ? IS_SKIP (kpTopMb->uiMbType) : false); + const bool kbMbTopLeftAvailPskip = ((kuiNeighborAvail & TOPLEFT_MB_POS) ? 
IS_SKIP ((kpTopMb - 1)->uiMbType) : false); + const bool kbMbTopRightAvailPskip = ((kuiNeighborAvail & TOPRIGHT_MB_POS) ? IS_SKIP ((kpTopMb + 1)->uiMbType) : false); + + bool bTrySkip = kbMbLeftAvailPskip | kbMbTopAvailPskip | kbMbTopLeftAvailPskip | kbMbTopRightAvailPskip; + bool bKeepSkip = kbMbLeftAvailPskip & kbMbTopAvailPskip & kbMbTopRightAvailPskip; + bool bSkip = false; + + if (pEncCtx->pFuncList->pfInterMdBackgroundDecision (pEncCtx, pWelsMd, pSlice, pCurMb, pMbCache, &bKeepSkip)) { + return; + } + + //step 1: try SKIP + bSkip = WelsMdInterJudgePskip (pEncCtx, pWelsMd, pSlice, pCurMb, pMbCache, bTrySkip); + + if (bSkip && bKeepSkip) { + WelsMdInterDecidedPskip (pEncCtx, pSlice, pCurMb, pMbCache); + return; + } + + if (! IS_SVC_INTRA (kuiRefMbType)) { + if (!bSkip) { + PredictSad (pMbCache->sMvComponents.iRefIndexCache, pMbCache->iSadCost, 0, &pWelsMd->iSadPredMb); + + //step 2: P_16x16 + pWelsMd->iCostLuma = WelsMdP16x16 (pEncCtx->pFuncList, pCurDqLayer, pWelsMd, pSlice, pCurMb); + pCurMb->uiMbType = MB_TYPE_16x16; + } + + WelsMdInterSecondaryModesEnc (pEncCtx, pWelsMd, pSlice, pCurMb, pMbCache, bSkip); + } else { //BLMODE == SVC_INTRA + //initial prediction memory for I_16x16 + const int32_t kiCostI16x16 = WelsMdI16x16 (pEncCtx->pFuncList, pEncCtx->pCurDqLayer, pMbCache, pWelsMd->iLambda); + if (bSkip && (pWelsMd->iCostLuma <= kiCostI16x16)) { + WelsMdInterDecidedPskip (pEncCtx, pSlice, pCurMb, pMbCache); + } else { + pWelsMd->iCostLuma = kiCostI16x16; + pCurMb->uiMbType = MB_TYPE_INTRA16x16; + + WelsMdIntraSecondaryModesEnc (pEncCtx, pWelsMd, pCurMb, pMbCache); + } + } +} + + + +void WelsMdInterMbEnhancelayer (sWelsEncCtx* pEncCtx, SWelsMD* pMd, SSlice* pSlice, SMB* pCurMb, SMbCache* pMbCache) { + SDqLayer* pCurLayer = pEncCtx->pCurDqLayer; + SWelsMD* pWelsMd = (SWelsMD*)pMd; + const SMB* kpInterLayerRefMb = GetRefMb (pCurLayer, pCurMb); + const Mb_Type kuiInterLayerRefMbType = kpInterLayerRefMb->uiMbType; + + SetMvBaseEnhancelayer (pWelsMd, pCurMb, + 
kpInterLayerRefMb); // initial sMvBase here only when pRef mb type is inter, if not sMvBase will be not used! + //step (3): do the MD process + WelsMdSpatialelInterMbIlfmdNoilp (pEncCtx, pWelsMd, pSlice, pCurMb, kuiInterLayerRefMbType); //MD process +} + + +// do initiation for noILP (needed by ILFMD) +SMB* GetRefMb (SDqLayer* pCurLayer, SMB* pCurMb) { + const SDqLayer* kpRefLayer = pCurLayer->pRefLayer; + const int32_t kiRefMbIdx = (pCurMb->iMbY >> 1) * kpRefLayer->iMbWidth + (pCurMb->iMbX >> + 1); //because current lower layer is half size on both vertical and horizontal + return (&kpRefLayer->sMbDataP[kiRefMbIdx]); +} + +void SetMvBaseEnhancelayer (SWelsMD* pMd, SMB* pCurMb, const SMB* kpRefMb) { + const Mb_Type kuiRefMbType = kpRefMb->uiMbType; + + if (! IS_SVC_INTRA (kuiRefMbType)) { + SMVUnitXY sMv; + int32_t iRefMbPartIdx = ((pCurMb->iMbY & 0x01) << 1) + (pCurMb->iMbX & 0x01); //may be need modified + int32_t iScan4RefPartIdx = g_kuiMbCountScan4Idx[ (iRefMbPartIdx << 2)]; + sMv.iMvX = kpRefMb->sMv[iScan4RefPartIdx].iMvX * (1 << 1); + sMv.iMvY = kpRefMb->sMv[iScan4RefPartIdx].iMvY * (1 << 1); + + pMd->sMe.sMe16x16.sMvBase = sMv; + + pMd->sMe.sMe8x8[0].sMvBase = + pMd->sMe.sMe8x8[1].sMvBase = + pMd->sMe.sMe8x8[2].sMvBase = + pMd->sMe.sMe8x8[3].sMvBase = sMv; + + pMd->sMe.sMe16x8[0].sMvBase = + pMd->sMe.sMe16x8[1].sMvBase = + pMd->sMe.sMe8x16[0].sMvBase = + pMd->sMe.sMe8x16[1].sMvBase = sMv; + } +} + + + +////////////// +// MD for Background decision +////////////// +////// +// try the BGD Pskip +////// +inline int32_t GetChromaCost (PSampleSadSatdCostFunc* pCalculateFunc, + uint8_t* pSrcChroma, int32_t iSrcStride, uint8_t* pRefChroma, int32_t iRefStride) { + return pCalculateFunc[BLOCK_8x8] (pSrcChroma, iSrcStride, pRefChroma, iRefStride); +} +inline bool IsCostLessEqualSkipCost (int32_t iCurCost, const int32_t iPredPskipSad, const int32_t iRefMbType, + const SPicture* pRef, const int32_t iMbXy, const int32_t iSmallestInvisibleTh) { + return ((iPredPskipSad > 
iSmallestInvisibleTh && iCurCost >= iPredPskipSad) || + (pRef->iPictureType == P_SLICE && + iRefMbType == MB_TYPE_SKIP && + pRef->pMbSkipSad[iMbXy] > iSmallestInvisibleTh && + iCurCost >= (pRef->pMbSkipSad[iMbXy]))); +} +bool CheckChromaCost (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMbCache* pMbCache, const int32_t iCurMbXy) { +#define KNOWN_CHROMA_TOO_LARGE 640 +#define SMALLEST_INVISIBLE 128 //2*64, 2 in pixel maybe the smallest not visible for luma + + PSampleSadSatdCostFunc* pSad = pEncCtx->pFuncList->sSampleDealingFuncs.pfSampleSad; + SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer; + + uint8_t* pCbEnc = pMbCache->SPicData.pEncMb[1]; + uint8_t* pCrEnc = pMbCache->SPicData.pEncMb[2]; + uint8_t* pCbRef = pMbCache->SPicData.pRefMb[1]; + uint8_t* pCrRef = pMbCache->SPicData.pRefMb[2]; + + const int32_t iCbEncStride = pCurDqLayer->iEncStride[1]; + const int32_t iCrEncStride = pCurDqLayer->iEncStride[2]; + const int32_t iChromaRefStride = pCurDqLayer->pRefPic->iLineSize[1]; + + const int32_t iCbSad = GetChromaCost (pSad, pCbEnc, iCbEncStride, pCbRef, iChromaRefStride); + const int32_t iCrSad = GetChromaCost (pSad, pCrEnc, iCrEncStride, pCrRef, iChromaRefStride); + + //01/17/13 + //the in-question error area is + //from: (yellow) Y=212, V=023, U=145 + //to: (grey) Y=213, V=136, U=124 + //visible difference can be seen on the U plane + //so the allowing chroma difference should be at least no larger than + //20*8*8 = 1280 for U or V + //one local test case show that "either one >640" will become a too strict criteria, which will appear when QP is large(36) and maybe no much harm for visual + //another local test case show that "either one >960" will be a moderate criteria, an area is changed from light green to light pink, but without careful observation it won't be obvious, but people will feel the unclean area (and note that, the color visible criteria is also related to the luma of them!) 
+ //another case show that color changed from black to very dark red can be visible even under the threshold 960, the color difference is about 13*64=832 (U123V145->U129V132) + //TODO: + //OPTI-ABLE: the visible color criteria may be related to luma (very bright or very dark), or related to the ratio of U/V rather than the absolute value + const bool bChromaTooLarge = (iCbSad > KNOWN_CHROMA_TOO_LARGE || iCrSad > KNOWN_CHROMA_TOO_LARGE); + + const int32_t iChromaSad = iCbSad + iCrSad; + PredictSadSkip (pMbCache->sMvComponents.iRefIndexCache, pMbCache->bMbTypeSkip, pMbCache->iSadCostSkip, 0, + & (pWelsMd->iSadPredSkip)); + const bool bChromaCostCannotSkip = IsCostLessEqualSkipCost (iChromaSad, pWelsMd->iSadPredSkip, pMbCache->uiRefMbType, + pCurDqLayer->pRefPic, iCurMbXy, SMALLEST_INVISIBLE); + + return (!bChromaCostCannotSkip && !bChromaTooLarge); +} + +//01/17/2013. USE the NEW BGD Pskip with COLOR CHECK for screen content and camera because of color artifact seen in test +bool WelsMdInterJudgeBGDPskip (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice* pSlice, SMB* pCurMb, SMbCache* pMbCache, + bool* bKeepSkip) { + SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer; + + const int32_t kiRefMbQp = pCurDqLayer->pRefPic->pRefMbQp[pCurMb->iMbXY]; + const int32_t kiCurMbQp = pCurMb->uiLumaQp;// unsigned -> signed + int8_t* pVaaBgMbFlag = pEncCtx->pVaa->pVaaBackgroundMbFlag + pCurMb->iMbXY; + + const int32_t kiMbWidth = pCurDqLayer->iMbWidth; + + *bKeepSkip = (*bKeepSkip) && + ((!pVaaBgMbFlag[-1]) && + (!pVaaBgMbFlag[-kiMbWidth]) && + (!pVaaBgMbFlag[-kiMbWidth + 1])); + + if ( + *pVaaBgMbFlag + && !IS_INTRA (pMbCache->uiRefMbType) + && (kiRefMbQp - kiCurMbQp <= DELTA_QP_BGD_THD || kiRefMbQp <= 26) + ) { + //01/16/13 + //the current BGD method uses luma SAD in first step judging of Background blocks + //and uses chroma edges to confirm the Background blocks + //HOWEVER, there is such case in SCC, + //that the luma of two collocated blocks (block in reference frame and in current 
frame) is very similar + //but the chroma are very different, at the same time the chroma are plain and without edge + //IN SUCH A CASE, + //it will be not proper to just use Pskip + //TODO: consider reusing this result of ChromaCheck when SCDSkip needs this as well + + if (CheckChromaCost (pEncCtx, pWelsMd, pMbCache, pCurMb->iMbXY)) { + SMVUnitXY sVaaPredSkipMv = { 0 }; + PredSkipMv (pMbCache, &sVaaPredSkipMv); + WelsMdBackgroundMbEnc (pEncCtx, pWelsMd, pCurMb, pMbCache, pSlice, (LD32 (&sVaaPredSkipMv) == 0)); + return true; + } + } + + return false; +} + +bool WelsMdInterJudgeBGDPskipFalse (sWelsEncCtx* pCtx, SWelsMD* pMd, SSlice* pSlice, SMB* pCurMb, SMbCache* pMbCache, + bool* bKeepSkip) { + return false; +} + + + +////// +// update BGD related info +////// +void WelsMdUpdateBGDInfo (SDqLayer* pCurLayer, SMB* pCurMb, const bool bCollocatedPredFlag, + const int32_t iRefPictureType) { + uint8_t* pTargetRefMbQpList = (pCurLayer->pDecPic->pRefMbQp); + const int32_t kiMbXY = pCurMb->iMbXY; + + if (pCurMb->uiCbp || I_SLICE == iRefPictureType || 0 == bCollocatedPredFlag) { + pTargetRefMbQpList[kiMbXY] = pCurMb->uiLumaQp; + } else { //unchange, do not need to evaluation? 
+ uint8_t* pRefPicRefMbQpList = (pCurLayer->pRefPic->pRefMbQp); + pTargetRefMbQpList[kiMbXY] = pRefPicRefMbQpList[kiMbXY]; + } + + if (pCurMb->uiMbType == MB_TYPE_BACKGROUND) { + pCurMb->uiMbType = MB_TYPE_SKIP; + } +} + +void WelsMdUpdateBGDInfoNULL (SDqLayer* pCurLayer, SMB* pCurMb, const bool bCollocatedPredFlag, + const int32_t iRefPictureType) { + WelsMdUpdateBGDInfo (pCurLayer, pCurMb, bCollocatedPredFlag, iRefPictureType); +} + + +////////////// +// MD for screen contents +////////////// +inline bool IsMbStatic (int32_t* pBlockType, EStaticBlockIdc eType) { + return (pBlockType != NULL && + eType == pBlockType[0] && + eType == pBlockType[1] && + eType == pBlockType[2] && + eType == pBlockType[3]); +} +inline bool IsMbCollocatedStatic (int32_t* pBlockType) { + return IsMbStatic (pBlockType, COLLOCATED_STATIC); +} + +inline bool IsMbScrolledStatic (int32_t* pBlockType) { + return IsMbStatic (pBlockType, SCROLLED_STATIC); +} + +inline int32_t CalUVSadCost (SWelsFuncPtrList* pFunc, uint8_t* pEncOri, int32_t iStrideUV, uint8_t* pRefOri, + int32_t iRefLineSize) { + return pFunc->sSampleDealingFuncs.pfSampleSad[BLOCK_8x8] (pEncOri, iStrideUV, pRefOri, iRefLineSize); +} + +inline bool CheckBorder (int32_t iMbX, int32_t iMbY, int32_t iScrollMvX, int32_t iScrollMvY, int32_t iMbWidth, + int32_t iMbHeight) { + return ((iMbX << 4) + iScrollMvX < 0 || + (iMbX << 4) + iScrollMvX > (iMbWidth - 1) << 4 || + (iMbY << 4) + iScrollMvY < 0 || + (iMbY << 4) + iScrollMvY > (iMbHeight - 1) << 4 + ); //border check for safety +} + + +bool JudgeStaticSkip (sWelsEncCtx* pEncCtx, SMB* pCurMb, SMbCache* pMbCache, SWelsMD* pWelsMd) { + SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer; + const int32_t kiMbX = pCurMb->iMbX; + const int32_t kiMbY = pCurMb->iMbY; + + bool bTryStaticSkip = IsMbCollocatedStatic (pWelsMd->iBlock8x8StaticIdc); + if (bTryStaticSkip) { + int32_t iStrideUV, iOffsetUV; + SWelsFuncPtrList* pFunc = pEncCtx->pFuncList; + SPicture* pRefOri = pCurDqLayer->pRefOri[0]; + if 
(pRefOri != NULL) { + iStrideUV = pCurDqLayer->iEncStride[1]; + iOffsetUV = (kiMbX + kiMbY * iStrideUV) << 3; + + int32_t iSadCostCb = CalUVSadCost (pFunc, pMbCache->SPicData.pEncMb[1], iStrideUV, pRefOri->pData[1] + iOffsetUV, + pRefOri->iLineSize[1]); + if (iSadCostCb == 0) { + int32_t iSadCostCr = CalUVSadCost (pFunc, pMbCache->SPicData.pEncMb[2], iStrideUV, pRefOri->pData[2] + iOffsetUV, + pRefOri->iLineSize[1]); + bTryStaticSkip = (0 == iSadCostCr); + } else bTryStaticSkip = false; + } else { + bTryStaticSkip = false; + } + } + return bTryStaticSkip; +} + +bool JudgeScrollSkip (sWelsEncCtx* pEncCtx, SMB* pCurMb, SMbCache* pMbCache, SWelsMD* pWelsMd) { + SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer; + const int32_t kiMbX = pCurMb->iMbX; + const int32_t kiMbY = pCurMb->iMbY; + const int32_t kiMbWidth = pCurDqLayer->iMbWidth; + const int32_t kiMbHeight = pCurDqLayer->iMbHeight; + // const int32_t block_width = mb_width << 1; + SVAAFrameInfoExt_t* pVaaExt = static_cast (pEncCtx->pVaa); + + bool bTryScrollSkip = false; + + if (pVaaExt->sScrollDetectInfo.bScrollDetectFlag) + bTryScrollSkip = IsMbScrolledStatic (pWelsMd->iBlock8x8StaticIdc); + else return 0; + + if (bTryScrollSkip) { + int32_t iStrideUV, iOffsetUV; + SWelsFuncPtrList* pFunc = pEncCtx->pFuncList; + SPicture* pRefOri = pCurDqLayer->pRefOri[0]; + if (pRefOri != NULL) { + int32_t iScrollMvX = pVaaExt->sScrollDetectInfo.iScrollMvX; + int32_t iScrollMvY = pVaaExt->sScrollDetectInfo.iScrollMvY; + if (CheckBorder (kiMbX, kiMbY, iScrollMvX, iScrollMvY, kiMbWidth, kiMbHeight)) { + bTryScrollSkip = false; + } else { + iStrideUV = pCurDqLayer->iEncStride[1]; + iOffsetUV = (kiMbX << 3) + (iScrollMvX >> 1) + ((kiMbY << 3) + (iScrollMvY >> 1)) * iStrideUV; + + int32_t iSadCostCb = CalUVSadCost (pFunc, pMbCache->SPicData.pEncMb[1], iStrideUV, pRefOri->pData[1] + iOffsetUV, + pRefOri->iLineSize[1]); + if (iSadCostCb == 0) { + int32_t iSadCostCr = CalUVSadCost (pFunc, pMbCache->SPicData.pEncMb[2], iStrideUV, 
pRefOri->pData[2] + iOffsetUV, + pRefOri->iLineSize[1]); + bTryScrollSkip = (0 == iSadCostCr); + } else bTryScrollSkip = false; + } + } + } + return bTryScrollSkip; +} + +void SvcMdSCDMbEnc (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurMb, SMbCache* pMbCache, SSlice* pSlice, + bool bQpSimilarFlag, + bool bMbSkipFlag, SMVUnitXY sCurMbMv[], ESkipModes eSkipMode) { + SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer; + SWelsFuncPtrList* pFunc = pEncCtx->pFuncList; + SMVUnitXY sMvp = { 0}; + ST16 (&sMvp.iMvX, sCurMbMv[eSkipMode].iMvX); + ST16 (&sMvp.iMvY, sCurMbMv[eSkipMode].iMvY); + uint8_t* pRefLuma = pMbCache->SPicData.pRefMb[0]; + uint8_t* pRefCb = pMbCache->SPicData.pRefMb[1]; + uint8_t* pRefCr = pMbCache->SPicData.pRefMb[2]; + int32_t iLineSizeY = pCurDqLayer->pRefPic->iLineSize[0]; + int32_t iLineSizeUV = pCurDqLayer->pRefPic->iLineSize[1]; + uint8_t* pDstLuma = pMbCache->pSkipMb; + uint8_t* pDstCb = pMbCache->pSkipMb + 256; + uint8_t* pDstCr = pMbCache->pSkipMb + 256 + 64; + + const int32_t iOffsetY = (sCurMbMv[eSkipMode].iMvX >> 2) + (sCurMbMv[eSkipMode].iMvY >> 2) * iLineSizeY; + const int32_t iOffsetUV = (sCurMbMv[eSkipMode].iMvX >> 3) + (sCurMbMv[eSkipMode].iMvY >> 3) * iLineSizeUV; + + if (!bQpSimilarFlag || !bMbSkipFlag) { + pDstLuma = pMbCache->pMemPredLuma; + pDstCb = pMbCache->pMemPredChroma; + pDstCr = pMbCache->pMemPredChroma + 64; + } + //MC + pFunc->sMcFuncs.pMcLumaFunc (pRefLuma + iOffsetY, iLineSizeY, pDstLuma, 16, 0, 0, 16, 16); + pFunc->sMcFuncs.pMcChromaFunc (pRefCb + iOffsetUV, iLineSizeUV, pDstCb, 8, sMvp.iMvX, sMvp.iMvY, 8, 8); + pFunc->sMcFuncs.pMcChromaFunc (pRefCr + iOffsetUV, iLineSizeUV, pDstCr, 8, sMvp.iMvX, sMvp.iMvY, 8, 8); + + pCurMb->uiCbp = 0; + pWelsMd->iCostLuma = 0; + pCurMb->pSadCost[0] = pFunc->sSampleDealingFuncs.pfSampleSad[BLOCK_16x16] (pMbCache->SPicData.pEncMb[0], + pCurDqLayer->iEncStride[0], pRefLuma + iOffsetY, iLineSizeY); + + pWelsMd->iCostSkipMb = pCurMb->pSadCost[0]; + + ST16 (& (pCurMb->sP16x16Mv.iMvX), 
sCurMbMv[eSkipMode].iMvX); + ST16 (& (pCurMb->sP16x16Mv.iMvY), sCurMbMv[eSkipMode].iMvY); + + ST16 (& (pCurDqLayer->pDecPic->sMvList[pCurMb->iMbXY].iMvX), sCurMbMv[eSkipMode].iMvX); + ST16 (& (pCurDqLayer->pDecPic->sMvList[pCurMb->iMbXY].iMvY), sCurMbMv[eSkipMode].iMvY); + + if (bQpSimilarFlag && bMbSkipFlag) { + //update motion info to current MB + ST32 (pCurMb->pRefIndex, 0); + pFunc->pfUpdateMbMv (pCurMb->sMv, sMvp); + pCurMb->uiMbType = MB_TYPE_SKIP; + WelsRecPskip (pCurDqLayer, pEncCtx->pFuncList, pCurMb, pMbCache); + WelsMdInterUpdatePskip (pCurDqLayer, pSlice, pCurMb, pMbCache); + return; + } + + pCurMb->uiMbType = MB_TYPE_16x16; + + pWelsMd->sMe.sMe16x16.sMv.iMvX = sCurMbMv[eSkipMode].iMvX; + pWelsMd->sMe.sMe16x16.sMv.iMvY = sCurMbMv[eSkipMode].iMvY; + PredMv (&pMbCache->sMvComponents, 0, 4, 0, &pWelsMd->sMe.sMe16x16.sMvp); + pMbCache->sMbMvp[0] = pWelsMd->sMe.sMe16x16.sMvp; + + UpdateP16x16MotionInfo (pMbCache, pCurMb, 0, &pWelsMd->sMe.sMe16x16.sMv); + + if (pWelsMd->bMdUsingSad) + pWelsMd->iCostLuma = pCurMb->pSadCost[0]; + else + pWelsMd->iCostLuma = pFunc->sSampleDealingFuncs.pfSampleSad[BLOCK_16x16] (pMbCache->SPicData.pEncMb[0], + pCurDqLayer->iEncStride[0], pRefLuma, iLineSizeY); + + WelsInterMbEncode (pEncCtx, pSlice, pCurMb); + WelsPMbChromaEncode (pEncCtx, pSlice, pCurMb); + + pFunc->pfCopy16x16Aligned (pMbCache->SPicData.pCsMb[0], pCurDqLayer->iCsStride[0], pMbCache->pMemPredLuma, 16); + pFunc->pfCopy8x8Aligned (pMbCache->SPicData.pCsMb[1], pCurDqLayer->iCsStride[1], pMbCache->pMemPredChroma, 8); + pFunc->pfCopy8x8Aligned (pMbCache->SPicData.pCsMb[2], pCurDqLayer->iCsStride[1], pMbCache->pMemPredChroma + 64, 8); +} + +bool MdInterSCDPskipProcess (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice* pSlice, SMB* pCurMb, SMbCache* pMbCache, + ESkipModes eSkipMode) { + SVAAFrameInfoExt_t* pVaaExt = static_cast (pEncCtx->pVaa); + SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer; + + const int32_t kiRefMbQp = pCurDqLayer->pRefPic->pRefMbQp[pCurMb->iMbXY]; + 
const int32_t kiCurMbQp = pCurMb->uiLumaQp;// unsigned -> signed + + pJudgeSkipFun pJudeSkip[2] = {JudgeStaticSkip, JudgeScrollSkip}; + bool bSkipFlag = pJudeSkip[eSkipMode] (pEncCtx, pCurMb, pMbCache, pWelsMd); + + if (bSkipFlag) { + bool bQpSimilarFlag = (kiRefMbQp - kiCurMbQp <= DELTA_QP_SCD_THD || kiRefMbQp <= 26); + SMVUnitXY sVaaPredSkipMv = {0, 0}, sCurMbMv[2] = {{0, 0}, {0, 0}}; + PredSkipMv (pMbCache, &sVaaPredSkipMv); + + if (eSkipMode == SCROLLED) { + sCurMbMv[1].iMvX = static_cast (pVaaExt->sScrollDetectInfo.iScrollMvX << 2); + sCurMbMv[1].iMvY = static_cast (pVaaExt->sScrollDetectInfo.iScrollMvY << 2); + } + + bool bMbSkipFlag = (LD32 (&sVaaPredSkipMv) == LD32 (&sCurMbMv[eSkipMode])) ; + SvcMdSCDMbEnc (pEncCtx, pWelsMd, pCurMb, pMbCache, pSlice, bQpSimilarFlag, bMbSkipFlag, sCurMbMv, eSkipMode); + + return true; + } + + return false; +} + +void SetBlockStaticIdcToMd (void* pVaa, SWelsMD* pWelsMd, SMB* pCurMb, SDqLayer* pDqLayer) { + SVAAFrameInfoExt_t* pVaaExt = static_cast (pVaa); + + const int32_t kiMbX = pCurMb->iMbX; + const int32_t kiMbY = pCurMb->iMbY; + const int32_t kiMbWidth = pDqLayer->iMbWidth; + const int32_t kiWidth = kiMbWidth << 1; + + const int32_t kiBlockIndexUp = (kiMbY << 1) * kiWidth + (kiMbX << 1); + const int32_t kiBlockIndexLow = ((kiMbY << 1) + 1) * kiWidth + (kiMbX << 1); + + //fill_blockstaticidc with pVaaExt->pVaaBestBlockStaticIdc + pWelsMd->iBlock8x8StaticIdc[0] = pVaaExt->pVaaBestBlockStaticIdc[kiBlockIndexUp]; + pWelsMd->iBlock8x8StaticIdc[1] = pVaaExt->pVaaBestBlockStaticIdc[kiBlockIndexUp + 1]; + pWelsMd->iBlock8x8StaticIdc[2] = pVaaExt->pVaaBestBlockStaticIdc[kiBlockIndexLow]; + pWelsMd->iBlock8x8StaticIdc[3] = pVaaExt->pVaaBestBlockStaticIdc[kiBlockIndexLow + 1]; + +} + +/////////////////////// +// Scene Change Detection (SCD) PSkip Decision for screen content +//////////////////////// +bool WelsMdInterJudgeSCDPskip (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice* slice, SMB* pCurMb, SMbCache* pMbCache) { + SDqLayer* 
pCurDqLayer = pEncCtx->pCurDqLayer; + + SetBlockStaticIdcToMd (pEncCtx->pVaa, pWelsMd, pCurMb, pCurDqLayer); + + //try static Pskip; + if (MdInterSCDPskipProcess (pEncCtx, pWelsMd, slice, pCurMb, pMbCache, STATIC)) { + return true; + } + + //try scrolled Pskip + if (MdInterSCDPskipProcess (pEncCtx, pWelsMd, slice, pCurMb, pMbCache, SCROLLED)) { + return true; + } + + return false; +} +bool WelsMdInterJudgeSCDPskipFalse (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice* slice, SMB* pCurMb, + SMbCache* pMbCache) { + return false; +} + + +void WelsInitSCDPskipFunc (SWelsFuncPtrList* pFuncList, const bool bScrollingDetection) { + if (bScrollingDetection) { + pFuncList->pfSCDPSkipDecision = WelsMdInterJudgeSCDPskip; + } else { + pFuncList->pfSCDPSkipDecision = WelsMdInterJudgeSCDPskipFalse; + } +} + +/////////////////////// +// SubP16x16 Mode Decision for screen content +//////////////////////// +// +//func pointer of inter MD for sub16x16 INTER MD for screen content coding +// +static inline void MergeSub16Me (const SWelsME& sSrcMe0, const SWelsME& sSrcMe1, SWelsME* pTarMe) { + memcpy (pTarMe, &sSrcMe0, sizeof (sSrcMe0)); // confirmed_safe_unsafe_usage + + pTarMe->uiSadCost = sSrcMe0.uiSadCost + sSrcMe1.uiSadCost;//not precise cost since MVD cost is not the same + pTarMe->uiSatdCost = sSrcMe0.uiSatdCost + sSrcMe1.uiSatdCost;//not precise cost since MVD cost is not the same +} +static inline bool IsSameMv (const SMVUnitXY& sMv0, const SMVUnitXY& sMv1) { + return ((sMv0.iMvX == sMv1.iMvX) && (sMv0.iMvY == sMv1.iMvY)); +} +bool TryModeMerge (SMbCache* pMbCache, SWelsMD* pWelsMd, SMB* pCurMb) { + SWelsME* pMe8x8 = & (pWelsMd->sMe.sMe8x8[0]); + const bool bSameMv16x8_0 = IsSameMv (pMe8x8[0].sMv, pMe8x8[1].sMv); + const bool bSameMv16x8_1 = IsSameMv (pMe8x8[2].sMv, pMe8x8[3].sMv); + + const bool bSameMv8x16_0 = IsSameMv (pMe8x8[0].sMv, pMe8x8[2].sMv); + const bool bSameMv8x16_1 = IsSameMv (pMe8x8[1].sMv, pMe8x8[3].sMv); + //need to consider iRefIdx when multi ref is available 
+ const bool bSameRefIdx16x8_0 = true; //pMe8x8[0].iRefIdx == pMe8x8[1].iRefIdx; + const bool bSameRefIdx16x8_1 = true; //pMe8x8[2].iRefIdx == pMe8x8[3].iRefIdx; + const bool bSameRefIdx8x16_0 = true; //pMe8x8[0].iRefIdx == pMe8x8[2].iRefIdx; + const bool bSameRefIdx8x16_1 = true; //pMe8x8[1].iRefIdx == pMe8x8[3].iRefIdx; + const int32_t iSameMv = ((bSameMv16x8_0 && bSameRefIdx16x8_0 && bSameMv16x8_1 && bSameRefIdx16x8_1) << 1) | + (bSameMv8x16_0 && bSameRefIdx8x16_0 && bSameMv8x16_1 && bSameRefIdx8x16_1); + + //TODO: did not consider the MVD cost here, may consider later + switch (iSameMv) { + case 3: + //MERGE_16x16 + //from test results of multiple sequences show that using the following 0x0F to merge 16x16 + //for some seq there is BR saving some loss + //on the whole the BR will increase little bit + //to save complexity we decided not to merge 16x16 at present (10/12/2012) + //TODO: agjusted order, consider re-test later + break; + case 2: + pCurMb->uiMbType = MB_TYPE_16x8; + MergeSub16Me (pMe8x8[0], pMe8x8[1], & (pWelsMd->sMe.sMe16x8[0])); + MergeSub16Me (pMe8x8[2], pMe8x8[3], & (pWelsMd->sMe.sMe16x8[1])); + PredInter16x8Mv (pMbCache, 0, 0, & (pWelsMd->sMe.sMe16x8[0].sMvp)); + PredInter16x8Mv (pMbCache, 8, 0, & (pWelsMd->sMe.sMe16x8[1].sMvp)); + break; + case 1: + pCurMb->uiMbType = MB_TYPE_8x16; + MergeSub16Me (pMe8x8[0], pMe8x8[2], & (pWelsMd->sMe.sMe8x16[0])); + MergeSub16Me (pMe8x8[1], pMe8x8[3], & (pWelsMd->sMe.sMe8x16[1])); + PredInter8x16Mv (pMbCache, 0, 0, & (pWelsMd->sMe.sMe8x16[0].sMvp)); + PredInter8x16Mv (pMbCache, 4, 0, & (pWelsMd->sMe.sMe8x16[1].sMvp)); + break; + default: + break; + } + return (MB_TYPE_8x8 != pCurMb->uiMbType); +} + + +void WelsMdInterFinePartitionVaaOnScreen (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice* pSlice, SMB* pCurMb, + int32_t iBestCost) { + SMbCache* pMbCache = &pSlice->sMbCacheInfo; + SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer; + int32_t iCostP8x8; + uint8_t uiMbSign = pEncCtx->pFuncList->pfGetMbSignFromInterVaa 
(&pEncCtx->pVaa->sVaaCalcInfo.pSad8x8[pCurMb->iMbXY][0]); + + if (MBVAASIGN_FLAT == uiMbSign) { + return; + } + + iCostP8x8 = WelsMdP8x8 (pEncCtx->pFuncList, pCurDqLayer, pWelsMd, pSlice); + if (iCostP8x8 < iBestCost) { + iBestCost = iCostP8x8; + pCurMb->uiMbType = MB_TYPE_8x8; + memset (pCurMb->uiSubMbType, SUB_MB_TYPE_8x8, 4); +#if 0 //Disable for sub8x8 modes for now + iBestCost = 0; + //reset neighbor info for sub8x8 + pMbCache->sMvComponents.iRefIndexCache [9] = pMbCache->sMvComponents.iRefIndexCache [21] = REF_NOT_AVAIL; + for (int32_t i8x8Idx = 0; i8x8Idx < 4; ++i8x8Idx) { + int32_t iCurCostSub8x8, iBestCostSub8x8 = pWelsMd->sMe.sMe8x8[i8x8Idx].uiSatdCost; + //4x4 + iCurCostSub8x8 = WelsMdP4x4 (pEncCtx->pFuncList, pCurDqLayer, pWelsMd, pSlice, i8x8Idx); + if (iCurCostSub8x8 < iBestCostSub8x8) { + pCurMb->uiSubMbType[i8x8Idx] = SUB_MB_TYPE_4x4; + iBestCostSub8x8 = iCurCostSub8x8; + } + //8x4 + iCurCostSub8x8 = WelsMdP8x4 (pEncCtx->pFuncList, pCurDqLayer, pWelsMd, pSlice, i8x8Idx); + if (iCurCostSub8x8 < iBestCostSub8x8) { + pCurMb->uiSubMbType[i8x8Idx] = SUB_MB_TYPE_8x4; + iBestCostSub8x8 = iCurCostSub8x8; + } + //4x8 + iCurCostSub8x8 = WelsMdP4x8 (pEncCtx->pFuncList, pCurDqLayer, pWelsMd, pSlice, i8x8Idx); + if (iCurCostSub8x8 < iBestCostSub8x8) { + pCurMb->uiSubMbType[i8x8Idx] = SUB_MB_TYPE_4x8; + iBestCostSub8x8 = iCurCostSub8x8; + } + iBestCost += iBestCostSub8x8; + } + if ((pCurMb->uiSubMbType[0] == SUB_MB_TYPE_8x8) && (pCurMb->uiSubMbType[1] == SUB_MB_TYPE_8x8) + && (pCurMb->uiSubMbType[2] == SUB_MB_TYPE_8x8) && (pCurMb->uiSubMbType[3] == SUB_MB_TYPE_8x8)) //all 8x8 +#endif + TryModeMerge (pMbCache, pWelsMd, pCurMb); + } + pWelsMd->iCostLuma = iBestCost; +} + + + + + +// +// SetScrollingMvToMd +// +void SetScrollingMvToMd (SVAAFrameInfo* pVaa, SWelsMD* pWelsMd) { + SVAAFrameInfoExt* pVaaExt = static_cast (pVaa); + + SMVUnitXY sTempMv; + sTempMv.iMvX = pVaaExt->sScrollDetectInfo.iScrollMvX; + sTempMv.iMvY = pVaaExt->sScrollDetectInfo.iScrollMvY; + + 
(pWelsMd->sMe.sMe16x16).sDirectionalMv = + (pWelsMd->sMe.sMe8x8[0]).sDirectionalMv = + (pWelsMd->sMe.sMe8x8[1]).sDirectionalMv = + (pWelsMd->sMe.sMe8x8[2]).sDirectionalMv = + (pWelsMd->sMe.sMe8x8[3]).sDirectionalMv = sTempMv; +} + +void SetScrollingMvToMdNull (SVAAFrameInfo* pVaa, SWelsMD* pWelsMd) { +} + +} // namespace WelsEnc diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/svc_motion_estimate.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/svc_motion_estimate.cpp new file mode 100644 index 000000000..9e6b1b217 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/svc_motion_estimate.cpp @@ -0,0 +1,1089 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
/*!
 * \brief   Fill the motion-estimation entries of the encoder function table.
 *
 * For camera content (bScreenContent == false) only the directional-MV check
 * and the block-feature pointers are set (to a "false" stub and NULL).
 * For screen content the C implementations are installed first and then
 * overridden by SIMD versions according to the build-time macros and the
 * run-time CPU flags. The order of the assignments matters: a later match
 * replaces an earlier one.
 *
 * \param   pFuncList       function table to fill
 * \param   uiCpuFlag       bit mask of WELS_CPU_* capabilities
 * \param   bScreenContent  true to enable the screen-content search functions
 *
 * \return  NONE
 */
void WelsInitMeFunc (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag, bool bScreenContent) {
  pFuncList->pfUpdateFMESwitch = UpdateFMESwitchNull;

  if (!bScreenContent) {
    // camera content: no directional MV check, no block-feature calculation;
    // the cross-search and feature-search pointers are not touched in this branch
    pFuncList->pfCheckDirectionalMv = CheckDirectionalMvFalse;
    pFuncList->pfCalculateBlockFeatureOfFrame[0] =
      pFuncList->pfCalculateBlockFeatureOfFrame[1] = NULL;
    pFuncList->pfCalculateSingleBlockFeature[0] =
      pFuncList->pfCalculateSingleBlockFeature[1] = NULL;

  } else {
    pFuncList->pfCheckDirectionalMv = CheckDirectionalMv;

    //for cross search: C defaults, the same line search serves both directions
    pFuncList->pfVerticalFullSearch = LineFullSearch_c;
    pFuncList->pfHorizontalFullSearch = LineFullSearch_c;

#if defined (X86_ASM)
    // pfSampleSadHor8 is only assigned here, i.e. only when SSE4.1 is available
    if (uiCpuFlag & WELS_CPU_SSE41) {
      pFuncList->pfSampleSadHor8[0] = SampleSad8x8Hor8_sse41;
      pFuncList->pfSampleSadHor8[1] = SampleSad16x16Hor8_sse41;
      pFuncList->pfVerticalFullSearch = VerticalFullSearchUsingSSE41;
      pFuncList->pfHorizontalFullSearch = HorizontalFullSearchUsingSSE41;
    }
#endif

    //for feature search: C defaults; index [0] is the 8x8 variant, [1] the 16x16 variant
    pFuncList->pfInitializeHashforFeature = InitializeHashforFeature_c;
    pFuncList->pfFillQpelLocationByFeatureValue = FillQpelLocationByFeatureValue_c;
    pFuncList->pfCalculateBlockFeatureOfFrame[0] = SumOf8x8BlockOfFrame_c;
    pFuncList->pfCalculateBlockFeatureOfFrame[1] = SumOf16x16BlockOfFrame_c;
    //TODO: is it possible to special-case widths that are a multiple of 8, to speed those up?
    pFuncList->pfCalculateSingleBlockFeature[0] = SumOf8x8SingleBlock_c;
    pFuncList->pfCalculateSingleBlockFeature[1] = SumOf16x16SingleBlock_c;
#if defined (X86_ASM)
    if (uiCpuFlag & WELS_CPU_SSE2) {
      //for feature search
      pFuncList->pfInitializeHashforFeature = InitializeHashforFeature_sse2;
      pFuncList->pfFillQpelLocationByFeatureValue = FillQpelLocationByFeatureValue_sse2;
      pFuncList->pfCalculateBlockFeatureOfFrame[0] = SumOf8x8BlockOfFrame_sse2;
      pFuncList->pfCalculateBlockFeatureOfFrame[1] = SumOf16x16BlockOfFrame_sse2;
      //TODO: is it possible to special-case widths that are a multiple of 8, to speed those up?
      pFuncList->pfCalculateSingleBlockFeature[0] = SumOf8x8SingleBlock_sse2;
      pFuncList->pfCalculateSingleBlockFeature[1] = SumOf16x16SingleBlock_sse2;
    }
    // checked after SSE2 on purpose: replaces only the per-frame feature functions
    if (uiCpuFlag & WELS_CPU_SSE41) {
      //for feature search
      pFuncList->pfCalculateBlockFeatureOfFrame[0] = SumOf8x8BlockOfFrame_sse4;
      pFuncList->pfCalculateBlockFeatureOfFrame[1] = SumOf16x16BlockOfFrame_sse4;
    }
#endif

#if defined (HAVE_NEON)
    if (uiCpuFlag & WELS_CPU_NEON) {
      //for feature search
      pFuncList->pfInitializeHashforFeature = InitializeHashforFeature_neon;
      pFuncList->pfFillQpelLocationByFeatureValue = FillQpelLocationByFeatureValue_neon;
      pFuncList->pfCalculateBlockFeatureOfFrame[0] = SumOf8x8BlockOfFrame_neon;
      pFuncList->pfCalculateBlockFeatureOfFrame[1] = SumOf16x16BlockOfFrame_neon;
      //TODO: is it possible to special-case widths that are a multiple of 8, to speed those up?
      pFuncList->pfCalculateSingleBlockFeature[0] = SumOf8x8SingleBlock_neon;
      pFuncList->pfCalculateSingleBlockFeature[1] = SumOf16x16SingleBlock_neon;
    }
#endif

#if defined (HAVE_NEON_AARCH64)
    // same run-time flag as the 32-bit NEON section, different implementations
    if (uiCpuFlag & WELS_CPU_NEON) {
      //for feature search
      pFuncList->pfInitializeHashforFeature = InitializeHashforFeature_AArch64_neon;
      pFuncList->pfFillQpelLocationByFeatureValue = FillQpelLocationByFeatureValue_AArch64_neon;
      pFuncList->pfCalculateBlockFeatureOfFrame[0] = SumOf8x8BlockOfFrame_AArch64_neon;
      pFuncList->pfCalculateBlockFeatureOfFrame[1] = SumOf16x16BlockOfFrame_AArch64_neon;
      //TODO: is it possible to special-case widths that are a multiple of 8, to speed those up?
      pFuncList->pfCalculateSingleBlockFeature[0] = SumOf8x8SingleBlock_AArch64_neon;
      pFuncList->pfCalculateSingleBlockFeature[1] = SumOf16x16SingleBlock_AArch64_neon;
    }
#endif
  }
}
+ * \brief BL mb motion estimate search + * + * \param enc Wels encoder context + * \param pMe Wels me information + * + * \return NONE + */ + +void WelsMotionEstimateSearch (SWelsFuncPtrList* pFuncList, SDqLayer* pCurDqLayer, SWelsME* pMe, SSlice* pSlice) { + const int32_t kiStrideEnc = pCurDqLayer->iEncStride[0]; + const int32_t kiStrideRef = pCurDqLayer->pRefPic->iLineSize[0]; + + // Step 1: Initial point prediction + if (!WelsMotionEstimateInitialPoint (pFuncList, pMe, pSlice, kiStrideEnc, kiStrideRef)) { + pFuncList->pfSearchMethod[pMe->uiBlockSize] (pFuncList, pMe, pSlice, kiStrideEnc, kiStrideRef); + MeEndIntepelSearch (pMe); + } + + pFuncList->pfCalculateSatd (pFuncList->sSampleDealingFuncs.pfSampleSatd[pMe->uiBlockSize], pMe, kiStrideEnc, + kiStrideRef); +} + +void WelsMotionEstimateSearchStatic (SWelsFuncPtrList* pFuncList, SDqLayer* pCurDqLayer, SWelsME* pMe, + SSlice* pLpslice) { + const int32_t kiStrideEnc = pCurDqLayer->iEncStride[0]; + const int32_t kiStrideRef = pCurDqLayer->pRefPic->iLineSize[0]; + + pMe->sMv.iMvX = pMe->sMv.iMvY = 0; + pMe->uiSadCost = + pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize] (pMe->pEncMb, kiStrideEnc, pMe->pRefMb, kiStrideRef) ; + pMe->uiSadCost += COST_MVD (pMe->pMvdCost, - pMe->sMvp.iMvX, - pMe->sMvp.iMvY); + MeEndIntepelSearch (pMe); + pFuncList->pfCalculateSatd (pFuncList->sSampleDealingFuncs.pfSampleSatd[pMe->uiBlockSize], pMe, kiStrideEnc, + kiStrideRef); +} + +void WelsMotionEstimateSearchScrolled (SWelsFuncPtrList* pFuncList, SDqLayer* pCurDqLayer, SWelsME* pMe, + SSlice* pSlice) { + const int32_t kiStrideEnc = pCurDqLayer->iEncStride[0]; + const int32_t kiStrideRef = pCurDqLayer->pRefPic->iLineSize[0]; + + pMe->sMv = pMe->sDirectionalMv; + pMe->pRefMb = pMe->pColoRefMb + pMe->sMv.iMvY * kiStrideRef + pMe->sMv.iMvX; + pMe->uiSadCost = + pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize] (pMe->pEncMb, kiStrideEnc, pMe->pRefMb, kiStrideRef) + + COST_MVD (pMe->pMvdCost, (pMe->sMv.iMvX * (1 << 
2)) - pMe->sMvp.iMvX, (pMe->sMv.iMvY * (1 << 2)) - pMe->sMvp.iMvY); + MeEndIntepelSearch (pMe); + pFuncList->pfCalculateSatd (pFuncList->sSampleDealingFuncs.pfSampleSatd[pMe->uiBlockSize], pMe, kiStrideEnc, + kiStrideRef); +} +/*! + * \brief EL mb motion estimate initial point testing + * + * \param pix_pFuncList SSampleDealingFunc + * \param pMe Wels me information + * \param mv_range search range in motion estimate + * \param point the best match point in motion estimation + * + * \return NONE + */ +bool WelsMotionEstimateInitialPoint (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice, int32_t iStrideEnc, + int32_t iStrideRef) { + PSampleSadSatdCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize]; + const uint16_t* kpMvdCost = pMe->pMvdCost; + uint8_t* const kpEncMb = pMe->pEncMb; + int16_t iMvc0, iMvc1; + int32_t iSadCost; + int32_t iBestSadCost; + uint8_t* pRefMb; + uint8_t* pFref2; + uint32_t i; + const uint32_t kuiMvcNum = pSlice->uiMvcNum; + const SMVUnitXY* kpMvcList = &pSlice->sMvc[0]; + const SMVUnitXY ksMvStartMin = pSlice->sMvStartMin; + const SMVUnitXY ksMvStartMax = pSlice->sMvStartMax; + const SMVUnitXY ksMvp = pMe->sMvp; + SMVUnitXY sMv; + + // Step 1: Initial point prediction + // init with sMvp + sMv.iMvX = WELS_CLIP3 ((2 + ksMvp.iMvX) >> 2, ksMvStartMin.iMvX, ksMvStartMax.iMvX); + sMv.iMvY = WELS_CLIP3 ((2 + ksMvp.iMvY) >> 2, ksMvStartMin.iMvY, ksMvStartMax.iMvY); + + pRefMb = &pMe->pRefMb[sMv.iMvY * iStrideRef + sMv.iMvX]; + + iBestSadCost = pSad (kpEncMb, iStrideEnc, pRefMb, iStrideRef); + iBestSadCost += COST_MVD (kpMvdCost, ((sMv.iMvX) * (1 << 2)) - ksMvp.iMvX, ((sMv.iMvY) * (1 << 2)) - ksMvp.iMvY); + + for (i = 0; i < kuiMvcNum; i++) { + //clipping here is essential since some pOut-of-range MVC may happen here (i.e., refer to baseMV) + iMvc0 = WELS_CLIP3 ((2 + kpMvcList[i].iMvX) >> 2, ksMvStartMin.iMvX, ksMvStartMax.iMvX); + iMvc1 = WELS_CLIP3 ((2 + kpMvcList[i].iMvY) >> 2, ksMvStartMin.iMvY, 
ksMvStartMax.iMvY); + + if (((iMvc0 - sMv.iMvX) || (iMvc1 - sMv.iMvY))) { + pFref2 = &pMe->pRefMb[iMvc1 * iStrideRef + iMvc0]; + + iSadCost = pSad (kpEncMb, iStrideEnc, pFref2, iStrideRef) + + COST_MVD (kpMvdCost, (iMvc0 * (1 << 2)) - ksMvp.iMvX, (iMvc1 * (1 << 2)) - ksMvp.iMvY); + + if (iSadCost < iBestSadCost) { + sMv.iMvX = iMvc0; + sMv.iMvY = iMvc1; + pRefMb = pFref2; + iBestSadCost = iSadCost; + } + } + } + + if (pFuncList->pfCheckDirectionalMv + (pSad, pMe, ksMvStartMin, ksMvStartMax, iStrideEnc, iStrideRef, iSadCost)) { + sMv = pMe->sDirectionalMv; + pRefMb = &pMe->pColoRefMb[sMv.iMvY * iStrideRef + sMv.iMvX]; + iBestSadCost = iSadCost; + } + + UpdateMeResults (sMv, iBestSadCost, pRefMb, pMe); + if (iBestSadCost < static_cast (pMe->uSadPredISatd.uiSadPred)) { + //Initial point early Stop + MeEndIntepelSearch (pMe); + return true; + } + return false; +} + +void CalculateSatdCost (PSampleSadSatdCostFunc pSatd, SWelsME* pMe, + const int32_t kiEncStride, const int32_t kiRefStride) { + pMe->uSadPredISatd.uiSatd = pSatd (pMe->pEncMb, kiEncStride, pMe->pRefMb, kiRefStride); + pMe->uiSatdCost = pMe->uSadPredISatd.uiSatd + COST_MVD (pMe->pMvdCost, pMe->sMv.iMvX - pMe->sMvp.iMvX, + pMe->sMv.iMvY - pMe->sMvp.iMvY); +} +void NotCalculateSatdCost (PSampleSadSatdCostFunc pSatd, SWelsME* pMe, + const int32_t kiEncStride, const int32_t kiRefStride) { +} + + +///////////////////////// +// Diamond Search Basics +///////////////////////// +bool WelsMeSadCostSelect (int32_t* iSadCost, const uint16_t* kpMvdCost, int32_t* pBestCost, const int32_t kiDx, + const int32_t kiDy, int32_t* pIx, int32_t* pIy) { + int32_t iTempSadCost[4]; + int32_t iInputSadCost = *pBestCost; + iTempSadCost[0] = iSadCost[0] + COST_MVD (kpMvdCost, kiDx, kiDy - 4); + iTempSadCost[1] = iSadCost[1] + COST_MVD (kpMvdCost, kiDx, kiDy + 4); + iTempSadCost[2] = iSadCost[2] + COST_MVD (kpMvdCost, kiDx - 4, kiDy); + iTempSadCost[3] = iSadCost[3] + COST_MVD (kpMvdCost, kiDx + 4, kiDy); + + if (iTempSadCost[0] < 
*pBestCost) { + *pBestCost = iTempSadCost[0]; + *pIx = 0; + *pIy = 1; + } + + if (iTempSadCost[1] < *pBestCost) { + *pBestCost = iTempSadCost[1]; + *pIx = 0; + *pIy = -1; + } + + if (iTempSadCost[2] < *pBestCost) { + *pBestCost = iTempSadCost[2]; + *pIx = 1; + *pIy = 0; + } + + if (iTempSadCost[3] < *pBestCost) { + *pBestCost = iTempSadCost[3]; + *pIx = -1; + *pIy = 0; + } + return (*pBestCost == iInputSadCost); +} + +void WelsDiamondSearch (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice, + const int32_t kiStrideEnc, const int32_t kiStrideRef) { + PSample4SadCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSample4Sad[pMe->uiBlockSize]; + + uint8_t* pFref = pMe->pRefMb; + uint8_t* const kpEncMb = pMe->pEncMb; + const uint16_t* kpMvdCost = pMe->pMvdCost; + + const SMVUnitXY ksMvStartMin = pSlice->sMvStartMin; + const SMVUnitXY ksMvStartMax = pSlice->sMvStartMax; + + int32_t iMvDx = ((pMe->sMv.iMvX) * (1 << 2)) - pMe->sMvp.iMvX; + int32_t iMvDy = ((pMe->sMv.iMvY) * (1 << 2)) - pMe->sMvp.iMvY; + + uint8_t* pRefMb = pFref; + int32_t iBestCost = (pMe->uiSadCost); + + int32_t iTimeThreshold = ITERATIVE_TIMES; + ENFORCE_STACK_ALIGN_1D (int32_t, iSadCosts, 4, 16) + + while (iTimeThreshold--) { + pMe->sMv.iMvX = (iMvDx + pMe->sMvp.iMvX) >> 2; + pMe->sMv.iMvY = (iMvDy + pMe->sMvp.iMvY) >> 2; + if (!CheckMvInRange (pMe->sMv, ksMvStartMin, ksMvStartMax)) + continue; + pSad (kpEncMb, kiStrideEnc, pRefMb, kiStrideRef, &iSadCosts[0]); + + int32_t iX, iY; + + const bool kbIsBestCostWorse = WelsMeSadCostSelect (iSadCosts, kpMvdCost, &iBestCost, iMvDx, iMvDy, &iX, &iY); + if (kbIsBestCostWorse) + break; + + iMvDx -= (iX * (1 << 2)) ; + iMvDy -= (iY * (1 << 2)) ; + + pRefMb -= (iX + iY * kiStrideRef); + + } + + /* integer-pel mv */ + pMe->sMv.iMvX = (iMvDx + pMe->sMvp.iMvX) >> 2; + pMe->sMv.iMvY = (iMvDy + pMe->sMvp.iMvY) >> 2; + pMe->uiSatdCost = pMe->uiSadCost = (iBestCost); + pMe->pRefMb = pRefMb; +} + +///////////////////////// +// DirectionalMv Basics 
+///////////////////////// +bool CheckDirectionalMv (PSampleSadSatdCostFunc pSad, SWelsME* pMe, + const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv, const int32_t kiEncStride, const int32_t kiRefStride, + int32_t& iBestSadCost) { + const int16_t kiMvX = pMe->sDirectionalMv.iMvX; + const int16_t kiMvY = pMe->sDirectionalMv.iMvY; + + //Check MV from scrolling detection + if ((BLOCK_16x16 != pMe->uiBlockSize) //scrolled_MV with P16x16 is checked SKIP checking function + && (kiMvX | kiMvY) //(0,0) checked in ordinary initial point checking + && CheckMvInRange (pMe->sDirectionalMv, ksMinMv, ksMaxMv)) { + uint8_t* pRef = &pMe->pColoRefMb[kiMvY * kiRefStride + kiMvX]; + uint32_t uiCurrentSadCost = pSad (pMe->pEncMb, kiEncStride, pRef, kiRefStride) + + COST_MVD (pMe->pMvdCost, (kiMvX * (1 << 2)) - pMe->sMvp.iMvX, (kiMvY * (1 << 2)) - pMe->sMvp.iMvY); + if (uiCurrentSadCost < pMe->uiSadCost) { + iBestSadCost = uiCurrentSadCost; + return true; + } + } + return false; +} + +bool CheckDirectionalMvFalse (PSampleSadSatdCostFunc pSad, SWelsME* vpMe, + const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv, const int32_t kiEncStride, const int32_t kiRefStride, + int32_t& iBestSadCost) { + return false; +} + +///////////////////////// +// Cross Search Basics +///////////////////////// +#if defined (X86_ASM) +void CalcMvdCostx8_c (uint16_t* pMvdCost, const int32_t kiStartMv, uint16_t* pMvdTable, const uint16_t kiFixedCost) { + uint16_t* pBaseCost = pMvdCost; + const int32_t kiOffset = (kiStartMv * (1 << 2)); + uint16_t* pMvd = pMvdTable + kiOffset; + for (int32_t i = 0; i < 8; ++ i) { + pBaseCost[i] = ((*pMvd) + kiFixedCost); + pMvd += 4; + } +} +void VerticalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe, + uint16_t* pMvdTable, + const int32_t kiEncStride, const int32_t kiRefStride, + const int16_t kiMinMv, const int16_t kiMaxMv, + const bool bVerticalSearch) { + uint8_t* kpEncMb = pMe->pEncMb; + const int32_t kiCurMeBlockPix = pMe->iCurMeBlockPixY; + uint8_t* pRef = 
&pMe->pColoRefMb[kiMinMv * kiRefStride]; + + const int32_t kiCurMeBlockPixY = pMe->iCurMeBlockPixY; + + int32_t iMinPos = kiCurMeBlockPixY + kiMinMv; + int32_t iMaxPos = kiCurMeBlockPixY + kiMaxMv; + int32_t iFixedMvd = * (pMvdTable - pMe->sMvp.iMvX); + uint16_t* pMvdCost = & (pMvdTable[ (kiMinMv * (1 << 2)) - pMe->sMvp.iMvY]); + int16_t iStartMv = 0; + + + const int32_t kIsBlock16x16 = pMe->uiBlockSize == BLOCK_16x16; + const int32_t kiEdgeBlocks = kIsBlock16x16 ? 16 : 8; + PSampleSadHor8Func pSampleSadHor8 = pFuncList->pfSampleSadHor8[kIsBlock16x16]; + PSampleSadSatdCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize]; + PTransposeMatrixBlockFunc TransposeMatrixBlock = kIsBlock16x16 ? TransposeMatrixBlock16x16_sse2 : + TransposeMatrixBlock8x8_mmx; + PTransposeMatrixBlocksFunc TransposeMatrixBlocks = kIsBlock16x16 ? TransposeMatrixBlocksx16_sse2 : + TransposeMatrixBlocksx8_mmx; + + const int32_t kiDiff = iMaxPos - iMinPos; + const int32_t kiRowNum = WELS_ALIGN ((kiDiff - kiEdgeBlocks + 1), kiEdgeBlocks); + const int32_t kiBlocksNum = kIsBlock16x16 ? 
(kiRowNum >> 4) : (kiRowNum >> 3); + int32_t iCountLoop8 = (kiRowNum - kiEdgeBlocks) >> 3; + const int32_t kiRemainingVectors = kiDiff - (iCountLoop8 << 3); + const int32_t kiMatrixStride = MAX_VERTICAL_MV_RANGE; + ENFORCE_STACK_ALIGN_2D (uint8_t, uiMatrixRef, 16, kiMatrixStride, 16); // transpose matrix result for ref + ENFORCE_STACK_ALIGN_2D (uint8_t, uiMatrixEnc, 16, 16, 16); // transpose matrix result for enc + assert (kiRowNum <= kiMatrixStride); // make sure effective memory + + TransposeMatrixBlock (&uiMatrixEnc[0][0], 16, kpEncMb, kiEncStride); + TransposeMatrixBlocks (&uiMatrixRef[0][0], kiMatrixStride, pRef, kiRefStride, kiBlocksNum); + ENFORCE_STACK_ALIGN_1D (uint16_t, uiBaseCost, 8, 16); + int32_t iTargetPos = iMinPos; + int16_t iBestPos = pMe->sMv.iMvX; + uint32_t uiBestCost = pMe->uiSadCost; + uint32_t uiCostMin; + int32_t iIndexMinPos; + kpEncMb = &uiMatrixEnc[0][0]; + pRef = &uiMatrixRef[0][0]; + + while (iCountLoop8 > 0) { + CalcMvdCostx8_c (uiBaseCost, iStartMv, pMvdCost, iFixedMvd); + uiCostMin = pSampleSadHor8 (kpEncMb, 16, pRef, kiMatrixStride, uiBaseCost, &iIndexMinPos); + if (uiCostMin < uiBestCost) { + uiBestCost = uiCostMin; + iBestPos = iTargetPos + iIndexMinPos; + } + iTargetPos += 8; + pRef += 8; + iStartMv += 8; + -- iCountLoop8; + } + if (kiRemainingVectors > 0) { + kpEncMb = pMe->pEncMb; + pRef = &pMe->pColoRefMb[ (iTargetPos - kiCurMeBlockPix) * kiRefStride]; + while (iTargetPos < iMaxPos) { + const uint16_t uiMvdCost = pMvdCost[iStartMv * (1 << 2)]; + uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (iFixedMvd + uiMvdCost); + if (uiSadCost < uiBestCost) { + uiBestCost = uiSadCost; + iBestPos = iTargetPos; + } + iStartMv++; + pRef += kiRefStride; + ++iTargetPos; + } + } + if (uiBestCost < pMe->uiSadCost) { + SMVUnitXY sBestMv; + sBestMv.iMvX = 0; + sBestMv.iMvY = iBestPos - kiCurMeBlockPix; + UpdateMeResults (sBestMv, uiBestCost, &pMe->pColoRefMb[sBestMv.iMvY * kiRefStride], pMe); + } +} + +void 
HorizontalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe, + uint16_t* pMvdTable, + const int32_t kiEncStride, const int32_t kiRefStride, + const int16_t kiMinMv, const int16_t kiMaxMv, + const bool bVerticalSearch) { + uint8_t* kpEncMb = pMe->pEncMb; + + const int32_t iCurMeBlockPixX = pMe->iCurMeBlockPixX; + int32_t iMinPos = iCurMeBlockPixX + kiMinMv; + int32_t iMaxPos = iCurMeBlockPixX + kiMaxMv; + int32_t iFixedMvd = * (pMvdTable - pMe->sMvp.iMvY); + uint16_t* pMvdCost = & (pMvdTable[ (kiMinMv * (1 << 2)) - pMe->sMvp.iMvX]); + int16_t iStartMv = 0; + uint8_t* pRef = &pMe->pColoRefMb[kiMinMv]; + const int32_t kIsBlock16x16 = pMe->uiBlockSize == BLOCK_16x16; + PSampleSadHor8Func pSampleSadHor8 = pFuncList->pfSampleSadHor8[kIsBlock16x16]; + PSampleSadSatdCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize]; + ENFORCE_STACK_ALIGN_1D (uint16_t, uiBaseCost, 8, 16); + const int32_t kiNumVector = iMaxPos - iMinPos; + int32_t iCountLoop8 = kiNumVector >> 3; + const int32_t kiRemainingLoop8 = kiNumVector & 7; + int32_t iTargetPos = iMinPos; + int16_t iBestPos = pMe->sMv.iMvX; + uint32_t uiBestCost = pMe->uiSadCost; + uint32_t uiCostMin; + int32_t iIndexMinPos; + + while (iCountLoop8 > 0) { + CalcMvdCostx8_c (uiBaseCost, iStartMv, pMvdCost, iFixedMvd); + uiCostMin = pSampleSadHor8 (kpEncMb, kiEncStride, pRef, kiRefStride, uiBaseCost, &iIndexMinPos); + if (uiCostMin < uiBestCost) { + uiBestCost = uiCostMin; + iBestPos = iTargetPos + iIndexMinPos; + } + iTargetPos += 8; + pRef += 8; + iStartMv += 8; + -- iCountLoop8; + } + if (kiRemainingLoop8 > 0) { + while (iTargetPos < iMaxPos) { + const uint16_t uiMvdCost = pMvdCost[iStartMv * (1 << 2)]; + uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (iFixedMvd + uiMvdCost); + if (uiSadCost < uiBestCost) { + uiBestCost = uiSadCost; + iBestPos = iTargetPos; + } + iStartMv++; + ++pRef; + ++iTargetPos; + } + } + if (uiBestCost < pMe->uiSadCost) { + SMVUnitXY sBestMv; + 
sBestMv.iMvX = iBestPos - iCurMeBlockPixX; + sBestMv.iMvY = 0; + UpdateMeResults (sBestMv, uiBestCost, &pMe->pColoRefMb[sBestMv.iMvX], pMe); + } +} +#endif +void LineFullSearch_c (SWelsFuncPtrList* pFuncList, SWelsME* pMe, + uint16_t* pMvdTable, + const int32_t kiEncStride, const int32_t kiRefStride, + const int16_t iMinMv, const int16_t iMaxMv, + const bool bVerticalSearch) { + PSampleSadSatdCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize]; + const int32_t kiCurMeBlockPixX = pMe->iCurMeBlockPixX; + const int32_t kiCurMeBlockPixY = pMe->iCurMeBlockPixY; + int32_t iMinPos, iMaxPos; + int32_t iFixedMvd; + int32_t iCurMeBlockPix; + int32_t iStride; + uint16_t* pMvdCost; + + if (bVerticalSearch) { + iMinPos = kiCurMeBlockPixY + iMinMv; + iMaxPos = kiCurMeBlockPixY + iMaxMv; + iFixedMvd = * (pMvdTable - pMe->sMvp.iMvX); + iCurMeBlockPix = pMe->iCurMeBlockPixY; + iStride = kiRefStride; + pMvdCost = & (pMvdTable[ (iMinMv * (1 << 2)) - pMe->sMvp.iMvY]); + } else { + iMinPos = kiCurMeBlockPixX + iMinMv; + iMaxPos = kiCurMeBlockPixX + iMaxMv; + iFixedMvd = * (pMvdTable - pMe->sMvp.iMvY); + iCurMeBlockPix = pMe->iCurMeBlockPixX; + iStride = 1; + pMvdCost = & (pMvdTable[ (iMinMv * (1 << 2)) - pMe->sMvp.iMvX]); + } + uint8_t* pRef = &pMe->pColoRefMb[ iMinMv * iStride]; + uint32_t uiBestCost = 0xFFFFFFFF; + int32_t iBestPos = 0; + + for (int32_t iTargetPos = iMinPos; iTargetPos < iMaxPos; ++ iTargetPos) { + uint8_t* const kpEncMb = pMe->pEncMb; + uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (iFixedMvd + *pMvdCost); + if (uiSadCost < uiBestCost) { + uiBestCost = uiSadCost; + iBestPos = iTargetPos; + } + pRef += iStride; + pMvdCost += 4; + } + + if (uiBestCost < pMe->uiSadCost) { + SMVUnitXY sBestMv; + sBestMv.iMvX = bVerticalSearch ? 0 : (iBestPos - iCurMeBlockPix); + sBestMv.iMvY = bVerticalSearch ? 
(iBestPos - iCurMeBlockPix) : 0; + UpdateMeResults (sBestMv, uiBestCost, &pMe->pColoRefMb[sBestMv.iMvY * kiRefStride + sBestMv.iMvX], pMe); + } +} + +void WelsMotionCrossSearch (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice, + const int32_t kiEncStride, const int32_t kiRefStride) { + PLineFullSearchFunc pfVerticalFullSearchFunc = pFuncList->pfVerticalFullSearch; + PLineFullSearchFunc pfHorizontalFullSearchFunc = pFuncList->pfHorizontalFullSearch; + + //vertical search + pfVerticalFullSearchFunc (pFuncList, pMe, + pMe->pMvdCost, + kiEncStride, kiRefStride, + pSlice->sMvStartMin.iMvY, + pSlice->sMvStartMax.iMvY, true); + + //horizontal search + if (pMe->uiSadCost >= pMe->uiSadCostThreshold) { + pfHorizontalFullSearchFunc (pFuncList, pMe, + pMe->pMvdCost, + kiEncStride, kiRefStride, + pSlice->sMvStartMin.iMvX, + pSlice->sMvStartMax.iMvX, + false); + } +} + + +///////////////////////// +// Feature Search Basics +///////////////////////// +//memory related +int32_t RequestFeatureSearchPreparation (CMemoryAlign* pMa, const int32_t kiFrameWidth, const int32_t kiFrameHeight, + const int32_t iNeedFeatureStorage, + SFeatureSearchPreparation* pFeatureSearchPreparation) { + const int32_t kiFeatureStrategyIndex = iNeedFeatureStorage >> 16; + const bool bFme8x8 = ((iNeedFeatureStorage & 0x0000FF & ME_FME) == ME_FME); + const int32_t kiMarginSize = bFme8x8 ? 
8 : 16; + const int32_t kiFrameSize = (kiFrameWidth - kiMarginSize) * (kiFrameHeight - kiMarginSize); + int32_t iListOfFeatureOfBlock; + + if (0 == kiFeatureStrategyIndex) { + iListOfFeatureOfBlock = sizeof (uint16_t) * kiFrameSize; + } else { + iListOfFeatureOfBlock = sizeof (uint16_t) * kiFrameSize + + (kiFrameWidth - kiMarginSize) * sizeof (uint32_t) + kiFrameWidth * 8 * sizeof (uint8_t); + } + pFeatureSearchPreparation->pFeatureOfBlock = + (uint16_t*)pMa->WelsMallocz (iListOfFeatureOfBlock, "pFeatureOfBlock"); + WELS_VERIFY_RETURN_IF (ENC_RETURN_MEMALLOCERR, NULL == (pFeatureSearchPreparation->pFeatureOfBlock)) + + pFeatureSearchPreparation->uiFeatureStrategyIndex = kiFeatureStrategyIndex; + pFeatureSearchPreparation->bFMESwitchFlag = true; + pFeatureSearchPreparation->uiFMEGoodFrameCount = FMESWITCH_DEFAULT_GOODFRAME_NUM; + pFeatureSearchPreparation->iHighFreMbCount = 0; + + return ENC_RETURN_SUCCESS; +} +int32_t ReleaseFeatureSearchPreparation (CMemoryAlign* pMa, uint16_t*& pFeatureOfBlock) { + if (pMa && pFeatureOfBlock) { + pMa->WelsFree (pFeatureOfBlock, "pFeatureOfBlock"); + pFeatureOfBlock = NULL; + return ENC_RETURN_SUCCESS; + } + return ENC_RETURN_UNEXPECTED; +} + +int32_t RequestScreenBlockFeatureStorage (CMemoryAlign* pMa, const int32_t kiFrameWidth, const int32_t kiFrameHeight, + const int32_t iNeedFeatureStorage, + SScreenBlockFeatureStorage* pScreenBlockFeatureStorage) { + + const int32_t kiFeatureStrategyIndex = iNeedFeatureStorage >> 16; + const int32_t kiMe8x8FME = iNeedFeatureStorage & 0x0000FF & ME_FME; + const int32_t kiMe16x16FME = ((iNeedFeatureStorage & 0x00FF00) >> 8) & ME_FME; + if ((kiMe8x8FME == ME_FME) && (kiMe16x16FME == ME_FME)) { + return ENC_RETURN_UNSUPPORTED_PARA; + //the following memory allocation cannot support when FME at both size + } + + const bool bIsBlock8x8 = (kiMe8x8FME == ME_FME); + const int32_t kiMarginSize = bIsBlock8x8 ? 
8 : 16; + const int32_t kiFrameSize = (kiFrameWidth - kiMarginSize) * (kiFrameHeight - kiMarginSize); + const int32_t kiListSize = (0 == kiFeatureStrategyIndex) ? (bIsBlock8x8 ? LIST_SIZE_SUM_8x8 : LIST_SIZE_SUM_16x16) : + 256; + + pScreenBlockFeatureStorage->pTimesOfFeatureValue = (uint32_t*)pMa->WelsMallocz (kiListSize * sizeof (uint32_t), + "pScreenBlockFeatureStorage->pTimesOfFeatureValue"); + WELS_VERIFY_RETURN_IF (ENC_RETURN_MEMALLOCERR, NULL == pScreenBlockFeatureStorage->pTimesOfFeatureValue) + + pScreenBlockFeatureStorage->pLocationOfFeature = (uint16_t**)pMa->WelsMallocz (kiListSize * sizeof (uint16_t*), + "pScreenBlockFeatureStorage->pLocationOfFeature"); + WELS_VERIFY_RETURN_IF (ENC_RETURN_MEMALLOCERR, NULL == pScreenBlockFeatureStorage->pLocationOfFeature) + + pScreenBlockFeatureStorage->pLocationPointer = (uint16_t*)pMa->WelsMallocz (2 * kiFrameSize * sizeof (uint16_t), + "pScreenBlockFeatureStorage->pLocationPointer"); + WELS_VERIFY_RETURN_IF (ENC_RETURN_MEMALLOCERR, NULL == pScreenBlockFeatureStorage->pLocationPointer) + // uint16_t* pFeatureValuePointerList[WELS_MAX (LIST_SIZE_SUM_16x16, LIST_SIZE_MSE_16x16)] = {0}; + pScreenBlockFeatureStorage->pFeatureValuePointerList = (uint16_t**)pMa->WelsMallocz (WELS_MAX (LIST_SIZE_SUM_16x16, + LIST_SIZE_MSE_16x16) * sizeof (uint16_t*), + "pScreenBlockFeatureStorage->pFeatureValuePointerList"); + WELS_VERIFY_RETURN_IF (ENC_RETURN_MEMALLOCERR, NULL == pScreenBlockFeatureStorage->pFeatureValuePointerList) + + pScreenBlockFeatureStorage->pFeatureOfBlockPointer = NULL; + pScreenBlockFeatureStorage->iIs16x16 = !bIsBlock8x8; + pScreenBlockFeatureStorage->uiFeatureStrategyIndex = kiFeatureStrategyIndex; + pScreenBlockFeatureStorage->iActualListSize = kiListSize; + WelsSetMemMultiplebytes_c (pScreenBlockFeatureStorage->uiSadCostThreshold, UINT_MAX, BLOCK_SIZE_ALL, sizeof (uint32_t)); + pScreenBlockFeatureStorage->bRefBlockFeatureCalculated = false; + + return ENC_RETURN_SUCCESS; +} +int32_t 
ReleaseScreenBlockFeatureStorage (CMemoryAlign* pMa, SScreenBlockFeatureStorage* pScreenBlockFeatureStorage) { + if (pMa && pScreenBlockFeatureStorage) { + if (pScreenBlockFeatureStorage->pTimesOfFeatureValue) { + pMa->WelsFree (pScreenBlockFeatureStorage->pTimesOfFeatureValue, "pScreenBlockFeatureStorage->pTimesOfFeatureValue"); + pScreenBlockFeatureStorage->pTimesOfFeatureValue = NULL; + } + + if (pScreenBlockFeatureStorage->pLocationOfFeature) { + pMa->WelsFree (pScreenBlockFeatureStorage->pLocationOfFeature, "pScreenBlockFeatureStorage->pLocationOfFeature"); + pScreenBlockFeatureStorage->pLocationOfFeature = NULL; + } + + if (pScreenBlockFeatureStorage->pLocationPointer) { + pMa->WelsFree (pScreenBlockFeatureStorage->pLocationPointer, "pScreenBlockFeatureStorage->pLocationPointer"); + pScreenBlockFeatureStorage->pLocationPointer = NULL; + } + + if (pScreenBlockFeatureStorage->pFeatureValuePointerList) { + pMa->WelsFree (pScreenBlockFeatureStorage->pFeatureValuePointerList, + "pScreenBlockFeatureStorage->pFeatureValuePointerList"); + pScreenBlockFeatureStorage->pFeatureValuePointerList = NULL; + } + + return ENC_RETURN_SUCCESS; + } + return ENC_RETURN_UNEXPECTED; +} + +//preprocess related +int32_t SumOf8x8SingleBlock_c (uint8_t* pRef, const int32_t kiRefStride) { + int32_t iSum = 0, i; + for (i = 0; i < 8; i++) { + iSum += pRef[0] + pRef[1] + pRef[2] + pRef[3]; + iSum += pRef[4] + pRef[5] + pRef[6] + pRef[7]; + pRef += kiRefStride; + } + return iSum; +} +int32_t SumOf16x16SingleBlock_c (uint8_t* pRef, const int32_t kiRefStride) { + int32_t iSum = 0, i; + for (i = 0; i < 16; i++) { + iSum += pRef[0] + pRef[1] + pRef[2] + pRef[3]; + iSum += pRef[4] + pRef[5] + pRef[6] + pRef[7]; + iSum += pRef[8] + pRef[9] + pRef[10] + pRef[11]; + iSum += pRef[12] + pRef[13] + pRef[14] + pRef[15]; + pRef += kiRefStride; + } + return iSum; +} + +void SumOf8x8BlockOfFrame_c (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight, + const int32_t kiRefStride, + 
uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]) { + int32_t x, y; + uint8_t* pRef; + uint16_t* pBuffer; + int32_t iSum; + for (y = 0; y < kiHeight; y++) { + pRef = pRefPicture + kiRefStride * y; + pBuffer = pFeatureOfBlock + kiWidth * y; + for (x = 0; x < kiWidth; x++) { + iSum = SumOf8x8SingleBlock_c (pRef + x, kiRefStride); + + pBuffer[x] = iSum; + pTimesOfFeatureValue[iSum]++; + } + } +} + +void SumOf16x16BlockOfFrame_c (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight, + const int32_t kiRefStride, + uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]) { + //TODO: this is similar to SumOf8x8BlockOfFrame_c expect the calling of single block func, refactor-able? + int32_t x, y; + uint8_t* pRef; + uint16_t* pBuffer; + int32_t iSum; + for (y = 0; y < kiHeight; y++) { + pRef = pRefPicture + kiRefStride * y; + pBuffer = pFeatureOfBlock + kiWidth * y; + for (x = 0; x < kiWidth; x++) { + iSum = SumOf16x16SingleBlock_c (pRef + x, kiRefStride); + + pBuffer[x] = iSum; + pTimesOfFeatureValue[iSum]++; + } + } +} + +void InitializeHashforFeature_c (uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize, + uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList) { + //assign location pointer + uint16_t* pBufPos = pBuf; + for (int32_t i = 0 ; i < kiListSize; ++i) { + pLocationOfFeature[i] = + pFeatureValuePointerList[i] = pBufPos; + pBufPos += (pTimesOfFeatureValue[i] << 1); + } +} +void FillQpelLocationByFeatureValue_c (uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight, + uint16_t** pFeatureValuePointerList) { + //assign each pixel's position + uint16_t* pSrcPointer = pFeatureOfBlock; + int32_t iQpelY = 0; + for (int32_t y = 0; y < kiHeight; y++) { + for (int32_t x = 0; x < kiWidth; x++) { + uint16_t uiFeature = pSrcPointer[x]; + pFeatureValuePointerList[uiFeature][0] = x << 2; + pFeatureValuePointerList[uiFeature][1] = iQpelY; + pFeatureValuePointerList[uiFeature] += 2; + } + iQpelY += 
4; + pSrcPointer += kiWidth; + } +} + +bool CalculateFeatureOfBlock (SWelsFuncPtrList* pFunc, SPicture* pRef, + SScreenBlockFeatureStorage* pScreenBlockFeatureStorage) { + uint16_t* pFeatureOfBlock = pScreenBlockFeatureStorage->pFeatureOfBlockPointer; + uint32_t* pTimesOfFeatureValue = pScreenBlockFeatureStorage->pTimesOfFeatureValue; + uint16_t** pLocationOfFeature = pScreenBlockFeatureStorage->pLocationOfFeature; + uint16_t* pBuf = pScreenBlockFeatureStorage->pLocationPointer; + + if (NULL == pFeatureOfBlock || NULL == pTimesOfFeatureValue || NULL == pLocationOfFeature || NULL == pBuf + || NULL == pRef->pData[0]) { + return false; + } + + uint8_t* pRefData = pRef->pData[0]; + const int32_t iRefStride = pRef->iLineSize[0]; + int32_t iIs16x16 = pScreenBlockFeatureStorage->iIs16x16; + const int32_t iEdgeDiscard = (iIs16x16 ? 16 : 8); //this is to save complexity of padding on pRef + const int32_t iWidth = pRef->iWidthInPixel - iEdgeDiscard; + const int32_t kiHeight = pRef->iHeightInPixel - iEdgeDiscard; + const int32_t kiActualListSize = pScreenBlockFeatureStorage->iActualListSize; + + memset (pTimesOfFeatureValue, 0, sizeof (int32_t)*kiActualListSize); + (pFunc->pfCalculateBlockFeatureOfFrame[iIs16x16]) (pRefData, iWidth, kiHeight, iRefStride, pFeatureOfBlock, + pTimesOfFeatureValue); + + //assign pLocationOfFeature pointer + pFunc->pfInitializeHashforFeature (pTimesOfFeatureValue, pBuf, kiActualListSize, + pLocationOfFeature, pScreenBlockFeatureStorage->pFeatureValuePointerList); + + //assign each pixel's pLocationOfFeature + pFunc->pfFillQpelLocationByFeatureValue (pFeatureOfBlock, iWidth, kiHeight, + pScreenBlockFeatureStorage->pFeatureValuePointerList); + return true; +} + +void PerformFMEPreprocess (SWelsFuncPtrList* pFunc, SPicture* pRef, uint16_t* pFeatureOfBlock, + SScreenBlockFeatureStorage* pScreenBlockFeatureStorage) { + pScreenBlockFeatureStorage->pFeatureOfBlockPointer = pFeatureOfBlock; + pScreenBlockFeatureStorage->bRefBlockFeatureCalculated = 
CalculateFeatureOfBlock (pFunc, pRef,
+                           pScreenBlockFeatureStorage);
+
+  if (pScreenBlockFeatureStorage->bRefBlockFeatureCalculated) {
+    uint32_t uiRefPictureAvgQstepx16 = QStepx16ByQp[WelsMedian (0, pRef->iFrameAverageQp, 51)];
+    uint32_t uiSadCostThreshold16x16 = ((30 * (uiRefPictureAvgQstepx16 + 160)) >> 3);
+    pScreenBlockFeatureStorage->uiSadCostThreshold[BLOCK_16x16] = uiSadCostThreshold16x16;
+    pScreenBlockFeatureStorage->uiSadCostThreshold[BLOCK_8x8] = (uiSadCostThreshold16x16 >> 2);
+    // the remaining block sizes (16x8, 8x16, 4x4) all get UINT_MAX as their threshold
+    pScreenBlockFeatureStorage->uiSadCostThreshold[BLOCK_16x8]
+      = pScreenBlockFeatureStorage->uiSadCostThreshold[BLOCK_8x16]
+        = pScreenBlockFeatureStorage->uiSadCostThreshold[BLOCK_4x4] = UINT_MAX;
+  }
+}
+
+//search related
+// SetFeatureSearchIn: fills *pFeatureSearchIn from the ME context (sMe), the slice's MV start
+// range (pSlice) and the reference feature storage (pRefFeatureStorage).
+// Returns false when the selected SAD function, pTimesOfFeature or pQpelLocationOfFeature is
+// NULL after the copy, true otherwise. All fields are written before that check is made.
+bool SetFeatureSearchIn (SWelsFuncPtrList* pFunc, const SWelsME& sMe,
+                         const SSlice* pSlice, SScreenBlockFeatureStorage* pRefFeatureStorage,
+                         const int32_t kiEncStride, const int32_t kiRefStride,
+                         SFeatureSearchIn* pFeatureSearchIn) {
+  pFeatureSearchIn->pSad = pFunc->sSampleDealingFuncs.pfSampleSad[sMe.uiBlockSize];
+  // feature calculator is picked by a 0/1 index: 1 when the block is 16x16, 0 otherwise
+  pFeatureSearchIn->iFeatureOfCurrent = pFunc->pfCalculateSingleBlockFeature[BLOCK_16x16 == sMe.uiBlockSize] (sMe.pEncMb,
+                                        kiEncStride);
+
+  pFeatureSearchIn->pEnc = sMe.pEncMb;
+  pFeatureSearchIn->pColoRef = sMe.pColoRefMb;
+  pFeatureSearchIn->iEncStride = kiEncStride;
+  pFeatureSearchIn->iRefStride = kiRefStride;
+  pFeatureSearchIn->uiSadCostThresh = sMe.uiSadCostThreshold;
+
+  // "Qpel" coordinates are the integer pixel position multiplied by 4 (<< 2)
+  pFeatureSearchIn->iCurPixX = sMe.iCurMeBlockPixX;
+  pFeatureSearchIn->iCurPixXQpel = (pFeatureSearchIn->iCurPixX << 2);
+  pFeatureSearchIn->iCurPixY = sMe.iCurMeBlockPixY;
+  pFeatureSearchIn->iCurPixYQpel = (pFeatureSearchIn->iCurPixY << 2);
+
+  pFeatureSearchIn->pTimesOfFeature = pRefFeatureStorage->pTimesOfFeatureValue;
+  pFeatureSearchIn->pQpelLocationOfFeature = pRefFeatureStorage->pLocationOfFeature;
+  // the cost tables are pre-offset so that FeatureSearchOne can index them directly with an
+  // absolute Qpel position q, i.e. pMvdCostX[q] == pMvdCost[q - iCurPixXQpel - sMvp.iMvX]
+  pFeatureSearchIn->pMvdCostX = sMe.pMvdCost - pFeatureSearchIn->iCurPixXQpel - sMe.sMvp.iMvX;
+  pFeatureSearchIn->pMvdCostY = sMe.pMvdCost - pFeatureSearchIn->iCurPixYQpel - sMe.sMvp.iMvY;
+
+  // allowed absolute Qpel window: current position plus the slice's MV start range times 4
+  pFeatureSearchIn->iMinQpelX = pFeatureSearchIn->iCurPixXQpel + ((pSlice->sMvStartMin.iMvX) * (1 << 2));
+  pFeatureSearchIn->iMinQpelY = pFeatureSearchIn->iCurPixYQpel + ((pSlice->sMvStartMin.iMvY) * (1 << 2));
+  pFeatureSearchIn->iMaxQpelX = pFeatureSearchIn->iCurPixXQpel + ((pSlice->sMvStartMax.iMvX) * (1 << 2));
+  pFeatureSearchIn->iMaxQpelY = pFeatureSearchIn->iCurPixYQpel + ((pSlice->sMvStartMax.iMvY) * (1 << 2));
+
+  if (NULL == pFeatureSearchIn->pSad || NULL == pFeatureSearchIn->pTimesOfFeature
+      || NULL == pFeatureSearchIn->pQpelLocationOfFeature) {
+    return false;
+  }
+  return true;
+}
+// SaveFeatureSearchOut: stores the best MV, its cost and its reference pointer into
+// *pFeatureSearchOut.
+void SaveFeatureSearchOut (const SMVUnitXY sBestMv, const uint32_t uiBestSadCost, uint8_t* pRef,
+                           SFeatureSearchOut* pFeatureSearchOut) {
+  pFeatureSearchOut->sBestMv = sBestMv;
+  pFeatureSearchOut->uiBestSadCost = uiBestSadCost;
+  pFeatureSearchOut->pBestRef = pRef;
+}
+
+// FeatureSearchOne: tests the reference positions recorded for feature value
+// (iFeatureOfCurrent + iFeatureDifference) and keeps the cheapest one.
+//  - sFeatureSearchIn:       search input as prepared by SetFeatureSearchIn
+//  - iFeatureDifference:     offset added to the current block's feature value
+//  - kuiExpectedSearchTimes: upper bound on the number of candidates examined
+//  - pFeatureSearchOut:      in: best MV/cost/ref so far; out: the possibly improved result
+// Returns true when the feature value is outside [0, LIST_SIZE) (nothing is searched or
+// written) or when the loop stopped early because the best cost fell below uiSadCostThresh;
+// returns false when every candidate was visited.
+bool FeatureSearchOne (SFeatureSearchIn& sFeatureSearchIn, const int32_t iFeatureDifference,
+                       const uint32_t kuiExpectedSearchTimes,
+                       SFeatureSearchOut* pFeatureSearchOut) {
+  const int32_t iFeatureOfRef = (sFeatureSearchIn.iFeatureOfCurrent + iFeatureDifference);
+  if (iFeatureOfRef < 0 || iFeatureOfRef >= LIST_SIZE)
+    return true;
+
+  PSampleSadSatdCostFunc pSad = sFeatureSearchIn.pSad;
+  uint8_t* pEnc = sFeatureSearchIn.pEnc;
+  uint8_t* pColoRef = sFeatureSearchIn.pColoRef;
+  const int32_t iEncStride = sFeatureSearchIn.iEncStride;
+  const int32_t iRefStride = sFeatureSearchIn.iRefStride;
+  // NOTE(review): this local is 16 bits wide; if the struct field is wider the threshold is
+  // truncated here - confirm against the SFeatureSearchIn declaration.
+  const uint16_t uiSadCostThresh = sFeatureSearchIn.uiSadCostThresh;
+
+  const int32_t iCurPixX = sFeatureSearchIn.iCurPixX;
+  const int32_t iCurPixY = sFeatureSearchIn.iCurPixY;
+  const int32_t iCurPixXQpel = sFeatureSearchIn.iCurPixXQpel;
+  const int32_t iCurPixYQpel = sFeatureSearchIn.iCurPixYQpel;
+
+  const int32_t iMinQpelX = sFeatureSearchIn.iMinQpelX;
+  const int32_t iMinQpelY = sFeatureSearchIn.iMinQpelY;
+  const int32_t iMaxQpelX = sFeatureSearchIn.iMaxQpelX;
+  const int32_t iMaxQpelY = sFeatureSearchIn.iMaxQpelY;
+
+  // candidates are stored as consecutive (x, y) pairs, hence the x2 loop bound
+  const int32_t iSearchTimes = WELS_MIN (sFeatureSearchIn.pTimesOfFeature[iFeatureOfRef], kuiExpectedSearchTimes);
+  const int32_t iSearchTimesx2 = (iSearchTimes << 1);
+  const uint16_t* pQpelPosition = sFeatureSearchIn.pQpelLocationOfFeature[iFeatureOfRef];
+
+  SMVUnitXY sBestMv;
+  uint32_t uiBestCost, uiTmpCost;
+  uint8_t* pBestRef, *pCurRef;
+  int32_t iQpelX, iQpelY;
+  int32_t iIntepelX, iIntepelY;
+  int32_t i;
+
+  sBestMv.iMvX = pFeatureSearchOut->sBestMv.iMvX;
+  sBestMv.iMvY = pFeatureSearchOut->sBestMv.iMvY;
+  uiBestCost = pFeatureSearchOut->uiBestSadCost;
+  pBestRef = pFeatureSearchOut->pBestRef;
+
+  for (i = 0; i < iSearchTimesx2; i += 2) {
+    iQpelX = pQpelPosition[i];
+    iQpelY = pQpelPosition[i + 1];
+
+    // skip candidates outside the allowed window, and any candidate that shares its X or
+    // its Y coordinate with the current block position
+    if ((iQpelX > iMaxQpelX) || (iQpelX < iMinQpelX)
+        || (iQpelY > iMaxQpelY) || (iQpelY < iMinQpelY)
+        || (iQpelX == iCurPixXQpel) || (iQpelY == iCurPixYQpel))
+      continue;
+
+    // MVD cost alone already reaches the best cost: no need to compute the SAD
+    uiTmpCost = sFeatureSearchIn.pMvdCostX[ iQpelX ] + sFeatureSearchIn.pMvdCostY[ iQpelY ];
+    if (uiTmpCost + iFeatureDifference >= uiBestCost)
+      continue;
+
+    // back to integer-pixel displacement relative to the current block
+    iIntepelX = (iQpelX >> 2) - iCurPixX;
+    iIntepelY = (iQpelY >> 2) - iCurPixY;
+    pCurRef = &pColoRef[iIntepelX + iIntepelY * iRefStride];
+    uiTmpCost += pSad (pEnc, iEncStride, pCurRef, iRefStride);
+    if (uiTmpCost < uiBestCost) {
+      sBestMv.iMvX = iIntepelX;
+      sBestMv.iMvY = iIntepelY;
+      uiBestCost = uiTmpCost;
+      pBestRef = pCurRef;
+
+      if (uiBestCost < uiSadCostThresh)
+        break;
+    }
+  }
+  SaveFeatureSearchOut (sBestMv, uiBestCost, pBestRef, pFeatureSearchOut);
+  return (i < iSearchTimesx2);
+}
+
+
+// MotionEstimateFeatureFullSearch: runs FeatureSearchOne with a feature difference of 0,
+// seeded with the current result in *pMe, and calls UpdateMeResults only when the search
+// produced a strictly lower SAD cost than pMe->uiSadCost.
+void MotionEstimateFeatureFullSearch (SFeatureSearchIn& sFeatureSearchIn,
+                                      const uint32_t kuiMaxSearchPoint,
+                                      SWelsME* pMe) {
+  SFeatureSearchOut sFeatureSearchOut = { { 0 } };//TODO: this can be refactored and removed
+  sFeatureSearchOut.uiBestSadCost = pMe->uiSadCost;
+  sFeatureSearchOut.sBestMv = pMe->sMv;
+  sFeatureSearchOut.pBestRef = pMe->pRefMb;
+
+  int32_t iFeatureDifference = 0;//TODO: change it according to computational-complexity setting when needed
+  FeatureSearchOne (sFeatureSearchIn, iFeatureDifference, kuiMaxSearchPoint, &sFeatureSearchOut);
+  if (sFeatureSearchOut.uiBestSadCost < pMe->uiSadCost) { //TODO: this may be refactored and removed
+    UpdateMeResults (sFeatureSearchOut.sBestMv,
+                     sFeatureSearchOut.uiBestSadCost, sFeatureSearchOut.pBestRef,
+                     pMe);
+  }
+}
+
+//switch related
+// CountFMECostDown: returns the sum of uiSliceFMECostDown over the GetCurrentSliceNum
+// (pCurLayer) slices in pCurLayer->ppSliceInLayer; 0 when that count is below 1.
+static uint32_t CountFMECostDown (const SDqLayer* pCurLayer) {
+  uint32_t uiCostDownSum = 0;
+  const int32_t kiSliceCount = GetCurrentSliceNum (pCurLayer);
+  if (kiSliceCount >= 1) {
+    int32_t iSliceIndex = 0;
+    SSlice* pSlice = pCurLayer->ppSliceInLayer[iSliceIndex];
+    while (iSliceIndex < kiSliceCount) {
+      pSlice = pCurLayer->ppSliceInLayer[iSliceIndex];
+      uiCostDownSum += pSlice->uiSliceFMECostDown;
+      ++ iSliceIndex;
+    }
+  }
+  return uiCostDownSum;
+}
+#define FMESWITCH_MBAVERCOSTSAVING_THRESHOLD (2) //empirically set.
+#define FMESWITCH_GOODFRAMECOUNT_MAX (5) //empirically set.
+// UpdateFMEGoodFrameCount: nudges uiFMEGoodFrameCount by one step, up when the average
+// per-MB cost saving exceeds FMESWITCH_MBAVERCOSTSAVING_THRESHOLD, down otherwise.
+// The counter saturates at both ends of [0, FMESWITCH_GOODFRAMECOUNT_MAX].
+static void UpdateFMEGoodFrameCount (const uint32_t iAvMBNormalizedRDcostDown, uint8_t& uiFMEGoodFrameCount) {
+  // the strategy and its numbers are empirical and may be changed
+  const bool kbSavingIsGood = (iAvMBNormalizedRDcostDown > FMESWITCH_MBAVERCOSTSAVING_THRESHOLD);
+  if (kbSavingIsGood && uiFMEGoodFrameCount < FMESWITCH_GOODFRAMECOUNT_MAX) {
+    ++ uiFMEGoodFrameCount;
+  } else if (!kbSavingIsGood && uiFMEGoodFrameCount > 0) {
+    -- uiFMEGoodFrameCount;
+  }
+}
+// UpdateFMESwitch: averages the layer's accumulated FME cost-down over its macroblock count
+// (iMbWidth * iMbHeight) and feeds the result into the good-frame counter.
+void UpdateFMESwitch (SDqLayer* pCurLayer) {
+  const uint32_t kuiCostDownPerMb = CountFMECostDown (pCurLayer) / (pCurLayer->iMbWidth * pCurLayer->iMbHeight);
+  UpdateFMEGoodFrameCount (kuiCostDownPerMb, pCurLayer->pFeatureSearchPreparation->uiFMEGoodFrameCount);
+}
+// UpdateFMESwitchNull: same signature as UpdateFMESwitch, intentionally does nothing.
+void UpdateFMESwitchNull (SDqLayer* pCurLayer) {
+}
+/////////////////////////
+// Search function options
+/////////////////////////
+// WelsDiamondCrossSearch: diamond search, then a cross search when the resulting SAD cost has
+// not dropped below the block-size specific threshold taken from the reference feature storage.
+// pMe->uiSadCostThreshold is (re)written as a side effect.
+void WelsDiamondCrossSearch (SWelsFuncPtrList* pFunc, SWelsME* pMe, SSlice* pSlice, const int32_t kiEncStride,
+                             const int32_t kiRefStride) {
+  // Step 1: diamond search
+  WelsDiamondSearch (pFunc, pMe, pSlice, kiEncStride, kiRefStride);
+
+  // Step 2: CROSS search, skipped when the diamond result is already below the threshold
+  pMe->uiSadCostThreshold = pMe->pRefFeatureStorage->uiSadCostThreshold[pMe->uiBlockSize];
+  if (pMe->uiSadCost < pMe->uiSadCostThreshold)
+    return;
+
+  WelsMotionCrossSearch (pFunc, pMe, pSlice, kiEncStride, kiRefStride);
+}
+// WelsDiamondCrossFeatureSearch: diamond + cross search, followed by a feature-based search
+// when the SAD cost is still at or above the threshold. The slice's uiSliceFMECostDown grows
+// by the SAD cost before the feature search minus the SAD cost after it.
+void WelsDiamondCrossFeatureSearch (SWelsFuncPtrList* pFunc, SWelsME* pMe, SSlice* pSlice, const int32_t kiEncStride,
+                                    const int32_t kiRefStride) {
+  // Step 1: diamond search + cross
+  WelsDiamondCrossSearch (pFunc, pMe, pSlice, kiEncStride, kiRefStride);
+  if (pMe->uiSadCost < pMe->uiSadCostThreshold)
+    return;
+
+  // Step 2: FeatureSearch
+  pSlice->uiSliceFMECostDown += pMe->uiSadCost;
+
+  SFeatureSearchIn sSearchInput = {0};
+  const bool kbInputReady = SetFeatureSearchIn (pFunc, *pMe, pSlice, pMe->pRefFeatureStorage,
+                            kiEncStride, kiRefStride,
+                            &sSearchInput);
+  if (kbInputReady) {
+    const uint32_t kuiSearchPointLimit = INT_MAX;//TODO: change it according to computational-complexity setting
+    MotionEstimateFeatureFullSearch (sSearchInput, kuiSearchPointLimit, pMe);
+  }
+  pSlice->uiSliceFMECostDown -= pMe->uiSadCost;
+}
+
+
+} // namespace WelsEnc
+
+
diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/svc_set_mb_syn_cabac.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/svc_set_mb_syn_cabac.cpp
new file mode 100644
index 000000000..03ea61e64
--- /dev/null
+++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/svc_set_mb_syn_cabac.cpp
@@ -0,0 +1,739 @@
+/*!
+ * \copy
+ * Copyright (c) 2009-2013, Cisco Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * \file svc_set_mb_syn_cabac.cpp
+ *
+ * \brief write cabac syntax
+ *
+ * \date 9/28/2014 Created
+ *
+ *************************************************************************************
+ */
+#include "svc_set_mb_syn.h"
+#include "set_mb_syn_cabac.h"
+#include "svc_enc_golomb.h"
+
+using namespace WelsEnc;
+
+namespace {
+
+// Per-block-category offsets, indexed by ECtxBlockCat. They are added to the base context
+// indices 105 / 166 / 227 (WelsWriteBlockResidualCabac) and 85 (WelsGetMbCtxCabac).
+static const uint16_t uiSignificantCoeffFlagOffset[5] = {0, 15, 29, 44, 47};
+static const uint16_t uiLastCoeffFlagOffset[5] = {0, 15, 29, 44, 47};
+static const uint16_t uiCoeffAbsLevelMinus1Offset[5] = {0, 10, 20, 30, 39};
+static const uint16_t uiCodecBlockFlagOffset[5] = {0, 4, 8, 12, 16};
+
+
+// WelsCabacMbType: encodes pCurMb->uiMbType as a sequence of context-coded bins.
+//  - I_SLICE: first bin uses context 3, +1 for each available left/top neighbour that is not
+//    intra4x4; a non-intra4x4 MB then adds a terminate bin and contexts 6..10 carrying the
+//    luma/chroma cbp and the mapped 16x16 prediction mode.
+//  - P_SLICE: contexts 14..17 select the inter partition or the intra prefix; the final
+//    else branch (any type not matched above) uses contexts 17..20 for the same cbp/mode
+//    information as the I-slice path.
+//  - any other slice type: nothing is written.
+static void WelsCabacMbType (SCabacCtx* pCabacCtx, SMB* pCurMb, SMbCache* pMbCache, int32_t iMbWidth,
+                             EWelsSliceType eSliceType) {
+
+  if (eSliceType == I_SLICE) {
+    uint32_t uiNeighborAvail = pCurMb->uiNeighborAvail;
+    SMB* pLeftMb = pCurMb - 1 ;
+    SMB* pTopMb = pCurMb - iMbWidth;
+    int32_t iCtx = 3;
+    if ((uiNeighborAvail & LEFT_MB_POS) && !IS_INTRA4x4 (pLeftMb->uiMbType))
+      iCtx++;
+    if ((uiNeighborAvail & TOP_MB_POS) && !IS_INTRA4x4 (pTopMb->uiMbType)) //TOP MB
+      iCtx++;
+
+    if (pCurMb->uiMbType == MB_TYPE_INTRA4x4) {
+      WelsCabacEncodeDecision (pCabacCtx, iCtx, 0);
+    } else {
+      int32_t iCbpChroma = pCurMb->uiCbp >> 4;
+      int32_t iCbpLuma = pCurMb->uiCbp & 15;
+      int32_t iPredMode = g_kiMapModeI16x16[pMbCache->uiLumaI16x16Mode];
+
+      WelsCabacEncodeDecision (pCabacCtx, iCtx, 1);
+      WelsCabacEncodeTerminate (pCabacCtx, 0);
+      if (iCbpLuma)
+        WelsCabacEncodeDecision (pCabacCtx, 6, 1);
+      else
+        WelsCabacEncodeDecision (pCabacCtx, 6, 0);
+
+      if (iCbpChroma == 0)
+        WelsCabacEncodeDecision (pCabacCtx, 7, 0);
+      else {
+        WelsCabacEncodeDecision (pCabacCtx, 7, 1);
+        WelsCabacEncodeDecision (pCabacCtx, 8, iCbpChroma >> 1);
+      }
+      WelsCabacEncodeDecision (pCabacCtx, 9, iPredMode >> 1);
+      WelsCabacEncodeDecision (pCabacCtx, 10, iPredMode & 1);
+    }
+  } else if (eSliceType == P_SLICE) {
+    uint32_t uiMbType = pCurMb->uiMbType;
+    if (uiMbType == MB_TYPE_16x16) {
+      WelsCabacEncodeDecision (pCabacCtx, 14, 0);
+      WelsCabacEncodeDecision (pCabacCtx, 15, 0);
+      WelsCabacEncodeDecision (pCabacCtx, 16, 0);
+    } else if ((uiMbType == MB_TYPE_16x8) || (uiMbType == MB_TYPE_8x16)) {
+
+      WelsCabacEncodeDecision (pCabacCtx, 14, 0);
+      WelsCabacEncodeDecision (pCabacCtx, 15, 1);
+      WelsCabacEncodeDecision (pCabacCtx, 17, pCurMb->uiMbType == MB_TYPE_16x8);
+
+    } else if ((uiMbType == MB_TYPE_8x8) || (uiMbType == MB_TYPE_8x8_REF0)) {
+      WelsCabacEncodeDecision (pCabacCtx, 14, 0);
+      WelsCabacEncodeDecision (pCabacCtx, 15, 0);
+      WelsCabacEncodeDecision (pCabacCtx, 16, 1);
+    } else if (pCurMb->uiMbType == MB_TYPE_INTRA4x4) {
+      WelsCabacEncodeDecision (pCabacCtx, 14, 1);
+      WelsCabacEncodeDecision (pCabacCtx, 17, 0);
+    } else {
+
+      int32_t iCbpChroma = pCurMb->uiCbp >> 4;
+      int32_t iCbpLuma = pCurMb->uiCbp & 15;
+      int32_t iPredMode = g_kiMapModeI16x16[pMbCache->uiLumaI16x16Mode];
+      //prefix
+      WelsCabacEncodeDecision (pCabacCtx, 14, 1);
+
+      //suffix
+      WelsCabacEncodeDecision (pCabacCtx, 17, 1);
+      WelsCabacEncodeTerminate (pCabacCtx, 0);
+      if (iCbpLuma)
+        WelsCabacEncodeDecision (pCabacCtx, 18, 1);
+      else
+        WelsCabacEncodeDecision (pCabacCtx, 18, 0);
+      if (iCbpChroma == 0)
+        WelsCabacEncodeDecision (pCabacCtx, 19, 0);
+      else {
+        WelsCabacEncodeDecision (pCabacCtx, 19, 1);
+        WelsCabacEncodeDecision (pCabacCtx, 19, iCbpChroma >> 1);
+      }
+      WelsCabacEncodeDecision (pCabacCtx, 20, iPredMode >> 1);
+      WelsCabacEncodeDecision (pCabacCtx, 20, iPredMode & 1);
+
+    }
+  }
+
+}
+// WelsCabacMbIntra4x4PredMode: for each of the 16 entries, writes the prev-pred-mode flag
+// (context 68) and, when the flag is 0, the remaining mode as three bins, least significant
+// bit first (context 69).
+void WelsCabacMbIntra4x4PredMode (SCabacCtx* pCabacCtx, SMbCache* pMbCache) {
+
+  for (int32_t iMode = 0; iMode < 16; iMode++) {
+
+    bool bPredFlag = pMbCache->pPrevIntra4x4PredModeFlag[iMode];
+    int8_t iRemMode = pMbCache->pRemIntra4x4PredModeFlag[iMode];
+
+    if (bPredFlag)
+      WelsCabacEncodeDecision (pCabacCtx, 68, 1);
+    else {
+      WelsCabacEncodeDecision (pCabacCtx, 68, 0);
+
+      WelsCabacEncodeDecision (pCabacCtx, 69, iRemMode & 0x01);
+      WelsCabacEncodeDecision (pCabacCtx, 69, (iRemMode >> 1) & 0x01);
+      WelsCabacEncodeDecision (pCabacCtx, 69, (iRemMode >> 2));
+    }
+  }
+}
+
+// WelsCabacMbIntraChromaPredMode: writes the mapped chroma prediction mode (0..3) as up to
+// three bins. The first bin uses context 64, +1 for each available left/top neighbour whose
+// mapped chroma mode is non-zero; the following bins use context 67.
+void WelsCabacMbIntraChromaPredMode (SCabacCtx* pCabacCtx, SMB* pCurMb, SMbCache* pMbCache, int32_t iMbWidth) {
+  uint32_t uiNeighborAvail = pCurMb->uiNeighborAvail;
+  SMB* pLeftMb = pCurMb - 1 ;
+  SMB* pTopMb = pCurMb - iMbWidth;
+
+  int32_t iPredMode = g_kiMapModeIntraChroma[pMbCache->uiChmaI8x8Mode];
+  int32_t iCtx = 64;
+  if ((uiNeighborAvail & LEFT_MB_POS) && g_kiMapModeIntraChroma[pLeftMb->uiChromPredMode] != 0)
+    iCtx++;
+  if ((uiNeighborAvail & TOP_MB_POS) && g_kiMapModeIntraChroma[pTopMb->uiChromPredMode] != 0)
+    iCtx++;
+
+  if (iPredMode == 0) {
+    WelsCabacEncodeDecision (pCabacCtx, iCtx, 0);
+  } else if (iPredMode == 1) {
+    WelsCabacEncodeDecision (pCabacCtx, iCtx, 1);
+    WelsCabacEncodeDecision (pCabacCtx, 67, 0);
+  } else if (iPredMode == 2) {
+    WelsCabacEncodeDecision (pCabacCtx, iCtx, 1);
+    WelsCabacEncodeDecision (pCabacCtx, 67, 1);
+    WelsCabacEncodeDecision (pCabacCtx, 67, 0);
+  } else {
+    WelsCabacEncodeDecision (pCabacCtx, iCtx, 1);
+    WelsCabacEncodeDecision (pCabacCtx, 67, 1);
+    WelsCabacEncodeDecision (pCabacCtx, 67, 1);
+  }
+}
+
+// WelsCabacMbCbp: writes the four luma cbp bits (bits 0..3 of uiCbp) and the chroma cbp
+// (uiCbp >> 4). iCbpBlockLeft/iCbpBlockTop hold the INVERTED cbp bits of the left/top
+// neighbour MB (1 = that neighbour block has no coded luma) and stay 0 when the neighbour is
+// unavailable; together with the already written bits of this MB they select the context.
+void WelsCabacMbCbp (SMB* pCurMb, int32_t iMbWidth, SCabacCtx* pCabacCtx) {
+  int32_t iCbpBlockLuma[4] = { (pCurMb->uiCbp) & 1, (pCurMb->uiCbp >> 1) & 1, (pCurMb->uiCbp >> 2) & 1, (pCurMb->uiCbp >> 3) & 1};
+  int32_t iCbpChroma = pCurMb->uiCbp >> 4;
+  int32_t iCbpBlockLeft[4] = {0, 0, 0, 0};
+  int32_t iCbpBlockTop[4] = {0, 0, 0, 0};
+  int32_t iCbpLeftChroma = 0;
+  int32_t iCbpTopChroma = 0;
+  int32_t iCbp = 0;
+  int32_t iCtx = 0;
+  uint32_t uiNeighborAvail = pCurMb->uiNeighborAvail;
+  if (uiNeighborAvail & LEFT_MB_POS) {
+    iCbp = (pCurMb - 1)->uiCbp;
+    iCbpBlockLeft[0] = ! (iCbp & 1);
+    iCbpBlockLeft[1] = ! ((iCbp >> 1) & 1);
+    iCbpBlockLeft[2] = ! ((iCbp >> 2) & 1);
+    iCbpBlockLeft[3] = ! ((iCbp >> 3) & 1);
+    iCbpLeftChroma = iCbp >> 4;
+    if (iCbpLeftChroma)
+      iCtx += 1;
+  }
+  if (uiNeighborAvail & TOP_MB_POS) {
+    iCbp = (pCurMb - iMbWidth)->uiCbp;
+    iCbpBlockTop[0] = ! (iCbp & 1);
+    iCbpBlockTop[1] = ! ((iCbp >> 1) & 1);
+    iCbpBlockTop[2] = ! ((iCbp >> 2) & 1);
+    iCbpBlockTop[3] = ! ((iCbp >> 3) & 1);
+    iCbpTopChroma = iCbp >> 4;
+    if (iCbpTopChroma)
+      iCtx += 2;
+  }
+  // luma: context 73 + (left block uncoded) + 2 * (top block uncoded), where "left"/"top" is
+  // either a neighbour MB block or a block of this MB
+  WelsCabacEncodeDecision (pCabacCtx, 73 + iCbpBlockLeft[1] + iCbpBlockTop[2] * 2, iCbpBlockLuma[0]);
+  WelsCabacEncodeDecision (pCabacCtx, 73 + !iCbpBlockLuma[0] + iCbpBlockTop[3] * 2, iCbpBlockLuma[1]);
+  WelsCabacEncodeDecision (pCabacCtx, 73 + iCbpBlockLeft[3] + (!iCbpBlockLuma[0]) * 2 , iCbpBlockLuma[2]);
+  WelsCabacEncodeDecision (pCabacCtx, 73 + !iCbpBlockLuma[2] + (!iCbpBlockLuma[1]) * 2, iCbpBlockLuma[3]);
+
+
+  //chroma
+  if (iCbpChroma) {
+    WelsCabacEncodeDecision (pCabacCtx, 77 + iCtx, 1);
+    WelsCabacEncodeDecision (pCabacCtx, 81 + (iCbpLeftChroma >> 1) + ((iCbpTopChroma >> 1) * 2), iCbpChroma > 1);
+  } else {
+    WelsCabacEncodeDecision (pCabacCtx, 77 + iCtx, 0);
+  }
+}
+
+// WelsCabacMbDeltaQp: writes pCurMb->iLumaDQp. Unless this is the first MB of the slice,
+// iLumaDQp is first recomputed against the previous MB (pCurMb - 1) and the first-bin context
+// becomes 61 when that MB is not skipped, carries residual (or is intra16x16) and had a
+// non-zero delta itself. A non-zero delta d is mapped to iValue = -2d (d < 0) or 2d - 1
+// (d > 0) and written as iValue one-bins followed by a zero-bin (contexts 60+iCtx, 62, 63).
+void WelsCabacMbDeltaQp (SMB* pCurMb, SCabacCtx* pCabacCtx, bool bFirstMbInSlice) {
+  SMB* pPrevMb = NULL;
+  int32_t iCtx = 0;
+
+  if (!bFirstMbInSlice) {
+    pPrevMb = pCurMb - 1;
+    pCurMb->iLumaDQp = pCurMb->uiLumaQp - pPrevMb->uiLumaQp;
+
+    if (IS_SKIP (pPrevMb->uiMbType) || ((pPrevMb->uiMbType != MB_TYPE_INTRA16x16) && (!pPrevMb->uiCbp))
+        || (!pPrevMb->iLumaDQp))
+      iCtx = 0;
+    else
+      iCtx = 1;
+  }
+
+  if (pCurMb->iLumaDQp) {
+    int32_t iValue = pCurMb->iLumaDQp < 0 ? (-2 * pCurMb->iLumaDQp) : (2 * pCurMb->iLumaDQp - 1);
+    WelsCabacEncodeDecision (pCabacCtx, 60 + iCtx, 1);
+    if (iValue == 1) {
+      WelsCabacEncodeDecision (pCabacCtx, 60 + 2, 0);
+    } else {
+      WelsCabacEncodeDecision (pCabacCtx, 60 + 2, 1);
+      iValue--;
+      while ((--iValue) > 0)
+        WelsCabacEncodeDecision (pCabacCtx, 60 + 3, 1);
+      WelsCabacEncodeDecision (pCabacCtx, 60 + 3, 0);
+    }
+  } else {
+    WelsCabacEncodeDecision (pCabacCtx, 60 + iCtx, 0);
+  }
+}
+
+// WelsMbSkipCabac: writes bSkipFlag with base context 11 for P_SLICE and 24 for every other
+// slice type, +1 for each available left/top neighbour that is not a skip MB. When the flag
+// is set, the MB's 16 MVD entries, uiCbp and iCbpDc are cleared.
+void WelsMbSkipCabac (SCabacCtx* pCabacCtx, SMB* pCurMb, int32_t iMbWidth, EWelsSliceType eSliceType,
+                      int16_t bSkipFlag) {
+  int32_t iCtx = (eSliceType == P_SLICE) ? 11 : 24;
+  uint32_t uiNeighborAvail = pCurMb->uiNeighborAvail;
+  if (uiNeighborAvail & LEFT_MB_POS) { //LEFT MB
+    if (!IS_SKIP ((pCurMb - 1)->uiMbType))
+      iCtx++;
+  }
+  if (uiNeighborAvail & TOP_MB_POS) { //TOP MB
+    if (!IS_SKIP ((pCurMb - iMbWidth)->uiMbType))
+      iCtx++;
+  }
+  WelsCabacEncodeDecision (pCabacCtx, iCtx, bSkipFlag);
+
+  if (bSkipFlag) {
+    for (int i = 0; i < 16; i++) {
+      pCurMb->sMvd[i].iMvX = 0;
+      pCurMb->sMvd[i].iMvY = 0;
+    }
+    pCurMb->uiCbp = pCurMb->iCbpDc = 0;
+  }
+}
+
+// WelsCabacMbRef: writes the reference index cached at iRefIndexCache[iIdx + 7] as that many
+// one-bins followed by a zero-bin. The entries at iIdx + 6 (A) and iIdx + 1 (B) pick the
+// first context (54 + iCtx); later bins use iCtx = (iCtx >> 2) + 4.
+// pCurMb is not used by this function.
+void WelsCabacMbRef (SCabacCtx* pCabacCtx, SMB* pCurMb, SMbCache* pMbCache, int16_t iIdx) {
+  SMVComponentUnit* pMvComp = &pMbCache->sMvComponents;
+  const int16_t iRefIdxA = pMvComp->iRefIndexCache[iIdx + 6];
+  const int16_t iRefIdxB = pMvComp->iRefIndexCache[iIdx + 1];
+  int16_t iRefIdx = pMvComp->iRefIndexCache[iIdx + 7];
+  int16_t iCtx = 0;
+
+  if ((iRefIdxA > 0) && (!pMbCache->bMbTypeSkip[3]))
+    iCtx++;
+  if ((iRefIdxB > 0) && (!pMbCache->bMbTypeSkip[1]))
+    iCtx += 2;
+
+  while (iRefIdx > 0) {
+    WelsCabacEncodeDecision (pCabacCtx, 54 + iCtx, 1);
+    iCtx = (iCtx >> 2) + 4;
+    iRefIdx--;
+  }
+  WelsCabacEncodeDecision (pCabacCtx, 54 + iCtx, 0);
+}
+
+// WelsCabacMbMvdLx: writes one MVD component.
+//  - sMvd:     signed component value
+//  - iCtx:     base context index for this component
+//  - iPredMvd: neighbour magnitude sum; > 32 selects increment 2, > 2 selects increment 1
+// |sMvd| is written as a unary prefix capped at 9 bins; values >= 9 add a bypass suffix via
+// WelsCabacEncodeUeBypass (pCabacCtx, 3, |sMvd| - 9). A bypass sign bin follows every
+// non-zero value.
+inline void WelsCabacMbMvdLx (SCabacCtx* pCabacCtx, int32_t sMvd, int32_t iCtx, int32_t iPredMvd) {
+  const int32_t iAbsMvd = WELS_ABS (sMvd);
+  int32_t iCtxInc = 0;
+  int32_t iPrefix = WELS_MIN (iAbsMvd, 9);
+  int32_t i = 0;
+
+  if (iPredMvd > 32)
+    iCtxInc += 2;
+  else if (iPredMvd > 2)
+    iCtxInc += 1;
+
+  if (iPrefix) {
+    if (iPrefix < 9) {
+      WelsCabacEncodeDecision (pCabacCtx, iCtx + iCtxInc, 1);
+      iCtxInc = 3;
+      for (i = 0; i < iPrefix - 1; i++) {
+        WelsCabacEncodeDecision (pCabacCtx, iCtx + iCtxInc, 1);
+        if (i < 3)
+          iCtxInc++;
+      }
+      WelsCabacEncodeDecision (pCabacCtx, iCtx + iCtxInc, 0);
+      WelsCabacEncodeBypassOne (pCabacCtx, sMvd < 0);
+    } else {
+      WelsCabacEncodeDecision (pCabacCtx, iCtx + iCtxInc, 1);
+      iCtxInc = 3;
+      for (i = 0; i < (9 - 1); i++) {
+        WelsCabacEncodeDecision (pCabacCtx, iCtx + iCtxInc, 1);
+        if (i < 3)
+          iCtxInc++;
+      }
+      WelsCabacEncodeUeBypass (pCabacCtx, 3, iAbsMvd - 9);
+      WelsCabacEncodeBypassOne (pCabacCtx, sMvd < 0);
+    }
+  } else {
+    WelsCabacEncodeDecision (pCabacCtx, iCtx + iCtxInc, 0);
+  }
+}
+// WelsCabacMbMvd: computes the MVD for the 4x4 block at i4x4ScanIdx via sDeltaMv (sCurMv,
+// sPredMv) - presumably sCurMv minus sPredMv; confirm against SMVUnitXY - writes its X
+// component with base context 40 and its Y component with base context 47, and returns it.
+// The context selector is |left MVD| + |top MVD|: the top value comes from entry
+// i4x4ScanIdx - 4 of this MB or entry i4x4ScanIdx + 12 of the MB above, the left value from
+// entry i4x4ScanIdx - 1 of this MB or entry i4x4ScanIdx + 3 of the MB to the left; a missing
+// neighbour contributes 0.
+SMVUnitXY WelsCabacMbMvd (SCabacCtx* pCabacCtx, SMB* pCurMb, uint32_t iMbWidth,
+                          SMVUnitXY sCurMv, SMVUnitXY sPredMv, int16_t i4x4ScanIdx) {
+  uint32_t iAbsMvd0, iAbsMvd1;
+  uint8_t uiNeighborAvail = pCurMb->uiNeighborAvail;
+  SMVUnitXY sMvd;
+  SMVUnitXY sMvdLeft;
+  SMVUnitXY sMvdTop;
+
+  sMvdLeft.iMvX = sMvdLeft.iMvY = sMvdTop.iMvX = sMvdTop.iMvY = 0;
+  sMvd.sDeltaMv (sCurMv, sPredMv);
+  if ((i4x4ScanIdx < 4) && (uiNeighborAvail & TOP_MB_POS)) { //top row blocks
+    sMvdTop.sAssignMv ((pCurMb - iMbWidth)->sMvd[i4x4ScanIdx + 12]);
+  } else if (i4x4ScanIdx >= 4) {
+    sMvdTop.sAssignMv (pCurMb->sMvd[i4x4ScanIdx - 4]);
+  }
+  if ((! (i4x4ScanIdx & 0x03)) && (uiNeighborAvail & LEFT_MB_POS)) { //left column blocks
+    sMvdLeft.sAssignMv ((pCurMb - 1)->sMvd[i4x4ScanIdx + 3]);
+  } else if (i4x4ScanIdx & 0x03) {
+    sMvdLeft.sAssignMv (pCurMb->sMvd[i4x4ScanIdx - 1]);
+  }
+
+  iAbsMvd0 = WELS_ABS (sMvdLeft.iMvX) + WELS_ABS (sMvdTop.iMvX);
+  iAbsMvd1 = WELS_ABS (sMvdLeft.iMvY) + WELS_ABS (sMvdTop.iMvY);
+
+  WelsCabacMbMvdLx (pCabacCtx, sMvd.iMvX, 40, iAbsMvd0);
+  WelsCabacMbMvdLx (pCabacCtx, sMvd.iMvY, 47, iAbsMvd1);
+  return sMvd;
+}
+// WelsCabacSubMbType: writes the sub-MB type of each of the four 8x8 partitions using
+// contexts 21..23: 8x8 -> "1", 8x4 -> "0 0", otherwise "0 1 b" with b = 1 for 4x8.
+static void WelsCabacSubMbType (SCabacCtx* pCabacCtx, SMB* pCurMb) {
+  for (int32_t i8x8Idx = 0; i8x8Idx < 4; ++i8x8Idx) {
+    uint32_t uiSubMbType = pCurMb->uiSubMbType[i8x8Idx];
+    if (SUB_MB_TYPE_8x8 == uiSubMbType) {
+      WelsCabacEncodeDecision (pCabacCtx, 21, 1);
+      continue;
+    }
+    WelsCabacEncodeDecision (pCabacCtx, 21, 0);
+    if (SUB_MB_TYPE_8x4 == uiSubMbType) {
+      WelsCabacEncodeDecision (pCabacCtx, 22, 0);
+    } else {
+      WelsCabacEncodeDecision (pCabacCtx, 22, 1);
+      WelsCabacEncodeDecision (pCabacCtx, 23, SUB_MB_TYPE_4x8 == uiSubMbType);
+    }
+  } //for
+}
+
+// WelsCabacSubMbMvd: writes the MVDs of every sub-partition of the four 8x8 blocks and
+// stores each written MVD into all pCurMb->sMvd entries the sub-partition covers (4 entries
+// for 8x8, 2 for 8x4 and 4x8, 1 for 4x4), so later blocks can use them as neighbours.
+static void WelsCabacSubMbMvd (SCabacCtx* pCabacCtx, SMB* pCurMb, SMbCache* pMbCache, const int kiMbWidth) {
+  SMVUnitXY sMvd;
+  int32_t i8x8Idx, i4x4ScanIdx;
+  for (i8x8Idx = 0; i8x8Idx < 4; ++i8x8Idx) {
+    uint32_t uiSubMbType = pCurMb->uiSubMbType[i8x8Idx];
+    if (SUB_MB_TYPE_8x8 == uiSubMbType) {
+      i4x4ScanIdx = g_kuiMbCountScan4Idx[i8x8Idx << 2];
+      sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, kiMbWidth, pCurMb->sMv[i4x4ScanIdx], pMbCache->sMbMvp[i4x4ScanIdx],
+                             i4x4ScanIdx);
+      pCurMb->sMvd[ i4x4ScanIdx].sAssignMv (sMvd);
+      pCurMb->sMvd[1 + i4x4ScanIdx].sAssignMv (sMvd);
+      pCurMb->sMvd[4 + i4x4ScanIdx].sAssignMv (sMvd);
+      pCurMb->sMvd[5 + i4x4ScanIdx].sAssignMv (sMvd);
+    } else if (SUB_MB_TYPE_4x4 == uiSubMbType) {
+      for (int32_t i4x4Idx = 0; i4x4Idx < 4; ++i4x4Idx) {
+        i4x4ScanIdx = g_kuiMbCountScan4Idx[ (i8x8Idx << 2) + i4x4Idx];
+        sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, kiMbWidth, pCurMb->sMv[i4x4ScanIdx], pMbCache->sMbMvp[i4x4ScanIdx],
+                               i4x4ScanIdx);
+        pCurMb->sMvd[i4x4ScanIdx].sAssignMv (sMvd);
+      }
+    } else if (SUB_MB_TYPE_8x4 == uiSubMbType) {
+      for (int32_t i8x4Idx = 0; i8x4Idx < 2; ++i8x4Idx) {
+        i4x4ScanIdx = g_kuiMbCountScan4Idx[ (i8x8Idx << 2) + (i8x4Idx << 1)];
+        sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, kiMbWidth, pCurMb->sMv[i4x4ScanIdx], pMbCache->sMbMvp[i4x4ScanIdx],
+                               i4x4ScanIdx);
+        pCurMb->sMvd[ i4x4ScanIdx].sAssignMv (sMvd);
+        pCurMb->sMvd[1 + i4x4ScanIdx].sAssignMv (sMvd);
+      }
+    } else if (SUB_MB_TYPE_4x8 == uiSubMbType) {
+      for (int32_t i4x8Idx = 0; i4x8Idx < 2; ++i4x8Idx) {
+        i4x4ScanIdx = g_kuiMbCountScan4Idx[ (i8x8Idx << 2) + i4x8Idx];
+        sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, kiMbWidth, pCurMb->sMv[i4x4ScanIdx], pMbCache->sMbMvp[i4x4ScanIdx],
+                               i4x4ScanIdx);
+        pCurMb->sMvd[ i4x4ScanIdx].sAssignMv (sMvd);
+        pCurMb->sMvd[4 + i4x4ScanIdx].sAssignMv (sMvd);
+      }
+    }
+  }
+}
+
+// WelsGetMbCtxCabac: returns the context index used for a block's coded flag:
+// 85 + uiCodecBlockFlagOffset[eCtxBlockCat] + increment, where the increment is 1 for a
+// non-zero left value (A) plus 2 for a non-zero top value (B).
+//  - AC / 4x4 categories read A and B from the non-zero-count cache at iIdx - 1 and iIdx - 8.
+//  - DC categories read bit iIdx of the left / top MB's iCbpDc when that MB is available.
+// A value of -1 (the initial value; for the cache presumably "not available" - confirm)
+// counts as non-zero only when the current MB is intra.
+int16_t WelsGetMbCtxCabac (SMbCache* pMbCache, SMB* pCurMb, uint32_t iMbWidth, ECtxBlockCat eCtxBlockCat,
+                           int16_t iIdx) {
+  int16_t iNzA = -1, iNzB = -1;
+  int8_t* pNonZeroCoeffCount = pMbCache->iNonZeroCoeffCount;
+  int32_t bIntra = IS_INTRA (pCurMb->uiMbType);
+  int32_t iCtxInc = 0;
+  switch (eCtxBlockCat) {
+  case LUMA_AC:
+  case CHROMA_AC:
+  case LUMA_4x4:
+    iNzA = pNonZeroCoeffCount[iIdx - 1];
+    iNzB = pNonZeroCoeffCount[iIdx - 8];
+    break;
+  case LUMA_DC:
+  case CHROMA_DC:
+    if (pCurMb->uiNeighborAvail & LEFT_MB_POS)
+      iNzA = (pCurMb - 1)->iCbpDc & (1 << iIdx);
+    if (pCurMb->uiNeighborAvail & TOP_MB_POS)
+      iNzB = (pCurMb - iMbWidth)->iCbpDc & (1 << iIdx);
+    break;
+  default:
+    break;
+  }
+  if (((iNzA == -1) && bIntra) || (iNzA > 0))
+    iCtxInc += 1;
+  if (((iNzB == -1) && bIntra) || (iNzB > 0))
+    iCtxInc += 2;
+  return 85 + uiCodecBlockFlagOffset[eCtxBlockCat] + iCtxInc;
+}
+
+// WelsWriteBlockResidualCabac: writes one residual block.
+//  - eCtxBlockCat / iIdx: block category and index used to derive the contexts
+//  - iNonZeroCount:       number of non-zero coefficients in pBlock; 0 writes only a 0 flag
+//  - pBlock / iEndIdx:    coefficients and the index of the last scan position
+// Order of output: coded flag, then for each scan position a significance bin and, for
+// non-zero coefficients, a "last" bin; then the collected levels in reverse scan order, each
+// as magnitude bins followed by a bypass sign bin.
+void WelsWriteBlockResidualCabac (SMbCache* pMbCache, SMB* pCurMb, uint32_t iMbWidth, SCabacCtx* pCabacCtx,
+                                  ECtxBlockCat eCtxBlockCat, int16_t iIdx, int16_t iNonZeroCount, int16_t* pBlock, int16_t iEndIdx) {
+  int32_t iCtx = WelsGetMbCtxCabac (pMbCache, pCurMb, iMbWidth, eCtxBlockCat, iIdx);
+  if (iNonZeroCount) {
+    int16_t iLevel[16];
+    const int32_t iCtxSig = 105 + uiSignificantCoeffFlagOffset[eCtxBlockCat];
+    const int32_t iCtxLast = 166 + uiLastCoeffFlagOffset[eCtxBlockCat];
+    const int32_t iCtxLevel = 227 + uiCoeffAbsLevelMinus1Offset[eCtxBlockCat];
+    int32_t iNonZeroIdx = 0;
+    int32_t i = 0;
+
+    WelsCabacEncodeDecision (pCabacCtx, iCtx, 1);
+    while (1) {
+      if (pBlock[i]) {
+        iLevel[iNonZeroIdx] = pBlock[i];
+
+        iNonZeroIdx++;
+        WelsCabacEncodeDecision (pCabacCtx, iCtxSig + i, 1);
+        if (iNonZeroIdx != iNonZeroCount)
+          WelsCabacEncodeDecision (pCabacCtx, iCtxLast + i, 0);
+        else {
+          WelsCabacEncodeDecision (pCabacCtx, iCtxLast + i, 1);
+          break;
+        }
+      } else
+        WelsCabacEncodeDecision (pCabacCtx, iCtxSig + i, 0);
+      i++;
+      // reaching the final position with coefficients still outstanding: that coefficient
+      // is collected without writing a significance or last bin for it
+      if (i == iEndIdx) {
+        iLevel[iNonZeroIdx] = pBlock[i];
+        iNonZeroIdx++;
+        break;
+      }
+    }
+
+    int32_t iNumAbsLevelGt1 = 0;
+    int32_t iCtx1 = iCtxLevel + 1;
+
+    do {
+      int32_t iPrefix = 0;
+      iNonZeroIdx--;
+      iPrefix = WELS_ABS (iLevel[iNonZeroIdx]) - 1;
+      if (iPrefix) {
+        // |level| > 1: unary prefix capped at 14, bypass-coded remainder for |level| >= 15
+        iPrefix = WELS_MIN (iPrefix, 14);
+        iCtx = WELS_MIN (iCtxLevel + 4, iCtx1);
+        WelsCabacEncodeDecision (pCabacCtx, iCtx, 1);
+        iNumAbsLevelGt1++;
+        iCtx = iCtxLevel + 4 + WELS_MIN (5 - (eCtxBlockCat == CHROMA_DC), iNumAbsLevelGt1);
+        for (i = 1; i < iPrefix; i++)
+          WelsCabacEncodeDecision (pCabacCtx, iCtx, 1);
+        if (WELS_ABS (iLevel[iNonZeroIdx]) < 15)
+          WelsCabacEncodeDecision (pCabacCtx, iCtx, 0);
+        else
+          WelsCabacEncodeUeBypass (pCabacCtx, 0, WELS_ABS (iLevel[iNonZeroIdx]) - 15);
+        iCtx1 = iCtxLevel;
+      } else {
+        // |level| == 1
+        iCtx = WELS_MIN (iCtxLevel + 4, iCtx1);
+        WelsCabacEncodeDecision (pCabacCtx, iCtx, 0);
+        iCtx1 += iNumAbsLevelGt1 == 0;
+      }
+      WelsCabacEncodeBypassOne (pCabacCtx, iLevel[iNonZeroIdx] < 0);
+    } while (iNonZeroIdx > 0);
+
+  } else {
+    WelsCabacEncodeDecision (pCabacCtx, iCtx, 0);
+  }
+
+
+}
+// WelsCalNonZeroCount2x2Block: returns how many of pBlock[0..3] are non-zero.
+int32_t WelsCalNonZeroCount2x2Block (int16_t* pBlock) {
+  return (pBlock[0] != 0)
+         + (pBlock[1] != 0)
+         + (pBlock[2] != 0)
+         + (pBlock[3] != 0);
+}
+// WelsWriteMbResidualCabac: writes the delta QP and all residual blocks of pCurMb and
+// updates pCurMb->iCbpDc, pCurMb->iLumaDQp and pSlice->uiLastMbQp. When the MB has no coded
+// residual and is not intra16x16, nothing is written and the MB inherits the slice's last
+// QP instead. Always returns 0.
+// NOTE(review): the sMbCacheInfo parameter is never read; the cache is taken from
+// pSlice->sMbCacheInfo instead.
+int32_t WelsWriteMbResidualCabac (SWelsFuncPtrList* pFuncList, SSlice* pSlice, SMbCache* sMbCacheInfo, SMB* pCurMb,
+                                  SCabacCtx* pCabacCtx,
+                                  int16_t iMbWidth, uint32_t uiChromaQpIndexOffset) {
+
+  const uint16_t uiMbType = pCurMb->uiMbType;
+  SMbCache* pMbCache = &pSlice->sMbCacheInfo;
+  int16_t i = 0;
+  int8_t* pNonZeroCoeffCount = pMbCache->iNonZeroCoeffCount;
+  SSliceHeaderExt* pSliceHeadExt = &pSlice->sSliceHeaderExt;
+  const int32_t iSliceFirstMbXY = pSliceHeadExt->sSliceHeader.iFirstMbInSlice;
+
+
+  pCurMb->iCbpDc = 0;
+  pCurMb->iLumaDQp = 0;
+
+  if ((pCurMb->uiCbp > 0) || (uiMbType == MB_TYPE_INTRA16x16)) {
+    int32_t iCbpChroma = pCurMb->uiCbp >> 4;
+    int32_t iCbpLuma = pCurMb->uiCbp & 15;
+
+    pCurMb->iLumaDQp = pCurMb->uiLumaQp - pSlice->uiLastMbQp;
+    WelsCabacMbDeltaQp (pCurMb, pCabacCtx, (pCurMb->iMbXY == iSliceFirstMbXY));
+    pSlice->uiLastMbQp = pCurMb->uiLumaQp;
+
+    if (uiMbType == MB_TYPE_INTRA16x16) {
+      //Luma DC
+      int iNonZeroCount = pFuncList->pfGetNoneZeroCount (pMbCache->pDct->iLumaI16x16Dc);
+      WelsWriteBlockResidualCabac (pMbCache, pCurMb, iMbWidth, pCabacCtx, LUMA_DC, 0, iNonZeroCount,
+                                   pMbCache->pDct->iLumaI16x16Dc, 15);
+      if (iNonZeroCount)
+        pCurMb->iCbpDc |= 1;
+      //Luma AC
+
+      if (iCbpLuma) {
+        for (i = 0; i < 16; i++) {
+          int32_t iIdx = g_kuiCache48CountScan4Idx[i];
+          WelsWriteBlockResidualCabac (pMbCache, pCurMb, iMbWidth, pCabacCtx, LUMA_AC, iIdx,
+                                       pNonZeroCoeffCount[iIdx], pMbCache->pDct->iLumaBlock[i], 14);
+        }
+      }
+    } else {
+      //Luma AC
+      for (i = 0; i < 16; i++) {
+        // one cbp bit per group of four 4x4 blocks (i >> 2)
+        if (iCbpLuma & (1 << (i >> 2))) {
+          int32_t iIdx = g_kuiCache48CountScan4Idx[i];
+          WelsWriteBlockResidualCabac (pMbCache, pCurMb, iMbWidth, pCabacCtx, LUMA_4x4, iIdx,
+                                       pNonZeroCoeffCount[iIdx], pMbCache->pDct->iLumaBlock[i], 15);
+        }
+
+      }
+    }
+
+    if (iCbpChroma) {
+      int32_t iNonZeroCount = 0;
+      //chroma DC
+      iNonZeroCount = WelsCalNonZeroCount2x2Block (pMbCache->pDct->iChromaDc[0]);
+      if (iNonZeroCount)
+        pCurMb->iCbpDc |= 0x2;
+      WelsWriteBlockResidualCabac (pMbCache, pCurMb, iMbWidth, pCabacCtx, CHROMA_DC, 1, iNonZeroCount,
+                                   pMbCache->pDct->iChromaDc[0], 3);
+
+      iNonZeroCount = WelsCalNonZeroCount2x2Block (pMbCache->pDct->iChromaDc[1]);
+      if (iNonZeroCount)
+        pCurMb->iCbpDc |= 0x4;
+      WelsWriteBlockResidualCabac (pMbCache, pCurMb, iMbWidth, pCabacCtx, CHROMA_DC, 2, iNonZeroCount,
+                                   pMbCache->pDct->iChromaDc[1], 3);
+      if (iCbpChroma & 0x02) {
+        const uint8_t* g_kuiCache48CountScan4Idx_16base = &g_kuiCache48CountScan4Idx[16];
+        //Cb AC
+        for (i = 0; i < 4; i++) {
+          int32_t iIdx = g_kuiCache48CountScan4Idx_16base[i];
+          WelsWriteBlockResidualCabac (pMbCache, pCurMb, iMbWidth, pCabacCtx, CHROMA_AC, iIdx,
+                                       pNonZeroCoeffCount[iIdx], pMbCache->pDct->iChromaBlock[i], 14);
+
+        }
+
+        //Cr AC
+
+        for (i = 0; i < 4; i++) {
+          int32_t iIdx = 24 + g_kuiCache48CountScan4Idx_16base[i];
+          WelsWriteBlockResidualCabac (pMbCache, pCurMb, iMbWidth, pCabacCtx, CHROMA_AC, iIdx,
+                                       pNonZeroCoeffCount[iIdx], pMbCache->pDct->iChromaBlock[4 + i], 14);
+        }
+      }
+    }
+  } else {
+    pCurMb->iLumaDQp = 0;
+    pCurMb->uiLumaQp = pSlice->uiLastMbQp;
+    pCurMb->uiChromaQp = g_kuiChromaQpTable[CLIP3_QP_0_51 (pCurMb->uiLumaQp + uiChromaQpIndexOffset)];
+  }
+  return 0;
+}
+
+} // anon ns.
+
+namespace WelsEnc {
+
+// WelsInitSliceCabac: aligns the slice bit-string (BsAlign), then initialises the slice's
+// CABAC contexts with pSlice->iCabacInitIdc and points the CABAC encoder at the bit-string's
+// current/end buffer pointers.
+void WelsInitSliceCabac (sWelsEncCtx* pEncCtx, SSlice* pSlice) {
+  /* alignment needed */
+  SBitStringAux* pBs = pSlice->pSliceBsa;
+  BsAlign (pBs);
+
+  /* init cabac */
+  WelsCabacContextInit (pEncCtx, &pSlice->sCabacCtx, pSlice->iCabacInitIdc);
+  WelsCabacEncodeInit (&pSlice->sCabacCtx, pBs->pCurBuf, pBs->pEndBuf);
+}
+
+// WelsSpatialWriteMbSynCabac: writes all CABAC syntax of one macroblock in this order:
+// terminate bin (every MB after the first of the slice), skip flag, MB type, intra
+// prediction modes or reference indices + MVDs, cbp, residual.
+// Side effects on pCurMb: sMvd[] is refreshed for every MB type handled here, a skipped MB
+// inherits the slice's last QP, and uiChromPredMode is reset to 0 for non-intra MBs.
+// Returns the value of WelsWriteMbResidualCabac (0 for skipped MBs).
+int32_t WelsSpatialWriteMbSynCabac (sWelsEncCtx* pEncCtx, SSlice* pSlice, SMB* pCurMb) {
+  SCabacCtx* pCabacCtx = &pSlice->sCabacCtx;
+  SMbCache* pMbCache = &pSlice->sMbCacheInfo;
+  const uint16_t uiMbType = pCurMb->uiMbType;
+  SSliceHeaderExt* pSliceHeadExt = &pSlice->sSliceHeaderExt;
+  // holds the header's active reference count minus one, despite the name
+  uint32_t uiNumRefIdxL0Active = pSliceHeadExt->sSliceHeader.uiNumRefIdxL0Active - 1;
+  const int32_t iSliceFirstMbXY = pSliceHeadExt->sSliceHeader.iFirstMbInSlice;
+  int16_t i = 0;
+  int16_t iMbWidth = pEncCtx->pCurDqLayer->iMbWidth;
+  uint32_t uiChromaQpIndexOffset = pEncCtx->pCurDqLayer->sLayerInfo.pPpsP->uiChromaQpIndexOffset;
+  SMVUnitXY sMvd;
+  int32_t iRet = 0;
+  if (pCurMb->iMbXY > iSliceFirstMbXY)
+    WelsCabacEncodeTerminate (&pSlice->sCabacCtx, 0);
+
+  if (IS_SKIP (pCurMb->uiMbType)) {
+    pCurMb->uiLumaQp = pSlice->uiLastMbQp;
+    pCurMb->uiChromaQp = g_kuiChromaQpTable[CLIP3_QP_0_51 (pCurMb->uiLumaQp + uiChromaQpIndexOffset)];
+    WelsMbSkipCabac (&pSlice->sCabacCtx, pCurMb, iMbWidth, pEncCtx->eSliceType, 1);
+
+  } else {
+    //skip flag
+    if (pEncCtx->eSliceType != I_SLICE)
+      WelsMbSkipCabac (&pSlice->sCabacCtx, pCurMb, iMbWidth, pEncCtx->eSliceType, 0);
+
+    //write mb type
+    WelsCabacMbType (pCabacCtx, pCurMb, pMbCache, iMbWidth, pEncCtx->eSliceType);
+
+    if (IS_INTRA (uiMbType)) {
+      if (uiMbType == MB_TYPE_INTRA4x4) {
+        WelsCabacMbIntra4x4PredMode (pCabacCtx, pMbCache);
+      }
+      WelsCabacMbIntraChromaPredMode (pCabacCtx, pCurMb, pMbCache, iMbWidth);
+      // intra MBs carry no MVD: clear all 16 entries
+      sMvd.iMvX = sMvd.iMvY = 0;
+      for (i = 0; i < 16; ++i) {
+        pCurMb->sMvd[i].sAssignMv (sMvd);
+      }
+
+    } else if (uiMbType == MB_TYPE_16x16) {
+
+      if (uiNumRefIdxL0Active > 0) {
+        WelsCabacMbRef (pCabacCtx, pCurMb, pMbCache, 0);
+      }
+      sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, iMbWidth, pCurMb->sMv[0], pMbCache->sMbMvp[0], 0);
+
+      for (i = 0; i < 16; ++i) {
+        pCurMb->sMvd[i].sAssignMv (sMvd);
+      }
+
+    } else if (uiMbType == MB_TYPE_16x8) {
+      if (uiNumRefIdxL0Active > 0) {
+        WelsCabacMbRef (pCabacCtx, pCurMb, pMbCache, 0);
+        WelsCabacMbRef (pCabacCtx, pCurMb, pMbCache, 12);
+      }
+      // first partition -> sMvd entries 0..7, second partition -> entries 8..15
+      sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, iMbWidth , pCurMb->sMv[0], pMbCache->sMbMvp[0], 0);
+      for (i = 0; i < 8; ++i) {
+        pCurMb->sMvd[i].sAssignMv (sMvd);
+      }
+      sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, iMbWidth, pCurMb->sMv[8], pMbCache->sMbMvp[1], 8);
+      for (i = 8; i < 16; ++i) {
+        pCurMb->sMvd[i].sAssignMv (sMvd);
+      }
+    } else if (uiMbType == MB_TYPE_8x16) {
+      if (uiNumRefIdxL0Active > 0) {
+        WelsCabacMbRef (pCabacCtx, pCurMb, pMbCache, 0);
+        WelsCabacMbRef (pCabacCtx, pCurMb, pMbCache, 2);
+      }
+      // first partition -> entries 0,1 of every row of four, second partition -> entries 2,3
+      sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, iMbWidth, pCurMb->sMv[0], pMbCache->sMbMvp[0], 0);
+      for (i = 0; i < 16; i += 4) {
+        pCurMb->sMvd[i ].sAssignMv (sMvd);
+        pCurMb->sMvd[i + 1].sAssignMv (sMvd);
+      }
+      sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, iMbWidth, pCurMb->sMv[2], pMbCache->sMbMvp[1], 2);
+      for (i = 0; i < 16; i += 4) {
+        pCurMb->sMvd[i + 2].sAssignMv (sMvd);
+        pCurMb->sMvd[i + 3].sAssignMv (sMvd);
+      }
+    } else if ((uiMbType == MB_TYPE_8x8) || (uiMbType == MB_TYPE_8x8_REF0)) {
+      //write sub_mb_type
+      WelsCabacSubMbType (pCabacCtx, pCurMb);
+
+      if (uiNumRefIdxL0Active > 0) {
+        WelsCabacMbRef (pCabacCtx, pCurMb, pMbCache, 0);
+        WelsCabacMbRef (pCabacCtx, pCurMb, pMbCache, 2);
+        WelsCabacMbRef (pCabacCtx, pCurMb, pMbCache, 12);
+        WelsCabacMbRef (pCabacCtx, pCurMb, pMbCache, 14);
+      }
+      //write sub8x8 mvd
+      WelsCabacSubMbMvd (pCabacCtx, pCurMb, pMbCache, iMbWidth);
+    }
+    // intra16x16 MBs skip the separate cbp write (WelsCabacMbType encodes the cbp for them)
+    if (uiMbType != MB_TYPE_INTRA16x16) {
+      WelsCabacMbCbp (pCurMb, iMbWidth, pCabacCtx);
+    }
+    iRet = WelsWriteMbResidualCabac (pEncCtx->pFuncList, pSlice, pMbCache, pCurMb, pCabacCtx, iMbWidth,
+                                     uiChromaQpIndexOffset);
+  }
+  if (!IS_INTRA (pCurMb->uiMbType))
+    pCurMb->uiChromPredMode = 0;
+
+  return iRet;
+}
+
+
+}
diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/svc_set_mb_syn_cavlc.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/svc_set_mb_syn_cavlc.cpp
new file mode 100644
index 000000000..05944fcdf
--- /dev/null
+++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/svc_set_mb_syn_cavlc.cpp
@@ -0,0 +1,422 @@
+/*!
+ * \copy
+ * Copyright (c) 2009-2013, Cisco Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ * + * + * \file svc_set_mb_syn_cavlc.cpp + * + * \brief Setting all syntax elements of mb and decoding residual with cavlc + * + * \date 2009.8.12 Created + * + ************************************************************************************* + */ + +#include "vlc_encoder.h" +#include "ls_defines.h" +#include "svc_set_mb_syn.h" + +namespace WelsEnc { +const uint32_t g_kuiIntra4x4CbpMap[48] = { + 3, 29, 30, 17, 31, 18, 37, 8, 32, 38, 19, 9, 20, 10, 11, 2, //15 + 16, 33, 34, 21, 35, 22, 39, 4, 36, 40, 23, 5, 24, 6, 7, 1, //31 + 41, 42, 43, 25, 44, 26, 46, 12, 45, 47, 27, 13, 28, 14, 15, 0 //47 +}; + +const uint32_t g_kuiInterCbpMap[48] = { + 0, 2, 3, 7, 4, 8, 17, 13, 5, 18, 9, 14, 10, 15, 16, 11, //15 + 1, 32, 33, 36, 34, 37, 44, 40, 35, 45, 38, 41, 39, 42, 43, 19, //31 + 6, 24, 25, 20, 26, 21, 46, 28, 27, 47, 22, 29, 23, 30, 31, 12 //47 +}; + +//============================Enhance Layer CAVLC Writing=========================== +void WelsSpatialWriteMbPred (sWelsEncCtx* pEncCtx, SSlice* pSlice, SMB* pCurMb) { + SMbCache* pMbCache = &pSlice->sMbCacheInfo; + SBitStringAux* pBs = pSlice->pSliceBsa; + SSliceHeaderExt* pSliceHeadExt = &pSlice->sSliceHeaderExt; + int32_t iNumRefIdxl0ActiveMinus1 = pSliceHeadExt->sSliceHeader.uiNumRefIdxL0Active - 1; + + Mb_Type uiMbType = pCurMb->uiMbType; + int32_t iCbpChroma = pCurMb->uiCbp >> 4; + int32_t iCbpLuma = pCurMb->uiCbp & 15; + int32_t i = 0; + + SMVUnitXY sMvd[2]; + bool* pPredFlag; + int8_t* pRemMode; + + int32_t iMbOffset = 0; + + switch (pSliceHeadExt->sSliceHeader.eSliceType) { + case I_SLICE: + iMbOffset = 0; + break; + case P_SLICE: + iMbOffset = 5; + break; + default: + return; + } + + switch (uiMbType) { + case MB_TYPE_INTRA4x4: + /* mb type */ + BsWriteUE (pBs, iMbOffset + 0); + + /* prediction: luma */ + pPredFlag = &pMbCache->pPrevIntra4x4PredModeFlag[0]; + pRemMode = &pMbCache->pRemIntra4x4PredModeFlag[0]; + do { + BsWriteOneBit (pBs, *pPredFlag); /* b_prev_intra4x4_pred_mode */ + + if (!*pPredFlag) { + 
BsWriteBits (pBs, 3, *pRemMode); + } + + pPredFlag++; + pRemMode++; + ++ i; + } while (i < 16); + + /* prediction: chroma */ + BsWriteUE (pBs, g_kiMapModeIntraChroma[pMbCache->uiChmaI8x8Mode]); + + break; + + case MB_TYPE_INTRA16x16: + /* mb type */ + BsWriteUE (pBs, 1 + iMbOffset + g_kiMapModeI16x16[pMbCache->uiLumaI16x16Mode] + (iCbpChroma << 2) + + (iCbpLuma == 0 ? 0 : 12)); + + /* prediction: chroma */ + BsWriteUE (pBs, g_kiMapModeIntraChroma[pMbCache->uiChmaI8x8Mode]); + + break; + + case MB_TYPE_16x16: + BsWriteUE (pBs, 0); //uiMbType + sMvd[0].sDeltaMv (pCurMb->sMv[0], pMbCache->sMbMvp[0]); + + if (iNumRefIdxl0ActiveMinus1 > 0) { + BsWriteTE (pBs, iNumRefIdxl0ActiveMinus1, pCurMb->pRefIndex[0]); + } + + BsWriteSE (pBs, sMvd[0].iMvX); + BsWriteSE (pBs, sMvd[0].iMvY); + + break; + + case MB_TYPE_16x8: + BsWriteUE (pBs, 1); //uiMbType + + sMvd[0].sDeltaMv (pCurMb->sMv[0], pMbCache->sMbMvp[0]); + sMvd[1].sDeltaMv (pCurMb->sMv[8], pMbCache->sMbMvp[1]); + + if (iNumRefIdxl0ActiveMinus1 > 0) { + BsWriteTE (pBs, iNumRefIdxl0ActiveMinus1, pCurMb->pRefIndex[0]); + BsWriteTE (pBs, iNumRefIdxl0ActiveMinus1, pCurMb->pRefIndex[2]); + } + BsWriteSE (pBs, sMvd[0].iMvX); //block0 + BsWriteSE (pBs, sMvd[0].iMvY); + BsWriteSE (pBs, sMvd[1].iMvX); //block1 + BsWriteSE (pBs, sMvd[1].iMvY); + + break; + + case MB_TYPE_8x16: + BsWriteUE (pBs, 2); //uiMbType + + sMvd[0].sDeltaMv (pCurMb->sMv[0], pMbCache->sMbMvp[0]); + sMvd[1].sDeltaMv (pCurMb->sMv[2], pMbCache->sMbMvp[1]); + + if (iNumRefIdxl0ActiveMinus1 > 0) { + BsWriteTE (pBs, iNumRefIdxl0ActiveMinus1, pCurMb->pRefIndex[0]); + BsWriteTE (pBs, iNumRefIdxl0ActiveMinus1, pCurMb->pRefIndex[1]); + } + BsWriteSE (pBs, sMvd[0].iMvX); //block0 + BsWriteSE (pBs, sMvd[0].iMvY); + BsWriteSE (pBs, sMvd[1].iMvX); //block1 + BsWriteSE (pBs, sMvd[1].iMvY); + + break; + } +} + +void WelsSpatialWriteSubMbPred (sWelsEncCtx* pEncCtx, SSlice* pSlice, SMB* pCurMb) { + SMbCache* pMbCache = &pSlice->sMbCacheInfo; + SBitStringAux* pBs = 
pSlice->pSliceBsa; + SSliceHeaderExt* pSliceHeadExt = &pSlice->sSliceHeaderExt; + + int32_t iNumRefIdxl0ActiveMinus1 = pSliceHeadExt->sSliceHeader.uiNumRefIdxL0Active - 1; + int32_t i; + + bool bSubRef0 = false; + const uint8_t* kpScan4 = & (g_kuiMbCountScan4Idx[0]); + + /* mb type */ + if (LD32 (pCurMb->pRefIndex) == 0) { + BsWriteUE (pBs, 4); + bSubRef0 = false; + } else { + BsWriteUE (pBs, 3); + bSubRef0 = true; + } + + //step 1: sub_mb_type + for (i = 0; i < 4; i++) { + switch (pCurMb->uiSubMbType[i]) { + case SUB_MB_TYPE_8x8: + BsWriteUE (pBs, 0); + break; + case SUB_MB_TYPE_8x4: + BsWriteUE (pBs, 1); + break; + case SUB_MB_TYPE_4x8: + BsWriteUE (pBs, 2); + break; + case SUB_MB_TYPE_4x4: + BsWriteUE (pBs, 3); + break; + default: //should not enter + break; + } + } + + //step 2: get and write uiRefIndex and sMvd + if (iNumRefIdxl0ActiveMinus1 > 0 && bSubRef0) { + BsWriteTE (pBs, iNumRefIdxl0ActiveMinus1, pCurMb->pRefIndex[0]); + BsWriteTE (pBs, iNumRefIdxl0ActiveMinus1, pCurMb->pRefIndex[1]); + BsWriteTE (pBs, iNumRefIdxl0ActiveMinus1, pCurMb->pRefIndex[2]); + BsWriteTE (pBs, iNumRefIdxl0ActiveMinus1, pCurMb->pRefIndex[3]); + } + //write sMvd + for (i = 0; i < 4; i++) { + uint32_t uiSubMbType = pCurMb->uiSubMbType[i]; + if (SUB_MB_TYPE_8x8 == uiSubMbType) { + BsWriteSE (pBs, pCurMb->sMv[*kpScan4].iMvX - pMbCache->sMbMvp[*kpScan4].iMvX); + BsWriteSE (pBs, pCurMb->sMv[*kpScan4].iMvY - pMbCache->sMbMvp[*kpScan4].iMvY); + } else if (SUB_MB_TYPE_4x4 == uiSubMbType) { + BsWriteSE (pBs, pCurMb->sMv[*kpScan4].iMvX - pMbCache->sMbMvp[*kpScan4].iMvX); + BsWriteSE (pBs, pCurMb->sMv[*kpScan4].iMvY - pMbCache->sMbMvp[*kpScan4].iMvY); + BsWriteSE (pBs, pCurMb->sMv[* (kpScan4 + 1)].iMvX - pMbCache->sMbMvp[* (kpScan4 + 1)].iMvX); + BsWriteSE (pBs, pCurMb->sMv[* (kpScan4 + 1)].iMvY - pMbCache->sMbMvp[* (kpScan4 + 1)].iMvY); + BsWriteSE (pBs, pCurMb->sMv[* (kpScan4 + 2)].iMvX - pMbCache->sMbMvp[* (kpScan4 + 2)].iMvX); + BsWriteSE (pBs, pCurMb->sMv[* (kpScan4 + 2)].iMvY - 
pMbCache->sMbMvp[* (kpScan4 + 2)].iMvY); + BsWriteSE (pBs, pCurMb->sMv[* (kpScan4 + 3)].iMvX - pMbCache->sMbMvp[* (kpScan4 + 3)].iMvX); + BsWriteSE (pBs, pCurMb->sMv[* (kpScan4 + 3)].iMvY - pMbCache->sMbMvp[* (kpScan4 + 3)].iMvY); + } else if (SUB_MB_TYPE_8x4 == uiSubMbType) { + BsWriteSE (pBs, pCurMb->sMv[*kpScan4].iMvX - pMbCache->sMbMvp[*kpScan4].iMvX); + BsWriteSE (pBs, pCurMb->sMv[*kpScan4].iMvY - pMbCache->sMbMvp[*kpScan4].iMvY); + BsWriteSE (pBs, pCurMb->sMv[* (kpScan4 + 2)].iMvX - pMbCache->sMbMvp[* (kpScan4 + 2)].iMvX); + BsWriteSE (pBs, pCurMb->sMv[* (kpScan4 + 2)].iMvY - pMbCache->sMbMvp[* (kpScan4 + 2)].iMvY); + } else if (SUB_MB_TYPE_4x8 == uiSubMbType) { + BsWriteSE (pBs, pCurMb->sMv[*kpScan4].iMvX - pMbCache->sMbMvp[*kpScan4].iMvX); + BsWriteSE (pBs, pCurMb->sMv[*kpScan4].iMvY - pMbCache->sMbMvp[*kpScan4].iMvY); + BsWriteSE (pBs, pCurMb->sMv[* (kpScan4 + 1)].iMvX - pMbCache->sMbMvp[* (kpScan4 + 1)].iMvX); + BsWriteSE (pBs, pCurMb->sMv[* (kpScan4 + 1)].iMvY - pMbCache->sMbMvp[* (kpScan4 + 1)].iMvY); + } + kpScan4 += 4; + } +} + +int32_t CheckBitstreamBuffer (const uint32_t kuiSliceIdx, sWelsEncCtx* pEncCtx, SBitStringAux* pBs) { + const intX_t iLeftLength = pBs->pEndBuf - pBs->pCurBuf - 1; + assert (iLeftLength > 0); + + if (iLeftLength < MAX_MACROBLOCK_SIZE_IN_BYTE_x2) { + return ENC_RETURN_VLCOVERFLOWFOUND;//ENC_RETURN_MEMALLOCERR; + //TODO: call the realloc and copy instead + } + return ENC_RETURN_SUCCESS; +} + +//============================Base Layer CAVLC Writing=============================== +int32_t WelsSpatialWriteMbSyn (sWelsEncCtx* pEncCtx, SSlice* pSlice, SMB* pCurMb) { + SBitStringAux* pBs = pSlice->pSliceBsa; + SMbCache* pMbCache = &pSlice->sMbCacheInfo; + const uint8_t kuiChromaQpIndexOffset = pEncCtx->pCurDqLayer->sLayerInfo.pPpsP->uiChromaQpIndexOffset; + + if (IS_SKIP (pCurMb->uiMbType)) { + pCurMb->uiLumaQp = pSlice->uiLastMbQp; + pCurMb->uiChromaQp = g_kuiChromaQpTable[CLIP3_QP_0_51 (pCurMb->uiLumaQp + kuiChromaQpIndexOffset)]; + + 
pSlice->iMbSkipRun++; + return ENC_RETURN_SUCCESS; + } else { + if (pEncCtx->eSliceType != I_SLICE) { + BsWriteUE (pBs, pSlice->iMbSkipRun); + pSlice->iMbSkipRun = 0; + } + /* Step 1: write mb type and pred */ + if (IS_Inter_8x8 (pCurMb->uiMbType)) { + WelsSpatialWriteSubMbPred (pEncCtx, pSlice, pCurMb); + } else { + WelsSpatialWriteMbPred (pEncCtx, pSlice, pCurMb); + } + + /* Step 2: write coded block pattern */ + if (IS_INTRA4x4 (pCurMb->uiMbType)) { + BsWriteUE (pBs, g_kuiIntra4x4CbpMap[pCurMb->uiCbp]); + } else if (!IS_INTRA16x16 (pCurMb->uiMbType)) { + BsWriteUE (pBs, g_kuiInterCbpMap[pCurMb->uiCbp]); + } + + /* Step 3: write QP and residual */ + if (pCurMb->uiCbp > 0 || IS_INTRA16x16 (pCurMb->uiMbType)) { + const int32_t kiDeltaQp = pCurMb->uiLumaQp - pSlice->uiLastMbQp; + pSlice->uiLastMbQp = pCurMb->uiLumaQp; + + BsWriteSE (pBs, kiDeltaQp); + if (WelsWriteMbResidual (pEncCtx->pFuncList, pMbCache, pCurMb, pBs)) + return ENC_RETURN_VLCOVERFLOWFOUND; + } else { + pCurMb->uiLumaQp = pSlice->uiLastMbQp; + pCurMb->uiChromaQp = g_kuiChromaQpTable[CLIP3_QP_0_51 (pCurMb->uiLumaQp + + pEncCtx->pCurDqLayer->sLayerInfo.pPpsP->uiChromaQpIndexOffset)]; + } + + /* Step 4: Check the left buffer */ + return CheckBitstreamBuffer (pSlice->iSliceIdx, pEncCtx, pBs); + } +} + +int32_t WelsWriteMbResidual (SWelsFuncPtrList* pFuncList, SMbCache* sMbCacheInfo, SMB* pCurMb, SBitStringAux* pBs) { + int32_t i; + Mb_Type uiMbType = pCurMb->uiMbType; + const int32_t kiCbpChroma = pCurMb->uiCbp >> 4; + const int32_t kiCbpLuma = pCurMb->uiCbp & 0x0F; + int8_t* pNonZeroCoeffCount = sMbCacheInfo->iNonZeroCoeffCount; + int16_t* pBlock; + int8_t iA, iB, iC; + + if (IS_INTRA16x16 (uiMbType)) { + /* DC luma */ + iA = pNonZeroCoeffCount[8]; + iB = pNonZeroCoeffCount[ 1]; + WELS_NON_ZERO_COUNT_AVERAGE (iC, iA, iB); + if (WriteBlockResidualCavlc (pFuncList, sMbCacheInfo->pDct->iLumaI16x16Dc, 15, 1, LUMA_4x4, iC, pBs)) + return ENC_RETURN_VLCOVERFLOWFOUND; + + /* AC Luma */ + if (kiCbpLuma) { + 
pBlock = sMbCacheInfo->pDct->iLumaBlock[0]; + + for (i = 0; i < 16; i++) { + int32_t iIdx = g_kuiCache48CountScan4Idx[i]; + iA = pNonZeroCoeffCount[iIdx - 1]; + iB = pNonZeroCoeffCount[iIdx - 8]; + WELS_NON_ZERO_COUNT_AVERAGE (iC, iA, iB); + if (WriteBlockResidualCavlc (pFuncList, pBlock, 14, pNonZeroCoeffCount[iIdx] > 0, LUMA_AC, iC, pBs)) + return ENC_RETURN_VLCOVERFLOWFOUND; + pBlock += 16; + } + } + } else { + /* Luma DC AC */ + if (kiCbpLuma) { + pBlock = sMbCacheInfo->pDct->iLumaBlock[0]; + + for (i = 0; i < 16; i += 4) { + if (kiCbpLuma & (1 << (i >> 2))) { + int32_t iIdx = g_kuiCache48CountScan4Idx[i]; + const int8_t kiA = pNonZeroCoeffCount[iIdx]; + const int8_t kiB = pNonZeroCoeffCount[iIdx + 1]; + const int8_t kiC = pNonZeroCoeffCount[iIdx + 8]; + const int8_t kiD = pNonZeroCoeffCount[iIdx + 9]; + iA = pNonZeroCoeffCount[iIdx - 1]; + iB = pNonZeroCoeffCount[iIdx - 8]; + WELS_NON_ZERO_COUNT_AVERAGE (iC, iA, iB); + if (WriteBlockResidualCavlc (pFuncList, pBlock, 15, kiA > 0, LUMA_4x4, iC, pBs)) + return ENC_RETURN_VLCOVERFLOWFOUND; + + iA = kiA; + iB = pNonZeroCoeffCount[iIdx - 7]; + WELS_NON_ZERO_COUNT_AVERAGE (iC, iA, iB); + if (WriteBlockResidualCavlc (pFuncList, pBlock + 16, 15, kiB > 0, LUMA_4x4, iC, pBs)) + return ENC_RETURN_VLCOVERFLOWFOUND; + + iA = pNonZeroCoeffCount[iIdx + 7]; + iB = kiA; + WELS_NON_ZERO_COUNT_AVERAGE (iC, iA, iB); + if (WriteBlockResidualCavlc (pFuncList, pBlock + 32, 15, kiC > 0, LUMA_4x4, iC, pBs)) + return ENC_RETURN_VLCOVERFLOWFOUND; + + iA = kiC; + iB = kiB; + WELS_NON_ZERO_COUNT_AVERAGE (iC, iA, iB); + if (WriteBlockResidualCavlc (pFuncList, pBlock + 48, 15, kiD > 0, LUMA_4x4, iC, pBs)) + return ENC_RETURN_VLCOVERFLOWFOUND; + } + pBlock += 64; + } + } + } + + if (kiCbpChroma) { + /* Chroma DC residual present */ + pBlock = sMbCacheInfo->pDct->iChromaDc[0]; // Cb + if (WriteBlockResidualCavlc (pFuncList, pBlock, 3, 1, CHROMA_DC, CHROMA_DC_NC_OFFSET, pBs)) + return ENC_RETURN_VLCOVERFLOWFOUND; + + pBlock += 4; // Cr + if 
(WriteBlockResidualCavlc (pFuncList, pBlock, 3, 1, CHROMA_DC, CHROMA_DC_NC_OFFSET, pBs)) + return ENC_RETURN_VLCOVERFLOWFOUND; + + /* Chroma AC residual present */ + if (kiCbpChroma & 0x02) { + const uint8_t* kCache48CountScan4Idx16base = &g_kuiCache48CountScan4Idx[16]; + pBlock = sMbCacheInfo->pDct->iChromaBlock[0]; // Cb + + for (i = 0; i < 4; i++) { + int32_t iIdx = kCache48CountScan4Idx16base[i]; + iA = pNonZeroCoeffCount[iIdx - 1]; + iB = pNonZeroCoeffCount[iIdx - 8]; + WELS_NON_ZERO_COUNT_AVERAGE (iC, iA, iB); + if (WriteBlockResidualCavlc (pFuncList, pBlock, 14, pNonZeroCoeffCount[iIdx] > 0, CHROMA_AC, iC, pBs)) + return ENC_RETURN_VLCOVERFLOWFOUND; + pBlock += 16; + } + + pBlock = sMbCacheInfo->pDct->iChromaBlock[4]; // Cr + + for (i = 0; i < 4; i++) { + int32_t iIdx = 24 + kCache48CountScan4Idx16base[i]; + iA = pNonZeroCoeffCount[iIdx - 1]; + iB = pNonZeroCoeffCount[iIdx - 8]; + WELS_NON_ZERO_COUNT_AVERAGE (iC, iA, iB); + if (WriteBlockResidualCavlc (pFuncList, pBlock, 14, pNonZeroCoeffCount[iIdx] > 0, CHROMA_AC, iC, pBs)) + return ENC_RETURN_VLCOVERFLOWFOUND; + pBlock += 16; + } + } + } + return 0; +} + +} // namespace WelsEnc diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/wels_preprocess.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/wels_preprocess.cpp new file mode 100644 index 000000000..6ac79169a --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/wels_preprocess.cpp @@ -0,0 +1,1458 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include "wels_preprocess.h" +#include "picture_handle.h" +#include "encoder_context.h" +#include "utils.h" +#include "encoder.h" + +namespace { + +void ClearEndOfLinePadding (uint8_t* pData, int32_t iStride, int32_t iWidth, int32_t iHeight) { + if (iWidth < iStride) { + for (int32_t i = 0; i < iHeight; ++i) + memset (pData + i * iStride + iWidth, 0, iStride - iWidth); + } +} + +} // anon ns. 
+ +namespace WelsEnc { + + + +//***** entry API declaration ************************************************************************// + +int32_t WelsInitScaledPic (SWelsSvcCodingParam* pParam, Scaled_Picture* pScaledPic, CMemoryAlign* pMemoryAlign); +bool JudgeNeedOfScaling (SWelsSvcCodingParam* pParam, Scaled_Picture* pScaledPic); +void FreeScaledPic (Scaled_Picture* pScaledPic, CMemoryAlign* pMemoryAlign); +void WelsMoveMemory_c (uint8_t* pDstY, uint8_t* pDstU, uint8_t* pDstV, int32_t iDstStrideY, int32_t iDstStrideUV, + uint8_t* pSrcY, uint8_t* pSrcU, uint8_t* pSrcV, int32_t iSrcStrideY, int32_t iSrcStrideUV, int32_t iWidth, + int32_t iHeight); + +//******* table definition ***********************************************************************// +const uint8_t g_kuiRefTemporalIdx[MAX_TEMPORAL_LEVEL][MAX_GOP_SIZE] = { + { 0, }, // 0 + { 0, 0, }, // 1 + { 0, 0, 0, 1, }, // 2 + { 0, 0, 0, 2, 0, 1, 1, 2, }, // 3 +}; + +const int32_t g_kiPixMapSizeInBits = sizeof (uint8_t) * 8; + + +inline void WelsUpdateSpatialIdxMap (sWelsEncCtx* pEncCtx, const int32_t iPos, SPicture* const pPic, + const int32_t iDidx) { + pEncCtx->sSpatialIndexMap[iPos].pSrc = pPic; + pEncCtx->sSpatialIndexMap[iPos].iDid = iDidx; +} + + +/*************************************************************************** +* +* implement of the interface +* +***************************************************************************/ + + + +CWelsPreProcess* CWelsPreProcess::CreatePreProcess (sWelsEncCtx* pEncCtx) { + + CWelsPreProcess* pPreProcess = NULL; + switch (pEncCtx->pSvcParam->iUsageType) { + case SCREEN_CONTENT_REAL_TIME: + pPreProcess = WELS_NEW_OP (CWelsPreProcessScreen (pEncCtx), + CWelsPreProcessScreen); + break; + default: + pPreProcess = WELS_NEW_OP (CWelsPreProcessVideo (pEncCtx), + CWelsPreProcessVideo); + break; + + } + WELS_VERIFY_RETURN_IF (NULL, NULL == pPreProcess) + return pPreProcess; +} + + +CWelsPreProcess::CWelsPreProcess (sWelsEncCtx* pEncCtx) { + m_pInterfaceVp = NULL; + 
m_bInitDone = false; + m_pEncCtx = pEncCtx; + memset (&m_sScaledPicture, 0, sizeof (m_sScaledPicture)); + memset (m_pSpatialPic, 0, sizeof (m_pSpatialPic)); + memset (m_uiSpatialLayersInTemporal, 0, sizeof (m_uiSpatialLayersInTemporal)); + memset (m_uiSpatialPicNum, 0, sizeof (m_uiSpatialPicNum)); +} + +CWelsPreProcess::~CWelsPreProcess() { + FreeScaledPic (&m_sScaledPicture, m_pEncCtx->pMemAlign); + WelsPreprocessDestroy(); +} + +int32_t CWelsPreProcess::WelsPreprocessCreate() { + if (m_pInterfaceVp == NULL) { + WelsCreateVpInterface ((void**) &m_pInterfaceVp, WELSVP_INTERFACE_VERION); + if (!m_pInterfaceVp) + goto exit; + } else + goto exit; + + return 0; + +exit: + WelsPreprocessDestroy(); + return 1; +} + +int32_t CWelsPreProcess::WelsPreprocessDestroy() { + WelsDestroyVpInterface (m_pInterfaceVp, WELSVP_INTERFACE_VERION); + m_pInterfaceVp = NULL; + + return 0; +} + +int32_t CWelsPreProcess::WelsPreprocessReset (sWelsEncCtx* pCtx, int32_t iWidth, int32_t iHeight) { + int32_t iRet = -1; + SWelsSvcCodingParam* pSvcParam = pCtx->pSvcParam; + //init source width and height + pSvcParam->SUsedPicRect.iLeft = 0; + pSvcParam->SUsedPicRect.iTop = 0; + pSvcParam->SUsedPicRect.iWidth = iWidth; + pSvcParam->SUsedPicRect.iHeight = iHeight; + if ((iWidth < 16) || ((iHeight < 16))) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "Don't support width(%d) or height(%d) which is less than 16 ", iWidth, + iHeight); + return iRet; + } + if (pCtx) { + FreeScaledPic (&m_sScaledPicture, pCtx->pMemAlign); + iRet = InitLastSpatialPictures (pCtx); + iRet = WelsInitScaledPic (pCtx->pSvcParam, &m_sScaledPicture, pCtx->pMemAlign); + } + + return iRet; +} + +int32_t CWelsPreProcess::AllocSpatialPictures (sWelsEncCtx* pCtx, SWelsSvcCodingParam* pParam) { + CMemoryAlign* pMa = pCtx->pMemAlign; + const int32_t kiDlayerCount = pParam->iSpatialLayerNum; + int32_t iDlayerIndex = 0; + + // spatial pictures + iDlayerIndex = 0; + do { + const int32_t kiPicWidth = 
pParam->sSpatialLayers[iDlayerIndex].iVideoWidth; + const int32_t kiPicHeight = pParam->sSpatialLayers[iDlayerIndex].iVideoHeight; + const uint8_t kuiLayerInTemporal = 2 + WELS_MAX (pParam->sDependencyLayers[iDlayerIndex].iHighestTemporalId, 1); + const uint8_t kuiRefNumInTemporal = kuiLayerInTemporal + pParam->iLTRRefNum; + uint8_t i = 0; + + m_uiSpatialPicNum[iDlayerIndex] = kuiRefNumInTemporal; + do { + SPicture* pPic = AllocPicture (pMa, kiPicWidth, kiPicHeight, false, 0); + WELS_VERIFY_RETURN_IF (1, (NULL == pPic)) + m_pSpatialPic[iDlayerIndex][i] = pPic; + ++ i; + } while (i < kuiRefNumInTemporal); + + if (pParam->iUsageType == SCREEN_CONTENT_REAL_TIME) + m_uiSpatialLayersInTemporal[iDlayerIndex] = 1; + else + m_uiSpatialLayersInTemporal[iDlayerIndex] = kuiLayerInTemporal; + + ++ iDlayerIndex; + } while (iDlayerIndex < kiDlayerCount); + + return 0; +} + +void CWelsPreProcess::FreeSpatialPictures (sWelsEncCtx* pCtx) { + CMemoryAlign* pMa = pCtx->pMemAlign; + int32_t j = 0; + while (j < pCtx->pSvcParam->iSpatialLayerNum) { + uint8_t i = 0; + uint8_t uiRefNumInTemporal = m_uiSpatialPicNum[j]; + + while (i < uiRefNumInTemporal) { + if (NULL != m_pSpatialPic[j][i]) { + FreePicture (pMa, &m_pSpatialPic[j][i]); + } + ++ i; + } + m_uiSpatialLayersInTemporal[j] = 0; + ++ j; + } +} + +int32_t CWelsPreProcess::BuildSpatialPicList (sWelsEncCtx* pCtx, const SSourcePicture* kpSrcPic) { + SWelsSvcCodingParam* pSvcParam = pCtx->pSvcParam; + int32_t iSpatialNum = 0; + int32_t iWidth = ((kpSrcPic->iPicWidth >> 1) << 1); + int32_t iHeight = ((kpSrcPic->iPicHeight >> 1) << 1); + + if (!m_bInitDone) { + if (WelsPreprocessCreate() != 0) + return -1; + + if (WelsPreprocessReset (pCtx, iWidth, iHeight) != 0) + return -1; + + m_iAvaliableRefInSpatialPicList = pSvcParam->iNumRefFrame; + + m_bInitDone = true; + } else { + if ((iWidth != pSvcParam->SUsedPicRect.iWidth) || (iHeight != pSvcParam->SUsedPicRect.iHeight)) { + if (WelsPreprocessReset (pCtx, iWidth, iHeight) != 0) + return -1; 
+ } + } + + if (m_pInterfaceVp == NULL) + return -1; + + pCtx->pVaa->bSceneChangeFlag = pCtx->pVaa->bIdrPeriodFlag = false; + + iSpatialNum = SingleLayerPreprocess (pCtx, kpSrcPic, &m_sScaledPicture); + + return iSpatialNum; +} + +SPicture* CWelsPreProcess::GetBestRefPic (EUsageType iUsageType, bool bSceneLtr, EWelsSliceType eSliceType, + int32_t kiDidx, int32_t iRefTemporalIdx) { + assert (iUsageType == SCREEN_CONTENT_REAL_TIME); + SVAAFrameInfoExt* pVaaExt = static_cast<SVAAFrameInfoExt*> (m_pEncCtx->pVaa); /* NOTE(review): assumes pVaa refers to an SVAAFrameInfoExt in screen-content mode -- confirm at the allocation site */ + SRefInfoParam* BestRefCandidateParam = (bSceneLtr) ? (& (pVaaExt->sVaaLtrBestRefCandidate[0])) : + (& (pVaaExt->sVaaStrBestRefCandidate[0])); + return m_pSpatialPic[0][BestRefCandidateParam->iSrcListIdx]; + +} +SPicture* CWelsPreProcess::GetBestRefPic (const int32_t kiDidx, const int32_t iRefTemporalIdx) { + + return m_pSpatialPic[kiDidx][iRefTemporalIdx]; +} + +int32_t CWelsPreProcess::AnalyzeSpatialPic (sWelsEncCtx* pCtx, const int32_t kiDidx) { + SWelsSvcCodingParam* pSvcParam = pCtx->pSvcParam; + bool bNeededMbAq = (pSvcParam->bEnableAdaptiveQuant && (pCtx->eSliceType == P_SLICE)); + bool bCalculateBGD = (pCtx->eSliceType == P_SLICE && pSvcParam->bEnableBackgroundDetection); + SSpatialLayerInternal* pParamInternal = &pSvcParam->sDependencyLayers[kiDidx]; + int32_t iCurTemporalIdx = m_uiSpatialLayersInTemporal[kiDidx] - 1; + + int32_t iRefTemporalIdx = (int32_t)g_kuiRefTemporalIdx[pSvcParam->iDecompStages][pParamInternal->iCodingIndex & + (pSvcParam->uiGopSize - 1)]; + if (pCtx->uiTemporalId == 0 && pCtx->pLtr[pCtx->uiDependencyId].bReceivedT0LostFlag) + iRefTemporalIdx = m_uiSpatialLayersInTemporal[kiDidx] + pCtx->pVaa->uiValidLongTermPicIdx; + + SPicture* pCurPic = m_pSpatialPic[kiDidx][iCurTemporalIdx]; + bool bCalculateVar = (pSvcParam->iRCMode >= RC_BITRATE_MODE && pCtx->eSliceType == I_SLICE); + + if (pSvcParam->iUsageType == SCREEN_CONTENT_REAL_TIME) { + SPicture* pRefPic = GetBestRefPic (pSvcParam->iUsageType, pCtx->bCurFrameMarkedAsSceneLtr, pCtx->eSliceType, 
kiDidx, + iRefTemporalIdx); + + VaaCalculation (pCtx->pVaa, pCurPic, pRefPic, false, bCalculateVar, bCalculateBGD); + + if (pSvcParam->bEnableBackgroundDetection) { + BackgroundDetection (pCtx->pVaa, pCurPic, pRefPic, bCalculateBGD && pRefPic->iPictureType != I_SLICE); + } + if (bNeededMbAq) { + AdaptiveQuantCalculation (pCtx->pVaa, pCurPic, pRefPic); + } + } else { + SPicture* pRefPic = GetBestRefPic (kiDidx, iRefTemporalIdx); + SPicture* pLastPic = m_pLastSpatialPicture[kiDidx][0]; + bool bCalculateSQDiff = ((pLastPic->pData[0] == pRefPic->pData[0]) && bNeededMbAq); + + VaaCalculation (pCtx->pVaa, pCurPic, pRefPic, bCalculateSQDiff, bCalculateVar, bCalculateBGD); + + if (pSvcParam->bEnableBackgroundDetection) { + BackgroundDetection (pCtx->pVaa, pCurPic, pRefPic, bCalculateBGD && pRefPic->iPictureType != I_SLICE); + } + + if (bNeededMbAq) { + AdaptiveQuantCalculation (pCtx->pVaa, m_pLastSpatialPicture[kiDidx][1], m_pLastSpatialPicture[kiDidx][0]); + } + } + return 0; +} + +int32_t CWelsPreProcess::GetCurPicPosition (const int32_t kiDidx) { + return (m_uiSpatialLayersInTemporal[kiDidx] - 1); +} + +int32_t CWelsPreProcess::UpdateSpatialPictures (sWelsEncCtx* pCtx, SWelsSvcCodingParam* pParam, + const int8_t iCurTid, const int32_t kiDidx) { + if (pCtx->pSvcParam->iUsageType == SCREEN_CONTENT_REAL_TIME) + return 0; + + WelsExchangeSpatialPictures (&m_pLastSpatialPicture[kiDidx][1], &m_pLastSpatialPicture[kiDidx][0]); + + const int32_t kiCurPos = GetCurPicPosition (kiDidx); + if (iCurTid < kiCurPos || pParam->iDecompStages == 0) { + if ((iCurTid >= MAX_TEMPORAL_LEVEL) || (kiCurPos > MAX_TEMPORAL_LEVEL)) { + InitLastSpatialPictures (pCtx); + return 1; + } + if (pCtx->bRefOfCurTidIsLtr[kiDidx][iCurTid]) { + const int32_t kiAvailableLtrPos = m_uiSpatialLayersInTemporal[kiDidx] + pCtx->pVaa->uiMarkLongTermPicIdx; + WelsExchangeSpatialPictures (&m_pSpatialPic[kiDidx][kiAvailableLtrPos], + &m_pSpatialPic[kiDidx][iCurTid]); + pCtx->bRefOfCurTidIsLtr[kiDidx][iCurTid] = false; 
+ } + WelsExchangeSpatialPictures (&m_pSpatialPic[kiDidx][kiCurPos], + &m_pSpatialPic[kiDidx][iCurTid]); + + + } + return 0; +} + + +/* + * SingleLayerPreprocess: down sampling if applicable + * @return: exact number of spatial layers that actually need to be encoded + */ +int32_t CWelsPreProcess::SingleLayerPreprocess (sWelsEncCtx* pCtx, const SSourcePicture* kpSrc, + Scaled_Picture* pScaledPicture) { + SWelsSvcCodingParam* pSvcParam = pCtx->pSvcParam; + int8_t iDependencyId = pSvcParam->iSpatialLayerNum - 1; + + SPicture* pSrcPic = NULL; // large + SPicture* pDstPic = NULL; // small + SSpatialLayerConfig* pDlayerParam = NULL; + SSpatialLayerInternal* pDlayerParamInternal = NULL; + int32_t iSpatialNum = 0; + int32_t iSrcWidth = 0; + int32_t iSrcHeight = 0; + int32_t iTargetWidth = 0; + int32_t iTargetHeight = 0; + int32_t iTemporalId = 0; + int32_t iClosestDid = iDependencyId; + pDlayerParamInternal = &pSvcParam->sDependencyLayers[iDependencyId]; + pDlayerParam = &pSvcParam->sSpatialLayers[iDependencyId]; + iTargetWidth = pDlayerParam->iVideoWidth; + iTargetHeight = pDlayerParam->iVideoHeight; + + iSrcWidth = pSvcParam->SUsedPicRect.iWidth; + iSrcHeight = pSvcParam->SUsedPicRect.iHeight; + if (pSvcParam->uiIntraPeriod) { + pCtx->pVaa->bIdrPeriodFlag = (1 + pDlayerParamInternal->iFrameIndex >= (int32_t)pSvcParam->uiIntraPeriod) ? true : + false; + if (pCtx->pVaa->bIdrPeriodFlag) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, + "pSvcParam->uiIntraPeriod=%d, pCtx->pVaa->bIdrPeriodFlag=%d", + pSvcParam->uiIntraPeriod, + pCtx->pVaa->bIdrPeriodFlag); + } + } + + pSrcPic = pScaledPicture->pScaledInputPicture ? pScaledPicture->pScaledInputPicture : GetCurrentOrigFrame ( + iDependencyId); + + WelsMoveMemoryWrapper (pSvcParam, pSrcPic, kpSrc, iSrcWidth, iSrcHeight); + + if (pSvcParam->bEnableDenoise) + BilateralDenoising (pSrcPic, iSrcWidth, iSrcHeight); + + // different scaling in between input picture and dst highest spatial picture. 
+ int32_t iShrinkWidth = iSrcWidth; + int32_t iShrinkHeight = iSrcHeight; + pDstPic = pSrcPic; + if (pScaledPicture->pScaledInputPicture) { + // for highest downsampling + pDstPic = GetCurrentOrigFrame (iDependencyId); + iShrinkWidth = pScaledPicture->iScaledWidth[iDependencyId]; + iShrinkHeight = pScaledPicture->iScaledHeight[iDependencyId]; + } + DownsamplePadding (pSrcPic, pDstPic, iSrcWidth, iSrcHeight, iShrinkWidth, iShrinkHeight, iTargetWidth, iTargetHeight, + false); + + if (pSvcParam->bEnableSceneChangeDetect && !pCtx->pVaa->bIdrPeriodFlag) { + if (pSvcParam->iUsageType == SCREEN_CONTENT_REAL_TIME) { + pCtx->pVaa->eSceneChangeIdc = (pDlayerParamInternal->bEncCurFrmAsIdrFlag ? LARGE_CHANGED_SCENE : + DetectSceneChange (pDstPic)); + pCtx->pVaa->bSceneChangeFlag = (LARGE_CHANGED_SCENE == pCtx->pVaa->eSceneChangeIdc); + } else { + if ((!pDlayerParamInternal->bEncCurFrmAsIdrFlag) + && ! (pDlayerParamInternal->iCodingIndex & (pSvcParam->uiGopSize - 1))) { + SPicture* pRefPic = pCtx->pLtr[iDependencyId].bReceivedT0LostFlag ? 
m_pSpatialPic[iDependencyId][m_uiSpatialLayersInTemporal[iDependencyId] + + pCtx->pVaa->uiValidLongTermPicIdx] : m_pLastSpatialPicture[iDependencyId][0]; + //pCtx->pVaa->eSceneChangeIdc = DetectSceneChange (pDstPic, pRefPic); + pCtx->pVaa->bSceneChangeFlag = GetSceneChangeFlag (DetectSceneChange (pDstPic, pRefPic)); + } + } + } + + for (int32_t i = 0; i < pSvcParam->iSpatialLayerNum; i++) { + pDlayerParamInternal = &pSvcParam->sDependencyLayers[i]; + iTemporalId = pDlayerParamInternal->uiCodingIdx2TemporalId[pDlayerParamInternal->iCodingIndex & + (pSvcParam->uiGopSize - 1)]; + if (iTemporalId != INVALID_TEMPORAL_ID) { + ++ iSpatialNum; + } + } + pDlayerParamInternal = &pSvcParam->sDependencyLayers[iDependencyId]; + iTemporalId = pDlayerParamInternal->uiCodingIdx2TemporalId[pDlayerParamInternal->iCodingIndex & + (pSvcParam->uiGopSize - 1)]; + int iActualSpatialNum = iSpatialNum - 1; + if (iTemporalId != INVALID_TEMPORAL_ID) { + WelsUpdateSpatialIdxMap (pCtx, iActualSpatialNum, pDstPic, iDependencyId); + -- iActualSpatialNum; + } + + m_pLastSpatialPicture[iDependencyId][1] = GetCurrentOrigFrame (iDependencyId); + -- iDependencyId; + + + // generate other spatial layers + // pSrc is + // -- padded input pic, if downsample should be applied to generate highest layer, [if] block above + // -- highest layer, if no downsampling, [else] block above + if (pSvcParam->iSpatialLayerNum > 1) { + while (iDependencyId >= 0) { + pDlayerParamInternal = &pSvcParam->sDependencyLayers[iDependencyId]; + pDlayerParam = &pSvcParam->sSpatialLayers[iDependencyId]; + SPicture* pSrcPic = m_pLastSpatialPicture[iClosestDid][1]; // large + iTargetWidth = pDlayerParam->iVideoWidth; + iTargetHeight = pDlayerParam->iVideoHeight; + iTemporalId = pDlayerParamInternal->uiCodingIdx2TemporalId[pDlayerParamInternal->iCodingIndex & + (pSvcParam->uiGopSize - 1)]; + + // down sampling performed + int32_t iSrcWidth = pScaledPicture->iScaledWidth[iClosestDid]; + int32_t iSrcHeight = 
pScaledPicture->iScaledHeight[iClosestDid]; + pDstPic = GetCurrentOrigFrame (iDependencyId); // small + iShrinkWidth = pScaledPicture->iScaledWidth[iDependencyId]; + iShrinkHeight = pScaledPicture->iScaledHeight[iDependencyId]; + DownsamplePadding (pSrcPic, pDstPic, iSrcWidth, iSrcHeight, iShrinkWidth, iShrinkHeight, iTargetWidth, iTargetHeight, + true); + + if ((iTemporalId != INVALID_TEMPORAL_ID)) { + WelsUpdateSpatialIdxMap (pCtx, iActualSpatialNum, pDstPic, iDependencyId); + iActualSpatialNum--; + } + + m_pLastSpatialPicture[iDependencyId][1] = pDstPic; + + iClosestDid = iDependencyId; + -- iDependencyId; + + } + } + return iSpatialNum; + +} + + +/*! + * \brief Whether input picture need be scaled? + */ +bool JudgeNeedOfScaling (SWelsSvcCodingParam* pParam, Scaled_Picture* pScaledPicture) { + const int32_t kiInputPicWidth = pParam->SUsedPicRect.iWidth; + const int32_t kiInputPicHeight = pParam->SUsedPicRect.iHeight; + const int32_t kiDstPicWidth = pParam->sDependencyLayers[pParam->iSpatialLayerNum - 1].iActualWidth; + const int32_t kiDstPicHeight = pParam->sDependencyLayers[pParam->iSpatialLayerNum - 1].iActualHeight; + bool bNeedDownsampling = true; + + int32_t iSpatialIdx = pParam->iSpatialLayerNum - 1; + + if (kiDstPicWidth >= kiInputPicWidth && kiDstPicHeight >= kiInputPicHeight) { + bNeedDownsampling = false; + } + + for (; iSpatialIdx >= 0; iSpatialIdx --) { + SSpatialLayerInternal* pCurLayer = &pParam->sDependencyLayers[iSpatialIdx]; + int32_t iCurDstWidth = pCurLayer->iActualWidth; + int32_t iCurDstHeight = pCurLayer->iActualHeight; + int32_t iInputWidthXDstHeight = kiInputPicWidth * iCurDstHeight; + int32_t iInputHeightXDstWidth = kiInputPicHeight * iCurDstWidth; + + if (iInputWidthXDstHeight > iInputHeightXDstWidth) { + pScaledPicture->iScaledWidth[iSpatialIdx] = WELS_MAX (iCurDstWidth, 4); + pScaledPicture->iScaledHeight[iSpatialIdx] = WELS_MAX (iInputHeightXDstWidth / kiInputPicWidth, 4); + } else { + pScaledPicture->iScaledWidth[iSpatialIdx] = 
WELS_MAX (iInputWidthXDstHeight / kiInputPicHeight, 4); + pScaledPicture->iScaledHeight[iSpatialIdx] = WELS_MAX (iCurDstHeight, 4); + } + } + + return bNeedDownsampling; +} + +int32_t WelsInitScaledPic (SWelsSvcCodingParam* pParam, Scaled_Picture* pScaledPicture, CMemoryAlign* pMemoryAlign) { + bool bInputPicNeedScaling = JudgeNeedOfScaling (pParam, pScaledPicture); + if (bInputPicNeedScaling) { + pScaledPicture->pScaledInputPicture = AllocPicture (pMemoryAlign, pParam->SUsedPicRect.iWidth, + pParam->SUsedPicRect.iHeight, false, 0); + if (pScaledPicture->pScaledInputPicture == NULL) + return -1; + + // Avoid valgrind false positives. + // + // X86 SIMD downsampling routines may, for convenience, read slightly beyond + // the input data and into the alignment padding area beyond each line. This + // causes valgrind to warn about uninitialized values even if these values + // only affect lanes of a SIMD vector that are effectively never used. + // + // Avoid these false positives by zero-initializing the padding area beyond + // each line of the source buffer used for downsampling. 
+ SPicture* pPic = pScaledPicture->pScaledInputPicture; + ClearEndOfLinePadding (pPic->pData[0], pPic->iLineSize[0], pPic->iWidthInPixel, pPic->iHeightInPixel); + ClearEndOfLinePadding (pPic->pData[1], pPic->iLineSize[1], pPic->iWidthInPixel >> 1, pPic->iHeightInPixel >> 1); + ClearEndOfLinePadding (pPic->pData[2], pPic->iLineSize[2], pPic->iWidthInPixel >> 1, pPic->iHeightInPixel >> 1); + } + return 0; +} + +void FreeScaledPic (Scaled_Picture* pScaledPicture, CMemoryAlign* pMemoryAlign) { + if (pScaledPicture->pScaledInputPicture) { + FreePicture (pMemoryAlign, &pScaledPicture->pScaledInputPicture); + pScaledPicture->pScaledInputPicture = NULL; + } +} + +int32_t CWelsPreProcess::InitLastSpatialPictures (sWelsEncCtx* pCtx) { + SWelsSvcCodingParam* pParam = pCtx->pSvcParam; + const int32_t kiDlayerCount = pParam->iSpatialLayerNum; + int32_t iDlayerIndex = 0; + if (pParam->iUsageType == SCREEN_CONTENT_REAL_TIME) { + for (; iDlayerIndex < MAX_DEPENDENCY_LAYER; iDlayerIndex++) { + m_pLastSpatialPicture[iDlayerIndex][0] = m_pLastSpatialPicture[iDlayerIndex][1] = NULL; + } + } else { + for (; iDlayerIndex < kiDlayerCount; iDlayerIndex++) { + const int32_t kiLayerInTemporal = m_uiSpatialLayersInTemporal[iDlayerIndex]; + m_pLastSpatialPicture[iDlayerIndex][0] = m_pSpatialPic[iDlayerIndex][kiLayerInTemporal - 2]; + m_pLastSpatialPicture[iDlayerIndex][1] = NULL; + } + for (; iDlayerIndex < MAX_DEPENDENCY_LAYER; iDlayerIndex++) { + m_pLastSpatialPicture[iDlayerIndex][0] = m_pLastSpatialPicture[iDlayerIndex][1] = NULL; + } + } + return 0; +} +//*********************************************************************************************************/ + +int32_t CWelsPreProcess::ColorspaceConvert (SWelsSvcCodingParam* pSvcParam, SPicture* pDstPic, + const SSourcePicture* kpSrc, const int32_t kiWidth, const int32_t kiHeight) { + return 1; + //not support yet +} + +void CWelsPreProcess::BilateralDenoising (SPicture* pSrc, const int32_t kiWidth, const int32_t kiHeight) { + int32_t 
iMethodIdx = METHOD_DENOISE; + SPixMap sSrcPixMap; + memset (&sSrcPixMap, 0, sizeof (sSrcPixMap)); + sSrcPixMap.pPixel[0] = pSrc->pData[0]; + sSrcPixMap.pPixel[1] = pSrc->pData[1]; + sSrcPixMap.pPixel[2] = pSrc->pData[2]; + sSrcPixMap.iSizeInBits = g_kiPixMapSizeInBits; + sSrcPixMap.sRect.iRectWidth = kiWidth; + sSrcPixMap.sRect.iRectHeight = kiHeight; + sSrcPixMap.iStride[0] = pSrc->iLineSize[0]; + sSrcPixMap.iStride[1] = pSrc->iLineSize[1]; + sSrcPixMap.iStride[2] = pSrc->iLineSize[2]; + sSrcPixMap.eFormat = VIDEO_FORMAT_I420; + + m_pInterfaceVp->Process (iMethodIdx, &sSrcPixMap, NULL); +} + +ESceneChangeIdc CWelsPreProcessVideo::DetectSceneChange (SPicture* pCurPicture, SPicture* pRefPicture) { + int32_t iMethodIdx = METHOD_SCENE_CHANGE_DETECTION_VIDEO; + SSceneChangeResult sSceneChangeDetectResult = { SIMILAR_SCENE }; + SPixMap sSrcPixMap; + SPixMap sRefPixMap; + memset (&sSrcPixMap, 0, sizeof (sSrcPixMap)); + memset (&sRefPixMap, 0, sizeof (sRefPixMap)); + sSrcPixMap.pPixel[0] = pCurPicture->pData[0]; + sSrcPixMap.iSizeInBits = g_kiPixMapSizeInBits; + sSrcPixMap.iStride[0] = pCurPicture->iLineSize[0]; + sSrcPixMap.sRect.iRectWidth = pCurPicture->iWidthInPixel; + sSrcPixMap.sRect.iRectHeight = pCurPicture->iHeightInPixel; + sSrcPixMap.eFormat = VIDEO_FORMAT_I420; + + sRefPixMap.pPixel[0] = pRefPicture->pData[0]; + sRefPixMap.iSizeInBits = g_kiPixMapSizeInBits; + sRefPixMap.iStride[0] = pRefPicture->iLineSize[0]; + sRefPixMap.sRect.iRectWidth = pRefPicture->iWidthInPixel; + sRefPixMap.sRect.iRectHeight = pRefPicture->iHeightInPixel; + sRefPixMap.eFormat = VIDEO_FORMAT_I420; + + int32_t iRet = m_pInterfaceVp->Process (iMethodIdx, &sSrcPixMap, &sRefPixMap); + if (iRet == 0) { + m_pInterfaceVp->Get (iMethodIdx, (void*)&sSceneChangeDetectResult); + //bSceneChangeFlag = (sSceneChangeDetectResult.eSceneChangeIdc == LARGE_CHANGED_SCENE) ? 
true : false; + } + return sSceneChangeDetectResult.eSceneChangeIdc; +} + +SPicture* CWelsPreProcessVideo::GetCurrentOrigFrame (int32_t iDIdx) { + return m_pSpatialPic[iDIdx][GetCurPicPosition (iDIdx)]; +} + +int32_t CWelsPreProcess::DownsamplePadding (SPicture* pSrc, SPicture* pDstPic, int32_t iSrcWidth, int32_t iSrcHeight, + int32_t iShrinkWidth, int32_t iShrinkHeight, int32_t iTargetWidth, int32_t iTargetHeight, bool bForceCopy) { + int32_t iRet = 0; + SPixMap sSrcPixMap; + SPixMap sDstPicMap; + memset (&sSrcPixMap, 0, sizeof (sSrcPixMap)); + memset (&sDstPicMap, 0, sizeof (sDstPicMap)); + sSrcPixMap.pPixel[0] = pSrc->pData[0]; + sSrcPixMap.pPixel[1] = pSrc->pData[1]; + sSrcPixMap.pPixel[2] = pSrc->pData[2]; + sSrcPixMap.iSizeInBits = g_kiPixMapSizeInBits; + sSrcPixMap.sRect.iRectWidth = iSrcWidth; + sSrcPixMap.sRect.iRectHeight = iSrcHeight; + sSrcPixMap.iStride[0] = pSrc->iLineSize[0]; + sSrcPixMap.iStride[1] = pSrc->iLineSize[1]; + sSrcPixMap.iStride[2] = pSrc->iLineSize[2]; + sSrcPixMap.eFormat = VIDEO_FORMAT_I420; + + if (iSrcWidth != iShrinkWidth || iSrcHeight != iShrinkHeight || bForceCopy) { + int32_t iMethodIdx = METHOD_DOWNSAMPLE; + sDstPicMap.pPixel[0] = pDstPic->pData[0]; + sDstPicMap.pPixel[1] = pDstPic->pData[1]; + sDstPicMap.pPixel[2] = pDstPic->pData[2]; + sDstPicMap.iSizeInBits = g_kiPixMapSizeInBits; + sDstPicMap.sRect.iRectWidth = iShrinkWidth; + sDstPicMap.sRect.iRectHeight = iShrinkHeight; + sDstPicMap.iStride[0] = pDstPic->iLineSize[0]; + sDstPicMap.iStride[1] = pDstPic->iLineSize[1]; + sDstPicMap.iStride[2] = pDstPic->iLineSize[2]; + sDstPicMap.eFormat = VIDEO_FORMAT_I420; + + if (iSrcWidth != iShrinkWidth || iSrcHeight != iShrinkHeight) { + iRet = m_pInterfaceVp->Process (iMethodIdx, &sSrcPixMap, &sDstPicMap); + } else { + WelsMoveMemory_c (pDstPic->pData[0], pDstPic->pData[1], pDstPic->pData[2], pDstPic->iLineSize[0], pDstPic->iLineSize[1], + pSrc->pData[0], pSrc->pData[1], pSrc->pData[2], pSrc->iLineSize[0], pSrc->iLineSize[1], + 
iSrcWidth, iSrcHeight); + } + } else { + memcpy (&sDstPicMap, &sSrcPixMap, sizeof (sDstPicMap)); // confirmed_safe_unsafe_usage + } + + // get rid of odd line + iShrinkWidth -= (iShrinkWidth & 1); + iShrinkHeight -= (iShrinkHeight & 1); + Padding ((uint8_t*)sDstPicMap.pPixel[0], (uint8_t*)sDstPicMap.pPixel[1], (uint8_t*)sDstPicMap.pPixel[2], + sDstPicMap.iStride[0], sDstPicMap.iStride[1], iShrinkWidth, iTargetWidth, iShrinkHeight, iTargetHeight); + + return iRet; +} + +//*********************************************************************************************************/ +void CWelsPreProcess::VaaCalculation (SVAAFrameInfo* pVaaInfo, SPicture* pCurPicture, SPicture* pRefPicture, + bool bCalculateSQDiff, bool bCalculateVar, bool bCalculateBGD) { + pVaaInfo->sVaaCalcInfo.pCurY = pCurPicture->pData[0]; + pVaaInfo->sVaaCalcInfo.pRefY = pRefPicture->pData[0]; + { + int32_t iMethodIdx = METHOD_VAA_STATISTICS; + SPixMap sCurPixMap; + SPixMap sRefPixMap; + memset (&sCurPixMap, 0, sizeof (sCurPixMap)); + memset (&sRefPixMap, 0, sizeof (sRefPixMap)); + SVAACalcParam calc_param = {0}; + + sCurPixMap.pPixel[0] = pCurPicture->pData[0]; + sCurPixMap.iSizeInBits = g_kiPixMapSizeInBits; + sCurPixMap.sRect.iRectWidth = pCurPicture->iWidthInPixel; + sCurPixMap.sRect.iRectHeight = pCurPicture->iHeightInPixel; + sCurPixMap.iStride[0] = pCurPicture->iLineSize[0]; + sCurPixMap.eFormat = VIDEO_FORMAT_I420; + + sRefPixMap.pPixel[0] = pRefPicture->pData[0]; + sRefPixMap.iSizeInBits = g_kiPixMapSizeInBits; + sRefPixMap.sRect.iRectWidth = pRefPicture->iWidthInPixel; + sRefPixMap.sRect.iRectHeight = pRefPicture->iHeightInPixel; + sRefPixMap.iStride[0] = pRefPicture->iLineSize[0]; + sRefPixMap.eFormat = VIDEO_FORMAT_I420; + + calc_param.iCalcVar = bCalculateVar; + calc_param.iCalcBgd = bCalculateBGD; + calc_param.iCalcSsd = bCalculateSQDiff; + calc_param.pCalcResult = &pVaaInfo->sVaaCalcInfo; + + m_pInterfaceVp->Set (iMethodIdx, &calc_param); + m_pInterfaceVp->Process (iMethodIdx, 
&sCurPixMap, &sRefPixMap); + } +} + +void CWelsPreProcess::BackgroundDetection (SVAAFrameInfo* pVaaInfo, SPicture* pCurPicture, SPicture* pRefPicture, + bool bDetectFlag) { + if (bDetectFlag) { + pVaaInfo->iPicWidth = pCurPicture->iWidthInPixel; + pVaaInfo->iPicHeight = pCurPicture->iHeightInPixel; + + pVaaInfo->iPicStride = pCurPicture->iLineSize[0]; + pVaaInfo->iPicStrideUV = pCurPicture->iLineSize[1]; + pVaaInfo->pCurY = pCurPicture->pData[0]; + pVaaInfo->pRefY = pRefPicture->pData[0]; + pVaaInfo->pCurU = pCurPicture->pData[1]; + pVaaInfo->pRefU = pRefPicture->pData[1]; + pVaaInfo->pCurV = pCurPicture->pData[2]; + pVaaInfo->pRefV = pRefPicture->pData[2]; + + int32_t iMethodIdx = METHOD_BACKGROUND_DETECTION; + SPixMap sSrcPixMap; + SPixMap sRefPixMap; + memset (&sSrcPixMap, 0, sizeof (sSrcPixMap)); + memset (&sRefPixMap, 0, sizeof (sRefPixMap)); + SBGDInterface BGDParam = {0}; + + sSrcPixMap.pPixel[0] = pCurPicture->pData[0]; + sSrcPixMap.pPixel[1] = pCurPicture->pData[1]; + sSrcPixMap.pPixel[2] = pCurPicture->pData[2]; + sSrcPixMap.iSizeInBits = g_kiPixMapSizeInBits; + sSrcPixMap.iStride[0] = pCurPicture->iLineSize[0]; + sSrcPixMap.iStride[1] = pCurPicture->iLineSize[1]; + sSrcPixMap.iStride[2] = pCurPicture->iLineSize[2]; + sSrcPixMap.sRect.iRectWidth = pCurPicture->iWidthInPixel; + sSrcPixMap.sRect.iRectHeight = pCurPicture->iHeightInPixel; + sSrcPixMap.eFormat = VIDEO_FORMAT_I420; + + sRefPixMap.pPixel[0] = pRefPicture->pData[0]; + sRefPixMap.pPixel[1] = pRefPicture->pData[1]; + sRefPixMap.pPixel[2] = pRefPicture->pData[2]; + sRefPixMap.iSizeInBits = g_kiPixMapSizeInBits; + sRefPixMap.iStride[0] = pRefPicture->iLineSize[0]; + sRefPixMap.iStride[1] = pRefPicture->iLineSize[1]; + sRefPixMap.iStride[2] = pRefPicture->iLineSize[2]; + sRefPixMap.sRect.iRectWidth = pRefPicture->iWidthInPixel; + sRefPixMap.sRect.iRectHeight = pRefPicture->iHeightInPixel; + sRefPixMap.eFormat = VIDEO_FORMAT_I420; + + BGDParam.pBackgroundMbFlag = pVaaInfo->pVaaBackgroundMbFlag; + 
BGDParam.pCalcRes = & (pVaaInfo->sVaaCalcInfo); + m_pInterfaceVp->Set (iMethodIdx, (void*)&BGDParam); + m_pInterfaceVp->Process (iMethodIdx, &sSrcPixMap, &sRefPixMap); + } else { + int32_t iPicWidthInMb = (pCurPicture->iWidthInPixel + 15) >> 4; + int32_t iPicHeightInMb = (pCurPicture->iHeightInPixel + 15) >> 4; + memset (pVaaInfo->pVaaBackgroundMbFlag, 0, iPicWidthInMb * iPicHeightInMb); + } +} + +void CWelsPreProcess::AdaptiveQuantCalculation (SVAAFrameInfo* pVaaInfo, SPicture* pCurPicture, SPicture* pRefPicture) { + pVaaInfo->sAdaptiveQuantParam.pCalcResult = & (pVaaInfo->sVaaCalcInfo); + pVaaInfo->sAdaptiveQuantParam.iAverMotionTextureIndexToDeltaQp = 0; + + { + int32_t iMethodIdx = METHOD_ADAPTIVE_QUANT; + SPixMap pSrc; + SPixMap pRef; + memset (&pSrc, 0, sizeof (pSrc)); + memset (&pRef, 0, sizeof (pRef)); + int32_t iRet = 0; + + pSrc.pPixel[0] = pCurPicture->pData[0]; + pSrc.iSizeInBits = g_kiPixMapSizeInBits; + pSrc.iStride[0] = pCurPicture->iLineSize[0]; + pSrc.sRect.iRectWidth = pCurPicture->iWidthInPixel; + pSrc.sRect.iRectHeight = pCurPicture->iHeightInPixel; + pSrc.eFormat = VIDEO_FORMAT_I420; + + pRef.pPixel[0] = pRefPicture->pData[0]; + pRef.iSizeInBits = g_kiPixMapSizeInBits; + pRef.iStride[0] = pRefPicture->iLineSize[0]; + pRef.sRect.iRectWidth = pRefPicture->iWidthInPixel; + pRef.sRect.iRectHeight = pRefPicture->iHeightInPixel; + pRef.eFormat = VIDEO_FORMAT_I420; + + iRet = m_pInterfaceVp->Set (iMethodIdx, (void*) & (pVaaInfo->sAdaptiveQuantParam)); + iRet = m_pInterfaceVp->Process (iMethodIdx, &pSrc, &pRef); + if (iRet == 0) + m_pInterfaceVp->Get (iMethodIdx, (void*) & (pVaaInfo->sAdaptiveQuantParam)); + } +} + +void CWelsPreProcess::SetRefMbType (sWelsEncCtx* pCtx, uint32_t** pRefMbTypeArray, int32_t iRefPicType) { + const uint8_t uiTid = pCtx->uiTemporalId; + const uint8_t uiDid = pCtx->uiDependencyId; + SRefList* pRefPicLlist = pCtx->ppRefPicListExt[uiDid]; + SLTRState* pLtr = &pCtx->pLtr[uiDid]; + uint8_t i = 0; + + if 
(pCtx->pSvcParam->bEnableLongTermReference && pLtr->bReceivedT0LostFlag && uiTid == 0) { + for (i = 0; i < pRefPicLlist->uiLongRefCount; i++) { + SPicture* pRef = pRefPicLlist->pLongRefList[i]; + if (pRef != NULL && pRef->uiRecieveConfirmed == 1/*RECIEVE_SUCCESS*/) { + *pRefMbTypeArray = pRef->uiRefMbType; + break; + } + } + } else { + for (i = 0; i < pRefPicLlist->uiShortRefCount; i++) { + SPicture* pRef = pRefPicLlist->pShortRefList[i]; + if (pRef != NULL && pRef->bUsedAsRef && pRef->iFramePoc >= 0 && pRef->uiTemporalId <= uiTid) { + *pRefMbTypeArray = pRef->uiRefMbType; + break; + } + } + } +} + + +void CWelsPreProcess::AnalyzePictureComplexity (sWelsEncCtx* pCtx, SPicture* pCurPicture, SPicture* pRefPicture, + const int32_t kiDependencyId, const bool bCalculateBGD) { + SWelsSvcCodingParam* pSvcParam = pCtx->pSvcParam; + int32_t iComplexityAnalysisMode = 0; + + if (pSvcParam->iUsageType == SCREEN_CONTENT_REAL_TIME) { + SVAAFrameInfoExt* pVaaExt = static_cast (pCtx->pVaa); + SComplexityAnalysisScreenParam* sComplexityAnalysisParam = &pVaaExt->sComplexityScreenParam; + SWelsSvcRc* pWelsSvcRc = &pCtx->pWelsSvcRc[kiDependencyId]; + + if (pCtx->eSliceType == P_SLICE) + iComplexityAnalysisMode = GOM_SAD; + else if (pCtx->eSliceType == I_SLICE) + iComplexityAnalysisMode = GOM_VAR; + else + return; + + memset (pWelsSvcRc->pGomForegroundBlockNum, 0, pWelsSvcRc->iGomSize * sizeof (int32_t)); + memset (pWelsSvcRc->pCurrentFrameGomSad, 0, pWelsSvcRc->iGomSize * sizeof (int32_t)); + + sComplexityAnalysisParam->iFrameComplexity = 0; + sComplexityAnalysisParam->pGomComplexity = pWelsSvcRc->pCurrentFrameGomSad; + sComplexityAnalysisParam->iGomNumInFrame = pWelsSvcRc->iGomSize; + sComplexityAnalysisParam->iIdrFlag = (pCtx->eSliceType == I_SLICE); + sComplexityAnalysisParam->iMbRowInGom = GOM_H_SCC; + sComplexityAnalysisParam->sScrollResult.bScrollDetectFlag = false; + sComplexityAnalysisParam->sScrollResult.iScrollMvX = 0; + sComplexityAnalysisParam->sScrollResult.iScrollMvY = 
0; + + const int32_t iMethodIdx = METHOD_COMPLEXITY_ANALYSIS_SCREEN; + SPixMap sSrcPixMap; + SPixMap sRefPixMap; + memset (&sSrcPixMap, 0, sizeof (SPixMap)); + memset (&sRefPixMap, 0, sizeof (SPixMap)); + int32_t iRet = 0; + + sSrcPixMap.pPixel[0] = pCurPicture->pData[0]; + sSrcPixMap.iSizeInBits = g_kiPixMapSizeInBits; + sSrcPixMap.iStride[0] = pCurPicture->iLineSize[0]; + sSrcPixMap.sRect.iRectWidth = pCurPicture->iWidthInPixel; + sSrcPixMap.sRect.iRectHeight = pCurPicture->iHeightInPixel; + sSrcPixMap.eFormat = VIDEO_FORMAT_I420; + + if (pRefPicture != NULL) { + sRefPixMap.pPixel[0] = pRefPicture->pData[0]; + sRefPixMap.iSizeInBits = g_kiPixMapSizeInBits; + sRefPixMap.iStride[0] = pRefPicture->iLineSize[0]; + sRefPixMap.sRect.iRectWidth = pRefPicture->iWidthInPixel; + sRefPixMap.sRect.iRectHeight = pRefPicture->iHeightInPixel; + sRefPixMap.eFormat = VIDEO_FORMAT_I420; + } + + iRet = m_pInterfaceVp->Set (iMethodIdx, (void*)sComplexityAnalysisParam); + iRet = m_pInterfaceVp->Process (iMethodIdx, &sSrcPixMap, &sRefPixMap); + if (iRet == 0) + m_pInterfaceVp->Get (iMethodIdx, (void*)sComplexityAnalysisParam); + + } else { + SVAAFrameInfo* pVaaInfo = pCtx->pVaa; + SComplexityAnalysisParam* sComplexityAnalysisParam = & (pVaaInfo->sComplexityAnalysisParam); + SWelsSvcRc* SWelsSvcRc = &pCtx->pWelsSvcRc[kiDependencyId]; + + if (pSvcParam->iRCMode == RC_QUALITY_MODE && pCtx->eSliceType == P_SLICE) { + iComplexityAnalysisMode = FRAME_SAD; + } else if (((pSvcParam->iRCMode == RC_BITRATE_MODE) || (pSvcParam->iRCMode == RC_TIMESTAMP_MODE)) + && pCtx->eSliceType == P_SLICE) { + iComplexityAnalysisMode = GOM_SAD; + } else if (((pSvcParam->iRCMode == RC_BITRATE_MODE) || (pSvcParam->iRCMode == RC_TIMESTAMP_MODE)) + && pCtx->eSliceType == I_SLICE) { + iComplexityAnalysisMode = GOM_VAR; + } else { + return; + } + + sComplexityAnalysisParam->iComplexityAnalysisMode = iComplexityAnalysisMode; + sComplexityAnalysisParam->pCalcResult = & (pVaaInfo->sVaaCalcInfo); + 
sComplexityAnalysisParam->pBackgroundMbFlag = pVaaInfo->pVaaBackgroundMbFlag; + if (pRefPicture) + SetRefMbType (pCtx, & (sComplexityAnalysisParam->uiRefMbType), pRefPicture->iPictureType); + sComplexityAnalysisParam->iCalcBgd = bCalculateBGD; + sComplexityAnalysisParam->iFrameComplexity = 0; + + memset (SWelsSvcRc->pGomForegroundBlockNum, 0, SWelsSvcRc->iGomSize * sizeof (int32_t)); + if (iComplexityAnalysisMode != FRAME_SAD) + memset (SWelsSvcRc->pCurrentFrameGomSad, 0, SWelsSvcRc->iGomSize * sizeof (int32_t)); + + sComplexityAnalysisParam->pGomComplexity = SWelsSvcRc->pCurrentFrameGomSad; + sComplexityAnalysisParam->pGomForegroundBlockNum = SWelsSvcRc->pGomForegroundBlockNum; + sComplexityAnalysisParam->iMbNumInGom = SWelsSvcRc->iNumberMbGom; + + { + int32_t iMethodIdx = METHOD_COMPLEXITY_ANALYSIS; + SPixMap sSrcPixMap; + SPixMap sRefPixMap; + memset (&sSrcPixMap, 0, sizeof (SPixMap)); + memset (&sRefPixMap, 0, sizeof (SPixMap)); + int32_t iRet = 0; + + sSrcPixMap.pPixel[0] = pCurPicture->pData[0]; + sSrcPixMap.iSizeInBits = g_kiPixMapSizeInBits; + sSrcPixMap.iStride[0] = pCurPicture->iLineSize[0]; + sSrcPixMap.sRect.iRectWidth = pCurPicture->iWidthInPixel; + sSrcPixMap.sRect.iRectHeight = pCurPicture->iHeightInPixel; + sSrcPixMap.eFormat = VIDEO_FORMAT_I420; + + if (pRefPicture) { + sRefPixMap.pPixel[0] = pRefPicture->pData[0]; + sRefPixMap.iSizeInBits = g_kiPixMapSizeInBits; + sRefPixMap.iStride[0] = pRefPicture->iLineSize[0]; + sRefPixMap.sRect.iRectWidth = pRefPicture->iWidthInPixel; + sRefPixMap.sRect.iRectHeight = pRefPicture->iHeightInPixel; + } + sRefPixMap.eFormat = VIDEO_FORMAT_I420; + + iRet = m_pInterfaceVp->Set (iMethodIdx, (void*)sComplexityAnalysisParam); + iRet = m_pInterfaceVp->Process (iMethodIdx, &sSrcPixMap, &sRefPixMap); + if (iRet == 0) + m_pInterfaceVp->Get (iMethodIdx, (void*)sComplexityAnalysisParam); + } + } +} + + +void CWelsPreProcess::InitPixMap (const SPicture* pPicture, SPixMap* pPixMap) { + pPixMap->pPixel[0] = pPicture->pData[0]; 
+ pPixMap->pPixel[1] = pPicture->pData[1]; + pPixMap->pPixel[2] = pPicture->pData[2]; + pPixMap->iSizeInBits = sizeof (uint8_t); + pPixMap->iStride[0] = pPicture->iLineSize[0]; + pPixMap->iStride[1] = pPicture->iLineSize[1]; + pPixMap->sRect.iRectWidth = pPicture->iWidthInPixel; + pPixMap->sRect.iRectHeight = pPicture->iHeightInPixel; + + pPixMap->eFormat = VIDEO_FORMAT_I420; +} + +SPicture** CWelsPreProcessScreen::GetReferenceSrcPicList (int32_t iTargetDid) { + return (&m_pSpatialPic[iTargetDid][1]); +} + +void CWelsPreProcessScreen::GetAvailableRefListLosslessScreenRefSelection (SPicture** pRefPicList, uint8_t iCurTid, + const int32_t iClosestLtrFrameNum, + SRefInfoParam* pAvailableRefParam, int32_t& iAvailableRefNum, int32_t& iAvailableSceneRefNum) { + const int32_t iSourcePicNum = m_iAvaliableRefInSpatialPicList; + if (0 >= iSourcePicNum) { + iAvailableRefNum = 0; + iAvailableSceneRefNum = 0; + return ; + } + const bool bCurFrameMarkedAsSceneLtr = m_pEncCtx->bCurFrameMarkedAsSceneLtr; + SPicture* pRefPic = NULL; + uint8_t uiRefTid = 0; + bool bRefRealLtr = false; + + iAvailableRefNum = 1; //zero is left for the closest frame + iAvailableSceneRefNum = 0; + + //the saving order will be depend on pSrcPicList + //TODO: use a frame_idx to find the closer ref in time distance, and correctly sort the ref list + for (int32_t i = iSourcePicNum - 1; i >= 0; --i) { + pRefPic = pRefPicList[i]; + if (NULL == pRefPic || !pRefPic->bUsedAsRef || !pRefPic->bIsLongRef || (bCurFrameMarkedAsSceneLtr + && (!pRefPic->bIsSceneLTR))) { + continue; + } + uiRefTid = pRefPic->uiTemporalId; + bRefRealLtr = pRefPic->bIsSceneLTR; + + if (bRefRealLtr || (0 == iCurTid && 0 == uiRefTid) || (uiRefTid < iCurTid)) { + int32_t idx = (pRefPic->iLongTermPicNum == iClosestLtrFrameNum) ? 
(0) : (iAvailableRefNum++); + pAvailableRefParam[idx].pRefPicture = pRefPic; + pAvailableRefParam[idx].iSrcListIdx = i + 1; //in SrcList, the idx 0 is reserved for CurPic + iAvailableSceneRefNum += bRefRealLtr; + } + } + + if (pAvailableRefParam[0].pRefPicture == NULL) { + for (int32_t i = 1; i < iAvailableRefNum ; ++i) { + pAvailableRefParam[i - 1].pRefPicture = pAvailableRefParam[i].pRefPicture; + pAvailableRefParam[i - 1].iSrcListIdx = pAvailableRefParam[i].iSrcListIdx; + } + + pAvailableRefParam[iAvailableRefNum - 1].pRefPicture = NULL; + pAvailableRefParam[iAvailableRefNum - 1].iSrcListIdx = 0; + --iAvailableRefNum; + } +} + + +void CWelsPreProcessScreen::GetAvailableRefList (SPicture** pSrcPicList, uint8_t iCurTid, + const int32_t iClosestLtrFrameNum, + SRefInfoParam* pAvailableRefList, int32_t& iAvailableRefNum, int32_t& iAvailableSceneRefNum) { + const int32_t iSourcePicNum = m_iAvaliableRefInSpatialPicList; + if (0 >= iSourcePicNum) { + iAvailableRefNum = 0; + iAvailableSceneRefNum = 0; + return ; + } + SPicture* pRefPic = NULL; + uint8_t uiRefTid = 0; + iAvailableRefNum = 0; + iAvailableSceneRefNum = 0; + + //the saving order will be depend on pSrcPicList + //TODO: use a frame_idx to find the closer ref in time distance, and correctly sort the ref list + for (int32_t i = iSourcePicNum - 1; i >= 0; --i) { + pRefPic = pSrcPicList[i]; + if (NULL == pRefPic || !pRefPic->bUsedAsRef) { + continue; + } + uiRefTid = pRefPic->uiTemporalId; + + if (uiRefTid <= iCurTid) { + pAvailableRefList[iAvailableRefNum].pRefPicture = pRefPic; + pAvailableRefList[iAvailableRefNum].iSrcListIdx = i + 1; //in SrcList, the idx 0 is reserved for CurPic + iAvailableRefNum ++; + } + } +} + + +void CWelsPreProcessScreen::InitRefJudgement (SRefJudgement* pRefJudgement) { + pRefJudgement->iMinFrameComplexity = INT_MAX; + pRefJudgement->iMinFrameComplexity08 = INT_MAX; + pRefJudgement->iMinFrameComplexity11 = INT_MAX; + + pRefJudgement->iMinFrameNumGap = INT_MAX; + 
pRefJudgement->iMinFrameQp = INT_MAX; +} +bool CWelsPreProcessScreen::JudgeBestRef (SPicture* pRefPic, const SRefJudgement& sRefJudgement, + const int64_t iFrameComplexity, const bool bIsClosestLtrFrame) { + return (bIsClosestLtrFrame ? (iFrameComplexity < sRefJudgement.iMinFrameComplexity11) : + ((iFrameComplexity < sRefJudgement.iMinFrameComplexity08) || ((iFrameComplexity <= sRefJudgement.iMinFrameComplexity11) + && (pRefPic->iFrameAverageQp < sRefJudgement.iMinFrameQp)))); +} + +void CWelsPreProcessScreen::SaveBestRefToJudgement (const int32_t iRefPictureAvQP, const int64_t iComplexity, + SRefJudgement* pRefJudgement) { + pRefJudgement->iMinFrameQp = iRefPictureAvQP; + pRefJudgement->iMinFrameComplexity = iComplexity; + pRefJudgement->iMinFrameComplexity08 = static_cast (iComplexity * 0.8); + pRefJudgement->iMinFrameComplexity11 = static_cast (iComplexity * 1.1); +} +void CWelsPreProcessScreen::SaveBestRefToLocal (SRefInfoParam* pRefPicInfo, + const SSceneChangeResult& sSceneChangeResult, + SRefInfoParam* pRefSaved) { + memcpy (pRefSaved, pRefPicInfo, sizeof (SRefInfoParam)); + pRefSaved->pBestBlockStaticIdc = sSceneChangeResult.pStaticBlockIdc; +} + +void CWelsPreProcessScreen::SaveBestRefToVaa (SRefInfoParam& sRefSaved, SRefInfoParam* pVaaBestRef) { + (*pVaaBestRef) = sRefSaved; +} + +SPicture* CWelsPreProcessScreen::GetCurrentOrigFrame (int32_t iDIdx) { + return m_pSpatialPic[iDIdx][0]; +} + +ESceneChangeIdc CWelsPreProcessScreen::DetectSceneChange (SPicture* pCurPicture, SPicture* pRef) { + sWelsEncCtx* pCtx = m_pEncCtx; +#define STATIC_SCENE_MOTION_RATIO 0.01f + SWelsSvcCodingParam* pSvcParam = pCtx->pSvcParam; + SVAAFrameInfoExt* pVaaExt = static_cast (pCtx->pVaa); + SSpatialLayerInternal* pParamInternal = &pSvcParam->sDependencyLayers[0]; + if (NULL == pCtx || NULL == pVaaExt || NULL == pCurPicture) { + return LARGE_CHANGED_SCENE; + } + + const int32_t iTargetDid = pSvcParam->iSpatialLayerNum - 1; + if (0 != iTargetDid) { + return LARGE_CHANGED_SCENE; + 
} + + ESceneChangeIdc iVaaFrameSceneChangeIdc = LARGE_CHANGED_SCENE; + SPicture** pRefPicList = GetReferenceSrcPicList (iTargetDid); + if (NULL == pRefPicList) { + return LARGE_CHANGED_SCENE; + } + + SRefInfoParam sAvailableRefParam[MAX_REF_PIC_COUNT] = { { 0 } }; + int32_t iAvailableRefNum = 0; + int32_t iAvailableSceneRefNum = 0; + + int32_t iSceneChangeMethodIdx = METHOD_SCENE_CHANGE_DETECTION_SCREEN; + SSceneChangeResult sSceneChangeResult = {SIMILAR_SCENE, 0, 0, NULL}; + + SPixMap sSrcMap = { { 0 } }; + SPixMap sRefMap = { { 0 } }; + SRefJudgement sLtrJudgement; + SRefJudgement sSceneLtrJudgement; + SRefInfoParam sLtrSaved = {0}; + SRefInfoParam sSceneLtrSaved = {0}; + + int32_t iNumOfLargeChange = 0, iNumOfMediumChangeToLtr = 0; + + bool bIsClosestLtrFrame = false; + int32_t ret = 1, iScdIdx = 0; + + SPicture* pRefPic = NULL; + SRefInfoParam* pRefPicInfo = NULL; + uint8_t* pCurBlockStaticPointer = NULL; + SLogContext* pLogCtx = & (pCtx->sLogCtx); + const int32_t iNegligibleMotionBlocks = (static_cast ((pCurPicture->iWidthInPixel >> 3) * + (pCurPicture->iHeightInPixel >> 3) * STATIC_SCENE_MOTION_RATIO)); + const uint8_t iCurTid = GetTemporalLevel (&pSvcParam->sDependencyLayers[m_pEncCtx->sSpatialIndexMap[0].iDid], + pParamInternal->iCodingIndex, pSvcParam->uiGopSize); + if (iCurTid == INVALID_TEMPORAL_ID) { + return LARGE_CHANGED_SCENE; + } + const int32_t iClosestLtrFrameNum = pCtx->pLtr[iTargetDid].iLastLtrIdx[iCurTid]; + if (pSvcParam->bEnableLongTermReference) { + GetAvailableRefListLosslessScreenRefSelection (pRefPicList, iCurTid, iClosestLtrFrameNum, &sAvailableRefParam[0], + iAvailableRefNum, + iAvailableSceneRefNum); + } else { + GetAvailableRefList (pRefPicList, iCurTid, iClosestLtrFrameNum, &sAvailableRefParam[0], iAvailableRefNum, + iAvailableSceneRefNum); + } + //after this build, pAvailableRefList[idx].iSrcListIdx is the idx of the ref in h->spatial_pic + if (0 == iAvailableRefNum) { + WelsLog (pLogCtx, WELS_LOG_ERROR, "SceneChangeDetect() 
iAvailableRefNum=0 but not I."); + return LARGE_CHANGED_SCENE; + } + + InitPixMap (pCurPicture, &sSrcMap); + InitRefJudgement (&sLtrJudgement); + InitRefJudgement (&sSceneLtrJudgement); + + for (iScdIdx = 0; iScdIdx < iAvailableRefNum; iScdIdx ++) { + pCurBlockStaticPointer = pVaaExt->pVaaBlockStaticIdc[iScdIdx]; + sSceneChangeResult.eSceneChangeIdc = SIMILAR_SCENE; + sSceneChangeResult.pStaticBlockIdc = pCurBlockStaticPointer; + sSceneChangeResult.sScrollResult.bScrollDetectFlag = false; + + pRefPicInfo = & (sAvailableRefParam[iScdIdx]); + assert (NULL != pRefPicInfo); + pRefPic = pRefPicInfo->pRefPicture; + InitPixMap (pRefPic, &sRefMap); + + bIsClosestLtrFrame = (pRefPic->iLongTermPicNum == iClosestLtrFrameNum); + if (0 == iScdIdx) { + int32_t ret = 1; + SScrollDetectionParam* pScrollDetectInfo = & (pVaaExt->sScrollDetectInfo); + memset (pScrollDetectInfo, 0, sizeof (SScrollDetectionParam)); + + int32_t iMethodIdx = METHOD_SCROLL_DETECTION; + + m_pInterfaceVp->Set (iMethodIdx, (void*) (pScrollDetectInfo)); + ret = m_pInterfaceVp->Process (iMethodIdx, &sSrcMap, &sRefMap); + + if (ret == 0) { + m_pInterfaceVp->Get (iMethodIdx, (void*) (pScrollDetectInfo)); + } + sSceneChangeResult.sScrollResult = pVaaExt->sScrollDetectInfo; + } + + m_pInterfaceVp->Set (iSceneChangeMethodIdx, (void*) (&sSceneChangeResult)); + ret = m_pInterfaceVp->Process (iSceneChangeMethodIdx, &sSrcMap, &sRefMap); + + if (ret == 0) { + m_pInterfaceVp->Get (iSceneChangeMethodIdx, (void*)&sSceneChangeResult); + + const int64_t iFrameComplexity = sSceneChangeResult.iFrameComplexity; + const int32_t iSceneDetectIdc = sSceneChangeResult.eSceneChangeIdc; + const int32_t iMotionBlockNum = sSceneChangeResult.iMotionBlockNum; + + const bool bCurRefIsSceneLtr = pRefPic->bIsSceneLTR; + const int32_t iRefPicAvQP = pRefPic->iFrameAverageQp; + + //for scene change detection + iNumOfLargeChange += (static_cast (LARGE_CHANGED_SCENE == iSceneDetectIdc)); + iNumOfMediumChangeToLtr += (static_cast 
((bCurRefIsSceneLtr) && (iSceneDetectIdc != SIMILAR_SCENE))); + + //for reference selection + //this judge can only be saved when iAvailableRefNum==1, which is very limit + //when LTR is OFF, it can still judge from all available STR + if (JudgeBestRef (pRefPic, sLtrJudgement, iFrameComplexity, bIsClosestLtrFrame)) { + SaveBestRefToJudgement (iRefPicAvQP, iFrameComplexity, &sLtrJudgement); + SaveBestRefToLocal (pRefPicInfo, sSceneChangeResult, &sLtrSaved); + } + if (bCurRefIsSceneLtr && JudgeBestRef (pRefPic, sSceneLtrJudgement, iFrameComplexity, bIsClosestLtrFrame)) { + SaveBestRefToJudgement (iRefPicAvQP, iFrameComplexity, &sSceneLtrJudgement); + SaveBestRefToLocal (pRefPicInfo, sSceneChangeResult, &sSceneLtrSaved); + } + + if (iMotionBlockNum <= iNegligibleMotionBlocks) { + break; + } + } + } + + if (iNumOfLargeChange == iAvailableRefNum) { + iVaaFrameSceneChangeIdc = LARGE_CHANGED_SCENE; + } else if ((iNumOfMediumChangeToLtr == iAvailableSceneRefNum) && (0 != iAvailableSceneRefNum)) { + iVaaFrameSceneChangeIdc = MEDIUM_CHANGED_SCENE; + } else { + iVaaFrameSceneChangeIdc = SIMILAR_SCENE; + } + + WelsLog (pLogCtx, WELS_LOG_DEBUG, "iVaaFrameSceneChangeIdc = %d,codingIdx = %d", iVaaFrameSceneChangeIdc, + pParamInternal->iCodingIndex); + + SaveBestRefToVaa (sLtrSaved, & (pVaaExt->sVaaStrBestRefCandidate[0])); + pVaaExt->iVaaBestRefFrameNum = sLtrSaved.pRefPicture->iFrameNum; + pVaaExt->pVaaBestBlockStaticIdc = sLtrSaved.pBestBlockStaticIdc; + + if (0 < iAvailableSceneRefNum) { + SaveBestRefToVaa (sSceneLtrSaved, & (pVaaExt->sVaaLtrBestRefCandidate[0])); + } + + pVaaExt->iNumOfAvailableRef = 1; + return static_cast (iVaaFrameSceneChangeIdc); +} + +int32_t CWelsPreProcess::GetRefFrameInfo (int32_t iRefIdx, bool bCurrentFrameIsSceneLtr, SPicture*& pRefOri) { + const int32_t iTargetDid = m_pEncCtx->pSvcParam->iSpatialLayerNum - 1; + SVAAFrameInfoExt* pVaaExt = static_cast (m_pEncCtx->pVaa); + SRefInfoParam* pBestRefCandidateParam = (bCurrentFrameIsSceneLtr) ? 
(& (pVaaExt->sVaaLtrBestRefCandidate[iRefIdx])) : + (& (pVaaExt->sVaaStrBestRefCandidate[iRefIdx])); + pRefOri = m_pSpatialPic[iTargetDid][pBestRefCandidateParam->iSrcListIdx]; + return (m_pSpatialPic[iTargetDid][pBestRefCandidateParam->iSrcListIdx]->iLongTermPicNum); +} +void CWelsPreProcess::Padding (uint8_t* pSrcY, uint8_t* pSrcU, uint8_t* pSrcV, int32_t iStrideY, int32_t iStrideUV, + int32_t iActualWidth, int32_t iPaddingWidth, int32_t iActualHeight, int32_t iPaddingHeight) { + int32_t i; + + if (iPaddingHeight > iActualHeight) { + for (i = iActualHeight; i < iPaddingHeight; i++) { + memset (pSrcY + i * iStrideY, 0, iActualWidth); + + if (! (i & 1)) { + memset (pSrcU + i / 2 * iStrideUV, 0x80, iActualWidth / 2); + memset (pSrcV + i / 2 * iStrideUV, 0x80, iActualWidth / 2); + } + } + } + + if (iPaddingWidth > iActualWidth) { + for (i = 0; i < iPaddingHeight; i++) { + memset (pSrcY + i * iStrideY + iActualWidth, 0, iPaddingWidth - iActualWidth); + if (! (i & 1)) { + memset (pSrcU + i / 2 * iStrideUV + iActualWidth / 2, 0x80, (iPaddingWidth - iActualWidth) / 2); + memset (pSrcV + i / 2 * iStrideUV + iActualWidth / 2, 0x80, (iPaddingWidth - iActualWidth) / 2); + } + } + } +} + + +int32_t CWelsPreProcess::UpdateBlockIdcForScreen (uint8_t* pCurBlockStaticPointer, const SPicture* kpRefPic, + const SPicture* kpSrcPic) { + int32_t iSceneChangeMethodIdx = METHOD_SCENE_CHANGE_DETECTION_SCREEN; + SSceneChangeResult sSceneChangeResult = {SIMILAR_SCENE, 0, 0, NULL}; + sSceneChangeResult.pStaticBlockIdc = pCurBlockStaticPointer; + sSceneChangeResult.sScrollResult.bScrollDetectFlag = false; + + SPixMap sSrcMap = { { 0 } }; + SPixMap sRefMap = { { 0 } }; + InitPixMap (kpSrcPic, &sSrcMap); + InitPixMap (kpRefPic, &sRefMap); + + m_pInterfaceVp->Set (iSceneChangeMethodIdx, (void*) (&sSceneChangeResult)); + int32_t iRet = m_pInterfaceVp->Process (iSceneChangeMethodIdx, &sSrcMap, &sRefMap); + if (iRet == 0) { + m_pInterfaceVp->Get (iSceneChangeMethodIdx, (void*)&sSceneChangeResult); 
+ return 0; + } + return iRet; +} + +/*! +* \brief exchange two picture pData planes +* \param ppPic1 picture pointer to picture 1 +* \param ppPic2 picture pointer to picture 2 +* \return none +*/ +void CWelsPreProcess::WelsExchangeSpatialPictures (SPicture** ppPic1, SPicture** ppPic2) { + SPicture* tmp = *ppPic1; + + assert (*ppPic1 != *ppPic2); + + *ppPic1 = *ppPic2; + *ppPic2 = tmp; +} + +void CWelsPreProcess::UpdateSrcListLosslessScreenRefSelectionWithLtr (SPicture* pCurPicture, const int32_t kiCurDid, + const int32_t kuiMarkLongTermPicIdx, SPicture** pLongRefList) { + SPicture** pLongRefSrcList = &m_pSpatialPic[kiCurDid][0]; + for (int32_t i = 0; i < MAX_REF_PIC_COUNT; ++i) { + if (NULL == pLongRefSrcList[i + 1] || (NULL != pLongRefList[i] && pLongRefList[i]->bUsedAsRef + && pLongRefList[i]->bIsLongRef)) { + continue; + } else { + pLongRefSrcList[i + 1]->SetUnref(); + } + } + WelsExchangeSpatialPictures (&m_pSpatialPic[kiCurDid][0], + &m_pSpatialPic[kiCurDid][1 + kuiMarkLongTermPicIdx]); + m_iAvaliableRefInSpatialPicList = MAX_REF_PIC_COUNT; + (GetCurrentOrigFrame (kiCurDid))->SetUnref(); +} +void CWelsPreProcess::UpdateSrcList (SPicture* pCurPicture, const int32_t kiCurDid, SPicture** pShortRefList, + const uint32_t kuiShortRefCount) { + SPicture** pRefSrcList = &m_pSpatialPic[kiCurDid][0]; + + //pRefSrcList[0] is for current frame + if (pCurPicture->bUsedAsRef || pCurPicture->bIsLongRef) { + if (pCurPicture->iPictureType == P_SLICE && pCurPicture->uiTemporalId != 0) { + for (int iRefIdx = kuiShortRefCount - 1; iRefIdx >= 0; --iRefIdx) { + WelsExchangeSpatialPictures (&pRefSrcList[iRefIdx + 1], + &pRefSrcList[iRefIdx]); + } + m_iAvaliableRefInSpatialPicList = kuiShortRefCount; + } else { + WelsExchangeSpatialPictures (&pRefSrcList[0], &pRefSrcList[1]); + for (int32_t i = MAX_SHORT_REF_COUNT - 1; i > 0 ; --i) { + if (pRefSrcList[i + 1] != NULL) { + pRefSrcList[i + 1]->SetUnref(); + } + } + m_iAvaliableRefInSpatialPicList = 1; + } + } + (GetCurrentOrigFrame 
(kiCurDid))->SetUnref(); +} + +//TODO: may opti later +//TODO: not use this func? +void* WelsMemcpy (void* dst, const void* kpSrc, uint32_t uiSize) { + return ::memcpy (dst, kpSrc, uiSize); +} +void* WelsMemset (void* p, int32_t val, uint32_t uiSize) { + return ::memset (p, val, uiSize); +} + +//i420_to_i420_c +void WelsMoveMemory_c (uint8_t* pDstY, uint8_t* pDstU, uint8_t* pDstV, int32_t iDstStrideY, int32_t iDstStrideUV, + uint8_t* pSrcY, uint8_t* pSrcU, uint8_t* pSrcV, int32_t iSrcStrideY, int32_t iSrcStrideUV, int32_t iWidth, + int32_t iHeight) { + int32_t iWidth2 = iWidth >> 1; + int32_t iHeight2 = iHeight >> 1; + int32_t j; + + for (j = iHeight; j; j--) { + WelsMemcpy (pDstY, pSrcY, iWidth); + pDstY += iDstStrideY; + pSrcY += iSrcStrideY; + } + + for (j = iHeight2; j; j--) { + WelsMemcpy (pDstU, pSrcU, iWidth2); + WelsMemcpy (pDstV, pSrcV, iWidth2); + pDstU += iDstStrideUV; + pDstV += iDstStrideUV; + pSrcU += iSrcStrideUV; + pSrcV += iSrcStrideUV; + } +} + +void CWelsPreProcess::WelsMoveMemoryWrapper (SWelsSvcCodingParam* pSvcParam, SPicture* pDstPic, + const SSourcePicture* kpSrc, + const int32_t kiTargetWidth, const int32_t kiTargetHeight) { + if (VIDEO_FORMAT_I420 != (kpSrc->iColorFormat & (~VIDEO_FORMAT_VFlip))) + return; + + int32_t iSrcWidth = kpSrc->iPicWidth; + int32_t iSrcHeight = kpSrc->iPicHeight; + + if (iSrcHeight > kiTargetHeight) iSrcHeight = kiTargetHeight; + if (iSrcWidth > kiTargetWidth) iSrcWidth = kiTargetWidth; + + // copy from fr26 to fix the odd uiSize failed issue + if (iSrcWidth & 0x1) -- iSrcWidth; + if (iSrcHeight & 0x1) -- iSrcHeight; + + const int32_t kiSrcTopOffsetY = pSvcParam->SUsedPicRect.iTop; + const int32_t kiSrcTopOffsetUV = (kiSrcTopOffsetY >> 1); + const int32_t kiSrcLeftOffsetY = pSvcParam->SUsedPicRect.iLeft; + const int32_t kiSrcLeftOffsetUV = (kiSrcLeftOffsetY >> 1); + int32_t iSrcOffset[3] = {0, 0, 0}; + iSrcOffset[0] = kpSrc->iStride[0] * kiSrcTopOffsetY + kiSrcLeftOffsetY; + iSrcOffset[1] = kpSrc->iStride[1] * 
kiSrcTopOffsetUV + kiSrcLeftOffsetUV ; + iSrcOffset[2] = kpSrc->iStride[2] * kiSrcTopOffsetUV + kiSrcLeftOffsetUV; + + uint8_t* pSrcY = kpSrc->pData[0] + iSrcOffset[0]; + uint8_t* pSrcU = kpSrc->pData[1] + iSrcOffset[1]; + uint8_t* pSrcV = kpSrc->pData[2] + iSrcOffset[2]; + const int32_t kiSrcStrideY = kpSrc->iStride[0]; + const int32_t kiSrcStrideUV = kpSrc->iStride[1]; + + uint8_t* pDstY = pDstPic->pData[0]; + uint8_t* pDstU = pDstPic->pData[1]; + uint8_t* pDstV = pDstPic->pData[2]; + const int32_t kiDstStrideY = pDstPic->iLineSize[0]; + const int32_t kiDstStrideUV = pDstPic->iLineSize[1]; + + if (pSrcY) { + if (iSrcWidth <= 0 || iSrcHeight <= 0 || (iSrcWidth * iSrcHeight > (MAX_MBS_PER_FRAME << 8))) + return; + if (kiSrcTopOffsetY >= iSrcHeight || kiSrcLeftOffsetY >= iSrcWidth || iSrcWidth > kiSrcStrideY) + return; + } + if (pDstY) { + if (kiTargetWidth <= 0 || kiTargetHeight <= 0 || (kiTargetWidth * kiTargetHeight > (MAX_MBS_PER_FRAME << 8))) + return; + if (kiTargetWidth > kiDstStrideY) + return; + } + + if (pSrcY == NULL || pSrcU == NULL || pSrcV == NULL || pDstY == NULL || pDstU == NULL || pDstV == NULL + || (iSrcWidth & 1) || (iSrcHeight & 1)) { + } else { + //i420_to_i420_c + WelsMoveMemory_c (pDstY, pDstU, pDstV, kiDstStrideY, kiDstStrideUV, + pSrcY, pSrcU, pSrcV, kiSrcStrideY, kiSrcStrideUV, iSrcWidth, iSrcHeight); + + //in VP Process + if (kiTargetWidth > iSrcWidth || kiTargetHeight > iSrcHeight) { + Padding (pDstY, pDstU, pDstV, kiDstStrideY, kiDstStrideUV, iSrcWidth, kiTargetWidth, iSrcHeight, kiTargetHeight); + } + } + +} + +bool CWelsPreProcess::GetSceneChangeFlag (ESceneChangeIdc eSceneChangeIdc) { + return ((eSceneChangeIdc == LARGE_CHANGED_SCENE) ? 
true : false); +} + + +//*********************************************************************************************************/ +} // namespace WelsEnc diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/wels_task_base.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/wels_task_base.cpp new file mode 100644 index 000000000..3cc7bd2b3 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/wels_task_base.cpp @@ -0,0 +1,52 @@ +/*! + * \copy + * Copyright (c) 2009-2015, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file wels_task_base.cpp + * + * \brief function for base task + * + * \date 5/09/2012 Created + * + ************************************************************************************* + */ +#include "wels_task_base.h" + +namespace WelsEnc { + + +CWelsBaseTask::~CWelsBaseTask() { +} + + +} + + + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/wels_task_encoder.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/wels_task_encoder.cpp new file mode 100644 index 000000000..9b062df3b --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/wels_task_encoder.cpp @@ -0,0 +1,341 @@ +/*! + * \copy + * Copyright (c) 2009-2015, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file wels_task_encoder.h + * + * \brief interface for encoder tasks + * + * \date 07/06/2015 Created + * + ************************************************************************************* + */ + +#include +#include + +#include "typedefs.h" +#include "utils.h" +#include "measure_time.h" +#include "WelsTask.h" + +#include "wels_task_base.h" +#include "wels_task_encoder.h" + +#include "svc_enc_golomb.h" +#include "svc_encode_slice.h" +#include "slice_multi_threading.h" + +namespace WelsEnc { + +CWelsSliceEncodingTask::CWelsSliceEncodingTask (WelsCommon::IWelsTaskSink* pSink, sWelsEncCtx* pCtx, + const int32_t iSliceIdx) : CWelsBaseTask (pSink), m_eTaskResult (ENC_RETURN_SUCCESS) { + m_pCtx = pCtx; + m_iSliceIdx = iSliceIdx; +} + +CWelsSliceEncodingTask::~CWelsSliceEncodingTask() { +} + +WelsErrorType CWelsSliceEncodingTask::Execute() { + //fprintf(stdout, "OpenH264Enc_CWelsSliceEncodingTask_Execute, %x, sink=%x\n", this, m_pSink); + + m_eTaskResult = InitTask(); + WELS_VERIFY_RETURN_IFNEQ (m_eTaskResult, ENC_RETURN_SUCCESS) + + m_eTaskResult = ExecuteTask(); + + FinishTask(); + + //fprintf(stdout, "OpenH264Enc_CWelsSliceEncodingTask_Execute Ends\n"); + return m_eTaskResult; +} + +WelsErrorType CWelsSliceEncodingTask::SetBoundary (int32_t iStartIdx, int32_t iEndIdx) { + m_iStartMbIdx = iStartIdx; + m_iEndMbIdx = iEndIdx; + return ENC_RETURN_SUCCESS; +} + +int32_t CWelsSliceEncodingTask::QueryEmptyThread (bool* 
pThreadBsBufferUsage) { + for (int32_t k = 0; k < MAX_THREADS_NUM; k++) { + if (pThreadBsBufferUsage[k] == false) { + pThreadBsBufferUsage[k] = true; + return k; + } + } + return -1; +} + +WelsErrorType CWelsSliceEncodingTask::InitTask() { + m_eNalType = m_pCtx->eNalType; + m_eNalRefIdc = m_pCtx->eNalPriority; + m_bNeedPrefix = m_pCtx->bNeedPrefixNalFlag; + + WelsMutexLock (&m_pCtx->pSliceThreading->mutexThreadBsBufferUsage); + m_iThreadIdx = QueryEmptyThread (m_pCtx->pSliceThreading->bThreadBsBufferUsage); + WelsMutexUnlock (&m_pCtx->pSliceThreading->mutexThreadBsBufferUsage); + + WelsLog (&m_pCtx->sLogCtx, WELS_LOG_DEBUG, + "[MT] CWelsSliceEncodingTask()InitTask for m_iSliceIdx %d, lock thread %d", + m_iSliceIdx, m_iThreadIdx); + if (m_iThreadIdx < 0) { + WelsLog (&m_pCtx->sLogCtx, WELS_LOG_WARNING, + "[MT] CWelsSliceEncodingTask InitTask(), Cannot find available thread for m_iSliceIdx = %d", m_iSliceIdx); + return ENC_RETURN_UNEXPECTED; + } + + int32_t iReturn = InitOneSliceInThread (m_pCtx, m_pSlice, m_iThreadIdx, m_pCtx->uiDependencyId, m_iSliceIdx); + WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS) + m_pSliceBs = &m_pSlice->sSliceBs; + + iReturn = SetSliceBoundaryInfo (m_pCtx->pCurDqLayer, m_pSlice, m_iSliceIdx); + WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS) + + SetOneSliceBsBufferUnderMultithread (m_pCtx, m_iThreadIdx, m_pSlice); + + assert ((void*) (&m_pSliceBs->sBsWrite) == (void*)m_pSlice->pSliceBsa); + InitBits (&m_pSliceBs->sBsWrite, m_pSliceBs->pBsBuffer, m_pSliceBs->uiSize); + //printf ("CWelsSliceEncodingTask_InitTask slice %d\n", m_iSliceIdx); + + return ENC_RETURN_SUCCESS; +} + +void CWelsSliceEncodingTask::FinishTask() { + WelsMutexLock (&m_pCtx->pSliceThreading->mutexThreadBsBufferUsage); + m_pCtx->pSliceThreading->bThreadBsBufferUsage[m_iThreadIdx] = false; + WelsMutexUnlock (&m_pCtx->pSliceThreading->mutexThreadBsBufferUsage); + + WelsLog (&m_pCtx->sLogCtx, WELS_LOG_DEBUG, + "[MT] CWelsSliceEncodingTask()FinishTask for 
m_iSliceIdx %d, unlock thread %d", + m_iSliceIdx, m_iThreadIdx); + + //sync multi-threading error + WelsMutexLock (&m_pCtx->mutexEncoderError); + if (ENC_RETURN_SUCCESS != m_eTaskResult) { + m_pCtx->iEncoderError |= m_eTaskResult; + } + WelsMutexUnlock (&m_pCtx->mutexEncoderError); +} + +WelsErrorType CWelsSliceEncodingTask::ExecuteTask() { + +#if MT_DEBUG_BS_WR + m_pSliceBs->bSliceCodedFlag = false; +#endif//MT_DEBUG_BS_WR + SSpatialLayerInternal* pParamInternal = &m_pCtx->pSvcParam->sDependencyLayers[m_pCtx->uiDependencyId]; + if (m_bNeedPrefix) { + if (m_eNalRefIdc != NRI_PRI_LOWEST) { + WelsLoadNalForSlice (m_pSliceBs, NAL_UNIT_PREFIX, m_eNalRefIdc); + WelsWriteSVCPrefixNal (&m_pSliceBs->sBsWrite, m_eNalRefIdc, (NAL_UNIT_CODED_SLICE_IDR == m_eNalType)); + WelsUnloadNalForSlice (m_pSliceBs); + } else { // No Prefix NAL Unit RBSP syntax here, but need add NAL Unit Header extension + WelsLoadNalForSlice (m_pSliceBs, NAL_UNIT_PREFIX, m_eNalRefIdc); + // No need write any syntax of prefix NAL Unit RBSP here + WelsUnloadNalForSlice (m_pSliceBs); + } + } + + WelsLoadNalForSlice (m_pSliceBs, m_eNalType, m_eNalRefIdc); + assert (m_iSliceIdx == (int) m_pSlice->iSliceIdx); + int32_t iReturn = WelsCodeOneSlice (m_pCtx, m_pSlice, m_eNalType); + if (ENC_RETURN_SUCCESS != iReturn) { + return iReturn; + } + WelsUnloadNalForSlice (m_pSliceBs); + + m_iSliceSize = 0; + iReturn = WriteSliceBs (m_pCtx, m_pSliceBs, m_iSliceIdx, m_iSliceSize); + if (ENC_RETURN_SUCCESS != iReturn) { + WelsLog (&m_pCtx->sLogCtx, WELS_LOG_WARNING, + "[MT] CWelsSliceEncodingTask ExecuteTask(), WriteSliceBs not successful: coding_idx %d, um_iSliceIdx %d", + pParamInternal->iCodingIndex, + m_iSliceIdx); + return iReturn; + } + + m_pCtx->pFuncList->pfDeblocking.pfDeblockingFilterSlice (m_pCtx->pCurDqLayer, m_pCtx->pFuncList, m_pSlice); + + WelsLog (&m_pCtx->sLogCtx, WELS_LOG_DETAIL, + "@pSlice=%-6d sliceType:%c idc:%d size:%-6d", m_iSliceIdx, + (m_pCtx->eSliceType == P_SLICE ? 
'P' : 'I'), + m_eNalRefIdc, + m_iSliceSize); + +#if MT_DEBUG_BS_WR + m_pSliceBs->bSliceCodedFlag = true; +#endif//MT_DEBUG_BS_WR + + return ENC_RETURN_SUCCESS; +} + + +// CWelsLoadBalancingSlicingEncodingTask +WelsErrorType CWelsLoadBalancingSlicingEncodingTask::InitTask() { + WelsErrorType iReturn = CWelsSliceEncodingTask::InitTask(); + if (ENC_RETURN_SUCCESS != iReturn) { + return iReturn; + } + + m_iSliceStart = WelsTime(); + WelsLog (&m_pCtx->sLogCtx, WELS_LOG_DEBUG, + "[MT] CWelsLoadBalancingSlicingEncodingTask()InitTask for m_iSliceIdx %d at time=%" PRId64, + m_iSliceIdx, m_iSliceStart); + + return ENC_RETURN_SUCCESS; +} + +void CWelsLoadBalancingSlicingEncodingTask::FinishTask() { + CWelsSliceEncodingTask::FinishTask(); + SSpatialLayerInternal* pParamInternal = &m_pCtx->pSvcParam->sDependencyLayers[m_pCtx->uiDependencyId]; + m_pSlice->uiSliceConsumeTime = (uint32_t) (WelsTime() - m_iSliceStart); + WelsLog (&m_pCtx->sLogCtx, WELS_LOG_DEBUG, + "[MT] CWelsLoadBalancingSlicingEncodingTask()FinishTask, coding_idx %d, um_iSliceIdx %d, uiSliceConsumeTime %d, m_iSliceSize %d, iFirstMbInSlice %d, count_num_mb_in_slice %d at time=%" + PRId64, + pParamInternal->iCodingIndex, + m_iSliceIdx, + m_pSlice->uiSliceConsumeTime, + m_iSliceSize, + m_pSlice->sSliceHeaderExt.sSliceHeader.iFirstMbInSlice, + m_pSlice->iCountMbNumInSlice, + (m_pSlice->uiSliceConsumeTime + m_iSliceStart)); +} + +//CWelsConstrainedSizeSlicingEncodingTask +WelsErrorType CWelsConstrainedSizeSlicingEncodingTask::ExecuteTask() { + + SDqLayer* pCurDq = m_pCtx->pCurDqLayer; + const int32_t kiSliceIdxStep = m_pCtx->iActiveThreadsNum; + SSpatialLayerInternal* pParamInternal = &m_pCtx->pSvcParam->sDependencyLayers[m_pCtx->uiDependencyId]; + const int32_t kiPartitionId = m_iSliceIdx % kiSliceIdxStep; + const int32_t kiFirstMbInPartition = pCurDq->FirstMbIdxOfPartition[kiPartitionId]; + const int32_t kiEndMbIdxInPartition = pCurDq->EndMbIdxOfPartition[kiPartitionId]; + const int32_t kiCodedSliceNumByThread = 
pCurDq->sSliceBufferInfo[m_iThreadIdx].iCodedSliceNum; + m_pSlice = &pCurDq->sSliceBufferInfo[m_iThreadIdx].pSliceBuffer[kiCodedSliceNumByThread]; + m_pSlice->sSliceHeaderExt.sSliceHeader.iFirstMbInSlice = kiFirstMbInPartition; + int32_t iReturn = 0; + bool bNeedReallocate = false; + + int32_t iDiffMbIdx = kiEndMbIdxInPartition - kiFirstMbInPartition; + if (0 == iDiffMbIdx) { + m_pSlice->iSliceIdx = -1; + return ENC_RETURN_SUCCESS; + } + + int32_t iAnyMbLeftInPartition = iDiffMbIdx + 1; + int32_t iLocalSliceIdx = m_iSliceIdx; + while (iAnyMbLeftInPartition > 0) { + bNeedReallocate = (pCurDq->sSliceBufferInfo[m_iThreadIdx].iCodedSliceNum + >= pCurDq->sSliceBufferInfo[m_iThreadIdx].iMaxSliceNum - 1) ? true : false; + if (bNeedReallocate) { + WelsMutexLock (&m_pCtx->pSliceThreading->mutexThreadSlcBuffReallocate); + //for memory statistic variable + iReturn = ReallocateSliceInThread (m_pCtx, pCurDq, m_pCtx->uiDependencyId, m_iThreadIdx); + WelsMutexUnlock (&m_pCtx->pSliceThreading->mutexThreadSlcBuffReallocate); + if (ENC_RETURN_SUCCESS != iReturn) { + return iReturn; + } + } + + iReturn = InitOneSliceInThread (m_pCtx, m_pSlice, m_iThreadIdx, m_pCtx->uiDependencyId, iLocalSliceIdx); + WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS) + m_pSliceBs = &m_pSlice->sSliceBs; + InitBits (&m_pSliceBs->sBsWrite, m_pSliceBs->pBsBuffer, m_pSliceBs->uiSize); + + if (m_bNeedPrefix) { + if (m_eNalRefIdc != NRI_PRI_LOWEST) { + WelsLoadNalForSlice (m_pSliceBs, NAL_UNIT_PREFIX, m_eNalRefIdc); + WelsWriteSVCPrefixNal (&m_pSliceBs->sBsWrite, m_eNalRefIdc, (NAL_UNIT_CODED_SLICE_IDR == m_eNalType)); + WelsUnloadNalForSlice (m_pSliceBs); + } else { // No Prefix NAL Unit RBSP syntax here, but need add NAL Unit Header extension + WelsLoadNalForSlice (m_pSliceBs, NAL_UNIT_PREFIX, m_eNalRefIdc); + // No need write any syntax of prefix NAL Unit RBSP here + WelsUnloadNalForSlice (m_pSliceBs); + } + } + + WelsLoadNalForSlice (m_pSliceBs, m_eNalType, m_eNalRefIdc); + + assert (iLocalSliceIdx 
== (int) m_pSlice->iSliceIdx); + int32_t iReturn = WelsCodeOneSlice (m_pCtx, m_pSlice, m_eNalType); + if (ENC_RETURN_SUCCESS != iReturn) { + return iReturn; + } + WelsUnloadNalForSlice (m_pSliceBs); + + iReturn = WriteSliceBs (m_pCtx, m_pSliceBs, iLocalSliceIdx, m_iSliceSize); + if (ENC_RETURN_SUCCESS != iReturn) { + WelsLog (&m_pCtx->sLogCtx, WELS_LOG_WARNING, + "[MT] CWelsConstrainedSizeSlicingEncodingTask ExecuteTask(), WriteSliceBs not successful: coding_idx %d, uiLocalSliceIdx %d, BufferSize %d, m_iSliceSize %d, iPayloadSize %d", + pParamInternal->iCodingIndex, + iLocalSliceIdx, m_pSliceBs->uiSize, m_iSliceSize, m_pSliceBs->sNalList[0].iPayloadSize); + return iReturn; + } + m_pCtx->pFuncList->pfDeblocking.pfDeblockingFilterSlice (pCurDq, m_pCtx->pFuncList, m_pSlice); + + WelsLog (&m_pCtx->sLogCtx, WELS_LOG_DETAIL, + "@pSlice=%-6d sliceType:%c idc:%d size:%-6d\n", + iLocalSliceIdx, + (m_pCtx->eSliceType == P_SLICE ? 'P' : 'I'), + m_eNalRefIdc, + m_iSliceSize + ); + + WelsLog (&m_pCtx->sLogCtx, WELS_LOG_DEBUG, + "[MT] CWelsConstrainedSizeSlicingEncodingTask(), coding_idx %d, iPartitionId %d, m_iThreadIdx %d, iLocalSliceIdx %d, m_iSliceSize %d, ParamValidationExt(), invalid uiMaxNalSizeiEndMbInPartition %d, pCurDq->LastCodedMbIdxOfPartition[%d] %d\n", + pParamInternal->iCodingIndex, kiPartitionId, m_iThreadIdx, iLocalSliceIdx, m_iSliceSize, + kiEndMbIdxInPartition, kiPartitionId, pCurDq->LastCodedMbIdxOfPartition[kiPartitionId]); + + iAnyMbLeftInPartition = kiEndMbIdxInPartition - pCurDq->LastCodedMbIdxOfPartition[kiPartitionId]; + iLocalSliceIdx += kiSliceIdxStep; + m_pCtx->pCurDqLayer->sSliceBufferInfo[m_iThreadIdx].iCodedSliceNum ++; + } + + return ENC_RETURN_SUCCESS; +} + + +CWelsUpdateMbMapTask::CWelsUpdateMbMapTask (WelsCommon::IWelsTaskSink* pSink, sWelsEncCtx* pCtx, + const int32_t iSliceIdx): CWelsBaseTask (pSink) { + m_pCtx = pCtx; + m_iSliceIdx = iSliceIdx; +} + +CWelsUpdateMbMapTask::~CWelsUpdateMbMapTask() { +} + +WelsErrorType 
CWelsUpdateMbMapTask::Execute() { + UpdateMbListNeighborParallel (m_pCtx->pCurDqLayer, m_pCtx->pCurDqLayer->sMbDataP, m_iSliceIdx); + return ENC_RETURN_SUCCESS; +} + +} + + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/wels_task_management.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/wels_task_management.cpp new file mode 100644 index 000000000..0aa00dbc2 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/src/wels_task_management.cpp @@ -0,0 +1,277 @@ +/*! + * \copy + * Copyright (c) 2009-2015, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file wels_task_management.cpp + * + * \brief function for task management + * + * \date 5/14/2012 Created + * + ************************************************************************************* + */ +#include +#include + +#include "typedefs.h" +#include "utils.h" +#include "WelsLock.h" +#include "memory_align.h" + +#include "wels_common_basis.h" +#include "encoder_context.h" +#include "wels_task_base.h" +#include "wels_task_encoder.h" +#include "wels_task_management.h" + +namespace WelsEnc { + + + +IWelsTaskManage* IWelsTaskManage::CreateTaskManage (sWelsEncCtx* pCtx, const int32_t iSpatialLayer, + const bool bNeedLock) { + if (NULL == pCtx) { + return NULL; + } + + IWelsTaskManage* pTaskManage; + pTaskManage = WELS_NEW_OP (CWelsTaskManageBase(), CWelsTaskManageBase); + WELS_VERIFY_RETURN_IF (NULL, NULL == pTaskManage) + + if (ENC_RETURN_SUCCESS != pTaskManage->Init (pCtx)) { + pTaskManage->Uninit(); + WELS_DELETE_OP (pTaskManage); + } + return pTaskManage; +} + + +CWelsTaskManageBase::CWelsTaskManageBase() + : m_pEncCtx (NULL), + m_pThreadPool (NULL), + m_iWaitTaskNum (0) { + + for (int32_t iDid = 0; iDid < MAX_DEPENDENCY_LAYER; iDid++) { + m_iTaskNum[iDid] = 0; + m_cEncodingTaskList[iDid] = new TASKLIST_TYPE(); + m_cPreEncodingTaskList[iDid] = new TASKLIST_TYPE(); + } + + WelsEventOpen (&m_hTaskEvent); + WelsMutexInit (&m_hEventMutex); +} + +CWelsTaskManageBase::~CWelsTaskManageBase() { + //fprintf(stdout, "~CWelsTaskManageBase\n"); + Uninit(); +} + +WelsErrorType CWelsTaskManageBase::Init (sWelsEncCtx* pEncCtx) { + m_pEncCtx = pEncCtx; + m_iThreadNum = m_pEncCtx->pSvcParam->iMultipleThreadIdc; + + int32_t iReturn = ENC_RETURN_SUCCESS; + //fprintf(stdout, "m_pThreadPool = &(CWelsThreadPool::GetInstance, this=%x\n", this); + iReturn = CWelsThreadPool::SetThreadNum (m_iThreadNum); + m_pThreadPool = (CWelsThreadPool::AddReference()); + if ((iReturn != ENC_RETURN_SUCCESS) && pEncCtx) { + WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_WARNING, "Set Thread 
Num to %d did not succeed, current thread num in use: %d", + m_iThreadNum, m_pThreadPool->GetThreadNum()); + } + WELS_VERIFY_RETURN_IF (ENC_RETURN_MEMALLOCERR, NULL == m_pThreadPool) + //fprintf(stdout, "m_pThreadPool = &(CWelsThreadPool::GetInstance3\n"); + + iReturn = ENC_RETURN_SUCCESS; + for (int32_t iDid = 0; iDid < MAX_DEPENDENCY_LAYER; iDid++) { + m_pcAllTaskList[CWelsBaseTask::WELS_ENC_TASK_ENCODING][iDid] = m_cEncodingTaskList[iDid]; + m_pcAllTaskList[CWelsBaseTask::WELS_ENC_TASK_UPDATEMBMAP][iDid] = m_cPreEncodingTaskList[iDid]; + iReturn |= CreateTasks (pEncCtx, iDid); + } + + //fprintf(stdout, "CWelsTaskManageBase Init m_iThreadNum %d m_iCurrentTaskNum %d pEncCtx->iMaxSliceCount %d\n", m_iThreadNum, m_iCurrentTaskNum, pEncCtx->iMaxSliceCount); + return iReturn; +} + +void CWelsTaskManageBase::Uninit() { + DestroyTasks(); + //fprintf(stdout, "m_pThreadPool = m_pThreadPool->RemoveInstance\n"); + if (m_pThreadPool) + m_pThreadPool->RemoveInstance(); + //WELS_DELETE_OP (m_pThreadPool); + + //fprintf(stdout, "m_pThreadPool = m_pThreadPool->RemoveInstance2\n"); + + for (int32_t iDid = 0; iDid < MAX_DEPENDENCY_LAYER; iDid++) { + WELS_DELETE_OP (m_cEncodingTaskList[iDid]); + WELS_DELETE_OP (m_cPreEncodingTaskList[iDid]); + } + WelsEventClose (&m_hTaskEvent); + WelsMutexDestroy (&m_hEventMutex); +} + +WelsErrorType CWelsTaskManageBase::CreateTasks (sWelsEncCtx* pEncCtx, const int32_t kiCurDid) { + CWelsBaseTask* pTask = NULL; + int32_t kiTaskCount; + uint32_t uiSliceMode = pEncCtx->pSvcParam->sSpatialLayers[kiCurDid].sSliceArgument.uiSliceMode; + + if (uiSliceMode != SM_SIZELIMITED_SLICE) { + kiTaskCount = m_iTaskNum[kiCurDid] = pEncCtx->pSvcParam->sSpatialLayers[kiCurDid].sSliceArgument.uiSliceNum; + } else { + kiTaskCount = m_iTaskNum[kiCurDid] = pEncCtx->iActiveThreadsNum; + } + + for (int idx = 0; idx < kiTaskCount; idx++) { + pTask = WELS_NEW_OP (CWelsUpdateMbMapTask (this, pEncCtx, idx), CWelsUpdateMbMapTask); + WELS_VERIFY_RETURN_IF 
(ENC_RETURN_MEMALLOCERR, NULL == pTask) + WELS_VERIFY_RETURN_IF (ENC_RETURN_MEMALLOCERR, true != m_cPreEncodingTaskList[kiCurDid]->push_back (pTask)); + } + + for (int idx = 0; idx < kiTaskCount; idx++) { + if (uiSliceMode == SM_SIZELIMITED_SLICE) { + pTask = WELS_NEW_OP (CWelsConstrainedSizeSlicingEncodingTask (this, pEncCtx, idx), + CWelsConstrainedSizeSlicingEncodingTask); + } else { + if (pEncCtx->pSvcParam->bUseLoadBalancing) { + pTask = WELS_NEW_OP (CWelsLoadBalancingSlicingEncodingTask (this, pEncCtx, idx), CWelsLoadBalancingSlicingEncodingTask); + } else { + pTask = WELS_NEW_OP (CWelsSliceEncodingTask (this, pEncCtx, idx), CWelsSliceEncodingTask); + } + } + WELS_VERIFY_RETURN_IF (ENC_RETURN_MEMALLOCERR, NULL == pTask) + WELS_VERIFY_RETURN_IF (ENC_RETURN_MEMALLOCERR, true != m_cEncodingTaskList[kiCurDid]->push_back (pTask)); + } + + //fprintf(stdout, "CWelsTaskManageBase CreateTasks m_iThreadNum %d kiTaskCount=%d\n", m_iThreadNum, kiTaskCount); + return ENC_RETURN_SUCCESS; +} + +void CWelsTaskManageBase::DestroyTaskList (TASKLIST_TYPE* pTargetTaskList) { + //fprintf(stdout, "CWelsTaskManageBase: pTargetTaskList size=%d m_iTotalTaskNum=%d\n", static_cast (pTargetTaskList->size()), m_iTotalTaskNum); + while (NULL != pTargetTaskList->begin()) { + CWelsBaseTask* pTask = pTargetTaskList->begin(); + WELS_DELETE_OP (pTask); + pTargetTaskList->pop_front(); + } + pTargetTaskList = NULL; +} + +void CWelsTaskManageBase::DestroyTasks() { + for (int32_t iDid = 0; iDid < MAX_DEPENDENCY_LAYER; iDid++) { + if (m_iTaskNum[iDid] > 0) { + DestroyTaskList (m_cEncodingTaskList[iDid]); + DestroyTaskList (m_cPreEncodingTaskList[iDid]); + m_iTaskNum[iDid] = 0; + m_pcAllTaskList[CWelsBaseTask::WELS_ENC_TASK_ENCODING][iDid] = NULL; + } + } + //fprintf(stdout, "[MT] CWelsTaskManageBase() DestroyTasks, cleaned %d tasks\n", m_iTotalTaskNum); +} + +void CWelsTaskManageBase::OnTaskMinusOne() { + //fprintf(stdout, "OnTaskMinusOne event %x m_iWaitTaskNum=%d\n", &m_hEventMutex, 
m_iWaitTaskNum); + WelsCommon::CWelsAutoLock cAutoLock (m_cWaitTaskNumLock); + WelsEventSignal (&m_hTaskEvent, &m_hEventMutex, &m_iWaitTaskNum); + /*WelsMutexLock(&m_hEventMutex); + m_iWaitTaskNum --; + WelsMutexUnlock(&m_hEventMutex); + + if (m_iWaitTaskNum <= 0) { + WelsEventSignal (&m_hTaskEvent); + fprintf(stdout, "OnTaskMinusOne WelsEventSignal m_iWaitTaskNum=%d\n", m_iWaitTaskNum); + }*/ + //fprintf(stdout, "OnTaskMinusOne m_iWaitTaskNum=%d\n", m_iWaitTaskNum); +} + +WelsErrorType CWelsTaskManageBase::OnTaskCancelled() { + OnTaskMinusOne(); + return ENC_RETURN_SUCCESS; +} + +WelsErrorType CWelsTaskManageBase::OnTaskExecuted() { + OnTaskMinusOne(); + return ENC_RETURN_SUCCESS; +} + +WelsErrorType CWelsTaskManageBase::ExecuteTaskList (TASKLIST_TYPE** pTaskList) { + m_iWaitTaskNum = m_iTaskNum[m_iCurDid]; + TASKLIST_TYPE* pTargetTaskList = (pTaskList[m_iCurDid]); + //fprintf(stdout, "ExecuteTaskList m_iWaitTaskNum=%d\n", m_iWaitTaskNum); + if (0 == m_iWaitTaskNum) { + return ENC_RETURN_SUCCESS; + } + + int32_t iCurrentTaskCount = m_iWaitTaskNum; //if directly use m_iWaitTaskNum in the loop make cause sync problem + int32_t iIdx = 0; + while (iIdx < iCurrentTaskCount) { + m_pThreadPool->QueueTask (pTargetTaskList->getNode (iIdx)); + iIdx ++; + } + + WelsEventWait (&m_hTaskEvent, &m_hEventMutex, m_iWaitTaskNum); + + return ENC_RETURN_SUCCESS; +} + +void CWelsTaskManageBase::InitFrame (const int32_t kiCurDid) { + m_iCurDid = kiCurDid; + if (m_pEncCtx->pCurDqLayer->bNeedAdjustingSlicing) { + ExecuteTaskList (m_pcAllTaskList[CWelsBaseTask::WELS_ENC_TASK_UPDATEMBMAP]); + } +} + +WelsErrorType CWelsTaskManageBase::ExecuteTasks (const CWelsBaseTask::ETaskType iTaskType) { + return ExecuteTaskList (m_pcAllTaskList[iTaskType]); +} + +int32_t CWelsTaskManageBase::GetThreadPoolThreadNum() { + return m_pThreadPool->GetThreadNum(); +} + +// CWelsTaskManageOne is for test +WelsErrorType CWelsTaskManageOne::Init (sWelsEncCtx* pEncCtx) { + m_pEncCtx = pEncCtx; + + return 
CreateTasks (pEncCtx, pEncCtx->iMaxSliceCount); +} + +WelsErrorType CWelsTaskManageOne::ExecuteTasks (const CWelsBaseTask::ETaskType iTaskType) { + while (NULL != m_cEncodingTaskList[0]->begin()) { + (m_cEncodingTaskList[0]->begin())->Execute(); + m_cEncodingTaskList[0]->pop_front(); + } + return ENC_RETURN_SUCCESS; +} +// CWelsTaskManageOne is for test + +} + + + + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/x86/coeff.asm b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/x86/coeff.asm new file mode 100644 index 000000000..9297aba88 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/x86/coeff.asm @@ -0,0 +1,695 @@ +;*! +;* \copy +;* Copyright (c) 2010-2013, Cisco Systems +;* All rights reserved. +;* +;* Redistribution and use in source and binary forms, with or without +;* modification, are permitted provided that the following conditions +;* are met: +;* +;* * Redistributions of source code must retain the above copyright +;* notice, this list of conditions and the following disclaimer. +;* +;* * Redistributions in binary form must reproduce the above copyright +;* notice, this list of conditions and the following disclaimer in +;* the documentation and/or other materials provided with the +;* distribution. +;* +;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +;* POSSIBILITY OF SUCH DAMAGE. +;* +;* +;* coeff.asm +;* +;* Abstract +;* CAVLC run/level parameter calculation (CavlcParamCal_sse2, CavlcParamCal_sse42) +;* +;* History +;* 09/08/2010 Created +;* +;* +;*************************************************************************/ + +%include "asm_inc.asm" +; The constant tables below are placed in .text for X86_32_PICASM builds and in .rodata otherwise. +%ifdef X86_32_PICASM +SECTION .text align=16 +%else +SECTION .rodata align=16 +%endif + +align 16 +; pshufb control mask that reverses the byte order of a 16-byte vector. +wels_shufb_rev: + db 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +; 4-bit table giving number of preceding zeros for each set bit as well as the +; eventual next bit. For the case where all 4 bits are set, this requires 5 +; zeros. The 5th zero can either be read from beyond the final table entry or +; implied via zero-initializing the location being read into. +wels_cavlc_param_cal_run_lut: + db 4, 0, 0, 0 + db 0, 3, 0, 0 + db 1, 2, 0, 0 + db 0, 0, 2, 0 + db 2, 1, 0, 0 + db 0, 1, 1, 0 + db 1, 0, 1, 0 + db 0, 0, 0, 1 + db 3, 0, 0, 0 + db 0, 2, 0, 0 + db 1, 1, 0, 0 + db 0, 0, 1, 0 + db 2, 0, 0, 0 + db 0, 1, 0, 0 + db 1, 0, 0, 0 + db 0, 0, 0, 0 +; db 0 +; 4-bit table giving pshufb vectors for compacting 4-word vectors by removing +; the words that match zero bits and concatenating in reverse order. 
+wels_cavlc_param_cal_shufb_lut: + db 0, 0, 0, 0, 0, 0, 0, 0 + db 6, 7, 0, 0, 0, 0, 0, 0 + db 4, 5, 0, 0, 0, 0, 0, 0 + db 6, 7, 4, 5, 0, 0, 0, 0 + db 2, 3, 0, 0, 0, 0, 0, 0 + db 6, 7, 2, 3, 0, 0, 0, 0 + db 4, 5, 2, 3, 0, 0, 0, 0 + db 6, 7, 4, 5, 2, 3, 0, 0 + db 0, 1, 0, 0, 0, 0, 0, 0 + db 6, 7, 0, 1, 0, 0, 0, 0 + db 4, 5, 0, 1, 0, 0, 0, 0 + db 6, 7, 4, 5, 0, 1, 0, 0 + db 2, 3, 0, 1, 0, 0, 0, 0 + db 6, 7, 2, 3, 0, 1, 0, 0 + db 4, 5, 2, 3, 0, 1, 0, 0 + db 6, 7, 4, 5, 2, 3, 0, 1 + + +%ifdef X86_32 + +align 16 +sse2_b8 db 8, 8, 8, 8, 8, 8, 8, 8 + +ALIGN 16 +sse2_b_1 db -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1 + +align 16 +byte_1pos_table: + db 0,0,0,0,0,0,0,0, ;0 + db 0,0,0,0,0,0,0,1, ;1 + db 1,0,0,0,0,0,0,1, ;2 + db 1,0,0,0,0,0,0,2, ;3 + db 2,0,0,0,0,0,0,1, ;4 + db 2,0,0,0,0,0,0,2, ;5 + db 2,1,0,0,0,0,0,2, ;6 + db 2,1,0,0,0,0,0,3, ;7 + db 3,0,0,0,0,0,0,1, ;8 + db 3,0,0,0,0,0,0,2, ;9 + db 3,1,0,0,0,0,0,2, ;10 + db 3,1,0,0,0,0,0,3, ;11 + db 3,2,0,0,0,0,0,2, ;12 + db 3,2,0,0,0,0,0,3, ;13 + db 3,2,1,0,0,0,0,3, ;14 + db 3,2,1,0,0,0,0,4, ;15 + db 4,0,0,0,0,0,0,1, ;16 + db 4,0,0,0,0,0,0,2, ;17 + db 4,1,0,0,0,0,0,2, ;18 + db 4,1,0,0,0,0,0,3, ;19 + db 4,2,0,0,0,0,0,2, ;20 + db 4,2,0,0,0,0,0,3, ;21 + db 4,2,1,0,0,0,0,3, ;22 + db 4,2,1,0,0,0,0,4, ;23 + db 4,3,0,0,0,0,0,2, ;24 + db 4,3,0,0,0,0,0,3, ;25 + db 4,3,1,0,0,0,0,3, ;26 + db 4,3,1,0,0,0,0,4, ;27 + db 4,3,2,0,0,0,0,3, ;28 + db 4,3,2,0,0,0,0,4, ;29 + db 4,3,2,1,0,0,0,4, ;30 + db 4,3,2,1,0,0,0,5, ;31 + db 5,0,0,0,0,0,0,1, ;32 + db 5,0,0,0,0,0,0,2, ;33 + db 5,1,0,0,0,0,0,2, ;34 + db 5,1,0,0,0,0,0,3, ;35 + db 5,2,0,0,0,0,0,2, ;36 + db 5,2,0,0,0,0,0,3, ;37 + db 5,2,1,0,0,0,0,3, ;38 + db 5,2,1,0,0,0,0,4, ;39 + db 5,3,0,0,0,0,0,2, ;40 + db 5,3,0,0,0,0,0,3, ;41 + db 5,3,1,0,0,0,0,3, ;42 + db 5,3,1,0,0,0,0,4, ;43 + db 5,3,2,0,0,0,0,3, ;44 + db 5,3,2,0,0,0,0,4, ;45 + db 5,3,2,1,0,0,0,4, ;46 + db 5,3,2,1,0,0,0,5, ;47 + db 5,4,0,0,0,0,0,2, ;48 + db 5,4,0,0,0,0,0,3, ;49 + db 5,4,1,0,0,0,0,3, ;50 + db 
5,4,1,0,0,0,0,4, ;51 + db 5,4,2,0,0,0,0,3, ;52 + db 5,4,2,0,0,0,0,4, ;53 + db 5,4,2,1,0,0,0,4, ;54 + db 5,4,2,1,0,0,0,5, ;55 + db 5,4,3,0,0,0,0,3, ;56 + db 5,4,3,0,0,0,0,4, ;57 + db 5,4,3,1,0,0,0,4, ;58 + db 5,4,3,1,0,0,0,5, ;59 + db 5,4,3,2,0,0,0,4, ;60 + db 5,4,3,2,0,0,0,5, ;61 + db 5,4,3,2,1,0,0,5, ;62 + db 5,4,3,2,1,0,0,6, ;63 + db 6,0,0,0,0,0,0,1, ;64 + db 6,0,0,0,0,0,0,2, ;65 + db 6,1,0,0,0,0,0,2, ;66 + db 6,1,0,0,0,0,0,3, ;67 + db 6,2,0,0,0,0,0,2, ;68 + db 6,2,0,0,0,0,0,3, ;69 + db 6,2,1,0,0,0,0,3, ;70 + db 6,2,1,0,0,0,0,4, ;71 + db 6,3,0,0,0,0,0,2, ;72 + db 6,3,0,0,0,0,0,3, ;73 + db 6,3,1,0,0,0,0,3, ;74 + db 6,3,1,0,0,0,0,4, ;75 + db 6,3,2,0,0,0,0,3, ;76 + db 6,3,2,0,0,0,0,4, ;77 + db 6,3,2,1,0,0,0,4, ;78 + db 6,3,2,1,0,0,0,5, ;79 + db 6,4,0,0,0,0,0,2, ;80 + db 6,4,0,0,0,0,0,3, ;81 + db 6,4,1,0,0,0,0,3, ;82 + db 6,4,1,0,0,0,0,4, ;83 + db 6,4,2,0,0,0,0,3, ;84 + db 6,4,2,0,0,0,0,4, ;85 + db 6,4,2,1,0,0,0,4, ;86 + db 6,4,2,1,0,0,0,5, ;87 + db 6,4,3,0,0,0,0,3, ;88 + db 6,4,3,0,0,0,0,4, ;89 + db 6,4,3,1,0,0,0,4, ;90 + db 6,4,3,1,0,0,0,5, ;91 + db 6,4,3,2,0,0,0,4, ;92 + db 6,4,3,2,0,0,0,5, ;93 + db 6,4,3,2,1,0,0,5, ;94 + db 6,4,3,2,1,0,0,6, ;95 + db 6,5,0,0,0,0,0,2, ;96 + db 6,5,0,0,0,0,0,3, ;97 + db 6,5,1,0,0,0,0,3, ;98 + db 6,5,1,0,0,0,0,4, ;99 + db 6,5,2,0,0,0,0,3, ;100 + db 6,5,2,0,0,0,0,4, ;101 + db 6,5,2,1,0,0,0,4, ;102 + db 6,5,2,1,0,0,0,5, ;103 + db 6,5,3,0,0,0,0,3, ;104 + db 6,5,3,0,0,0,0,4, ;105 + db 6,5,3,1,0,0,0,4, ;106 + db 6,5,3,1,0,0,0,5, ;107 + db 6,5,3,2,0,0,0,4, ;108 + db 6,5,3,2,0,0,0,5, ;109 + db 6,5,3,2,1,0,0,5, ;110 + db 6,5,3,2,1,0,0,6, ;111 + db 6,5,4,0,0,0,0,3, ;112 + db 6,5,4,0,0,0,0,4, ;113 + db 6,5,4,1,0,0,0,4, ;114 + db 6,5,4,1,0,0,0,5, ;115 + db 6,5,4,2,0,0,0,4, ;116 + db 6,5,4,2,0,0,0,5, ;117 + db 6,5,4,2,1,0,0,5, ;118 + db 6,5,4,2,1,0,0,6, ;119 + db 6,5,4,3,0,0,0,4, ;120 + db 6,5,4,3,0,0,0,5, ;121 + db 6,5,4,3,1,0,0,5, ;122 + db 6,5,4,3,1,0,0,6, ;123 + db 6,5,4,3,2,0,0,5, ;124 + db 6,5,4,3,2,0,0,6, ;125 + db 6,5,4,3,2,1,0,6, ;126 + 
db 6,5,4,3,2,1,0,7, ;127 + db 7,0,0,0,0,0,0,1, ;128 + db 7,0,0,0,0,0,0,2, ;129 + db 7,1,0,0,0,0,0,2, ;130 + db 7,1,0,0,0,0,0,3, ;131 + db 7,2,0,0,0,0,0,2, ;132 + db 7,2,0,0,0,0,0,3, ;133 + db 7,2,1,0,0,0,0,3, ;134 + db 7,2,1,0,0,0,0,4, ;135 + db 7,3,0,0,0,0,0,2, ;136 + db 7,3,0,0,0,0,0,3, ;137 + db 7,3,1,0,0,0,0,3, ;138 + db 7,3,1,0,0,0,0,4, ;139 + db 7,3,2,0,0,0,0,3, ;140 + db 7,3,2,0,0,0,0,4, ;141 + db 7,3,2,1,0,0,0,4, ;142 + db 7,3,2,1,0,0,0,5, ;143 + db 7,4,0,0,0,0,0,2, ;144 + db 7,4,0,0,0,0,0,3, ;145 + db 7,4,1,0,0,0,0,3, ;146 + db 7,4,1,0,0,0,0,4, ;147 + db 7,4,2,0,0,0,0,3, ;148 + db 7,4,2,0,0,0,0,4, ;149 + db 7,4,2,1,0,0,0,4, ;150 + db 7,4,2,1,0,0,0,5, ;151 + db 7,4,3,0,0,0,0,3, ;152 + db 7,4,3,0,0,0,0,4, ;153 + db 7,4,3,1,0,0,0,4, ;154 + db 7,4,3,1,0,0,0,5, ;155 + db 7,4,3,2,0,0,0,4, ;156 + db 7,4,3,2,0,0,0,5, ;157 + db 7,4,3,2,1,0,0,5, ;158 + db 7,4,3,2,1,0,0,6, ;159 + db 7,5,0,0,0,0,0,2, ;160 + db 7,5,0,0,0,0,0,3, ;161 + db 7,5,1,0,0,0,0,3, ;162 + db 7,5,1,0,0,0,0,4, ;163 + db 7,5,2,0,0,0,0,3, ;164 + db 7,5,2,0,0,0,0,4, ;165 + db 7,5,2,1,0,0,0,4, ;166 + db 7,5,2,1,0,0,0,5, ;167 + db 7,5,3,0,0,0,0,3, ;168 + db 7,5,3,0,0,0,0,4, ;169 + db 7,5,3,1,0,0,0,4, ;170 + db 7,5,3,1,0,0,0,5, ;171 + db 7,5,3,2,0,0,0,4, ;172 + db 7,5,3,2,0,0,0,5, ;173 + db 7,5,3,2,1,0,0,5, ;174 + db 7,5,3,2,1,0,0,6, ;175 + db 7,5,4,0,0,0,0,3, ;176 + db 7,5,4,0,0,0,0,4, ;177 + db 7,5,4,1,0,0,0,4, ;178 + db 7,5,4,1,0,0,0,5, ;179 + db 7,5,4,2,0,0,0,4, ;180 + db 7,5,4,2,0,0,0,5, ;181 + db 7,5,4,2,1,0,0,5, ;182 + db 7,5,4,2,1,0,0,6, ;183 + db 7,5,4,3,0,0,0,4, ;184 + db 7,5,4,3,0,0,0,5, ;185 + db 7,5,4,3,1,0,0,5, ;186 + db 7,5,4,3,1,0,0,6, ;187 + db 7,5,4,3,2,0,0,5, ;188 + db 7,5,4,3,2,0,0,6, ;189 + db 7,5,4,3,2,1,0,6, ;190 + db 7,5,4,3,2,1,0,7, ;191 + db 7,6,0,0,0,0,0,2, ;192 + db 7,6,0,0,0,0,0,3, ;193 + db 7,6,1,0,0,0,0,3, ;194 + db 7,6,1,0,0,0,0,4, ;195 + db 7,6,2,0,0,0,0,3, ;196 + db 7,6,2,0,0,0,0,4, ;197 + db 7,6,2,1,0,0,0,4, ;198 + db 7,6,2,1,0,0,0,5, ;199 + db 7,6,3,0,0,0,0,3, ;200 + 
db 7,6,3,0,0,0,0,4, ;201 + db 7,6,3,1,0,0,0,4, ;202 + db 7,6,3,1,0,0,0,5, ;203 + db 7,6,3,2,0,0,0,4, ;204 + db 7,6,3,2,0,0,0,5, ;205 + db 7,6,3,2,1,0,0,5, ;206 + db 7,6,3,2,1,0,0,6, ;207 + db 7,6,4,0,0,0,0,3, ;208 + db 7,6,4,0,0,0,0,4, ;209 + db 7,6,4,1,0,0,0,4, ;210 + db 7,6,4,1,0,0,0,5, ;211 + db 7,6,4,2,0,0,0,4, ;212 + db 7,6,4,2,0,0,0,5, ;213 + db 7,6,4,2,1,0,0,5, ;214 + db 7,6,4,2,1,0,0,6, ;215 + db 7,6,4,3,0,0,0,4, ;216 + db 7,6,4,3,0,0,0,5, ;217 + db 7,6,4,3,1,0,0,5, ;218 + db 7,6,4,3,1,0,0,6, ;219 + db 7,6,4,3,2,0,0,5, ;220 + db 7,6,4,3,2,0,0,6, ;221 + db 7,6,4,3,2,1,0,6, ;222 + db 7,6,4,3,2,1,0,7, ;223 + db 7,6,5,0,0,0,0,3, ;224 + db 7,6,5,0,0,0,0,4, ;225 + db 7,6,5,1,0,0,0,4, ;226 + db 7,6,5,1,0,0,0,5, ;227 + db 7,6,5,2,0,0,0,4, ;228 + db 7,6,5,2,0,0,0,5, ;229 + db 7,6,5,2,1,0,0,5, ;230 + db 7,6,5,2,1,0,0,6, ;231 + db 7,6,5,3,0,0,0,4, ;232 + db 7,6,5,3,0,0,0,5, ;233 + db 7,6,5,3,1,0,0,5, ;234 + db 7,6,5,3,1,0,0,6, ;235 + db 7,6,5,3,2,0,0,5, ;236 + db 7,6,5,3,2,0,0,6, ;237 + db 7,6,5,3,2,1,0,6, ;238 + db 7,6,5,3,2,1,0,7, ;239 + db 7,6,5,4,0,0,0,4, ;240 + db 7,6,5,4,0,0,0,5, ;241 + db 7,6,5,4,1,0,0,5, ;242 + db 7,6,5,4,1,0,0,6, ;243 + db 7,6,5,4,2,0,0,5, ;244 + db 7,6,5,4,2,0,0,6, ;245 + db 7,6,5,4,2,1,0,6, ;246 + db 7,6,5,4,2,1,0,7, ;247 + db 7,6,5,4,3,0,0,5, ;248 + db 7,6,5,4,3,0,0,6, ;249 + db 7,6,5,4,3,1,0,6, ;250 + db 7,6,5,4,3,1,0,7, ;251 + db 7,6,5,4,3,2,0,6, ;252 + db 7,6,5,4,3,2,0,7, ;253 + db 7,6,5,4,3,2,1,7, ;254 + db 7,6,5,4,3,2,1,8, ;255 + +%endif ; X86_32 + +;*********************************************************************** +; Code +;*********************************************************************** +SECTION .text + + +%ifdef X86_32 + +;*********************************************************************** +;int32_t CavlcParamCal_sse2(int16_t*coffLevel, uint8_t* run, int16_t *Level, int32_t* total_coeffs , int32_t endIdx); +;*********************************************************************** +WELS_EXTERN CavlcParamCal_sse2 + 
push ebx + push edi + push esi + %assign push_num 3 + INIT_X86_32_PIC ebp + + mov eax, arg1 ;coffLevel (4 words are loaded when endIdx == 3, otherwise 16 words via two aligned 16-byte loads) + mov edi, arg3 ;Level + mov ebx, arg5 ;endIdx + cmp ebx, 3 + jne .Level16 + pxor xmm1, xmm1 + movq xmm0, [eax] ; removed QWORD + jmp .Cal_begin +.Level16: + movdqa xmm0, [eax] + movdqa xmm1, [eax+16] +.Cal_begin: + movdqa xmm2, xmm0 + packsswb xmm0, xmm1 + movdqa xmm4, xmm0 + pxor xmm3, xmm3 + pcmpgtb xmm0, xmm3 + pcmpgtb xmm3, xmm4 + por xmm0, xmm3 + pmovmskb edx, xmm0 ; edx bit i = 1 when packed coefficient i is non-zero (positive or negative) + cmp edx, 0 + je near .return + movdqa xmm6, [pic(sse2_b_1)] + pcmpeqw xmm7, xmm7 ;generate -1 + mov ebx, 0xff + ;pinsrw xmm6, ebx, 3 +; ebx = mask bits 8..15; it selects the 8-byte byte_1pos_table row, whose byte 7 (the set-bit count) becomes the loop count in ecx and is saved in dh. + mov bl, dh + + lea ebx, [pic(byte_1pos_table+8*ebx)] + movq xmm0, [ebx] + pextrw ecx, xmm0, 3 + shr ecx, 8 + mov dh, cl +; High half: for each position byte p read from the row, append coffLevel[8 + p] to Level. +.loopHighFind0: + cmp ecx, 0 + je .loopHighFind0End + ;mov esi, [ebx] + ;and esi, 0xff + movzx esi, byte [ebx] + add esi, 8 + mov esi, [eax+2*esi] + mov [edi], si + add edi, 2 + ;add ebx, 1 + inc ebx + dec ecx + jmp .loopHighFind0 +.loopHighFind0End: + mov cl, dh + cmp cl, 8 + pand xmm0, xmm6 + jne .LowByteFind0 + sub edi, 2 ; count == 8: byte 7 of the row holds the count rather than position 0, so the last store is redone from coffLevel[8] + mov esi, [eax+16] + mov [edi], esi ; NOTE(review): 32-bit store here, whereas the matching fix-up after .loopLowFind0End stores only dx + add edi, 2 +.LowByteFind0: + and edx, 0xff + lea ebx, [pic(byte_1pos_table+8*edx)] + movq xmm1, [ebx] + pextrw esi, xmm1, 3 + or esi, 0xff + or ecx, 0xff00 + and ecx, esi + shr esi, 8 + pand xmm1, xmm6 +.loopLowFind0: + cmp esi, 0 + je .loopLowFind0End + ;mov edx, [ebx] + ;and edx, 0xff + movzx edx, byte [ebx] + mov edx, [eax+2*edx] + mov [edi], dx + add edi, 2 + ;add ebx, 1 + inc ebx + dec esi + jmp .loopLowFind0 +.loopLowFind0End: + cmp ch, 8 + jne .getLevelEnd + sub edi, 2 + mov edx, [eax] + mov [edi], dx +.getLevelEnd: + mov edx, arg4 ;total_coeffs + ;mov ebx, ecx + ;and ebx, 0xff + movzx ebx, byte cl + add cl, ch + mov [edx], cl ; NOTE(review): only the low byte of *total_coeffs is written here +;getRun + movq xmm5, [pic(sse2_b8)] + paddb xmm0, xmm5 + pxor xmm2, xmm2 + pxor xmm3, xmm3 + mov eax, 8 + sub eax, ebx + shl eax, 3 + shl ebx, 3 + pinsrw xmm2, ebx, 0 + pinsrw xmm3, eax, 0 + psllq xmm0, xmm3 + psrlq xmm0, xmm3 + movdqa xmm4, xmm1 + 
psllq xmm1, xmm2 + psrlq xmm4, xmm3 + punpcklqdq xmm1, xmm4 + por xmm0, xmm1 + + pextrw eax, xmm0, 0 + and eax, 0xff + inc eax + sub al, cl + movdqa xmm1, xmm0 + paddb xmm1, xmm7 + psrldq xmm0, 1 + psubb xmm1, xmm0 + mov ecx, arg2 ;run + movdqa [ecx], xmm1 +;getRunEnd; eax = (byte 0 of the merged position vector) + 1 - cl was computed above and is not written again by the instructions shown before ret +.return: + DEINIT_X86_32_PIC + pop esi + pop edi + pop ebx + ret +%endif ;%ifdef X86_32 + +;*********************************************************************** +;int32_t CavlcParamCal_sse42(int16_t*coffLevel, uint8_t* run, int16_t *Level, int32_t* total_coeffs , int32_t endIdx); +;*********************************************************************** +; Register roles are assigned per target below via %define aliases (p_coeff_level, p_run, p_level, p_total_coeffs, r_mask, r_tmp, p_shufb_lut, p_run_lut). +WELS_EXTERN CavlcParamCal_sse42 +%define i_endidxd dword arg5d + +%ifdef X86_32 + push r3 + push r4 + push r5 + push r6 + %assign push_num 4 +%ifdef X86_32_PICASM + %define p_total_coeffs r1 +%else + %define p_total_coeffs r0 +%endif + %define r_tmp r1 + %define r_tmpd r1d + %define r_tmpb r1b + %define p_level r2 + %define p_coeff_level r3 + %define p_run r6 + %define r_mask r5 + %define r_maskd r5d + %define p_shufb_lut pic(wels_cavlc_param_cal_shufb_lut) + %define p_run_lut pic(wels_cavlc_param_cal_run_lut) + mov p_coeff_level, arg1 + mov p_run, arg2 + mov p_level, arg3 + mov p_total_coeffs, arg4 +%elifdef WIN64 + push rbx + %assign push_num 1 + %define p_coeff_level r0 + %define p_run r1 + %define p_level r2 + %define p_total_coeffs r3 + %define r_mask rbx + %define r_maskd ebx + %define p_shufb_lut r5 + %define p_run_lut (p_shufb_lut + (wels_cavlc_param_cal_run_lut - wels_cavlc_param_cal_shufb_lut)) + lea p_shufb_lut, [wels_cavlc_param_cal_shufb_lut] + ; Free up rcx/ecx because only cl is accepted as shift amount operand. 
+ mov r6, r0 + %undef p_coeff_level + %define p_coeff_level r6 + %define r_tmp r0 + %define r_tmpd r0d + %define r_tmpb r0b +%else + %assign push_num 0 + %define p_coeff_level r0 + %define p_run r1 + %define p_level r2 + %define p_total_coeffs r3 + %define r_mask rax + %define r_maskd eax + %define p_shufb_lut r5 + %define i_total_zeros r6 + %define p_run_lut (p_shufb_lut + (wels_cavlc_param_cal_run_lut - wels_cavlc_param_cal_shufb_lut)) + lea p_shufb_lut, [wels_cavlc_param_cal_shufb_lut] +%endif + INIT_X86_32_PIC_NOPRESERVE r0 + + ; Acquire a bitmask indicating which words are non-zero. + ; Assume p_coeff_level is 16-byte-aligned and at least 32 bytes if endIdx > 3. + ; Otherwise, assume 8 bytes available. Assume that input beyond endIdx is zero. + ; Assumptions are taken from previous implementations. + pxor xmm1, xmm1 + cmp i_endidxd, 3 + jg .load16 + movq xmm0, [p_coeff_level] + packsswb xmm0, xmm1 + jmp .load_done +.load16: + movdqa xmm0, [p_coeff_level] + packsswb xmm0, [p_coeff_level + 16] +.load_done: + movdqa [p_run], xmm1 ; Zero-initialize because we may read back implied zeros. + pcmpeqb xmm0, xmm1 + pshufb xmm0, [pic(wels_shufb_rev)] + pmovmskb r_maskd, xmm0 + xor r_maskd, 0FFFFh ; Set bits now mark non-zero coefficients; because of the byte reversal, bit 0 corresponds to coefficient 15. +%undef i_endidxd +%define r_tmp2 r4 +%define r_tmp2d r4d + popcnt r_tmp2d, r_maskd + mov [p_total_coeffs], r_tmp2d + ; Recycle p_total_coeffs. +%ifidni p_total_coeffs, rcx + %define r_tmp rcx + %define r_tmpd ecx + %define r_tmpb cl +%else + %xdefine i_total_zeros p_total_coeffs +%endif +%undef p_total_coeffs +%ifdef X86_32_PICASM + push r_tmp2 + %undef i_total_zeros + %define i_total_zeros dword [esp] +%else + mov i_total_zeros, r_tmp2 +%endif + jz .done ; ZF is still the one produced by popcnt above (mov/push leave flags alone): taken when there is no non-zero coefficient. + bsf r_tmpd, r_maskd ; Find first set bit. + lea r_tmp2, [r_tmp2 + r_tmp - 16] + neg r_tmp2 + mov i_total_zeros, r_tmp2 ; i_total_zeros = 16 - total_coeffs - (index of the first set bit). + ; Skip trailing zeros. + ; Restrict to multiples of 4 to retain alignment and avoid out-of-bound stores. 
+ and r_tmpd, -4 + shr r_maskd, r_tmpb + add r_tmpd, r_tmpd + sub p_coeff_level, r_tmp + ; Handle first quadruple containing a non-zero value. (r_tmp was doubled above to turn the skipped word count into a byte offset.) + mov r_tmp, r_mask + and r_tmpd, 0Fh + movq xmm0, [p_coeff_level + 24] + movq xmm1, [p_shufb_lut + 8 * r_tmp] + pshufb xmm0, xmm1 + mov r_tmp2d, [p_run_lut + 4 * r_tmp] + shr r_tmp2d, 8 ; Skip initial zero run. + movlps [p_level], xmm0 ; Store levels for the first quadruple. + mov [p_run], r_tmp2d ; Store accompanying zero runs thus far. + shr r_maskd, 4 + jz .done +.loop: + ; Increment pointers. + popcnt r_tmpd, r_tmpd ; Number of non-zero values handled. + lea p_level, [p_level + 2 * r_tmp] + add p_run, r_tmp + ; Handle next quadruple. + mov r_tmp, r_mask + and r_tmpd, 0Fh + movq xmm0, [p_coeff_level + 16] + sub p_coeff_level, 8 + movq xmm1, [p_shufb_lut + 8 * r_tmp] + pshufb xmm0, xmm1 + movzx r_tmp2d, byte [p_run - 1] + add r_tmp2d, [p_run_lut + 4 * r_tmp] ; Add to previous run and get eventual new runs. + movlps [p_level], xmm0 ; Store levels (potentially none). + mov [p_run - 1], r_tmp2d ; Update previous run and store eventual new runs. + shr r_maskd, 4 + jnz .loop +.done: ; Move i_total_zeros into retrq (unless it already lives there), then restore saved registers. +%ifnidni retrq, i_total_zeros + %ifdef X86_32_PICASM + pop retrq + %else + mov retrq, i_total_zeros + %endif +%endif + DEINIT_X86_32_PIC +%ifdef X86_32 + pop r6 + pop r5 + pop r4 + pop r3 +%elifdef WIN64 + pop rbx +%endif + ret +%undef p_coeff_level +%undef p_run +%undef p_level +%undef i_total_zeros +%undef r_mask +%undef r_maskd +%undef r_tmp +%undef r_tmpd +%undef r_tmpb +%undef r_tmp2 +%undef r_tmp2d +%undef p_shufb_lut +%undef p_run_lut diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/x86/dct.asm b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/x86/dct.asm new file mode 100644 index 000000000..68291e224 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/x86/dct.asm @@ -0,0 +1,108 @@ +;*! +;* \copy +;* Copyright (c) 2009-2013, Cisco Systems +;* All rights reserved. 
+;* +;* Redistribution and use in source and binary forms, with or without +;* modification, are permitted provided that the following conditions +;* are met: +;* +;* * Redistributions of source code must retain the above copyright +;* notice, this list of conditions and the following disclaimer. +;* +;* * Redistributions in binary form must reproduce the above copyright +;* notice, this list of conditions and the following disclaimer in +;* the documentation and/or other materials provided with the +;* distribution. +;* +;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +;* POSSIBILITY OF SUCH DAMAGE. 
+;* +;* +;* dct.asm +;* +;* History +;* 8/4/2009 Created +;* +;* +;*************************************************************************/ + +%include "asm_inc.asm" + +SECTION .text + +;*********************************************************************** +; SSE2 functions +;*********************************************************************** +; SSE2_SumSubD a, b, tmp: per dword, b = a + b and a = a - (old b); tmp is overwritten with the old b. +%macro SSE2_SumSubD 3 + movdqa %3, %2 + paddd %2, %1 + psubd %1, %3 +%endmacro +; SSE2_SumSubDiv2D a, b, rnd, out: per dword, a = (a + b + rnd) >> 1 (arithmetic shift), then out = (new a) - b. +%macro SSE2_SumSubDiv2D 4 + paddd %1, %2 + paddd %1, %3 + psrad %1, 1 + movdqa %4, %1 + psubd %4, %2 +%endmacro +%macro SSE2_Load4Col 5 ; %1 = sign-extended words at %5, %5+0x20, %5+0x80 and %5+0xa0 (dwords 0..3); r2 and %2-%4 are clobbered + movsx r2, WORD[%5] + movd %1, r2d + movsx r2, WORD[%5 + 0x20] + movd %2, r2d + punpckldq %1, %2 + movsx r2, WORD[%5 + 0x80] + movd %3, r2d + movsx r2, WORD[%5 + 0xa0] + movd %4, r2d + punpckldq %3, %4 + punpcklqdq %1, %3 +%endmacro + +;*********************************************************************** +;void WelsHadamardT4Dc_sse2( int16_t *luma_dc, int16_t *pDct) +;*********************************************************************** +WELS_EXTERN WelsHadamardT4Dc_sse2 + %assign push_num 0 + LOAD_2_PARA + PUSH_XMM 8 + SSE2_Load4Col xmm1, xmm5, xmm6, xmm0, r1 ; 16 words are gathered from r1 (presumably pDct after LOAD_2_PARA -- confirm) and widened to dwords + SSE2_Load4Col xmm2, xmm5, xmm6, xmm0, r1 + 0x40 + SSE2_Load4Col xmm3, xmm5, xmm6, xmm0, r1 + 0x100 + SSE2_Load4Col xmm4, xmm5, xmm6, xmm0, r1 + 0x140 +; First butterfly pass (sums/differences across the four loaded vectors). + SSE2_SumSubD xmm1, xmm2, xmm7 + SSE2_SumSubD xmm3, xmm4, xmm7 + SSE2_SumSubD xmm2, xmm4, xmm7 + SSE2_SumSubD xmm1, xmm3, xmm7 + + SSE2_Trans4x4D xmm4, xmm2, xmm1, xmm3, xmm5 ; pOut: xmm4,xmm3,xmm5,xmm1 +; Second butterfly pass on the transposed data; the final stage adds the constant loaded by WELS_DD1 and halves. + SSE2_SumSubD xmm4, xmm3, xmm7 + SSE2_SumSubD xmm5, xmm1, xmm7 + + WELS_DD1 xmm6 + SSE2_SumSubDiv2D xmm3, xmm1, xmm6, xmm0 ; pOut: xmm3 = (xmm3+xmm1+1)/2, xmm0 = (xmm3-xmm1+1)/2 + SSE2_SumSubDiv2D xmm4, xmm5, xmm6, xmm1 ; pOut: xmm4 = (xmm4+xmm5+1)/2, xmm1 = (xmm4-xmm5+1)/2 + SSE2_Trans4x4D xmm3, xmm0, xmm1, xmm4, xmm2 ; pOut: xmm3,xmm4,xmm2,xmm1 +; Pack the 16 dwords to signed-saturated words and store 32 bytes at r0 with aligned stores. + packssdw xmm3, xmm4 + packssdw xmm2, xmm1 + movdqa [r0+ 0], xmm3 + movdqa [r0+16], xmm2 + + POP_XMM + ret diff --git 
a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/x86/intra_pred.asm b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/x86/intra_pred.asm new file mode 100644 index 000000000..edb2cd318 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/x86/intra_pred.asm @@ -0,0 +1,1129 @@ +;*! +;* \copy +;* Copyright (c) 2009-2013, Cisco Systems +;* All rights reserved. +;* +;* Redistribution and use in source and binary forms, with or without +;* modification, are permitted provided that the following conditions +;* are met: +;* +;* * Redistributions of source code must retain the above copyright +;* notice, this list of conditions and the following disclaimer. +;* +;* * Redistributions in binary form must reproduce the above copyright +;* notice, this list of conditions and the following disclaimer in +;* the documentation and/or other materials provided with the +;* distribution. +;* +;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +;* POSSIBILITY OF SUCH DAMAGE. 
+;* +;* +;* intra_pred.asm +;* +;* Abstract +;* sse2 function for intra predict operations +;* +;* History +;* 18/09/2009 Created +;* +;* +;*************************************************************************/ +%include "asm_inc.asm" + +;*********************************************************************** +; Local Data (Read Only) +;*********************************************************************** + +%ifdef X86_32_PICASM +SECTION .text align=16 +%else +SECTION .rodata align=16 +%endif + +align 16 +sse2_plane_inc_minus dw -7, -6, -5, -4, -3, -2, -1, 0 +align 16 +sse2_plane_inc dw 1, 2, 3, 4, 5, 6, 7, 8 +align 16 +sse2_plane_dec dw 8, 7, 6, 5, 4, 3, 2, 1 + +; for chroma plane mode +sse2_plane_inc_c dw 1, 2, 3, 4 +sse2_plane_dec_c dw 4, 3, 2, 1 +align 16 +sse2_plane_mul_b_c dw -3, -2, -1, 0, 1, 2, 3, 4 + +align 16 +mmx_01bytes: times 16 db 1 + +align 16 +mmx_0x02: dw 0x02, 0x00, 0x00, 0x00 + + +;*********************************************************************** +; macros +;*********************************************************************** +;SSE_DB_1_2REG dst, tmp: dst = dB 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 (0 - (-1) per byte); tmp is left all-ones +;%1 will keep the last result +%macro SSE_DB_1_2REG 2 + pxor %1, %1 + pcmpeqw %2, %2 + psubb %1, %2 +%endmacro + +;args: %1, %2, %3 = xmm registers, %4 = row pointer, %5 = offset to the next row (originally written for xmm0, xmm1, xmm2, eax, ecx) +;lower 64 bits of %1 save the result: byte [%4-1] repeated 4 times, then byte [%4+%5-1] repeated 4 times +%macro SSE2_PRED_H_4X4_TWO_LINE 5 + movd %1, [%4-1] + movdqa %3, %1 + punpcklbw %1, %3 + movdqa %3, %1 + punpcklbw %1, %3 + + ;add %4, %5 + movd %2, [%4+%5-1] + movdqa %3, %2 + punpcklbw %2, %3 + movdqa %3, %2 + punpcklbw %2, %3 + punpckldq %1, %2 +%endmacro +; SUMW_HORIZON1 src, tmp: horizontal sum of the eight words of %1 using unsigned saturating adds; the total ends up in word 0 of %1. +%macro SUMW_HORIZON1 2 + movdqa %2, %1 + psrldq %2, 8 + paddusw %1, %2 + movdqa %2, %1 + psrldq %2, 4 + paddusw %1, %2 + movdqa %2, %1 + psrldq %2, 2 + paddusw %1, %2 +%endmacro +; LOAD_COLUMN dst, t1, t2, t3, ptr, stride: reads 4 bytes from each of 8 consecutive rows; the high 8 bytes of dst receive byte 3 of every row (low 8 bytes: byte 2). ptr is advanced by 8 rows. +%macro LOAD_COLUMN 6 + movd %1, [%5] + movd %2, [%5+%6] + punpcklbw %1, %2 + lea %5, [%5+2*%6] + movd %3, [%5] + movd %2, [%5+%6] + punpcklbw %3, %2 + punpcklwd %1, %3 + lea %5, [%5+2*%6] + movd %4, [%5] + movd %2, [%5+%6] + punpcklbw %4, %2 + lea %5, 
[%5+2*%6] + movd %3, [%5] + movd %2, [%5+%6] + lea %5, [%5+2*%6] + punpcklbw %3, %2 + punpcklwd %4, %3 + punpckhdq %1, %4 +%endmacro + +%macro SUMW_HORIZON 3 + movhlps %2, %1 ; x2 = xx xx xx xx d7 d6 d5 d4 + paddw %1, %2 ; x1 = xx xx xx xx d37 d26 d15 d04 + punpcklwd %1, %3 ; x1 = d37 d26 d15 d04 + movhlps %2, %1 ; x2 = xxxx xxxx d37 d26 + paddd %1, %2 ; x1 = xxxx xxxx d1357 d0246 + pshuflw %2, %1, 0x4e ; x2 = xxxx xxxx d0246 d1357 + paddd %1, %2 ; x1 = xxxx xxxx xxxx d01234567 (low dword = sum of the 8 words; its upper 16 bits are only clean when %3 is zero) +%endmacro + +; COPY_16_TIMES ptr, dst: dst = byte [ptr-1] replicated 16 times (aligned 16-byte load from ptr-16). +%macro COPY_16_TIMES 2 + movdqa %2, [%1-16] + psrldq %2, 15 + pmuludq %2, [pic(mmx_01bytes)] + pshufd %2, %2, 0 +%endmacro +; COPY_16_TIMESS ptr, dst, offset: same as COPY_16_TIMES but replicates byte [ptr+offset-1]. +%macro COPY_16_TIMESS 3 + movdqa %2, [%1+%3-16] + psrldq %2, 15 + pmuludq %2, [pic(mmx_01bytes)] + pshufd %2, %2, 0 +%endmacro +; LOAD_COLUMN_C dst, t1, t2, unused, ptr, stride: 4-row variant of LOAD_COLUMN; ptr is advanced by 4 rows and %4 is not referenced. With 64-bit MMX operands the high 4 bytes of dst receive byte 3 of each row. +%macro LOAD_COLUMN_C 6 + movd %1, [%5] + movd %2, [%5+%6] + punpcklbw %1,%2 + lea %5, [%5+2*%6] + movd %3, [%5] + movd %2, [%5+%6] + punpcklbw %3, %2 + punpckhwd %1, %3 + lea %5, [%5+2*%6] +%endmacro +; LOAD_2_LEFT_AND_ADD: advances r1 by two rows (2*r2) and adds bytes [r1-1] and [r1+r2-1] to r3; r4 is clobbered. +%macro LOAD_2_LEFT_AND_ADD 0 + lea r1, [r1+2*r2] + movzx r4, byte [r1-0x01] + add r3, r4 + movzx r4, byte [r1+r2-0x01] + add r3, r4 +%endmacro + +;*********************************************************************** +; Code +;*********************************************************************** + +SECTION .text + +;*********************************************************************** +; void WelsI4x4LumaPredH_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride) +; +; pred must align to 16 +;*********************************************************************** +WELS_EXTERN WelsI4x4LumaPredH_sse2 + push r3 + %assign push_num 1 + INIT_X86_32_PIC r4 + LOAD_3_PARA + SIGN_EXTENSION r2, r2d + movzx r3, byte [r1-1] + movd xmm0, r3d + pmuludq xmm0, [pic(mmx_01bytes)] ; byte * 0x01010101 replicates the left-neighbour byte into 4 bytes + + movzx r3, byte [r1+r2-1] + movd xmm1, r3d + pmuludq xmm1, [pic(mmx_01bytes)] + + unpcklps xmm0, xmm1 + + lea r1, [r1+r2*2] + movzx r3, byte [r1-1] + movd xmm2, r3d + pmuludq xmm2, [pic(mmx_01bytes)] + + movzx r3, byte [r1+r2-1] + movd xmm3, r3d + pmuludq xmm3, 
[pic(mmx_01bytes)] + + unpcklps xmm2, xmm3 + unpcklpd xmm0, xmm2 + + movdqa [r0], xmm0 + DEINIT_X86_32_PIC + pop r3 + ret + +;*********************************************************************** +; void WelsI16x16LumaPredPlane_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride); +;*********************************************************************** +WELS_EXTERN WelsI16x16LumaPredPlane_sse2 + push r3 + push r4 + %assign push_num 2 + INIT_X86_32_PIC r5 + LOAD_3_PARA + PUSH_XMM 8 + SIGN_EXTENSION r2, r2d + sub r1, 1 + sub r1, r2 + + ;for H: r1 has just been moved back by one byte and one row (r2), i.e. to the top-left neighbour if r1/r2 hold pRef/stride after LOAD_3_PARA -- confirm + pxor xmm7, xmm7 + movq xmm0, [r1] + movdqa xmm5, [pic(sse2_plane_dec)] + punpcklbw xmm0, xmm7 + pmullw xmm0, xmm5 + movq xmm1, [r1 + 9] + movdqa xmm6, [pic(sse2_plane_inc)] + punpcklbw xmm1, xmm7 + pmullw xmm1, xmm6 + psubw xmm1, xmm0 + + SUMW_HORIZON xmm1,xmm0,xmm2 ; NOTE(review): xmm2 is not zeroed before this call; only the low 16 bits of the sum are consumed (movsx r3, r3w) + movd r3d, xmm1 ; H += (i + 1) * (top[8 + i] - top[6 - i]); + movsx r3, r3w + imul r3, 5 + add r3, 32 + sar r3, 6 ; b = (5 * H + 32) >> 6; + SSE2_Copy8Times xmm1, r3d ; xmm1 = b,b,b,b,b,b,b,b + + movzx r4, BYTE [r1+16] + sub r1, 3 + LOAD_COLUMN xmm0, xmm2, xmm3, xmm4, r1, r2 + + add r1, 3 + movzx r3, BYTE [r1+8*r2] + add r4, r3 + shl r4, 4 ; a = (left[15*stride] + top[15]) << 4; + + sub r1, 3 + add r1, r2 + LOAD_COLUMN xmm7, xmm2, xmm3, xmm4, r1, r2 + pxor xmm4, xmm4 + punpckhbw xmm0, xmm4 + pmullw xmm0, xmm5 + punpckhbw xmm7, xmm4 + pmullw xmm7, xmm6 + psubw xmm7, xmm0 + + SUMW_HORIZON xmm7,xmm0,xmm2 + movd r3d, xmm7 ; V + movsx r3, r3w + imul r3, 5 + add r3, 32 + sar r3, 6 ; c = (5 * V + 32) >> 6; + SSE2_Copy8Times xmm4, r3d ; xmm4 = c,c,c,c,c,c,c,c + + add r4, 16 + imul r3, -7 + add r3, r4 ; s = a + 16 + (-7)*c + SSE2_Copy8Times xmm0, r3d ; xmm0 = s,s,s,s,s,s,s,s + + xor r3, r3 + movdqa xmm5, [pic(sse2_plane_inc_minus)] +; Row loop (16 rows): left 8 pixels = (b*{-7..0} + s) >> 5, right 8 pixels = (b*{1..8} + s) >> 5, packed with unsigned saturation; s += c after each row. +get_i16x16_luma_pred_plane_sse2_1: + movdqa xmm2, xmm1 + pmullw xmm2, xmm5 + paddw xmm2, xmm0 + psraw xmm2, 5 + movdqa xmm3, xmm1 + pmullw xmm3, xmm6 + paddw xmm3, xmm0 + psraw xmm3, 5 + packuswb xmm2, xmm3 + movdqa [r0], xmm2 + paddw xmm0, 
xmm4 + add r0, 16 + inc r3 + cmp r3, 16 + jnz get_i16x16_luma_pred_plane_sse2_1 + POP_XMM + DEINIT_X86_32_PIC + pop r4 + pop r3 + ret + +;*********************************************************************** +; void WelsIChromaPredPlane_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride); +;*********************************************************************** +WELS_EXTERN WelsIChromaPredPlane_sse2 + push r3 + push r4 + %assign push_num 2 + INIT_X86_32_PIC r5 + LOAD_3_PARA + PUSH_XMM 8 + SIGN_EXTENSION r2, r2d + sub r1, 1 + sub r1, r2 +; H: weighted differences of the bytes in the row above, computed in MMX registers and then moved to xmm1 for the horizontal sum. + pxor mm7, mm7 + movq mm0, [r1] + movq mm5, [pic(sse2_plane_dec_c)] + punpcklbw mm0, mm7 + pmullw mm0, mm5 + movq mm1, [r1 + 5] + movq mm6, [pic(sse2_plane_inc_c)] + punpcklbw mm1, mm7 + pmullw mm1, mm6 + psubw mm1, mm0 + + movq2dq xmm1, mm1 + pxor xmm2, xmm2 + SUMW_HORIZON xmm1,xmm0,xmm2 + movd r3d, xmm1 + movsx r3, r3w + imul r3, 17 + add r3, 16 + sar r3, 5 ; b = (17 * H + 16) >> 5; + SSE2_Copy8Times xmm1, r3d ; xmm1 = b,b,b,b,b,b,b,b + + movzx r3, BYTE [r1+8] + sub r1, 3 + LOAD_COLUMN_C mm0, mm2, mm3, mm4, r1, r2 + + add r1, 3 + movzx r4, BYTE [r1+4*r2] + add r4, r3 + shl r4, 4 ; a = (left[7*stride] + top[7]) << 4; + + sub r1, 3 + add r1, r2 + LOAD_COLUMN_C mm7, mm2, mm3, mm4, r1, r2 + pxor mm4, mm4 + punpckhbw mm0, mm4 + pmullw mm0, mm5 + punpckhbw mm7, mm4 + pmullw mm7, mm6 + psubw mm7, mm0 + + movq2dq xmm7, mm7 + pxor xmm2, xmm2 + SUMW_HORIZON xmm7,xmm0,xmm2 + movd r3d, xmm7 ; V + movsx r3, r3w + imul r3, 17 + add r3, 16 + sar r3, 5 ; c = (17 * V + 16) >> 5; + SSE2_Copy8Times xmm4, r3d ; xmm4 = c,c,c,c,c,c,c,c + + add r4, 16 + imul r3, -3 + add r3, r4 ; s = a + 16 + (-3)*c + SSE2_Copy8Times xmm0, r3d ; xmm0 = s,s,s,s,s,s,s,s + + xor r3, r3 + movdqa xmm5, [pic(sse2_plane_mul_b_c)] +; Row loop (8 rows): 8 pixels = (b*{-3..4} + s) >> 5, packed with unsigned saturation and stored as 8 bytes; s += c after each row. +get_i_chroma_pred_plane_sse2_1: + movdqa xmm2, xmm1 + pmullw xmm2, xmm5 + paddw xmm2, xmm0 + psraw xmm2, 5 + packuswb xmm2, xmm2 + movq [r0], xmm2 + paddw xmm0, xmm4 + add r0, 8 + inc r3 + cmp r3, 8 + jnz get_i_chroma_pred_plane_sse2_1 + POP_XMM + 
DEINIT_X86_32_PIC + pop r4 + pop r3 + WELSEMMS + ret + +;*********************************************************************** +; 0 |1 |2 |3 |4 | +; 6 |7 |8 |9 |10| +; 11|12|13|14|15| +; 16|17|18|19|20| +; 21|22|23|24|25| +; 7 is the start pixel of current 4x4 block +; pred[7] = ([6]+[0]*2+[1]+2)/4 +; +; void WelsI4x4LumaPredDDR_mmx(uint8_t *pred,uint8_t *pRef,int32_t stride) +; +;*********************************************************************** +WELS_EXTERN WelsI4x4LumaPredDDR_mmx + %assign push_num 0 + INIT_X86_32_PIC r3 + LOAD_3_PARA + SIGN_EXTENSION r2, r2d + movq mm1,[r1+r2-8] ;get value of 11; the load starts 8 bytes earlier so the wanted byte lands in the top byte: mm1[8] = [11] + movq mm2,[r1-8] ;get value of 6 mm2[8] = 6 + sub r1, r2 ;move r1 to the line above the current block (position of 1) + punpckhbw mm2,[r1-8] ;mm2[8](high 8th byte of mm2) = [0](value of 0), mm2[7]= [6] + movd mm3,[r1] ;get value 1, mm3[1] = [1],mm3[2]=[2],mm3[3]=[3] + punpckhwd mm1,mm2 ;mm1[8]=[0],mm1[7]=[6],mm1[6]=[11] + psllq mm3,18h ;mm3[5]=[1] + psrlq mm1,28h ;mm1[3]=[0],mm1[2]=[6],mm1[1]=[11] + por mm3,mm1 ;mm3[6]=[3],mm3[5]=[2],mm3[4]=[1],mm3[3]=[0],mm3[2]=[6],mm3[1]=[11] + movq mm1,mm3 ;mm1[6]=[3],mm1[5]=[2],mm1[4]=[1],mm1[3]=[0],mm1[2]=[6],mm1[1]=[11] + lea r1,[r1+r2*2-8h] ;point r1 8 bytes before pixel 12 + movq mm4,[r1+r2] ;get value of 16, mm4[8]=[16] + psllq mm3,8 ;mm3[7]=[3],mm3[6]=[2],mm3[5]=[1],mm3[4]=[0],mm3[3]=[6],mm3[2]=[11],mm3[1]=0 + psrlq mm4,38h ;mm4[1]=[16] + por mm3,mm4 ;mm3[7]=[3],mm3[6]=[2],mm3[5]=[1],mm3[4]=[0],mm3[3]=[6],mm3[2]=[11],mm3[1]=[16] + movq mm2,mm3 ;mm2[7]=[3],mm2[6]=[2],mm2[5]=[1],mm2[4]=[0],mm2[3]=[6],mm2[2]=[11],mm2[1]=[16] + movq mm4,[r1+r2*2] ;mm4[8]=[21] + psllq mm3,8 ;mm3[8]=[3],mm3[7]=[2],mm3[6]=[1],mm3[5]=[0],mm3[4]=[6],mm3[3]=[11],mm3[2]=[16],mm3[1]=0 + psrlq mm4,38h ;mm4[1]=[21] + por mm3,mm4 ;mm3[8]=[3],mm3[7]=[2],mm3[6]=[1],mm3[5]=[0],mm3[4]=[6],mm3[3]=[11],mm3[2]=[16],mm3[1]=[21] + movq mm4,mm3 
;mm4[8]=[3],mm4[7]=[2],mm4[6]=[1],mm4[5]=[0],mm4[4]=[6],mm4[3]=[11],mm4[2]=[16],mm4[1]=[21] + pavgb mm3,mm1 ;mm3=([11]+[21]+1)/2 + pxor mm1,mm4 ;find odd value in the lowest bit of each byte + pand mm1,[pic(mmx_01bytes)] ;set the odd bit + psubusb mm3,mm1 ;decrease 1 from odd bytes + pavgb mm2,mm3 ;mm2=(([11]+[21]+1)/2+1+[16])/2 + + movd [r0+12],mm2 + psrlq mm2,8 + movd [r0+8],mm2 + psrlq mm2,8 + movd [r0+4],mm2 + psrlq mm2,8 + movd [r0],mm2 + DEINIT_X86_32_PIC + WELSEMMS + ret + +;*********************************************************************** +; 0 |1 |2 |3 |4 | +; 5 |6 |7 |8 |9 | +; 10|11|12|13|14| +; 15|16|17|18|19| +; 20|21|22|23|24| +; 6 is the start pixel of current 4x4 block +; pred[6] = ([1]+[2]+[3]+[4]+[5]+[10]+[15]+[20]+4)/8 +; +; void WelsI4x4LumaPredDc_sse2(uint8_t *pred,uint8_t *pRef,int32_t stride) +; +;*********************************************************************** +WELS_EXTERN WelsI4x4LumaPredDc_sse2 + push r3 + push r4 + %assign push_num 2 + LOAD_3_PARA + SIGN_EXTENSION r2, r2d + movzx r4, byte [r1-1h] + sub r1, r2 + movd xmm0, [r1] + pxor xmm1, xmm1 + psadbw xmm0, xmm1 + xor r3, r3 + movd r3d, xmm0 + add r3, r4 + movzx r4, byte [r1+r2*2-1h] + add r3, r4 + + lea r1, [r1+r2*2-1] + movzx r4, byte [r1+r2] + add r3, r4 + + movzx r4, byte [r1+r2*2] + add r3, r4 + add r3, 4 + sar r3, 3 + imul r3, 0x01010101 + + movd xmm0, r3d + pshufd xmm0, xmm0, 0 + movdqa [r0], xmm0 + pop r4 + pop r3 + ret + +;*********************************************************************** +; void WelsIChromaPredH_mmx(uint8_t *pred, uint8_t *pRef, int32_t stride) +; copy 8 pixel of 8 line from left +;*********************************************************************** +%macro MMX_PRED_H_8X8_ONE_LINE 4 + movq %1, [%3-8] + psrlq %1, 38h + + ;pmuludq %1, [mmx_01bytes] ;extend to 4 bytes + pmullw %1, [pic(mmx_01bytes)] + pshufw %1, %1, 0 + movq [%4], %1 +%endmacro + +%macro MMX_PRED_H_8X8_ONE_LINEE 4 + movq %1, [%3+r2-8] + psrlq %1, 38h + + ;pmuludq %1, 
[mmx_01bytes] ;extend to 4 bytes + pmullw %1, [pic(mmx_01bytes)] + pshufw %1, %1, 0 + movq [%4], %1 +%endmacro + +WELS_EXTERN WelsIChromaPredH_mmx + %assign push_num 0 + INIT_X86_32_PIC r3 + LOAD_3_PARA + SIGN_EXTENSION r2, r2d + movq mm0, [r1-8] + psrlq mm0, 38h + + ;pmuludq mm0, [mmx_01bytes] ;extend to 4 bytes + pmullw mm0, [pic(mmx_01bytes)] + pshufw mm0, mm0, 0 + movq [r0], mm0 + + MMX_PRED_H_8X8_ONE_LINEE mm0, mm1, r1,r0+8 + + lea r1,[r1+r2*2] + MMX_PRED_H_8X8_ONE_LINE mm0, mm1, r1,r0+16 + + MMX_PRED_H_8X8_ONE_LINEE mm0, mm1, r1,r0+24 + + lea r1,[r1+r2*2] + MMX_PRED_H_8X8_ONE_LINE mm0, mm1, r1,r0+32 + + MMX_PRED_H_8X8_ONE_LINEE mm0, mm1, r1,r0+40 + + lea r1,[r1+r2*2] + MMX_PRED_H_8X8_ONE_LINE mm0, mm1, r1,r0+48 + + MMX_PRED_H_8X8_ONE_LINEE mm0, mm1, r1,r0+56 + DEINIT_X86_32_PIC + WELSEMMS + ret + +;*********************************************************************** +; void WelsI4x4LumaPredV_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride) +; copy pixels from top 4 pixels +;*********************************************************************** +WELS_EXTERN WelsI4x4LumaPredV_sse2 + %assign push_num 0 + LOAD_3_PARA + SIGN_EXTENSION r2, r2d + sub r1, r2 + movd xmm0, [r1] + pshufd xmm0, xmm0, 0 + movdqa [r0], xmm0 + ret + +;*********************************************************************** +; void WelsIChromaPredV_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride) +; copy 8 pixels from top 8 pixels +;*********************************************************************** +WELS_EXTERN WelsIChromaPredV_sse2 + %assign push_num 0 + LOAD_3_PARA + SIGN_EXTENSION r2, r2d + sub r1, r2 + movq xmm0, [r1] + movdqa xmm1, xmm0 + punpcklqdq xmm0, xmm1 + movdqa [r0], xmm0 + movdqa [r0+16], xmm0 + movdqa [r0+32], xmm0 + movdqa [r0+48], xmm0 + ret + +;*********************************************************************** +; lt|t0|t1|t2|t3| +; l0| +; l1| +; l2| +; l3| +; t3 will never been used +; destination: +; |a |b |c |d | +; |e |f |a |b | +; |g |h |e |f | +; |i |j 
|g |h | + +; a = (1 + lt + l0)>>1 +; e = (1 + l0 + l1)>>1 +; g = (1 + l1 + l2)>>1 +; i = (1 + l2 + l3)>>1 + +; d = (2 + t0 + (t1<<1) + t2)>>2 +; c = (2 + lt + (t0<<1) + t1)>>2 +; b = (2 + l0 + (lt<<1) + t0)>>2 + +; f = (2 + l1 + (l0<<1) + lt)>>2 +; h = (2 + l2 + (l1<<1) + l0)>>2 +; j = (2 + l3 + (l2<<1) + l1)>>2 +; [b a f e h g j i] + [d c b a] --> mov to memory +; +; void WelsI4x4LumaPredHD_mmx(uint8_t *pred,uint8_t *pRef,int32_t stride) +;*********************************************************************** +WELS_EXTERN WelsI4x4LumaPredHD_mmx + %assign push_num 0 + INIT_X86_32_PIC r3 + LOAD_3_PARA + SIGN_EXTENSION r2, r2d + sub r1, r2 + movd mm0, [r1-1] ; mm0 = [xx xx xx xx t2 t1 t0 lt] + psllq mm0, 20h ; mm0 = [t2 t1 t0 lt xx xx xx xx] + + movd mm1, [r1+2*r2-4] + punpcklbw mm1, [r1+r2-4] ; mm1[7] = l0, mm1[6] = l1 + lea r1, [r1+2*r2] + movd mm2, [r1+2*r2-4] + punpcklbw mm2, [r1+r2-4] ; mm2[7] = l2, mm2[6] = l3 + punpckhwd mm2, mm1 ; mm2 = [l0 l1 l2 l3 xx xx xx xx] + psrlq mm2, 20h + pxor mm0, mm2 ; mm0 = [t2 t1 t0 lt l0 l1 l2 l3] + + movq mm1, mm0 + psrlq mm1, 10h ; mm1 = [xx xx t2 t1 t0 lt l0 l1] + movq mm2, mm0 + psrlq mm2, 8h ; mm2 = [xx t2 t1 t0 lt l0 l1 l2] + movq mm3, mm2 + movq mm4, mm1 + pavgb mm1, mm0 + + pxor mm4, mm0 ; find odd value in the lowest bit of each byte + pand mm4, [pic(mmx_01bytes)] ; set the odd bit + psubusb mm1, mm4 ; decrease 1 from odd bytes + + pavgb mm2, mm1 ; mm2 = [xx xx d c b f h j] + + movq mm4, mm0 + pavgb mm3, mm4 ; mm3 = [xx xx xx xx a e g i] + punpcklbw mm3, mm2 ; mm3 = [b a f e h g j i] + + psrlq mm2, 20h + psllq mm2, 30h ; mm2 = [d c 0 0 0 0 0 0] + movq mm4, mm3 + psrlq mm4, 10h ; mm4 = [0 0 b a f e h g] + pxor mm2, mm4 ; mm2 = [d c b a xx xx xx xx] + psrlq mm2, 20h ; mm2 = [xx xx xx xx d c b a] + + movd [r0], mm2 + movd [r0+12], mm3 + psrlq mm3, 10h + movd [r0+8], mm3 + psrlq mm3, 10h + movd [r0+4], mm3 + DEINIT_X86_32_PIC + WELSEMMS + ret + +;*********************************************************************** +; 
lt|t0|t1|t2|t3| +; l0| +; l1| +; l2| +; l3| +; lt,t0,t1,t2,t3 will never be used +; destination: +; |a |b |c |d | +; |c |d |e |f | +; |e |f |g |g | +; |g |g |g |g | + +; a = (1 + l0 + l1)>>1 +; c = (1 + l1 + l2)>>1 +; e = (1 + l2 + l3)>>1 +; g = l3 + +; b = (2 + l0 + (l1<<1) + l2)>>2 +; d = (2 + l1 + (l2<<1) + l3)>>2 +; f = (2 + l2 + (l3<<1) + l3)>>2 + +; [g g f e d c b a] + [g g g g] --> mov to memory +; +; void WelsI4x4LumaPredHU_mmx(uint8_t *pred,uint8_t *pRef,int32_t stride) +;*********************************************************************** +WELS_EXTERN WelsI4x4LumaPredHU_mmx + %assign push_num 0 + INIT_X86_32_PIC r3 + LOAD_3_PARA + SIGN_EXTENSION r2, r2d + movd mm0, [r1-4] ; mm0[3] = l0 + punpcklbw mm0, [r1+r2-4] ; mm0[7] = l1, mm0[6] = l0 + lea r1, [r1+2*r2] + movd mm2, [r1-4] ; mm2[3] = l2 + movd mm4, [r1+r2-4] ; mm4[3] = l3 + punpcklbw mm2, mm4 + punpckhwd mm0, mm2 ; mm0 = [l3 l2 l1 l0 xx xx xx xx] + + psrlq mm4, 18h + psllq mm4, 38h ; mm4 = [l3 xx xx xx xx xx xx xx] + psrlq mm0, 8h + pxor mm0, mm4 ; mm0 = [l3 l3 l2 l1 l0 xx xx xx] + + movq mm1, mm0 + psllq mm1, 8h ; mm1 = [l3 l2 l1 l0 xx xx xx xx] + movq mm3, mm1 ; mm3 = [l3 l2 l1 l0 xx xx xx xx] + pavgb mm1, mm0 ; mm1 = [g e c a xx xx xx xx] + + movq mm2, mm0 + psllq mm2, 10h ; mm2 = [l2 l1 l0 xx xx xx xx xx] + movq mm5, mm2 + pavgb mm2, mm0 + + pxor mm5, mm0 ; find odd value in the lowest bit of each byte + pand mm5, [pic(mmx_01bytes)] ; set the odd bit + psubusb mm2, mm5 ; decrease 1 from odd bytes + + pavgb mm2, mm3 ; mm2 = [f d b xx xx xx xx xx] + + psrlq mm2, 8h + pxor mm2, mm4 ; mm2 = [g f d b xx xx xx xx] + + punpckhbw mm1, mm2 ; mm1 = [g g f e d c b a] + punpckhbw mm4, mm4 ; mm4 = [g g xx xx xx xx xx xx] + punpckhbw mm4, mm4 ; mm4 = [g g g g xx xx xx xx] + + psrlq mm4, 20h + movd [r0+12], mm4 + + movd [r0], mm1 + psrlq mm1, 10h + movd [r0+4], mm1 + psrlq mm1, 10h + movd [r0+8], mm1 + DEINIT_X86_32_PIC + WELSEMMS + ret + + + +;*********************************************************************** +; 
lt|t0|t1|t2|t3| +; l0| +; l1| +; l2| +; l3| +; l3 will never be used +; destination: +; |a |b |c |d | +; |e |f |g |h | +; |i |a |b |c | +; |j |e |f |g | + +; a = (1 + lt + t0)>>1 +; b = (1 + t0 + t1)>>1 +; c = (1 + t1 + t2)>>1 +; d = (1 + t2 + t3)>>1 + +; e = (2 + l0 + (lt<<1) + t0)>>2 +; f = (2 + lt + (t0<<1) + t1)>>2 +; g = (2 + t0 + (t1<<1) + t2)>>2 + +; h = (2 + t1 + (t2<<1) + t3)>>2 +; i = (2 + lt + (l0<<1) + l1)>>2 +; j = (2 + l0 + (l1<<1) + l2)>>2 +; +; void WelsI4x4LumaPredVR_mmx(uint8_t *pred,uint8_t *pRef,int32_t stride) +;*********************************************************************** +WELS_EXTERN WelsI4x4LumaPredVR_mmx + %assign push_num 0 + INIT_X86_32_PIC r3 + LOAD_3_PARA + SIGN_EXTENSION r2, r2d + sub r1, r2 + movq mm0, [r1-1] ; mm0 = [xx xx xx t3 t2 t1 t0 lt] + psllq mm0, 18h ; mm0 = [t3 t2 t1 t0 lt xx xx xx] + + movd mm1, [r1+2*r2-4] + punpcklbw mm1, [r1+r2-4] ; mm1[7] = l0, mm1[6] = l1 + lea r1, [r1+2*r2] + movq mm2, [r1+r2-8] ; mm2[7] = l2 + punpckhwd mm2, mm1 ; mm2 = [l0 l1 l2 xx xx xx xx xx] + psrlq mm2, 28h + pxor mm0, mm2 ; mm0 = [t3 t2 t1 t0 lt l0 l1 l2] + + movq mm1, mm0 + psllq mm1, 8h ; mm1 = [t2 t1 t0 lt l0 l1 l2 xx] + pavgb mm1, mm0 ; mm1 = [d c b a xx xx xx xx] + + movq mm2, mm0 + psllq mm2, 10h ; mm2 = [t1 t0 lt l0 l1 l2 xx xx] + movq mm3, mm2 + pavgb mm2, mm0 + + pxor mm3, mm0 ; find odd value in the lowest bit of each byte + pand mm3, [pic(mmx_01bytes)] ; set the odd bit + psubusb mm2, mm3 ; decrease 1 from odd bytes + + movq mm3, mm0 + psllq mm3, 8h ; mm3 = [t2 t1 t0 lt l0 l1 l2 xx] + pavgb mm3, mm2 ; mm3 = [h g f e i j xx xx] + movq mm2, mm3 + + psrlq mm1, 20h ; mm1 = [xx xx xx xx d c b a] + movd [r0], mm1 + + psrlq mm2, 20h ; mm2 = [xx xx xx xx h g f e] + movd [r0+4], mm2 + + movq mm4, mm3 + psllq mm4, 20h + psrlq mm4, 38h ; mm4 = [xx xx xx xx xx xx xx i] + + movq mm5, mm3 + psllq mm5, 28h + psrlq mm5, 38h ; mm5 = [xx xx xx xx xx xx xx j] + + psllq mm1, 8h + pxor mm4, mm1 ; mm4 = [xx xx xx xx c b a i] + movd [r0+8], mm4 
+ + psllq mm2, 8h + pxor mm5, mm2 ; mm5 = [xx xx xx xx g f e j] + movd [r0+12], mm5 + DEINIT_X86_32_PIC + WELSEMMS + ret + +;*********************************************************************** +; lt|t0|t1|t2|t3|t4|t5|t6|t7 +; l0| +; l1| +; l2| +; l3| +; lt,l0,l1,l2,l3 will never be used +; destination: +; |a |b |c |d | +; |b |c |d |e | +; |c |d |e |f | +; |d |e |f |g | + +; a = (2 + t0 + t2 + (t1<<1))>>2 +; b = (2 + t1 + t3 + (t2<<1))>>2 +; c = (2 + t2 + t4 + (t3<<1))>>2 +; d = (2 + t3 + t5 + (t4<<1))>>2 + +; e = (2 + t4 + t6 + (t5<<1))>>2 +; f = (2 + t5 + t7 + (t6<<1))>>2 +; g = (2 + t6 + t7 + (t7<<1))>>2 + +; [g f e d c b a] --> mov to memory +; +; void WelsI4x4LumaPredDDL_mmx(uint8_t *pred,uint8_t *pRef,int32_t stride) +;*********************************************************************** +WELS_EXTERN WelsI4x4LumaPredDDL_mmx + %assign push_num 0 + INIT_X86_32_PIC r3 + LOAD_3_PARA + SIGN_EXTENSION r2, r2d + sub r1, r2 + movq mm0, [r1] ; mm0 = [t7 t6 t5 t4 t3 t2 t1 t0] + movq mm1, mm0 + movq mm2, mm0 + + movq mm3, mm0 + psrlq mm3, 38h + psllq mm3, 38h ; mm3 = [t7 xx xx xx xx xx xx xx] + + psllq mm1, 8h ; mm1 = [t6 t5 t4 t3 t2 t1 t0 xx] + psrlq mm2, 8h + pxor mm2, mm3 ; mm2 = [t7 t7 t6 t5 t4 t3 t2 t1] + + movq mm3, mm1 + pavgb mm1, mm2 + pxor mm3, mm2 ; find odd value in the lowest bit of each byte + pand mm3, [pic(mmx_01bytes)] ; set the odd bit + psubusb mm1, mm3 ; decrease 1 from odd bytes + + pavgb mm0, mm1 ; mm0 = [g f e d c b a xx] + + psrlq mm0, 8h + movd [r0], mm0 + psrlq mm0, 8h + movd [r0+4], mm0 + psrlq mm0, 8h + movd [r0+8], mm0 + psrlq mm0, 8h + movd [r0+12], mm0 + DEINIT_X86_32_PIC + WELSEMMS + ret + + +;*********************************************************************** +; lt|t0|t1|t2|t3|t4|t5|t6|t7 +; l0| +; l1| +; l2| +; l3| +; lt,l0,l1,l2,l3 will never be used +; destination: +; |a |b |c |d | +; |e |f |g |h | +; |b |c |d |i | +; |f |g |h |j | + +; a = (1 + t0 + t1)>>1 +; b = (1 + t1 + t2)>>1 +; c = (1 + t2 + t3)>>1 +; d = (1 + t3 + 
t4)>>1 +; i = (1 + t4 + t5)>>1 + +; e = (2 + t0 + (t1<<1) + t2)>>2 +; f = (2 + t1 + (t2<<1) + t3)>>2 +; g = (2 + t2 + (t3<<1) + t4)>>2 +; h = (2 + t3 + (t4<<1) + t5)>>2 +; j = (2 + t4 + (t5<<1) + t6)>>2 + +; [i d c b a] + [j h g f e] --> mov to memory +; +; void WelsI4x4LumaPredVL_mmx(uint8_t *pred,uint8_t *pRef,int32_t stride) +;*********************************************************************** +WELS_EXTERN WelsI4x4LumaPredVL_mmx + %assign push_num 0 + INIT_X86_32_PIC r3 + LOAD_3_PARA + SIGN_EXTENSION r2, r2d + sub r1, r2 + movq mm0, [r1] ; mm0 = [t7 t6 t5 t4 t3 t2 t1 t0] + movq mm1, mm0 + movq mm2, mm0 + + psrlq mm1, 8h ; mm1 = [xx t7 t6 t5 t4 t3 t2 t1] + psrlq mm2, 10h ; mm2 = [xx xx t7 t6 t5 t4 t3 t2] + + movq mm3, mm1 + pavgb mm3, mm0 ; mm3 = [xx xx xx i d c b a] + + movq mm4, mm2 + pavgb mm2, mm0 + pxor mm4, mm0 ; find odd value in the lowest bit of each byte + pand mm4, [pic(mmx_01bytes)] ; set the odd bit + psubusb mm2, mm4 ; decrease 1 from odd bytes + + pavgb mm2, mm1 ; mm2 = [xx xx xx j h g f e] + + movd [r0], mm3 + psrlq mm3, 8h + movd [r0+8], mm3 + + movd [r0+4], mm2 + psrlq mm2, 8h + movd [r0+12], mm2 + DEINIT_X86_32_PIC + WELSEMMS + ret + +;*********************************************************************** +; +; void WelsIChromaPredDc_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride) +;*********************************************************************** +WELS_EXTERN WelsIChromaPredDc_sse2 + push r3 + push r4 + %assign push_num 2 + INIT_X86_32_PIC r5 + LOAD_3_PARA + SIGN_EXTENSION r2, r2d + sub r1, r2 + movq mm0, [r1] + + movzx r3, byte [r1+r2-0x01] ; l1 + lea r1, [r1+2*r2] + movzx r4, byte [r1-0x01] ; l2 + add r3, r4 + movzx r4, byte [r1+r2-0x01] ; l3 + add r3, r4 + lea r1, [r1+2*r2] + movzx r4, byte [r1-0x01] ; l4 + add r3, r4 + movd mm1, r3d ; mm1 = l1+l2+l3+l4 + + movzx r3, byte [r1+r2-0x01] ; l5 + lea r1, [r1+2*r2] + movzx r4, byte [r1-0x01] ; l6 + add r3, r4 + movzx r4, byte [r1+r2-0x01] ; l7 + add r3, r4 + lea r1, [r1+2*r2] + 
movzx r4, byte [r1-0x01] ; l8 + add r3, r4 + movd mm2, r3d ; mm2 = l5+l6+l7+l8 + + movq mm3, mm0 + psrlq mm0, 0x20 + psllq mm3, 0x20 + psrlq mm3, 0x20 + pxor mm4, mm4 + psadbw mm0, mm4 + psadbw mm3, mm4 ; sum1 = mm3+mm1, sum2 = mm0, sum3 = mm2 + + paddq mm3, mm1 + movq mm1, mm2 + paddq mm1, mm0; ; sum1 = mm3, sum2 = mm0, sum3 = mm2, sum4 = mm1 + + movq mm4, [pic(mmx_0x02)] + + paddq mm0, mm4 + psrlq mm0, 0x02 + + paddq mm2, mm4 + psrlq mm2, 0x02 + + paddq mm3, mm4 + paddq mm3, mm4 + psrlq mm3, 0x03 + + paddq mm1, mm4 + paddq mm1, mm4 + psrlq mm1, 0x03 + + pmuludq mm0, [pic(mmx_01bytes)] + pmuludq mm3, [pic(mmx_01bytes)] + psllq mm0, 0x20 + pxor mm0, mm3 ; mm0 = m_up + + pmuludq mm2, [pic(mmx_01bytes)] + pmuludq mm1, [pic(mmx_01bytes)] + psllq mm1, 0x20 + pxor mm1, mm2 ; mm1 = m_down + + movq [r0], mm0 + movq [r0+0x08], mm0 + movq [r0+0x10], mm0 + movq [r0+0x18], mm0 + + movq [r0+0x20], mm1 + movq [r0+0x28], mm1 + movq [r0+0x30], mm1 + movq [r0+0x38], mm1 + + DEINIT_X86_32_PIC + pop r4 + pop r3 + WELSEMMS + ret + + + +;*********************************************************************** +; +; void WelsI16x16LumaPredDc_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride) +;*********************************************************************** +WELS_EXTERN WelsI16x16LumaPredDc_sse2 + push r3 + push r4 + %assign push_num 2 + INIT_X86_32_PIC r5 + LOAD_3_PARA + SIGN_EXTENSION r2, r2d + sub r1, r2 + movdqa xmm0, [r1] ; read one row (the 16 pixels above the block; movdqa needs a 16-byte aligned address) + pxor xmm1, xmm1 + psadbw xmm0, xmm1 + movdqa xmm1, xmm0 + psrldq xmm1, 0x08 + pslldq xmm0, 0x08 + psrldq xmm0, 0x08 + paddw xmm0, xmm1 + + movzx r3, byte [r1+r2-0x01] + movzx r4, byte [r1+2*r2-0x01] + add r3, r4 + lea r1, [r1+r2] + LOAD_2_LEFT_AND_ADD + LOAD_2_LEFT_AND_ADD + LOAD_2_LEFT_AND_ADD + LOAD_2_LEFT_AND_ADD + LOAD_2_LEFT_AND_ADD + LOAD_2_LEFT_AND_ADD + LOAD_2_LEFT_AND_ADD + add r3, 0x10 + movd xmm1, r3d + paddw xmm0, xmm1 + psrld xmm0, 0x05 + pmuludq xmm0, [pic(mmx_01bytes)] + pshufd xmm0, xmm0, 0 + + movdqa [r0], xmm0 + movdqa 
[r0+0x10], xmm0 + movdqa [r0+0x20], xmm0 + movdqa [r0+0x30], xmm0 + movdqa [r0+0x40], xmm0 + movdqa [r0+0x50], xmm0 + movdqa [r0+0x60], xmm0 + movdqa [r0+0x70], xmm0 + movdqa [r0+0x80], xmm0 + movdqa [r0+0x90], xmm0 + movdqa [r0+0xa0], xmm0 + movdqa [r0+0xb0], xmm0 + movdqa [r0+0xc0], xmm0 + movdqa [r0+0xd0], xmm0 + movdqa [r0+0xe0], xmm0 + movdqa [r0+0xf0], xmm0 + + DEINIT_X86_32_PIC + pop r4 + pop r3 + ret diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/x86/matrix_transpose.asm b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/x86/matrix_transpose.asm new file mode 100644 index 000000000..98fe000b9 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/x86/matrix_transpose.asm @@ -0,0 +1,395 @@ +;*! +;* \copy +;* Copyright (c) 2009-2013, Cisco Systems +;* All rights reserved. +;* +;* Redistribution and use in source and binary forms, with or without +;* modification, are permitted provided that the following conditions +;* are met: +;* +;* * Redistributions of source code must retain the above copyright +;* notice, this list of conditions and the following disclaimer. +;* +;* * Redistributions in binary form must reproduce the above copyright +;* notice, this list of conditions and the following disclaimer in +;* the documentation and/or other materials provided with the +;* distribution. +;* +;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +;* POSSIBILITY OF SUCH DAMAGE. +;*************************************************************************/ + +%include "asm_inc.asm" + +;in: m0, m1, m2, m3, m4, m5, m6, m7 +;out: m0, m3, m5, m2, m7, m1, m6, m4 +%macro TRANSPOSE_8x8B_MMX 10 ; %1-%8: mm registers, %9: memory operand with the 8th input row, %10: 8-byte scratch memory + MMX_XSwap bw, %1, %2, %8 + MMX_XSwap bw, %3, %4, %2 + MMX_XSwap bw, %5, %6, %4 + movq %6, %9 + movq %10, %4 + MMX_XSwap bw, %7, %6, %4 + + MMX_XSwap wd, %1, %3, %6 + MMX_XSwap wd, %8, %2, %3 + MMX_XSwap wd, %5, %7, %2 + movq %7, %10 + movq %10, %3 + MMX_XSwap wd, %7, %4, %3 + + MMX_XSwap dq, %1, %5, %4 + MMX_XSwap dq, %6, %2, %5 + MMX_XSwap dq, %8, %7, %2 + movq %7, %10 + movq %10, %5 + MMX_XSwap dq, %7, %3, %5 + + movq %3, %10 +%endmacro + +;in: m0, m3, m5, m2, m7, m1, m6, m4 +%macro TRANSPOSE8x8_WRITE_MMX 2 ; dst, dst_stride + movq [%1], mm0 ; result of line 1, x8 bytes + movq [%1+%2], mm3 ; result of line 2 + lea %1, [%1+2*%2] + movq [%1], mm5 ; result of line 3 + movq [%1+%2], mm2 ; result of line 4 + lea %1, [%1+2*%2] + movq [%1], mm7 ; result of line 5 + movq [%1+%2], mm1 ; result of line 6 + lea %1, [%1+2*%2] + movq [%1], mm6 ; result of line 7 + movq [%1+%2], mm4 ; result of line 8 +%endmacro + +;in: m0, m3, m5, m2, m7, m1, m6, m4 +%macro TRANSPOSE8x8_WRITE_ALT_MMX 3 ; dst, dst_stride, reg32 + movq [%1], mm0 ; result of line 1, x8 bytes + movq [%1+%2], mm3 ; result of line 2 + lea %3, [%1+2*%2] + movq [%3], mm5 ; result of line 3 + movq [%3+%2], mm2 ; result of line 4 + lea %3, [%3+2*%2] + movq [%3], mm7 ; result 
of line 5 + movq [%3+%2], mm1 ; result of line 6 + lea %3, [%3+2*%2] + movq [%3], mm6 ; result of line 7 + movq [%3+%2], mm4 ; result of line 8 +%endmacro ; end of TRANSPOSE8x8_WRITE_ALT_MMX + +; for transpose 16x8 + +;in: m0, m1, m2, m3, m4, m5, m6, m7 +;out: m4, m2, m3, m7, m5, m1, m6, m0 +%macro TRANSPOSE_8x16B_SSE2 10 ; %1-%8: xmm registers, %9: memory operand with the 8th input row, %10: 16-byte scratch memory (accessed with movdqa) + SSE2_XSawp bw, %1, %2, %8 + SSE2_XSawp bw, %3, %4, %2 + SSE2_XSawp bw, %5, %6, %4 + movdqa %6, %9 + movdqa %10, %4 + SSE2_XSawp bw, %7, %6, %4 + + SSE2_XSawp wd, %1, %3, %6 + SSE2_XSawp wd, %8, %2, %3 + SSE2_XSawp wd, %5, %7, %2 + movdqa %7, %10 + movdqa %10, %3 + SSE2_XSawp wd, %7, %4, %3 + + SSE2_XSawp dq, %1, %5, %4 + SSE2_XSawp dq, %6, %2, %5 + SSE2_XSawp dq, %8, %7, %2 + movdqa %7, %10 + movdqa %10, %5 + SSE2_XSawp dq, %7, %3, %5 + + SSE2_XSawp qdq, %1, %8, %3 + SSE2_XSawp qdq, %4, %2, %8 + SSE2_XSawp qdq, %6, %7, %2 + movdqa %7, %10 + movdqa %10, %1 + SSE2_XSawp qdq, %7, %5, %1 + movdqa %5, %10 +%endmacro ; end of TRANSPOSE_8x16B_SSE2 + + +%macro TRANSPOSE8x16_WRITE_SSE2 2 ; dst, dst_stride + movq [%1], xmm4 ; result of line 1, x8 bytes + movq [%1+%2], xmm2 ; result of line 2 + lea %1, [%1+2*%2] + movq [%1], xmm3 ; result of line 3 + movq [%1+%2], xmm7 ; result of line 4 + + lea %1, [%1+2*%2] + movq [%1], xmm5 ; result of line 5 + movq [%1+%2], xmm1 ; result of line 6 + lea %1, [%1+2*%2] + movq [%1], xmm6 ; result of line 7 + movq [%1+%2], xmm0 ; result of line 8 + + lea %1, [%1+2*%2] + movhpd [%1], xmm4 ; result of line 9 + movhpd [%1+%2], xmm2 ; result of line 10 + lea %1, [%1+2*%2] + movhpd [%1], xmm3 ; result of line 11 + movhpd [%1+%2], xmm7 ; result of line 12 + + lea %1, [%1+2*%2] + movhpd [%1], xmm5 ; result of line 13 + movhpd [%1+%2], xmm1 ; result of line 14 + lea %1, [%1+2*%2] + movhpd [%1], xmm6 ; result of line 15 + movhpd [%1+%2], xmm0 ; result of line 16 +%endmacro ; end of TRANSPOSE8x16_WRITE_SSE2 + +%macro TRANSPOSE8x16_WRITE_ALT_SSE2 3 ; dst, dst_stride, reg32 + movq [%1], xmm4 ; result of line 1, x8 bytes + movq 
[%1+%2], xmm2 ; result of line 2 + lea %3, [%1+2*%2] + movq [%3], xmm3 ; result of line 3 + movq [%3+%2], xmm7 ; result of line 4 + + lea %3, [%3+2*%2] + movq [%3], xmm5 ; result of line 5 + movq [%3+%2], xmm1 ; result of line 6 + lea %3, [%3+2*%2] + movq [%3], xmm6 ; result of line 7 + movq [%3+%2], xmm0 ; result of line 8 + + lea %3, [%3+2*%2] + movhpd [%3], xmm4 ; result of line 9 + movhpd [%3+%2], xmm2 ; result of line 10 + lea %3, [%3+2*%2] + movhpd [%3], xmm3 ; result of line 11 + movhpd [%3+%2], xmm7 ; result of line 12 + + lea %3, [%3+2*%2] + movhpd [%3], xmm5 ; result of line 13 + movhpd [%3+%2], xmm1 ; result of line 14 + lea %3, [%3+2*%2] + movhpd [%3], xmm6 ; result of line 15 + movhpd [%3+%2], xmm0 ; result of line 16 +%endmacro ; end of TRANSPOSE8x16_WRITE_ALT_SSE2 + + +SECTION .text + +WELS_EXTERN TransposeMatrixBlock16x16_sse2 +; void TransposeMatrixBlock16x16_sse2( void *dst/*16x16*/, const int32_t dst_stride, void *src/*16x16*/, const int32_t src_stride ); + push r4 + push r5 + %assign push_num 2 + LOAD_4_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + + mov r4, r7 + and r4, 0Fh + sub r7, 10h + sub r7, r4 ; r7 is now 16-byte aligned; [r7] is the scratch slot handed to TRANSPOSE_8x16B_SSE2 + lea r5, [r3+r3*2] + ; top 8x16 block + movdqa xmm0, [r2] + movdqa xmm1, [r2+r3] + movdqa xmm2, [r2+r3*2] + movdqa xmm3, [r2+r5] + lea r2, [r2+r3*4] + movdqa xmm4, [r2] + movdqa xmm5, [r2+r3] + movdqa xmm6, [r2+r3*2] + + ;in: m0, m1, m2, m3, m4, m5, m6, m7 + ;out: m4, m2, m3, m7, m5, m1, m6, m0 + TRANSPOSE_8x16B_SSE2 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, [r2+r5], [r7] + + TRANSPOSE8x16_WRITE_SSE2 r0, r1 + + ; bottom 8x16 block + lea r2, [r2+r3*4] + movdqa xmm0, [r2] + movdqa xmm1, [r2+r3] + movdqa xmm2, [r2+r3*2] + movdqa xmm3, [r2+r5] + lea r2, [r2+r3*4] + movdqa xmm4, [r2] + movdqa xmm5, [r2+r3] + movdqa xmm6, [r2+r3*2] + + ;in: m0, m1, m2, m3, m4, m5, m6, m7 + ;out: m4, m2, m3, m7, m5, m1, m6, m0 + TRANSPOSE_8x16B_SSE2 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, [r2+r5], [r7] + + mov r5, r1 + sal r5, 4 + 
sub r0, r5 + lea r0, [r0+r1*2+8] + TRANSPOSE8x16_WRITE_SSE2 r0, r1 + + add r7, r4 + add r7, 10h + POP_XMM + LOAD_4_PARA_POP + pop r5 + pop r4 + ret + +WELS_EXTERN TransposeMatrixBlocksx16_sse2 +; void TransposeMatrixBlocksx16_sse2( void *dst/*W16x16*/, const int32_t dst_stride, void *src/*16xW16*/, const int32_t src_stride, const int32_t num_blocks ); + push r5 + push r6 + %assign push_num 2 + LOAD_5_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r4, r4d + mov r5, r7 + and r5, 0Fh + sub r7, 10h + sub r7, r5 ; r7 is now 16-byte aligned; [r7] is the scratch slot handed to TRANSPOSE_8x16B_SSE2 +TRANSPOSE_LOOP_SSE2: + ; explicitly loading next loop data (16 rows starting 8 rows ahead of r2, on every iteration incl. the last -- NOTE(review): confirm src stays readable past the final block) + lea r6, [r2+r3*8] + push r4 +%rep 8 + mov r4, [r6] + mov r4, [r6+r3] + lea r6, [r6+r3*2] +%endrep + pop r4 + ; top 8x16 block + movdqa xmm0, [r2] + movdqa xmm1, [r2+r3] + lea r2, [r2+r3*2] + movdqa xmm2, [r2] + movdqa xmm3, [r2+r3] + lea r2, [r2+r3*2] + movdqa xmm4, [r2] + movdqa xmm5, [r2+r3] + lea r2, [r2+r3*2] + movdqa xmm6, [r2] + + ;in: m0, m1, m2, m3, m4, m5, m6, m7 + ;out: m4, m2, m3, m7, m5, m1, m6, m0 + TRANSPOSE_8x16B_SSE2 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, [r2+r3], [r7] + TRANSPOSE8x16_WRITE_ALT_SSE2 r0, r1, r6 + lea r2, [r2+r3*2] + + ; bottom 8x16 block + movdqa xmm0, [r2] + movdqa xmm1, [r2+r3] + lea r2, [r2+r3*2] + movdqa xmm2, [r2] + movdqa xmm3, [r2+r3] + lea r2, [r2+r3*2] + movdqa xmm4, [r2] + movdqa xmm5, [r2+r3] + lea r2, [r2+r3*2] + movdqa xmm6, [r2] + + ;in: m0, m1, m2, m3, m4, m5, m6, m7 + ;out: m4, m2, m3, m7, m5, m1, m6, m0 + TRANSPOSE_8x16B_SSE2 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, [r2+r3], [r7] + TRANSPOSE8x16_WRITE_ALT_SSE2 r0+8, r1, r6 + lea r2, [r2+r3*2] + lea r0, [r0+16] + dec r4 + jg near TRANSPOSE_LOOP_SSE2 + + add r7, r5 + add r7, 10h + POP_XMM + LOAD_5_PARA_POP + pop r6 + pop r5 + ret + +WELS_EXTERN TransposeMatrixBlock8x8_mmx +; void TransposeMatrixBlock8x8_mmx( void *dst/*8x8*/, const int32_t dst_stride, void *src/*8x8*/, const int32_t src_stride ); + %assign push_num 0 + LOAD_4_PARA + SIGN_EXTENSION r1, 
r1d + SIGN_EXTENSION r3, r3d + sub r7, 8 ; reserve 8 bytes at [r7] as the scratch slot handed to TRANSPOSE_8x8B_MMX + + movq mm0, [r2] + movq mm1, [r2+r3] + lea r2, [r2+2*r3] + movq mm2, [r2] + movq mm3, [r2+r3] + lea r2, [r2+2*r3] + movq mm4, [r2] + movq mm5, [r2+r3] + lea r2, [r2+2*r3] + movq mm6, [r2] + + ;in: m0, m1, m2, m3, m4, m5, m6, m7 + ;out: m0, m3, m5, m2, m7, m1, m6, m4 + TRANSPOSE_8x8B_MMX mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7, [r2+r3], [r7] + + TRANSPOSE8x8_WRITE_MMX r0, r1 + + emms + add r7, 8 + LOAD_4_PARA_POP + ret + +WELS_EXTERN TransposeMatrixBlocksx8_mmx +; void TransposeMatrixBlocksx8_mmx( void *dst/*8xW8*/, const int32_t dst_stride, void *src/*W8x8*/, const int32_t src_stride, const int32_t num_blocks ); + push r5 + push r6 + %assign push_num 2 + LOAD_5_PARA + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r4, r4d + sub r7, 8 + + lea r5, [r2+r3*8] + +TRANSPOSE_BLOCKS_X8_LOOP_MMX: + ; explicitly loading next loop data (the 8 rows after the current block, on every iteration incl. the last -- NOTE(review): confirm src stays readable past the final block) +%rep 4 + mov r6, [r5] + mov r6, [r5+r3] + lea r5, [r5+r3*2] +%endrep + movq mm0, [r2] + movq mm1, [r2+r3] + lea r2, [r2+2*r3] + movq mm2, [r2] + movq mm3, [r2+r3] + lea r2, [r2+2*r3] + movq mm4, [r2] + movq mm5, [r2+r3] + lea r2, [r2+2*r3] + movq mm6, [r2] + + ;in: m0, m1, m2, m3, m4, m5, m6, m7 + ;out: m0, m3, m5, m2, m7, m1, m6, m4 + TRANSPOSE_8x8B_MMX mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7, [r2+r3], [r7] + + TRANSPOSE8x8_WRITE_ALT_MMX r0, r1, r6 + lea r0, [r0+8] + lea r2, [r2+2*r3] + dec r4 + jg near TRANSPOSE_BLOCKS_X8_LOOP_MMX + + emms + add r7, 8 + LOAD_5_PARA_POP + pop r6 + pop r5 + ret diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/x86/memzero.asm b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/x86/memzero.asm new file mode 100644 index 000000000..a95c6497b --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/x86/memzero.asm @@ -0,0 +1,132 @@ +;*! +;* \copy +;* Copyright (c) 2009-2013, Cisco Systems +;* All rights reserved. 
+;* +;* Redistribution and use in source and binary forms, with or without +;* modification, are permitted provided that the following conditions +;* are met: +;* +;* * Redistributions of source code must retain the above copyright +;* notice, this list of conditions and the following disclaimer. +;* +;* * Redistributions in binary form must reproduce the above copyright +;* notice, this list of conditions and the following disclaimer in +;* the documentation and/or other materials provided with the +;* distribution. +;* +;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +;* POSSIBILITY OF SUCH DAMAGE. 
+;* +;* +;* memzero.asm +;* +;* Abstract +;* +;* +;* History +;* 9/16/2009 Created +;* +;* +;*************************************************************************/ + +%include "asm_inc.asm" +;*********************************************************************** +; Code +;*********************************************************************** + +SECTION .text + +;*********************************************************************** +;void WelsPrefetchZero_mmx(int8_t const*_A); +;*********************************************************************** +WELS_EXTERN WelsPrefetchZero_mmx + %assign push_num 0 + LOAD_1_PARA + prefetchnta [r0] + ret + + +;*********************************************************************** +; void WelsSetMemZeroAligned64_sse2(void *dst, int32_t size) -- dst: 16-byte aligned (movdqa); size: positive multiple of 64 (loop exits only when the count hits exactly 0) +;*********************************************************************** +WELS_EXTERN WelsSetMemZeroAligned64_sse2 + + %assign push_num 0 + LOAD_2_PARA + SIGN_EXTENSION r1, r1d + neg r1 + + pxor xmm0, xmm0 +.memzeroa64_sse2_loops: + movdqa [r0], xmm0 + movdqa [r0+16], xmm0 + movdqa [r0+32], xmm0 + movdqa [r0+48], xmm0 + add r0, 0x40 + + add r1, 0x40 + jnz near .memzeroa64_sse2_loops + + ret + +;*********************************************************************** +; void WelsSetMemZeroSize64_mmx(void *dst, int32_t size) -- size: positive multiple of 64 (loop exits only when the count hits exactly 0) +;*********************************************************************** +WELS_EXTERN WelsSetMemZeroSize64_mmx + + %assign push_num 0 + LOAD_2_PARA + SIGN_EXTENSION r1, r1d + neg r1 + + pxor mm0, mm0 +.memzero64_mmx_loops: + movq [r0], mm0 + movq [r0+8], mm0 + movq [r0+16], mm0 + movq [r0+24], mm0 + movq [r0+32], mm0 + movq [r0+40], mm0 + movq [r0+48], mm0 + movq [r0+56], mm0 + add r0, 0x40 + + add r1, 0x40 + jnz near .memzero64_mmx_loops + + WELSEMMS + ret + +;*********************************************************************** +; void WelsSetMemZeroSize8_mmx(void *dst, int32_t size) -- size: positive multiple of 8 (loop exits only when the count hits exactly 0) +;*********************************************************************** 
+WELS_EXTERN WelsSetMemZeroSize8_mmx + + %assign push_num 0 + LOAD_2_PARA + SIGN_EXTENSION r1, r1d + neg r1 ; r1 = -size, counted up towards 0 in steps of 8 + pxor mm0, mm0 + +.memzero8_mmx_loops: + movq [r0], mm0 + add r0, 0x08 + + add r1, 0x08 + jnz near .memzero8_mmx_loops + + WELSEMMS + ret + + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/x86/quant.asm b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/x86/quant.asm new file mode 100644 index 000000000..21b56b539 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/x86/quant.asm @@ -0,0 +1,507 @@ +;*! +;* \copy +;* Copyright (c) 2009-2013, Cisco Systems +;* All rights reserved. +;* +;* Redistribution and use in source and binary forms, with or without +;* modification, are permitted provided that the following conditions +;* are met: +;* +;* * Redistributions of source code must retain the above copyright +;* notice, this list of conditions and the following disclaimer. +;* +;* * Redistributions in binary form must reproduce the above copyright +;* notice, this list of conditions and the following disclaimer in +;* the documentation and/or other materials provided with the +;* distribution. +;* +;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +;* POSSIBILITY OF SUCH DAMAGE. 
+;* +;* +;* quant.asm +;* +;* Abstract +;* quantize / dequantize helpers for int16 coefficient blocks (SSE2, MMX and, under HAVE_AVX2, AVX2) +;* +;* History +;* 7/6/2009 Created +;* +;* +;*************************************************************************/ + +%include "asm_inc.asm" + + +SECTION .text +;************************************************ +;NEW_QUANT +;************************************************ + +%macro SSE2_Quant8 5 ; work reg, sign-mask reg, ff, mf, coefficient operand (read, quantized, written back) + MOVDQ %1, %5 ; MOVDQ is defined outside this file; presumably a 128-bit move -- confirm + pxor %2, %2 + pcmpgtw %2, %1 ; mask word = FFFFh where the coefficient is negative (0 > x) + pxor %1, %2 + psubw %1, %2 ; (x xor mask) - mask = |x| + paddusw %1, %3 ; |x| + ff with unsigned saturation + pmulhuw %1, %4 ; high 16 bits of the unsigned product with mf + pxor %1, %2 + psubw %1, %2 ; re-apply the original sign + MOVDQ %5, %1 +%endmacro + +%macro SSE2_QuantMax8 6 ; same steps as SSE2_Quant8, plus a running per-lane maximum kept in the 6th argument + MOVDQ %1, %5 + pxor %2, %2 + pcmpgtw %2, %1 + pxor %1, %2 + psubw %1, %2 + paddusw %1, %3 + pmulhuw %1, %4 + pmaxsw %6, %1 ; folded in before the sign is restored, i.e. on the quantized magnitudes + pxor %1, %2 + psubw %1, %2 + MOVDQ %5, %1 +%endmacro + +%define pDct esp + 4 +%define ff esp + 8 +%define mf esp + 12 +%define max esp + 16 +;*********************************************************************** +; void WelsQuant4x4_sse2(int16_t *pDct, int16_t* ff, int16_t *mf); +;*********************************************************************** +WELS_EXTERN WelsQuant4x4_sse2 + %assign push_num 0 + LOAD_3_PARA ; macro defined outside this file; the comments below assume it maps the arguments to r0..r2 in order -- confirm + movdqa xmm2, [r1] ; 8 words of ff; movdqa needs a 16-byte aligned address + movdqa xmm3, [r2] ; 8 words of mf + + SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0] ; coefficients 0..7 + SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x10] ; coefficients 8..15 + + ret + +;*********************************************************************** +;void WelsQuant4x4Dc_sse2(int16_t *pDct, const int16_t ff, int16_t mf); +;*********************************************************************** +WELS_EXTERN WelsQuant4x4Dc_sse2 + %assign push_num 0 + LOAD_3_PARA + SIGN_EXTENSIONW r1, r1w + SIGN_EXTENSIONW r2, r2w + SSE2_Copy8Times xmm3, r2d ; macro defined outside this file; presumably broadcasts the scalar mf to all 8 word lanes -- confirm + + SSE2_Copy8Times xmm2, r1d ; likewise for the scalar ff + + SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0] ; coefficients 0..7 + SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x10] ; coefficients 8..15 + + ret + +;*********************************************************************** +; void WelsQuantFour4x4_sse2(int16_t *pDct, int16_t* ff, int16_t *mf); +;*********************************************************************** +WELS_EXTERN 
WelsQuantFour4x4_sse2 + %assign push_num 0 + LOAD_3_PARA + MOVDQ xmm2, [r1] + MOVDQ xmm3, [r2] + + SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0] ; eight 16-byte groups follow: 64 coefficients (0x80 bytes) quantized in place + SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x10] + SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x20] + SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x30] + SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x40] + SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x50] + SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x60] + SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x70] + + ret + +;*********************************************************************** +; void WelsQuantFour4x4Max_sse2(int16_t *pDct, int32_t* f, int16_t *mf, int16_t *max); +;*********************************************************************** +WELS_EXTERN WelsQuantFour4x4Max_sse2 + %assign push_num 0 + LOAD_4_PARA + PUSH_XMM 8 ; macro defined outside this file; presumably preserves xmm registers where the ABI requires it -- confirm + MOVDQ xmm2, [r1] + MOVDQ xmm3, [r2] + + pxor xmm4, xmm4 ; xmm4..xmm7: running maxima, one register per pair of 16-byte groups (per 0x20 bytes) + pxor xmm5, xmm5 + pxor xmm6, xmm6 + pxor xmm7, xmm7 + SSE2_QuantMax8 xmm0, xmm1, xmm2, xmm3, [r0 ], xmm4 + SSE2_QuantMax8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x10], xmm4 + SSE2_QuantMax8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x20], xmm5 + SSE2_QuantMax8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x30], xmm5 + SSE2_QuantMax8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x40], xmm6 + SSE2_QuantMax8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x50], xmm6 + SSE2_QuantMax8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x60], xmm7 + SSE2_QuantMax8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x70], xmm7 + + SSE2_TransTwo4x4W xmm4, xmm5, xmm6, xmm7, xmm0 ; transpose macro defined outside this file; the lane layout of its outputs is not visible here + pmaxsw xmm0, xmm4 + pmaxsw xmm0, xmm5 + pmaxsw xmm0, xmm7 + movdqa xmm1, xmm0 + punpckhqdq xmm0, xmm1 + pmaxsw xmm0, xmm1 + + movq [r3], xmm0 ; writes four 16-bit maxima (8 bytes) to max + POP_XMM + LOAD_4_PARA_POP + ret + +%macro MMX_Copy4Times 2 ; broadcast the low word of the 2nd argument to all four word lanes of the 1st + movd %1, %2 + punpcklwd %1, %1 + punpckldq %1, %1 +%endmacro + +SECTION .text + +%macro MMX_Quant4 4 ; in-register variant of SSE2_Quant8: value reg, sign-mask reg, ff, mf + pxor %2, %2 + pcmpgtw %2, %1 ; mask word = FFFFh where the value is negative + pxor %1, %2 + psubw %1, %2 ; absolute value + paddusw %1, %3 + pmulhuw %1, %4 ; high 16 bits of the unsigned product + pxor %1, %2 + psubw %1, %2 ; re-apply the original sign +%endmacro + +;*********************************************************************** +;int32_t 
WelsHadamardQuant2x2_mmx(int16_t *rs, const int16_t ff, int16_t mf, int16_t * pDct, int16_t * block); +;*********************************************************************** +WELS_EXTERN WelsHadamardQuant2x2_mmx + %assign push_num 0 + LOAD_5_PARA ; macro defined outside this file; the comments below assume it maps the arguments to r0..r4 in order -- confirm + SIGN_EXTENSIONW r1, r1w + SIGN_EXTENSIONW r2, r2w + movd mm0, [r0] ; gather the first int16 of each 0x20-byte group: rs+0x00, +0x20, +0x40, +0x60 + movd mm1, [r0 + 0x20] + punpcklwd mm0, mm1 + movd mm3, [r0 + 0x40] + movd mm1, [r0 + 0x60] + punpcklwd mm3, mm1 + + ;hdm_2x2, mm0 = dct0 dct1, mm3 = dct2 dct3 + movq mm5, mm3 + paddw mm3, mm0 + psubw mm0, mm5 + punpcklwd mm3, mm0 + movq mm1, mm3 + psrlq mm1, 32 + movq mm5, mm1 + paddw mm1, mm3 + psubw mm3, mm5 + punpcklwd mm1, mm3 + + ;quant_2x2_dc + MMX_Copy4Times mm3, r2d ; mm3 = mf in all four word lanes + MMX_Copy4Times mm2, r1d ; mm2 = ff in all four word lanes + MMX_Quant4 mm1, mm0, mm2, mm3 + + ; store dct_2x2 + movq [r3], mm1 ; the same 8 bytes go to both the 4th and the 5th pointer argument + movq [r4], mm1 + + ; pNonZeroCount of dct_2x2 + pcmpeqb mm2, mm2 ; mm2 = FF + pxor mm3, mm3 + packsswb mm1, mm3 ; signed-saturating pack: a non-zero word stays a non-zero byte; the upper 4 bytes come from mm3 = 0 + pcmpeqb mm1, mm3 ; set FF if equal, 0 if not equal + psubsb mm1, mm2 ; set 0 if equal, 1 if not equal + psadbw mm1, mm3 ; sum of the 0/1 bytes = number of non-zero quantized values + mov r1w, 0 ; ff is no longer needed: reuse r1 as a 16-bit zero + mov [r0], r1w ; clear the four gathered source words in rs + mov [r0 + 0x20], r1w + mov [r0 + 0x40], r1w + mov [r0 + 0x60], r1w + + + movd retrd, mm1 ; return value = the count computed by psadbw above + + WELSEMMS + LOAD_5_PARA_POP + ret + +;*********************************************************************** +;int32_t WelsHadamardQuant2x2Skip_mmx(int16_t *pDct, int16_t ff, int16_t mf); +;*********************************************************************** +WELS_EXTERN WelsHadamardQuant2x2Skip_mmx + %assign push_num 0 + LOAD_3_PARA + SIGN_EXTENSIONW r1, r1w + SIGN_EXTENSIONW r2, r2w + movd mm0, [r0] ; same gather as WelsHadamardQuant2x2_mmx; this variant never writes to memory + movd mm1, [r0 + 0x20] + punpcklwd mm0, mm1 + movd mm3, [r0 + 0x40] + movd mm1, [r0 + 0x60] + punpcklwd mm3, mm1 + + ;hdm_2x2, mm0 = dct0 dct1, mm3 = dct2 dct3 + movq mm5, mm3 + paddw mm3, mm0 + psubw mm0, mm5 + punpcklwd mm3, mm0 + movq mm1, mm3 + psrlq mm1, 32 + movq mm5, mm1 + paddw mm1, mm3 + psubw mm3, mm5 + punpcklwd mm1, mm3 + + ;quant_2x2_dc + MMX_Copy4Times mm3, r2d + MMX_Copy4Times mm2, r1d + MMX_Quant4 mm1, mm0, mm2, mm3 
+ + ; pNonZeroCount of dct_2x2 + pcmpeqb mm2, mm2 ; mm2 = FF + pxor mm3, mm3 + packsswb mm1, mm3 + pcmpeqb mm1, mm3 ; set FF if equal, 0 if not equal + psubsb mm1, mm2 ; set 0 if equal, 1 if not equal + psadbw mm1, mm3 ; sum of the 0/1 bytes = number of non-zero quantized values + movd retrd, mm1 ; the count is the only result + + WELSEMMS + ret + + +%macro SSE2_DeQuant8 3 ; memory operand, scratch reg, mf reg: multiplies 8 words in place + MOVDQ %2, %1 + pmullw %2, %3 ; keeps the low 16 bits of each product + MOVDQ %1, %2 +%endmacro + + +;*********************************************************************** +; void WelsDequant4x4_sse2(int16_t *pDct, const uint16_t* mf); +;*********************************************************************** +WELS_EXTERN WelsDequant4x4_sse2 + %assign push_num 0 + LOAD_2_PARA + + movdqa xmm1, [r1] ; 8 multipliers, reused for both halves; movdqa needs a 16-byte aligned address + SSE2_DeQuant8 [r0 ], xmm0, xmm1 + SSE2_DeQuant8 [r0 + 0x10], xmm0, xmm1 + + ret + +;*********************************************************************** +;void WelsDequantFour4x4_sse2(int16_t *pDct, const uint16_t* mf); +;*********************************************************************** + +WELS_EXTERN WelsDequantFour4x4_sse2 + %assign push_num 0 + LOAD_2_PARA + + movdqa xmm1, [r1] ; the same 8 multipliers are applied to all eight 16-byte groups (0x80 bytes) + SSE2_DeQuant8 [r0 ], xmm0, xmm1 + SSE2_DeQuant8 [r0+0x10 ], xmm0, xmm1 + SSE2_DeQuant8 [r0+0x20 ], xmm0, xmm1 + SSE2_DeQuant8 [r0+0x30 ], xmm0, xmm1 + SSE2_DeQuant8 [r0+0x40 ], xmm0, xmm1 + SSE2_DeQuant8 [r0+0x50 ], xmm0, xmm1 + SSE2_DeQuant8 [r0+0x60 ], xmm0, xmm1 + SSE2_DeQuant8 [r0+0x70 ], xmm0, xmm1 + + ret + +;*********************************************************************** +;void WelsDequantIHadamard4x4_sse2(int16_t *rs, const uint16_t mf); +;*********************************************************************** +WELS_EXTERN WelsDequantIHadamard4x4_sse2 + %assign push_num 0 + LOAD_2_PARA + %ifndef X86_32 + movzx r1, r1w ; mf is an unsigned 16-bit value: clear the upper bits of the register + %endif + + ; WelsDequantLumaDc4x4 + SSE2_Copy8Times xmm1, r1d + ;psrlw xmm1, 2 ; for the (>>2) in ihdm + MOVDQ xmm0, [r0] + MOVDQ xmm2, [r0+0x10] + pmullw xmm0, xmm1 ; scale all 16 words by mf (low 16 bits of each product) + pmullw xmm2, xmm1 + + ; ihdm_4x4 + movdqa xmm1, xmm0 + psrldq xmm1, 8 ; upper 4 words of xmm0 moved down to the low half + movdqa xmm3, xmm2 + psrldq xmm3, 8 ; upper 4 words of xmm2 moved down to the low half + + SSE2_SumSub xmm0, xmm3, xmm5 ; 
xmm0 = xmm0 - xmm3, xmm3 = xmm0 + xmm3 + SSE2_SumSub xmm1, xmm2, xmm5 ; xmm1 = xmm1 - xmm2, xmm2 = xmm1 + xmm2 + SSE2_SumSub xmm3, xmm2, xmm5 ; xmm3 = xmm3 - xmm2, xmm2 = xmm3 + xmm2 + SSE2_SumSub xmm0, xmm1, xmm5 ; xmm0 = xmm0 - xmm1, xmm1 = xmm0 + xmm1 + + SSE2_TransTwo4x4W xmm2, xmm1, xmm3, xmm0, xmm4 ; transpose macro defined outside this file + SSE2_SumSub xmm2, xmm4, xmm5 + SSE2_SumSub xmm1, xmm0, xmm5 + SSE2_SumSub xmm4, xmm0, xmm5 + SSE2_SumSub xmm2, xmm1, xmm5 + SSE2_TransTwo4x4W xmm0, xmm1, xmm4, xmm2, xmm3 + + punpcklqdq xmm0, xmm1 + MOVDQ [r0], xmm0 ; first 8 output words + + punpcklqdq xmm2, xmm3 + MOVDQ [r0+16], xmm2 ; last 8 output words + ret + + +%ifdef HAVE_AVX2 +; data=%1 abs_out=%2 ff=%3 mf=%4 7FFFh=%5 +%macro AVX2_Quant 5 + vpabsw %2, %1 ; magnitudes + vpor %1, %1, %5 ; ensure non-zero before vpsignw + vpaddusw %2, %2, %3 ; + ff with unsigned saturation + vpmulhuw %2, %2, %4 ; high 16 bits of the unsigned product with mf + vpsignw %1, %2, %1 ; give the quantized magnitude the sign of the input +%endmacro + + +;*********************************************************************** +; void WelsQuant4x4_avx2(int16_t *pDct, int16_t* ff, int16_t *mf); +;*********************************************************************** + +WELS_EXTERN WelsQuant4x4_avx2 + %assign push_num 0 + LOAD_3_PARA + PUSH_XMM 5 + vbroadcasti128 ymm0, [r1] ; 8 ff words replicated into both 128-bit lanes + vbroadcasti128 ymm1, [r2] ; 8 mf words replicated into both 128-bit lanes + WELS_DW32767_VEX ymm2 ; macro defined outside this file; per the AVX2_Quant parameter list this register carries the 7FFFh words + vmovdqu ymm3, [r0] ; all 16 coefficients in one unaligned load + AVX2_Quant ymm3, ymm4, ymm0, ymm1, ymm2 + vmovdqu [r0], ymm3 + vzeroupper ; clear the upper ymm halves before returning + POP_XMM + ret + + +;*********************************************************************** +;void WelsQuant4x4Dc_avx2(int16_t *pDct, int16_t ff, int16_t mf); +;*********************************************************************** + +WELS_EXTERN WelsQuant4x4Dc_avx2 + %assign push_num 0 + LOAD_1_PARA + PUSH_XMM 5 +%ifidni r1, arg2 + vmovd xmm0, arg2d ; ff arrives in a register here: move it to xmm first, then broadcast + vpbroadcastw ymm0, xmm0 +%else + vpbroadcastw ymm0, arg2 ; otherwise arg2 is used directly as the broadcast source (presumably a stack operand) -- confirm +%endif +%ifidni r2, arg3 + vmovd xmm1, arg3d ; same two cases for mf + vpbroadcastw ymm1, xmm1 +%else + vpbroadcastw ymm1, arg3 +%endif + WELS_DW32767_VEX ymm2 + vmovdqu ymm3, [r0] + AVX2_Quant ymm3, ymm4, ymm0, ymm1, ymm2 + vmovdqu [r0], ymm3 + vzeroupper + POP_XMM + ret + + 
+;*********************************************************************** +; void WelsQuantFour4x4_avx2(int16_t *pDct, int16_t* ff, int16_t *mf); +;*********************************************************************** + +WELS_EXTERN WelsQuantFour4x4_avx2 + %assign push_num 0 + LOAD_3_PARA + PUSH_XMM 6 + vbroadcasti128 ymm0, [r1] ; 8 ff words replicated into both 128-bit lanes + vbroadcasti128 ymm1, [r2] ; 8 mf words replicated into both 128-bit lanes + WELS_DW32767_VEX ymm4 ; macro defined outside this file; passed to AVX2_Quant as its 5th argument + vmovdqu ymm3, [r0 + 0x00] ; 32 bytes = 16 coefficients per ymm register + vmovdqu ymm5, [r0 + 0x20] + AVX2_Quant ymm3, ymm2, ymm0, ymm1, ymm4 + vmovdqu [r0 + 0x00], ymm3 + AVX2_Quant ymm5, ymm2, ymm0, ymm1, ymm4 + vmovdqu [r0 + 0x20], ymm5 + vmovdqu ymm3, [r0 + 0x40] + vmovdqu ymm5, [r0 + 0x60] + AVX2_Quant ymm3, ymm2, ymm0, ymm1, ymm4 + vmovdqu [r0 + 0x40], ymm3 + AVX2_Quant ymm5, ymm2, ymm0, ymm1, ymm4 + vmovdqu [r0 + 0x60], ymm5 + vzeroupper + POP_XMM + ret + + +;*********************************************************************** +; void WelsQuantFour4x4Max_avx2(int16_t *pDct, int32_t* ff, int16_t *mf, int16_t *max); +;*********************************************************************** + +WELS_EXTERN WelsQuantFour4x4Max_avx2 + %assign push_num 0 + LOAD_4_PARA + PUSH_XMM 7 + vbroadcasti128 ymm0, [r1] + vbroadcasti128 ymm1, [r2] + WELS_DW32767_VEX ymm6 + vmovdqu ymm4, [r0 + 0x00] + vmovdqu ymm5, [r0 + 0x20] + AVX2_Quant ymm4, ymm2, ymm0, ymm1, ymm6 ; ymm2 keeps the quantized magnitudes of the first 16 coefficients + vmovdqu [r0 + 0x00], ymm4 + AVX2_Quant ymm5, ymm3, ymm0, ymm1, ymm6 ; ymm3 keeps the magnitudes of the next 16 + vmovdqu [r0 + 0x20], ymm5 + vperm2i128 ymm4, ymm2, ymm3, 00100000b ; low 128-bit lanes of ymm2 and ymm3 + vperm2i128 ymm3, ymm2, ymm3, 00110001b ; high 128-bit lanes of ymm2 and ymm3 + vpmaxsw ymm2, ymm4, ymm3 ; each 128-bit lane now holds the per-word maxima of one 32-byte group + vmovdqu ymm4, [r0 + 0x40] + vmovdqu ymm5, [r0 + 0x60] + AVX2_Quant ymm4, ymm3, ymm0, ymm1, ymm6 + vmovdqu [r0 + 0x40], ymm4 + AVX2_Quant ymm5, ymm4, ymm0, ymm1, ymm6 ; ymm4 was stored on the previous line, so it is free to take the magnitudes + vmovdqu [r0 + 0x60], ymm5 + vperm2i128 ymm5, ymm3, ymm4, 00100000b + vperm2i128 ymm4, ymm3, ymm4, 00110001b + vpmaxsw ymm3, ymm5, ymm4 ; same lane-wise maxima for the third and fourth 32-byte groups + vpxor ymm2, ymm2, ymm6 ; flip bits so as to enable use of vphminposuw to find max value. 
+ vpxor ymm3, ymm3, ymm6 ; flip bits so as to enable use of vphminposuw to find max value. + vextracti128 xmm4, ymm2, 1 ; split the high lanes out so each of the four groups has its own xmm register + vextracti128 xmm5, ymm3, 1 + vphminposuw xmm2, xmm2 ; word 0 = minimum of the flipped words, i.e. the flipped maximum + vphminposuw xmm3, xmm3 + vphminposuw xmm4, xmm4 + vphminposuw xmm5, xmm5 + vpunpcklwd xmm2, xmm2, xmm4 + vpunpcklwd xmm3, xmm3, xmm5 + vpunpckldq xmm2, xmm2, xmm3 ; low 8 bytes: results for groups 0, 1, 2, 3 in that order + vpxor xmm2, xmm2, xmm6 ; restore non-flipped values. + vmovq [r3], xmm2 ; store max values. + vzeroupper + POP_XMM + LOAD_4_PARA_POP + ret +%endif + + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/x86/sample_sc.asm b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/x86/sample_sc.asm new file mode 100644 index 000000000..103fed717 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/x86/sample_sc.asm @@ -0,0 +1,1829 @@ +;*! +;* \copy +;* Copyright (c) 2009-2013, Cisco Systems +;* All rights reserved. +;* +;* Redistribution and use in source and binary forms, with or without +;* modification, are permitted provided that the following conditions +;* are met: +;* +;* * Redistributions of source code must retain the above copyright +;* notice, this list of conditions and the following disclaimer. +;* +;* * Redistributions in binary form must reproduce the above copyright +;* notice, this list of conditions and the following disclaimer in +;* the documentation and/or other materials provided with the +;* distribution. +;* +;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +;* POSSIBILITY OF SUCH DAMAGE. +;* +;*************************************************************************/ +%include "asm_inc.asm" + +;*********************************************************************** +; Local Data (Read Only) +;*********************************************************************** +%ifdef X86_32_PICASM +SECTION .text align=16 +%else +SECTION .rodata align=16 +%endif + +ALIGN 16 +mv_x_inc_x4 dw 0x10, 0x10, 0x10, 0x10 ; added to the x_qpel vector after every group of four positions (FillQpelLocationByFeatureValue_sse2) +mv_y_inc_x4 dw 0x04, 0x04, 0x04, 0x04 ; added to the y_qpel vector after every row +mx_x_offset_x4 dw 0x00, 0x04, 0x08, 0x0C ; initial x_qpel values of the first four positions in a row + +SECTION .text +%ifdef X86_32 +;********************************************************************************************************************** +;void SumOf8x8BlockOfFrame_sse2(uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride, +; uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); +;********************************************************************************************************************* +WELS_EXTERN SumOf8x8BlockOfFrame_sse2 ; arguments are read from the stack; the %defines below address them past the 4 pushes (pushsize) and the local slot (localsize) +%define pushsize 16 +%define localsize 4 +%define ref esp + pushsize + localsize + 4 +%define sum_ref esp + pushsize + localsize + 20 +%define times_of_sum esp + pushsize + localsize + 24 +%define width esp + pushsize + localsize + 8 +%define height esp + pushsize + localsize + 12 +%define linesize esp + pushsize + localsize + 16 +%define tmp_width esp + 0 + push ebx + push ebp + push esi + push edi + sub esp, localsize ; room for tmp_width at [esp] + + pxor xmm0, xmm0 ; xmm0 stays zero: psadbw against it yields plain byte sums + 
mov esi, [ref] + mov edi, [sum_ref] + mov edx, [times_of_sum] + mov ebx, [linesize] + mov eax, [width] + lea ecx, [ebx+ebx*2] ; 3*linesize + + mov [tmp_width], eax + lea ebp, [esi+ebx*4] ; ebp -> row 4, i.e. the lower half of the 8-row window +FIRST_ROW: + movq xmm1, [esi] + movq xmm2, [esi+ebx] + movq xmm3, [esi+ebx*2] + movq xmm4, [esi+ecx] + + shufps xmm1, xmm2, 01000100b ; low qwords of both registers: two 8-byte rows packed into one register + shufps xmm3, xmm4, 01000100b + psadbw xmm1, xmm0 ; one byte sum per 8-byte row + psadbw xmm3, xmm0 + paddd xmm1, xmm3 + + movq xmm2, [ebp] + movq xmm3, [ebp+ebx] + movq xmm4, [ebp+ebx*2] + movq xmm5, [ebp+ecx] + + shufps xmm2, xmm3, 01000100b + shufps xmm4, xmm5, 01000100b + psadbw xmm2, xmm0 + psadbw xmm4, xmm0 + paddd xmm2, xmm4 + + paddd xmm1, xmm2 + pshufd xmm2, xmm1, 00001110b ; bring the upper qword's partial sum down + paddd xmm1, xmm2 ; low dword = sum of all 64 bytes of the 8x8 block + movd eax, xmm1 + mov [edi], ax ; store the block sum as a 16-bit value + inc dword [edx+eax*4] ; and count it: times_of_sum[sum]++ + + inc esi ; next block starts one byte to the right + inc ebp + add edi, 2 + + dec dword [tmp_width] + jg FIRST_ROW + + mov esi, [ref] + mov edi, [sum_ref] + mov ebp, [width] + dec dword [height] ; decremented directly in the argument slot; height-1 rows remain +HEIGHT_LOOP: + mov [tmp_width], ebp +WIDTH_LOOP: + movq xmm1, [esi+ebx*8] ; 8 bytes of the line entering the window (8 lines further down) + movq xmm2, [esi] ; 8 bytes of the line leaving the window + psadbw xmm1, xmm0 + psadbw xmm2, xmm0 + psubd xmm1, xmm2 ; change of the block sum when the window moves down one line + movd eax, xmm1 + mov cx, [edi] ; NOTE(review): only cx is replaced; the upper half of ecx still holds bits of 3*linesize, so the add below appears to rely on 3*linesize < 0x10000 -- confirm + add eax, ecx + + mov [edi+ebp*2], ax ; written width entries (one row of sums) further on + inc dword [edx+eax*4] + + inc esi + add edi, 2 + + dec dword [tmp_width] + jg WIDTH_LOOP + + add esi, ebx + sub esi, ebp ; esi += linesize - width: start of the next picture line + + dec dword [height] + jg HEIGHT_LOOP + + add esp, localsize + pop edi + pop esi + pop ebp + pop ebx +%undef pushsize +%undef localsize +%undef ref +%undef sum_ref +%undef times_of_sum +%undef width +%undef height +%undef linesize +%undef tmp_width + ret + + +%macro COUNT_SUM 3 ; xmm reg holding 32-bit sums, scratch GPR, 1 = shift the next sum into the low dword afterwards +%define xmm_reg %1 +%define tmp_reg %2 + movd tmp_reg, xmm_reg + inc dword [edx+tmp_reg*4] ; edx is expected to hold the counter table base +%if %3 == 1 + psrldq xmm_reg, 4 +%endif +%endmacro + + +;----------------------------------------------------------------------------- +; requires: width % 8 == 0 && height > 1 +;----------------------------------------------------------------------------- +;void SumOf8x8BlockOfFrame_sse4(uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const 
int32_t kiRefStride, +; uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); +;----------------------------------------------------------------------------- +; read extra (16 - (width % 8) ) mod 16 bytes of every line +; write extra (16 - (width % 8)*2 ) mod 16 bytes in the end of sum_ref +WELS_EXTERN SumOf8x8BlockOfFrame_sse4 +%define pushsize 16 +%define localsize 4 +%define ref esp + pushsize + localsize + 4 +%define sum_ref esp + pushsize + localsize + 20 +%define times_of_sum esp + pushsize + localsize + 24 +%define width esp + pushsize + localsize + 8 +%define height esp + pushsize + localsize + 12 +%define linesize esp + pushsize + localsize + 16 +%define tmp_width esp + 0 + push ebx + push ebp + push esi + push edi + sub esp, localsize + + pxor xmm0, xmm0 ; all-zero source operand for every mpsadbw below + mov esi, [ref] + mov edi, [sum_ref] + mov edx, [times_of_sum] + mov ebx, [linesize] + mov eax, [width] + lea ecx, [ebx+ebx*2] ; 3*linesize + + mov [tmp_width], eax + lea ebp, [esi+ebx*4] ; ebp -> row 4, i.e. the lower half of the 8-row window +FIRST_ROW_SSE4: + movdqu xmm1, [esi] + movdqu xmm3, [esi+ebx] + movdqu xmm5, [esi+ebx*2] + movdqu xmm7, [esi+ecx] + + movdqa xmm2, xmm1 + mpsadbw xmm1, xmm0, 000b ; against zero: 8 sliding sums of bytes [i..i+3], i = 0..7 + mpsadbw xmm2, xmm0, 100b ; same with the window starting 4 bytes later: bytes [i+4..i+7] + paddw xmm1, xmm2 ; 8 sums of line1 + + movdqa xmm4, xmm3 + mpsadbw xmm3, xmm0, 000b + mpsadbw xmm4, xmm0, 100b + paddw xmm3, xmm4 ; 8 sums of line2 + + movdqa xmm2, xmm5 + mpsadbw xmm5, xmm0, 000b + mpsadbw xmm2, xmm0, 100b + paddw xmm5, xmm2 ; 8 sums of line3 + + movdqa xmm4, xmm7 + mpsadbw xmm7, xmm0, 000b + mpsadbw xmm4, xmm0, 100b + paddw xmm7, xmm4 ; 8 sums of line4 + + paddw xmm1, xmm3 + paddw xmm5, xmm7 + paddw xmm1, xmm5 ; sum the upper 4 lines first + + movdqu xmm2, [ebp] + movdqu xmm3, [ebp+ebx] + movdqu xmm4, [ebp+ebx*2] + movdqu xmm5, [ebp+ecx] + + movdqa xmm6, xmm2 + mpsadbw xmm2, xmm0, 000b + mpsadbw xmm6, xmm0, 100b + paddw xmm2, xmm6 + + movdqa xmm7, xmm3 + mpsadbw xmm3, xmm0, 000b + mpsadbw xmm7, xmm0, 100b + paddw xmm3, xmm7 + + movdqa xmm6, xmm4 + mpsadbw xmm4, xmm0, 000b + mpsadbw xmm6, xmm0, 100b + 
paddw xmm4, xmm6 + + movdqa xmm7, xmm5 + mpsadbw xmm5, xmm0, 000b + mpsadbw xmm7, xmm0, 100b + paddw xmm5, xmm7 + + paddw xmm2, xmm3 + paddw xmm4, xmm5 + paddw xmm1, xmm2 + paddw xmm1, xmm4 ; sum of lines 1- 8 + + movdqu [edi], xmm1 ; 8 block sums stored at once + + movdqa xmm2, xmm1 + punpcklwd xmm1, xmm0 ; widen sums 0..3 to dwords for COUNT_SUM + punpckhwd xmm2, xmm0 ; widen sums 4..7 to dwords + + COUNT_SUM xmm1, eax, 1 + COUNT_SUM xmm1, eax, 1 + COUNT_SUM xmm1, eax, 1 + COUNT_SUM xmm1, eax, 0 + COUNT_SUM xmm2, eax, 1 + COUNT_SUM xmm2, eax, 1 + COUNT_SUM xmm2, eax, 1 + COUNT_SUM xmm2, eax, 0 + + lea esi, [esi+8] + lea ebp, [ebp+8] + lea edi, [edi+16] ; element size is 2 + + sub dword [tmp_width], 8 ; eight positions per iteration + jg near FIRST_ROW_SSE4 + + mov esi, [ref] + mov edi, [sum_ref] + mov ebp, [width] + dec dword [height] ; decremented directly in the argument slot; height-1 rows remain +HEIGHT_LOOP_SSE4: + mov ecx, ebp ; ecx = positions left in this row +WIDTH_LOOP_SSE4: + movdqu xmm1, [esi+ebx*8] ; line entering the window (8 lines further down) + movdqu xmm2, [esi] ; line leaving the window + movdqu xmm7, [edi] ; the 8 sums of the row above + + movdqa xmm3, xmm1 + mpsadbw xmm1, xmm0, 000b + mpsadbw xmm3, xmm0, 100b + paddw xmm1, xmm3 + + movdqa xmm4, xmm2 + mpsadbw xmm2, xmm0, 000b + mpsadbw xmm4, xmm0, 100b + paddw xmm2, xmm4 + + paddw xmm7, xmm1 + psubw xmm7, xmm2 ; previous sums + entering line - leaving line + movdqu [edi+ebp*2], xmm7 ; written width entries (one row of sums) further on + + movdqa xmm6, xmm7 + punpcklwd xmm7, xmm0 + punpckhwd xmm6, xmm0 + + COUNT_SUM xmm7, eax, 1 + COUNT_SUM xmm7, eax, 1 + COUNT_SUM xmm7, eax, 1 + COUNT_SUM xmm7, eax, 0 + COUNT_SUM xmm6, eax, 1 + COUNT_SUM xmm6, eax, 1 + COUNT_SUM xmm6, eax, 1 + COUNT_SUM xmm6, eax, 0 + + lea esi, [esi+8] + lea edi, [edi+16] + + sub ecx, 8 + jg near WIDTH_LOOP_SSE4 + + lea esi, [esi+ebx] + sub esi, ebp ; esi += linesize - width: start of the next picture line + + dec dword [height] + jg near HEIGHT_LOOP_SSE4 + + add esp, localsize + pop edi + pop esi + pop ebp + pop ebx +%undef pushsize +%undef localsize +%undef ref +%undef sum_ref +%undef times_of_sum +%undef width +%undef height +%undef linesize +%undef tmp_width + ret + + +;**************************************************************************************************************************************************** +;void SumOf16x16BlockOfFrame_sse2(uint8_t* pRefPicture, const int32_t kiWidth, const 
int32_t kiHeight, const int32_t kiRefStride, +; uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); +;**************************************************************************************************************************************************** +WELS_EXTERN SumOf16x16BlockOfFrame_sse2 +%define pushsize 16 +%define localsize 4 +%define ref esp + pushsize + localsize + 4 +%define sum_ref esp + pushsize + localsize + 20 +%define times_of_sum esp + pushsize + localsize + 24 +%define width esp + pushsize + localsize + 8 +%define height esp + pushsize + localsize + 12 +%define linesize esp + pushsize + localsize + 16 +%define tmp_width esp + push ebx + push ebp + push esi + push edi + sub esp, localsize + + pxor xmm0, xmm0 ; xmm0 stays zero: psadbw against it yields plain byte sums + mov esi, [ref] + mov edi, [sum_ref] + mov edx, [times_of_sum] + mov ebx, [linesize] + mov eax, [width] + + lea ecx, [ebx+ebx*2] ; 3*linesize + mov [tmp_width], eax +FIRST_ROW_X16H: + movdqu xmm1, [esi] ; lines 0..3 of the 16x16 block + movdqu xmm2, [esi+ebx] + movdqu xmm3, [esi+ebx*2] + movdqu xmm4, [esi+ecx] + + psadbw xmm1, xmm0 ; per line: two partial sums, in word 0 (bytes 0..7) and word 4 (bytes 8..15) + psadbw xmm2, xmm0 + psadbw xmm3, xmm0 + psadbw xmm4, xmm0 + paddw xmm1, xmm2 + paddw xmm3, xmm4 + paddw xmm1, xmm3 + + lea ebp, [esi+ebx*4] ; lines 4..7 + movdqu xmm2, [ebp] + movdqu xmm3, [ebp+ebx] + movdqu xmm4, [ebp+ebx*2] + movdqu xmm5, [ebp+ecx] + + psadbw xmm2, xmm0 + psadbw xmm3, xmm0 + psadbw xmm4, xmm0 + psadbw xmm5, xmm0 + paddw xmm2, xmm3 + paddw xmm4, xmm5 + paddw xmm2, xmm4 + + paddw xmm1, xmm2 + + lea ebp, [ebp+ebx*4] ; lines 8..11 + movdqu xmm2, [ebp] + movdqu xmm3, [ebp+ebx] + movdqu xmm4, [ebp+ebx*2] + movdqu xmm5, [ebp+ecx] + + psadbw xmm2, xmm0 + psadbw xmm3, xmm0 + psadbw xmm4, xmm0 + psadbw xmm5, xmm0 + paddw xmm2, xmm3 + paddw xmm4, xmm5 + paddw xmm2, xmm4 + + paddw xmm1, xmm2 + + lea ebp, [ebp+ebx*4] ; lines 12..15 + movdqu xmm2, [ebp] + movdqu xmm3, [ebp+ebx] + movdqu xmm4, [ebp+ebx*2] + movdqu xmm5, [ebp+ecx] + + psadbw xmm2, xmm0 + psadbw xmm3, xmm0 + psadbw xmm4, xmm0 + psadbw xmm5, xmm0 + paddw xmm2, xmm3 + paddw xmm4, xmm5 + paddw xmm2, xmm4 + + paddw xmm1, xmm2 + 
movdqa xmm2, xmm1 + punpckhwd xmm2, xmm0 ; word 4 (sum of the right-hand 8 columns) lands in word 0 + paddw xmm1, xmm2 ; word 0 = sum of all 256 bytes of the block + movd eax, xmm1 + mov [edi], ax + inc dword [edx+eax*4] ; times_of_sum[sum]++ + + inc esi + lea edi, [edi+2] + + dec dword [tmp_width] + jg near FIRST_ROW_X16H + + mov esi, [ref] + mov edi, [sum_ref] + mov ebp, [width] + dec dword [height] ; decremented directly in the argument slot; height-1 rows remain + + mov ecx, ebx + sal ecx, 4 ; ecx = 16*linesize: offset of the line that enters the window, 16 lines further down +HEIGHT_LOOP_X16: + mov [tmp_width], ebp +WIDTH_LOOP_X16: + movdqu xmm1, [esi+ecx] ; line entering the window + movdqu xmm2, [esi] ; line leaving the window + psadbw xmm1, xmm0 + psadbw xmm2, xmm0 + psubw xmm1, xmm2 + movdqa xmm2, xmm1 + punpckhwd xmm2, xmm0 + paddw xmm1, xmm2 ; word 0 = change of the block sum + movd eax, xmm1 + add ax, word [edi] ; plus the sum of the row above + mov [edi+ebp*2], ax ; written width entries (one row of sums) further on + inc dword [edx+eax*4] + + inc esi + add edi, 2 + + dec dword [tmp_width] + jg near WIDTH_LOOP_X16 + + add esi, ebx + sub esi, ebp ; esi += linesize - width: start of the next picture line + + dec dword [height] + jg near HEIGHT_LOOP_X16 + + add esp, localsize + pop edi + pop esi + pop ebp + pop ebx +%undef pushsize +%undef localsize +%undef ref +%undef sum_ref +%undef times_of_sum +%undef width +%undef height +%undef linesize +%undef tmp_width + ret + +; requires: width % 16 == 0 && height > 1 +;----------------------------------------------------------------------------------------------------------------------------- +;void SumOf16x16BlockOfFrame_sse4(uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride, +; uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); +;----------------------------------------------------------------------------------------------------------------------------- +; try 8 mv via offset +%macro SUM_LINE_X16_SSE41 5 ; ref, dst0, dst1, tmp0, tmp1 + movdqu %2, [%1] ; bytes 0..15 of the line + movdqu %3, [%1+8h] ; bytes 8..23 of the line + movdqa %4, %2 + movdqa %5, %3 + + mpsadbw %2, xmm0, 0 ; 000 B + mpsadbw %4, xmm0, 5 ; 101 B + mpsadbw %3, xmm0, 2 ; 010 B + mpsadbw %5, xmm0, 7 ; 111 B + paddw %2, %4 + paddw %3, %5 + paddw %2, %3 ; accumulate cost +%endmacro ; end of SUM_LINE_X16_SSE41 + +WELS_EXTERN SumOf16x16BlockOfFrame_sse4 +%define pushsize 16 +%define localsize 4 +%define ref esp + 
pushsize + localsize + 4 +%define sum_ref esp + pushsize + localsize + 20 +%define times_of_sum esp + pushsize + localsize + 24 +%define width esp + pushsize + localsize + 8 +%define height esp + pushsize + localsize + 12 +%define linesize esp + pushsize + localsize + 16 +%define tmp_width esp + push ebx + push ebp + push esi + push edi + sub esp, localsize + + pxor xmm0, xmm0 ; SUM_LINE_X16_SSE41 uses xmm0 as its all-zero mpsadbw source + mov esi, [ref] + mov edi, [sum_ref] + mov edx, [times_of_sum] + mov ebx, [linesize] + mov eax, [width] + + lea ecx, [ebx+ebx*2] ; 3*linesize + mov [tmp_width], eax +FIRST_ROW_X16_SSE4: + SUM_LINE_X16_SSE41 esi, xmm1, xmm2, xmm3, xmm4 ; lines 0..3: results in xmm1, xmm2, xmm3, xmm4 respectively + SUM_LINE_X16_SSE41 esi+ebx, xmm2, xmm3, xmm4, xmm5 + SUM_LINE_X16_SSE41 esi+ebx*2, xmm3, xmm4, xmm5, xmm6 + SUM_LINE_X16_SSE41 esi+ecx, xmm4, xmm5, xmm6, xmm7 + paddw xmm1, xmm2 + paddw xmm3, xmm4 + paddw xmm1, xmm3 + + lea ebp, [esi+ebx*4] ; lines 4..7 + SUM_LINE_X16_SSE41 ebp, xmm2, xmm3, xmm4, xmm5 + paddw xmm1, xmm2 + SUM_LINE_X16_SSE41 ebp+ebx, xmm2, xmm3, xmm4, xmm5 + paddw xmm1, xmm2 + SUM_LINE_X16_SSE41 ebp+ebx*2, xmm2, xmm3, xmm4, xmm5 + paddw xmm1, xmm2 + SUM_LINE_X16_SSE41 ebp+ecx, xmm2, xmm3, xmm4, xmm5 + paddw xmm1, xmm2 + + lea ebp, [ebp+ebx*4] ; lines 8..11 + SUM_LINE_X16_SSE41 ebp, xmm2, xmm3, xmm4, xmm5 + paddw xmm1, xmm2 + SUM_LINE_X16_SSE41 ebp+ebx, xmm2, xmm3, xmm4, xmm5 + paddw xmm1, xmm2 + SUM_LINE_X16_SSE41 ebp+ebx*2, xmm2, xmm3, xmm4, xmm5 + paddw xmm1, xmm2 + SUM_LINE_X16_SSE41 ebp+ecx, xmm2, xmm3, xmm4, xmm5 + paddw xmm1, xmm2 + + lea ebp, [ebp+ebx*4] ; lines 12..15 + SUM_LINE_X16_SSE41 ebp, xmm2, xmm3, xmm4, xmm5 + paddw xmm1, xmm2 + SUM_LINE_X16_SSE41 ebp+ebx, xmm2, xmm3, xmm4, xmm5 + paddw xmm1, xmm2 + SUM_LINE_X16_SSE41 ebp+ebx*2, xmm2, xmm3, xmm4, xmm5 + paddw xmm1, xmm2 + SUM_LINE_X16_SSE41 ebp+ecx, xmm2, xmm3, xmm4, xmm5 + paddw xmm1, xmm2 + + movdqa [edi], xmm1 ; aligned store: this address must be 16-byte aligned + movdqa xmm2, xmm1 + punpcklwd xmm1, xmm0 ; widen sums 0..3 to dwords for COUNT_SUM + punpckhwd xmm2, xmm0 ; widen sums 4..7 to dwords + + COUNT_SUM xmm1, eax, 1 + COUNT_SUM xmm1, eax, 1 + COUNT_SUM xmm1, eax, 1 + COUNT_SUM xmm1, eax, 0 + COUNT_SUM xmm2, eax, 1 + COUNT_SUM xmm2, eax, 1 + 
COUNT_SUM xmm2, eax, 1 + COUNT_SUM xmm2, eax, 0 + + lea esi, [esi+8] + lea edi, [edi+16] ; element size is 2 + + sub dword [tmp_width], 8 ; eight positions per iteration + jg near FIRST_ROW_X16_SSE4 + + mov esi, [ref] + mov edi, [sum_ref] + mov ebp, [width] + dec dword [height] ; decremented directly in the argument slot; height-1 rows remain + + mov ecx, ebx + sal ecx, 4 ; ecx = 16*linesize: offset of the line that enters the window, 16 lines further down + +HEIGHT_LOOP_X16_SSE4: + mov [tmp_width], ebp +WIDTH_LOOP_X16_SSE4: + movdqa xmm7, [edi] ; the 8 sums of the row above (aligned load) + SUM_LINE_X16_SSE41 esi+ecx, xmm1, xmm2, xmm3, xmm4 ; line entering the window -> xmm1 + SUM_LINE_X16_SSE41 esi, xmm2, xmm3, xmm4, xmm5 ; line leaving the window -> xmm2 + + paddw xmm7, xmm1 + psubw xmm7, xmm2 + movdqa [edi+ebp*2], xmm7 ; written width entries (one row of sums) further on + + movdqa xmm6, xmm7 + punpcklwd xmm7, xmm0 + punpckhwd xmm6, xmm0 + + COUNT_SUM xmm7, eax, 1 + COUNT_SUM xmm7, eax, 1 + COUNT_SUM xmm7, eax, 1 + COUNT_SUM xmm7, eax, 0 + COUNT_SUM xmm6, eax, 1 + COUNT_SUM xmm6, eax, 1 + COUNT_SUM xmm6, eax, 1 + COUNT_SUM xmm6, eax, 0 + + lea esi, [esi+8] + lea edi, [edi+16] + + sub dword [tmp_width], 8 + jg near WIDTH_LOOP_X16_SSE4 + + add esi, ebx + sub esi, ebp ; esi += linesize - width: start of the next picture line + + dec dword [height] + jg near HEIGHT_LOOP_X16_SSE4 + + add esp, localsize + pop edi + pop esi + pop ebp + pop ebx +%undef pushsize +%undef localsize +%undef ref +%undef sum_ref +%undef times_of_sum +%undef width +%undef height +%undef linesize +%undef tmp_width + ret + + +;----------------------------------------------------------------------------------------------------------------------------- +; void FillQpelLocationByFeatureValue_sse2(uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight, uint16_t** pFeatureValuePointerList) +;----------------------------------------------------------------------------------------------------------------------------- +WELS_EXTERN FillQpelLocationByFeatureValue_sse2 + push esi + push edi + push ebx + push ebp + + %define _ps 16 ; push size + %define _ls 4 ; local size + %define sum_ref esp+_ps+_ls+4 + %define pos_list esp+_ps+_ls+16 + %define width esp+_ps+_ls+8 + %define height esp+_ps+_ls+12 + %define i_height esp + sub esp, _ls + + mov esi, [sum_ref] + mov edi, 
[pos_list] + mov ebp, [width] + mov ebx, [height] + mov [i_height], ebx ; row counter kept in the local slot; ebx is reused as scratch below + + %assign push_num 5 + INIT_X86_32_PIC_NOPRESERVE ecx + movq xmm7, [pic(mv_x_inc_x4)] ; x_qpel inc + movq xmm6, [pic(mv_y_inc_x4)] ; y_qpel inc + movq xmm5, [pic(mx_x_offset_x4)] ; x_qpel vector + DEINIT_X86_32_PIC + pxor xmm4, xmm4 ; zero, used to widen words to dwords + pxor xmm3, xmm3 ; y_qpel vector +HASH_HEIGHT_LOOP_SSE2: + movdqa xmm2, xmm5 ; x_qpel vector + mov ecx, ebp ; ecx = positions left in this row +HASH_WIDTH_LOOP_SSE2: + movq xmm0, [esi] ; load 8 bytes = four 16-bit feature values + punpcklwd xmm0, xmm4 ; zero-extend them to dwords + movdqa xmm1, xmm2 + punpcklwd xmm1, xmm3 ; pair each x_qpel word with the current y_qpel word +%rep 3 + movd edx, xmm0 + lea ebx, [edi+edx*4] ; address of the list slot for this feature value + mov eax, [ebx] ; current write cursor stored in that slot + movd [eax], xmm1 ; append one (x_qpel, y_qpel) pair: 4 bytes + mov edx, [eax+4] ; explicitly load eax+4 due cache miss from vtune observation + lea eax, [eax+4] + mov [ebx], eax ; cursor advanced by one 4-byte entry + psrldq xmm1, 4 + psrldq xmm0, 4 +%endrep + movd edx, xmm0 + lea ebx, [edi+edx*4] + mov eax, [ebx] + movd [eax], xmm1 + mov edx, [eax+4] ; explicitly load eax+4 due cache miss from vtune observation + lea eax, [eax+4] + mov [ebx], eax + + paddw xmm2, xmm7 ; x_qpel values of the next four positions + lea esi, [esi+8] + sub ecx, 4 + jnz near HASH_WIDTH_LOOP_SSE2 ; four positions per iteration and an exact-zero exit: width must be a non-zero multiple of 4 + paddw xmm3, xmm6 ; y_qpel of the next row + dec dword [i_height] + jnz near HASH_HEIGHT_LOOP_SSE2 + + add esp, _ls + %undef _ps + %undef _ls + %undef sum_ref + %undef pos_list + %undef width + %undef height + %undef i_height + pop ebp + pop ebx + pop edi + pop esi + ret + +;--------------------------------------------------------------------------------------------------------------------------------------------------- +; void InitializeHashforFeature_sse2( uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize, +; uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList ) +;--------------------------------------------------------------------------------------------------------------------------------------------------- +WELS_EXTERN InitializeHashforFeature_sse2 + push ebx + push esi + push edi + push ebp + %define _ps 16 ; push size + mov edi, [esp+_ps+16] ; pPositionOfSum (4th argument, pLocationOfFeature) + mov ebp, [esp+_ps+20] ; sum_idx_list (5th argument, pFeatureValuePointerList) + mov esi, 
[esp+_ps+4] ; pTimesOfSum + mov ebx, [esp+_ps+8] ; pBuf + mov edx, [esp+_ps+12] ; list_sz + sar edx, 2 ; number of 4-entry groups; NOTE(review): the loop below tests after its body, so it appears to assume list_sz >= 4 -- confirm + mov ecx, 0 ; byte offset into the three tables + pxor xmm7, xmm7 +hash_assign_loop_x4_sse2: + movdqa xmm0, [esi+ecx] ; four 32-bit counts (aligned load) + pslld xmm0, 2 ; count * 4 = bytes to reserve for each feature value + + movdqa xmm1, xmm0 + pcmpeqd xmm1, xmm7 + movmskps eax, xmm1 + cmp eax, 0x0f ; all four counts zero? + je near hash_assign_with_copy_sse2 + +%assign x 0 +%rep 4 + lea eax, [edi+ecx+x] + mov [eax], ebx ; both pointer tables receive the current buffer cursor + lea eax, [ebp+ecx+x] + mov [eax], ebx + movd eax, xmm0 + add ebx, eax ; cursor += reserved bytes of this entry + psrldq xmm0, 4 +%assign x x+4 +%endrep + jmp near assign_next_sse2 + +hash_assign_with_copy_sse2: + movd xmm1, ebx + pshufd xmm2, xmm1, 0 ; cursor does not move: broadcast it to all four entries + movdqa [edi+ecx], xmm2 + movdqa [ebp+ecx], xmm2 + +assign_next_sse2: + add ecx, 16 + dec edx + jnz near hash_assign_loop_x4_sse2 + + mov edx, [esp+_ps+12] ; list_sz + and edx, 3 ; leftover entries are handled one at a time + jz near hash_assign_no_rem_sse2 +hash_assign_loop_x4_rem_sse2: + lea eax, [edi+ecx] + mov [eax], ebx + lea eax, [ebp+ecx] + mov [eax], ebx + mov eax, [esi+ecx] + sal eax, 2 + add ebx, eax + add ecx, 4 + dec edx + jnz near hash_assign_loop_x4_rem_sse2 + +hash_assign_no_rem_sse2: + %undef _ps + pop ebp + pop edi + pop esi + pop ebx + ret +%else + +;********************************************************************************************************************** +;void SumOf8x8BlockOfFrame_sse2(uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride, +; uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); +;********************************************************************************************************************* +WELS_EXTERN SumOf8x8BlockOfFrame_sse2 + %assign push_num 0 + LOAD_6_PARA ; macro defined outside this file; the comments below assume it maps the arguments to r0..r5 in order -- confirm + PUSH_XMM 6 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r2, r2d + SIGN_EXTENSION r3, r3d + push r12 + push r13 + push r0 + push r2 + push r4 ; r0, r2 and r4 are restored by the pops after FIRST_ROW + + pxor xmm0, xmm0 ; xmm0 stays zero: psadbw against it yields plain byte sums + lea r6, [r3+r3*2] ; 3*stride + + mov r12, r1 ;r12:tmp_width + lea r13, [r0+r3*4] ;r13 takes the role ebp has in the X86_32 version: pointer to row 4 +FIRST_ROW: + movq xmm1, [r0] + movq xmm2, [r0+r3] + movq xmm3, [r0+r3*2] + movq xmm4, [r0+r6] + + shufps xmm1, xmm2, 01000100b ; low qwords of both registers: two 8-byte rows packed into one register 
+ shufps xmm3, xmm4, 01000100b + psadbw xmm1, xmm0 + psadbw xmm3, xmm0 + paddd xmm1, xmm3 + + movq xmm2, [r13] + movq xmm3, [r13+r3] + movq xmm4, [r13+r3*2] + movq xmm5, [r13+r6] + + shufps xmm2, xmm3, 01000100b + shufps xmm4, xmm5, 01000100b + psadbw xmm2, xmm0 + psadbw xmm4, xmm0 + paddd xmm2, xmm4 + + paddd xmm1, xmm2 + pshufd xmm2, xmm1, 00001110b ; bring the upper qword's partial sum down + paddd xmm1, xmm2 ; low dword = sum of all 64 bytes of the 8x8 block + movd r2d, xmm1 ; r2 is scratch here; its original value was pushed above + mov [r4], r2w + inc dword [r5+r2*4] ; count the sum: table[sum]++ + + inc r0 + inc r13 + add r4, 2 + + dec r12 + jg FIRST_ROW + + pop r4 + pop r2 + pop r0 + mov r13, r2 + dec r13 ; r13 = height - 1 rows remaining +HEIGHT_LOOP: + mov r12, r1 +WIDTH_LOOP: + movq xmm1, [r0+r3*8] ; 8 bytes of the line entering the window (8 lines further down) + movq xmm2, [r0] ; 8 bytes of the line leaving the window + psadbw xmm1, xmm0 + psadbw xmm2, xmm0 + psubd xmm1, xmm2 + movd r2d, xmm1 + mov r6w, [r4] ; NOTE(review): only r6w is replaced; the upper bits of r6d still hold bits of 3*stride, so the add below appears to rely on 3*stride < 0x10000 -- confirm + add r2d, r6d + mov [r4+r1*2], r2w ; written width entries (one row of sums) further on + inc dword [r5+r2*4] + + inc r0 + add r4, 2 + + dec r12 + jg WIDTH_LOOP + + add r0, r3 + sub r0, r1 ; r0 += stride - width: start of the next picture line + + + dec r13 + jg HEIGHT_LOOP + + pop r13 + pop r12 + POP_XMM + LOAD_6_PARA_POP + ret + + +%macro COUNT_SUM 4 ; xmm reg holding 32-bit sums, 32-bit scratch, 64-bit register used for addressing, 1 = shift the next sum into place afterwards +%define xmm_reg %1 +%define tmp_dreg %2 +%define tmp_qreg %3 + movd tmp_dreg, xmm_reg + inc dword [r5+tmp_qreg*4] +%if %4 == 1 + psrldq xmm_reg, 4 +%endif +%endmacro + + +;----------------------------------------------------------------------------- +; requires: width % 8 == 0 && height > 1 +;----------------------------------------------------------------------------- +;void SumOf8x8BlockOfFrame_sse4(uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride, +; uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); +;----------------------------------------------------------------------------- +; read extra (16 - (width % 8) ) mod 16 bytes of every line +; write extra (16 - (width % 8)*2 ) mod 16 bytes in the end of sum_ref +WELS_EXTERN SumOf8x8BlockOfFrame_sse4 + %assign push_num 0 + LOAD_6_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r2, r2d + SIGN_EXTENSION r3, r3d + push r12 + push r13 + push r0 + push r2 + push r4 + + pxor xmm0, xmm0 + lea r6, [r3+r3*2] + + 
mov r12, r1 ;r12:tmp_width + lea r13, [r0+r3*4] ;rbp:r13 +FIRST_ROW_SSE4: + movdqu xmm1, [r0] + movdqu xmm3, [r0+r3] + movdqu xmm5, [r0+r3*2] + movdqu xmm7, [r0+r6] + + movdqa xmm2, xmm1 + mpsadbw xmm1, xmm0, 000b + mpsadbw xmm2, xmm0, 100b + paddw xmm1, xmm2 ; 8 sums of line1 + + movdqa xmm4, xmm3 + mpsadbw xmm3, xmm0, 000b + mpsadbw xmm4, xmm0, 100b + paddw xmm3, xmm4 ; 8 sums of line2 + + movdqa xmm2, xmm5 + mpsadbw xmm5, xmm0, 000b + mpsadbw xmm2, xmm0, 100b + paddw xmm5, xmm2 ; 8 sums of line3 + + movdqa xmm4, xmm7 + mpsadbw xmm7, xmm0, 000b + mpsadbw xmm4, xmm0, 100b + paddw xmm7, xmm4 ; 8 sums of line4 + + paddw xmm1, xmm3 + paddw xmm5, xmm7 + paddw xmm1, xmm5 ; sum the upper 4 lines first + + movdqu xmm2, [r13] + movdqu xmm3, [r13+r3] + movdqu xmm4, [r13+r3*2] + movdqu xmm5, [r13+r6] + + movdqa xmm6, xmm2 + mpsadbw xmm2, xmm0, 000b + mpsadbw xmm6, xmm0, 100b + paddw xmm2, xmm6 + + movdqa xmm7, xmm3 + mpsadbw xmm3, xmm0, 000b + mpsadbw xmm7, xmm0, 100b + paddw xmm3, xmm7 + + movdqa xmm6, xmm4 + mpsadbw xmm4, xmm0, 000b + mpsadbw xmm6, xmm0, 100b + paddw xmm4, xmm6 + + movdqa xmm7, xmm5 + mpsadbw xmm5, xmm0, 000b + mpsadbw xmm7, xmm0, 100b + paddw xmm5, xmm7 + + paddw xmm2, xmm3 + paddw xmm4, xmm5 + paddw xmm1, xmm2 + paddw xmm1, xmm4 ; sum of lines 1- 8 + + movdqu [r4], xmm1 + + movdqa xmm2, xmm1 + punpcklwd xmm1, xmm0 + punpckhwd xmm2, xmm0 + + COUNT_SUM xmm1, r2d, r2, 1 + COUNT_SUM xmm1, r2d, r2, 1 + COUNT_SUM xmm1, r2d, r2, 1 + COUNT_SUM xmm1, r2d, r2, 0 + COUNT_SUM xmm2, r2d, r2 ,1 + COUNT_SUM xmm2, r2d, r2 ,1 + COUNT_SUM xmm2, r2d, r2 ,1 + COUNT_SUM xmm2, r2d, r2 ,0 + + lea r0, [r0+8] + lea r13, [r13+8] + lea r4, [r4+16] ; element size is 2 + + sub r12, 8 + jg near FIRST_ROW_SSE4 + + pop r4 + pop r2 + pop r0 + mov r13, r2 + dec r13 +HEIGHT_LOOP_SSE4: + mov r12, r1 +WIDTH_LOOP_SSE4: + movdqu xmm1, [r0+r3*8] + movdqu xmm2, [r0] + movdqu xmm7, [r4] + + movdqa xmm3, xmm1 + mpsadbw xmm1, xmm0, 000b + mpsadbw xmm3, xmm0, 100b + paddw xmm1, xmm3 + + movdqa 
xmm4, xmm2 + mpsadbw xmm2, xmm0, 000b + mpsadbw xmm4, xmm0, 100b + paddw xmm2, xmm4 + + paddw xmm7, xmm1 + psubw xmm7, xmm2 + movdqu [r4+r1*2], xmm7 + + movdqa xmm6, xmm7 + punpcklwd xmm7, xmm0 + punpckhwd xmm6, xmm0 + + COUNT_SUM xmm7, r2d, r2, 1 + COUNT_SUM xmm7, r2d, r2, 1 + COUNT_SUM xmm7, r2d, r2, 1 + COUNT_SUM xmm7, r2d, r2, 0 + COUNT_SUM xmm6, r2d, r2, 1 + COUNT_SUM xmm6, r2d, r2, 1 + COUNT_SUM xmm6, r2d, r2, 1 + COUNT_SUM xmm6, r2d, r2, 0 + + lea r0, [r0+8] + lea r4, [r4+16] + + sub r12, 8 + jg near WIDTH_LOOP_SSE4 + + lea r0, [r0+r3] + sub r0, r1 + + dec r13 + jg near HEIGHT_LOOP_SSE4 + + pop r13 + pop r12 + POP_XMM + LOAD_6_PARA_POP + ret + + +;**************************************************************************************************************************************************** +;void SumOf16x16BlockOfFrame_sse2(uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride, +; uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); +;**************************************************************************************************************************************************** +WELS_EXTERN SumOf16x16BlockOfFrame_sse2 + %assign push_num 0 + LOAD_6_PARA + PUSH_XMM 6 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r2, r2d + SIGN_EXTENSION r3, r3d + push r12 + push r13 + push r0 + push r2 + push r4 + + pxor xmm0, xmm0 + lea r6, [r3+r3*2] + + mov r12, r1 ;r12:tmp_width +FIRST_ROW_X16H: + movdqu xmm1, [r0] + movdqu xmm2, [r0+r3] + movdqu xmm3, [r0+r3*2] + movdqu xmm4, [r0+r6] + + psadbw xmm1, xmm0 + psadbw xmm2, xmm0 + psadbw xmm3, xmm0 + psadbw xmm4, xmm0 + paddw xmm1, xmm2 + paddw xmm3, xmm4 + paddw xmm1, xmm3 + + lea r13, [r0+r3*4] ;ebp:r13 + movdqu xmm2, [r13] + movdqu xmm3, [r13+r3] + movdqu xmm4, [r13+r3*2] + movdqu xmm5, [r13+r6] + + psadbw xmm2, xmm0 + psadbw xmm3, xmm0 + psadbw xmm4, xmm0 + psadbw xmm5, xmm0 + paddw xmm2, xmm3 + paddw xmm4, xmm5 + paddw xmm2, xmm4 + + paddw xmm1, xmm2 + + lea r13, 
[r13+r3*4] + movdqu xmm2, [r13] + movdqu xmm3, [r13+r3] + movdqu xmm4, [r13+r3*2] + movdqu xmm5, [r13+r6] + + psadbw xmm2, xmm0 + psadbw xmm3, xmm0 + psadbw xmm4, xmm0 + psadbw xmm5, xmm0 + paddw xmm2, xmm3 + paddw xmm4, xmm5 + paddw xmm2, xmm4 + + paddw xmm1, xmm2 + + lea r13, [r13+r3*4] + movdqu xmm2, [r13] + movdqu xmm3, [r13+r3] + movdqu xmm4, [r13+r3*2] + movdqu xmm5, [r13+r6] + + psadbw xmm2, xmm0 + psadbw xmm3, xmm0 + psadbw xmm4, xmm0 + psadbw xmm5, xmm0 + paddw xmm2, xmm3 + paddw xmm4, xmm5 + paddw xmm2, xmm4 + + paddw xmm1, xmm2 + movdqa xmm2, xmm1 + punpckhwd xmm2, xmm0 + paddw xmm1, xmm2 + movd r2d, xmm1 + mov [r4], r2w + inc dword [r5+r2*4] + + inc r0 + lea r4, [r4+2] + + dec r12 + jg near FIRST_ROW_X16H + + pop r4 + pop r2 + pop r0 + mov r13, r2 + dec r13 + mov r6, r3 + sal r6, 4 ; succeeded 16th line +HEIGHT_LOOP_X16: + mov r12, r1 +WIDTH_LOOP_X16: + movdqu xmm1, [r0+r6] + movdqu xmm2, [r0] + psadbw xmm1, xmm0 + psadbw xmm2, xmm0 + psubw xmm1, xmm2 + movdqa xmm2, xmm1 + punpckhwd xmm2, xmm0 + paddw xmm1, xmm2 + movd r2d, xmm1 + add r2w, word [r4] + mov [r4+r1*2], r2w + inc dword [r5+r2*4] + + inc r0 + add r4, 2 + + dec r12 + jg near WIDTH_LOOP_X16 + + add r0, r3 + sub r0, r1 + + dec r13 + jg near HEIGHT_LOOP_X16 + + pop r13 + pop r12 + POP_XMM + LOAD_6_PARA_POP + ret + +; requires: width % 16 == 0 && height > 1 +;----------------------------------------------------------------------------------------------------------------------------- +;void SumOf16x16BlockOfFrame_sse4(uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride, +; uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); +;----------------------------------------------------------------------------------------------------------------------------- +; try 8 mv via offset +%macro SUM_LINE_X16_SSE41 5 ; ref, dst0, dst1, tmp0, tmp1 + movdqu %2, [%1] + movdqu %3, [%1+8h] + movdqa %4, %2 + movdqa %5, %3 + + mpsadbw %2, xmm0, 0 ; 000 B + mpsadbw %4, 
xmm0, 5 ; 101 B + mpsadbw %3, xmm0, 2 ; 010 B + mpsadbw %5, xmm0, 7 ; 111 B + paddw %2, %4 + paddw %3, %5 + paddw %2, %3 ; accumulate cost +%endmacro ; end of SAD_16x16_LINE_SSE41 + +WELS_EXTERN SumOf16x16BlockOfFrame_sse4 + %assign push_num 0 + LOAD_6_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r2, r2d + SIGN_EXTENSION r3, r3d + push r12 + push r13 + push r0 + push r2 + push r4 + + pxor xmm0, xmm0 + lea r6, [r3+r3*2] + + mov r12, r1 ;r12:tmp_width +FIRST_ROW_X16_SSE4: + SUM_LINE_X16_SSE41 r0, xmm1, xmm2, xmm3, xmm4 + SUM_LINE_X16_SSE41 r0+r3, xmm2, xmm3, xmm4, xmm5 + SUM_LINE_X16_SSE41 r0+r3*2,xmm3, xmm4, xmm5, xmm6 + SUM_LINE_X16_SSE41 r0+r6, xmm4, xmm5, xmm6, xmm7 + paddw xmm1, xmm2 + paddw xmm3, xmm4 + paddw xmm1, xmm3 + + lea r13, [r0+r3*4] + SUM_LINE_X16_SSE41 r13, xmm2, xmm3, xmm4, xmm5 + paddw xmm1, xmm2 + SUM_LINE_X16_SSE41 r13+r3, xmm2, xmm3, xmm4, xmm5 + paddw xmm1, xmm2 + SUM_LINE_X16_SSE41 r13+r3*2, xmm2, xmm3, xmm4, xmm5 + paddw xmm1, xmm2 + SUM_LINE_X16_SSE41 r13+r6, xmm2, xmm3, xmm4, xmm5 + paddw xmm1, xmm2 + + lea r13, [r13+r3*4] + SUM_LINE_X16_SSE41 r13, xmm2, xmm3, xmm4, xmm5 + paddw xmm1, xmm2 + SUM_LINE_X16_SSE41 r13+r3, xmm2, xmm3, xmm4, xmm5 + paddw xmm1, xmm2 + SUM_LINE_X16_SSE41 r13+r3*2, xmm2, xmm3, xmm4, xmm5 + paddw xmm1, xmm2 + SUM_LINE_X16_SSE41 r13+r6, xmm2, xmm3, xmm4, xmm5 + paddw xmm1, xmm2 + + lea r13, [r13+r3*4] + SUM_LINE_X16_SSE41 r13, xmm2, xmm3, xmm4, xmm5 + paddw xmm1, xmm2 + SUM_LINE_X16_SSE41 r13+r3, xmm2, xmm3, xmm4, xmm5 + paddw xmm1, xmm2 + SUM_LINE_X16_SSE41 r13+r3*2, xmm2, xmm3, xmm4, xmm5 + paddw xmm1, xmm2 + SUM_LINE_X16_SSE41 r13+r6, xmm2, xmm3, xmm4, xmm5 + paddw xmm1, xmm2 + + movdqa [r4], xmm1 + movdqa xmm2, xmm1 + punpcklwd xmm1, xmm0 + punpckhwd xmm2, xmm0 + + COUNT_SUM xmm1, r2d, r2, 1 + COUNT_SUM xmm1, r2d, r2, 1 + COUNT_SUM xmm1, r2d, r2, 1 + COUNT_SUM xmm1, r2d, r2, 0 + COUNT_SUM xmm2, r2d, r2, 1 + COUNT_SUM xmm2, r2d, r2, 1 + COUNT_SUM xmm2, r2d, r2, 1 + COUNT_SUM xmm2, r2d, r2, 0 + + lea 
r0, [r0+8] + lea r4, [r4+16] ; element size is 2 + + sub r12, 8 + jg near FIRST_ROW_X16_SSE4 + + pop r4 + pop r2 + pop r0 + mov r13, r2 + dec r13 + mov r6, r3 + sal r6, 4 ; succeeded 16th line + +HEIGHT_LOOP_X16_SSE4: + mov r12, r1 +WIDTH_LOOP_X16_SSE4: + movdqa xmm7, [r4] + SUM_LINE_X16_SSE41 r0+r6, xmm1, xmm2, xmm3, xmm4 + SUM_LINE_X16_SSE41 r0, xmm2, xmm3, xmm4, xmm5 + + paddw xmm7, xmm1 + psubw xmm7, xmm2 + movdqa [r4+r1*2], xmm7 + + movdqa xmm6, xmm7 + punpcklwd xmm7, xmm0 + punpckhwd xmm6, xmm0 + + COUNT_SUM xmm7, r2d, r2, 1 + COUNT_SUM xmm7, r2d, r2, 1 + COUNT_SUM xmm7, r2d, r2, 1 + COUNT_SUM xmm7, r2d, r2, 0 + COUNT_SUM xmm6, r2d, r2, 1 + COUNT_SUM xmm6, r2d, r2, 1 + COUNT_SUM xmm6, r2d, r2, 1 + COUNT_SUM xmm6, r2d, r2, 0 + + lea r0, [r0+8] + lea r4, [r4+16] + + sub r12, 8 + jg near WIDTH_LOOP_X16_SSE4 + + add r0, r3 + sub r0, r1 + + dec r13 + jg near HEIGHT_LOOP_X16_SSE4 + + pop r13 + pop r12 + POP_XMM + LOAD_6_PARA_POP + ret + +;----------------------------------------------------------------------------------------------------------------------------- +; void FillQpelLocationByFeatureValue_sse2(uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight, uint16_t** pFeatureValuePointerList) +;----------------------------------------------------------------------------------------------------------------------------- +WELS_EXTERN FillQpelLocationByFeatureValue_sse2 + %assign push_num 0 + LOAD_4_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r2, r2d + push r12 + push r13 + mov r12, r2 + + movq xmm7, [mv_x_inc_x4] ; x_qpel inc + movq xmm6, [mv_y_inc_x4] ; y_qpel inc + movq xmm5, [mx_x_offset_x4] ; x_qpel vector + pxor xmm4, xmm4 + pxor xmm3, xmm3 ; y_qpel vector +HASH_HEIGHT_LOOP_SSE2: + movdqa xmm2, xmm5 ; x_qpel vector + mov r4, r1 +HASH_WIDTH_LOOP_SSE2: + movq xmm0, [r0] ; load x8 sum + punpcklwd xmm0, xmm4 + movdqa xmm1, xmm2 + punpcklwd xmm1, xmm3 +%rep 3 + movd r2d, xmm0 ;edx:r3 + lea r5, [r3+r2*8] ;ebx:r5 + mov r6, [r5] 
;eax:r6 + movd [r6], xmm1 + mov r13, [r6+4] ; explictly load eax+4 due cache miss from vtune observation + lea r6, [r6+4] + mov [r5], r6 + psrldq xmm1, 4 + psrldq xmm0, 4 +%endrep + movd r2d, xmm0 + lea r5, [r3+r2*8] ;ebx:r5 + mov r6, [r5] ;eax:r6 + movd [r6], xmm1 + mov r13, [r6+4] ; explictly load eax+4 due cache miss from vtune observation + lea r6, [r6+4] + mov [r5], r6 + + paddw xmm2, xmm7 + lea r0, [r0+8] + sub r4, 4 + jnz near HASH_WIDTH_LOOP_SSE2 + paddw xmm3, xmm6 + dec r12 + jnz near HASH_HEIGHT_LOOP_SSE2 + + pop r13 + pop r12 + POP_XMM + ret + +;--------------------------------------------------------------------------------------------------------------------------------------------------- +; void InitializeHashforFeature_sse2( uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize, +; uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList); +;uint16_t** pPositionOfSum, uint16_t** sum_idx_list, uint32_t* pTimesOfSum, uint16_t* pBuf, const int32_t list_sz ) +;--------------------------------------------------------------------------------------------------------------------------------------------------- +WELS_EXTERN InitializeHashforFeature_sse2 + %assign push_num 0 + LOAD_5_PARA + SIGN_EXTENSION r2, r2d + push r12 + push r13 + mov r12, r2 + sar r2, 2 + mov r5, 0 ;r5:ecx + xor r6, r6 + pxor xmm3, xmm3 +hash_assign_loop_x4_sse2: + movdqa xmm0, [r0+r5] + pslld xmm0, 2 + + movdqa xmm1, xmm0 + pcmpeqd xmm1, xmm3 + movmskps r6, xmm1 + cmp r6, 0x0f + jz near hash_assign_with_copy_sse2 + +%assign x 0 +%rep 4 + lea r13, [r3+r5*2+x] + mov [r13], r1 + lea r13, [r4+r5*2+x] + mov [r13], r1 + movd r6d, xmm0 + add r1, r6 + psrldq xmm0, 4 +%assign x x+8 +%endrep + jmp near assign_next_sse2 + +hash_assign_with_copy_sse2: + movq xmm1, r1 + pshufd xmm2, xmm1, 01000100b + movdqa [r3+r5*2], xmm2 + movdqa [r4+r5*2], xmm2 + movdqa [r3+r5*2+16], xmm2 + movdqa [r4+r5*2+16], xmm2 + +assign_next_sse2: + add r5, 16 + dec r2 + jnz near 
hash_assign_loop_x4_sse2 + + and r12, 3 + jz near hash_assign_no_rem_sse2 +hash_assign_loop_x4_rem_sse2: + lea r13, [r3+r5*2] + mov [r13], r1 + lea r13, [r4+r5*2] + mov [r13], r1 + mov r6d, [r0+r5] + sal r6, 2 + add r1, r6 + add r5, 4 + dec r12 + jnz near hash_assign_loop_x4_rem_sse2 + +hash_assign_no_rem_sse2: + pop r13 + pop r12 + ret + +%endif + +;********************************************************************************************************************************** +; int32_t SumOf8x8SingleBlock_sse2(uint8_t* ref0, int32_t linesize) +;********************************************************************************************************************************** +WELS_EXTERN SumOf8x8SingleBlock_sse2 + %assign push_num 0 + LOAD_2_PARA + SIGN_EXTENSION r1, r1d + + pxor xmm0, xmm0 + movq xmm1, [r0] + movhps xmm1, [r0+r1] + lea r0, [r0+2*r1] + movq xmm2, [r0] + movhps xmm2, [r0+r1] + lea r0, [r0+2*r1] + movq xmm3, [r0] + movhps xmm3, [r0+r1] + lea r0, [r0+2*r1] + movq xmm4, [r0] + movhps xmm4, [r0+r1] + + psadbw xmm1, xmm0 + psadbw xmm2, xmm0 + psadbw xmm3, xmm0 + psadbw xmm4, xmm0 + paddw xmm1, xmm2 + paddw xmm3, xmm4 + paddw xmm1, xmm3 + + movdqa xmm2, xmm1 + punpckhwd xmm2, xmm0 + paddw xmm1, xmm2 + + movd retrd, xmm1 + ret + +;********************************************************************************************************************************** +; int32_t SumOf16x16SingleBlock_sse2(uint8_t* ref0, int32_t linesize) +;********************************************************************************************************************************** +WELS_EXTERN SumOf16x16SingleBlock_sse2 + %assign push_num 0 + LOAD_2_PARA + PUSH_XMM 6 + SIGN_EXTENSION r1, r1d + + pxor xmm0, xmm0 + movdqa xmm1, [r0] + movdqa xmm2, [r0+r1] + lea r0, [r0+2*r1] + movdqa xmm3, [r0] + movdqa xmm4, [r0+r1] + psadbw xmm1, xmm0 + psadbw xmm2, xmm0 + psadbw xmm3, xmm0 + psadbw xmm4, xmm0 + paddw xmm1, xmm2 + paddw xmm3, xmm4 + paddw xmm1, xmm3 + + lea r0, [r0+2*r1] + 
movdqa xmm2, [r0] + movdqa xmm3, [r0+r1] + lea r0, [r0+2*r1] + movdqa xmm4, [r0] + movdqa xmm5, [r0+r1] + psadbw xmm2, xmm0 + psadbw xmm3, xmm0 + psadbw xmm4, xmm0 + psadbw xmm5, xmm0 + paddw xmm2, xmm3 + paddw xmm4, xmm5 + paddw xmm2, xmm4 + + paddw xmm1, xmm2 + + lea r0, [r0+2*r1] + movdqa xmm2, [r0] + movdqa xmm3, [r0+r1] + lea r0, [r0+2*r1] + movdqa xmm4, [r0] + movdqa xmm5, [r0+r1] + psadbw xmm2, xmm0 + psadbw xmm3, xmm0 + psadbw xmm4, xmm0 + psadbw xmm5, xmm0 + paddw xmm2, xmm3 + paddw xmm4, xmm5 + paddw xmm2, xmm4 + + paddw xmm1, xmm2 + + lea r0, [r0+2*r1] + movdqa xmm2, [r0] + movdqa xmm3, [r0+r1] + lea r0, [r0+2*r1] + movdqa xmm4, [r0] + movdqa xmm5, [r0+r1] + psadbw xmm2, xmm0 + psadbw xmm3, xmm0 + psadbw xmm4, xmm0 + psadbw xmm5, xmm0 + paddw xmm2, xmm3 + paddw xmm4, xmm5 + paddw xmm2, xmm4 + + paddw xmm1, xmm2 + + movdqa xmm2, xmm1 + punpckhwd xmm2, xmm0 + paddw xmm1, xmm2 + + movd retrd, xmm1 + POP_XMM + ret + +;********************************************************************************************************************************** +; +; uint32_t SampleSad16x16Hor8_sse41( uint8_t *src, int32_t stride_src, uint8_t *ref, int32_t stride_ref, uint16 base_cost[8], int32_t *index_min_cost ) +; +; \note: +; src need align with 16 bytes, ref is optional +; \return value: +; return minimal SAD cost, according index carried by index_min_cost +;********************************************************************************************************************************** +; try 8 mv via offset +; xmm7 store sad costs +%macro SAD_16x16_LINE_SSE41 4 ; src, ref, stride_src, stride_ref + movdqa xmm0, [%1] + movdqu xmm1, [%2] + movdqu xmm2, [%2+8h] + movdqa xmm3, xmm1 + movdqa xmm4, xmm2 + + mpsadbw xmm1, xmm0, 0 ; 000 B + paddw xmm7, xmm1 ; accumulate cost + + mpsadbw xmm3, xmm0, 5 ; 101 B + paddw xmm7, xmm3 ; accumulate cost + + mpsadbw xmm2, xmm0, 2 ; 010 B + paddw xmm7, xmm2 ; accumulate cost + + mpsadbw xmm4, xmm0, 7 ; 111 B + paddw xmm7, xmm4 ; 
accumulate cost + + add %1, %3 + add %2, %4 +%endmacro ; end of SAD_16x16_LINE_SSE41 +%macro SAD_16x16_LINE_SSE41E 4 ; src, ref, stride_src, stride_ref + movdqa xmm0, [%1] + movdqu xmm1, [%2] + movdqu xmm2, [%2+8h] + movdqa xmm3, xmm1 + movdqa xmm4, xmm2 + + mpsadbw xmm1, xmm0, 0 ; 000 B + paddw xmm7, xmm1 ; accumulate cost + + mpsadbw xmm3, xmm0, 5 ; 101 B + paddw xmm7, xmm3 ; accumulate cost + + mpsadbw xmm2, xmm0, 2 ; 010 B + paddw xmm7, xmm2 ; accumulate cost + + mpsadbw xmm4, xmm0, 7 ; 111 B + paddw xmm7, xmm4 ; accumulate cost +%endmacro ; end of SAD_16x16_LINE_SSE41E + +WELS_EXTERN SampleSad16x16Hor8_sse41 + ;push ebx + ;push esi + ;mov eax, [esp+12] ; src + ;mov ecx, [esp+16] ; stride_src + ;mov ebx, [esp+20] ; ref + ;mov edx, [esp+24] ; stride_ref + ;mov esi, [esp+28] ; base_cost + %assign push_num 0 + LOAD_6_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + pxor xmm7, xmm7 + + SAD_16x16_LINE_SSE41 r0, r2, r1, r3 + SAD_16x16_LINE_SSE41 r0, r2, r1, r3 + SAD_16x16_LINE_SSE41 r0, r2, r1, r3 + SAD_16x16_LINE_SSE41 r0, r2, r1, r3 + + SAD_16x16_LINE_SSE41 r0, r2, r1, r3 + SAD_16x16_LINE_SSE41 r0, r2, r1, r3 + SAD_16x16_LINE_SSE41 r0, r2, r1, r3 + SAD_16x16_LINE_SSE41 r0, r2, r1, r3 + + SAD_16x16_LINE_SSE41 r0, r2, r1, r3 + SAD_16x16_LINE_SSE41 r0, r2, r1, r3 + SAD_16x16_LINE_SSE41 r0, r2, r1, r3 + SAD_16x16_LINE_SSE41 r0, r2, r1, r3 + + SAD_16x16_LINE_SSE41 r0, r2, r1, r3 + SAD_16x16_LINE_SSE41 r0, r2, r1, r3 + SAD_16x16_LINE_SSE41 r0, r2, r1, r3 + SAD_16x16_LINE_SSE41E r0, r2, r1, r3 + + pxor xmm0, xmm0 + movdqa xmm6, xmm7 + punpcklwd xmm6, xmm0 + punpckhwd xmm7, xmm0 + + movdqa xmm5, [r4] + movdqa xmm4, xmm5 + punpcklwd xmm4, xmm0 + punpckhwd xmm5, xmm0 + + paddd xmm4, xmm6 + paddd xmm5, xmm7 + movdqa xmm3, xmm4 + pminud xmm3, xmm5 + pshufd xmm2, xmm3, 01001110B + pminud xmm2, xmm3 + pshufd xmm3, xmm2, 10110001B + pminud xmm2, xmm3 + movd retrd, xmm2 + pcmpeqd xmm4, xmm2 + movmskps r2d, xmm4 + bsf r1d, r2d + jnz near WRITE_INDEX + + pcmpeqd 
xmm5, xmm2 + movmskps r2d, xmm5 + bsf r1d, r2d + add r1d, 4 + +WRITE_INDEX: + mov [r5], r1d + POP_XMM + LOAD_6_PARA_POP + ret + +;********************************************************************************************************************************** +; +; uint32_t SampleSad8x8Hor8_sse41( uint8_t *src, int32_t stride_src, uint8_t *ref, int32_t stride_ref, uint16_t base_cost[8], int32_t *index_min_cost ) +; +; \note: +; src and ref is optional to align with 16 due inter 8x8 +; \return value: +; return minimal SAD cost, according index carried by index_min_cost +; +;********************************************************************************************************************************** +; try 8 mv via offset +; xmm7 store sad costs +%macro SAD_8x8_LINE_SSE41 4 ; src, ref, stride_src, stride_ref + movdqu xmm0, [%1] + movdqu xmm1, [%2] + movdqa xmm2, xmm1 + + mpsadbw xmm1, xmm0, 0 ; 000 B + paddw xmm7, xmm1 ; accumulate cost + + mpsadbw xmm2, xmm0, 5 ; 101 B + paddw xmm7, xmm2 ; accumulate cost + + add %1, %3 + add %2, %4 +%endmacro ; end of SAD_8x8_LINE_SSE41 +%macro SAD_8x8_LINE_SSE41E 4 ; src, ref, stride_src, stride_ref + movdqu xmm0, [%1] + movdqu xmm1, [%2] + movdqa xmm2, xmm1 + + mpsadbw xmm1, xmm0, 0 ; 000 B + paddw xmm7, xmm1 ; accumulate cost + + mpsadbw xmm2, xmm0, 5 ; 101 B + paddw xmm7, xmm2 ; accumulate cost +%endmacro ; end of SAD_8x8_LINE_SSE41E + +WELS_EXTERN SampleSad8x8Hor8_sse41 + %assign push_num 0 + LOAD_6_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + movdqa xmm7, [r4] ; load base cost list + + SAD_8x8_LINE_SSE41 r0, r2, r1, r3 + SAD_8x8_LINE_SSE41 r0, r2, r1, r3 + SAD_8x8_LINE_SSE41 r0, r2, r1, r3 + SAD_8x8_LINE_SSE41 r0, r2, r1, r3 + + SAD_8x8_LINE_SSE41 r0, r2, r1, r3 + SAD_8x8_LINE_SSE41 r0, r2, r1, r3 + SAD_8x8_LINE_SSE41 r0, r2, r1, r3 + SAD_8x8_LINE_SSE41E r0, r2, r1, r3 + + phminposuw xmm0, xmm7 ; horizon search the minimal sad cost and its index + movd retrd, xmm0 ; for return: DEST[15:0] <- MIN, 
DEST[31:16] <- INDEX + mov r1d, retrd + and retrd, 0xFFFF + sar r1d, 16 + mov [r5], r1d + + POP_XMM + LOAD_6_PARA_POP + ret diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/x86/score.asm b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/x86/score.asm new file mode 100644 index 000000000..7c98a7825 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/core/x86/score.asm @@ -0,0 +1,363 @@ +;*! +;* \copy +;* Copyright (c) 2009-2013, Cisco Systems +;* All rights reserved. +;* +;* Redistribution and use in source and binary forms, with or without +;* modification, are permitted provided that the following conditions +;* are met: +;* +;* * Redistributions of source code must retain the above copyright +;* notice, this list of conditions and the following disclaimer. +;* +;* * Redistributions in binary form must reproduce the above copyright +;* notice, this list of conditions and the following disclaimer in +;* the documentation and/or other materials provided with the +;* distribution. +;* +;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +;* POSSIBILITY OF SUCH DAMAGE. 
+;* +;* +;* score.asm +;* +;* Abstract +;* scan/score/count of sse2 +;* +;* History +;* 8/21/2009 Created +;* +;* +;*************************************************************************/ + +%include "asm_inc.asm" + +;*********************************************************************** +; Macros +;*********************************************************************** + +;*********************************************************************** +; Local Data (Read Only) +;*********************************************************************** +%ifdef X86_32_PICASM +SECTION .text align=16 +%else +SECTION .rodata align=16 +%endif + +;align 16 +;se2_2 dw 2, 2, 2, 2, 2, 2, 2, 2 +align 16 +sse2_1: dw 1, 1, 1, 1, 1, 1, 1, 1 +align 16 +sse2_b1: db 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 +i_ds_table: db 3, 2, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +align 16 +sse2_plane_inc_minus: dw -7, -6, -5, -4, -3, -2, -1, 0 +align 16 +sse2_plane_inc: dw 1, 2, 3, 4, 5, 6, 7, 8 +align 16 +sse2_plane_dec: dw 8, 7, 6, 5, 4, 3, 2, 1 +align 16 +pb_scanacdc_maska:db 0,1,2,3,8,9,14,15,10,11,4,5,6,7,12,13 +align 16 +pb_scanacdc_maskb:db 2,3,8,9,10,11,4,5,0,1,6,7,12,13,14,15 +align 16 +pb_scandc_maska:db 2,3,8,9,14,15,10,11,4,5,6,7,12,13,0,1 +align 16 +pb_scandc_maskb:db 8,9,10,11,4,5,0,1,6,7,12,13,14,15,128,128 + +align 16 +nozero_count_table: +db 0,1,1,2,1,2,2,3,1,2 +db 2,3,2,3,3,4,1,2,2,3 +db 2,3,3,4,2,3,3,4,3,4 +db 4,5,1,2,2,3,2,3,3,4 +db 2,3,3,4,3,4,4,5,2,3 +db 3,4,3,4,4,5,3,4,4,5 +db 4,5,5,6,1,2,2,3,2,3 +db 3,4,2,3,3,4,3,4,4,5 +db 2,3,3,4,3,4,4,5,3,4 +db 4,5,4,5,5,6,2,3,3,4 +db 3,4,4,5,3,4,4,5,4,5 +db 5,6,3,4,4,5,4,5,5,6 +db 4,5,5,6,5,6,6,7,1,2 +db 2,3,2,3,3,4,2,3,3,4 +db 3,4,4,5,2,3,3,4,3,4 +db 4,5,3,4,4,5,4,5,5,6 +db 2,3,3,4,3,4,4,5,3,4 +db 4,5,4,5,5,6,3,4,4,5 +db 4,5,5,6,4,5,5,6,5,6 +db 6,7,2,3,3,4,3,4,4,5 +db 3,4,4,5,4,5,5,6,3,4 +db 4,5,4,5,5,6,4,5,5,6 +db 5,6,6,7,3,4,4,5,4,5 +db 5,6,4,5,5,6,5,6,6,7 +db 4,5,5,6,5,6,6,7,5,6 +db 6,7,6,7,7,8 + +align 16 +high_mask_table: + 
db 0, 0, 0, 3, 0, 2, 3, 6, 0, 2 + db 2, 5, 3, 5, 6, 9, 0, 1, 2, 5 + db 2, 4, 5, 8, 3, 5, 5, 8, 6, 8 + db 9,12, 0, 1, 1, 4, 2, 4, 5, 8 + db 2, 4, 4, 7, 5, 7, 8,11, 3, 4 + db 5, 8, 5, 7, 8,11, 6, 8, 8,11 + db 9,11,12,15, 0, 1, 1, 4, 1, 3 + db 4, 7, 2, 4, 4, 7, 5, 7, 8,11 + db 2, 3, 4, 7, 4, 6, 7,10, 5, 7 + db 7,10, 8,10,11,14, 3, 4, 4, 7 + db 5, 7, 8,11, 5, 7, 7,10, 8,10 + db 11,14, 6, 7, 8,11, 8,10,11,14 + db 9,11,11,14,12,14,15,18, 0, 0 + db 1, 4, 1, 3, 4, 7, 1, 3, 3, 6 + db 4, 6, 7,10, 2, 3, 4, 7, 4, 6 + db 7,10, 5, 7, 7,10, 8,10,11,14 + db 2, 3, 3, 6, 4, 6, 7,10, 4, 6 + db 6, 9, 7, 9,10,13, 5, 6, 7,10 + db 7, 9,10,13, 8,10,10,13,11,13 + db 14,17, 3, 4, 4, 7, 4, 6, 7,10 + db 5, 7, 7,10, 8,10,11,14, 5, 6 + db 7,10, 7, 9,10,13, 8,10,10,13 + db 11,13,14,17, 6, 7, 7,10, 8,10 + db 11,14, 8,10,10,13,11,13,14,17 + db 9,10,11,14,11,13,14,17,12,14 + db 14,17,15,17,18,21 + +align 16 +low_mask_table: + db 0, 3, 2, 6, 2, 5, 5, 9, 1, 5 + db 4, 8, 5, 8, 8,12, 1, 4, 4, 8 + db 4, 7, 7,11, 4, 8, 7,11, 8,11 + db 11,15, 1, 4, 3, 7, 4, 7, 7,11 + db 3, 7, 6,10, 7,10,10,14, 4, 7 + db 7,11, 7,10,10,14, 7,11,10,14 + db 11,14,14,18, 0, 4, 3, 7, 3, 6 + db 6,10, 3, 7, 6,10, 7,10,10,14 + db 3, 6, 6,10, 6, 9, 9,13, 6,10 + db 9,13,10,13,13,17, 4, 7, 6,10 + db 7,10,10,14, 6,10, 9,13,10,13 + db 13,17, 7,10,10,14,10,13,13,17 + db 10,14,13,17,14,17,17,21, 0, 3 + db 3, 7, 3, 6, 6,10, 2, 6, 5, 9 + db 6, 9, 9,13, 3, 6, 6,10, 6, 9 + db 9,13, 6,10, 9,13,10,13,13,17 + db 3, 6, 5, 9, 6, 9, 9,13, 5, 9 + db 8,12, 9,12,12,16, 6, 9, 9,13 + db 9,12,12,16, 9,13,12,16,13,16 + db 16,20, 3, 7, 6,10, 6, 9, 9,13 + db 6,10, 9,13,10,13,13,17, 6, 9 + db 9,13, 9,12,12,16, 9,13,12,16 + db 13,16,16,20, 7,10, 9,13,10,13 + db 13,17, 9,13,12,16,13,16,16,20 + db 10,13,13,17,13,16,16,20,13,17 + db 16,20,17,20,20,24 + + +SECTION .text + +;*********************************************************************** +;void WelsScan4x4DcAc_sse2( int16_t level[16], int16_t *pDct ) 
+;*********************************************************************** +WELS_EXTERN WelsScan4x4DcAc_sse2 + %ifdef X86_32 + push r3 + %assign push_num 1 + %else + %assign push_num 0 + %endif + LOAD_2_PARA + movdqa xmm0, [r1] ; 7 6 5 4 3 2 1 0 + movdqa xmm1, [r1+16] ; f e d c b a 9 8 + pextrw r2d, xmm0, 7 ; ecx = 7 + pextrw r3d, xmm1, 2 ; edx = a + pextrw r1d, xmm0, 5 ; eax = 5 + pinsrw xmm1, r2d, 2 ; f e d c b 7 9 8 + pinsrw xmm0, r1d, 7 ; 5 6 5 4 3 2 1 0 + pextrw r2d, xmm1, 0 ; ecx = 8 + pinsrw xmm0, r2d, 5 ; 5 6 8 4 3 2 1 0 + pinsrw xmm1, r3d, 0 ; f e d c b 7 9 a + pshufd xmm2, xmm0, 0xd8 ; 5 6 3 2 8 4 1 0 + pshufd xmm3, xmm1, 0xd8 ; f e b 7 d c 9 a + pshufhw xmm0, xmm2, 0x93 ; 6 3 2 5 8 4 1 0 + pshuflw xmm1, xmm3, 0x39 ; f e b 7 a d c 9 + movdqa [r0],xmm0 + movdqa [r0+16], xmm1 + %ifdef X86_32 + pop r3 + %endif + ret + +;*********************************************************************** +;void WelsScan4x4DcAc_ssse3( int16_t level[16], int16_t *pDct ) +;*********************************************************************** +WELS_EXTERN WelsScan4x4DcAc_ssse3 + %assign push_num 0 + INIT_X86_32_PIC r3 + LOAD_2_PARA + movdqa xmm0, [r1] + movdqa xmm1, [r1+16] + pextrw r2d, xmm0, 7 ; ecx = [7] + pextrw r1d, xmm1, 0 ; eax = [8] + pinsrw xmm0, r1d, 7 ; xmm0[7] = [8] + pinsrw xmm1, r2d, 0 ; xmm1[0] = [7] + pshufb xmm1, [pic(pb_scanacdc_maskb)] + pshufb xmm0, [pic(pb_scanacdc_maska)] + + movdqa [r0],xmm0 + movdqa [r0+16], xmm1 + DEINIT_X86_32_PIC + ret +;*********************************************************************** +;void WelsScan4x4Ac_sse2( int16_t* zig_value, int16_t* pDct ) +;*********************************************************************** +WELS_EXTERN WelsScan4x4Ac_sse2 + %assign push_num 0 + LOAD_2_PARA + movdqa xmm0, [r1] + movdqa xmm1, [r1+16] + movdqa xmm2, xmm0 + punpcklqdq xmm0, xmm1 + punpckhqdq xmm2, xmm1 + + movdqa xmm3, xmm0 + punpckldq xmm0, xmm2 + punpckhdq xmm3, xmm2 + pextrw r1d , xmm0, 3 + pextrw r2d , xmm0, 7 + pinsrw xmm0, r1d, 
7 + pextrw r1d, xmm3, 4 + pinsrw xmm3, r2d, 4 + pextrw r2d, xmm3, 0 + pinsrw xmm3, r1d, 0 + pinsrw xmm0, r2d, 3 + + pshufhw xmm1, xmm0, 0x93 + pshuflw xmm2, xmm3, 0x39 + + movdqa xmm3, xmm2 + psrldq xmm1, 2 + pslldq xmm3, 14 + por xmm1, xmm3 + psrldq xmm2, 2 + movdqa [r0],xmm1 + movdqa [r0+16], xmm2 + ret + + +;*********************************************************************** +;void int32_t WelsCalculateSingleCtr4x4_sse2( int16_t *pDct ); +;*********************************************************************** +WELS_EXTERN WelsCalculateSingleCtr4x4_sse2 + %ifdef X86_32 + push r3 + %assign push_num 1 + %else + %assign push_num 0 + %endif + INIT_X86_32_PIC r4 + LOAD_1_PARA + movdqa xmm0, [r0] + movdqa xmm1, [r0+16] + + packsswb xmm0, xmm1 + ; below is the register map: r0 - eax, r1 - ebx, r2 - ecx, r3 - edx + xor r3, r3 + pxor xmm3, xmm3 + pcmpeqb xmm0, xmm3 + pmovmskb r3d, xmm0 + + xor r3, 0xffff + + xor r0, r0 + mov r2, 7 + mov r1, 8 +.loop_low8_find1: + bt r3, r2 + jc .loop_high8_find1 + dec r2 + jnz .loop_low8_find1 +.loop_high8_find1: + bt r3, r1 + jc .find1end + inc r1 + cmp r1,16 + jb .loop_high8_find1 +.find1end: + sub r1, r2 + sub r1, 1 + lea r2, [pic(i_ds_table)] + add r0b, [r2+r1] + mov r1, r3 + and r3, 0xff + shr r1, 8 + and r1, 0xff + lea r2 , [pic(low_mask_table)] + add r0b, [r2 +r3] + lea r2, [pic(high_mask_table)] + add r0b, [r2+r1] + DEINIT_X86_32_PIC + %ifdef X86_32 + pop r3 + %else + mov retrd, r0d + %endif + ret + + +;*********************************************************************** +; int32_t WelsGetNoneZeroCount_sse2(int16_t* level); +;*********************************************************************** +WELS_EXTERN WelsGetNoneZeroCount_sse2 + %assign push_num 0 + INIT_X86_32_PIC r3 + LOAD_1_PARA + movdqa xmm0, [r0] + movdqa xmm1, [r0+16] + pxor xmm2, xmm2 + pcmpeqw xmm0, xmm2 + pcmpeqw xmm1, xmm2 + packsswb xmm1, xmm0 + xor r1, r1 + pmovmskb r1d, xmm1 + xor r1d, 0xffff + mov r2, r1 + and r1, 0xff + shr r2, 8 +; and ecx, 0xff ; 
we do not need this due to high 16bits equal to 0 yet +; xor retr, retr + ;add al, [nozero_count_table+r2] + lea r0 , [pic(nozero_count_table)] + movzx r2, byte [r0+r2] + movzx r1, byte [r0+r1] + mov retrq, r2 + add retrq, r1 + ;add al, [nozero_count_table+r1] + DEINIT_X86_32_PIC + ret + +;*********************************************************************** +; int32_t WelsGetNoneZeroCount_sse42(int16_t* level); +;*********************************************************************** +WELS_EXTERN WelsGetNoneZeroCount_sse42 + %assign push_num 0 + LOAD_1_PARA + movdqa xmm0, [r0] + packsswb xmm0, [r0 + 16] + pxor xmm1, xmm1 + pcmpeqb xmm0, xmm1 + pmovmskb retrd, xmm0 + xor retrd, 0FFFFh + popcnt retrd, retrd + ret diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/plus/inc/welsEncoderExt.h b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/plus/inc/welsEncoderExt.h new file mode 100644 index 000000000..b281f8ff6 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/plus/inc/welsEncoderExt.h @@ -0,0 +1,129 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * + * Abstract + * Cisco OpenH264 encoder extension utilization interface for T26 + * + * History + * 4/24/2009 Created + * + * + *************************************************************************/ +#if !defined(WELS_PLUS_WELSENCODEREXT_H) +#define WELS_PLUS_WELSENCODEREXT_H + +#include "codec_api.h" +#include "codec_def.h" +#include "codec_app_def.h" +#include "welsCodecTrace.h" +#include "encoder_context.h" +#include "param_svc.h" +#include "extern.h" +#include "cpu.h" + +//#define OUTPUT_BIT_STREAM +//#define DUMP_SRC_PICTURE +//#define REC_FRAME_COUNT + +class ISVCEncoder; +namespace WelsEnc { +class CWelsH264SVCEncoder : public ISVCEncoder { + public: + CWelsH264SVCEncoder(); + virtual ~CWelsH264SVCEncoder(); + + /* Interfaces override from ISVCEncoder */ + /* + * return: CM_RETURN: 0 - success; otherwise - failed; + */ + virtual int EXTAPI Initialize (const SEncParamBase* argv); + virtual int EXTAPI InitializeExt (const SEncParamExt* argv); + + virtual int EXTAPI GetDefaultParams (SEncParamExt* argv); + + virtual int EXTAPI Uninitialize(); + + /* + * return: 0 - success; otherwise - failed; + */ + virtual int EXTAPI EncodeFrame (const SSourcePicture* kpSrcPic, SFrameBSInfo* pBsInfo); + virtual int EncodeFrameInternal (const SSourcePicture* kpSrcPic, SFrameBSInfo* pBsInfo); + + /* + * return: 0 - success; otherwise - failed; + */ + virtual int EXTAPI EncodeParameterSets (SFrameBSInfo* pBsInfo); + /* + * 
return: 0 - success; otherwise - failed; + */ + virtual int EXTAPI ForceIntraFrame (bool bIDR,int32_t iLayerId = -1); + + /************************************************************************ + * InDataFormat, IDRInterval, SVC Encode Param, Frame Rate, Bitrate,.. + ************************************************************************/ + /* + * return: CM_RETURN: 0 - success; otherwise - failed; + */ + virtual int EXTAPI SetOption (ENCODER_OPTION opt_id, void* option); + virtual int EXTAPI GetOption (ENCODER_OPTION opt_id, void* option); + + private: + int InitializeInternal (SWelsSvcCodingParam* argv); + void TraceParamInfo(SEncParamExt *pParam); + void LogStatistics (const int64_t kiCurrentFrameTs,int32_t iMaxDid); + void UpdateStatistics(SFrameBSInfo* pBsInfo, const int64_t kiCurrentFrameMs); + + sWelsEncCtx* m_pEncContext; + + welsCodecTrace* m_pWelsTrace; + int32_t m_iMaxPicWidth; + int32_t m_iMaxPicHeight; + + int32_t m_iCspInternal; + bool m_bInitialFlag; + +#ifdef OUTPUT_BIT_STREAM + FILE* m_pFileBs; + FILE* m_pFileBsSize; + bool m_bSwitch; + int32_t m_iSwitchTimes; +#endif//OUTPUT_BIT_STREAM + +#ifdef REC_FRAME_COUNT + int32_t m_uiCountFrameNum; +#endif//REC_FRAME_COUNT + + void InitEncoder (void); + void DumpSrcPicture (const SSourcePicture* pSrcPic, const int iUsageType); +}; +} +#endif // !defined(WELS_PLUS_WELSENCODEREXT_H) diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/plus/src/DllEntry.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/plus/src/DllEntry.cpp new file mode 100644 index 000000000..1376a1b6e --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/plus/src/DllEntry.cpp @@ -0,0 +1,43 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include + +///////////////////////////////////////////////////////////////////////////// +// DLL Entry Point + +BOOL WINAPI DllEntryPoint (HINSTANCE hInstance, DWORD dwReason, LPVOID lpReserved) { + if (DLL_PROCESS_ATTACH == dwReason) { + DisableThreadLibraryCalls (hInstance); + } + return TRUE; +} diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/plus/src/welsEncoderExt.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/plus/src/welsEncoderExt.cpp new file mode 100644 index 000000000..52a124a25 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/plus/src/welsEncoderExt.cpp @@ -0,0 +1,1395 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include "welsEncoderExt.h" +#include "welsCodecTrace.h" +#include "typedefs.h" +#include "wels_const.h" +#include "utils.h" +#include "macros.h" +#include "version.h" +#include "crt_util_safe_x.h" // Safe CRT routines like util for cross platforms +#include "ref_list_mgr_svc.h" +#include "codec_ver.h" + +#include +#include +#if defined(_WIN32) /*&& defined(_DEBUG)*/ + +#include +#include +#include +#include +#else +#include +#endif + +namespace WelsEnc { + +/* + * CWelsH264SVCEncoder class implementation + */ +CWelsH264SVCEncoder::CWelsH264SVCEncoder() + : m_pEncContext (NULL), + m_pWelsTrace (NULL), + m_iMaxPicWidth (0), + m_iMaxPicHeight (0), + m_iCspInternal (0), + m_bInitialFlag (false) { +#ifdef REC_FRAME_COUNT + int32_t m_uiCountFrameNum = 0; +#endif//REC_FRAME_COUNT + +#ifdef OUTPUT_BIT_STREAM + char strStreamFileName[1024] = { 0 }; //for .264 + int32_t iBufferUsed = 0; + int32_t iBufferLeft = 1023; + int32_t iCurUsed; + + char strLenFileName[1024] = { 0 }; //for .len + int32_t iBufferUsedSize = 0; + int32_t iBufferLeftSize = 1023; + int32_t iCurUsedSize; +#endif//OUTPUT_BIT_STREAM + +#ifdef OUTPUT_BIT_STREAM + SWelsTime tTime; + + WelsGetTimeOfDay (&tTime); + + iCurUsed = WelsSnprintf (strStreamFileName, iBufferLeft, "enc_bs_0x%p_", (void*)this); + iCurUsedSize = WelsSnprintf (strLenFileName, iBufferLeftSize, "enc_size_0x%p_", (void*)this); + + + iBufferUsed += iCurUsed; + iBufferLeft -= iCurUsed; 
+ if (iBufferLeft > 0) { + iCurUsed = WelsStrftime (&strStreamFileName[iBufferUsed], iBufferLeft, "%y%m%d%H%M%S", &tTime); + iBufferUsed += iCurUsed; + iBufferLeft -= iCurUsed; + } + + iBufferUsedSize += iCurUsedSize; + iBufferLeftSize -= iCurUsedSize; + if (iBufferLeftSize > 0) { + iCurUsedSize = WelsStrftime (&strLenFileName[iBufferUsedSize], iBufferLeftSize, "%y%m%d%H%M%S", &tTime); + iBufferUsedSize += iCurUsedSize; + iBufferLeftSize -= iCurUsedSize; + } + + if (iBufferLeft > 0) { + iCurUsed = WelsSnprintf (&strStreamFileName[iBufferUsed], iBufferLeft, ".%03.3u.264", + WelsGetMillisecond (&tTime)); + iBufferUsed += iCurUsed; + iBufferLeft -= iCurUsed; + } + + if (iBufferLeftSize > 0) { + iCurUsedSize = WelsSnprintf (&strLenFileName[iBufferUsedSize], iBufferLeftSize, ".%03.3u.len", + WelsGetMillisecond (&tTime)); + iBufferUsedSize += iCurUsedSize; + iBufferLeftSize -= iCurUsedSize; + } + + m_pFileBs = WelsFopen (strStreamFileName, "wb"); + m_pFileBsSize = WelsFopen (strLenFileName, "wb"); + + m_bSwitch = false; + m_iSwitchTimes = 0; +#endif//OUTPUT_BIT_STREAM + + InitEncoder(); +} + +CWelsH264SVCEncoder::~CWelsH264SVCEncoder() { + if (m_pWelsTrace) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "CWelsH264SVCEncoder::~CWelsH264SVCEncoder()"); + } + +#ifdef REC_FRAME_COUNT + m_uiCountFrameNum = 0; +#endif//REC_FRAME_COUNT + +#ifdef OUTPUT_BIT_STREAM + if (m_pFileBs) { + WelsFclose (m_pFileBs); + m_pFileBs = NULL; + } + if (m_pFileBsSize) { + WelsFclose (m_pFileBsSize); + m_pFileBsSize = NULL; + } + m_bSwitch = false; + m_iSwitchTimes = 0; +#endif//OUTPUT_BIT_STREAM + + Uninitialize(); + + if (m_pWelsTrace) { + delete m_pWelsTrace; + m_pWelsTrace = NULL; + } +} + +void CWelsH264SVCEncoder::InitEncoder (void) { + + m_pWelsTrace = new welsCodecTrace(); + if (m_pWelsTrace == NULL) { + return; + } + m_pWelsTrace->SetCodecInstance (this); +} + +/* Interfaces override from ISVCEncoder */ + +int CWelsH264SVCEncoder::GetDefaultParams (SEncParamExt* argv) { + 
SWelsSvcCodingParam::FillDefault (*argv); + return cmResultSuccess; +} + +/* + * SVC Encoder Initialization + */ +int CWelsH264SVCEncoder::Initialize (const SEncParamBase* argv) { + if (m_pWelsTrace == NULL) { + return cmMallocMemeError; + } + + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "CWelsH264SVCEncoder::InitEncoder(), openh264 codec version = %s", + VERSION_NUMBER); + + if (NULL == argv) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "CWelsH264SVCEncoder::Initialize(), invalid argv= 0x%p", + argv); + return cmInitParaError; + } + + SWelsSvcCodingParam sConfig; + // Convert SEncParamBase into WelsSVCParamConfig here.. + if (sConfig.ParamBaseTranscode (*argv)) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, + "CWelsH264SVCEncoder::Initialize(), parameter_translation failed."); + TraceParamInfo (&sConfig); + Uninitialize(); + return cmInitParaError; + } + + return InitializeInternal (&sConfig); +} + +int CWelsH264SVCEncoder::InitializeExt (const SEncParamExt* argv) { + if (m_pWelsTrace == NULL) { + return cmMallocMemeError; + } + + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "CWelsH264SVCEncoder::InitEncoder(), openh264 codec version = %s", + VERSION_NUMBER); + + if (NULL == argv) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "CWelsH264SVCEncoder::InitializeExt(), invalid argv= 0x%p", + argv); + return cmInitParaError; + } + + SWelsSvcCodingParam sConfig; + // Convert SEncParamExt into WelsSVCParamConfig here.. 
+ if (sConfig.ParamTranscode (*argv)) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, + "CWelsH264SVCEncoder::InitializeExt(), parameter_translation failed."); + TraceParamInfo (&sConfig); + Uninitialize(); + return cmInitParaError; + } + + return InitializeInternal (&sConfig); +} + +int CWelsH264SVCEncoder::InitializeInternal (SWelsSvcCodingParam* pCfg) { + if (NULL == pCfg) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "CWelsH264SVCEncoder::Initialize(), invalid argv= 0x%p.", + pCfg); + return cmInitParaError; + } + + if (m_bInitialFlag) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_WARNING, + "CWelsH264SVCEncoder::Initialize(), reinitialize, m_bInitialFlag= %d.", + m_bInitialFlag); + Uninitialize(); + } + // Check valid parameters + const int32_t iNumOfLayers = pCfg->iSpatialLayerNum; + if (iNumOfLayers < 1 || iNumOfLayers > MAX_DEPENDENCY_LAYER) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, + "CWelsH264SVCEncoder::Initialize(), invalid iSpatialLayerNum= %d, valid at range of [1, %d].", iNumOfLayers, + MAX_DEPENDENCY_LAYER); + Uninitialize(); + return cmInitParaError; + } + if (pCfg->iTemporalLayerNum < 1) + pCfg->iTemporalLayerNum = 1; + if (pCfg->iTemporalLayerNum > MAX_TEMPORAL_LEVEL) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, + "CWelsH264SVCEncoder::Initialize(), invalid iTemporalLayerNum= %d, valid at range of [1, %d].", + pCfg->iTemporalLayerNum, MAX_TEMPORAL_LEVEL); + Uninitialize(); + return cmInitParaError; + } + + // assert( cfg.uiGopSize >= 1 && ( cfg.uiIntraPeriod && (cfg.uiIntraPeriod % cfg.uiGopSize) == 0) ); + + if (pCfg->uiGopSize < 1 || pCfg->uiGopSize > MAX_GOP_SIZE) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, + "CWelsH264SVCEncoder::Initialize(), invalid uiGopSize= %d, valid at range of [1, %d].", pCfg->uiGopSize, + MAX_GOP_SIZE); + Uninitialize(); + return cmInitParaError; + } + + if (!WELS_POWER2_IF (pCfg->uiGopSize)) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, + 
"CWelsH264SVCEncoder::Initialize(), invalid uiGopSize= %d, valid at range of [1, %d] and yield to power of 2.", + pCfg->uiGopSize, MAX_GOP_SIZE); + Uninitialize(); + return cmInitParaError; + } + + if (pCfg->uiIntraPeriod && pCfg->uiIntraPeriod < pCfg->uiGopSize) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, + "CWelsH264SVCEncoder::Initialize(), invalid uiIntraPeriod= %d, valid in case it equals to 0 for unlimited intra period or exceeds specified uiGopSize= %d.", + pCfg->uiIntraPeriod, pCfg->uiGopSize); + Uninitialize(); + return cmInitParaError; + } + + if ((pCfg->uiIntraPeriod && (pCfg->uiIntraPeriod & (pCfg->uiGopSize - 1)) != 0)) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, + "CWelsH264SVCEncoder::Initialize(), invalid uiIntraPeriod= %d, valid in case it equals to 0 for unlimited intra period or exceeds specified uiGopSize= %d also multiple of it.", + pCfg->uiIntraPeriod, pCfg->uiGopSize); + Uninitialize(); + return cmInitParaError; + } + if (pCfg->iUsageType == SCREEN_CONTENT_REAL_TIME) { + if (pCfg->bEnableLongTermReference) { + pCfg->iLTRRefNum = LONG_TERM_REF_NUM_SCREEN; + if (pCfg->iNumRefFrame == AUTO_REF_PIC_COUNT) + pCfg->iNumRefFrame = WELS_MAX (1, WELS_LOG2 (pCfg->uiGopSize)) + pCfg->iLTRRefNum; + } else { + pCfg->iLTRRefNum = 0; + if (pCfg->iNumRefFrame == AUTO_REF_PIC_COUNT) + pCfg->iNumRefFrame = WELS_MAX (1, pCfg->uiGopSize >> 1); + } + } else { + pCfg->iLTRRefNum = pCfg->bEnableLongTermReference ? LONG_TERM_REF_NUM : 0; + if (pCfg->iNumRefFrame == AUTO_REF_PIC_COUNT) { + pCfg->iNumRefFrame = ((pCfg->uiGopSize >> 1) > 1) ? 
((pCfg->uiGopSize >> 1) + pCfg->iLTRRefNum) : + (MIN_REF_PIC_COUNT + pCfg->iLTRRefNum); + pCfg->iNumRefFrame = WELS_CLIP3 (pCfg->iNumRefFrame, MIN_REF_PIC_COUNT, MAX_REFERENCE_PICTURE_COUNT_NUM_CAMERA); + } + } + + if (pCfg->iLtrMarkPeriod == 0) { + pCfg->iLtrMarkPeriod = 30; + } + + const int32_t kiDecStages = WELS_LOG2 (pCfg->uiGopSize); + pCfg->iTemporalLayerNum = (int8_t) (1 + kiDecStages); + pCfg->iLoopFilterAlphaC0Offset = WELS_CLIP3 (pCfg->iLoopFilterAlphaC0Offset, -6, 6); + pCfg->iLoopFilterBetaOffset = WELS_CLIP3 (pCfg->iLoopFilterBetaOffset, -6, 6); + + // decide property list size between INIT_TYPE_PARAMETER_BASED/INIT_TYPE_CONFIG_BASED + m_iMaxPicWidth = pCfg->iPicWidth; + m_iMaxPicHeight = pCfg->iPicHeight; + + TraceParamInfo (pCfg); + if (WelsInitEncoderExt (&m_pEncContext, pCfg, &m_pWelsTrace->m_sLogCtx, NULL)) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "CWelsH264SVCEncoder::Initialize(), WelsInitEncoderExt failed."); + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_DEBUG, + "Problematic Input Base Param: iUsageType=%d, Resolution=%dx%d, FR=%f, TLayerNum=%d, DLayerNum=%d", + pCfg->iUsageType, pCfg->iPicWidth, pCfg->iPicHeight, pCfg->fMaxFrameRate, pCfg->iTemporalLayerNum, + pCfg->iSpatialLayerNum); + Uninitialize(); + return cmInitParaError; + } + + m_bInitialFlag = true; + + return cmResultSuccess; +} + +/* + * SVC Encoder Uninitialization + */ +int32_t CWelsH264SVCEncoder::Uninitialize() { + if (!m_bInitialFlag) { + return 0; + } + + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "CWelsH264SVCEncoder::Uninitialize(), openh264 codec version = %s.", + VERSION_NUMBER); + + if (NULL != m_pEncContext) { + WelsUninitEncoderExt (&m_pEncContext); + m_pEncContext = NULL; + } + + m_bInitialFlag = false; + + return 0; +} + + +/* + * SVC core encoding + */ +int CWelsH264SVCEncoder::EncodeFrame (const SSourcePicture* kpSrcPic, SFrameBSInfo* pBsInfo) { + if (! 
(kpSrcPic && m_bInitialFlag && pBsInfo)) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "CWelsH264SVCEncoder::EncodeFrame(), cmInitParaError."); + return cmInitParaError; + } + if (kpSrcPic->iColorFormat != videoFormatI420) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "CWelsH264SVCEncoder::EncodeFrame(), wrong iColorFormat %d", + kpSrcPic->iColorFormat); + return cmInitParaError; + } + + const int32_t kiEncoderReturn = EncodeFrameInternal (kpSrcPic, pBsInfo); + + if (kiEncoderReturn != cmResultSuccess) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "CWelsH264SVCEncoder::EncodeFrame(), kiEncoderReturn %d", + kiEncoderReturn); + return kiEncoderReturn; + } + +#ifdef REC_FRAME_COUNT + ++ m_uiCountFrameNum; + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsH264SVCEncoder::EncodeFrame(), m_uiCountFrameNum= %d,", m_uiCountFrameNum); +#endif//REC_FRAME_COUNT + + return kiEncoderReturn; +} + + +int CWelsH264SVCEncoder ::EncodeFrameInternal (const SSourcePicture* pSrcPic, SFrameBSInfo* pBsInfo) { + + if ((pSrcPic->iPicWidth < 16) || ((pSrcPic->iPicHeight < 16))) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "Don't support width(%d) or height(%d) which is less than 16!", + pSrcPic->iPicWidth, pSrcPic->iPicHeight); + return cmUnsupportedData; + } + + const int64_t kiBeforeFrameUs = WelsTime(); + const int32_t kiEncoderReturn = WelsEncoderEncodeExt (m_pEncContext, pBsInfo, pSrcPic); + const int64_t kiCurrentFrameMs = (WelsTime() - kiBeforeFrameUs) / 1000; + if ((kiEncoderReturn == ENC_RETURN_MEMALLOCERR) || (kiEncoderReturn == ENC_RETURN_MEMOVERFLOWFOUND) + || (kiEncoderReturn == ENC_RETURN_VLCOVERFLOWFOUND)) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_DEBUG, "CWelsH264SVCEncoder::EncodeFrame() not succeed, err=%d", + kiEncoderReturn); + WelsUninitEncoderExt (&m_pEncContext); + return cmMallocMemeError; + } else if ((kiEncoderReturn != ENC_RETURN_SUCCESS) && (kiEncoderReturn == ENC_RETURN_CORRECTED)) { + WelsLog 
(&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "unexpected return(%d) from EncodeFrameInternal()!", + kiEncoderReturn); + return cmUnknownReason; + } + + UpdateStatistics (pBsInfo, kiCurrentFrameMs); + + ///////////////////for test +#ifdef OUTPUT_BIT_STREAM + if (pBsInfo->eFrameType != videoFrameTypeInvalid && pBsInfo->eFrameType != videoFrameTypeSkip) { + SLayerBSInfo* pLayer = NULL; + int32_t i = 0, j = 0, iCurLayerBits = 0, total_bits = 0; + + if (m_bSwitch) { + if (m_pFileBs) { + WelsFclose (m_pFileBs); + m_pFileBs = NULL; + } + if (m_pFileBsSize) { + WelsFclose (m_pFileBsSize); + m_pFileBsSize = NULL; + } + char strStreamFileName[128] = {0}; + WelsSnprintf (strStreamFileName, 128, "adj%d_w%d.264", m_iSwitchTimes, + m_pEncContext->pSvcParam->iPicWidth); + m_pFileBs = WelsFopen (strStreamFileName, "wb"); + WelsSnprintf (strStreamFileName, 128, "adj%d_w%d_size.iLen", m_iSwitchTimes, + m_pEncContext->pSvcParam->iPicWidth); + m_pFileBsSize = WelsFopen (strStreamFileName, "wb"); + + + m_bSwitch = false; + } + + for (i = 0; i < pBsInfo->iLayerNum; i++) { + pLayer = &pBsInfo->sLayerInfo[i]; + + iCurLayerBits = 0; + for (j = 0; j < pLayer->iNalCount; j++) { + iCurLayerBits += pLayer->pNalLengthInByte[j]; + } + total_bits += iCurLayerBits; + if (m_pFileBs != NULL) + WelsFwrite (pLayer->pBsBuf, 1, iCurLayerBits, m_pFileBs); + } + + if (m_pFileBsSize != NULL) + WelsFwrite (&total_bits, sizeof (int32_t), 1, m_pFileBsSize); + } +#endif //OUTPUT_BIT_STREAM +#ifdef DUMP_SRC_PICTURE + DumpSrcPicture (pSrcPic, m_pEncContext->pSvcParam->iUsageType); +#endif // DUMP_SRC_PICTURE + + return cmResultSuccess; + +} + +int CWelsH264SVCEncoder::EncodeParameterSets (SFrameBSInfo* pBsInfo) { + return WelsEncoderEncodeParameterSets (m_pEncContext, pBsInfo); +} + +/* + * Force key frame + */ +int CWelsH264SVCEncoder::ForceIntraFrame (bool bIDR, int iLayerId) { + if (bIDR) { + if (! 
(m_pEncContext && m_bInitialFlag)) { + return 1; + } + + ForceCodingIDR (m_pEncContext, iLayerId); + } else { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsH264SVCEncoder::ForceIntraFrame(),nothing to do as bIDR set to false"); + } + + return 0; +} +void CWelsH264SVCEncoder::TraceParamInfo (SEncParamExt* pParam) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "iUsageType = %d,iPicWidth= %d;iPicHeight= %d;iTargetBitrate= %d;iMaxBitrate= %d;iRCMode= %d;iPaddingFlag= %d;iTemporalLayerNum= %d;iSpatialLayerNum= %d;fFrameRate= %.6ff;uiIntraPeriod= %d;" + "eSpsPpsIdStrategy = %d;bPrefixNalAddingCtrl = %d;bSimulcastAVC=%d;bEnableDenoise= %d;bEnableBackgroundDetection= %d;bEnableSceneChangeDetect = %d;bEnableAdaptiveQuant= %d;bEnableFrameSkip= %d;bEnableLongTermReference= %d;iLtrMarkPeriod= %d, bIsLosslessLink=%d;" + "iComplexityMode = %d;iNumRefFrame = %d;iEntropyCodingModeFlag = %d;uiMaxNalSize = %d;iLTRRefNum = %d;iMultipleThreadIdc = %d;iLoopFilterDisableIdc = %d (offset(alpha/beta): %d,%d;iComplexityMode = %d,iMaxQp = %d;iMinQp = %d)", + pParam->iUsageType, + pParam->iPicWidth, + pParam->iPicHeight, + pParam->iTargetBitrate, + pParam->iMaxBitrate, + pParam->iRCMode, + pParam->iPaddingFlag, + pParam->iTemporalLayerNum, + pParam->iSpatialLayerNum, + pParam->fMaxFrameRate, + pParam->uiIntraPeriod, + pParam->eSpsPpsIdStrategy, + pParam->bPrefixNalAddingCtrl, + pParam->bSimulcastAVC, + pParam->bEnableDenoise, + pParam->bEnableBackgroundDetection, + pParam->bEnableSceneChangeDetect, + pParam->bEnableAdaptiveQuant, + pParam->bEnableFrameSkip, + pParam->bEnableLongTermReference, + pParam->iLtrMarkPeriod, + pParam->bIsLosslessLink, + pParam->iComplexityMode, + pParam->iNumRefFrame, + pParam->iEntropyCodingModeFlag, + pParam->uiMaxNalSize, + pParam->iLTRRefNum, + pParam->iMultipleThreadIdc, + pParam->iLoopFilterDisableIdc, + pParam->iLoopFilterAlphaC0Offset, + pParam->iLoopFilterBetaOffset, + pParam->iComplexityMode, + pParam->iMaxQp, + pParam->iMinQp + ); 
+ int32_t i = 0; + int32_t iSpatialLayers = (pParam->iSpatialLayerNum < MAX_SPATIAL_LAYER_NUM) ? (pParam->iSpatialLayerNum) : + MAX_SPATIAL_LAYER_NUM; + while (i < iSpatialLayers) { + SSpatialLayerConfig* pSpatialCfg = &pParam->sSpatialLayers[i]; + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "sSpatialLayers[%d]: .iVideoWidth= %d; .iVideoHeight= %d; .fFrameRate= %.6ff; .iSpatialBitrate= %d; .iMaxSpatialBitrate= %d; .sSliceArgument.uiSliceMode= %d; .sSliceArgument.iSliceNum= %d; .sSliceArgument.uiSliceSizeConstraint= %d;" + "uiProfileIdc = %d;uiLevelIdc = %d;iDLayerQp = %d", + i, pSpatialCfg->iVideoWidth, + pSpatialCfg->iVideoHeight, + pSpatialCfg->fFrameRate, + pSpatialCfg->iSpatialBitrate, + pSpatialCfg->iMaxSpatialBitrate, + pSpatialCfg->sSliceArgument.uiSliceMode, + pSpatialCfg->sSliceArgument.uiSliceNum, + pSpatialCfg->sSliceArgument.uiSliceSizeConstraint, + pSpatialCfg->uiProfileIdc, + pSpatialCfg->uiLevelIdc, + pSpatialCfg->iDLayerQp + ); + ++ i; + } +} + +void CWelsH264SVCEncoder::LogStatistics (const int64_t kiCurrentFrameTs, int32_t iMaxDid) { + for (int32_t iDid = 0; iDid <= iMaxDid; iDid++) { + SEncoderStatistics* pStatistics = & (m_pEncContext->sEncoderStatistics[iDid]); + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "EncoderStatistics: SpatialId = %d,%dx%d, SpeedInMs: %f, fAverageFrameRate=%f, " + "LastFrameRate=%f, LatestBitRate=%d, LastFrameQP=%d, uiInputFrameCount=%d, uiSkippedFrameCount=%d, " + "uiResolutionChangeTimes=%d, uIDRReqNum=%d, uIDRSentNum=%d, uLTRSentNum=NA, iTotalEncodedBytes=%lu at Ts = %" PRId64, + iDid, pStatistics->uiWidth, pStatistics->uiHeight, + pStatistics->fAverageFrameSpeedInMs, pStatistics->fAverageFrameRate, + pStatistics->fLatestFrameRate, pStatistics->uiBitRate, pStatistics->uiAverageFrameQP, + pStatistics->uiInputFrameCount, pStatistics->uiSkippedFrameCount, + pStatistics->uiResolutionChangeTimes, pStatistics->uiIDRReqNum, pStatistics->uiIDRSentNum, + pStatistics->iTotalEncodedBytes, kiCurrentFrameTs); + } 
+} + +void CWelsH264SVCEncoder::UpdateStatistics (SFrameBSInfo* pBsInfo, + const int64_t kiCurrentFrameMs) { + + const int64_t kiCurrentFrameTs = m_pEncContext->uiLastTimestamp = pBsInfo->uiTimeStamp; + const int64_t kiTimeDiff = kiCurrentFrameTs - m_pEncContext->iLastStatisticsLogTs; + + int32_t iMaxDid = m_pEncContext->pSvcParam->iSpatialLayerNum - 1; + SLayerBSInfo* pLayerInfo = &pBsInfo->sLayerInfo[0]; + uint32_t iMaxInputFrame = 0; + float iMaxFrameRate = 0; + for (int32_t iDid = 0; iDid <= iMaxDid; iDid++) { + EVideoFrameType eFrameType = videoFrameTypeSkip; + int32_t kiCurrentFrameSize = 0; + for (int32_t iLayerNum = 0; iLayerNum < pBsInfo->iLayerNum; iLayerNum++) { + pLayerInfo = &pBsInfo->sLayerInfo[iLayerNum]; + if ((pLayerInfo->uiLayerType == VIDEO_CODING_LAYER) && (pLayerInfo->uiSpatialId == iDid)) { + eFrameType = pLayerInfo->eFrameType; + for (int32_t iNalIdx = 0; iNalIdx < pLayerInfo->iNalCount; iNalIdx++) { + kiCurrentFrameSize += pLayerInfo->pNalLengthInByte[iNalIdx]; + } + } + } + SEncoderStatistics* pStatistics = & (m_pEncContext->sEncoderStatistics[iDid]); + SSpatialLayerInternal* pSpatialLayerInternalParam = & (m_pEncContext->pSvcParam->sDependencyLayers[iDid]); + + if ((0 != pStatistics->uiWidth && 0 != pStatistics->uiHeight) + && (pStatistics->uiWidth != (unsigned int) pSpatialLayerInternalParam->iActualWidth + || pStatistics->uiHeight != (unsigned int) pSpatialLayerInternalParam->iActualHeight)) { + pStatistics->uiResolutionChangeTimes ++; + } + pStatistics->uiWidth = pSpatialLayerInternalParam->iActualWidth; + pStatistics->uiHeight = pSpatialLayerInternalParam->iActualHeight; + + const bool kbCurrentFrameSkipped = (videoFrameTypeSkip == eFrameType); + pStatistics->uiInputFrameCount ++; + pStatistics->uiSkippedFrameCount += (kbCurrentFrameSkipped ? 
1 : 0); + iMaxInputFrame = WELS_MAX (pStatistics->uiInputFrameCount, iMaxInputFrame); + int32_t iProcessedFrameCount = pStatistics->uiInputFrameCount - pStatistics->uiSkippedFrameCount; + if (!kbCurrentFrameSkipped && iProcessedFrameCount != 0) { + pStatistics->fAverageFrameSpeedInMs += (kiCurrentFrameMs - pStatistics->fAverageFrameSpeedInMs) / iProcessedFrameCount; + } + // rate control related + if (0 != m_pEncContext->uiStartTimestamp) { + if (kiCurrentFrameTs > m_pEncContext->uiStartTimestamp + 800) { + pStatistics->fAverageFrameRate = (static_cast (pStatistics->uiInputFrameCount) * 1000 / + (kiCurrentFrameTs - m_pEncContext->uiStartTimestamp)); + } + } else { + m_pEncContext->uiStartTimestamp = kiCurrentFrameTs; + } + iMaxFrameRate = WELS_MAX (iMaxFrameRate, pStatistics->fAverageFrameRate); + //pStatistics->fLatestFrameRate = m_pEncContext->pWelsSvcRc->fLatestFrameRate; //TODO: finish the calculation in RC + //pStatistics->uiBitRate = m_pEncContext->pWelsSvcRc->iActualBitRate; //TODO: finish the calculation in RC + pStatistics->uiAverageFrameQP = m_pEncContext->pWelsSvcRc[iDid].iAverageFrameQp; + + if (videoFrameTypeIDR == eFrameType || videoFrameTypeI == eFrameType) { + pStatistics->uiIDRSentNum ++; + } + if (m_pEncContext->pLtr->bLTRMarkingFlag) { + pStatistics->uiLTRSentNum ++; + } + + pStatistics->iTotalEncodedBytes += kiCurrentFrameSize; + + const int32_t kiDeltaFrames = static_cast (pStatistics->uiInputFrameCount - + pStatistics->iLastStatisticsFrameCount); + if (kiDeltaFrames > (m_pEncContext->pSvcParam->fMaxFrameRate * 2)) { + if (kiTimeDiff >= m_pEncContext->iStatisticsLogInterval) { + float fTimeDiffSec = kiTimeDiff / 1000.0f; + pStatistics->fLatestFrameRate = static_cast ((pStatistics->uiInputFrameCount - + pStatistics->iLastStatisticsFrameCount) / fTimeDiffSec); + pStatistics->uiBitRate = static_cast ((pStatistics->iTotalEncodedBytes) * 8 / fTimeDiffSec); + + if (WELS_ABS (pStatistics->fLatestFrameRate - m_pEncContext->pSvcParam->fMaxFrameRate) > 
30) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_WARNING, + "Actual input fLatestFrameRate = %f is quite different from framerate in setting %f, please check setting or timestamp unit (ms), cur_Ts = %" + PRId64 " start_Ts = %" PRId64, + pStatistics->fLatestFrameRate, m_pEncContext->pSvcParam->fMaxFrameRate, kiCurrentFrameTs, + static_cast (m_pEncContext->iLastStatisticsLogTs)); + } + + if (m_pEncContext->pSvcParam->iRCMode == RC_QUALITY_MODE || m_pEncContext->pSvcParam->iRCMode == RC_BITRATE_MODE) { + if ((pStatistics->fLatestFrameRate > 0) + && WELS_ABS (m_pEncContext->pSvcParam->fMaxFrameRate - pStatistics->fLatestFrameRate) > 5) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_WARNING, + "Actual input framerate %f is different from framerate in setting %f, suggest to use other rate control modes", + pStatistics->fLatestFrameRate, m_pEncContext->pSvcParam->fMaxFrameRate); + } + } + // update variables + pStatistics->iLastStatisticsBytes = pStatistics->iTotalEncodedBytes; + pStatistics->iLastStatisticsFrameCount = pStatistics->uiInputFrameCount; + m_pEncContext->iLastStatisticsLogTs = kiCurrentFrameTs; + LogStatistics (kiCurrentFrameTs, iMaxDid); + pStatistics->iTotalEncodedBytes = 0; + //TODO: the following statistics will be calculated and added later + //pStatistics->uiLTRSentNum + + } + } + } + +} + +/************************************************************************ +* InDataFormat, IDRInterval, SVC Encode Param, Frame Rate, Bitrate,.. 
+************************************************************************/ +int CWelsH264SVCEncoder::SetOption (ENCODER_OPTION eOptionId, void* pOption) { + if (NULL == pOption) { + return cmInitParaError; + } + + if ((NULL == m_pEncContext || false == m_bInitialFlag) && eOptionId != ENCODER_OPTION_TRACE_LEVEL + && eOptionId != ENCODER_OPTION_TRACE_CALLBACK && eOptionId != ENCODER_OPTION_TRACE_CALLBACK_CONTEXT) { + return cmInitExpected; + } + + switch (eOptionId) { + case ENCODER_OPTION_INTER_SPATIAL_PRED: { // Inter spatial layer prediction flag + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "ENCODER_OPTION_INTER_SPATIAL_PRED, this feature not supported at present."); + } + break; + case ENCODER_OPTION_DATAFORMAT: { // Input color space + int32_t iValue = * ((int32_t*)pOption); + int32_t iColorspace = iValue; + if (iColorspace == 0) { + return cmInitParaError; + } + + m_iCspInternal = iColorspace; + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_DATAFORMAT, m_iCspInternal = 0x%x", m_iCspInternal); + } + break; + case ENCODER_OPTION_IDR_INTERVAL: { // IDR Interval + int32_t iValue = * ((int32_t*)pOption); + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_IDR_INTERVAL iValue = %d", iValue); + if (iValue <= -1) { + iValue = 0; + } + if (iValue == (int32_t)m_pEncContext->pSvcParam->uiIntraPeriod) { + return cmResultSuccess; + } + m_pEncContext->pSvcParam->uiIntraPeriod = (uint32_t)iValue; + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_IDR_INTERVAL uiIntraPeriod updated to %d", + m_pEncContext->pSvcParam->uiIntraPeriod); + } + break; + case ENCODER_OPTION_SVC_ENCODE_PARAM_BASE: { // SVC Encoding Parameter + SEncParamBase sEncodingParam; + SWelsSvcCodingParam sConfig; + int32_t iTargetWidth = 0; + int32_t iTargetHeight = 0; + + memcpy (&sEncodingParam, pOption, sizeof (SEncParamBase)); // 
confirmed_safe_unsafe_usage +#ifdef OUTPUT_BIT_STREAM + if ((sEncodingParam.iPicWidth != m_pEncContext->pSvcParam->sDependencyLayers[m_pEncContext->pSvcParam->iSpatialLayerNum + - 1].iActualWidth) || + (sEncodingParam.iPicHeight != m_pEncContext->pSvcParam->sDependencyLayers[m_pEncContext->pSvcParam->iSpatialLayerNum - + 1].iActualHeight)) { + ++m_iSwitchTimes; + m_bSwitch = true; + } +#endif//OUTPUT_BIT_STREAM + if (sConfig.ParamBaseTranscode (sEncodingParam)) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_SVC_ENCODE_PARAM_BASE, ParamTranscode failed!"); + return cmInitParaError; + } + /* New configuration available here */ + iTargetWidth = sConfig.iPicWidth; + iTargetHeight = sConfig.iPicHeight; + if (m_iMaxPicWidth != iTargetWidth + || m_iMaxPicHeight != iTargetHeight) { + m_iMaxPicWidth = iTargetWidth; + m_iMaxPicHeight = iTargetHeight; + } + if (sConfig.DetermineTemporalSettings()) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_SVC_ENCODE_PARAM_BASE, DetermineTemporalSettings failed!"); + return cmInitParaError; + } + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_SVC_ENCODE_PARAM_BASE iUsageType = %d,iPicWidth= %d;iPicHeight= %d;iTargetBitrate= %d;fMaxFrameRate= %.6ff;iRCMode= %d", + sEncodingParam.iUsageType, + sEncodingParam.iPicWidth, + sEncodingParam.iPicHeight, + sEncodingParam.iTargetBitrate, + sEncodingParam.fMaxFrameRate, + sEncodingParam.iRCMode); + if (WelsEncoderParamAdjust (&m_pEncContext, &sConfig)) { + return cmInitParaError; + } + + //LogStatistics + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_SVC_ENCODE_PARAM_BASE, LogStatisticsBeforeNewEncoding"); + LogStatistics (m_pEncContext->iLastStatisticsLogTs, 0); + } + break; + + case ENCODER_OPTION_SVC_ENCODE_PARAM_EXT: { // SVC Encoding Parameter + SEncParamExt 
sEncodingParam; + SWelsSvcCodingParam sConfig; + int32_t iTargetWidth = 0; + int32_t iTargetHeight = 0; + + memcpy (&sEncodingParam, pOption, sizeof (SEncParamExt)); // confirmed_safe_unsafe_usage + TraceParamInfo (&sEncodingParam); +#ifdef OUTPUT_BIT_STREAM + if ((sEncodingParam.sSpatialLayers[sEncodingParam.iSpatialLayerNum - 1].iVideoWidth != + m_pEncContext->pSvcParam->sDependencyLayers[m_pEncContext->pSvcParam->iSpatialLayerNum - 1].iActualWidth) || + (sEncodingParam.sSpatialLayers[sEncodingParam.iSpatialLayerNum - 1].iVideoHeight != + m_pEncContext->pSvcParam->sDependencyLayers[m_pEncContext->pSvcParam->iSpatialLayerNum - 1].iActualHeight)) { + ++ m_iSwitchTimes; + m_bSwitch = true; + } +#endif//OUTPUT_BIT_STREAM + if (sEncodingParam.iSpatialLayerNum < 1 + || sEncodingParam.iSpatialLayerNum > MAX_SPATIAL_LAYER_NUM) { // verify number of spatial layer + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_SVC_ENCODE_PARAM_EXT, iSpatialLayerNum(%d) failed!", + sEncodingParam.iSpatialLayerNum); + return cmInitParaError; + } + + if (sConfig.ParamTranscode (sEncodingParam)) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_SVC_ENCODE_PARAM_EXT, ParamTranscode failed!"); + return cmInitParaError; + } + if (sConfig.iSpatialLayerNum < 1) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_SVC_ENCODE_PARAM_EXT, iSpatialLayerNum(%d) failed!", + sConfig.iSpatialLayerNum); + return cmInitParaError; + } + if (sConfig.DetermineTemporalSettings()) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_SVC_ENCODE_PARAM_EXT, DetermineTemporalSettings failed!"); + return cmInitParaError; + } + + /* New configuration available here */ + iTargetWidth = sConfig.iPicWidth; + iTargetHeight = sConfig.iPicHeight; + if (m_iMaxPicWidth != iTargetWidth + || m_iMaxPicHeight != 
iTargetHeight) { + m_iMaxPicWidth = iTargetWidth; + m_iMaxPicHeight = iTargetHeight; + } + /* Check every field whether there is new request for memory block changed or else, Oct. 24, 2008 */ + if (WelsEncoderParamAdjust (&m_pEncContext, &sConfig)) { + return cmInitParaError; + } + + //LogStatistics + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_SVC_ENCODE_PARAM_EXT, LogStatisticsBeforeNewEncoding"); + LogStatistics (m_pEncContext->iLastStatisticsLogTs, sEncodingParam.iSpatialLayerNum - 1); + } + break; + case ENCODER_OPTION_FRAME_RATE: { // Maximal input frame rate + float iValue = * ((float*)pOption); + if (iValue <= 0) { + return cmInitParaError; + } + //adjust to valid range + m_pEncContext->pSvcParam->fMaxFrameRate = WELS_CLIP3 (iValue, MIN_FRAME_RATE, MAX_FRAME_RATE); + WelsEncoderApplyFrameRate (m_pEncContext->pSvcParam); + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_FRAME_RATE,m_pEncContext->pSvcParam->fMaxFrameRate= %f", + m_pEncContext->pSvcParam->fMaxFrameRate); + } + break; + case ENCODER_OPTION_BITRATE: { // Target bit-rate + SBitrateInfo* pInfo = (static_cast (pOption)); + int32_t iBitrate = pInfo->iBitrate; + if (iBitrate <= 0) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_BITRATE,iBitrate = %d", + iBitrate); + return cmInitParaError; + } + iBitrate = WELS_CLIP3 (iBitrate, MIN_BIT_RATE, MAX_BIT_RATE); + switch (pInfo->iLayer) { + case SPATIAL_LAYER_ALL: + m_pEncContext->pSvcParam->iTargetBitrate = iBitrate; + break; + case SPATIAL_LAYER_0: + m_pEncContext->pSvcParam->sSpatialLayers[0].iSpatialBitrate = iBitrate; + break; + case SPATIAL_LAYER_1: + m_pEncContext->pSvcParam->sSpatialLayers[1].iSpatialBitrate = iBitrate; + break; + case SPATIAL_LAYER_2: + m_pEncContext->pSvcParam->sSpatialLayers[2].iSpatialBitrate = iBitrate; + break; + case SPATIAL_LAYER_3: + 
m_pEncContext->pSvcParam->sSpatialLayers[3].iSpatialBitrate = iBitrate; + break; + default: + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_BITRATE,iLayer = %d", + pInfo->iLayer); + return cmInitParaError; + break; + } + //adjust to valid range + if (WelsEncoderApplyBitRate (&m_pWelsTrace->m_sLogCtx, m_pEncContext->pSvcParam, pInfo->iLayer)) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_BITRATE layerId= %d,iSpatialBitrate = %d", pInfo->iLayer, iBitrate); + return cmInitParaError; + } else { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_BITRATE layerId= %d,iSpatialBitrate = %d", pInfo->iLayer, iBitrate); + + } + + } + break; + case ENCODER_OPTION_MAX_BITRATE: { // Target bit-rate + SBitrateInfo* pInfo = (static_cast (pOption)); + int32_t iBitrate = pInfo->iBitrate; + if (iBitrate <= 0) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_MAX_BITRATE,iBitrate = %d", + iBitrate); + return cmInitParaError; + } + iBitrate = WELS_CLIP3 (iBitrate, MIN_BIT_RATE, MAX_BIT_RATE); + switch (pInfo->iLayer) { + case SPATIAL_LAYER_ALL: + m_pEncContext->pSvcParam->iMaxBitrate = iBitrate; + break; + case SPATIAL_LAYER_0: + m_pEncContext->pSvcParam->sSpatialLayers[0].iMaxSpatialBitrate = iBitrate; + break; + case SPATIAL_LAYER_1: + m_pEncContext->pSvcParam->sSpatialLayers[1].iMaxSpatialBitrate = iBitrate; + break; + case SPATIAL_LAYER_2: + m_pEncContext->pSvcParam->sSpatialLayers[2].iMaxSpatialBitrate = iBitrate; + break; + case SPATIAL_LAYER_3: + m_pEncContext->pSvcParam->sSpatialLayers[3].iMaxSpatialBitrate = iBitrate; + break; + default: + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_MAX_BITRATE,iLayer = %d", + pInfo->iLayer); + return cmInitParaError; + break; + } + //adjust to valid range + if 
(WelsEncoderApplyBitRate (&m_pWelsTrace->m_sLogCtx, m_pEncContext->pSvcParam, pInfo->iLayer)) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_BITRATE layerId= %d,iMaxSpatialBitrate = %d", pInfo->iLayer, iBitrate); + return cmInitParaError; + } else { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_BITRATE layerId= %d,iMaxSpatialBitrate = %d", pInfo->iLayer, iBitrate); + + } + } + break; + case ENCODER_OPTION_RC_MODE: { // 0:quality mode;1:bit-rate mode;2:bitrate limited mode + int32_t iValue = * ((int32_t*)pOption); + m_pEncContext->pSvcParam->iRCMode = (RC_MODES) iValue; + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_RC_MODE iRCMode= %d (Note: not suggest changing RC-mode in middle of encoding)", + iValue); + WelsRcInitFuncPointers (m_pEncContext, m_pEncContext->pSvcParam->iRCMode); + } + break; + case ENCODER_OPTION_RC_FRAME_SKIP: { // 0:FRAME-SKIP disabled;1:FRAME-SKIP enabled + bool bValue = * ((bool*)pOption); + if (m_pEncContext->pSvcParam->iRCMode != RC_OFF_MODE) { + m_pEncContext->pSvcParam->bEnableFrameSkip = bValue; + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_RC_FRAME_SKIP, frame-skip setting(%d)", + bValue); + } else { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_RC_FRAME_SKIP, rc off, frame-skip setting(%d) un-useful", + bValue); + } + } + break; + case ENCODER_PADDING_PADDING: { // 0:disable padding;1:padding + int32_t iValue = * ((int32_t*)pOption); + m_pEncContext->pSvcParam->iPaddingFlag = iValue; + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsH264SVCEncoder::SetOption():ENCODER_PADDING_PADDING iPaddingFlag= %d ", + iValue); + } + break; + case ENCODER_LTR_RECOVERY_REQUEST: { + SLTRRecoverRequest* pLTR_Recover_Request = (SLTRRecoverRequest*) (pOption); + 
FilterLTRRecoveryRequest (m_pEncContext, pLTR_Recover_Request); + } + break; + case ENCODER_LTR_MARKING_FEEDBACK: { + SLTRMarkingFeedback* fb = (SLTRMarkingFeedback*) (pOption); + FilterLTRMarkingFeedback (m_pEncContext, fb); + } + break; + case ENCODER_LTR_MARKING_PERIOD: { + uint32_t iValue = * ((uint32_t*) (pOption)); + m_pEncContext->pSvcParam->iLtrMarkPeriod = iValue; + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsH264SVCEncoder::SetOption():ENCODER_LTR_MARKING_PERIOD iLtrMarkPeriod= %d ", + iValue); + } + break; + case ENCODER_OPTION_LTR: { + SLTRConfig* pLTRValue = ((SLTRConfig*) (pOption)); + if (WelsEncoderApplyLTR (&m_pWelsTrace->m_sLogCtx, &m_pEncContext, pLTRValue)) { + return cmInitParaError; + } + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_LTR,expected bEnableLongTermReference = %d,expeced iLTRRefNum = %d,actual bEnableLongTermReference = %d,actual iLTRRefNum = %d", + pLTRValue->bEnableLongTermReference, pLTRValue->iLTRRefNum, m_pEncContext->pSvcParam->bEnableLongTermReference, + m_pEncContext->pSvcParam->iLTRRefNum); + } + break; + case ENCODER_OPTION_ENABLE_SSEI: { + bool iValue = * ((bool*)pOption); + m_pEncContext->pSvcParam->bEnableSSEI = iValue; + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + " CWelsH264SVCEncoder::SetOption enable SSEI = %d -- this is not supported yet", + m_pEncContext->pSvcParam->bEnableSSEI); + } + break; + case ENCODER_OPTION_ENABLE_PREFIX_NAL_ADDING: { + bool iValue = * ((bool*)pOption); + m_pEncContext->pSvcParam->bPrefixNalAddingCtrl = iValue; + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, " CWelsH264SVCEncoder::SetOption bPrefixNalAddingCtrl = %d ", + m_pEncContext->pSvcParam->bPrefixNalAddingCtrl); + } + break; + case ENCODER_OPTION_SPS_PPS_ID_STRATEGY: { + int32_t iValue = * (static_cast (pOption)); + EParameterSetStrategy eNewStrategy = CONSTANT_ID; + switch (iValue) { + case 0: + eNewStrategy = CONSTANT_ID; + break; + case 0x01: + 
eNewStrategy = INCREASING_ID; + break; + case 0x02: + eNewStrategy = SPS_LISTING; + break; + case 0x03: + eNewStrategy = SPS_LISTING_AND_PPS_INCREASING; + break; + case 0x06: + eNewStrategy = SPS_PPS_LISTING; + break; + default: + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, + " CWelsH264SVCEncoder::SetOption eSpsPpsIdStrategy(%d) not in valid range, unchanged! existing=%d", + iValue, m_pEncContext->pSvcParam->eSpsPpsIdStrategy); + break; + } + + if (((eNewStrategy & SPS_LISTING) || (m_pEncContext->pSvcParam->eSpsPpsIdStrategy & SPS_LISTING)) + && m_pEncContext->pSvcParam->eSpsPpsIdStrategy != eNewStrategy) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, + " CWelsH264SVCEncoder::SetOption eSpsPpsIdStrategy changing in the middle of call is NOT allowed for eSpsPpsIdStrategy>INCREASING_ID: existing setting is %d and the new one is %d", + m_pEncContext->pSvcParam->eSpsPpsIdStrategy, iValue); + return cmInitParaError; + } + SWelsSvcCodingParam sConfig; + memcpy (&sConfig, m_pEncContext->pSvcParam, sizeof (SWelsSvcCodingParam)); + sConfig.eSpsPpsIdStrategy = eNewStrategy; + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, " CWelsH264SVCEncoder::SetOption eSpsPpsIdStrategy = %d ", + sConfig.eSpsPpsIdStrategy); + + if (WelsEncoderParamAdjust (&m_pEncContext, &sConfig)) { + return cmInitParaError; + } + } + break; + case ENCODER_OPTION_CURRENT_PATH: { + if (m_pEncContext->pSvcParam != NULL) { + char* path = static_cast (pOption); + m_pEncContext->pSvcParam->pCurPath = path; + } + } + break; + case ENCODER_OPTION_DUMP_FILE: { +#ifdef ENABLE_FRAME_DUMP + if (m_pEncContext->pSvcParam != NULL) { + SDumpLayer* pDump = (static_cast (pOption)); + WelsStrncpy (m_pEncContext->pSvcParam->sDependencyLayers[pDump->iLayer].sRecFileName, + sizeof (m_pEncContext->pSvcParam->sDependencyLayers[pDump->iLayer].sRecFileName), pDump->pFileName); + } +#endif + } + break; + case ENCODER_OPTION_TRACE_LEVEL: { + if (m_pWelsTrace) { + uint32_t level = * ((uint32_t*)pOption); + 
m_pWelsTrace->SetTraceLevel (level); + } + } + break; + case ENCODER_OPTION_TRACE_CALLBACK: { + if (m_pWelsTrace) { + WelsTraceCallback callback = * ((WelsTraceCallback*)pOption); + m_pWelsTrace->SetTraceCallback (callback); + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_TRACE_CALLBACK callback = %p.", + callback); + } + } + break; + case ENCODER_OPTION_TRACE_CALLBACK_CONTEXT: { + if (m_pWelsTrace) { + void* ctx = * ((void**)pOption); + m_pWelsTrace->SetTraceCallbackContext (ctx); + } + } + break; + case ENCODER_OPTION_PROFILE: { + SProfileInfo* pProfileInfo = (static_cast (pOption)); + if ((pProfileInfo->iLayer < SPATIAL_LAYER_0) || (pProfileInfo->iLayer > SPATIAL_LAYER_3)) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_PROFILE,iLayer = %d(rang0-3)", pProfileInfo->iLayer); + return cmInitParaError; + } + CheckProfileSetting (&m_pWelsTrace->m_sLogCtx, m_pEncContext->pSvcParam, pProfileInfo->iLayer, + pProfileInfo->uiProfileIdc); + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_PROFILE,layerId = %d,expected profile = %d,actual profile = %d", + pProfileInfo->iLayer, pProfileInfo->uiProfileIdc, + m_pEncContext->pSvcParam->sSpatialLayers[pProfileInfo->iLayer].uiProfileIdc); + } + break; + case ENCODER_OPTION_LEVEL: { + SLevelInfo* pLevelInfo = (static_cast (pOption)); + if ((pLevelInfo->iLayer < SPATIAL_LAYER_0) || (pLevelInfo->iLayer > SPATIAL_LAYER_3)) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_PROFILE,iLayer = %d(rang0-3)", pLevelInfo->iLayer); + return cmInitParaError; + } + CheckLevelSetting (&m_pWelsTrace->m_sLogCtx, m_pEncContext->pSvcParam, pLevelInfo->iLayer, pLevelInfo->uiLevelIdc); + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_LEVEL,layerId = %d,expected level = %d,actual 
level = %d", + pLevelInfo->iLayer, pLevelInfo->uiLevelIdc, m_pEncContext->pSvcParam->sSpatialLayers[pLevelInfo->iLayer].uiLevelIdc); + } + break; + case ENCODER_OPTION_NUMBER_REF: { + int32_t iValue = * ((int32_t*)pOption); + CheckReferenceNumSetting (&m_pWelsTrace->m_sLogCtx, m_pEncContext->pSvcParam, iValue); + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_NUMBER_REF,expected refNum = %d,actual refnum = %d", iValue, + m_pEncContext->pSvcParam->iNumRefFrame); + } + break; + case ENCODER_OPTION_DELIVERY_STATUS: { + SDeliveryStatus* pValue = (static_cast (pOption)); + m_pEncContext->bDeliveryFlag = pValue->bDeliveryFlag; + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_DEBUG, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_DELIVERY_STATUS,bDeliveryFlag = %d", pValue->bDeliveryFlag); + } + break; + case ENCODER_OPTION_COMPLEXITY: { + int32_t iValue = * (static_cast (pOption)); + m_pEncContext->pSvcParam->iComplexityMode = (ECOMPLEXITY_MODE)iValue; + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_COMPLEXITY,iComplexityMode = %d", iValue); + } + break; + case ENCODER_OPTION_GET_STATISTICS: { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_WARNING, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_GET_STATISTICS: this option is get-only!"); + } + break; + case ENCODER_OPTION_STATISTICS_LOG_INTERVAL: { + int32_t iValue = * (static_cast (pOption)); + m_pEncContext->iStatisticsLogInterval = iValue; + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_STATISTICS_LOG_INTERVAL,iStatisticsLogInterval = %d", iValue); + } + break; + case ENCODER_OPTION_IS_LOSSLESS_LINK: { + bool bValue = * (static_cast (pOption)); + m_pEncContext->pSvcParam->bIsLosslessLink = bValue; + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_IS_LOSSLESS_LINK,bIsLosslessLink = %d", bValue); + } + 
break; + case ENCODER_OPTION_BITS_VARY_PERCENTAGE: { + int32_t iValue = * (static_cast (pOption)); + m_pEncContext->pSvcParam->iBitsVaryPercentage = WELS_CLIP3 (iValue, 0, 100); + WelsEncoderApplyBitVaryRang (&m_pWelsTrace->m_sLogCtx, m_pEncContext->pSvcParam, + m_pEncContext->pSvcParam->iBitsVaryPercentage); + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsH264SVCEncoder::SetOption():ENCODER_OPTION_BITS_VARY_PERCENTAGE,iBitsVaryPercentage = %d", iValue); + } + break; + + default: + return cmInitParaError; + } + + return 0; +} + +int CWelsH264SVCEncoder::GetOption (ENCODER_OPTION eOptionId, void* pOption) { + if (NULL == pOption) { + return cmInitParaError; + } + if (NULL == m_pEncContext || false == m_bInitialFlag) { + return cmInitExpected; + } + + switch (eOptionId) { + case ENCODER_OPTION_INTER_SPATIAL_PRED: { // Inter spatial layer prediction flag + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "ENCODER_OPTION_INTER_SPATIAL_PRED, this feature not supported at present."); + } + break; + case ENCODER_OPTION_DATAFORMAT: { // Input color space + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsH264SVCEncoder::GetOption():ENCODER_OPTION_DATAFORMAT, m_iCspInternal= 0x%x", m_iCspInternal); + * ((int32_t*)pOption) = m_iCspInternal; + } + break; + case ENCODER_OPTION_IDR_INTERVAL: { // IDR Interval + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsH264SVCEncoder::GetOption():ENCODER_OPTION_IDR_INTERVAL, uiIntraPeriod= %d", + m_pEncContext->pSvcParam->uiIntraPeriod); + * ((int32_t*)pOption) = m_pEncContext->pSvcParam->uiIntraPeriod; + } + break; + case ENCODER_OPTION_SVC_ENCODE_PARAM_EXT: { // SVC Encoding Parameter + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsH264SVCEncoder::GetOption():ENCODER_OPTION_SVC_ENCODE_PARAM_EXT"); + memcpy (pOption, m_pEncContext->pSvcParam, sizeof (SEncParamExt)); // confirmed_safe_unsafe_usage + } + break; + case ENCODER_OPTION_SVC_ENCODE_PARAM_BASE: { // SVC Encoding Parameter + WelsLog 
(&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsH264SVCEncoder::GetOption():ENCODER_OPTION_SVC_ENCODE_PARAM_BASE"); + m_pEncContext->pSvcParam->GetBaseParams ((SEncParamBase*) pOption); + } + break; + + case ENCODER_OPTION_FRAME_RATE: { // Maximal input frame rate + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsH264SVCEncoder::GetOption():ENCODER_OPTION_FRAME_RATE, fMaxFrameRate = %.6ff", + m_pEncContext->pSvcParam->fMaxFrameRate); + * ((float*)pOption) = m_pEncContext->pSvcParam->fMaxFrameRate; + } + break; + case ENCODER_OPTION_BITRATE: { // Target bit-rate + + SBitrateInfo* pInfo = (static_cast (pOption)); + if ((pInfo->iLayer != SPATIAL_LAYER_ALL) && (pInfo->iLayer != SPATIAL_LAYER_0) && (pInfo->iLayer != SPATIAL_LAYER_1) + && (pInfo->iLayer != SPATIAL_LAYER_2) && (pInfo->iLayer != SPATIAL_LAYER_3)) + return cmInitParaError; + if (pInfo->iLayer == SPATIAL_LAYER_ALL) { + pInfo->iBitrate = m_pEncContext->pSvcParam->iTargetBitrate; + } else { + pInfo->iBitrate = m_pEncContext->pSvcParam->sSpatialLayers[pInfo->iLayer].iSpatialBitrate; + } + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsH264SVCEncoder::GetOption():ENCODER_OPTION_BITRATE, layerId =%d,iBitrate = %d", + pInfo->iLayer, pInfo->iBitrate); + } + break; + case ENCODER_OPTION_MAX_BITRATE: { // Target bit-rate + SBitrateInfo* pInfo = (static_cast (pOption)); + if ((pInfo->iLayer != SPATIAL_LAYER_ALL) && (pInfo->iLayer != SPATIAL_LAYER_0) && (pInfo->iLayer != SPATIAL_LAYER_1) + && (pInfo->iLayer != SPATIAL_LAYER_2) && (pInfo->iLayer != SPATIAL_LAYER_3)) + return cmInitParaError; + if (pInfo->iLayer == SPATIAL_LAYER_ALL) { + pInfo->iBitrate = m_pEncContext->pSvcParam->iMaxBitrate; + } else { + pInfo->iBitrate = m_pEncContext->pSvcParam->sSpatialLayers[pInfo->iLayer].iMaxSpatialBitrate; + } + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsH264SVCEncoder::GetOption():ENCODER_OPTION_MAX_BITRATE,, layerId =%d,iBitrate = %d", + pInfo->iLayer, pInfo->iBitrate); + } + break; + case 
ENCODER_OPTION_GET_STATISTICS: { + SEncoderStatistics* pStatistics = (static_cast (pOption)); + SEncoderStatistics* pEncStatistics = &m_pEncContext->sEncoderStatistics[m_pEncContext->pSvcParam->iSpatialLayerNum - 1]; + pStatistics->uiWidth = pEncStatistics->uiWidth; + pStatistics->uiHeight = pEncStatistics->uiHeight; + pStatistics->fAverageFrameSpeedInMs = pEncStatistics->fAverageFrameSpeedInMs; + + // rate control related + pStatistics->fAverageFrameRate = pEncStatistics->fAverageFrameRate; + pStatistics->fLatestFrameRate = pEncStatistics->fLatestFrameRate; + pStatistics->uiBitRate = pEncStatistics->uiBitRate; + + pStatistics->uiInputFrameCount = pEncStatistics->uiInputFrameCount; + pStatistics->uiSkippedFrameCount = pEncStatistics->uiSkippedFrameCount; + + pStatistics->uiResolutionChangeTimes = pEncStatistics->uiResolutionChangeTimes; + pStatistics->uiIDRReqNum = pEncStatistics->uiIDRReqNum; + pStatistics->uiIDRSentNum = pEncStatistics->uiIDRSentNum; + pStatistics->uiLTRSentNum = pEncStatistics->uiLTRSentNum; + } + break; + case ENCODER_OPTION_STATISTICS_LOG_INTERVAL: { + * ((int32_t*)pOption) = m_pEncContext->iStatisticsLogInterval; + } + break; + case ENCODER_OPTION_COMPLEXITY: { + * ((int32_t*)pOption) = m_pEncContext->pSvcParam->iComplexityMode; + } + break; + default: + return cmInitParaError; + } + + return 0; +} + +void CWelsH264SVCEncoder::DumpSrcPicture (const SSourcePicture* pSrcPic, const int iUsageType) { +#ifdef DUMP_SRC_PICTURE + FILE* pFile = NULL; + char strFileName[256] = {0}; + const int32_t iDataLength = m_iMaxPicWidth * m_iMaxPicHeight; + + WelsSnprintf (strFileName, sizeof (strFileName), "pic_in_%dx%d.yuv", m_iMaxPicWidth, + m_iMaxPicHeight);// confirmed_safe_unsafe_usage + + switch (pSrcPic->iColorFormat) { + case videoFormatI420: + case videoFormatYV12: + pFile = WelsFopen (strFileName, "ab+"); + + if (NULL != pFile) { + fwrite (pSrcPic->pData[0], sizeof (uint8_t), pSrcPic->iStride[0]*m_iMaxPicHeight, pFile); + fwrite (pSrcPic->pData[1], 
sizeof (uint8_t), pSrcPic->iStride[1] * (m_iMaxPicHeight >> 1), pFile); + fwrite (pSrcPic->pData[2], sizeof (uint8_t), pSrcPic->iStride[2] * (m_iMaxPicHeight >> 1), pFile); + fflush (pFile); + fclose (pFile); + } else { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "DumpSrcPicture, strFileName %s open failed!", strFileName); + } + break; + case videoFormatRGB: + WelsStrcat (strFileName, 256, ".rgb"); // confirmed_safe_unsafe_usage + pFile = WelsFopen (strFileName, "ab+"); + if (NULL != pFile) { + fwrite (pSrcPic->pData[0], sizeof (uint8_t), iDataLength * 3, pFile); + fflush (pFile); + fclose (pFile); + } + case videoFormatBGR: + WelsStrcat (strFileName, 256, ".bgr"); // confirmed_safe_unsafe_usage + pFile = WelsFopen (strFileName, "ab+"); + if (NULL != pFile) { + fwrite (pSrcPic->pData[0], sizeof (uint8_t), iDataLength * 3, pFile); + fflush (pFile); + fclose (pFile); + } + break; + case videoFormatYUY2: + WelsStrcat (strFileName, 256, ".yuy2"); // confirmed_safe_unsafe_usage + pFile = WelsFopen (strFileName, "ab+"); + if (NULL != pFile) { + fwrite (pSrcPic->pData[0], sizeof (uint8_t), (CALC_BI_STRIDE (m_iMaxPicWidth, 16)) * m_iMaxPicHeight, pFile); + fflush (pFile); + fclose (pFile); + } + break; + default: + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "Exclusive case, m_iCspInternal= 0x%x", m_iCspInternal); + break; + } +#endif//DUMP_SRC_PICTURE + return; +} +} + +using namespace WelsEnc; + +int32_t WelsCreateSVCEncoder (ISVCEncoder** ppEncoder) { + if ((*ppEncoder = new CWelsH264SVCEncoder()) != NULL) { + return 0; + } + + return 1; +} + +void WelsDestroySVCEncoder (ISVCEncoder* pEncoder) { + CWelsH264SVCEncoder* pSVCEncoder = (CWelsH264SVCEncoder*)pEncoder; + + if (pSVCEncoder) { + delete pSVCEncoder; + pSVCEncoder = NULL; + } +} + +OpenH264Version WelsGetCodecVersion() { + return g_stCodecVersion; +} + +void WelsGetCodecVersionEx (OpenH264Version* pVersion) { + *pVersion = g_stCodecVersion; +} 
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/encoder/plus/src/wels_enc_export.def b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/plus/src/wels_enc_export.def new file mode 100644 index 000000000..e44d1d7e3 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/encoder/plus/src/wels_enc_export.def @@ -0,0 +1,5 @@ +EXPORTS + WelsCreateSVCEncoder + WelsDestroySVCEncoder + WelsGetCodecVersion + WelsGetCodecVersionEx diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/interface/IWelsVP.h b/TMessagesProj/jni/third_party/openh264/src/codec/processing/interface/IWelsVP.h new file mode 100644 index 000000000..8a787941b --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/interface/IWelsVP.h @@ -0,0 +1,310 @@ +/*! + * \copy + * Copyright (c) 2004-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file : IWelsVP.h + * + * \brief : Interface of wels video processor class + * + * \date : 2011/01/04 + * + * \description : 1. should support both C/C++ style interface + * 2. should concern with the feature extension requirement + * 3. should care the usage of "char"==> + * 1) value char : signed char/unsigned char + * 2) string char : char + * + ************************************************************************************* + */ + +#ifndef IWELSVP_H_ +#define IWELSVP_H_ + +#define WELSVP_MAJOR_VERSION 1 +#define WELSVP_MINOR_VERSION 1 +#define WELSVP_VERSION ((WELSVP_MAJOR_VERSION << 8) + WELSVP_MINOR_VERSION) + +typedef enum { + RET_SUCCESS = 0, + RET_FAILED = -1, + RET_INVALIDPARAM = -2, + RET_OUTOFMEMORY = -3, + RET_NOTSUPPORTED = -4, + RET_UNEXPECTED = -5, + RET_NEEDREINIT = -6 +} EResult; + +typedef enum { + VIDEO_FORMAT_NULL = 0, /* invalid format */ + /*rgb color formats*/ + VIDEO_FORMAT_RGB = 1, /* rgb 24bits */ + VIDEO_FORMAT_RGBA = 2, /* rgba */ + VIDEO_FORMAT_RGB555 = 3, /* rgb555 */ + VIDEO_FORMAT_RGB565 = 4, /* rgb565 */ + VIDEO_FORMAT_BGR = 5, /* bgr 24bits */ + VIDEO_FORMAT_BGRA = 6, /* bgr 32bits */ + VIDEO_FORMAT_ABGR = 7, /* abgr */ + VIDEO_FORMAT_ARGB = 8, /* argb */ + + /*yuv color formats*/ + VIDEO_FORMAT_YUY2 = 20, /* yuy2 */ + VIDEO_FORMAT_YVYU = 21, /* yvyu */ + VIDEO_FORMAT_UYVY = 22, /* uyvy */ + VIDEO_FORMAT_I420 = 23, /* yuv 4:2:0 planar */ + VIDEO_FORMAT_YV12 = 24, /* yuv 4:2:0 
planar */ + VIDEO_FORMAT_INTERNAL = 25, /* Only Used for SVC decoder testbed */ + VIDEO_FORMAT_NV12 = 26, /* y planar + uv packed */ + VIDEO_FORMAT_I422 = 27, /* yuv 4:2:2 planar */ + VIDEO_FORMAT_I444 = 28, /* yuv 4:4:4 planar */ + VIDEO_FORMAT_YUYV = 20, /* yuv 4:2:2 packed */ + + VIDEO_FORMAT_RGB24 = 1, + VIDEO_FORMAT_RGB32 = 2, + VIDEO_FORMAT_RGB24_INV = 5, + VIDEO_FORMAT_RGB32_INV = 6, + VIDEO_FORMAT_RGB555_INV = 7, + VIDEO_FORMAT_RGB565_INV = 8, + VIDEO_FORMAT_YUV2 = 21, + VIDEO_FORMAT_420 = 23, + + VIDEO_FORMAT_VFlip = 0x80000000 +} EVideoFormat; + +typedef enum { + BUFFER_HOSTMEM = 0, + BUFFER_SURFACE +} EPixMapBufferProperty; + +typedef struct { + int iRectTop; + int iRectLeft; + int iRectWidth; + int iRectHeight; +} SRect; + +typedef struct { + void* pPixel[3]; + int iSizeInBits; + int iStride[3]; + SRect sRect; + EVideoFormat eFormat; + EPixMapBufferProperty eProperty;//not use? to remove? but how about the size of SPixMap? +} SPixMap; + +typedef enum { + METHOD_NULL = 0, + METHOD_COLORSPACE_CONVERT ,//not support yet + METHOD_DENOISE , + METHOD_SCENE_CHANGE_DETECTION_VIDEO , + METHOD_SCENE_CHANGE_DETECTION_SCREEN , + METHOD_DOWNSAMPLE , + METHOD_VAA_STATISTICS , + METHOD_BACKGROUND_DETECTION , + METHOD_ADAPTIVE_QUANT , + METHOD_COMPLEXITY_ANALYSIS , + METHOD_COMPLEXITY_ANALYSIS_SCREEN, + METHOD_IMAGE_ROTATE , + METHOD_SCROLL_DETECTION, + METHOD_MASK +} EMethods; + +//-----------------------------------------------------------------// +// Algorithm parameters define +//-----------------------------------------------------------------// + +typedef enum { + SIMILAR_SCENE, //similar scene + MEDIUM_CHANGED_SCENE, //medium changed scene + LARGE_CHANGED_SCENE //large changed scene +} ESceneChangeIdc; + +typedef enum { + NO_STATIC, // motion block + COLLOCATED_STATIC, // collocated static block + SCROLLED_STATIC, // scrolled static block + BLOCK_STATIC_IDC_ALL +} EStaticBlockIdc; + +typedef struct { + SRect sMaskRect; + bool bMaskInfoAvailable; + int 
iScrollMvX; + int iScrollMvY; + bool bScrollDetectFlag; // 0:false ; 1:ltr; 2: scene change +} SScrollDetectionParam; + +typedef struct { + ESceneChangeIdc eSceneChangeIdc; // SIMILAR_SCENE, MEDIUM_CHANGED_SCENE, LARGE_CHANGED_SCENE + int iMotionBlockNum; // Number of motion blocks + long long iFrameComplexity; // frame complexity + unsigned char* pStaticBlockIdc; // static block idc + SScrollDetectionParam sScrollResult; //results from scroll detection +} SSceneChangeResult; + +typedef struct { + unsigned char* pCurY; // Y data of current frame + unsigned char* pRefY; // Y data of pRef frame for diff calc + int (*pSad8x8)[4]; // sad of 8x8, every 4 in the same 16x16 get together + int* pSsd16x16; // sum of square difference of 16x16 + int* pSum16x16; // sum of 16x16 + int* pSumOfSquare16x16; // sum of square of 16x16 + int (*pSumOfDiff8x8)[4]; + unsigned char (*pMad8x8)[4]; + int iFrameSad; // sad of frame +} SVAACalcResult; + +typedef struct { + int iCalcVar; + int iCalcBgd; + int iCalcSsd; + int iReserved; + SVAACalcResult* pCalcResult; +} SVAACalcParam; + +typedef struct { + signed char* pBackgroundMbFlag; + SVAACalcResult* pCalcRes; +} SBGDInterface; + +typedef enum { + AQ_QUALITY_MODE, //Quality mode + AQ_BITRATE_MODE //Bitrate mode +} EAQModes; + +typedef struct { + unsigned short uiMotionIndex; + unsigned short uiTextureIndex; +} SMotionTextureUnit; + +typedef struct { + int iAdaptiveQuantMode; // 0:quality mode, 1:bitrates mode + SVAACalcResult* pCalcResult; + SMotionTextureUnit* pMotionTextureUnit; + + signed char* pMotionTextureIndexToDeltaQp; + int iAverMotionTextureIndexToDeltaQp; // *AQ_STEP_INT_MULTIPLY +} SAdaptiveQuantizationParam; + +typedef enum { + FRAME_SAD = 0, + GOM_SAD = -1, + GOM_VAR = -2 +} EComplexityAnalysisMode; + +typedef struct { + int iComplexityAnalysisMode; + int iCalcBgd; + int iMbNumInGom; + long long iFrameComplexity; + int* pGomComplexity; + int* pGomForegroundBlockNum; + signed char* pBackgroundMbFlag; + unsigned int* 
uiRefMbType; + SVAACalcResult* pCalcResult; +} SComplexityAnalysisParam; + +typedef struct { + int iMbRowInGom; + int* pGomComplexity; + int iGomNumInFrame; + long long iFrameComplexity; //255*255(MaxMbSAD)*36864(MaxFS) make the highest bit of 32-bit integer 1 + int iIdrFlag; + SScrollDetectionParam sScrollResult; +} SComplexityAnalysisScreenParam; +///////////////////////////////////////////////////////////////////////////////////////////// + +typedef struct { + void* pCtx; + EResult (*Init) (void* pCtx, int iType, void* pCfg); + EResult (*Uninit) (void* pCtx, int iType); + EResult (*Flush) (void* pCtx, int iType); + EResult (*Process) (void* pCtx, int iType, SPixMap* pSrc, SPixMap* dst); + EResult (*Get) (void* pCtx, int iType, void* pParam); + EResult (*Set) (void* pCtx, int iType, void* pParam); + EResult (*SpecialFeature) (void* pCtx, int iType, void* pIn, void* pOut); +} IWelsVPc; + +#if defined(__cplusplus) && !defined(CINTERFACE) /* C++ style interface */ + +class IWelsVP { + public: + virtual ~IWelsVP() {} + + public: + virtual EResult Init (int iType, void* pCfg) = 0; + virtual EResult Uninit (int iType) = 0; + virtual EResult Flush (int iType) = 0; + virtual EResult Process (int iType, SPixMap* pSrc, SPixMap* dst) = 0; + virtual EResult Get (int iType, void* pParam) = 0; + virtual EResult Set (int iType, void* pParam) = 0; + virtual EResult SpecialFeature (int iType, void* pIn, void* pOut) = 0; +}; + +/* Recommend to invoke the interface via the micro for convenient */ +#define IWelsVPFunc_Init(p, a, b) (p)->Init(a, b) +#define IWelsVPFunc_Uninit(p, a) (p)->Uninit(a) +#define IWelsVPFunc_Flush(p, a) (p)->Flush(a) +#define IWelsVPFunc_Process(p, a, b, c) (p)->Process(a, b, c) +#define IWelsVPFunc_Get(p, a, b) (p)->Get(a, b) +#define IWelsVPFunc_Set(p, a, b) (p)->Set(a, b) +#define IWelsVPFunc_SpecialFeature(p, a, b, c) (p)->SpecialFeature(a, b, c) + +/* C++ interface version */ +#define WELSVP_INTERFACE_VERION (0x8000 + (WELSVP_VERSION & 0x7fff)) +#define 
WELSVP_EXTERNC_BEGIN extern "C" { +#define WELSVP_EXTERNC_END } + +#else /* C style interface */ + +/* Recommend to invoke the interface via the micro for convenient */ +#define IWelsVPFunc_Init(p, a, b) (p)->Init(p->h, a, b) +#define IWelsVPFunc_Uninit(p, a) (p)->Uninit(p->h, a) +#define IWelsVPFunc_Flush(p, a) (p)->Flush(p->h, a) +#define IWelsVPFunc_Process(p, a, b, c) (p)->Process(p->h, a, b, c) +#define IWelsVPFunc_Get(p, a, b) (p)->Get(p->h, a, b) +#define IWelsVPFunc_Set(p, a, b) (p)->Set(p->h, a, b) +#define IWelsVPFunc_SpecialFeature(p, a, b, c) (p)->SpecialFeature(p->h, a, b, c) + +/* C interface version */ +#define WELSVP_INTERFACE_VERION (0x0001 + (WELSVP_VERSION & 0x7fff)) +#define WELSVP_EXTERNC_BEGIN +#define WELSVP_EXTERNC_END + +#endif + +WELSVP_EXTERNC_BEGIN +EResult WelsCreateVpInterface (void** ppCtx, int iVersion /*= WELSVP_INTERFACE_VERION*/); +EResult WelsDestroyVpInterface (void* pCtx , int iVersion /*= WELSVP_INTERFACE_VERION*/); +WELSVP_EXTERNC_END + +////////////////////////////////////////////////////////////////////////////////////////////// +#endif // IWELSVP_H_ + + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/adaptivequantization/AdaptiveQuantization.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/adaptivequantization/AdaptiveQuantization.cpp new file mode 100644 index 000000000..7deefa618 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/adaptivequantization/AdaptiveQuantization.cpp @@ -0,0 +1,270 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ *
+ */
+#include "AdaptiveQuantization.h"
+#include "macros.h"
+WELSVP_NAMESPACE_BEGIN
+
+
+
+// Fixed-point model constants; the trailing notes are the original authors'.
+#define AVERAGE_TIME_MOTION (3000) //0.3046875 // 1/4 + 1/16 - 1/128 ~ 0.3 *AQ_TIME_INT_MULTIPLY
+#define AVERAGE_TIME_TEXTURE_QUALITYMODE (10000) //0.5 // 1/2 *AQ_TIME_INT_MULTIPLY
+#define AVERAGE_TIME_TEXTURE_BITRATEMODE (8750) //0.5 // 1/2 *AQ_TIME_INT_MULTIPLY
+#define MODEL_ALPHA (9910) //1.5 //1.1102 *AQ_TIME_INT_MULTIPLY
+#define MODEL_TIME (58185) //9.0 //5.9842 *AQ_TIME_INT_MULTIPLY
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/*!
+ * \brief  Create the strategy with an all-zero parameter block and pick the
+ *         variance kernel that matches iCpuFlag.
+ * \param  iCpuFlag  CPU feature bits, forwarded to WelsInitVarFunc()
+ */
+CAdaptiveQuantization::CAdaptiveQuantization (int32_t iCpuFlag) {
+  m_CPUFlag = iCpuFlag;
+  m_eMethod = METHOD_ADAPTIVE_QUANT;
+  m_pfVar = NULL;
+  // Every pointer in the parameter block stays NULL until Set() is called.
+  WelsMemset (&m_sAdaptiveQuantParam, 0, sizeof (m_sAdaptiveQuantParam));
+  WelsInitVarFunc (m_pfVar, m_CPUFlag);
+}
+
+CAdaptiveQuantization::~CAdaptiveQuantization() {
+}
+
+/*!
+ * \brief  Build the per-macroblock delta-QP map for the current frame.
+ *
+ * For every 16x16 luma macroblock a motion index (variance of the absolute
+ * difference against the reference frame) and a texture index (variance of
+ * the current frame) are obtained, either from the cached statistics in
+ * m_sAdaptiveQuantParam.pCalcResult or by calling m_pfVar. Each index is
+ * normalised by its scaled frame average and mapped to a delta QP, which is
+ * written to m_sAdaptiveQuantParam.pMotionTextureIndexToDeltaQp. The frame
+ * mean of the values before division by AQ_QSTEP_INT_MULTIPLY is stored in
+ * m_sAdaptiveQuantParam.iAverMotionTextureIndexToDeltaQp.
+ *
+ * \param  iType       not used by this method
+ * \param  pSrcPixMap  current frame; plane 0, its stride and sRect are read
+ * \param  pRefPixMap  reference frame; plane 0 and its stride are read
+ * \return RET_SUCCESS on completion; RET_INVALIDPARAM when a frame or one of
+ *         the buffers supplied through Set() is NULL, or when the frame does
+ *         not contain a single complete macroblock
+ */
+EResult CAdaptiveQuantization::Process (int32_t iType, SPixMap* pSrcPixMap, SPixMap* pRefPixMap) {
+  EResult eReturn = RET_INVALIDPARAM;
+
+  // Reject missing frames and a parameter block that was never configured;
+  // all of these are dereferenced unconditionally below.
+  if (pSrcPixMap == NULL || pRefPixMap == NULL
+      || m_sAdaptiveQuantParam.pMotionTextureUnit == NULL
+      || m_sAdaptiveQuantParam.pCalcResult == NULL
+      || m_sAdaptiveQuantParam.pMotionTextureIndexToDeltaQp == NULL) {
+    return eReturn;
+  }
+
+  int32_t iWidth = pSrcPixMap->sRect.iRectWidth;
+  int32_t iHeight = pSrcPixMap->sRect.iRectHeight;
+  int32_t iMbWidth = iWidth >> 4;
+  int32_t iMbHeight = iHeight >> 4;
+  int32_t iMbTotalNum = iMbWidth * iMbHeight;
+
+  // iMbTotalNum is a divisor further down; a frame narrower or shorter than
+  // 16 pixels (or with a negative size) has no macroblock to analyse.
+  if (iMbWidth <= 0 || iMbHeight <= 0) {
+    return eReturn;
+  }
+
+  SMotionTextureUnit* pMotionTexture = NULL;
+  SVAACalcResult* pVaaCalcResults = NULL;
+  int32_t iMotionTextureIndexToDeltaQp = 0;
+  int32_t iAverMotionTextureIndexToDeltaQp = 0; // double to uint32
+  int64_t iAverageMotionIndex = 0; // double to float
+  int64_t iAverageTextureIndex = 0;
+
+  int64_t iQStep = 0;
+  int64_t iLumaMotionDeltaQp = 0;
+  int64_t iLumaTextureDeltaQp = 0;
+
+  uint8_t* pRefFrameY = (uint8_t*)pRefPixMap->pPixel[0];
+  uint8_t* pCurFrameY = (uint8_t*)pSrcPixMap->pPixel[0];
+  int32_t iRefStride = pRefPixMap->iStride[0];
+  int32_t iCurStride = pSrcPixMap->iStride[0];
+
+  uint8_t* pRefFrameTmp = NULL, *pCurFrameTmp = NULL;
+  int32_t i = 0, j = 0;
+
+  /////////////////////////////////////// motion //////////////////////////////////
+  // motion MB residual variance
+  pMotionTexture = m_sAdaptiveQuantParam.pMotionTextureUnit;
+  pVaaCalcResults = m_sAdaptiveQuantParam.pCalcResult;
+
+  if (pVaaCalcResults->pRefY == pRefFrameY && pVaaCalcResults->pCurY == pCurFrameY) {
+    // The cached statistics were computed for exactly this frame pair, so the
+    // pixels are not touched: variance = E[x^2] - E[x]^2 with >> 8 as the
+    // division by the 256 samples of a macroblock.
+    int32_t iSumDiff, iSQDiff, uiSum, iSQSum;
+    for (int32_t iMbIndex = 0; iMbIndex < iMbTotalNum; ++iMbIndex) {
+      iSumDiff = pVaaCalcResults->pSad8x8[iMbIndex][0];
+      iSumDiff += pVaaCalcResults->pSad8x8[iMbIndex][1];
+      iSumDiff += pVaaCalcResults->pSad8x8[iMbIndex][2];
+      iSumDiff += pVaaCalcResults->pSad8x8[iMbIndex][3];
+
+      iSQDiff = pVaaCalcResults->pSsd16x16[iMbIndex];
+      uiSum = pVaaCalcResults->pSum16x16[iMbIndex];
+      iSQSum = pVaaCalcResults->pSumOfSquare16x16[iMbIndex];
+
+      iSumDiff = iSumDiff >> 8;
+      pMotionTexture->uiMotionIndex = (iSQDiff >> 8) - (iSumDiff * iSumDiff);
+
+      uiSum = uiSum >> 8;
+      pMotionTexture->uiTextureIndex = (iSQSum >> 8) - (uiSum * uiSum);
+
+      iAverageMotionIndex += pMotionTexture->uiMotionIndex;
+      iAverageTextureIndex += pMotionTexture->uiTextureIndex;
+      pMotionTexture++;
+    }
+  } else {
+    // No usable cache: run the variance kernel on every macroblock, walking
+    // the frames one macroblock row (16 pixel rows) at a time.
+    for (j = 0; j < iMbHeight; j ++) {
+      pRefFrameTmp = pRefFrameY;
+      pCurFrameTmp = pCurFrameY;
+      for (i = 0; i < iMbWidth; i++) {
+        m_pfVar (pRefFrameTmp, iRefStride, pCurFrameTmp, iCurStride, pMotionTexture);
+        iAverageMotionIndex += pMotionTexture->uiMotionIndex;
+        iAverageTextureIndex += pMotionTexture->uiTextureIndex;
+        pMotionTexture++;
+        pRefFrameTmp += MB_WIDTH_LUMA;
+        pCurFrameTmp += MB_WIDTH_LUMA;
+      }
+      pRefFrameY += (iRefStride) << 4;
+      pCurFrameY += (iCurStride) << 4;
+    }
+  }
+  iAverageMotionIndex = WELS_DIV_ROUND64 (iAverageMotionIndex * AQ_INT_MULTIPLY, iMbTotalNum);
+  iAverageTextureIndex = WELS_DIV_ROUND64 (iAverageTextureIndex * AQ_INT_MULTIPLY, iMbTotalNum);
+  // A (near) zero average would become a zero divisor below; substitute the
+  // fixed-point representation of 1.
+  if ((iAverageMotionIndex <= AQ_PESN) && (iAverageMotionIndex >= -AQ_PESN)) {
+    iAverageMotionIndex = AQ_INT_MULTIPLY;
+  }
+  if ((iAverageTextureIndex <= AQ_PESN) && (iAverageTextureIndex >= -AQ_PESN)) {
+    iAverageTextureIndex = AQ_INT_MULTIPLY;
+  }
+  // motion mb residual map to QP
+  // texture mb original map to QP
+  iAverMotionTextureIndexToDeltaQp = 0;
+  iAverageMotionIndex = WELS_DIV_ROUND64 (AVERAGE_TIME_MOTION * iAverageMotionIndex, AQ_TIME_INT_MULTIPLY);
+
+  if (m_sAdaptiveQuantParam.iAdaptiveQuantMode == AQ_QUALITY_MODE) {
+    iAverageTextureIndex = WELS_DIV_ROUND64 (AVERAGE_TIME_TEXTURE_QUALITYMODE * iAverageTextureIndex, AQ_TIME_INT_MULTIPLY);
+  } else {
+    iAverageTextureIndex = WELS_DIV_ROUND64 (AVERAGE_TIME_TEXTURE_BITRATEMODE * iAverageTextureIndex, AQ_TIME_INT_MULTIPLY);
+  }
+
+  int64_t iAQ_EPSN = - ((int64_t)AQ_PESN * AQ_TIME_INT_MULTIPLY * AQ_QSTEP_INT_MULTIPLY / AQ_INT_MULTIPLY);
+  pMotionTexture = m_sAdaptiveQuantParam.pMotionTextureUnit;
+  for (j = 0; j < iMbHeight; j ++) {
+    for (i = 0; i < iMbWidth; i++) {
+      // Texture term: a = index / average, delta = MODEL_TIME * (a - 1) / (a + MODEL_ALPHA).
+      int64_t a = WELS_DIV_ROUND64 ((int64_t) (pMotionTexture->uiTextureIndex) * AQ_INT_MULTIPLY * AQ_TIME_INT_MULTIPLY,
+                                    iAverageTextureIndex);
+      iQStep = WELS_DIV_ROUND64 ((a - AQ_TIME_INT_MULTIPLY) * AQ_QSTEP_INT_MULTIPLY, (a + MODEL_ALPHA));
+      iLumaTextureDeltaQp = MODEL_TIME * iQStep;// range +- 6
+
+      iMotionTextureIndexToDeltaQp = ((int32_t) (iLumaTextureDeltaQp / (AQ_TIME_INT_MULTIPLY)));
+
+      // Motion term, same model applied to the motion index.
+      a = WELS_DIV_ROUND64 (((int64_t)pMotionTexture->uiMotionIndex) * AQ_INT_MULTIPLY * AQ_TIME_INT_MULTIPLY,
+                            iAverageMotionIndex);
+      iQStep = WELS_DIV_ROUND64 ((a - AQ_TIME_INT_MULTIPLY) * AQ_QSTEP_INT_MULTIPLY, (a + MODEL_ALPHA));
+      iLumaMotionDeltaQp = MODEL_TIME * iQStep;// range +- 6
+
+      // Quality mode only lets the motion term lower the QP (clearly negative
+      // values); bitrate mode always adds it.
+      if ((m_sAdaptiveQuantParam.iAdaptiveQuantMode == AQ_QUALITY_MODE && iLumaMotionDeltaQp < iAQ_EPSN)
+          || (m_sAdaptiveQuantParam.iAdaptiveQuantMode == AQ_BITRATE_MODE)) {
+        iMotionTextureIndexToDeltaQp += ((int32_t) (iLumaMotionDeltaQp / (AQ_TIME_INT_MULTIPLY)));
+      }
+
+      m_sAdaptiveQuantParam.pMotionTextureIndexToDeltaQp[j * iMbWidth + i] = (int8_t) (iMotionTextureIndexToDeltaQp /
+          AQ_QSTEP_INT_MULTIPLY);
+      iAverMotionTextureIndexToDeltaQp += iMotionTextureIndexToDeltaQp;
+      pMotionTexture++;
+    }
+  }
+
+  m_sAdaptiveQuantParam.iAverMotionTextureIndexToDeltaQp = iAverMotionTextureIndexToDeltaQp / iMbTotalNum;
+
+  eReturn = RET_SUCCESS;
+
+  return eReturn;
+}
+
+
+
+/*!
+ * \brief  Replace the whole parameter block with a copy of *pParam.
+ * \param  iType   not used by this method
+ * \param  pParam  points to an SAdaptiveQuantizationParam; the struct is
+ *                 copied by value, the buffers it points to are not
+ * \return RET_INVALIDPARAM when pParam is NULL, RET_SUCCESS otherwise
+ */
+EResult CAdaptiveQuantization::Set (int32_t iType, void* pParam) {
+  if (pParam == NULL) {
+    return RET_INVALIDPARAM;
+  }
+
+  m_sAdaptiveQuantParam = * (SAdaptiveQuantizationParam*)pParam;
+
+  return RET_SUCCESS;
+}
+
+/*!
+ * \brief  Report the average delta QP computed by the last Process() call.
+ * \param  iType   not used by this method
+ * \param  pParam  points to an SAdaptiveQuantizationParam; only its
+ *                 iAverMotionTextureIndexToDeltaQp member is written
+ * \return RET_INVALIDPARAM when pParam is NULL, RET_SUCCESS otherwise
+ */
+EResult CAdaptiveQuantization::Get (int32_t iType, void* pParam) {
+  if (pParam == NULL) {
+    return RET_INVALIDPARAM;
+  }
+
+  SAdaptiveQuantizationParam* sAdaptiveQuantParam = (SAdaptiveQuantizationParam*)pParam;
+
+  sAdaptiveQuantParam->iAverMotionTextureIndexToDeltaQp = m_sAdaptiveQuantParam.iAverMotionTextureIndexToDeltaQp;
+
+  return RET_SUCCESS;
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////
+
+/*!
+ * \brief  Select the 16x16 variance kernel: the C version by default, replaced
+ *         by a SIMD version when it is compiled in and iCpuFlag reports it.
+ * \param  pfVar     receives the chosen function pointer
+ * \param  iCpuFlag  CPU feature bits (WELS_CPU_*)
+ */
+void CAdaptiveQuantization::WelsInitVarFunc (PVarFunc& pfVar, int32_t iCpuFlag) {
+  pfVar = SampleVariance16x16_c;
+
+#ifdef X86_ASM
+  if (iCpuFlag & WELS_CPU_SSE2) {
+    pfVar = SampleVariance16x16_sse2;
+  }
+#endif
+#ifdef HAVE_NEON
+  if (iCpuFlag & WELS_CPU_NEON) {
+    pfVar = SampleVariance16x16_neon;
+  }
+#endif
+#ifdef HAVE_NEON_AARCH64
+  if (iCpuFlag & WELS_CPU_NEON) {
+    pfVar = SampleVariance16x16_AArch64_neon;
+  }
+#endif
+}
+
+/*!
+ * \brief  Reference C kernel: motion and texture variance of one macroblock.
+ *
+ * Walks MB_WIDTH_LUMA x MB_WIDTH_LUMA samples. uiMotionIndex is the variance
+ * of |ref - src|, uiTextureIndex the variance of src; both use >> 8 as the
+ * division by the sample count, which is exact only if MB_WIDTH_LUMA is 16
+ * (defined outside this file).
+ *
+ * \param  pRefY           top-left sample of the reference macroblock
+ * \param  iRefStride      distance between reference rows
+ * \param  pSrcY           top-left sample of the current macroblock
+ * \param  iSrcStride      distance between current rows
+ * \param  pMotionTexture  receives the two indices
+ */
+void SampleVariance16x16_c (uint8_t* pRefY, int32_t iRefStride, uint8_t* pSrcY, int32_t iSrcStride,
+                            SMotionTextureUnit* pMotionTexture) {
+  uint32_t uiCurSquare = 0, uiSquare = 0;
+  // 16-bit sums are sufficient for 256 samples of at most 255 (65280).
+  uint16_t uiCurSum = 0, uiSum = 0;
+
+  for (int32_t y = 0; y < MB_WIDTH_LUMA; y++) {
+    for (int32_t x = 0; x < MB_WIDTH_LUMA; x++) {
+      uint32_t uiDiff = WELS_ABS (pRefY[x] - pSrcY[x]);
+      uiSum += uiDiff;
+      uiSquare += uiDiff * uiDiff;
+
+      uiCurSum += pSrcY[x];
+      uiCurSquare += pSrcY[x] * pSrcY[x];
+    }
+    pRefY += iRefStride;
+    pSrcY += iSrcStride;
+  }
+
+  uiSum = uiSum >> 8;
+  pMotionTexture->uiMotionIndex = (uiSquare >> 8) - (uiSum * uiSum);
+
+  uiCurSum = uiCurSum >> 8;
+  pMotionTexture->uiTextureIndex = (uiCurSquare >> 8) - (uiCurSum * uiCurSum);
+}
+
+WELSVP_NAMESPACE_END
diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/adaptivequantization/AdaptiveQuantization.h b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/adaptivequantization/AdaptiveQuantization.h
new file mode 100644
index 000000000..4d04a09c2
--- /dev/null
+++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/adaptivequantization/AdaptiveQuantization.h
@@ -0,0 +1,97 @@
+/*!
+ * \copy
+ * Copyright (c) 2011-2013, Cisco Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * \file : AdaptiveQuantization.h
+ *
+ * \brief : adaptive quantization class of wels video processor class
+ *
+ * \date : 2011/03/21
+ *
+ * \description : 1. rewrite the package code of scene change detection class
+ *
+ */
+
+#ifndef WELSVP_ADAPTIVEQUANTIZATION_H
+#define WELSVP_ADAPTIVEQUANTIZATION_H
+
+#include "util.h"
+#include "memory.h"
+#include "WelsFrameWork.h"
+#include "IWelsVP.h"
+#include "cpu.h"
+
+WELSVP_NAMESPACE_BEGIN
+
+// Signature shared by every 16x16 variance kernel: reference block and its
+// stride, current block and its stride, and the unit that receives the result.
+typedef void (VarFunc) (uint8_t* pRefY, int32_t iRefStrideY, uint8_t* pSrc, int32_t iSrcStrideY,
+                        SMotionTextureUnit* pMotionTexture);
+
+// Pointer to a variance kernel; CAdaptiveQuantization stores one in m_pfVar.
+typedef VarFunc* PVarFunc;
+
+// Portable C kernel, always declared.
+VarFunc SampleVariance16x16_c;
+
+// Optional kernels, declared only when the matching build macro is defined.
+#ifdef X86_ASM
+WELSVP_EXTERN_C_BEGIN
+VarFunc SampleVariance16x16_sse2;
+WELSVP_EXTERN_C_END
+#endif
+
+#ifdef HAVE_NEON
+WELSVP_EXTERN_C_BEGIN
+VarFunc SampleVariance16x16_neon;
+WELSVP_EXTERN_C_END
+#endif
+
+#ifdef HAVE_NEON_AARCH64
+WELSVP_EXTERN_C_BEGIN
+VarFunc SampleVariance16x16_AArch64_neon;
+WELSVP_EXTERN_C_END
+#endif
+
+// Adaptive-quantization strategy: Set() supplies the parameter block,
+// Process() works on a source/reference frame pair, Get() reads results back.
+class CAdaptiveQuantization : public IStrategy {
+ public:
+  // iCpuFlag: CPU feature bits used to choose the variance kernel.
+  CAdaptiveQuantization (int32_t iCpuFlag);
+  ~CAdaptiveQuantization();
+
+  // pSrc is the current frame, pRef the reference frame.
+  EResult Process (int32_t iType, SPixMap* pSrc, SPixMap* pRef);
+  // pParam is an untyped pointer to the parameter block.
+  // NOTE(review): presumably an SAdaptiveQuantizationParam* - confirm in the .cpp.
+  EResult Set (int32_t iType, void* pParam);
+  EResult Get (int32_t iType, void* pParam);
+
+ private:
+  // Writes the kernel chosen for iCpuFlag into pfVar.
+  void WelsInitVarFunc (PVarFunc& pfVar, int32_t iCpuFlag);
+
+ private:
+  PVarFunc m_pfVar;                                 // selected variance kernel
+  int32_t m_CPUFlag;                                // CPU feature bits given to the constructor
+  SAdaptiveQuantizationParam m_sAdaptiveQuantParam; // parameter block
+};
+
+WELSVP_NAMESPACE_END
+
+#endif
diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/arm/adaptive_quantization.S b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/arm/adaptive_quantization.S
new file mode 100644
index 000000000..da5b9075f
--- /dev/null
+++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/arm/adaptive_quantization.S
@@ -0,0 +1,110 @@
+/*!
+ * \copy
+ * Copyright (c) 2013, Cisco Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifdef HAVE_NEON
+#include "arm_arch_common_macro.S"
+
+// Accumulate the squares of 16 bytes: arg0/arg1 are the two 8-byte halves,
+// arg2 is the accumulator of four 32-bit partial sums. Clobbers q3 and q8.
+.macro SQR_ADD_16BYTES arg0, arg1, arg2
+    vmull.u8 q3, \arg0, \arg0
+    vmull.u8 q8, \arg1, \arg1
+    vpadal.u16 \arg2, q3
+    vpadal.u16 \arg2, q8
+.endm
+
+
+// Variance of |ref - src| and variance of src over 16 rows of 16 bytes.
+// Register use as read from the code: r0 = ref pointer, r1 = ref stride,
+// r2 = src pointer, r3 = src stride; the result pointer is read from the stack.
+// NOTE(review): presumably the fifth VarFunc argument (pMotionTexture) - confirm
+// against the C prototype.
+// Two 16-bit results are stored through it: the motion value, then the texture value.
+WELS_ASM_FUNC_BEGIN SampleVariance16x16_neon
+    stmdb sp!, {r4}
+
+    vld1.8 {q15}, [r0], r1 //save the ref data (16bytes)
+    vld1.8 {q14}, [r2], r3 //save the src data (16bytes)
+
+
+    // first row initialises the four accumulators (q12, q13, q10, q9)
+    vabd.u8 q13, q14, q15
+    vmull.u8 q12, d27, d27
+    vmull.u8 q11, d26, d26
+    vaddl.u16 q12, d24, d25
+    vpadal.u16 q12, q11 //sqr
+
+    vaddl.u8 q13, d26, d27 //sum
+
+    vaddl.u8 q10, d28, d29 //sum_cur
+
+    vmull.u8 q9, d29, d29
+    vmull.u8 q8, d28, d28
+    vaddl.u16 q9, d18, d19 //sqr_cur
+    vpadal.u16 q9, q8
+
+    // remaining 15 rows
+    mov r4, #15
+pixel_var_16x16_loop0:
+
+    vld1.8 {q0}, [r0], r1 //save the ref data (16bytes)
+    vld1.8 {q1}, [r2], r3 //save the src data (16bytes)
+
+    vabd.u8 q2, q0, q1
+
+    //q10 save sum_cur
+    vpadal.u8 q10, q1
+
+    //q12 save sqr
+    SQR_ADD_16BYTES d4, d5, q12
+
+    //q13 save sum
+    vpadal.u8 q13, q2
+
+    subs r4, #1
+
+    //q9 save sqr_cur
+    SQR_ADD_16BYTES d2, d3, q9
+
+    bne pixel_var_16x16_loop0
+
+    // horizontal reduction: d0 = {sum, sum_cur}, d1 = {sqr, sqr_cur}
+    vadd.u16 d0, d26, d27 //sum
+    vadd.u16 d1, d20, d21 //sum_cur
+    vpaddl.u16 q0, q0
+    vadd.u32 d2, d24, d25 //sqr
+    vadd.u32 d3, d18, d19 //sqr_cur
+    vpadd.u32 d0, d0, d1
+    vpadd.u32 d1, d2, d3
+
+    // result pointer sits one word above the saved r4
+    ldr r4, [sp, #4]
+
+    // (sqr >> 8) - (sum >> 8)^2 for both lanes, then store the low 16 bits of each
+    vshr.u32 q0, q0, #8
+    vmul.u32 d0, d0
+    vsub.u32 d0, d1, d0
+    vmovl.u32 q0, d0
+    vst2.16 {d0[0], d1[0]}, [r4]
+
+    ldmia sp!, {r4}
+
+WELS_ASM_FUNC_END
+
+#endif
diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/arm/down_sample_neon.S b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/arm/down_sample_neon.S
new file mode 100644
index 000000000..53a66a8c9
--- /dev/null
+++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/arm/down_sample_neon.S
@@ -0,0 +1,443 @@
+/*!
+ * \copy
+ * Copyright (c) 2013, Cisco Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifdef HAVE_NEON
+#include "arm_arch_common_macro.S"
+
+// Register roles in the Dyadic* and comp_ds_* routines, as read from the code:
+// r0 = destination pointer, r1 = destination stride, r2 = source pointer,
+// r3 = source stride; source width and height are loaded from the stack.
+// NOTE(review): the matching C prototypes are not in this file - confirm.
+
+
+// 2:1 downsample in both directions. Each pass reads 32 bytes from two
+// consecutive source rows and writes 16 bytes: neighbouring pixels are
+// averaged with rounding horizontally, then the two rows vertically.
+WELS_ASM_FUNC_BEGIN DyadicBilinearDownsampler_neon
+    stmdb sp!, {r4-r8, lr}
+
+    //Get the width and height
+    ldr r4, [sp, #24] //src_width
+    ldr r5, [sp, #28] //src_height
+
+    //Initialize the register
+    mov r6, r2
+    mov r8, r0
+    mov lr, #0
+    lsr r5, #1
+
+    //Save the 16 bytes located just after the last output row (dst + stride * height/2);
+    //they are written back once the loop has finished
+    mla r7, r1, r5, r0
+    vld1.32 {q15}, [r7]
+
+    add r7, r2, r3
+    //process the picture, one pair of source rows at a time
+comp_ds_bilinear_loop0:
+
+    vld1.8 {q0,q1}, [r2]!
+    vld1.8 {q2,q3}, [r7]!
+    vuzp.8 q0, q1
+    vuzp.8 q2, q3
+    vrhadd.u8 q0, q0, q1
+    vrhadd.u8 q2, q2, q3
+    vrhadd.u8 q0, q0, q2
+    vst1.32 {q0}, [r0]!
+    add lr, #32
+
+    // once a full source row is consumed: next row pair, next output row
+    cmp lr, r4
+    movcs lr, #0
+    addcs r6, r6, r3, lsl #1
+    movcs r2, r6
+    addcs r7, r2, r3
+    addcs r8, r1
+    movcs r0, r8
+    subscs r5, #1
+    bne comp_ds_bilinear_loop0
+
+    //restore the bytes saved before the loop
+    vst1.32 {q15}, [r0]
+
+    ldmia sp!, {r4-r8,lr}
+WELS_ASM_FUNC_END
+
+
+// 2:1 downsample, 8 source bytes (4 output bytes) per inner iteration.
+WELS_ASM_FUNC_BEGIN comp_ds_bilinear_w_x8_neon
+    stmdb sp!, {r4-r7, lr}
+
+    //Get the width and height
+    ldr r4, [sp, #20] //src_width
+    ldr r5, [sp, #24] //src_height
+
+    //Get the difference
+    sub lr, r3, r4             // source stride - source width
+    sub r1, r1, r4, lsr #1     // destination stride - output width
+
+    lsr r5, #1
+
+    //process one pair of source rows
+comp_ds_bilinear_w_x8_loop0:
+
+    lsr r6, r4, #3
+    add r7, r2, r3
+    //process one group of 8 source bytes
+comp_ds_bilinear_w_x8_loop1:
+
+    vld1.8 {d0}, [r2]!
+    vld1.8 {d1}, [r7]!
+    vpaddl.u8 q0, q0
+    vrshr.u16 q0, #1
+    vrhadd.u16 d0, d1
+
+    vmovn.u16 d0, q0
+    vst1.32 {d0[0]}, [r0]!
+    subs r6, #1
+    bne comp_ds_bilinear_w_x8_loop1
+
+    add r2, r7, lr
+    add r0, r1
+    subs r5, #1
+    bne comp_ds_bilinear_w_x8_loop0
+
+    ldmia sp!, {r4-r7,lr}
+WELS_ASM_FUNC_END
+
+
+// 2:1 downsample, 16 source bytes (8 output bytes) per inner iteration.
+WELS_ASM_FUNC_BEGIN comp_ds_bilinear_w_x16_neon
+    stmdb sp!, {r4-r7, lr}
+
+    //Get the width and height
+    ldr r4, [sp, #20] //src_width
+    ldr r5, [sp, #24] //src_height
+
+    //Get the difference
+    sub lr, r3, r4             // source stride - source width
+    sub r1, r1, r4, lsr #1     // destination stride - output width
+
+    lsr r5, #1
+
+    //process one pair of source rows
+comp_ds_bilinear_w_x16_loop0:
+
+    lsr r6, r4, #4
+    add r7, r2, r3
+    //process one group of 16 source bytes
+comp_ds_bilinear_w_x16_loop1:
+
+    vld1.8 {q0}, [r2]!
+    vld1.8 {q1}, [r7]!
+    vpaddl.u8 q0, q0
+    vpaddl.u8 q1, q1
+    vrshr.u16 q0, #1
+    vrshr.u16 q1, #1
+    vrhadd.u16 q0, q1
+
+    vmovn.u16 d0, q0
+    vst1.32 {d0}, [r0]!
+    subs r6, #1
+    bne comp_ds_bilinear_w_x16_loop1
+
+    add r2, r7, lr
+    add r0, r1
+    subs r5, #1
+    bne comp_ds_bilinear_w_x16_loop0
+
+    ldmia sp!, {r4-r7,lr}
+WELS_ASM_FUNC_END
+
+
+// 2:1 downsample, 32 source bytes (16 output bytes) per inner iteration.
+WELS_ASM_FUNC_BEGIN DyadicBilinearDownsamplerWidthx32_neon
+    stmdb sp!, {r4-r7, lr}
+
+    //Get the width and height
+    ldr r4, [sp, #20] //src_width
+    ldr r5, [sp, #24] //src_height
+
+    //Get the difference
+    sub lr, r3, r4             // source stride - source width
+    sub r1, r1, r4, lsr #1     // destination stride - output width
+
+    lsr r5, #1
+
+    //process one pair of source rows
+comp_ds_bilinear_w_x32_loop0:
+
+    lsr r6, r4, #5
+    add r7, r2, r3
+    //process one group of 32 source bytes
+comp_ds_bilinear_w_x32_loop1:
+
+    vld1.8 {q0,q1}, [r2]!
+    vld1.8 {q2,q3}, [r7]!
+    vuzp.8 q0, q1
+    vuzp.8 q2, q3
+    vrhadd.u8 q0, q0, q1
+    vrhadd.u8 q2, q2, q3
+    vrhadd.u8 q0, q0, q2
+    vst1.32 {q0}, [r0]!
+    subs r6, #1
+    bne comp_ds_bilinear_w_x32_loop1
+
+    add r2, r7, lr
+    add r0, r1
+    subs r5, #1
+    bne comp_ds_bilinear_w_x32_loop0
+
+    ldmia sp!, {r4-r7,lr}
+WELS_ASM_FUNC_END
+
+
+// Arbitrary-ratio bilinear downsample with 15-bit fixed-point steps.
+// Register roles as read from the code: r0 = destination pointer,
+// r1 = destination stride, r2 = output width, r3 = output height; source
+// pointer, source stride, scaleX and scaleY are loaded from the stack.
+// The last pixel of every row and the whole last row are copied from the
+// source without interpolation.
+// NOTE(review): the matching C prototype is not in this file - confirm.
+WELS_ASM_FUNC_BEGIN GeneralBilinearAccurateDownsampler_neon
+    stmdb sp!, {r4-r12, lr}
+
+    //Get the data from stack
+    ldr r4, [sp, #40] //the addr of src
+    ldr r5, [sp, #44] //the value of src_stride
+    ldr r6, [sp, #48] //the value of scaleX
+    ldr r7, [sp, #52] //the value of scaleY
+
+    mov r10, #32768
+    sub r10, #1
+    and r8, r6, r10 // r8 uinc (scaleX & 0x7fff)
+    mov r11, #-1
+    mul r11, r8 // r11 -uinc
+
+    vdup.s16 d2, r8
+    vdup.s16 d0, r11
+    vzip.s16 d0, d2 // uinc -uinc uinc -uinc
+
+    and r9, r7, r10 // r9 vinc (scaleY & 0x7fff)
+    mov r11, #-1
+    mul r11, r9 // r11 -vinc
+
+    vdup.s16 d2, r9
+    vdup.s16 d3, r11
+    vext.8 d5, d3, d2, #4 // vinc vinc -vinc -vinc
+
+    mov r11, #0x40000000
+    mov r12, #0x4000
+    sub r12, #1
+    add r11, r12
+    vdup.s32 d1, r11; //init u 16384 16383 16384 16383
+
+    mov r11, #16384
+    vdup.s16 d16, r11
+    sub r11, #1
+    vdup.s16 d17, r11
+    vext.8 d7, d17, d16, #4 //init v 16384 16384 16383 16383
+
+    veor q14, q14
+    sub r1, r2 // stride - width
+    mov r8, #16384 // yInverse
+    sub r3, #1
+
+_HEIGHT:
+    ldr r4, [sp, #40] //the addr of src
+    mov r11, r8
+    lsr r11, #15
+    mul r11, r5
+    add r11, r4 // get current row address
+    mov r12, r11
+    add r12, r5 // address of the row below
+
+    mov r9, #16384 // xInverse
+    sub r10, r2, #1
+    vmov.s16 d6, d1
+
+_WIDTH:
+    mov lr, r9
+    lsr lr, #15
+    add r4, r11,lr
+    vld2.8 {d28[0],d29[0]}, [r4] //q14: 0000000b0000000a;
+    add r4, r12,lr
+    vld2.8 {d28[4],d29[4]}, [r4] //q14: 000d000b000c000a;
+    vzip.32 d28, d29 //q14: 000d000c000b000a;
+
+    vmull.u16 q13, d6, d7 //q13: init u * init v
+    vmull.u32 q12, d26,d28
+    vmlal.u32 q12, d27,d29
+    vqadd.u64 d24, d24,d25
+    vrshr.u64 d24, #30
+
+    vst1.8 {d24[0]}, [r0]!
+    add r9, r6
+    vadd.u16 d6, d0 // inc u
+    vshl.u16 d6, #1 // shift left then right by 1: clear bit 15
+    vshr.u16 d6, #1
+    subs r10, #1
+    bne _WIDTH
+
+WIDTH_END:
+    // last pixel of the row is copied, not interpolated
+    lsr r9, #15
+    add r4,r11,r9
+    vld1.8 {d24[0]}, [r4]
+    vst1.8 {d24[0]}, [r0]
+    add r0, #1
+    add r8, r7
+    add r0, r1
+    vadd.s16 d7, d5 // inc v
+    vshl.u16 d7, #1
+    vshr.u16 d7, #1
+    subs r3, #1
+    bne _HEIGHT
+
+LAST_ROW:
+    ldr r4, [sp, #40] //the addr of src
+    lsr r8, #15
+    mul r8, r5
+    add r4, r8 // get current row address
+    mov r9, #16384
+
+_LAST_ROW_WIDTH:
+    mov r11, r9
+    lsr r11, #15
+
+    add r3, r4,r11
+    vld1.8 {d0[0]}, [r3]
+    vst1.8 {d0[0]}, [r0]
+    add r0, #1
+    add r9, r6
+    subs r2, #1
+    bne _LAST_ROW_WIDTH
+
+    ldmia sp!, {r4-r12, lr}
+WELS_ASM_FUNC_END
+
+// 3:1 downsample. Each pass reads 48 bytes from two source rows and writes
+// 16 bytes; only the first two bytes of every group of three, and only two of
+// every three source rows, contribute to the result.
+WELS_ASM_FUNC_BEGIN DyadicBilinearOneThirdDownsampler_neon
+    stmdb sp!, {r4-r8, lr}
+
+    //Get the width and height
+    ldr r4, [sp, #24] //src_width
+    ldr r5, [sp, #28] //src_height
+    // NOTE(review): unlike the 1/2 and 1/4 variants, r5 is not divided here, so
+    // this stack argument is presumably already the output row count - confirm against the caller
+
+    //Initialize the register
+    mov r6, r2
+    mov r8, r0
+    mov lr, #0
+
+    //Save the 16 bytes located at dst + stride * r5; they are written back after the loop
+    mla r7, r1, r5, r0
+    vld1.32 {q15}, [r7]
+
+    add r7, r2, r3
+    //process the picture, one pair of source rows at a time
+comp_ds_bilinear_onethird_loop0:
+
+    vld3.8 {d0, d1, d2}, [r2]!
+    vld3.8 {d3, d4, d5}, [r2]!
+    vld3.8 {d16, d17, d18}, [r7]!
+    vld3.8 {d19, d20, d21}, [r7]!
+
+    vaddl.u8 q11, d0, d1
+    vaddl.u8 q12, d3, d4
+    vaddl.u8 q13, d16, d17
+    vaddl.u8 q14, d19, d20
+    vrshr.u16 q11, #1
+    vrshr.u16 q12, #1
+    vrshr.u16 q13, #1
+    vrshr.u16 q14, #1
+
+    vrhadd.u16 q11, q13
+    vrhadd.u16 q12, q14
+
+    vmovn.u16 d0, q11
+    vmovn.u16 d1, q12
+    vst1.8 {q0}, [r0]!
+
+    add lr, #48
+    cmp lr, r4
+    movcs lr, #0
+    addcs r6, r6, r3, lsl #1
+    addcs r6, r6, r3 // advance three source rows in total
+    movcs r2, r6
+    addcs r7, r2, r3
+    addcs r8, r1
+    movcs r0, r8
+    subscs r5, #1
+    bne comp_ds_bilinear_onethird_loop0
+
+    //restore the bytes saved before the loop
+    vst1.32 {q15}, [r0]
+
+    ldmia sp!, {r4-r8,lr}
+WELS_ASM_FUNC_END
+
+// 4:1 downsample. Each pass reads 64 bytes from two source rows and writes
+// 16 bytes; only the first two bytes of every group of four, and only two of
+// every four source rows, contribute to the result.
+WELS_ASM_FUNC_BEGIN DyadicBilinearQuarterDownsampler_neon
+    stmdb sp!, {r4-r8, lr}
+
+    //Get the width and height
+    ldr r4, [sp, #24] //src_width
+    ldr r5, [sp, #28] //src_height
+
+    //Initialize the register
+    mov r6, r2
+    mov r8, r0
+    mov lr, #0
+    lsr r5, #2
+
+    //Save the 16 bytes located just after the last output row; they are written back after the loop
+    mla r7, r1, r5, r0
+    vld1.32 {q15}, [r7]
+
+    add r7, r2, r3
+    //process the picture, one pair of source rows at a time
+comp_ds_bilinear_quarter_loop0:
+
+    vld2.16 {q0, q1}, [r2]!
+    vld2.16 {q2, q3}, [r2]!
+    vld2.16 {q8, q9}, [r7]!
+    vld2.16 {q10, q11}, [r7]!
+
+    vpaddl.u8 q0, q0
+    vpaddl.u8 q2, q2
+    vpaddl.u8 q8, q8
+    vpaddl.u8 q10, q10
+    vrshr.u16 q0, #1
+    vrshr.u16 q2, #1
+    vrshr.u16 q8, #1
+    vrshr.u16 q10, #1
+
+    vrhadd.u16 q0, q8
+    vrhadd.u16 q2, q10
+    vmovn.u16 d0, q0
+    vmovn.u16 d1, q2
+    vst1.8 {q0}, [r0]!
+
+    add lr, #64
+    cmp lr, r4
+    movcs lr, #0
+    addcs r6, r6, r3, lsl #2
+    movcs r2, r6
+    addcs r7, r2, r3
+    addcs r8, r1
+    movcs r0, r8
+    subscs r5, #1
+    bne comp_ds_bilinear_quarter_loop0
+
+    //restore the bytes saved before the loop
+    vst1.32 {q15}, [r0]
+
+    ldmia sp!, {r4-r8,lr}
+WELS_ASM_FUNC_END
+
+#endif
diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/arm/pixel_sad_neon.S b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/arm/pixel_sad_neon.S
new file mode 100644
index 000000000..b1fc4fa37
--- /dev/null
+++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/arm/pixel_sad_neon.S
@@ -0,0 +1,67 @@
+/*!
+ * \copy
+ * Copyright (c) 2013, Cisco Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifdef HAVE_NEON
+#include "arm_arch_common_macro.S"
+
+
+// Sum of absolute differences over 8 rows of 8 bytes.
+// Register use as read from the code: r0 / r1 = first block pointer and
+// stride, r2 / r3 = second block pointer and stride; the sum is left in r0.
+WELS_ASM_FUNC_BEGIN WelsProcessingSampleSad8x8_neon
+    stmdb sp!, {lr}
+    //Loading a horizontal line data (8 bytes)
+    vld1.8 {d0}, [r0], r1
+    vld1.8 {d1}, [r2], r3
+
+    //Do the SAD for 8 bytes (first row initialises the accumulator q1)
+    vabdl.u8 q1, d0, d1
+
+    // remaining 7 rows; lr is the loop counter
+    mov lr, #7
+pixel_sad_8x8_loop0:
+
+    //Loading a horizontal line data (8 bytes)
+    vld1.8 {d0}, [r0], r1
+    vld1.8 {d1}, [r2], r3
+
+    subs lr, #1
+
+    //Do the SAD for 8 bytes
+    vabal.u8 q1, d0, d1
+    bne pixel_sad_8x8_loop0
+
+    // reduce the eight 16-bit partial sums to a single value
+    vadd.u16 d2, d3
+    vpaddl.u16 d2, d2
+    vpaddl.u32 d2, d2
+    vmov.u32 r0, d2[0]//TBO...
+
+    ldmia sp!, {lr}
+WELS_ASM_FUNC_END
+
+#endif
diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/arm/vaa_calc_neon.S b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/arm/vaa_calc_neon.S
new file mode 100644
index 000000000..af32a75e7
--- /dev/null
+++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/arm/vaa_calc_neon.S
@@ -0,0 +1,755 @@
+/*!
+ * \copy
+ * Copyright (c) 2013, Cisco Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifdef HAVE_NEON +#include "arm_arch_common_macro.S" + +.macro ABS_SUB_SUM_16BYTES arg0, arg1, arg2, arg3, arg4 + vld1.32 {q15}, [\arg0], \arg2 //load 16 bytes from arg0, then advance it by arg2 + vld1.32 {q14}, [\arg1], \arg2 //same for arg1 + vabal.u8 \arg3, d30, d28 //arg3 accumulates the absolute differences of bytes 0-7 + vabal.u8 \arg4, d31, d29 //arg4 accumulates the absolute differences of bytes 8-15 +.endm + +.macro ABS_SUB_SUM_8x16BYTES arg0, arg1, arg2, arg3, arg4 + vld1.32 {q15}, [\arg0], \arg2 + vld1.32 {q14}, [\arg1], \arg2 + vabdl.u8 \arg3, d30, d28 //the first row overwrites arg3 and arg4, so no separate clear is needed + vabdl.u8 \arg4, d31, d29 + + ABS_SUB_SUM_16BYTES \arg0, \arg1, \arg2, \arg3, \arg4 //7 accumulating rows follow, 8 rows in total + ABS_SUB_SUM_16BYTES \arg0, \arg1, \arg2, \arg3, \arg4 + ABS_SUB_SUM_16BYTES \arg0, \arg1, \arg2, \arg3, \arg4 + ABS_SUB_SUM_16BYTES \arg0, \arg1, \arg2, \arg3, \arg4 + ABS_SUB_SUM_16BYTES \arg0, \arg1, \arg2, \arg3, \arg4 + ABS_SUB_SUM_16BYTES \arg0, \arg1, \arg2, \arg3, \arg4 + ABS_SUB_SUM_16BYTES \arg0, \arg1, \arg2, \arg3, \arg4 +.endm + +.macro SAD_8X16BITS arg0, arg1, arg2 + vadd.u16 d31, \arg0, \arg1 //reduce the 8 u16 lanes held in arg0 and arg1 to one total in arg2 (d31 is scratch) + vpaddl.u16 d31, d31 + vpaddl.u32 \arg2, d31 +.endm + + +WELS_ASM_FUNC_BEGIN VAACalcSad_neon + + stmdb sp!, {r4-r8} //5 registers (20 bytes) pushed, so the first stack argument is at sp offset 20 + + ldr r4, [sp, #20] //load pic_stride + ldr r5, [sp, #28] //load psad8x8 + + //Clear q8, which accumulates the "psadframe" total + vmov.s64 q8, #0 + + //Precompute the pointer adjustments used by the loops + lsl r8, r4, #4 + sub r7, r8, #16 //r7 = 16*pic_stride-16 + sub r8, r2 //r8 = 16*pic_stride-pic_width + +vaa_calc_sad_loop0: + + //r6 = pic_width, counted down by 16 per block + mov r6, r2 + +vaa_calc_sad_loop1: + + //Process one 16x16 block: q0/q1 cover the first 8 rows, q2/q3 the last 8 + ABS_SUB_SUM_8x16BYTES r0, r1, r4, q0, q1 + ABS_SUB_SUM_8x16BYTES r0, r1, r4, q2, q3 + + //Reduce each accumulator to one 8x8 SAD, leaving them in d0-d3 + SAD_8X16BITS d0, d1, d0 + SAD_8X16BITS d2, d3, d1 + SAD_8X16BITS d4, d5, d2 + SAD_8X16BITS d6, d7, d3 + + //Write the four 8x8 SADs to the "psad8x8" buffer + vst4.32 {d0[0],d1[0],d2[0],d3[0]}, [r5]! 
+ + //Step to the next 16x16 block: back up 16 rows and right 16 bytes + sub r0, r7 + sub r1, r7 + + subs r6, #16 + + //Add this block to the "psadframe" total; the flags from subs above still drive the bne + vadd.u32 q0, q1 + vadd.u32 q8, q0 + + bne vaa_calc_sad_loop1 + + //End of a block row: move both pointers to the start of the next 16 rows + add r0, r8 + add r1, r8 + + subs r3, #16 //r3 counts the remaining rows, 16 per pass + bne vaa_calc_sad_loop0 + + ldr r6, [sp, #24] //load psadframe + vadd.u32 d16, d17 //combine the two halves of the running total + vst1.32 {d16[0]}, [r6] + + ldmia sp!, {r4-r8} + +WELS_ASM_FUNC_END + + +.macro SAD_SD_MAD_16BYTES arg0, arg1, arg2, arg3, arg4, arg5, arg6 + vld1.32 {q0}, [\arg0], \arg2 + vld1.32 {q1}, [\arg1], \arg2 + + vpadal.u8 \arg3, q0 //arg3 accumulates pairwise byte sums of the arg0 row + vpadal.u8 \arg4, q1 //arg4 accumulates pairwise byte sums of the arg1 row + + vabd.u8 q0, q0, q1 //per-byte absolute difference + vmax.u8 \arg5, q0 //arg5 keeps the running per-byte maximum difference + vpadal.u8 \arg6, q0 //arg6 accumulates pairwise sums of the differences +.endm + +.macro SAD_SD_MAD_8x16BYTES arg0, arg1, arg2, arg3, arg4, arg5 + vld1.32 {q0}, [\arg0], \arg2 + vld1.32 {q1}, [\arg1], \arg2 + + vpaddl.u8 q2, q0 //the first row initialises q2/q3 (row sums), arg3 (max) and arg4 (SAD) + vpaddl.u8 q3, q1 + + vabd.u8 \arg3, q0, q1 + vpaddl.u8 \arg4, \arg3 //abs_diff + + + SAD_SD_MAD_16BYTES \arg0,\arg1,\arg2,q2,q3,\arg3,\arg4 + SAD_SD_MAD_16BYTES \arg0,\arg1,\arg2,q2,q3,\arg3,\arg4 + SAD_SD_MAD_16BYTES \arg0,\arg1,\arg2,q2,q3,\arg3,\arg4 + SAD_SD_MAD_16BYTES \arg0,\arg1,\arg2,q2,q3,\arg3,\arg4 + SAD_SD_MAD_16BYTES \arg0,\arg1,\arg2,q2,q3,\arg3,\arg4 + SAD_SD_MAD_16BYTES \arg0,\arg1,\arg2,q2,q3,\arg3,\arg4 + SAD_SD_MAD_16BYTES \arg0,\arg1,\arg2,q2,q3,\arg3,\arg4 + + vsub.u16 \arg5, q2, q3 //arg5 = arg0 row sums minus arg1 row sums, per u16 lane +.endm + +.macro SAD_SD_MAD_CALC arg0, arg1, arg2, arg3, arg4 + vpmax.u8 d0, \arg0, \arg1 //8bytes + vpmax.u8 d0, d0, d0 //4bytes + vpmax.u8 \arg2, d0, d0 //2bytes + + vpaddl.u16 \arg3, \arg3 + vpaddl.u32 \arg3, \arg3 + vpaddl.s16 \arg4, \arg4 + vpaddl.s32 \arg4, \arg4 +.endm + +WELS_ASM_FUNC_BEGIN VAACalcSadBgd_neon + + stmdb sp!, {r4-r10} //7 registers (28 bytes) pushed, so the first stack argument is at sp offset 28 + + ldr r4, [sp, #28] //load pic_stride + ldr r5, [sp, #36] //load psad8x8 + ldr r6, [sp, #40] //load psd8x8 + ldr r7, [sp, #44] //load pmad8x8 + + //Clear q15, which accumulates the "psadframe" total + vmov.s64 q15, #0 + + //Precompute the pointer adjustments used by the loops + lsl r10, r4, #4 + sub r9, r10, #16 //r9 = 16*pic_stride-16 + sub 
r10, r2 //r10 = 16*pic_stride-pic_width + +vaa_calc_sad_bgd_loop0: + + //r8 = pic_width, counted down by 16 per block + mov r8, r2 + +vaa_calc_sad_bgd_loop1: + + //Process one 16x16 block (first 8 rows, then last 8): q13/q14 = max diff, q11/q12 = SAD sums, q9/q10 = sum differences + SAD_SD_MAD_8x16BYTES r0, r1, r4, q13, q11, q9 + SAD_SD_MAD_8x16BYTES r0, r1, r4, q14, q12, q10 + + SAD_SD_MAD_CALC d26, d27, d16, q11, q9 + SAD_SD_MAD_CALC d28, d29, d17, q12, q10 + + //Write to "psad8x8" buffer + vst4.32 {d22[0],d23[0],d24[0],d25[0]}, [r5]! + //Step to the next 16x16 block: back up 16 rows and right 16 bytes + sub r0, r9 + sub r1, r9 + //Write to "psd8x8" buffer + vst4.32 {d18[0],d19[0],d20[0],d21[0]}, [r6]! + subs r8, #16 + //Write to "pmad8x8" buffer + vst2.16 {d16[0],d17[0]}, [r7]! + //Add this block to the "psadframe" total + vadd.u32 q11, q12 + vadd.u32 q15, q11 + + bne vaa_calc_sad_bgd_loop1 + + //End of a block row: move both pointers to the start of the next 16 rows + add r0, r10 + add r1, r10 + + subs r3, #16 + bne vaa_calc_sad_bgd_loop0 + + ldr r8, [sp, #32] //load psadframe + vadd.u32 d30, d31 + vst1.32 {d30[0]}, [r8] + ldmia sp!, {r4-r10} + +WELS_ASM_FUNC_END + + +.macro SSD_MUL_SUM_16BYTES_RESET arg0, arg1, arg2, arg3 + vmull.u8 \arg3, \arg0, \arg0 //square 8 bytes into 8 x u16 (arg3 is scratch) + vpaddl.u16 \arg2, \arg3 //overwrite arg2 with the pairwise sums: this is what resets the accumulator + + vmull.u8 \arg3, \arg1, \arg1 + vpadal.u16 \arg2, \arg3 +.endm + +.macro SSD_MUL_SUM_16BYTES arg0, arg1, arg2, arg3 + vmull.u8 \arg3, \arg0, \arg0 //square 8 bytes into 8 x u16 (arg3 is scratch) + vpadal.u16 \arg2, \arg3 //accumulate the pairwise sums into arg2 + + vmull.u8 \arg3, \arg1, \arg1 + vpadal.u16 \arg2, \arg3 +.endm + +.macro SAD_SSD_BGD_16 arg0, arg1, arg2, arg3 + vld1.8 {q0}, [\arg0], \arg2 //load cur_row + + vpadal.u8 q3, q0 //add cur_row together + vpadal.u8 q4, q1 //add ref_row together + + vabd.u8 q2, q0, q1 //abs_diff + + vmax.u8 q5, q2 //l_mad for 16 bytes reset for every 8x16 + + vpadal.u8 \arg3, q2 //l_sad for 16 bytes reset for every 8x16 + + SSD_MUL_SUM_16BYTES d4,d5, q8, q11 //q8 for l_sqdiff reset for every 16x16 + + vld1.8 {q1}, [\arg1], \arg2 //load ref_row for the next step + vpadal.u8 q9, q0 //q9 for l_sum reset for every 16x16 + + SSD_MUL_SUM_16BYTES d0,d1, q10, q11 //q10 for l_sqsum reset for every 16x16 +.endm + +//the last row of a 16x16 block 
+.macro SAD_SSD_BGD_16_end arg0, arg1, arg2 + vld1.8 {q0}, [\arg0], \arg1 //load cur_row (arg1 is the row stride in this variant) + + vpadal.u8 q3, q0 //add cur_row together + vpadal.u8 q4, q1 //add ref_row together + + vabd.u8 q2, q0, q1 //abs_diff + + vmax.u8 q5, q2 //l_mad for 16 bytes reset for every 8x16 + + vpadal.u8 \arg2, q2 //l_sad for 16 bytes reset for every 8x16 + + SSD_MUL_SUM_16BYTES d4,d5, q8, q11 //q8 for l_sqdiff reset for every 16x16 + + vpadal.u8 q9, q0 //q9 for l_sum reset for every 16x16 + + SSD_MUL_SUM_16BYTES d0,d1, q10, q11 //q10 for l_sqsum reset for every 16x16 +.endm + +//at the beginning of an 8x16 block, use non-accumulating instructions to reset the per-8x16 registers +.macro SAD_SSD_BGD_16_RESET_8x8 arg0, arg1, arg2, arg3 + vld1.8 {q0}, [\arg0], \arg2 //load cur_row + + vpaddl.u8 q3, q0 //add cur_row together + vpaddl.u8 q4, q1 //add ref_row together + + vabd.u8 q2, q0, q1 //abs_diff + + vmov q5,q2 //calculate max and avoid reset to zero, l_mad for 16 bytes reset for every 8x16 + + vpaddl.u8 \arg3, q2 //l_sad for 16 bytes reset for every 8x16 + + + SSD_MUL_SUM_16BYTES d4,d5, q8, q11 //q8 for l_sqdiff reset for every 16x16 + + vld1.8 {q1}, [\arg1], \arg2 //load ref_row for the next step + + vpadal.u8 q9, q0 //q9 for l_sum reset for every 16x16 + + SSD_MUL_SUM_16BYTES d0,d1, q10, q11 //q10 for l_sqsum reset for every 16x16 +.endm + +//at the beginning of a 16x16 block, use non-accumulating instructions to reset every register +.macro SAD_SSD_BGD_16_RESET_16x16 arg0, arg1, arg2, arg3 + vld1.8 {q0}, [\arg0], \arg2 //load cur_row + vld1.8 {q1}, [\arg1], \arg2 //load ref_row + + vpaddl.u8 q3, q0 //add cur_row together + vpaddl.u8 q4, q1 //add ref_row together + + vabd.u8 q2, q0, q1 //abs_diff + + vmov q5,q2 //calculate max and avoid reset to zero, l_mad for 16 bytes reset for every 8x16 + + vpaddl.u8 \arg3, q2 //l_sad for 16 bytes reset for every 8x16 + + SSD_MUL_SUM_16BYTES_RESET d4,d5,q8, q11 //q8 for l_sqdiff reset for every 16x16 + + vld1.8 {q1}, [\arg1], \arg2 //load ref_row for the next step + + vpaddl.u8 q9, q0 //q9 for l_sum reset for every 16x16 + + 
SSD_MUL_SUM_16BYTES_RESET d0,d1,q10,q11 //q10 for l_sqsum reset for every 16x16 +.endm + +//for each 8x16 block +.macro SAD_SSD_BGD_CALC_8x16 arg0, arg1, arg2 + + vpmax.u8 d10, d10, d11 //4 numbers per 8-byte half + vpmax.u8 d10, d10, d10 //2 numbers per half + vpmax.u8 d10, d10, d10 //1 number per half + + vmov \arg0, d10 //d26 d27 keeps the l_mad + + //p_sd8x8 + vpaddl.u16 q3, q3 + vpaddl.u16 q4, q4 + + vsub.i32 \arg1, q3, q4 + vpaddl.u32 \arg1, \arg1 + + //psad8x8 + vpaddl.u16 \arg2, \arg2 + vpaddl.u32 \arg2, \arg2 + + //psadframe + vadd.i32 q12, \arg2 +.endm + +.macro SAD_SSD_BGD_16x16 arg0, arg1, arg2 + //first 8 rows of the block, SAD accumulated in q6 + SAD_SSD_BGD_16_RESET_16x16 \arg0, \arg1, \arg2, q6 + SAD_SSD_BGD_16 \arg0, \arg1, \arg2, q6 + SAD_SSD_BGD_16 \arg0, \arg1, \arg2, q6 + SAD_SSD_BGD_16 \arg0, \arg1, \arg2, q6 + SAD_SSD_BGD_16 \arg0, \arg1, \arg2, q6 + SAD_SSD_BGD_16 \arg0, \arg1, \arg2, q6 + SAD_SSD_BGD_16 \arg0, \arg1, \arg2, q6 + SAD_SSD_BGD_16 \arg0, \arg1, \arg2, q6 + + SAD_SSD_BGD_CALC_8x16 d26, q14, q6 + + //last 8 rows of the block, SAD accumulated in q7; the final step loads no further ref_row + SAD_SSD_BGD_16_RESET_8x8 \arg0, \arg1, \arg2, q7 + SAD_SSD_BGD_16 \arg0, \arg1, \arg2, q7 + SAD_SSD_BGD_16 \arg0, \arg1, \arg2, q7 + SAD_SSD_BGD_16 \arg0, \arg1, \arg2, q7 + SAD_SSD_BGD_16 \arg0, \arg1, \arg2, q7 + SAD_SSD_BGD_16 \arg0, \arg1, \arg2, q7 + SAD_SSD_BGD_16 \arg0, \arg1, \arg2, q7 + SAD_SSD_BGD_16_end \arg0, \arg2, q7 + + SAD_SSD_BGD_CALC_8x16 d27, q15, q7 +.endm + +.macro SSD_SAD_SD_MAD_PADDL arg0, arg1, arg2 + vpaddl.s16 \arg0, \arg0 + vpaddl.s32 \arg0, \arg0 + vadd.i32 \arg1, \arg1, \arg2 +.endm + + +WELS_ASM_FUNC_BEGIN VAACalcSadSsdBgd_neon + stmdb sp!, {r0-r12, r14} //14 registers, 56 bytes + vpush {q4-q7} //64 more bytes, so the first stack argument is at sp offset 120 + + ldr r4, [sp, #120] //r4 keeps the pic_stride + + sub r5, r4, #1 + lsl r5, r5, #4 //r5 keeps the little step: 16*(pic_stride-1) + + lsl r6, r4, #4 + sub r6, r2, r6 //r6 keeps the big step: r2-16*pic_stride + + + ldr r8, [sp, #128]//psad8x8 + ldr r9, [sp, #132]//psum16x16 + ldr r10, [sp, #136]//psqsum16x16 + ldr r11, [sp, #140]//psqdiff16x16 + ldr r12, [sp, #144]//p_sd8x8 + ldr r14, [sp, #148]//p_mad8x8 + + vmov.i8 q12, #0 
+ +vaa_calc_sad_ssd_bgd_height_loop: + + mov r7, r2 //r7 counts the bytes left in this block row, 16 per block +vaa_calc_sad_ssd_bgd_width_loop: + + //l_sd q14&q15, l_mad q13, l_sad q6 & q7, l_sqdiff q8, l_sum q9, l_sqsum q10 + SAD_SSD_BGD_16x16 r0,r1,r4 + + //psad8x8 + vst4.32 {d12[0], d13[0], d14[0], d15[0]}, [r8]! + + sub r0, r0, r5 //jump to next 16x16 + sub r1, r1, r5 //jump to next 16x16 + + //p_sd8x8 + vst4.32 {d28[0], d29[0],d30[0], d31[0]}, [r12]! + + //p_mad8x8 + vst2.16 {d26[0], d27[0]}, [r14]! + + //psqdiff16x16 + vpaddl.s32 q8, q8 + vadd.i32 d16, d16, d17 + + vst1.32 {d16[0]}, [r11]! //psqdiff16x16 + + //psum16x16 + SSD_SAD_SD_MAD_PADDL q9, d18, d19 + vst1.32 {d18[0]}, [r9]! //psum16x16 + + //psqsum16x16 + vpaddl.s32 q10, q10 + vadd.i32 d20, d20, d21 + vst1.32 {d20[0]}, [r10]! //psqsum16x16 + + subs r7, #16 + + bne vaa_calc_sad_ssd_bgd_width_loop + + sub r0, r0, r6 //jump to next 16 x width + sub r1, r1, r6 //jump to next 16 x width + + subs r3, #16 +bne vaa_calc_sad_ssd_bgd_height_loop + + //psadframe + ldr r7, [sp, #124]//psadframe + + vadd.i32 d24, d24, d25 //combine the two halves of the q12 running total + vst1.32 {d24[0]}, [r7] + + vpop {q4-q7} + ldmia sp!, {r0-r12, r14} + +WELS_ASM_FUNC_END + + +.macro SAD_VAR_16 arg0, arg1, arg2, arg3 + vld1.8 {q0}, [\arg0], \arg2 //load cur_row + + vpadal.u8 q3, q0 //add cur_row together + vpadal.u8 q4, q1 //add ref_row together + + vabd.u8 q2, q0, q1 //abs_diff + + vpadal.u8 \arg3, q2 //l_sad for 16 bytes reset for every 8x16 + + vld1.8 {q1}, [\arg1], \arg2 //load ref_row for the next step + + vpadal.u8 q9, q0 //q9 for l_sum reset for every 16x16 + + SSD_MUL_SUM_16BYTES d0,d1, q10, q11 //q10 for l_sqsum reset for every 16x16 +.endm + +.macro SAD_VAR_16_END arg0, arg1, arg2 + vld1.8 {q0}, [\arg0], \arg1 //load cur_row (arg1 is the row stride in this variant) + + vpadal.u8 q3, q0 //add cur_row together + vpadal.u8 q4, q1 //add ref_row together + + vabd.u8 q2, q0, q1 //abs_diff + + vpadal.u8 \arg2, q2 //l_sad for 16 bytes reset for every 8x16 + + vpadal.u8 q9, q0 //q9 for l_sum reset for every 16x16 + + SSD_MUL_SUM_16BYTES d0,d1, q10, q11 //q10 for l_sqsum reset for every 16x16 +.endm + + 
+.macro SAD_VAR_16_RESET_16x16 arg0, arg1, arg2, arg3 + vld1.8 {q0}, [\arg0], \arg2 //load cur_row + vld1.8 {q1}, [\arg1], \arg2 //load ref_row + + vpaddl.u8 q3, q0 //add cur_row together + vpaddl.u8 q4, q1 //add ref_row together + + vabd.u8 q2, q0, q1 //abs_diff + + vpaddl.u8 \arg3, q2 //l_sad for 16 bytes reset for every 8x16 + + vld1.8 {q1}, [\arg1], \arg2 //load ref_row for the next step + + vpaddl.u8 q9, q0 //q9 for l_sum reset for every 16x16 + + SSD_MUL_SUM_16BYTES_RESET d0,d1, q10, q11 //q10 for l_sqsum reset for every 16x16 +.endm + +.macro SAD_VAR_16_RESET_8x8 arg0, arg1, arg2, arg3 + vld1.8 {q0}, [\arg0], \arg2 //load cur_row + + vpaddl.u8 q3, q0 //add cur_row together + vpaddl.u8 q4, q1 //add ref_row together + + vabd.u8 q2, q0, q1 //abs_diff + + vpaddl.u8 \arg3, q2 //l_sad for 16 bytes reset for every 8x16 + + vld1.8 {q1}, [\arg1], \arg2 //load ref_row for the next step + + vpadal.u8 q9, q0 //q9 for l_sum reset for every 16x16 + + SSD_MUL_SUM_16BYTES d0,d1, q10, q11 //q10 for l_sqsum reset for every 16x16 +.endm + +.macro SAD_VAR_16x16 arg0, arg1, arg2 + //first 8 rows of the block, SAD accumulated in q6 + SAD_VAR_16_RESET_16x16 \arg0, \arg1, \arg2, q6 + SAD_VAR_16 \arg0, \arg1, \arg2, q6 + SAD_VAR_16 \arg0, \arg1, \arg2, q6 + SAD_VAR_16 \arg0, \arg1, \arg2, q6 + SAD_VAR_16 \arg0, \arg1, \arg2, q6 + SAD_VAR_16 \arg0, \arg1, \arg2, q6 + SAD_VAR_16 \arg0, \arg1, \arg2, q6 + SAD_VAR_16 \arg0, \arg1, \arg2, q6 + + vpaddl.u16 q6, q6 + vpaddl.u32 q6, q6 + vadd.i32 q12, q6 + + //last 8 rows of the block, SAD accumulated in q7; the final step loads no further ref_row + SAD_VAR_16_RESET_8x8 \arg0, \arg1, \arg2, q7 + SAD_VAR_16 \arg0, \arg1, \arg2, q7 + SAD_VAR_16 \arg0, \arg1, \arg2, q7 + SAD_VAR_16 \arg0, \arg1, \arg2, q7 + SAD_VAR_16 \arg0, \arg1, \arg2, q7 + SAD_VAR_16 \arg0, \arg1, \arg2, q7 + SAD_VAR_16 \arg0, \arg1, \arg2, q7 + SAD_VAR_16_END \arg0, \arg2, q7 + + vpaddl.u16 q7, q7 + vpaddl.u32 q7, q7 + + vadd.i32 q12, q7 +.endm + + +WELS_ASM_FUNC_BEGIN VAACalcSadVar_neon + stmdb sp!, {r4-r11} //8 registers, 32 bytes + vpush {q4} //16 bytes + vpush {q6-q7} //32 bytes, so the first stack argument is at sp offset 80 + + ldr r4, [sp, #80] //r4 keeps the pic_stride + + sub r5, r4, #1 + lsl r5, r5, #4 //r5 keeps the little step + + lsl r6, r4, #4 + sub r6, r2, r6 //r6 keeps the 
big step + + ldr r7, [sp, #84] //psadframe + ldr r8, [sp, #88] //psad8x8 + ldr r9, [sp, #92] //psum16x16 + ldr r10, [sp, #96] //psqsum16x16 + + vmov.i8 q12, #0 //q12 accumulates the psadframe total +vaa_calc_sad_var_height_loop: + + mov r11, r2 //r11 counts the bytes left in this block row, 16 per block +vaa_calc_sad_var_width_loop: + + + SAD_VAR_16x16 r0,r1,r4 + //psad8x8 + vst4.32 {d12[0], d13[0], d14[0], d15[0]}, [r8]! + + sub r0, r0, r5 //jump to next 16x16 + sub r1, r1, r5 //jump to next 16x16 + + //psum16x16 + SSD_SAD_SD_MAD_PADDL q9, d18, d19 + vst1.32 {d18[0]}, [r9]! //psum16x16 + + //psqsum16x16 + vpaddl.s32 q10, q10 + subs r11, #16 //decremented early; the NEON instructions before the bne leave the flags alone + vadd.i32 d20, d20, d21 + vst1.32 {d20[0]}, [r10]! //psqsum16x16 + + bne vaa_calc_sad_var_width_loop + + sub r0, r0, r6 //jump to next 16 x width + sub r1, r1, r6 //jump to next 16 x width + + subs r3, #16 +bne vaa_calc_sad_var_height_loop + + vadd.i32 d24, d24, d25 //combine the two halves of the psadframe total + vst1.32 {d24[0]}, [r7] + + vpop {q6-q7} + vpop {q4} + ldmia sp!, {r4-r11} +WELS_ASM_FUNC_END + + +.macro SAD_SSD_16 arg0, arg1, arg2, arg3 + SAD_VAR_16 \arg0, \arg1, \arg2, \arg3 + + SSD_MUL_SUM_16BYTES d4,d5,q8, q11 //q8 for l_sqdiff: accumulate the squares of the abs_diff left in q2 +.endm + +.macro SAD_SSD_16_END arg0, arg1, arg2 + SAD_VAR_16_END \arg0, \arg1, \arg2 + + SSD_MUL_SUM_16BYTES d4,d5,q8, q11 //q8 for l_sqdiff reset for every 16x16 +.endm + +.macro SAD_SSD_16_RESET_16x16 arg0, arg1, arg2, arg3 + SAD_VAR_16_RESET_16x16 \arg0, \arg1, \arg2, \arg3 + + SSD_MUL_SUM_16BYTES_RESET d4,d5,q8, q11 //q8 for l_sqdiff reset for every 16x16 +.endm + +.macro SAD_SSD_16_RESET_8x8 arg0, arg1, arg2, arg3 + SAD_VAR_16_RESET_8x8 \arg0, \arg1, \arg2, \arg3 + + SSD_MUL_SUM_16BYTES d4,d5,q8, q11 //q8 for l_sqdiff reset for every 16x16 +.endm + +.macro SAD_SSD_16x16 arg0, arg1, arg2 + //first 8 rows of the block, SAD accumulated in q6 + SAD_SSD_16_RESET_16x16 \arg0, \arg1, \arg2, q6 + SAD_SSD_16 \arg0, \arg1, \arg2, q6 + SAD_SSD_16 \arg0, \arg1, \arg2, q6 + SAD_SSD_16 \arg0, \arg1, \arg2, q6 + SAD_SSD_16 \arg0, \arg1, \arg2, q6 + SAD_SSD_16 \arg0, \arg1, \arg2, q6 + SAD_SSD_16 \arg0, \arg1, \arg2, q6 + SAD_SSD_16 \arg0, \arg1, \arg2, q6 + + vpaddl.u16 q6, q6 + vpaddl.u32 q6, 
q6 + vadd.i32 q12, q6 + + //last 8 rows of the block, SAD accumulated in q7; the final step loads no further ref_row + SAD_SSD_16_RESET_8x8 \arg0, \arg1, \arg2, q7 + SAD_SSD_16 \arg0, \arg1, \arg2, q7 + SAD_SSD_16 \arg0, \arg1, \arg2, q7 + SAD_SSD_16 \arg0, \arg1, \arg2, q7 + SAD_SSD_16 \arg0, \arg1, \arg2, q7 + SAD_SSD_16 \arg0, \arg1, \arg2, q7 + SAD_SSD_16 \arg0, \arg1, \arg2, q7 + SAD_SSD_16_END \arg0, \arg2, q7 + + vpaddl.u16 q7, q7 + vpaddl.u32 q7, q7 + + vadd.i32 q12, q7 +.endm + + +WELS_ASM_FUNC_BEGIN VAACalcSadSsd_neon + stmdb sp!, {r4-r12} //9 registers, 36 bytes + vpush {q4} //16 bytes + vpush {q6-q7} //32 bytes, so the first stack argument is at sp offset 84 + + ldr r4, [sp, #84] //r4 keeps the pic_stride + + sub r5, r4, #1 + lsl r5, r5, #4 //r5 keeps the little step: 16*(pic_stride-1) + + lsl r6, r4, #4 + sub r6, r2, r6 //r6 keeps the big step: r2-16*pic_stride + + ldr r7, [sp, #88] //psadframe + ldr r8, [sp, #92] //psad8x8 + ldr r9, [sp, #96] //psum16x16 + ldr r10, [sp, #100] //psqsum16x16 + ldr r11, [sp, #104] //psqdiff16x16 + + vmov.i8 q12, #0 //q12 accumulates the psadframe total +vaa_calc_sad_ssd_height_loop: + + mov r12, r2 //r12 counts the bytes left in this block row, 16 per block +vaa_calc_sad_ssd_width_loop: + + + SAD_SSD_16x16 r0,r1,r4 + //psad8x8 + vst4.32 {d12[0], d13[0], d14[0], d15[0]}, [r8]! + + sub r0, r0, r5 //jump to next 16x16 + sub r1, r1, r5 //jump to next 16x16 + + //psum16x16 + vpaddl.s16 q9, q9 + vpaddl.s32 q9, q9 + vadd.i32 d18, d18, d19 + vst1.32 {d18[0]}, [r9]! //psum16x16 + + //psqsum16x16 + vpaddl.s32 q10, q10 + vadd.i32 d20, d20, d21 + vst1.32 {d20[0]}, [r10]! //psqsum16x16 + + //psqdiff16x16 + vpaddl.s32 q8, q8 + vadd.i32 d16, d16, d17 + subs r12, #16 + vst1.32 {d16[0]}, [r11]! 
//psqdiff16x16 + + bne vaa_calc_sad_ssd_width_loop + + sub r0, r0, r6 //jump to next 16 x width + sub r1, r1, r6 //jump to next 16 x width + + subs r3, #16 + bne vaa_calc_sad_ssd_height_loop + + vadd.i32 d24, d24, d25 //combine the two halves of the psadframe total + vst1.32 {d24[0]}, [r7] + + vpop {q6-q7} + vpop {q4} + ldmia sp!, {r4-r12} +WELS_ASM_FUNC_END + +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/arm64/adaptive_quantization_aarch64_neon.S b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/arm64/adaptive_quantization_aarch64_neon.S new file mode 100644 index 000000000..82981adb3 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/arm64/adaptive_quantization_aarch64_neon.S @@ -0,0 +1,89 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifdef HAVE_NEON_AARCH64 +#include "arm_arch64_common_macro.S" +WELS_ASM_AARCH64_FUNC_BEGIN SampleVariance16x16_AArch64_neon + SIGN_EXTENSION x1, w1 + SIGN_EXTENSION x3, w3 + ld1 {v1.16b}, [x0], x1 //load the ref data (16bytes), then advance x0 by x1 + ld1 {v0.16b}, [x2], x3 //load the src data (16bytes), then advance x2 by x3 + uabd v2.16b, v0.16b, v1.16b //per-byte absolute difference of the first row + umull v3.8h, v2.8b, v2.8b + umull2 v4.8h, v2.16b, v2.16b + uaddlp v4.4s, v4.8h + uadalp v4.4s, v3.8h //sqr + uaddlp v2.8h, v2.16b //sum + + uaddlp v1.8h, v0.16b //sum_cur + + umull v3.8h, v0.8b, v0.8b + umull2 v5.8h, v0.16b, v0.16b + uaddlp v3.4s, v3.8h + uadalp v3.4s, v5.8h //sqr_cur +.rept 15 + ld1 {v5.16b}, [x0], x1 //load the ref data (16bytes) + ld1 {v0.16b}, [x2], x3 //load the src data (16bytes) + + uabd v6.16b, v0.16b, v5.16b + + //v1 accumulates sum_cur + uadalp v1.8h, v0.16b + + //v4 accumulates sqr + umull v5.8h, v6.8b, v6.8b + umull2 v7.8h, v6.16b, v6.16b + uadalp v4.4s, v5.8h //sqr + uadalp v4.4s, v7.8h //sqr + + //v2 accumulates sum + uadalp v2.8h, v6.16b + + //v3 accumulates sqr_cur + umull v5.8h, v0.8b, v0.8b + umull2 v7.8h, v0.16b, v0.16b + uadalp v3.4s, v5.8h //sqr_cur + uadalp v3.4s, v7.8h //sqr_cur +.endr + uaddlv s2, v2.8h //sum + uaddlv s1, v1.8h //sum_cur + ins v2.s[1], v1.s[0] // sum, sum_cur + shrn v2.4h, v2.4s, #8 // sum, sum_cur>>8 + mul v2.4h, v2.4h, v2.4h//// sum*sum, sum_cur*sum_cur + uaddlv d4, v4.4s //sqr + uaddlv d3, v3.4s //sqr_cur + ins v4.s[1], v3.s[0] // sqr, sqr_cur + shrn v4.4h, v4.4s, #8 // sqr, 
sqr_cur>>8 + sub v4.4h, v4.4h, v2.4h //(sqr>>8) minus the square of (sum>>8), for the diff pair and for the cur pair + st1 {v4.s}[0], [x4] //store both 16-bit results (32 bits in total) at x4 +WELS_ASM_AARCH64_FUNC_END +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/arm64/down_sample_aarch64_neon.S b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/arm64/down_sample_aarch64_neon.S new file mode 100644 index 000000000..b14f05556 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/arm64/down_sample_aarch64_neon.S @@ -0,0 +1,334 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifdef HAVE_NEON_AARCH64 +#include "arm_arch64_common_macro.S" + +WELS_ASM_AARCH64_FUNC_BEGIN DyadicBilinearDownsampler_AArch64_neon + + //Initialize the registers: x6/x8 remember the current source/destination row starts, w9 counts the source bytes done in the row, w5 becomes the number of output rows + mov x6, x2 + mov x8, x0 + mov w9, #0 + lsr w5, w5, #1 + + //Save the 16 bytes that follow the last output row in v16 (the loop uses v0-v7 as scratch, so v4 would be overwritten): each store below writes a full 16 bytes, so the last row can spill into them + smaddl x7, w1, w5, x0 + ld1 {v16.16b}, [x7] + + add x7, x2, w3, sxtw //x7 = the source row below x2 + //loop over 32-byte source groups; when a row is finished, fall through to the row advance below +comp_ds_bilinear_loop0: + + ld1 {v0.16b, v1.16b}, [x2], #32 + ld1 {v2.16b, v3.16b}, [x7], #32 + uzp1 v4.16b, v0.16b, v1.16b //even bytes of the upper row + uzp2 v5.16b, v0.16b, v1.16b //odd bytes of the upper row + uzp1 v6.16b, v2.16b, v3.16b //even bytes of the lower row + uzp2 v7.16b, v2.16b, v3.16b //odd bytes of the lower row + urhadd v0.16b, v4.16b, v5.16b //rounded horizontal average, upper row + urhadd v1.16b, v6.16b, v7.16b //rounded horizontal average, lower row + urhadd v2.16b, v0.16b, v1.16b //rounded vertical average of the two + st1 {v2.16b}, [x0], #16 + add w9, w9, #32 + + cmp w9, w4 + b.cc comp_ds_bilinear_loop0 + + mov w9, #0 + add x6, x6, w3, sxtw #1 //advance the source by two rows + mov x2, x6 + add x7, x2, w3, sxtw + add x8, x8, w1, sxtw //advance the destination by one row + mov x0, x8 + sub w5, w5, #1 + + cbnz w5, comp_ds_bilinear_loop0 + + //put the saved bytes back after the last output row (x0 now points just past it) + st1 {v16.16b}, [x0] + +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN DyadicBilinearDownsamplerWidthx32_AArch64_neon + sub w9, w3, w4 //w9 = w3 minus w4, added to the source pointer at the end of each row + sub w1, w1, w4, lsr #1 //w1 = w1 minus half of w4, added to the destination pointer at the end of each row + lsr w5, w5, #1 //number of output rows + + //one pass per output row +comp_ds_bilinear_w_x32_loop0: + + lsr w6, w4, #5 + add x7, x2, w3, sxtw + //one pass per 32 source bytes (16 output bytes) +comp_ds_bilinear_w_x32_loop1: + + ld1 {v0.16b, v1.16b}, [x2], #32 + ld1 {v2.16b, v3.16b}, [x7], #32 + uzp1 v4.16b, v0.16b, v1.16b + uzp2 v5.16b, v0.16b, v1.16b + uzp1 v6.16b, v2.16b, v3.16b + uzp2 v7.16b, v2.16b, v3.16b + urhadd v0.16b, v4.16b, v5.16b + urhadd v1.16b, v6.16b, v7.16b + urhadd v2.16b, v0.16b, v1.16b + st1 {v2.16b}, [x0], #16 + + sub w6, w6, #1 + cbnz w6, comp_ds_bilinear_w_x32_loop1 + + add x2, x7, w9, sxtw + add x0, x0, w1, sxtw + sub w5, w5, #1 + cbnz w5, comp_ds_bilinear_w_x32_loop0 +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN DyadicBilinearOneThirdDownsampler_AArch64_neon + + //Initialize the register + mov x6, x2 + mov x8, x0 + mov w9, #0 + + //Save 
the tailer for the unasigned size + smaddl x7, w1, w5, x0 + ld1 {v16.16b}, [x7] + + add x7, x2, w3, sxtw //x7 = the source row below x2 + //loop over 48-byte source groups; when a row is finished, fall through to the row advance below +comp_ds_bilinear_onethird_loop0: + + ld3 {v0.16b, v1.16b, v2.16b}, [x2], #48 + ld3 {v4.16b, v5.16b, v6.16b}, [x7], #48 + + uaddl v2.8h, v0.8b, v1.8b //sum the first two bytes of every group of three; the third (v2, v6) is overwritten unused + uaddl2 v3.8h, v0.16b, v1.16b + uaddl v6.8h, v4.8b, v5.8b + uaddl2 v7.8h, v4.16b, v5.16b + urshr v2.8h, v2.8h, #1 + urshr v3.8h, v3.8h, #1 + urshr v6.8h, v6.8h, #1 + urshr v7.8h, v7.8h, #1 + + urhadd v0.8h, v2.8h, v6.8h //rounded average of the two rows + urhadd v1.8h, v3.8h, v7.8h + xtn v0.8b, v0.8h + xtn v1.8b, v1.8h + st1 {v0.8b,v1.8b}, [x0], #16 + + add w9, w9, #48 + + cmp w9, w4 + b.cc comp_ds_bilinear_onethird_loop0 + + mov w9, #0 + add x6, x6, w3, sxtw #1 //advance the source by three rows in total + add x6, x6, w3, sxtw + mov x2, x6 + add x7, x2, w3, sxtw + add x8, x8, w1, sxtw //advance the destination by one row + mov x0, x8 + sub w5, w5, #1 + + cbnz w5, comp_ds_bilinear_onethird_loop0 + + //restore the saved tail bytes that the stores for the last row may have overwritten + st1 {v16.16b}, [x0] +WELS_ASM_AARCH64_FUNC_END +//void DyadicBilinearQuarterDownsampler_AArch64_neon(uint8_t* pDst, const int32_t kiDstStride, +//uint8_t* pSrc, const int32_t kiSrcStride, +//const int32_t kiSrcWidth, const int32_t kiHeight); + +WELS_ASM_AARCH64_FUNC_BEGIN DyadicBilinearQuarterDownsampler_AArch64_neon + //Initialize the registers: x6/x8 remember the current source/destination row starts, w9 counts the source bytes done in the row, w5 becomes the number of output rows + mov x6, x2 + mov x8, x0 + mov w9, #0 + lsr w5, w5, #2 + + //Save the 16 bytes that follow the last output row; they are restored at the end + smaddl x7, w1, w5, x0 + ld1 {v16.16b}, [x7] + + add x7, x2, w3, sxtw //x7 = the source row below x2 + //loop over 64-byte source groups; when a row is finished, fall through to the row advance below +comp_ds_bilinear_quarter_loop0: + + ld2 {v0.8h, v1.8h}, [x2], #32 + ld2 {v2.8h, v3.8h}, [x2], #32 + ld2 {v4.8h, v5.8h}, [x7], #32 + ld2 {v6.8h, v7.8h}, [x7], #32 + + uaddlp v0.8h, v0.16b //v0, v2, v4, v6 hold the first two bytes of every four; the loaded v1, v3, v5, v7 are not read + uaddlp v1.8h, v2.16b + uaddlp v4.8h, v4.16b + uaddlp v5.8h, v6.16b + urshr v0.8h, v0.8h, #1 + urshr v1.8h, v1.8h, #1 + urshr v4.8h, v4.8h, #1 + urshr v5.8h, v5.8h, #1 + + urhadd v0.8h, v0.8h, v4.8h //rounded average of the two rows + urhadd v1.8h, v1.8h, v5.8h + xtn v0.8b, v0.8h + xtn v1.8b, v1.8h + st1 {v0.8b,v1.8b}, [x0], #16 + + add w9, w9, #64 + + cmp w9, w4 + 
b.cc comp_ds_bilinear_quarter_loop0 + + mov w9, #0 + add x6, x6, w3, sxtw #2 //advance the source by four rows + mov x2, x6 + add x7, x2, w3, sxtw + add x8, x8, w1, sxtw //advance the destination by one row + mov x0, x8 + sub w5, w5, #1 + + cbnz w5, comp_ds_bilinear_quarter_loop0 + + //restore the saved tail bytes that the stores for the last row may have overwritten + st1 {v16.16b}, [x0] +WELS_ASM_AARCH64_FUNC_END + +//void GeneralBilinearAccurateDownsampler_AArch64_neon (uint8_t* pDst, const int32_t kiDstStride, +// const int32_t kiDstWidth, const int32_t kiDstHeight, +// uint8_t* pSrc, const int32_t kiSrcStride, const uint32_t kuiScaleX, const uint32_t kuiScaleY); +WELS_ASM_AARCH64_FUNC_BEGIN GeneralBilinearAccurateDownsampler_AArch64_neon + mov w10, #32767 + and w8, w6, w10 //low 15 bits of the x scale + mov w11, #-1 + mul w12, w11, w8 + + dup v2.4h, w8 + dup v0.4h, w12 + zip1 v0.4h, v0.4h, v2.4h // uinc -uinc uinc -uinc + + and w9, w7, w10 //low 15 bits of the y scale + mul w12, w11, w9 + + dup v2.4h, w9 + dup v5.4h, w12 + ins v5.s[1], v2.s[0] // vinc vinc -vinc -vinc + + mov w11, #0x40000000 + mov w12, #0x3FFF + add w11, w11, w12 + dup v1.2s, w11 //init u 16384 16383 16384 16383 + + mov w8, #16384 //w8 = y position in 1/32768 units, starting at 16384 + dup v7.4h, w8 + sub w11, w8, #1 + dup v2.4h, w11 + ins v7.s[0], v2.s[0] //init v 16384 16384 16383 16383 + + eor v26.16b, v26.16b, v26.16b + eor v27.16b, v27.16b, v27.16b + SIGN_EXTENSION x1, w1 + SIGN_EXTENSION x2, w2 + SIGN_EXTENSION x3, w3 + SIGN_EXTENSION x5, w5 + SIGN_EXTENSION x6, w6 + SIGN_EXTENSION x7, w7 + + sub x1, x1, x2 //x1 = destination stride minus width, added after each output row + sub x3, x3, #1 //the last output row is handled separately at LAST_ROW + +_HEIGHT: + lsr w11, w8, #15 //integer source row for the current y position + mul w11, w11, w5 + add x15, x4, w11, sxtw //x15 = that source row + add x12, x15, w5, sxtw //x12 = the source row below it + + mov x9, #16384 //x9 = x position in 1/32768 units, restarted for every row + sub x10, x2, #1 //the last pixel of each row is copied separately at WIDTH_END. NOTE(review): the loop decrements before testing, so a destination width of 1 would not terminate here - confirm callers never pass it (the same applies to x3 and a height of 1) + orr v6.8b, v1.8b, v1.8b //restart the horizontal weights from their initial value + +_WIDTH: + lsr x13, x9, #15 //integer source column for the current x position + add x14, x15, x13 + ld2 {v26.b, v27.b}[0], [x14] //q14: 0000000b0000000a; + add x14, x12, x13 + ld2 {v26.b, v27.b}[4], [x14] //q14: 000d000b000c000a; + zip1 v28.2s, v26.2s, v27.2s + zip2 v29.2s, v26.2s, v27.2s + + umull v20.4s, v6.4h, v7.4h //products of the horizontal and vertical weights + umull v21.2d, v28.2s, v20.2s + ins v20.d[0], v20.d[1] + umlal v21.2d, v29.2s, v20.2s + + addp d21, v21.2d + urshr d21, d21, #30 //rounding shift removes the two 15-bit weight scales + + st1 {v21.b}[0], [x0], 
#1 + add x9, x9, x6 + add v6.4h, v6.4h, v0.4h + shl v6.4h, v6.4h, #1 + ushr v6.4h, v6.4h, #1 //shifting left then right clears bit 15, keeping the weights within 15 bits + sub x10, x10, #1 + cbnz x10, _WIDTH + +WIDTH_END: + lsr x9, x9, #15 + add x14, x15, x9 + ld1 {v21.b}[0], [x14] //the last pixel of the row is a plain copy of one source byte + st1 {v21.b}[0], [x0], #1 + add w8, w8, w7 + add x0, x0, x1 + add v7.4h, v7.4h, v5.4h + shl v7.4h, v7.4h, #1 + ushr v7.4h, v7.4h, #1 //shifting left then right clears bit 15, keeping the weights within 15 bits + sub x3, x3, #1 + cbnz x3, _HEIGHT + +LAST_ROW: + lsr w8, w8, #15 + mul w8, w8, w5 + add x4, x4, w8, sxtw //x4 = the single source row used for the last output row + mov x9, #16384 + +_LAST_ROW_WIDTH: + mov x11, x9 + lsr x11, x11, #15 + add x3, x4, x11 + ld1 {v21.b}[0], [x3] //every pixel of the last row is a plain copy of one source byte + st1 {v21.b}[0], [x0], #1 + add x9, x9, x6 + sub x2, x2, #1 + cbnz x2, _LAST_ROW_WIDTH + +WELS_ASM_AARCH64_FUNC_END + +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/arm64/pixel_sad_aarch64_neon.S b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/arm64/pixel_sad_aarch64_neon.S new file mode 100644 index 000000000..9d09ed777 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/arm64/pixel_sad_aarch64_neon.S @@ -0,0 +1,51 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifdef HAVE_NEON_AARCH64 +#include "arm_arch64_common_macro.S" +//int32_t WelsProcessingSampleSad8x8_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t); /* Returns in w0 the sum of absolute differences of two 8x8 byte blocks: x0 / x2 are the block pointers, w1 / w3 their row strides (sign-extended below). */ +WELS_ASM_AARCH64_FUNC_BEGIN WelsProcessingSampleSad8x8_AArch64_neon + SIGN_EXTENSION x1, w1 + SIGN_EXTENSION x3, w3 + ld1 {v0.8b}, [x0], x1 + ld1 {v1.8b}, [x2], x3 + uabdl v2.8h, v0.8b, v1.8b /* first row initialises the eight 16-bit column sums */ +.rept 7 + ld1 {v0.8b}, [x0], x1 + ld1 {v1.8b}, [x2], x3 + uabal v2.8h, v0.8b, v1.8b /* remaining seven rows are accumulated */ +.endr + saddlv s2, v2.8h /* add the eight column sums into one 32-bit value */ + fmov w0, s2 +WELS_ASM_AARCH64_FUNC_END + +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/arm64/vaa_calc_aarch64_neon.S b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/arm64/vaa_calc_aarch64_neon.S new file mode 100644 index 000000000..a7be46a49 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/arm64/vaa_calc_aarch64_neon.S @@ -0,0 +1,549 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer.
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifdef HAVE_NEON_AARCH64 +#include "arm_arch64_common_macro.S" + /* One row: loads 16 bytes from x0 and from x1 (both post-incremented by x4) and accumulates the absolute differences of the low 8 bytes into the first operand and of the high 8 bytes into the second. */ +.macro ABS_SUB_SUM_16BYTES arg0, arg1 + ld1 {v0.16b}, [x0], x4 + ld1 {v1.16b}, [x1], x4 + uabal \arg0, v0.8b, v1.8b + uabal2 \arg1, v0.16b,v1.16b +.endm + /* Eight rows: the first row initialises both accumulators (uabdl / uabdl2), the remaining seven are accumulated with ABS_SUB_SUM_16BYTES. */ +.macro ABS_SUB_SUM_8x16BYTES arg0, arg1 + ld1 {v0.16b}, [x0], x4 + ld1 {v1.16b}, [x1], x4 + uabdl \arg0, v0.8b, v1.8b + uabdl2 \arg1, v0.16b,v1.16b + + ABS_SUB_SUM_16BYTES \arg0, \arg1 + ABS_SUB_SUM_16BYTES \arg0, \arg1 + ABS_SUB_SUM_16BYTES \arg0, \arg1 + ABS_SUB_SUM_16BYTES \arg0, \arg1 + ABS_SUB_SUM_16BYTES \arg0, \arg1 + ABS_SUB_SUM_16BYTES \arg0, \arg1 + ABS_SUB_SUM_16BYTES \arg0, \arg1 +.endm + /* The function below visits the picture in 16x16 blocks: per block, four 32-bit 8x8 SADs are stored through x6 and added to the running total in v31, which is stored to [x5] at the end. Both loop counters are decremented by 16 and tested with cbnz, so pic_width and pic_height are assumed to be non-zero multiples of 16. */ +/* + * void vaa_calc_sad_neon(uint8_t *cur_data, uint8_t *ref_data, int32_t pic_width, int32_t pic_height, int32_t pic_stride, + * int32_t *psadframe, int32_t *psad8x8) + */ +WELS_ASM_AARCH64_FUNC_BEGIN VAACalcSad_AArch64_neon + eor v31.16b, v31.16b, v31.16b + + SIGN_EXTENSION x4, w4 + lsl x9, x4, #4 + sub x10, x9, #16 //x10 keep the 16*pic_stride-16 + sub x9, x9, x2 //x9 keep the 16*pic_stride-pic_width +vaa_calc_sad_loop0: + mov w11, w2 +vaa_calc_sad_loop1: + ABS_SUB_SUM_8x16BYTES v2.8h, v3.8h + ABS_SUB_SUM_8x16BYTES v4.8h, v5.8h + + uaddlv s2, v2.8h + uaddlv s3, v3.8h + uaddlv s4, v4.8h + uaddlv s5, v5.8h + + st4 {v2.s, v3.s, v4.s, v5.s}[0], [x6], #16 + sub x0, x0, x10 + sub x1, x1, x10 + sub w11, w11, #16 + add v6.2s, v2.2s, v3.2s + add v7.2s, v4.2s, v5.2s + add v6.2s, v6.2s, v7.2s + add v31.2s, v31.2s, v6.2s + cbnz w11, vaa_calc_sad_loop1 + + add x0, x0, x9 + add x1, x1, x9 + sub w3, w3, #16 + cbnz w3, vaa_calc_sad_loop0 + + str s31, [x5] + +WELS_ASM_AARCH64_FUNC_END + /* Eight rows of 16 bytes: v2 = pairwise sums of the absolute differences, v4 / v5 = pairwise sums of the x0 / x1 rows, v31 = per-byte maximum absolute difference. */ +.macro SAD_SD_MAD_8x16BYTES + ld1 {v0.16b}, [x0], x4 + ld1 {v1.16b}, [x1], x4 + uabd v31.16b, v0.16b, v1.16b + uaddlp v2.8h, v31.16b + uaddlp v4.8h, v0.16b + uaddlp v5.8h, v1.16b +.rept 7 + ld1 {v0.16b}, [x0], x4 + ld1 {v1.16b}, [x1], x4 + uabd v30.16b, v0.16b, v1.16b + umax v31.16b, v31.16b,v30.16b + uadalp v2.8h, v30.16b + uadalp v4.8h, v0.16b + uadalp v5.8h, v1.16b
+.endr +.endm +/* + * void vaa_calc_sad_bgd_neon(uint8_t *cur_data, uint8_t *ref_data, int32_t pic_width, int32_t pic_height, int32_t pic_stride, + * int32_t *psadframe, int32_t *psad8x8, int32_t *p_sd8x8, uint8_t *p_mad8x8) + */ +WELS_ASM_AARCH64_FUNC_BEGIN VAACalcSadBgd_AArch64_neon + ldr x15, [sp, #0] /* p_mad8x8: the ninth argument, read from the stack */ + eor v28.16b, v28.16b, v28.16b + + SIGN_EXTENSION x4, w4 + lsl x9, x4, #4 + sub x10, x9, #16 //x10 keep the 16*pic_stride-16 + sub x9, x9, x2 //x9 keep the 16*pic_stride-pic_width +vaa_calc_sad_bgd_loop0: + mov w11, w2 +vaa_calc_sad_bgd_loop1: + SAD_SD_MAD_8x16BYTES + umaxv b24, v31.8b + ins v31.d[0], v31.d[1] + umaxv b25, v31.8b + uaddlv s20, v2.4h + ins v2.d[0], v2.d[1] + uaddlv s21, v2.4h + usubl v6.4s, v4.4h, v5.4h + usubl2 v7.4s, v4.8h, v5.8h + addv s16, v6.4s + addv s17, v7.4s + + SAD_SD_MAD_8x16BYTES + umaxv b26, v31.8b + ins v31.d[0], v31.d[1] + umaxv b27, v31.8b + uaddlv s22, v2.4h + ins v2.d[0], v2.d[1] + uaddlv s23, v2.4h + usubl v6.4s, v4.4h, v5.4h + usubl2 v7.4s, v4.8h, v5.8h + addv s18, v6.4s + addv s19, v7.4s + st4 {v20.s, v21.s, v22.s, v23.s}[0], [x6], #16 + + sub x0, x0, x10 + sub x1, x1, x10 + st4 {v16.s, v17.s, v18.s, v19.s}[0], [x7], #16 + sub w11, w11, #16 + st4 {v24.b, v25.b, v26.b, v27.b}[0], [x15], #4 + add v29.2s, v20.2s, v21.2s + add v30.2s, v22.2s, v23.2s + add v29.2s, v29.2s, v30.2s + add v28.2s, v28.2s, v29.2s + cbnz w11, vaa_calc_sad_bgd_loop1 + + add x0, x0, x9 + add x1, x1, x9 + sub w3, w3, #16 + cbnz w3, vaa_calc_sad_bgd_loop0 + str s28, [x5] + +WELS_ASM_AARCH64_FUNC_END + /* Upper eight rows of a 16x16 block: (re)starts v29 (squared differences), v28 (sum) and v27 (squared sum) and produces v2 (SAD), v4 / v5 (row sums) and v31 (max abs diff). */ +.macro SAD_SSD_BGD_8x16BYTES_1 + ld1 {v0.16b}, [x0], x4 + ld1 {v1.16b}, [x1], x4 + uabd v31.16b, v0.16b, v1.16b + umull v30.8h, v31.8b, v31.8b + uaddlp v29.4s, v30.8h + umull2 v30.8h, v31.16b, v31.16b + uadalp v29.4s, v30.8h // p_sqdiff + + uaddlp v28.8h, v0.16b // p_sum + umull v30.8h, v0.8b, v0.8b + uaddlp v27.4s, v30.8h + umull2 v30.8h, v0.16b, v0.16b + uadalp v27.4s, v30.8h // p_sqsum + + uaddlp v2.8h, v31.16b // p_sad + uaddlp v4.8h, v0.16b + uaddlp v5.8h,
v1.16b +.rept 7 + ld1 {v0.16b}, [x0], x4 + ld1 {v1.16b}, [x1], x4 + uabd v3.16b, v0.16b, v1.16b + umax v31.16b, v31.16b,v3.16b //p_mad + umull v30.8h, v3.8b, v3.8b + uadalp v29.4s, v30.8h + umull2 v30.8h, v3.16b, v3.16b + uadalp v29.4s, v30.8h // p_sqdiff + + uadalp v28.8h, v0.16b // p_sum + umull v30.8h, v0.8b, v0.8b + uadalp v27.4s, v30.8h + umull2 v30.8h, v0.16b, v0.16b + uadalp v27.4s, v30.8h // p_sqsum + + uadalp v2.8h, v3.16b //p_sad + uadalp v4.8h, v0.16b + uadalp v5.8h, v1.16b //p_sd +.endr +.endm + /* Lower eight rows: keeps accumulating into v27-v29 and produces v16 (SAD), v6 / v7 (row sums) and v26 (max abs diff). */ +.macro SAD_SSD_BGD_8x16BYTES_2 + ld1 {v0.16b}, [x0], x4 + ld1 {v1.16b}, [x1], x4 + uabd v26.16b, v0.16b, v1.16b + umull v30.8h, v26.8b, v26.8b + uadalp v29.4s, v30.8h + umull2 v30.8h, v26.16b, v26.16b + uadalp v29.4s, v30.8h // p_sqdiff + + uadalp v28.8h, v0.16b // p_sum + umull v30.8h, v0.8b, v0.8b + uadalp v27.4s, v30.8h + umull2 v30.8h, v0.16b, v0.16b + uadalp v27.4s, v30.8h // p_sqsum + + uaddlp v16.8h,v26.16b // p_sad + uaddlp v6.8h, v0.16b + uaddlp v7.8h, v1.16b +.rept 7 + ld1 {v0.16b}, [x0], x4 + ld1 {v1.16b}, [x1], x4 + uabd v3.16b, v0.16b, v1.16b + umax v26.16b, v26.16b,v3.16b //p_mad + umull v30.8h, v3.8b, v3.8b + uadalp v29.4s, v30.8h + umull2 v30.8h, v3.16b, v3.16b + uadalp v29.4s, v30.8h // p_sqdiff + + uadalp v28.8h, v0.16b // p_sum + umull v30.8h, v0.8b, v0.8b + uadalp v27.4s, v30.8h + umull2 v30.8h, v0.16b, v0.16b + uadalp v27.4s, v30.8h // p_sqsum + + uadalp v16.8h, v3.16b //p_sad + uadalp v6.8h, v0.16b + uadalp v7.8h, v1.16b //p_sd +.endr +.endm + +/* + * void vaa_calc_sad_ssd_bgd_c(uint8_t *cur_data, uint8_t *ref_data, int32_t pic_width, int32_t pic_height, int32_t pic_stride, + * int32_t *psadframe,int32_t *psad8x8, int32_t *psum16x16, int32_t *psqsum16x16, int32_t *psqdiff16x16, int32_t *p_sd8x8, uint8_t *p_mad8x8) + */ +WELS_ASM_AARCH64_FUNC_BEGIN VAACalcSadSsdBgd_AArch64_neon + ldr x12, [sp, #0] //psqsum16x16 + ldr x13, [sp, #8] //psqdiff16x16 + ldr x14, [sp, #16] //p_sd8x8 + ldr x15, [sp, #24] //p_mad8x8 + eor v17.16b, v17.16b, v17.16b +
+ SIGN_EXTENSION x4, w4 + lsl x9, x4, #4 + sub x10, x9, #16 //x10 keep the 16*pic_stride-16 + sub x9, x9, x2 //x9 keep the 16*pic_stride-pic_width + +vaa_calc_sad_ssd_bgd_height_loop: + mov w11, w2 +vaa_calc_sad_ssd_bgd_width_loop: + SAD_SSD_BGD_8x16BYTES_1 //psad:v2, v16, psum:v28, psqsum:v27, psqdiff:v29, psd:v4, v5, v6, v7, pmad:v31, v26 + SAD_SSD_BGD_8x16BYTES_2 + + umaxv b22, v31.8b + ins v31.d[0], v31.d[1] + umaxv b23, v31.8b + umaxv b24, v26.8b + ins v26.d[0], v26.d[1] + umaxv b25, v26.8b + st4 {v22.b, v23.b, v24.b, v25.b}[0], [x15], #4 + + usubl v20.4s, v4.4h, v5.4h + usubl2 v21.4s, v4.8h, v5.8h + addv s20, v20.4s + addv s21, v21.4s + usubl v22.4s, v6.4h, v7.4h + usubl2 v23.4s, v6.8h, v7.8h + addv s22, v22.4s + addv s23, v23.4s + st4 {v20.s, v21.s, v22.s, v23.s}[0], [x14], #16 + + uaddlv s20, v2.4h + ins v2.d[0], v2.d[1] + uaddlv s21, v2.4h + uaddlv s22, v16.4h + ins v16.d[0], v16.d[1] + uaddlv s23, v16.4h + st4 {v20.s, v21.s, v22.s, v23.s}[0], [x6], #16 + + uaddlv s28, v28.8h + str s28, [x7], #4 + addv s27, v27.4s + str s27, [x12], #4 + addv s29, v29.4s + str s29, [x13], #4 + + sub x0, x0, x10 + sub x1, x1, x10 + sub w11, w11, #16 + add v29.2s, v20.2s, v21.2s + add v30.2s, v22.2s, v23.2s + add v29.2s, v29.2s, v30.2s + add v17.2s, v17.2s, v29.2s + cbnz w11, vaa_calc_sad_ssd_bgd_width_loop + + add x0, x0, x9 + add x1, x1, x9 + sub w3, w3, #16 + cbnz w3, vaa_calc_sad_ssd_bgd_height_loop + str s17, [x5] +WELS_ASM_AARCH64_FUNC_END + + /* Same accumulators as SAD_SSD_BGD_8x16BYTES_1 (v29, v28, v27, v2) but without the row sums and the max abs diff. */ +.macro SAD_SSD_8x16BYTES_1 + ld1 {v0.16b}, [x0], x4 + ld1 {v1.16b}, [x1], x4 + uabd v31.16b, v0.16b, v1.16b + umull v30.8h, v31.8b, v31.8b + uaddlp v29.4s, v30.8h + umull2 v30.8h, v31.16b, v31.16b + uadalp v29.4s, v30.8h // p_sqdiff + + uaddlp v28.8h, v0.16b // p_sum + umull v30.8h, v0.8b, v0.8b + uaddlp v27.4s, v30.8h + umull2 v30.8h, v0.16b, v0.16b + uadalp v27.4s, v30.8h // p_sqsum + + uaddlp v2.8h, v31.16b // p_sad +.rept 7 + ld1 {v0.16b}, [x0], x4 + ld1 {v1.16b}, [x1], x4 + uabd v3.16b, v0.16b, v1.16b + umull v30.8h, v3.8b,
v3.8b + uadalp v29.4s, v30.8h + umull2 v30.8h, v3.16b, v3.16b + uadalp v29.4s, v30.8h // p_sqdiff + + uadalp v28.8h, v0.16b // p_sum + umull v30.8h, v0.8b, v0.8b + uadalp v27.4s, v30.8h + umull2 v30.8h, v0.16b, v0.16b + uadalp v27.4s, v30.8h // p_sqsum + + uadalp v2.8h, v3.16b //p_sad +.endr +.endm + +.macro SAD_SSD_8x16BYTES_2 + ld1 {v0.16b}, [x0], x4 + ld1 {v1.16b}, [x1], x4 + uabd v26.16b, v0.16b, v1.16b + umull v30.8h, v26.8b, v26.8b + uadalp v29.4s, v30.8h + umull2 v30.8h, v26.16b, v26.16b + uadalp v29.4s, v30.8h // p_sqdiff + + uadalp v28.8h, v0.16b // p_sum + umull v30.8h, v0.8b, v0.8b + uadalp v27.4s, v30.8h + umull2 v30.8h, v0.16b, v0.16b + uadalp v27.4s, v30.8h // p_sqsum + + uaddlp v16.8h,v26.16b // p_sad + uaddlp v6.8h, v0.16b + uaddlp v7.8h, v1.16b +.rept 7 + ld1 {v0.16b}, [x0], x4 + ld1 {v1.16b}, [x1], x4 + uabd v3.16b, v0.16b, v1.16b + umull v30.8h, v3.8b, v3.8b + uadalp v29.4s, v30.8h + umull2 v30.8h, v3.16b, v3.16b + uadalp v29.4s, v30.8h // p_sqdiff + + uadalp v28.8h, v0.16b // p_sum + umull v30.8h, v0.8b, v0.8b + uadalp v27.4s, v30.8h + umull2 v30.8h, v0.16b, v0.16b + uadalp v27.4s, v30.8h // p_sqsum + + uadalp v16.8h, v3.16b //p_sad +.endr +.endm +/* + * void vaa_calc_sad_ssd_c(uint8_t *cur_data, uint8_t *ref_data, int32_t pic_width, int32_t pic_height, int32_t pic_stride, + * int32_t *psadframe,int32_t *psad8x8, int32_t *psum16x16, int32_t *psqsum16x16, int32_t *psqdiff16x16) + */ +WELS_ASM_AARCH64_FUNC_BEGIN VAACalcSadSsd_AArch64_neon + ldr x12, [sp, #0] //psqsum16x16 + ldr x13, [sp, #8] //psqdiff16x16 + eor v17.16b, v17.16b, v17.16b + + SIGN_EXTENSION x4, w4 + lsl x9, x4, #4 + sub x10, x9, #16 //x10 keep the 16*pic_stride-16 + sub x9, x9, x2 //x9 keep the 16*pic_stride-pic_width + +vaa_calc_sad_ssd_height_loop: + mov w11, w2 +vaa_calc_sad_ssd_width_loop: + SAD_SSD_8x16BYTES_1 //psad:v2, v16, psum:v28, psqsum:v27, psqdiff:v29 + SAD_SSD_8x16BYTES_2 + + uaddlv s20, v2.4h + ins v2.d[0], v2.d[1] + uaddlv s21, v2.4h + uaddlv s22, v16.4h + ins
v16.d[0], v16.d[1] + uaddlv s23, v16.4h + st4 {v20.s, v21.s, v22.s, v23.s}[0], [x6], #16 + + uaddlv s28, v28.8h + str s28, [x7], #4 + addv s27, v27.4s + str s27, [x12], #4 + addv s29, v29.4s + str s29, [x13], #4 + + sub x0, x0, x10 + sub x1, x1, x10 + sub w11, w11, #16 + add v29.2s, v20.2s, v21.2s + add v30.2s, v22.2s, v23.2s + add v29.2s, v29.2s, v30.2s + add v17.2s, v17.2s, v29.2s + cbnz w11, vaa_calc_sad_ssd_width_loop + + add x0, x0, x9 + add x1, x1, x9 + sub w3, w3, #16 + cbnz w3, vaa_calc_sad_ssd_height_loop + str s17, [x5] +WELS_ASM_AARCH64_FUNC_END + + /* Upper eight rows for the SAD/variance variant: only v2 (SAD), v28 (sum) and v27 (squared sum) are produced. */ +.macro SAD_VAR_8x16BYTES_1 + ld1 {v0.16b}, [x0], x4 + ld1 {v1.16b}, [x1], x4 + uabd v31.16b, v0.16b, v1.16b + uaddlp v2.8h, v31.16b // p_sad + + uaddlp v28.8h, v0.16b // p_sum + umull v30.8h, v0.8b, v0.8b + uaddlp v27.4s, v30.8h + umull2 v30.8h, v0.16b, v0.16b + uadalp v27.4s, v30.8h // p_sqsum + +.rept 7 + ld1 {v0.16b}, [x0], x4 + ld1 {v1.16b}, [x1], x4 + uabd v3.16b, v0.16b, v1.16b + uadalp v2.8h, v3.16b //p_sad + + uadalp v28.8h, v0.16b // p_sum + umull v30.8h, v0.8b, v0.8b + uadalp v27.4s, v30.8h + umull2 v30.8h, v0.16b, v0.16b + uadalp v27.4s, v30.8h // p_sqsum +.endr +.endm +.macro SAD_VAR_8x16BYTES_2 + ld1 {v0.16b}, [x0], x4 + ld1 {v1.16b}, [x1], x4 + uabd v26.16b, v0.16b, v1.16b + uaddlp v16.8h,v26.16b // p_sad + + uadalp v28.8h, v0.16b // p_sum + umull v30.8h, v0.8b, v0.8b + uadalp v27.4s, v30.8h + umull2 v30.8h, v0.16b, v0.16b + uadalp v27.4s, v30.8h // p_sqsum +.rept 7 + ld1 {v0.16b}, [x0], x4 + ld1 {v1.16b}, [x1], x4 + uabd v3.16b, v0.16b, v1.16b + uadalp v16.8h, v3.16b //p_sad + + uadalp v28.8h, v0.16b // p_sum + umull v30.8h, v0.8b, v0.8b + uadalp v27.4s, v30.8h + umull2 v30.8h, v0.16b, v0.16b + uadalp v27.4s, v30.8h // p_sqsum +.endr +.endm + +/* + * void vaa_calc_sad_var_c(uint8_t *cur_data, uint8_t *ref_data, int32_t pic_width, int32_t pic_height, int32_t pic_stride, + * int32_t *psadframe,int32_t *psad8x8, int32_t *psum16x16, int32_t *psqsum16x16) + */ +WELS_ASM_AARCH64_FUNC_BEGIN
VAACalcSadVar_AArch64_neon + ldr x12, [sp, #0] //psqsum16x16 + eor v17.16b, v17.16b, v17.16b + + SIGN_EXTENSION x4, w4 + lsl x9, x4, #4 + sub x10, x9, #16 //x10 keep the 16*pic_stride-16 + sub x9, x9, x2 //x9 keep the 16*pic_stride-pic_width + +vaa_calc_sad_var_height_loop: + mov w11, w2 +vaa_calc_sad_var_width_loop: + SAD_VAR_8x16BYTES_1 //psad:v2, v16, psum:v28, psqsum:v27 + SAD_VAR_8x16BYTES_2 + + uaddlv s20, v2.4h + ins v2.d[0], v2.d[1] + uaddlv s21, v2.4h + uaddlv s22, v16.4h + ins v16.d[0], v16.d[1] + uaddlv s23, v16.4h + st4 {v20.s, v21.s, v22.s, v23.s}[0], [x6], #16 + + uaddlv s28, v28.8h + str s28, [x7], #4 + addv s27, v27.4s + str s27, [x12], #4 + + sub x0, x0, x10 + sub x1, x1, x10 + sub w11, w11, #16 + add v29.2s, v20.2s, v21.2s + add v30.2s, v22.2s, v23.2s + add v29.2s, v29.2s, v30.2s + add v17.2s, v17.2s, v29.2s + + cbnz w11, vaa_calc_sad_var_width_loop + + add x0, x0, x9 + add x1, x1, x9 + sub w3, w3, #16 + cbnz w3, vaa_calc_sad_var_height_loop + str s17, [x5] +WELS_ASM_AARCH64_FUNC_END + +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/backgrounddetection/BackgroundDetection.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/backgrounddetection/BackgroundDetection.cpp new file mode 100644 index 000000000..1e66a6291 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/backgrounddetection/BackgroundDetection.cpp @@ -0,0 +1,383 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer.
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include "BackgroundDetection.h" + +WELSVP_NAMESPACE_BEGIN + /* Size of one OU: BGD_OU_SIZE luma samples per side, BGD_OU_SIZE_UV (half of that) samples for the chroma-plane edge checks. */ +#define LOG2_BGD_OU_SIZE (4) +#define LOG2_BGD_OU_SIZE_UV (LOG2_BGD_OU_SIZE-1) +#define BGD_OU_SIZE (1<<LOG2_BGD_OU_SIZE) +#define BGD_OU_SIZE_UV (BGD_OU_SIZE>>1) +#define BGD_THD_SAD (2*BGD_OU_SIZE*BGD_OU_SIZE) +#define BGD_THD_ASD_UV (4*BGD_OU_SIZE_UV) +#define LOG2_MB_SIZE (4) +#define OU_SIZE_IN_MB (BGD_OU_SIZE >> 4) +#define Q_FACTOR (8) +#define BGD_DELTA_QP_THD (3) + /* Bit masks naming the neighbour positions passed to ForegroundDilation23Chroma. */ +#define OU_LEFT (0x01) +#define OU_RIGHT (0x02) +#define OU_TOP (0x04) +#define OU_BOTTOM (0x08) + /* Zero-initialises the parameter block; iCpuFlag is not used here. */ +CBackgroundDetection::CBackgroundDetection (int32_t iCpuFlag) { + m_eMethod = METHOD_BACKGROUND_DETECTION; + WelsMemset (&m_BgdParam, 0, sizeof (m_BgdParam)); + m_iLargestFrameSize = 0; +} + /* Frees the OU array allocated by Process(). */ +CBackgroundDetection::~CBackgroundDetection() { + WelsFree (m_BgdParam.pOU_array); +} + /* Copies plane pointers, dimensions and strides from the two maps, (re)allocates the OU array when there is none or the frame area grew, then runs the detection. Returns RET_INVALIDPARAM for a NULL map or a failed allocation, RET_SUCCESS otherwise. iType is unused. */ +EResult CBackgroundDetection::Process (int32_t iType, SPixMap* pSrcPixMap, SPixMap* pRefPixMap) { + EResult eReturn = RET_INVALIDPARAM; + + if (pSrcPixMap == NULL || pRefPixMap == NULL) + return eReturn; + + m_BgdParam.pCur[0] = (uint8_t*)pSrcPixMap->pPixel[0]; + m_BgdParam.pCur[1] = (uint8_t*)pSrcPixMap->pPixel[1]; + m_BgdParam.pCur[2] = (uint8_t*)pSrcPixMap->pPixel[2]; + m_BgdParam.pRef[0] = (uint8_t*)pRefPixMap->pPixel[0]; + m_BgdParam.pRef[1] = (uint8_t*)pRefPixMap->pPixel[1]; + m_BgdParam.pRef[2] = (uint8_t*)pRefPixMap->pPixel[2]; + m_BgdParam.iBgdWidth = pSrcPixMap->sRect.iRectWidth; + m_BgdParam.iBgdHeight = pSrcPixMap->sRect.iRectHeight; + m_BgdParam.iStride[0] = pSrcPixMap->iStride[0]; + m_BgdParam.iStride[1] = pSrcPixMap->iStride[1]; + m_BgdParam.iStride[2] = pSrcPixMap->iStride[2]; + + int32_t iCurFrameSize = m_BgdParam.iBgdWidth * m_BgdParam.iBgdHeight; + if (m_BgdParam.pOU_array == NULL || iCurFrameSize > m_iLargestFrameSize) { + WelsFree (m_BgdParam.pOU_array); + m_BgdParam.pOU_array = AllocateOUArrayMemory (m_BgdParam.iBgdWidth, m_BgdParam.iBgdHeight); + m_iLargestFrameSize = iCurFrameSize; + } + + if (m_BgdParam.pOU_array == NULL) + return eReturn; + + BackgroundDetection (&m_BgdParam);
+ + return RET_SUCCESS; +} + /* Stores the caller's background-flag buffer and calculation results (pParam is cast to SBGDInterface*); returns RET_INVALIDPARAM for NULL. iType is unused. */ +EResult CBackgroundDetection::Set (int32_t iType, void* pParam) { + if (pParam == NULL) { + return RET_INVALIDPARAM; + } + + SBGDInterface* pInterface = (SBGDInterface*)pParam; + + m_BgdParam.pBackgroundMbFlag = (int8_t*)pInterface->pBackgroundMbFlag; + m_BgdParam.pCalcRes = pInterface->pCalcRes; + + return RET_SUCCESS; +} + /* Allocates one SBackgroundOU per OU with both dimensions rounded up to whole OUs; returns the WelsMalloc result unchecked. */ +inline SBackgroundOU* CBackgroundDetection::AllocateOUArrayMemory (int32_t iWidth, int32_t iHeight) { + int32_t iMaxOUWidth = (BGD_OU_SIZE - 1 + iWidth) >> LOG2_BGD_OU_SIZE; + int32_t iMaxOUHeight = (BGD_OU_SIZE - 1 + iHeight) >> LOG2_BGD_OU_SIZE; + return (SBackgroundOU*)WelsMalloc (iMaxOUWidth * iMaxOUHeight * sizeof (SBackgroundOU)); +} + /* Fills pBgdOU from the four 8x8 sub-block statistics stored at iMbIndex: summed SAD, absolute value of the summed SD, max and min sub-block MAD, and the spread (max - min) of the sub-block SDs. iMbWidth is unused. */ +void CBackgroundDetection::GetOUParameters (SVAACalcResult* sVaaCalcInfo, int32_t iMbIndex, int32_t iMbWidth, + SBackgroundOU* pBgdOU) { + int32_t iSubSD[4]; + uint8_t iSubMAD[4]; + int32_t iSubSAD[4]; + + uint8_t (*pMad8x8)[4]; + int32_t (*pSad8x8)[4]; + int32_t (*pSd8x8)[4]; + + pSad8x8 = sVaaCalcInfo->pSad8x8; + pMad8x8 = sVaaCalcInfo->pMad8x8; + pSd8x8 = sVaaCalcInfo->pSumOfDiff8x8; + + iSubSAD[0] = pSad8x8[iMbIndex][0]; + iSubSAD[1] = pSad8x8[iMbIndex][1]; + iSubSAD[2] = pSad8x8[iMbIndex][2]; + iSubSAD[3] = pSad8x8[iMbIndex][3]; + + iSubSD[0] = pSd8x8[iMbIndex][0]; + iSubSD[1] = pSd8x8[iMbIndex][1]; + iSubSD[2] = pSd8x8[iMbIndex][2]; + iSubSD[3] = pSd8x8[iMbIndex][3]; + + iSubMAD[0] = pMad8x8[iMbIndex][0]; + iSubMAD[1] = pMad8x8[iMbIndex][1]; + iSubMAD[2] = pMad8x8[iMbIndex][2]; + iSubMAD[3] = pMad8x8[iMbIndex][3]; + + pBgdOU->iSD = iSubSD[0] + iSubSD[1] + iSubSD[2] + iSubSD[3]; + pBgdOU->iSAD = iSubSAD[0] + iSubSAD[1] + iSubSAD[2] + iSubSAD[3]; + pBgdOU->iSD = WELS_ABS (pBgdOU->iSD); + + // get the max absolute difference (MAD) of OU and min value of the MAD of sub-blocks of OU + pBgdOU->iMAD = WELS_MAX (WELS_MAX (iSubMAD[0], iSubMAD[1]), WELS_MAX (iSubMAD[2], iSubMAD[3])); + pBgdOU->iMinSubMad = WELS_MIN (WELS_MIN (iSubMAD[0], iSubMAD[1]), WELS_MIN (iSubMAD[2],
iSubMAD[3])); + + // get difference between the max and min SD of the SDs of sub-blocks of OU + pBgdOU->iMaxDiffSubSd = WELS_MAX (WELS_MAX (iSubSD[0], iSubSD[1]), WELS_MAX (iSubSD[2], iSubSD[3])) - + WELS_MIN (WELS_MIN (iSubSD[0], iSubSD[1]), WELS_MIN (iSubSD[2], iSubSD[3])); +} + /* Step 1: computes the parameters of every whole OU (partial OUs at the right/bottom edge are not visited) and sets its initial iBackgroundFlag from the MAD / SAD / SD thresholds below. */ +void CBackgroundDetection::ForegroundBackgroundDivision (vBGDParam* pBgdParam) { + int32_t iPicWidthInOU = pBgdParam->iBgdWidth >> LOG2_BGD_OU_SIZE; + int32_t iPicHeightInOU = pBgdParam->iBgdHeight >> LOG2_BGD_OU_SIZE; + int32_t iPicWidthInMb = (15 + pBgdParam->iBgdWidth) >> 4; + + SBackgroundOU* pBackgroundOU = pBgdParam->pOU_array; + + for (int32_t j = 0; j < iPicHeightInOU; j ++) { + for (int32_t i = 0; i < iPicWidthInOU; i++) { + GetOUParameters (pBgdParam->pCalcRes, (j * iPicWidthInMb + i) << (LOG2_BGD_OU_SIZE - LOG2_MB_SIZE), iPicWidthInMb, + pBackgroundOU); + + pBackgroundOU->iBackgroundFlag = 0; + if (pBackgroundOU->iMAD > 63) { + pBackgroundOU++; + continue; + } + if ((pBackgroundOU->iMaxDiffSubSd <= pBackgroundOU->iSAD >> 3 + || pBackgroundOU->iMaxDiffSubSd <= (BGD_OU_SIZE * Q_FACTOR)) + && pBackgroundOU->iSAD < (BGD_THD_SAD << 1)) { //BGD_OU_SIZE*BGD_OU_SIZE>>2 + if (pBackgroundOU->iSAD <= BGD_OU_SIZE * Q_FACTOR) { + pBackgroundOU->iBackgroundFlag = 1; + } else { + pBackgroundOU->iBackgroundFlag = pBackgroundOU->iSAD < BGD_THD_SAD ?
+ (pBackgroundOU->iSD < (pBackgroundOU->iSAD * 3) >> 2) : + (pBackgroundOU->iSD << 1 < pBackgroundOU->iSAD); + } + } + pBackgroundOU++; + } + } +} +inline int32_t CBackgroundDetection::CalculateAsdChromaEdge (uint8_t* pOriRef, uint8_t* pOriCur, int32_t iStride) { + int32_t ASD = 0; /* signed sum of (cur - ref) over BGD_OU_SIZE_UV samples spaced iStride apart; its absolute value is returned */ + int32_t idx; + for (idx = 0; idx < BGD_OU_SIZE_UV; idx++) { + ASD += *pOriCur - *pOriRef; + pOriRef += iStride; + pOriCur += iStride; + } + return WELS_ABS (ASD); +} + /* Luma test on this OU and its four neighbours (0: left, 1: right, 2: top, 3: bottom); callers clear iBackgroundFlag when this returns true. */ +inline bool CBackgroundDetection::ForegroundDilation23Luma (SBackgroundOU* pBackgroundOU, + SBackgroundOU* pOUNeighbours[]) { + SBackgroundOU* pOU_L = pOUNeighbours[0]; + SBackgroundOU* pOU_R = pOUNeighbours[1]; + SBackgroundOU* pOU_U = pOUNeighbours[2]; + SBackgroundOU* pOU_D = pOUNeighbours[3]; + + if (pBackgroundOU->iMAD > pBackgroundOU->iMinSubMad << 1) { + int32_t iMaxNbrForegroundMad; + int32_t iMaxNbrBackgroundMad; + int32_t aBackgroundMad[4]; + int32_t aForegroundMad[4]; + /* (flag - 1) is all ones when the flag is 0 and zero when it is 1, so only neighbours flagged as foreground keep their MAD here. */ + aForegroundMad[0] = (pOU_L->iBackgroundFlag - 1) & pOU_L->iMAD; + aForegroundMad[1] = (pOU_R->iBackgroundFlag - 1) & pOU_R->iMAD; + aForegroundMad[2] = (pOU_U->iBackgroundFlag - 1) & pOU_U->iMAD; + aForegroundMad[3] = (pOU_D->iBackgroundFlag - 1) & pOU_D->iMAD; + iMaxNbrForegroundMad = WELS_MAX (WELS_MAX (aForegroundMad[0], aForegroundMad[1]), WELS_MAX (aForegroundMad[2], + aForegroundMad[3])); + /* ((!flag) - 1) is the opposite mask: only neighbours flagged as background keep their MAD. */ + aBackgroundMad[0] = ((!pOU_L->iBackgroundFlag) - 1) & pOU_L->iMAD; + aBackgroundMad[1] = ((!pOU_R->iBackgroundFlag) - 1) & pOU_R->iMAD; + aBackgroundMad[2] = ((!pOU_U->iBackgroundFlag) - 1) & pOU_U->iMAD; + aBackgroundMad[3] = ((!pOU_D->iBackgroundFlag) - 1) & pOU_D->iMAD; + iMaxNbrBackgroundMad = WELS_MAX (WELS_MAX (aBackgroundMad[0], aBackgroundMad[1]), WELS_MAX (aBackgroundMad[2], + aBackgroundMad[3])); + + return ((iMaxNbrForegroundMad > pBackgroundOU->iMinSubMad << 2) || (pBackgroundOU->iMAD > iMaxNbrBackgroundMad << 1 + && pBackgroundOU->iMAD <= (iMaxNbrForegroundMad * 3) >> 1)); + } + return 0; +} + /* Chroma test: for every neighbour bit set in iNeighbourForegroundFlags, compares the matching edge of this OU between reference and current picture in plane 2, then plane 1; returns 1 as soon as one edge exceeds BGD_THD_ASD_UV. */ +inline bool
CBackgroundDetection::ForegroundDilation23Chroma (int8_t iNeighbourForegroundFlags, + int32_t iStartSamplePos, int32_t iPicStrideUV, vBGDParam* pBgdParam) { + static const int8_t kaOUPos[4] = {OU_LEFT, OU_RIGHT, OU_TOP, OU_BOTTOM}; + int32_t aEdgeOffset[4] = {0, BGD_OU_SIZE_UV - 1, 0, iPicStrideUV* (BGD_OU_SIZE_UV - 1)}; + int32_t iStride[4] = {iPicStrideUV, iPicStrideUV, 1, 1}; + + // V component first, high probability because V stands for red color and human skin colors have more weight on this component + for (int32_t i = 0; i < 4; i++) { + if (iNeighbourForegroundFlags & kaOUPos[i]) { + uint8_t* pRefC = pBgdParam->pRef[2] + iStartSamplePos + aEdgeOffset[i]; + uint8_t* pCurC = pBgdParam->pCur[2] + iStartSamplePos + aEdgeOffset[i]; + if (CalculateAsdChromaEdge (pRefC, pCurC, iStride[i]) > BGD_THD_ASD_UV) { + return 1; + } + } + } + // U component, which stands for blue color, low probability + for (int32_t i = 0; i < 4; i++) { + if (iNeighbourForegroundFlags & kaOUPos[i]) { + uint8_t* pRefC = pBgdParam->pRef[1] + iStartSamplePos + aEdgeOffset[i]; + uint8_t* pCurC = pBgdParam->pCur[1] + iStartSamplePos + aEdgeOffset[i]; + if (CalculateAsdChromaEdge (pRefC, pCurC, iStride[i]) > BGD_THD_ASD_UV) { + return 1; + } + } + } + + return 0; +} + /* Re-evaluates an OU currently flagged as background: with at most one background neighbour the flag is cleared, with two or three the luma and then the chroma test decide, with four it is left unchanged. Only applies when iSAD exceeds BGD_OU_SIZE * Q_FACTOR. */ +inline void CBackgroundDetection::ForegroundDilation (SBackgroundOU* pBackgroundOU, SBackgroundOU* pOUNeighbours[], + vBGDParam* pBgdParam, int32_t iChromaSampleStartPos) { + int32_t iPicStrideUV = pBgdParam->iStride[1]; + int32_t iSumNeighBackgroundFlags = pOUNeighbours[0]->iBackgroundFlag + pOUNeighbours[1]->iBackgroundFlag + + pOUNeighbours[2]->iBackgroundFlag + pOUNeighbours[3]->iBackgroundFlag; + + if (pBackgroundOU->iSAD > BGD_OU_SIZE * Q_FACTOR) { + switch (iSumNeighBackgroundFlags) { + case 0: + case 1: + pBackgroundOU->iBackgroundFlag = 0; + break; + case 2: + case 3: + pBackgroundOU->iBackgroundFlag = !ForegroundDilation23Luma (pBackgroundOU, pOUNeighbours); + + // chroma component check + if
(pBackgroundOU->iBackgroundFlag == 1) { + int8_t iNeighbourForegroundFlags = (!pOUNeighbours[0]->iBackgroundFlag) | ((!pOUNeighbours[1]->iBackgroundFlag) << 1) + | ((!pOUNeighbours[2]->iBackgroundFlag) << 2) | ((!pOUNeighbours[3]->iBackgroundFlag) << 3); + pBackgroundOU->iBackgroundFlag = !ForegroundDilation23Chroma (iNeighbourForegroundFlags, iChromaSampleStartPos, + iPicStrideUV, pBgdParam); + } + break; + default: + break; + } + } +} +inline void CBackgroundDetection::BackgroundErosion (SBackgroundOU* pBackgroundOU, SBackgroundOU* pOUNeighbours[]) { + if (pBackgroundOU->iMaxDiffSubSd <= (BGD_OU_SIZE * Q_FACTOR)) { //BGD_OU_SIZE*BGD_OU_SIZE>>2 + int32_t iSumNeighBackgroundFlags = pOUNeighbours[0]->iBackgroundFlag + pOUNeighbours[1]->iBackgroundFlag + + pOUNeighbours[2]->iBackgroundFlag + pOUNeighbours[3]->iBackgroundFlag; + int32_t sumNbrBGsad = (pOUNeighbours[0]->iSAD & (-pOUNeighbours[0]->iBackgroundFlag)) + (pOUNeighbours[2]->iSAD & + (-pOUNeighbours[2]->iBackgroundFlag)) + + (pOUNeighbours[1]->iSAD & (-pOUNeighbours[1]->iBackgroundFlag)) + (pOUNeighbours[3]->iSAD & + (-pOUNeighbours[3]->iBackgroundFlag)); + if (pBackgroundOU->iSAD * iSumNeighBackgroundFlags <= (3 * sumNbrBGsad) >> 1) { + if (iSumNeighBackgroundFlags == 4) { + pBackgroundOU->iBackgroundFlag = 1; + } else { + if ((pOUNeighbours[0]->iBackgroundFlag & pOUNeighbours[1]->iBackgroundFlag) + || (pOUNeighbours[2]->iBackgroundFlag & pOUNeighbours[3]->iBackgroundFlag)) { + pBackgroundOU->iBackgroundFlag = !ForegroundDilation23Luma (pBackgroundOU, pOUNeighbours); + } + } + } + } +} + /* Writes the flag for one MB; iPicWidthInMb is unused. */ +inline void CBackgroundDetection::SetBackgroundMbFlag (int8_t* pBackgroundMbFlag, int32_t iPicWidthInMb, + int32_t iBackgroundMbFlag) { + *pBackgroundMbFlag = iBackgroundMbFlag; +} + /* Second look at an OU of the previous row: when its iSAD exceeds BGD_OU_SIZE * Q_FACTOR and at most one of its four neighbours is background, both its OU flag and its MB flag are cleared. */ +inline void CBackgroundDetection::UpperOUForegroundCheck (SBackgroundOU* pCurOU, int8_t* pBackgroundMbFlag, + int32_t iPicWidthInOU, int32_t iPicWidthInMb) { + if (pCurOU->iSAD > BGD_OU_SIZE * Q_FACTOR) { + SBackgroundOU* pOU_L = pCurOU - 1; +
SBackgroundOU* pOU_R = pCurOU + 1; + SBackgroundOU* pOU_U = pCurOU - iPicWidthInOU; + SBackgroundOU* pOU_D = pCurOU + iPicWidthInOU; + if (pOU_L->iBackgroundFlag + pOU_R->iBackgroundFlag + pOU_U->iBackgroundFlag + pOU_D->iBackgroundFlag <= 1) { + SetBackgroundMbFlag (pBackgroundMbFlag, iPicWidthInMb, 0); + pCurOU->iBackgroundFlag = 0; + } + } +} + /* Step 2: walks the OU array in raster order, refines each flag with ForegroundDilation (flag set) or BackgroundErosion (flag clear) and writes the result to the MB flag buffer. At picture borders the missing neighbour pointer refers to the current OU itself. */ +void CBackgroundDetection::ForegroundDilationAndBackgroundErosion (vBGDParam* pBgdParam) { + int32_t iPicStrideUV = pBgdParam->iStride[1]; + int32_t iPicWidthInOU = pBgdParam->iBgdWidth >> LOG2_BGD_OU_SIZE; + int32_t iPicHeightInOU = pBgdParam->iBgdHeight >> LOG2_BGD_OU_SIZE; + int32_t iOUStrideUV = iPicStrideUV << (LOG2_BGD_OU_SIZE - 1); + int32_t iPicWidthInMb = (15 + pBgdParam->iBgdWidth) >> 4; + + SBackgroundOU* pBackgroundOU = pBgdParam->pOU_array; + int8_t* pVaaBackgroundMbFlag = (int8_t*)pBgdParam->pBackgroundMbFlag; + SBackgroundOU* pOUNeighbours[4];//0: left; 1: right; 2: top; 3: bottom + + pOUNeighbours[2] = pBackgroundOU;//top OU + for (int32_t j = 0; j < iPicHeightInOU; j ++) { + int8_t* pRowSkipFlag = pVaaBackgroundMbFlag; + pOUNeighbours[0] = pBackgroundOU;//left OU + pOUNeighbours[3] = pBackgroundOU + (iPicWidthInOU & ((j == iPicHeightInOU - 1) - 1)); //bottom OU + for (int32_t i = 0; i < iPicWidthInOU; i++) { + pOUNeighbours[1] = pBackgroundOU + (i < iPicWidthInOU - 1); //right OU + + if (pBackgroundOU->iBackgroundFlag) + ForegroundDilation (pBackgroundOU, pOUNeighbours, pBgdParam, j * iOUStrideUV + (i << LOG2_BGD_OU_SIZE_UV)); + else + BackgroundErosion (pBackgroundOU, pOUNeighbours); + + // check the up OU + if (j > 1 && i > 0 && i < iPicWidthInOU - 1 && pOUNeighbours[2]->iBackgroundFlag == 1) { + UpperOUForegroundCheck (pOUNeighbours[2], pRowSkipFlag - OU_SIZE_IN_MB * iPicWidthInMb, iPicWidthInOU, iPicWidthInMb); + } + + SetBackgroundMbFlag (pRowSkipFlag, iPicWidthInMb, pBackgroundOU->iBackgroundFlag); + + // preparation for the next OU + pRowSkipFlag += OU_SIZE_IN_MB; + pOUNeighbours[0] =
pBackgroundOU; + pOUNeighbours[2]++; + pOUNeighbours[3]++; + pBackgroundOU++; + } + pOUNeighbours[2] = pBackgroundOU - iPicWidthInOU; + pVaaBackgroundMbFlag += OU_SIZE_IN_MB * iPicWidthInMb; + } +} + /* Runs the two detection steps in order on the prepared parameter block. */ +void CBackgroundDetection::BackgroundDetection (vBGDParam* pBgdParam) { + // 1st step: foreground/background coarse division + ForegroundBackgroundDivision (pBgdParam); + + // 2nd step: foreground dilation and background erosion + ForegroundDilationAndBackgroundErosion (pBgdParam); +} + +WELSVP_NAMESPACE_END diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/backgrounddetection/BackgroundDetection.h b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/backgrounddetection/BackgroundDetection.h new file mode 100644 index 000000000..80d916ddc --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/backgrounddetection/BackgroundDetection.h @@ -0,0 +1,105 @@ +/*! + * \copy + * Copyright (c) 2011-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * \file : BackgroundDetection.h + * + * \brief : background detection class of wels video processor class + * + * \date : 2011/03/17 + * + * \description : 1. rewrite the package code of background detection class + * + */ + +#ifndef WELSVP_BACKGROUNDDETECTION_H +#define WELSVP_BACKGROUNDDETECTION_H + +#include "util.h" +#include "memory.h" +#include "WelsFrameWork.h" +#include "IWelsVP.h" + +WELSVP_NAMESPACE_BEGIN + +typedef struct { + int32_t iBackgroundFlag; + int32_t iSAD; + int32_t iSD; + int32_t iMAD; + int32_t iMinSubMad; + int32_t iMaxDiffSubSd; +} SBackgroundOU; + +class CBackgroundDetection : public IStrategy { + public: + CBackgroundDetection (int32_t iCpuFlag); + ~CBackgroundDetection(); + + EResult Process (int32_t iType, SPixMap* pSrc, SPixMap* pRef); + EResult Set (int32_t iType, void* pParam); + + private: + struct vBGDParam { + uint8_t* pCur[3]; + uint8_t* pRef[3]; + int32_t iBgdWidth; + int32_t iBgdHeight; + int32_t iStride[3]; + SBackgroundOU* pOU_array; + int8_t* pBackgroundMbFlag; + SVAACalcResult* pCalcRes; + } m_BgdParam; + + int32_t m_iLargestFrameSize; + + private: + inline SBackgroundOU* AllocateOUArrayMemory (int32_t iWidth, int32_t iHeight); + inline int32_t CalculateAsdChromaEdge (uint8_t* pOriRef, uint8_t* pOriCur, int32_t iStride); + inline bool ForegroundDilation23Luma (SBackgroundOU* pBackgroundOU, + SBackgroundOU* pOUNeighbours[]); //Foreground_Dilation_2_3_Luma + inline bool 
ForegroundDilation23Chroma (int8_t iNeighbourForegroundFlags, int32_t iStartSamplePos, + int32_t iPicStrideUV, vBGDParam* pBgdParam);//Foreground_Dilation_2_3_Chroma + inline void ForegroundDilation (SBackgroundOU* pBackgroundOU, SBackgroundOU* pOUNeighbours[], vBGDParam* pBgdParam, + int32_t iChromaSampleStartPos); + inline void BackgroundErosion (SBackgroundOU* pBackgroundOU, SBackgroundOU* pOUNeighbours[]); + inline void SetBackgroundMbFlag (int8_t* pBackgroundMbFlag, int32_t iPicWidthInMb, int32_t iBackgroundMbFlag); + inline void UpperOUForegroundCheck (SBackgroundOU* pCurOU, int8_t* pBackgroundMbFlag, int32_t iPicWidthInOU, + int32_t iPicWidthInMb); + + void GetOUParameters (SVAACalcResult* sVaaCalcInfo, int32_t iMbIndex, int32_t iMbWidth, + SBackgroundOU* pBackgroundOU); + void ForegroundBackgroundDivision (vBGDParam* pBgdParam); + void ForegroundDilationAndBackgroundErosion (vBGDParam* pBgdParam); + void BackgroundDetection (vBGDParam* pBgdParam); +}; + +WELSVP_NAMESPACE_END + +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/common/WelsFrameWork.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/common/WelsFrameWork.cpp new file mode 100644 index 000000000..79e196b79 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/common/WelsFrameWork.cpp @@ -0,0 +1,302 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include "WelsFrameWork.h" +#include "../denoise/denoise.h" +#include "../downsample/downsample.h" +#include "../scrolldetection/ScrollDetection.h" +#include "../scenechangedetection/SceneChangeDetection.h" +#include "../vaacalc/vaacalculation.h" +#include "../backgrounddetection/BackgroundDetection.h" +#include "../adaptivequantization/AdaptiveQuantization.h" +#include "../complexityanalysis/ComplexityAnalysis.h" +#include "../imagerotate/imagerotate.h" +#include "util.h" + +/* interface API implement */ + +EResult WelsCreateVpInterface (void** ppCtx, int iVersion) { + if (iVersion & 0x8000) + return WelsVP::CreateSpecificVpInterface ((IWelsVP**)ppCtx); + else if (iVersion & 0x7fff) + return WelsVP::CreateSpecificVpInterface ((IWelsVPc**)ppCtx); + else + return RET_INVALIDPARAM; +} + +EResult WelsDestroyVpInterface (void* pCtx, int iVersion) { + if (iVersion & 0x8000) + return WelsVP::DestroySpecificVpInterface ((IWelsVP*)pCtx); + else if (iVersion & 0x7fff) + return WelsVP::DestroySpecificVpInterface ((IWelsVPc*)pCtx); + else + return RET_INVALIDPARAM; +} + +WELSVP_NAMESPACE_BEGIN + +/////////////////////////////////////////////////////////////////////// + 
+EResult CreateSpecificVpInterface (IWelsVP** ppCtx) { + EResult eReturn = RET_FAILED; + + CVpFrameWork* pFr = new CVpFrameWork (1, eReturn); + if (pFr) { + *ppCtx = (IWelsVP*)pFr; + eReturn = RET_SUCCESS; + } + + return eReturn; +} + +EResult DestroySpecificVpInterface (IWelsVP* pCtx) { + delete pCtx; + + return RET_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// + +CVpFrameWork::CVpFrameWork (uint32_t uiThreadsNum, EResult& eReturn) { + int32_t iCoreNum = 1; + uint32_t uiCPUFlag = WelsCPUFeatureDetect (&iCoreNum); + + for (int32_t i = 0; i < MAX_STRATEGY_NUM; i++) { + m_pStgChain[i] = CreateStrategy (WelsStaticCast (EMethods, i + 1), uiCPUFlag); + } + + WelsMutexInit (&m_mutes); + + eReturn = RET_SUCCESS; +} + +CVpFrameWork::~CVpFrameWork() { + for (int32_t i = 0; i < MAX_STRATEGY_NUM; i++) { + if (m_pStgChain[i]) { + Uninit (m_pStgChain[i]->m_eMethod); + delete m_pStgChain[i]; + } + } + + WelsMutexDestroy (&m_mutes); +} + +EResult CVpFrameWork::Init (int32_t iType, void* pCfg) { + EResult eReturn = RET_SUCCESS; + int32_t iCurIdx = WelsStaticCast (int32_t, WelsVpGetValidMethod (iType)) - 1; + + Uninit (iType); + + WelsMutexLock (&m_mutes); + + IStrategy* pStrategy = m_pStgChain[iCurIdx]; + if (pStrategy) + eReturn = pStrategy->Init (0, pCfg); + + WelsMutexUnlock (&m_mutes); + + return eReturn; +} + +EResult CVpFrameWork::Uninit (int32_t iType) { + EResult eReturn = RET_SUCCESS; + int32_t iCurIdx = WelsStaticCast (int32_t, WelsVpGetValidMethod (iType)) - 1; + + WelsMutexLock (&m_mutes); + + IStrategy* pStrategy = m_pStgChain[iCurIdx]; + if (pStrategy) + eReturn = pStrategy->Uninit (0); + + WelsMutexUnlock (&m_mutes); + + return eReturn; +} + +EResult CVpFrameWork::Flush (int32_t iType) { + EResult eReturn = RET_SUCCESS; + + return eReturn; +} + +EResult CVpFrameWork::Process (int32_t iType, SPixMap* pSrcPixMap, SPixMap* pDstPixMap) { + EResult eReturn = RET_NOTSUPPORTED; + EMethods eMethod = WelsVpGetValidMethod 
(iType); + int32_t iCurIdx = WelsStaticCast (int32_t, eMethod) - 1; + SPixMap sSrcPic; + SPixMap sDstPic; + memset (&sSrcPic, 0, sizeof (sSrcPic)); // confirmed_safe_unsafe_usage + memset (&sDstPic, 0, sizeof (sDstPic)); // confirmed_safe_unsafe_usage + + if (pSrcPixMap) sSrcPic = *pSrcPixMap; + if (pDstPixMap) sDstPic = *pDstPixMap; + if (!CheckValid (eMethod, sSrcPic, sDstPic)) + return RET_INVALIDPARAM; + + WelsMutexLock (&m_mutes); + + IStrategy* pStrategy = m_pStgChain[iCurIdx]; + if (pStrategy) + eReturn = pStrategy->Process (0, &sSrcPic, &sDstPic); + + WelsMutexUnlock (&m_mutes); + + return eReturn; +} + +EResult CVpFrameWork::Get (int32_t iType, void* pParam) { + EResult eReturn = RET_SUCCESS; + int32_t iCurIdx = WelsStaticCast (int32_t, WelsVpGetValidMethod (iType)) - 1; + + if (!pParam) + return RET_INVALIDPARAM; + + WelsMutexLock (&m_mutes); + + IStrategy* pStrategy = m_pStgChain[iCurIdx]; + if (pStrategy) + eReturn = pStrategy->Get (0, pParam); + + WelsMutexUnlock (&m_mutes); + + return eReturn; +} + +EResult CVpFrameWork::Set (int32_t iType, void* pParam) { + EResult eReturn = RET_SUCCESS; + int32_t iCurIdx = WelsStaticCast (int32_t, WelsVpGetValidMethod (iType)) - 1; + + if (!pParam) + return RET_INVALIDPARAM; + + WelsMutexLock (&m_mutes); + + IStrategy* pStrategy = m_pStgChain[iCurIdx]; + if (pStrategy) + eReturn = pStrategy->Set (0, pParam); + + WelsMutexUnlock (&m_mutes); + + return eReturn; +} + +EResult CVpFrameWork::SpecialFeature (int32_t iType, void* pIn, void* pOut) { + EResult eReturn = RET_SUCCESS; + + return eReturn; +} + +bool CVpFrameWork::CheckValid (EMethods eMethod, SPixMap& pSrcPixMap, SPixMap& pDstPixMap) { + bool eReturn = false; + + if (eMethod == METHOD_NULL) + goto exit; + + if (eMethod != METHOD_COLORSPACE_CONVERT) { + if (pSrcPixMap.pPixel[0]) { + if (pSrcPixMap.eFormat != VIDEO_FORMAT_I420 && pSrcPixMap.eFormat != VIDEO_FORMAT_YV12) + goto exit; + } + if (pSrcPixMap.pPixel[0] && pDstPixMap.pPixel[0]) { + if 
(pDstPixMap.eFormat != pSrcPixMap.eFormat) + goto exit; + } + } + + if (pSrcPixMap.pPixel[0]) { + if (pSrcPixMap.sRect.iRectWidth <= 0 || pSrcPixMap.sRect.iRectHeight <= 0 + || pSrcPixMap.sRect.iRectWidth * pSrcPixMap.sRect.iRectHeight > (MAX_MBS_PER_FRAME << 8)) + goto exit; + if (pSrcPixMap.sRect.iRectTop >= pSrcPixMap.sRect.iRectHeight + || pSrcPixMap.sRect.iRectLeft >= pSrcPixMap.sRect.iRectWidth || pSrcPixMap.sRect.iRectWidth > pSrcPixMap.iStride[0]) + goto exit; + } + if (pDstPixMap.pPixel[0]) { + if (pDstPixMap.sRect.iRectWidth <= 0 || pDstPixMap.sRect.iRectHeight <= 0 + || pDstPixMap.sRect.iRectWidth * pDstPixMap.sRect.iRectHeight > (MAX_MBS_PER_FRAME << 8)) + goto exit; + if (pDstPixMap.sRect.iRectTop >= pDstPixMap.sRect.iRectHeight + || pDstPixMap.sRect.iRectLeft >= pDstPixMap.sRect.iRectWidth || pDstPixMap.sRect.iRectWidth > pDstPixMap.iStride[0]) + goto exit; + } + eReturn = true; + +exit: + return eReturn; +} + +IStrategy* CVpFrameWork::CreateStrategy (EMethods m_eMethod, int32_t iCpuFlag) { + IStrategy* pStrategy = NULL; + + switch (m_eMethod) { + case METHOD_COLORSPACE_CONVERT: + //not support yet + break; + case METHOD_DENOISE: + pStrategy = WelsDynamicCast (IStrategy*, new CDenoiser (iCpuFlag)); + break; + case METHOD_SCROLL_DETECTION: + pStrategy = WelsDynamicCast (IStrategy*, new CScrollDetection (iCpuFlag)); + break; + case METHOD_SCENE_CHANGE_DETECTION_VIDEO: + case METHOD_SCENE_CHANGE_DETECTION_SCREEN: + pStrategy = BuildSceneChangeDetection (m_eMethod, iCpuFlag); + break; + case METHOD_DOWNSAMPLE: + pStrategy = WelsDynamicCast (IStrategy*, new CDownsampling (iCpuFlag)); + break; + case METHOD_VAA_STATISTICS: + pStrategy = WelsDynamicCast (IStrategy*, new CVAACalculation (iCpuFlag)); + break; + case METHOD_BACKGROUND_DETECTION: + pStrategy = WelsDynamicCast (IStrategy*, new CBackgroundDetection (iCpuFlag)); + break; + case METHOD_ADAPTIVE_QUANT: + pStrategy = WelsDynamicCast (IStrategy*, new CAdaptiveQuantization (iCpuFlag)); + break; + case 
METHOD_COMPLEXITY_ANALYSIS: + pStrategy = WelsDynamicCast (IStrategy*, new CComplexityAnalysis (iCpuFlag)); + break; + case METHOD_COMPLEXITY_ANALYSIS_SCREEN: + pStrategy = WelsDynamicCast (IStrategy*, new CComplexityAnalysisScreen (iCpuFlag)); + break; + case METHOD_IMAGE_ROTATE: + pStrategy = WelsDynamicCast (IStrategy*, new CImageRotating (iCpuFlag)); + break; + default: + break; + } + + return pStrategy; +} + +WELSVP_NAMESPACE_END diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/common/WelsFrameWork.h b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/common/WelsFrameWork.h new file mode 100644 index 000000000..6b1636530 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/common/WelsFrameWork.h @@ -0,0 +1,130 @@ +/*! + * \copy + * Copyright (c) 2011-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * \file : WelsFrameWork.h + * + * \brief : framework of wels video processor class + * + * \date : 2011/01/04 + * + * \description : + * + ************************************************************************************* + */ + +#ifndef WELSVP_WELSFRAMEWORK_H +#define WELSVP_WELSFRAMEWORK_H + +#include "IWelsVP.h" +#include "util.h" +#include "WelsThreadLib.h" + +WELSVP_NAMESPACE_BEGIN + +EResult CreateSpecificVpInterface (IWelsVP** ppCtx); +EResult DestroySpecificVpInterface (IWelsVP* pCtx); + +EResult CreateSpecificVpInterface (IWelsVPc** ppCtx); +EResult DestroySpecificVpInterface (IWelsVPc* pCtx); + +#define MAX_STRATEGY_NUM (METHOD_MASK - 1) + +class IStrategy : public IWelsVP { + public: + IStrategy() { + m_eMethod = METHOD_NULL; + m_eFormat = VIDEO_FORMAT_I420; + m_iIndex = 0; + m_bInit = false; + } + + virtual ~IStrategy() {} + + public: + virtual EResult Init (int32_t iType, void* pCfg) { + return RET_SUCCESS; + } + virtual EResult Uninit (int32_t iType) { + return RET_SUCCESS; + } + virtual EResult Flush (int32_t iType) { + return RET_SUCCESS; + } + virtual EResult Get (int32_t iType, void* pParam) { + return RET_SUCCESS; + } + virtual EResult Set (int32_t iType, void* pParam) { + return RET_SUCCESS; + } + virtual EResult SpecialFeature (int32_t iType, void* pIn, void* pOut) { + return RET_SUCCESS; + } + virtual EResult Process (int32_t iType, SPixMap* pSrc, SPixMap* pDst) = 0; + + public: + EMethods 
m_eMethod; + EVideoFormat m_eFormat; + int32_t m_iIndex; + bool m_bInit; +}; + +class CVpFrameWork : public IWelsVP { + public: + CVpFrameWork (uint32_t uiThreadsNum, EResult& ret); + ~CVpFrameWork(); + + public: + EResult Init (int32_t iType, void* pCfg); + + EResult Uninit (int32_t iType); + + EResult Flush (int32_t iType); + + EResult Process (int32_t iType, SPixMap* pSrc, SPixMap* pDst); + + EResult Get (int32_t iType, void* pParam); + + EResult Set (int32_t iType, void* pParam); + + EResult SpecialFeature (int32_t iType, void* pIn, void* pOut); + + private: + bool CheckValid (EMethods eMethod, SPixMap& sSrc, SPixMap& sDst); + IStrategy* CreateStrategy (EMethods eMethod, int32_t iCpuFlag); + + private: + IStrategy* m_pStgChain[MAX_STRATEGY_NUM]; + + WELS_MUTEX m_mutes; +}; + +WELSVP_NAMESPACE_END + +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/common/WelsFrameWorkEx.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/common/WelsFrameWorkEx.cpp new file mode 100644 index 000000000..1681a1453 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/common/WelsFrameWorkEx.cpp @@ -0,0 +1,96 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include "WelsFrameWork.h" + +/////////////////////////////////////////////////////////////////////// + +WELSVP_NAMESPACE_BEGIN + +EResult Init (void* pCtx, int32_t iType, void* pCfg) { + return pCtx ? WelsStaticCast (IWelsVP*, pCtx)->Init (iType, pCfg) : RET_INVALIDPARAM; +} +EResult Uninit (void* pCtx, int32_t iType) { + return pCtx ? WelsStaticCast (IWelsVP*, pCtx)->Uninit (iType) : RET_INVALIDPARAM; +} +EResult Flush (void* pCtx, int32_t iType) { + return pCtx ? WelsStaticCast (IWelsVP*, pCtx)->Flush (iType) : RET_INVALIDPARAM; +} +EResult Process (void* pCtx, int32_t iType, SPixMap* pSrc, SPixMap* dst) { + return pCtx ? WelsStaticCast (IWelsVP*, pCtx)->Process (iType, pSrc, dst) : RET_INVALIDPARAM; +} +EResult Get (void* pCtx, int32_t iType, void* pParam) { + return pCtx ? WelsStaticCast (IWelsVP*, pCtx)->Get (iType, pParam) : RET_INVALIDPARAM; +} +EResult Set (void* pCtx, int32_t iType, void* pParam) { + return pCtx ? WelsStaticCast (IWelsVP*, pCtx)->Set (iType, pParam) : RET_INVALIDPARAM; +} +EResult SpecialFeature (void* pCtx, int32_t iType, void* pIn, void* pOut) { + return pCtx ? 
WelsStaticCast (IWelsVP*, pCtx)->SpecialFeature (iType, pIn, pOut) : RET_INVALIDPARAM; +} + +/////////////////////////////////////////////////////////////////////////////// + +EResult CreateSpecificVpInterface (IWelsVPc** pCtx) { + EResult ret = RET_FAILED; + IWelsVP* pWelsVP = NULL; + + ret = CreateSpecificVpInterface (&pWelsVP); + if (ret == RET_SUCCESS) { + IWelsVPc* pVPc = new IWelsVPc; + if (pVPc) { + pVPc->Init = Init; + pVPc->Uninit = Uninit; + pVPc->Flush = Flush; + pVPc->Process = Process; + pVPc->Get = Get; + pVPc->Set = Set; + pVPc->SpecialFeature = SpecialFeature; + pVPc->pCtx = WelsStaticCast (void*, pWelsVP); + *pCtx = pVPc; + } else + ret = RET_OUTOFMEMORY; + } + + return ret; +} + +EResult DestroySpecificVpInterface (IWelsVPc* pCtx) { + if (pCtx) { + DestroySpecificVpInterface (WelsStaticCast (IWelsVP*, pCtx->pCtx)); + delete pCtx; + } + + return RET_SUCCESS; +} + +WELSVP_NAMESPACE_END diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/common/WelsVP.def b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/common/WelsVP.def new file mode 100644 index 000000000..eebc91ee3 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/common/WelsVP.def @@ -0,0 +1,36 @@ +;*! +;* \copy +;* Copyright (c) 2011-2013, Cisco Systems +;* All rights reserved. +;* +;* Redistribution and use in source and binary forms, with or without +;* modification, are permitted provided that the following conditions +;* are met: +;* +;* * Redistributions of source code must retain the above copyright +;* notice, this list of conditions and the following disclaimer. +;* +;* * Redistributions in binary form must reproduce the above copyright +;* notice, this list of conditions and the following disclaimer in +;* the documentation and/or other materials provided with the +;* distribution. 
+;* +;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +;* POSSIBILITY OF SUCH DAMAGE. +;* +;* + +LIBRARY welsvp.dll +EXPORTS + WelsCreateVpInterface + WelsDestroyVpInterface \ No newline at end of file diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/common/WelsVP.rc b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/common/WelsVP.rc new file mode 100644 index 000000000..0f7542297 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/common/WelsVP.rc @@ -0,0 +1,115 @@ +// Microsoft Visual C++ generated resource script. +// +#include "resource.h" + +#define APSTUDIO_READONLY_SYMBOLS +///////////////////////////////////////////////////////////////////////////// +// +// Generated from the TEXTINCLUDE 2 resource. +// +#include "windows.h" + +///////////////////////////////////////////////////////////////////////////// +#undef APSTUDIO_READONLY_SYMBOLS + +///////////////////////////////////////////////////////////////////////////// +// Chinese (P.R.C.) 
resources + +#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_CHS) +#ifdef _WIN32 +LANGUAGE LANG_CHINESE, SUBLANG_CHINESE_SIMPLIFIED +#pragma code_page(936) +#endif //_WIN32 + +#ifdef APSTUDIO_INVOKED +///////////////////////////////////////////////////////////////////////////// +// +// TEXTINCLUDE +// + +1 TEXTINCLUDE +BEGIN + "resource.h\0" +END + +2 TEXTINCLUDE +BEGIN + "#include ""windows.h""\r\n" + "\0" +END + +3 TEXTINCLUDE +BEGIN + "\r\n" + "\0" +END + +#endif // APSTUDIO_INVOKED + +#endif // Chinese (P.R.C.) resources +///////////////////////////////////////////////////////////////////////////// + + +///////////////////////////////////////////////////////////////////////////// +// English (U.S.) resources + +#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU) +#ifdef _WIN32 +LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US +#pragma code_page(1252) +#endif //_WIN32 + +///////////////////////////////////////////////////////////////////////////// +// +// Version +// + +VS_VERSION_INFO VERSIONINFO + FILEVERSION 0,0,0,0 + PRODUCTVERSION 0,0,0,0 + FILEFLAGSMASK 0x3fL +#ifdef _DEBUG + FILEFLAGS 0x1L +#else + FILEFLAGS 0x0L +#endif + FILEOS 0x40004L + FILETYPE 0x2L + FILESUBTYPE 0x0L +BEGIN + BLOCK "StringFileInfo" + BEGIN + BLOCK "040904b0" + BEGIN + VALUE "Comments", "Cisco OpenH264 video preprocessing" + VALUE "CompanyName", "Cisco Systems" + VALUE "FileDescription", "Cisco OpenH264 video preprocessing" + VALUE "FileVersion", "0, 0, 0, 0" + VALUE "InternalName", "welsvp.dll" + VALUE "LegalCopyright", " 2011-2015 Cisco and/or its affiliates. All rights reserved." + VALUE "OriginalFilename", "welsvp.dll" + VALUE "ProductName", "Cisco OpenH264 video preprocessing" + VALUE "ProductVersion", "0, 0, 0, 0" + END + END + BLOCK "VarFileInfo" + BEGIN + VALUE "Translation", 0x409, 1200 + END +END + +#endif // English (U.S.) 
resources +///////////////////////////////////////////////////////////////////////////// + + + +#ifndef APSTUDIO_INVOKED +///////////////////////////////////////////////////////////////////////////// +// +// Generated from the TEXTINCLUDE 3 resource. +// + + +///////////////////////////////////////////////////////////////////////////// +#endif // not APSTUDIO_INVOKED + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/common/common.h b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/common/common.h new file mode 100644 index 000000000..386248a2c --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/common/common.h @@ -0,0 +1,84 @@ +/*! + * \copy + * Copyright (c) 2011-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * \file : SceneChangeDetectionCommon.h + * + * \brief : scene change detection class of wels video processor class + * + * \date : 2011/03/14 + * + * \description : 1. rewrite the package code of scene change detection class + * + */ + +#ifndef WELSVP_COMMON_H +#define WELSVP_COMMON_H + +#include "util.h" +#include "memory.h" +#include "WelsFrameWork.h" +#include "IWelsVP.h" +#include "sad_common.h" +#include "intra_pred_common.h" + + + +typedef void (GetIntraPred) (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); + +typedef GetIntraPred* GetIntraPredPtr; + +GetIntraPred WelsI16x16LumaPredV_c; +GetIntraPred WelsI16x16LumaPredH_c; + +WELSVP_NAMESPACE_BEGIN + +typedef int32_t (SadFunc) (uint8_t* pSrcY, int32_t iSrcStrideY, uint8_t* pRefY, int32_t iRefStrideY); + +typedef SadFunc* SadFuncPtr; + +typedef int32_t (Sad16x16Func) (uint8_t* pSrcY, int32_t iSrcStrideY, uint8_t* pRefY, int32_t iRefStrideY); +typedef Sad16x16Func* PSad16x16Func; + + +#ifdef HAVE_NEON +WELSVP_EXTERN_C_BEGIN +int32_t WelsProcessingSampleSad8x8_neon (uint8_t*, int32_t, uint8_t*, int32_t); +WELSVP_EXTERN_C_END +#endif + +#ifdef HAVE_NEON_AARCH64 +WELSVP_EXTERN_C_BEGIN +int32_t WelsProcessingSampleSad8x8_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t); +WELSVP_EXTERN_C_END +#endif + +WELSVP_NAMESPACE_END + +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/common/memory.cpp 
b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/common/memory.cpp new file mode 100644 index 000000000..65ef5876f --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/common/memory.cpp @@ -0,0 +1,117 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ *
+ */
+
+#include "memory.h"
+
+WELSVP_NAMESPACE_BEGIN
+// Allocates a zero-filled block of kuiSize bytes aligned to ALIGNBYTES; returns NULL when malloc fails.
+// The raw malloc pointer and kuiSize are stored just below the returned address; pTag is not used.
+void* WelsMalloc (const uint32_t kuiSize, char* pTag) {
+  const int32_t kiSizeVoidPointer = sizeof (void**);
+  const int32_t kiSizeInt32 = sizeof (int32_t);
+  const int32_t kiAlignedBytes = ALIGNBYTES - 1;
+
+  uint8_t* pBuf = (uint8_t*) ::malloc (kuiSize + kiAlignedBytes + kiSizeVoidPointer + kiSizeInt32);
+  uint8_t* pAlignedBuf = NULL;
+
+  if (NULL == pBuf)
+    return NULL;
+
+  // to fill zero values
+  WelsMemset (pBuf, 0, kuiSize + kiAlignedBytes + kiSizeVoidPointer + kiSizeInt32);
+
+  pAlignedBuf = pBuf + kiAlignedBytes + kiSizeVoidPointer + kiSizeInt32;
+  pAlignedBuf -= WelsCastFromPointer (pAlignedBuf) & kiAlignedBytes;
+  * ((void**) (pAlignedBuf - kiSizeVoidPointer)) = pBuf;
+  * ((int32_t*) (pAlignedBuf - (kiSizeVoidPointer + kiSizeInt32))) = kuiSize;
+
+  return (pAlignedBuf);
+}
+
+// Frees a block returned by WelsMalloc through the raw pointer saved just below it; NULL is a no-op.
+// pTag is not used.
+void WelsFree (void* pPointer, char* pTag) {
+  if (pPointer) {
+    ::free (* (((void**) pPointer) - 1));
+  }
+}
+
+// Moves a block into a new kuiSize-byte allocation, copying min(old size, kuiSize) bytes, then frees the old block.
+// Returns 0 on failure (the old block is left untouched), or pPointer itself when allocation fails but it is already big enough.
+void* InternalReallocate (void* pPointer, const uint32_t kuiSize, char* pTag) {
+  uint32_t iOldSize = 0;
+  uint8_t* pNew = NULL;
+  if (pPointer != NULL)
+    iOldSize = * ((int32_t*) ((uint8_t*) pPointer - sizeof (void**) - sizeof (int32_t)));
+  else
+    return WelsMalloc (kuiSize, pTag);
+
+  pNew = (uint8_t*)WelsMalloc (kuiSize, pTag);
+  if (0 == pNew) {
+    if (iOldSize > 0 && kuiSize > 0 && iOldSize >= kuiSize)
+      return (pPointer);
+    return 0;
+  } else if (iOldSize > 0 && kuiSize > 0) {
+    memcpy (pNew, pPointer, (iOldSize < kuiSize) ? iOldSize : kuiSize);
+  } else {
+    WelsFree (pNew, pTag); // nothing to copy: release the new block instead of leaking it before reporting failure
+    return 0;
+  }
+
+  WelsFree (pPointer, pTag);
+  return (pNew);
+}
+
+// Grows the block only when *pRealSize < kuiSize; the new size is kuiSize rounded up to a multiple of 16, plus 32.
+// On success *pRealSize is updated and the new pointer returned; on failure NULL is returned and *pRealSize is unchanged.
+void* WelsRealloc (void* pPointer, uint32_t* pRealSize, const uint32_t kuiSize, char* pTag) {
+  const uint32_t kuiOldSize = *pRealSize;
+  uint32_t kuiNewSize = 0;
+  void* pLocalPointer = NULL;
+  if (kuiOldSize >= kuiSize) // large enough of original block, so do nothing
+    return (pPointer);
+
+  // new request
+  kuiNewSize = kuiSize + 15;
+  kuiNewSize -= (kuiNewSize & 15);
+  kuiNewSize += 32;
+
+  pLocalPointer = InternalReallocate (pPointer, kuiNewSize, pTag);
+  if (NULL != pLocalPointer) {
+    *pRealSize = kuiNewSize;
+    return (pLocalPointer);
+  } else {
+    return NULL;
+  }
+}
+
+WELSVP_NAMESPACE_END
diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/common/memory.h b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/common/memory.h
new file mode 100644
index 000000000..7448e1f2f
--- /dev/null
+++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/common/memory.h
@@ -0,0 +1,110 @@
+/*!
+ * \copy
+ * Copyright (c) 2011-2013, Cisco Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * \file : memory.h
+ *
+ * \brief : memory definition for wels video processor class
+ *
+ * \date : 2011/02/22
+ *
+ * \description :
+ *
+ *************************************************************************************
+ */
+
+#ifndef WELSVP_MEMORY_H
+#define WELSVP_MEMORY_H
+
+#include "util.h"
+#include "typedef.h"
+
+WELSVP_NAMESPACE_BEGIN
+
+inline void* WelsMemset (void* pPointer, int32_t iValue, uint32_t uiSize) {
+  return ::memset (pPointer, iValue, uiSize);
+}
+
+inline void* WelsMemcpy (void* pDst, const void* kpSrc, uint32_t uiSize) {
+  return ::memcpy (pDst, kpSrc, uiSize);
+}
+
+inline int32_t WelsMemcmp (const void* kpBuf1, const void* kpBuf2, uint32_t uiSize) {
+  return ::memcmp (kpBuf1, kpBuf2, uiSize);
+}
+
+/*!
+*************************************************************************************
+* \brief malloc with zero filled utilization in Wels
+*
+* \param kuiSize size in bytes of the memory block required
+* \param pTag optional tag, defaults to NULL
+* \return allocated memory pointer exactly, failed in case of NULL return
+*
+* \note N/A
+*************************************************************************************
+*/
+void* WelsMalloc (const uint32_t kuiSize, char* pTag = NULL);
+
+/*!
+*************************************************************************************
+* \brief free utilization in Wels
+*
+* \param pPointer data pointer to be freed, as returned by WelsMalloc or WelsRealloc
+* \param pTag optional tag, defaults to NULL
+*
+* \return NONE
+*
+* \note N/A
+*************************************************************************************
+*/
+void WelsFree (void* pPointer, char* pTag = NULL);
+
+/*!
+*************************************************************************************
+* \brief reallocation in Wels. Do nothing and continue using old block
+* in case the block is large enough currently
+*
+* \param pPointer memory block allocated previously
+* \param pRealSize pointer to the current size of the memory block; updated when the block is reallocated
+* \param kuiSize new size of memory block requested
+* \param pTag optional tag, defaults to NULL
+* \return reallocated memory pointer exactly, failed in case of NULL return
+*
+* \note N/A
+*************************************************************************************
+*/
+void* WelsRealloc (void* pPointer, uint32_t* pRealSize, const uint32_t kuiSize, char* pTag = NULL);
+
+//////////////////////////////////////////////////////////////////////////////////////
+WELSVP_NAMESPACE_END
+
+#endif
+
+
diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/common/resource.h b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/common/resource.h
new file mode 100644
index 000000000..d0125dc7a
--- /dev/null
+++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/common/resource.h
@@ -0,0 +1,15 @@
+//{{NO_DEPENDENCIES}} +// Microsoft Visual C++ generated include file. +// Used by WelsVP.rc +// + +// Next default values for new objects +// +#ifdef APSTUDIO_INVOKED +#ifndef APSTUDIO_READONLY_SYMBOLS +#define _APS_NEXT_RESOURCE_VALUE 101 +#define _APS_NEXT_COMMAND_VALUE 40001 +#define _APS_NEXT_CONTROL_VALUE 1000 +#define _APS_NEXT_SYMED_VALUE 101 +#endif +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/common/typedef.h b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/common/typedef.h new file mode 100644 index 000000000..c8420d334 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/common/typedef.h @@ -0,0 +1,56 @@ +/*! + * \copy + * Copyright (c) 2011-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * \file : typedef.h
+ *
+ * \brief : basic type definition
+ *
+ * \date : 2011/01/04
+ *
+ * \description : 1. Define basic type with platform-independent;
+ * 2. Define specific namespace to avoid name pollution;
+ * 3. C++ ONLY;
+ *
+ *************************************************************************************
+ */
+
+#ifndef WELSVP_TYPEDEF_H
+#define WELSVP_TYPEDEF_H
+// Paired macros that open and close an extern "C" linkage block.
+#define WELSVP_EXTERN_C_BEGIN extern "C" {
+#define WELSVP_EXTERN_C_END }
+// Paired macros that open and close the WelsVP namespace.
+#define WELSVP_NAMESPACE_BEGIN namespace WelsVP {
+#define WELSVP_NAMESPACE_END }
+
+// Get the stdint type definitions from typedefs.h in the common lib
+#include "typedefs.h"
+
+#endif
diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/common/util.h b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/common/util.h
new file mode 100644
index 000000000..5ebaabb80
--- /dev/null
+++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/common/util.h
@@ -0,0 +1,105 @@
+/*!
+ * \copy
+ * Copyright (c) 2011-2013, Cisco Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ *
+ * \file : util.h
+ *
+ * \brief : utils for wels video processor class
+ *
+ * \date : 2011/01/04
+ *
+ * \description :
+ *
+ *************************************************************************************
+ */
+
+#ifndef WELSVP_UTIL_H
+#define WELSVP_UTIL_H
+// NOTE(review): the five system header names below were missing from this copy and are restored from what the module uses - confirm against upstream.
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <assert.h>
+
+#include "typedef.h"
+#include "memory.h"
+#include "IWelsVP.h"
+
+WELSVP_NAMESPACE_BEGIN
+
+#define MAX_MBS_PER_FRAME 36864 //in accordance with max level support in Rec
+
+#define MB_WIDTH_LUMA (16)
+#define PESN (1e-6) // desired float precision
+#define AQ_INT_MULTIPLY 10000000
+#define AQ_TIME_INT_MULTIPLY 10000
+#define AQ_QSTEP_INT_MULTIPLY 100
+#define AQ_PESN 10 // (1e-6)*AQ_INT_MULTIPLY
+
+#define MB_TYPE_INTRA4x4 0x00000001
+#define MB_TYPE_INTRA16x16 0x00000002
+#define MB_TYPE_INTRA_PCM 0x00000004
+#define MB_TYPE_INTRA (MB_TYPE_INTRA4x4 | MB_TYPE_INTRA16x16 | MB_TYPE_INTRA_PCM)
+#define IS_INTRA(type) ((type)&MB_TYPE_INTRA)
+
+#define WELS_MAX(x, y) ((x) > (y) ? (x) : (y))
+#define WELS_MIN(x, y) ((x) < (y) ? (x) : (y))
+
+#ifndef WELS_SIGN
+#define WELS_SIGN(a) ((int32_t)(a) >> 31)
+#endif
+
+#ifndef WELS_ABS
+#define WELS_ABS(a) ((WELS_SIGN(a) ^ (int32_t)(a)) - WELS_SIGN(a))
+#endif
+
+#define WELS_CLAMP(x, minv, maxv) WELS_MIN(WELS_MAX(x, minv), maxv)
+
+#define ALIGNBYTES (16) /* Worst case is requiring alignment to an 16 byte boundary */
+// Cast helpers; the template arguments had been stripped from this copy and are restored here.
+#define WelsCastFromPointer(p) (reinterpret_cast<intptr_t>(p))
+#define WelsStaticCast(type, p) (static_cast<type>(p))
+#define WelsDynamicCast(type, p) (dynamic_cast<type>(p))
+
+#define GET_METHOD(x) ((x) & 0xff) // mask method as the lowest 8bits
+#define GET_SPECIAL(x) (((x) >> 8) & 0xff) // mask special flag as 8bits
+// Extracts the method id from the low 8 bits of a and clamps it to the range (METHOD_NULL, METHOD_MASK).
+inline EMethods WelsVpGetValidMethod (int32_t a) {
+  int32_t iMethod = GET_METHOD (a);
+  return WelsStaticCast (EMethods, WELS_CLAMP (iMethod, METHOD_NULL + 1, METHOD_MASK - 1));
+}
+
+
+
+WELSVP_NAMESPACE_END
+
+#endif
+
+
diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/complexityanalysis/ComplexityAnalysis.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/complexityanalysis/ComplexityAnalysis.cpp
new file mode 100644
index 000000000..428a08edf
--- /dev/null
+++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/complexityanalysis/ComplexityAnalysis.cpp
@@ -0,0 +1,489 @@
+/*!
+ * \copy
+ * Copyright (c) 2013, Cisco Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "ComplexityAnalysis.h"
+#include "cpu.h"
+#include "macros.h"
+#include "intra_pred_common.h"
+
+WELSVP_NAMESPACE_BEGIN
+
+// Sets the method id to METHOD_COMPLEXITY_ANALYSIS and zeroes the parameter block; no SAD callback is chosen yet.
+// iCpuFlag is not referenced in this constructor.
+CComplexityAnalysis::CComplexityAnalysis (int32_t iCpuFlag) {
+  m_eMethod = METHOD_COMPLEXITY_ANALYSIS;
+  m_pfGomSad = NULL;
+  WelsMemset (&m_sComplexityAnalysisParam, 0, sizeof (m_sComplexityAnalysisParam));
+}
+
+CComplexityAnalysis::~CComplexityAnalysis() {
+}
+// Dispatches on iComplexityAnalysisMode (FRAME_SAD / GOM_SAD / GOM_VAR); any other mode returns RET_INVALIDPARAM.
+EResult CComplexityAnalysis::Process (int32_t iType, SPixMap* pSrcPixMap, SPixMap* pRefPixMap) {
+  EResult eReturn = RET_SUCCESS;
+
+  switch (m_sComplexityAnalysisParam.iComplexityAnalysisMode) {
+  case FRAME_SAD:
+    AnalyzeFrameComplexityViaSad (pSrcPixMap, pRefPixMap);
+    break;
+  case GOM_SAD:
+    AnalyzeGomComplexityViaSad (pSrcPixMap, pRefPixMap);
+    break;
+  case GOM_VAR:
+    AnalyzeGomComplexityViaVar (pSrcPixMap, pRefPixMap);
+    break;
+  default:
+    eReturn = RET_INVALIDPARAM;
+    break;
+  }
+
+  return eReturn;
+}
+
+// Copies the caller's SComplexityAnalysisParam into this object; a NULL pParam is rejected with RET_INVALIDPARAM.
+EResult CComplexityAnalysis::Set (int32_t iType, void* pParam) {
+  if (pParam == NULL) {
+    return RET_INVALIDPARAM;
+  }
+
+  m_sComplexityAnalysisParam = * (SComplexityAnalysisParam*)pParam;
+
+  return RET_SUCCESS;
+}
+// Writes only iFrameComplexity back into the caller's SComplexityAnalysisParam.
+EResult CComplexityAnalysis::Get (int32_t iType, void* pParam) {
+  if (pParam == NULL) {
+    return RET_INVALIDPARAM;
+  }
+
+  SComplexityAnalysisParam* sComplexityAnalysisParam = (SComplexityAnalysisParam*)pParam;
+
+  sComplexityAnalysisParam->iFrameComplexity = m_sComplexityAnalysisParam.iFrameComplexity;
+
+  return RET_SUCCESS;
+}
+
+// Frame complexity is pCalcResult->iFrameSad, replaced by the foreground-only SAD when iCalcBgd is set.
+// NOTE(review): pCalcResult is dereferenced without a NULL check - confirm callers always provide it.
+void CComplexityAnalysis::AnalyzeFrameComplexityViaSad (SPixMap* pSrcPixMap, SPixMap* pRefPixMap) {
+  SVAACalcResult* pVaaCalcResults = NULL;
+  pVaaCalcResults = m_sComplexityAnalysisParam.pCalcResult;
+
+  m_sComplexityAnalysisParam.iFrameComplexity = pVaaCalcResults->iFrameSad;
+
+  if (m_sComplexityAnalysisParam.iCalcBgd) { //BGD control
+    m_sComplexityAnalysisParam.iFrameComplexity = GetFrameSadExcludeBackground (pSrcPixMap, pRefPixMap);
+  }
+}
+// Sums the four 8x8 SADs of every MB that is not flagged as background or whose reference MB type is intra, and counts those MBs per GOM.
+int32_t CComplexityAnalysis::GetFrameSadExcludeBackground (SPixMap* pSrcPixMap, SPixMap* pRefPixMap) {
+  int32_t iWidth = pSrcPixMap->sRect.iRectWidth;
+  int32_t iHeight = pSrcPixMap->sRect.iRectHeight;
+  int32_t iMbWidth = iWidth >> 4;
+  int32_t iMbHeight = iHeight >> 4;
+  int32_t iMbNum = iMbWidth * iMbHeight;
+
+  int32_t iMbNumInGom = m_sComplexityAnalysisParam.iMbNumInGom;
+  int32_t iGomMbNum = (iMbNum + iMbNumInGom - 1) / iMbNumInGom; // NOTE(review): divides by iMbNumInGom - confirm it can never be 0
+  int32_t iGomMbStartIndex = 0, iGomMbEndIndex = 0;
+
+  uint8_t* pBackgroundMbFlag = (uint8_t*)m_sComplexityAnalysisParam.pBackgroundMbFlag;
+  uint32_t* uiRefMbType = (uint32_t*)m_sComplexityAnalysisParam.uiRefMbType;
+  SVAACalcResult* pVaaCalcResults = m_sComplexityAnalysisParam.pCalcResult;
+  int32_t* pGomForegroundBlockNum = m_sComplexityAnalysisParam.pGomForegroundBlockNum;
+
+  uint32_t uiFrameSad = 0;
+  for (int32_t j = 0; j < iGomMbNum; j ++) {
+    iGomMbStartIndex = j * iMbNumInGom;
+    iGomMbEndIndex = WELS_MIN ((j + 1) * iMbNumInGom, iMbNum);
+
+    for (int32_t i = iGomMbStartIndex; i < iGomMbEndIndex; i ++) {
+      if (pBackgroundMbFlag[i] == 0 || IS_INTRA (uiRefMbType[i])) {
+        pGomForegroundBlockNum[j]++;
+        uiFrameSad += pVaaCalcResults->pSad8x8[i][0];
+        uiFrameSad += pVaaCalcResults->pSad8x8[i][1];
+        uiFrameSad += pVaaCalcResults->pSad8x8[i][2];
+        uiFrameSad += pVaaCalcResults->pSad8x8[i][3];
+      }
+    }
+  }
+
+  return (uiFrameSad);
+}
+
+// Chooses the per-MB accumulator: GomSampleSad by default, the background-aware variant when iCalcBgd is non-zero.
+void InitGomSadFunc (PGOMSadFunc& pfGomSad, uint8_t iCalcBgd) {
+  pfGomSad = GomSampleSad;
+
+  if (iCalcBgd) {
+    pfGomSad = GomSampleSadExceptBackground;
+  }
+}
+// Counts the MB as foreground and adds its four 8x8 SADs to *pGomSad; pBackgroundMbFlag is ignored.
+void GomSampleSad (uint32_t* pGomSad, int32_t* pGomForegroundBlockNum, int32_t* pSad8x8, uint8_t pBackgroundMbFlag) {
+  (*pGomForegroundBlockNum) ++;
+  *pGomSad += pSad8x8[0];
+  *pGomSad += pSad8x8[1];
+  *pGomSad += pSad8x8[2];
+  *pGomSad += pSad8x8[3];
+}
+// Same accumulation as GomSampleSad, but only when pBackgroundMbFlag is 0.
+void GomSampleSadExceptBackground (uint32_t* pGomSad, int32_t* pGomForegroundBlockNum, int32_t* pSad8x8,
+                                   uint8_t pBackgroundMbFlag) {
+  if (pBackgroundMbFlag == 0) {
+    (*pGomForegroundBlockNum) ++;
+    *pGomSad += pSad8x8[0];
+    *pGomSad += pSad8x8[1];
+    *pGomSad += pSad8x8[2];
+    *pGomSad += pSad8x8[3];
+  }
+}
+// Accumulates one SAD per GOM (walking the GOM one MB-row segment at a time) into pGomComplexity, and their total into iFrameComplexity.
+void CComplexityAnalysis::AnalyzeGomComplexityViaSad (SPixMap* pSrcPixMap, SPixMap* pRefPixMap) {
+  int32_t iWidth = pSrcPixMap->sRect.iRectWidth;
+  int32_t iHeight = pSrcPixMap->sRect.iRectHeight;
+  int32_t iMbWidth = iWidth >> 4;
+  int32_t iMbHeight = iHeight >> 4;
+  int32_t iMbNum = iMbWidth * iMbHeight;
+
+  int32_t iMbNumInGom = m_sComplexityAnalysisParam.iMbNumInGom;
+  int32_t iGomMbNum = (iMbNum + iMbNumInGom - 1) / iMbNumInGom;
+
+  int32_t iGomMbStartIndex = 0, iGomMbEndIndex = 0, iGomMbRowNum = 0;
+  int32_t iMbStartIndex = 0, iMbEndIndex = 0;
+
+  uint8_t* pBackgroundMbFlag = (uint8_t*)m_sComplexityAnalysisParam.pBackgroundMbFlag;
+  uint32_t* uiRefMbType = (uint32_t*)m_sComplexityAnalysisParam.uiRefMbType;
+  SVAACalcResult* pVaaCalcResults = m_sComplexityAnalysisParam.pCalcResult;
+  int32_t* pGomForegroundBlockNum = (int32_t*)m_sComplexityAnalysisParam.pGomForegroundBlockNum;
+  int32_t* pGomComplexity = (int32_t*)m_sComplexityAnalysisParam.pGomComplexity;
+
+  uint32_t uiGomSad = 0, uiFrameSad = 0;
+  InitGomSadFunc (m_pfGomSad, m_sComplexityAnalysisParam.iCalcBgd);
+
+  for (int32_t j = 0; j < iGomMbNum; j ++) {
+    uiGomSad = 0;
+
+    iGomMbStartIndex = j * iMbNumInGom;
+    iGomMbEndIndex = WELS_MIN ((j + 1) * iMbNumInGom, iMbNum);
+    iGomMbRowNum = (iGomMbEndIndex + iMbWidth - 1) / iMbWidth - iGomMbStartIndex / iMbWidth;
+
+    iMbStartIndex = iGomMbStartIndex;
+    iMbEndIndex = WELS_MIN ((iMbStartIndex / iMbWidth + 1) * iMbWidth, iGomMbEndIndex);
+
+    do {
+      for (int32_t i = iMbStartIndex; i < iMbEndIndex; i ++) {
+        m_pfGomSad (&uiGomSad, pGomForegroundBlockNum + j, pVaaCalcResults->pSad8x8[i], pBackgroundMbFlag[i]
+                    && !IS_INTRA (uiRefMbType[i]));
+      }
+
+      iMbStartIndex = iMbEndIndex;
+      iMbEndIndex = WELS_MIN (iMbEndIndex + iMbWidth , iGomMbEndIndex);
+
+    } while (--iGomMbRowNum);
+    pGomComplexity[j] = uiGomSad;
+    uiFrameSad += pGomComplexity[j];
+  }
+  m_sComplexityAnalysisParam.iFrameComplexity = uiFrameSad;
+}
+
+// Per GOM: sum of squares minus (sum * sum / sample count), built from pSum16x16 / pSumOfSquare16x16; the total goes to iFrameComplexity.
+void CComplexityAnalysis::AnalyzeGomComplexityViaVar (SPixMap* pSrcPixMap, SPixMap* pRefPixMap) {
+  int32_t iWidth = pSrcPixMap->sRect.iRectWidth;
+  int32_t iHeight = pSrcPixMap->sRect.iRectHeight;
+  int32_t iMbWidth = iWidth >> 4;
+  int32_t iMbHeight = iHeight >> 4;
+  int32_t iMbNum = iMbWidth * iMbHeight;
+
+  int32_t iMbNumInGom = m_sComplexityAnalysisParam.iMbNumInGom;
+  int32_t iGomMbNum = (iMbNum + iMbNumInGom - 1) / iMbNumInGom;
+  int32_t iGomSampleNum = 0;
+
+  int32_t iGomMbStartIndex = 0, iGomMbEndIndex = 0, iGomMbRowNum = 0;
+  int32_t iMbStartIndex = 0, iMbEndIndex = 0;
+
+  SVAACalcResult* pVaaCalcResults = m_sComplexityAnalysisParam.pCalcResult;
+  int32_t* pGomComplexity = (int32_t*)m_sComplexityAnalysisParam.pGomComplexity;
+  uint32_t uiFrameSad = 0;
+
+  uint32_t uiSampleSum = 0, uiSquareSum = 0;
+
+  for (int32_t j = 0; j < iGomMbNum; j ++) {
+    uiSampleSum = 0;
+    uiSquareSum = 0;
+
+    iGomMbStartIndex = j * iMbNumInGom;
+    iGomMbEndIndex = WELS_MIN ((j + 1) * iMbNumInGom, iMbNum);
+    iGomMbRowNum = (iGomMbEndIndex + iMbWidth - 1) / iMbWidth - iGomMbStartIndex / iMbWidth;
+
+    iMbStartIndex = iGomMbStartIndex;
+    iMbEndIndex = WELS_MIN ((iMbStartIndex / iMbWidth + 1) * iMbWidth, iGomMbEndIndex);
+
+    iGomSampleNum = (iMbEndIndex - iMbStartIndex) * MB_WIDTH_LUMA * MB_WIDTH_LUMA; // NOTE(review): counts only the first row segment, while the sums below cover every row of the GOM - confirm
+
+    do {
+      for (int32_t i = iMbStartIndex; i < iMbEndIndex; i ++) {
+        uiSampleSum += pVaaCalcResults->pSum16x16[i];
+        uiSquareSum += pVaaCalcResults->pSumOfSquare16x16[i];
+      }
+
+      iMbStartIndex = iMbEndIndex;
+      iMbEndIndex = WELS_MIN (iMbEndIndex + iMbWidth, iGomMbEndIndex);
+
+    } while (--iGomMbRowNum);
+
+    pGomComplexity[j] = uiSquareSum - (uiSampleSum * uiSampleSum / iGomSampleNum); // NOTE(review): the product is evaluated in 32-bit unsigned arithmetic and can wrap - confirm
+    uiFrameSad += pGomComplexity[j];
+  }
+  m_sComplexityAnalysisParam.iFrameComplexity = uiFrameSad;
+}
+
+// Installs the C SAD and 16x16 luma V/H predictors, then SIMD variants when they are compiled in and iCpuFlag reports support.
+CComplexityAnalysisScreen::CComplexityAnalysisScreen (int32_t iCpuFlag) {
+  m_eMethod = METHOD_COMPLEXITY_ANALYSIS_SCREEN;
+  WelsMemset (&m_ComplexityAnalysisParam, 0, sizeof (m_ComplexityAnalysisParam));
+
+  m_pSadFunc = WelsSampleSad16x16_c;
+  m_pIntraFunc[0] = WelsI16x16LumaPredV_c;
+  m_pIntraFunc[1] = WelsI16x16LumaPredH_c;
+#ifdef X86_ASM
+  if (iCpuFlag & WELS_CPU_SSE2) {
+    m_pSadFunc = WelsSampleSad16x16_sse2;
+    m_pIntraFunc[0] = WelsI16x16LumaPredV_sse2;
+    m_pIntraFunc[1] = WelsI16x16LumaPredH_sse2;
+
+  }
+#endif
+
+#if defined (HAVE_NEON)
+  if (iCpuFlag & WELS_CPU_NEON) {
+    m_pSadFunc = WelsSampleSad16x16_neon;
+    m_pIntraFunc[0] = WelsI16x16LumaPredV_neon;
+    m_pIntraFunc[1] = WelsI16x16LumaPredH_neon;
+
+  }
+#endif
+
+#if defined (HAVE_NEON_AARCH64)
+  if (iCpuFlag & WELS_CPU_NEON) {
+    m_pSadFunc = WelsSampleSad16x16_AArch64_neon;
+    m_pIntraFunc[0] = WelsI16x16LumaPredV_AArch64_neon;
+    m_pIntraFunc[1] = WelsI16x16LumaPredH_AArch64_neon;
+  }
+#endif
+
+}
+
+CComplexityAnalysisScreen::~CComplexityAnalysisScreen() {
+}
+// Runs the intra analysis for IDR frames, otherwise the inter analysis; scroll compensation is enabled only when a non-zero scroll MV was detected.
+EResult CComplexityAnalysisScreen::Process (int32_t nType, SPixMap* pSrc, SPixMap* pRef) {
+  bool bScrollFlag = m_ComplexityAnalysisParam.sScrollResult.bScrollDetectFlag;
+  int32_t iIdrFlag = m_ComplexityAnalysisParam.iIdrFlag;
+  int32_t iScrollMvX = m_ComplexityAnalysisParam.sScrollResult.iScrollMvX;
+  int32_t iScrollMvY = m_ComplexityAnalysisParam.sScrollResult.iScrollMvY;
+
+  if (m_ComplexityAnalysisParam.iMbRowInGom <= 0)
+    return RET_INVALIDPARAM;
+  if (!iIdrFlag && pRef == NULL)
+    return RET_INVALIDPARAM;
+
+  if (iIdrFlag || pRef == NULL) {
+    GomComplexityAnalysisIntra (pSrc);
+  } else if (!bScrollFlag || ((iScrollMvX == 0) && (iScrollMvY == 0))) {
+    GomComplexityAnalysisInter (pSrc, pRef, 0);
+  } else {
+    GomComplexityAnalysisInter (pSrc, pRef, 1);
+  }
+
+  return RET_SUCCESS;
+}
+
+// Replaces the stored SComplexityAnalysisScreenParam with the caller's copy; a NULL pParam is rejected.
+EResult CComplexityAnalysisScreen::Set (int32_t nType, void* pParam) {
+  if (pParam == NULL)
+    return RET_INVALIDPARAM;
+
+  m_ComplexityAnalysisParam = * (SComplexityAnalysisScreenParam*)pParam;
+
+  return RET_SUCCESS;
+}
+// Copies the whole stored SComplexityAnalysisScreenParam out to the caller.
+EResult CComplexityAnalysisScreen::Get (int32_t nType, void* pParam) {
+  if (pParam == NULL)
+    return RET_INVALIDPARAM;
+
+  * (SComplexityAnalysisScreenParam*)pParam = m_ComplexityAnalysisParam;
+
+  return RET_SUCCESS;
+}
+// For each 16x16 block, takes the smaller SAD of the intra predictions its position allows and accumulates it per GOM of iMbRowInGom block rows.
+void CComplexityAnalysisScreen::GomComplexityAnalysisIntra (SPixMap* pSrc) {
+  int32_t iWidth = pSrc->sRect.iRectWidth;
+  int32_t iHeight = pSrc->sRect.iRectHeight;
+  int32_t iBlockWidth = iWidth >> 4;
+  int32_t iBlockHeight = iHeight >> 4;
+
+  int32_t iBlockSadH, iBlockSadV, iGomSad = 0;
+  int32_t iIdx = 0;
+
+  uint8_t* pPtrY = NULL;
+  int32_t iStrideY = 0;
+  int32_t iRowStrideY = 0;
+
+  uint8_t* pTmpCur = NULL;
+
+  ENFORCE_STACK_ALIGN_1D (uint8_t, iMemPredMb, 256, 16)
+
+  pPtrY = (uint8_t*)pSrc->pPixel[0];
+
+  iStrideY = pSrc->iStride[0];
+  iRowStrideY = iStrideY << 4;
+
+  m_ComplexityAnalysisParam.iFrameComplexity = 0;
+
+  for (int32_t j = 0; j < iBlockHeight; j ++) {
+    pTmpCur = pPtrY;
+
+    for (int32_t i = 0; i < iBlockWidth; i++) {
+      iBlockSadH = iBlockSadV = 0x7fffffff; // INT_MAX
+      if (j > 0) {
+        m_pIntraFunc[0] (iMemPredMb, pTmpCur, iStrideY); // NOTE(review): slot 0 is set to the PredV routine in the constructor although the result is named iBlockSadH; only the minimum is used
+        iBlockSadH = m_pSadFunc (pTmpCur, iStrideY, iMemPredMb, 16);
+      }
+      if (i > 0) {
+        m_pIntraFunc[1] (iMemPredMb, pTmpCur, iStrideY);
+        iBlockSadV = m_pSadFunc (pTmpCur, iStrideY, iMemPredMb, 16);
+      }
+      if (i || j)
+        iGomSad += WELS_MIN (iBlockSadH, iBlockSadV);
+
+      pTmpCur += 16;
+
+      if (i == iBlockWidth - 1 && ((j + 1) % m_ComplexityAnalysisParam.iMbRowInGom == 0 || j == iBlockHeight - 1)) {
+        m_ComplexityAnalysisParam.pGomComplexity[iIdx] = iGomSad;
+        m_ComplexityAnalysisParam.iFrameComplexity += iGomSad;
+        iIdx++;
+        iGomSad = 0;
+      }
+    }
+
+    pPtrY += iRowStrideY;
+  }
+  m_ComplexityAnalysisParam.iGomNumInFrame = iIdx;
+}
+
+// Like the intra pass, but each block's cost is the minimum of its intra SADs and its inter SAD against pRef (optionally scroll-shifted).
+void CComplexityAnalysisScreen::GomComplexityAnalysisInter (SPixMap* pSrc, SPixMap* pRef, bool bScrollFlag) {
+  int32_t iWidth = pSrc->sRect.iRectWidth;
+  int32_t iHeight = pSrc->sRect.iRectHeight;
+  int32_t iBlockWidth = iWidth >> 4;
+  int32_t iBlockHeight = iHeight >> 4;
+
+  int32_t iInterSad, iScrollSad, iBlockSadH, iBlockSadV, iGomSad = 0;
+  int32_t iIdx = 0;
+
+  int32_t iScrollMvX = m_ComplexityAnalysisParam.sScrollResult.iScrollMvX;
+  int32_t iScrollMvY = m_ComplexityAnalysisParam.sScrollResult.iScrollMvY;
+
+  uint8_t* pPtrX = NULL, *pPtrY = NULL;
+  int32_t iStrideX = 0, iStrideY = 0;
+  int32_t iRowStrideX = 0, iRowStrideY = 0;
+
+  uint8_t* pTmpRef = NULL, *pTmpCur = NULL, *pTmpRefScroll = NULL;
+
+  ENFORCE_STACK_ALIGN_1D (uint8_t, iMemPredMb, 256, 16)
+
+  pPtrX = (uint8_t*)pRef->pPixel[0];
+  pPtrY = (uint8_t*)pSrc->pPixel[0];
+
+  iStrideX = pRef->iStride[0];
+  iStrideY = pSrc->iStride[0];
+
+  iRowStrideX = pRef->iStride[0] << 4;
+  iRowStrideY = pSrc->iStride[0] << 4;
+
+  m_ComplexityAnalysisParam.iFrameComplexity = 0;
+
+  for (int32_t j = 0; j < iBlockHeight; j ++) {
+    pTmpRef = pPtrX;
+    pTmpCur = pPtrY;
+
+    for (int32_t i = 0; i < iBlockWidth; i++) {
+      int32_t iBlockPointX = i << 4;
+      int32_t iBlockPointY = j << 4;
+
+      iInterSad = m_pSadFunc (pTmpCur, iStrideY, pTmpRef, iStrideX);
+      if (bScrollFlag) {
+        if ((iInterSad != 0) &&
+            (iBlockPointX + iScrollMvX >= 0) && (iBlockPointX + iScrollMvX <= iWidth - 8) &&
+            (iBlockPointY + iScrollMvY >= 0) && (iBlockPointY + iScrollMvY <= iHeight - 8)) {
+          pTmpRefScroll = pTmpRef - iScrollMvY * iStrideX + iScrollMvX; // NOTE(review): the bounds test adds iScrollMvY but the pointer subtracts it, and the limit leaves 8 pixels for a SAD routine named 16x16 - confirm
+          iScrollSad = m_pSadFunc (pTmpCur, iStrideY, pTmpRefScroll, iStrideX);
+
+          if (iScrollSad < iInterSad) {
+            iInterSad = iScrollSad;
+          }
+        }
+
+      }
+
+      iBlockSadH = iBlockSadV = 0x7fffffff; // INT_MAX
+
+      if (j > 0) {
+        m_pIntraFunc[0] (iMemPredMb, pTmpCur, iStrideY);
+        iBlockSadH = m_pSadFunc (pTmpCur, iStrideY, iMemPredMb, 16);
+      }
+      if (i > 0) {
+        m_pIntraFunc[1] (iMemPredMb, pTmpCur, iStrideY);
+        iBlockSadV = m_pSadFunc (pTmpCur, iStrideY, iMemPredMb, 16);
+      }
+
+      iGomSad += WELS_MIN (WELS_MIN (iBlockSadH, iBlockSadV), iInterSad);
+
+      if (i == iBlockWidth - 1 && ((j + 1) % m_ComplexityAnalysisParam.iMbRowInGom == 0 || j == iBlockHeight - 1)) {
+        m_ComplexityAnalysisParam.pGomComplexity[iIdx] = iGomSad;
+        m_ComplexityAnalysisParam.iFrameComplexity += iGomSad;
+        iIdx++;
+        iGomSad = 0;
+      }
+
+      pTmpRef += 16;
+      pTmpCur += 16;
+    }
+    pPtrX += iRowStrideX;
+    pPtrY += iRowStrideY;
+  }
+  m_ComplexityAnalysisParam.iGomNumInFrame = iIdx;
+}
+
+WELSVP_NAMESPACE_END
diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/complexityanalysis/ComplexityAnalysis.h b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/complexityanalysis/ComplexityAnalysis.h
new file mode 100644
index 000000000..31f06d751
--- /dev/null
+++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/complexityanalysis/ComplexityAnalysis.h
@@ -0,0 +1,107 @@
+/*!
+ * \copy
+ * Copyright (c) 2011-2013, Cisco Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+* \file : ComplexityAnalysis.h
+*
+* \brief : complexity analysis class of wels video processor class
+*
+* \date : 2011/03/28
+*
+* \description : 1. rewrite the package code of complexity analysis class
+*
+*************************************************************************************
+*/
+
+#ifndef WELSVP_COMPLEXITYANALYSIS_H
+#define WELSVP_COMPLEXITYANALYSIS_H
+
+#include "util.h"
+#include "memory.h"
+#include "WelsFrameWork.h"
+#include "IWelsVP.h"
+#include "common.h"
+
+WELSVP_NAMESPACE_BEGIN
+// Signature of the per-macroblock SAD accumulators used by the GOM SAD analysis.
+typedef void (GOMSadFunc) (uint32_t* pGomSad, int32_t* pGomForegroundBlockNum, int32_t* pSad8x8,
+                           uint8_t pBackgroundMbFlag);
+
+typedef GOMSadFunc* PGOMSadFunc;
+// The two accumulators with that signature: one counts every MB, the other skips MBs flagged as background.
+GOMSadFunc GomSampleSad;
+GOMSadFunc GomSampleSadExceptBackground;
+// Strategy that derives frame/GOM complexity from SAD or sum/sum-of-squares statistics supplied through its parameter block.
+class CComplexityAnalysis : public IStrategy {
+ public:
+  CComplexityAnalysis (int32_t iCpuFlag);
+  ~CComplexityAnalysis();
+
+  EResult Process (int32_t iType, SPixMap* pSrc, SPixMap* pRef);
+  EResult Set (int32_t iType, void* pParam);
+  EResult Get (int32_t iType, void* pParam);
+
+ private:
+  void AnalyzeFrameComplexityViaSad (SPixMap* pSrc, SPixMap* pRef);
+  int32_t GetFrameSadExcludeBackground (SPixMap* pSrc, SPixMap* pRef);
+
+  void AnalyzeGomComplexityViaSad (SPixMap* pSrc, SPixMap* pRef);
+  void AnalyzeGomComplexityViaVar (SPixMap* pSrc, SPixMap* pRef);
+
+ private:
+  PGOMSadFunc m_pfGomSad;
+  SComplexityAnalysisParam m_sComplexityAnalysisParam;
+};
+
+
+//for screen content
+// Strategy that computes per-GOM complexity itself from 16x16 intra-prediction and inter SADs.
+class CComplexityAnalysisScreen : public IStrategy {
+ public:
+  CComplexityAnalysisScreen (int32_t cpu_flag);
+  ~CComplexityAnalysisScreen();
+
+  EResult Process (int32_t nType, SPixMap* src, SPixMap* ref);
+  EResult Set (int32_t nType, void* pParam);
+  EResult Get (int32_t nType, void* pParam);
+
+ private:
+  void GomComplexityAnalysisIntra (SPixMap* pSrc);
+  void GomComplexityAnalysisInter (SPixMap* pSrc, SPixMap* pRef, bool bScrollFlag);
+
+ private:
+  PSad16x16Func m_pSadFunc;
+  GetIntraPredPtr m_pIntraFunc[2];
+  SComplexityAnalysisScreenParam m_ComplexityAnalysisParam;
+};
+
+
+WELSVP_NAMESPACE_END
+
+#endif
diff --git
a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/denoise/denoise.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/denoise/denoise.cpp new file mode 100644 index 000000000..4ce1cb249 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/denoise/denoise.cpp @@ -0,0 +1,124 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include "denoise.h" +#include "cpu.h" + +WELSVP_NAMESPACE_BEGIN + +#define CALC_BI_STRIDE(iWidth, iBitcount) ((((iWidth) * (iBitcount) + 31) & ~31) >> 3) + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +CDenoiser::CDenoiser (int32_t iCpuFlag) { + m_CPUFlag = iCpuFlag; + m_eMethod = METHOD_DENOISE; + WelsMemset (&m_pfDenoise, 0, sizeof (m_pfDenoise)); + + m_uiSpaceRadius = DENOISE_GRAY_RADIUS; + m_fSigmaGrey = DENOISE_GRAY_SIGMA; + m_uiType = DENOISE_ALL_COMPONENT; + InitDenoiseFunc (m_pfDenoise, m_CPUFlag); +} + +CDenoiser::~CDenoiser() { +} + +void CDenoiser::InitDenoiseFunc (SDenoiseFuncs& denoiser, int32_t iCpuFlag) { + denoiser.pfBilateralLumaFilter8 = BilateralLumaFilter8_c; + denoiser.pfWaverageChromaFilter8 = WaverageChromaFilter8_c; +#if defined(X86_ASM) + if (iCpuFlag & WELS_CPU_SSE2) { + denoiser.pfBilateralLumaFilter8 = BilateralLumaFilter8_sse2; + denoiser.pfWaverageChromaFilter8 = WaverageChromaFilter8_sse2; + } +#endif +} + +EResult CDenoiser::Process (int32_t iType, SPixMap* pSrc, SPixMap* dst) { + uint8_t* pSrcY = (uint8_t*)pSrc->pPixel[0]; + uint8_t* pSrcU = (uint8_t*)pSrc->pPixel[1]; + uint8_t* pSrcV = (uint8_t*)pSrc->pPixel[2]; + if (pSrcY == NULL || pSrcU == NULL || pSrcV == NULL) { + return RET_INVALIDPARAM; + } + + int32_t iWidthY = pSrc->sRect.iRectWidth; + int32_t iHeightY = pSrc->sRect.iRectHeight; + int32_t iWidthUV = iWidthY >> 1; + int32_t iHeightUV = iHeightY >> 1; + + if (m_uiType & DENOISE_Y_COMPONENT) + BilateralDenoiseLuma (pSrcY, iWidthY, iHeightY, pSrc->iStride[0]); + + if (m_uiType & DENOISE_U_COMPONENT) + WaverageDenoiseChroma (pSrcU, iWidthUV, iHeightUV, pSrc->iStride[1]); + + if (m_uiType & DENOISE_V_COMPONENT) + WaverageDenoiseChroma (pSrcV, iWidthUV, iHeightUV, pSrc->iStride[2]); + + return RET_SUCCESS; +} + +void CDenoiser::BilateralDenoiseLuma (uint8_t* pSrcY, int32_t iWidth, int32_t iHeight, int32_t iStride) { + int32_t w; + + pSrcY = pSrcY 
+ m_uiSpaceRadius * iStride; + for (int32_t h = m_uiSpaceRadius; h < iHeight - m_uiSpaceRadius; h++) { + for (w = m_uiSpaceRadius; w < iWidth - m_uiSpaceRadius - TAIL_OF_LINE8; w += 8) { + m_pfDenoise.pfBilateralLumaFilter8 (pSrcY + w, iStride); + } + for (; w < iWidth - m_uiSpaceRadius; w++) { + Gauss3x3Filter (pSrcY + w, iStride); + } + pSrcY += iStride; + } +} + +void CDenoiser::WaverageDenoiseChroma (uint8_t* pSrcUV, int32_t iWidth, int32_t iHeight, int32_t iStride) { + int32_t w; + + pSrcUV = pSrcUV + UV_WINDOWS_RADIUS * iStride; + for (int32_t h = UV_WINDOWS_RADIUS; h < iHeight - UV_WINDOWS_RADIUS; h++) { + for (w = UV_WINDOWS_RADIUS; w < iWidth - UV_WINDOWS_RADIUS - TAIL_OF_LINE8; w += 8) { + m_pfDenoise.pfWaverageChromaFilter8 (pSrcUV + w, iStride); + } + + for (; w < iWidth - UV_WINDOWS_RADIUS; w++) { + Gauss3x3Filter (pSrcUV + w, iStride); + } + pSrcUV += iStride; + } +} + + +WELSVP_NAMESPACE_END diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/denoise/denoise.h b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/denoise/denoise.h new file mode 100644 index 000000000..13d284028 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/denoise/denoise.h @@ -0,0 +1,109 @@ +/*! + * \copy + * Copyright (c) 2011-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * \file : denoise.h + * + * \brief : denoise class of wels video processor class + * + * \date : 2011/03/15 + * + * \description : 1. rewrite the package code of denoise class + * + ************************************************************************************* + */ + +#ifndef WELSVP_DENOISE_H +#define WELSVP_DENOISE_H + +#include "util.h" +#include "memory.h" +#include "WelsFrameWork.h" +#include "IWelsVP.h" + + +#define DENOISE_GRAY_RADIUS (1) +#define DENOISE_GRAY_SIGMA (2) + +#define UV_WINDOWS_RADIUS (2) +#define TAIL_OF_LINE8 (7) + +#define DENOISE_Y_COMPONENT (1) +#define DENOISE_U_COMPONENT (2) +#define DENOISE_V_COMPONENT (4) +#define DENOISE_ALL_COMPONENT (7) + + +WELSVP_NAMESPACE_BEGIN + +void Gauss3x3Filter (uint8_t* pixels, int32_t stride); + +typedef void (DenoiseFilterFunc) (uint8_t* pixels, int32_t stride); + +typedef DenoiseFilterFunc* DenoiseFilterFuncPtr; + +DenoiseFilterFunc BilateralLumaFilter8_c; +DenoiseFilterFunc WaverageChromaFilter8_c; + +#ifdef X86_ASM +WELSVP_EXTERN_C_BEGIN +DenoiseFilterFunc BilateralLumaFilter8_sse2 ; +DenoiseFilterFunc WaverageChromaFilter8_sse2 ; +WELSVP_EXTERN_C_END +#endif + +typedef struct TagDenoiseFuncs { + 
DenoiseFilterFuncPtr pfBilateralLumaFilter8;//on 8 samples + DenoiseFilterFuncPtr pfWaverageChromaFilter8;//on 8 samples +} SDenoiseFuncs; + +class CDenoiser : public IStrategy { + public: + CDenoiser (int32_t iCpuFlag); + ~CDenoiser(); + + EResult Process (int32_t iType, SPixMap* pSrc, SPixMap* dst); + + private: + void InitDenoiseFunc (SDenoiseFuncs& pf, int32_t cpu); + void BilateralDenoiseLuma (uint8_t* p_y_data, int32_t width, int32_t height, int32_t stride); + void WaverageDenoiseChroma (uint8_t* pSrcUV, int32_t width, int32_t height, int32_t stride); + + private: + float m_fSigmaGrey; //sigma for grey scale similarity, suggestion 2.5-3 + uint16_t m_uiSpaceRadius; //filter windows radius: 1-3x3, 2-5x5,3-7x7. Larger size, slower speed + uint16_t m_uiType; //do denoising on which component 1-Y, 2-U, 4-V; 7-YUV, 3-YU, 5-YV, 6-UV + + SDenoiseFuncs m_pfDenoise; + int32_t m_CPUFlag; +}; + +WELSVP_NAMESPACE_END + +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/denoise/denoise_filter.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/denoise/denoise_filter.cpp new file mode 100644 index 000000000..533c0402f --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/denoise/denoise_filter.cpp @@ -0,0 +1,126 @@ +/*! + * \copy + * Copyright (c) 2010-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * \file svc_preprocess.h + * + * \brief svc denoising + * + * \date 4/1/2010 Created + * + */ + +#include "denoise.h" + +WELSVP_NAMESPACE_BEGIN + +void BilateralLumaFilter8_c (uint8_t* pSample, int32_t iStride) { + int32_t nSum = 0, nTotWeight = 0; + int32_t iCenterSample = *pSample; + uint8_t* pCurLine = pSample - iStride - DENOISE_GRAY_RADIUS; + int32_t x, y; + int32_t iCurSample, iCurWeight, iGreyDiff; + uint8_t aSample[8]; + + for (int32_t i = 0; i < 8; i++) { + nSum = 0; + nTotWeight = 0; + iCenterSample = *pSample; + pCurLine = pSample - iStride - DENOISE_GRAY_RADIUS; + for (y = 0; y < 3; y++) { + for (x = 0; x < 3; x++) { + if (x == 1 && y == 1) continue; // except center point + iCurSample = pCurLine[x]; + iCurWeight = WELS_ABS (iCurSample - iCenterSample); + iGreyDiff = 32 - iCurWeight; + if (iGreyDiff < 0) continue; + else iCurWeight = (iGreyDiff * iGreyDiff) >> 5; + nSum += iCurSample * iCurWeight; + nTotWeight += iCurWeight; + } + pCurLine += iStride; + } + nTotWeight = 256 - nTotWeight; + nSum += iCenterSample * nTotWeight; + aSample[i] = nSum >> 8; + pSample++; + } + WelsMemcpy (pSample - 8, aSample, 8); +} + + 
+/*************************************************************************** +5x5 filter: +1 1 2 1 1 +1 2 4 2 1 +2 4 20 4 2 +1 2 4 2 1 +1 1 2 1 1 +***************************************************************************/ +#define SUM_LINE1(pSample) (pSample[0] +(pSample[1]) +(pSample[2]<<1) + pSample[3] + pSample[4]) +#define SUM_LINE2(pSample) (pSample[0] +(pSample[1]<<1) +(pSample[2]<<2) +(pSample[3]<<1) + pSample[4]) +#define SUM_LINE3(pSample) ((pSample[0]<<1) +(pSample[1]<<2) +(pSample[2]*20) +(pSample[3]<<2) +(pSample[4]<<1)) +void WaverageChromaFilter8_c (uint8_t* pSample, int32_t iStride) { + int32_t sum; + uint8_t* pStartPixels = pSample - UV_WINDOWS_RADIUS * iStride - UV_WINDOWS_RADIUS; + uint8_t* pCurLine1 = pStartPixels; + uint8_t* pCurLine2 = pCurLine1 + iStride; + uint8_t* pCurLine3 = pCurLine2 + iStride; + uint8_t* pCurLine4 = pCurLine3 + iStride; + uint8_t* pCurLine5 = pCurLine4 + iStride; + uint8_t aSample[8]; + + for (int32_t i = 0; i < 8; i++) { + sum = SUM_LINE1 ((pCurLine1 + i)) + SUM_LINE2 ((pCurLine2 + i)) + SUM_LINE3 ((pCurLine3 + i)) + + SUM_LINE2 ((pCurLine4 + i)) + SUM_LINE1 ((pCurLine5 + i)); + aSample[i] = (sum >> 6); + pSample++; + } + WelsMemcpy (pSample - 8, aSample, 8); +} + +/*************************************************************************** +edge of y/uv use a 3x3 Gauss filter, radius = 1: +1 2 1 +2 4 2 +1 2 1 +***************************************************************************/ +void Gauss3x3Filter (uint8_t* pSrc, int32_t iStride) { + int32_t nSum = 0; + uint8_t* pCurLine1 = pSrc - iStride - 1; + uint8_t* pCurLine2 = pCurLine1 + iStride; + uint8_t* pCurLine3 = pCurLine2 + iStride; + + nSum = pCurLine1[0] + (pCurLine1[1] << 1) + pCurLine1[2] + + (pCurLine2[0] << 1) + (pCurLine2[1] << 2) + (pCurLine2[2] << 1) + + pCurLine3[0] + (pCurLine3[1] << 1) + pCurLine3[2]; + *pSrc = nSum >> 4; +} + +WELSVP_NAMESPACE_END diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/downsample/downsample.cpp 
b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/downsample/downsample.cpp new file mode 100644 index 000000000..1fb197eb0 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/downsample/downsample.cpp @@ -0,0 +1,294 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include "downsample.h" +#include "cpu.h" +#include <assert.h> + +WELSVP_NAMESPACE_BEGIN +#define MAX_SAMPLE_WIDTH 1920 +#define MAX_SAMPLE_HEIGHT 1088 + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +CDownsampling::CDownsampling (int32_t iCpuFlag) { + m_iCPUFlag = iCpuFlag; + m_eMethod = METHOD_DOWNSAMPLE; + WelsMemset (&m_pfDownsample, 0, sizeof (m_pfDownsample)); + InitDownsampleFuncs (m_pfDownsample, m_iCPUFlag); + WelsMemset(m_pSampleBuffer,0,sizeof(m_pSampleBuffer)); + m_bNoSampleBuffer = AllocateSampleBuffer(); +} + +CDownsampling::~CDownsampling() { + FreeSampleBuffer(); +} +bool CDownsampling::AllocateSampleBuffer() { + for (int32_t i = 0; i < 2; i++) { + m_pSampleBuffer[i][0] = (uint8_t*)WelsMalloc (MAX_SAMPLE_WIDTH * MAX_SAMPLE_HEIGHT); + if (!m_pSampleBuffer[i][0]) + goto FREE_RET; + m_pSampleBuffer[i][1] = (uint8_t*)WelsMalloc (MAX_SAMPLE_WIDTH * MAX_SAMPLE_HEIGHT / 4); + if (!m_pSampleBuffer[i][1]) + goto FREE_RET; + m_pSampleBuffer[i][2] = (uint8_t*)WelsMalloc (MAX_SAMPLE_WIDTH * MAX_SAMPLE_HEIGHT / 4); + if (!m_pSampleBuffer[i][2]) + goto FREE_RET; + } + return false; +FREE_RET: + FreeSampleBuffer(); + return true; + +} +void CDownsampling::FreeSampleBuffer() { + for (int32_t i = 0; i < 2; i++) { + WelsFree (m_pSampleBuffer[i][0]); + m_pSampleBuffer[i][0] = NULL; + WelsFree (m_pSampleBuffer[i][1]); + m_pSampleBuffer[i][1] = NULL; + WelsFree (m_pSampleBuffer[i][2]); + m_pSampleBuffer[i][2] = NULL; + } +} + +void CDownsampling::InitDownsampleFuncs (SDownsampleFuncs& sDownsampleFunc, int32_t iCpuFlag) { + sDownsampleFunc.pfHalfAverageWidthx32 = DyadicBilinearDownsampler_c; + sDownsampleFunc.pfHalfAverageWidthx16 = DyadicBilinearDownsampler_c; + sDownsampleFunc.pfOneThirdDownsampler = DyadicBilinearOneThirdDownsampler_c; + sDownsampleFunc.pfQuarterDownsampler = DyadicBilinearQuarterDownsampler_c; + sDownsampleFunc.pfGeneralRatioChroma = GeneralBilinearAccurateDownsampler_c; + 
sDownsampleFunc.pfGeneralRatioLuma = GeneralBilinearFastDownsampler_c; +#if defined(X86_ASM) + if (iCpuFlag & WELS_CPU_SSE) { + sDownsampleFunc.pfHalfAverageWidthx32 = DyadicBilinearDownsamplerWidthx32_sse; + sDownsampleFunc.pfHalfAverageWidthx16 = DyadicBilinearDownsamplerWidthx16_sse; + sDownsampleFunc.pfQuarterDownsampler = DyadicBilinearQuarterDownsampler_sse; + } + if (iCpuFlag & WELS_CPU_SSE2) { + sDownsampleFunc.pfGeneralRatioChroma = GeneralBilinearAccurateDownsamplerWrap_sse2; + sDownsampleFunc.pfGeneralRatioLuma = GeneralBilinearFastDownsamplerWrap_sse2; + } + if (iCpuFlag & WELS_CPU_SSSE3) { + sDownsampleFunc.pfHalfAverageWidthx32 = DyadicBilinearDownsamplerWidthx32_ssse3; + sDownsampleFunc.pfHalfAverageWidthx16 = DyadicBilinearDownsamplerWidthx16_ssse3; + sDownsampleFunc.pfOneThirdDownsampler = DyadicBilinearOneThirdDownsampler_ssse3; + sDownsampleFunc.pfQuarterDownsampler = DyadicBilinearQuarterDownsampler_ssse3; + sDownsampleFunc.pfGeneralRatioLuma = GeneralBilinearFastDownsamplerWrap_ssse3; + } + if (iCpuFlag & WELS_CPU_SSE41) { + sDownsampleFunc.pfOneThirdDownsampler = DyadicBilinearOneThirdDownsampler_sse4; + sDownsampleFunc.pfQuarterDownsampler = DyadicBilinearQuarterDownsampler_sse4; + sDownsampleFunc.pfGeneralRatioChroma = GeneralBilinearAccurateDownsamplerWrap_sse41; + } +#ifdef HAVE_AVX2 + if (iCpuFlag & WELS_CPU_AVX2) { + sDownsampleFunc.pfGeneralRatioChroma = GeneralBilinearAccurateDownsamplerWrap_avx2; + sDownsampleFunc.pfGeneralRatioLuma = GeneralBilinearFastDownsamplerWrap_avx2; + } +#endif +#endif//X86_ASM + +#if defined(HAVE_NEON) + if (iCpuFlag & WELS_CPU_NEON) { + sDownsampleFunc.pfHalfAverageWidthx32 = DyadicBilinearDownsamplerWidthx32_neon; + sDownsampleFunc.pfHalfAverageWidthx16 = DyadicBilinearDownsampler_neon; + sDownsampleFunc.pfOneThirdDownsampler = DyadicBilinearOneThirdDownsampler_neon; + sDownsampleFunc.pfQuarterDownsampler = DyadicBilinearQuarterDownsampler_neon; + sDownsampleFunc.pfGeneralRatioChroma = 
GeneralBilinearAccurateDownsamplerWrap_neon; + sDownsampleFunc.pfGeneralRatioLuma = GeneralBilinearAccurateDownsamplerWrap_neon; + } +#endif + +#if defined(HAVE_NEON_AARCH64) + if (iCpuFlag & WELS_CPU_NEON) { + sDownsampleFunc.pfHalfAverageWidthx32 = DyadicBilinearDownsamplerWidthx32_AArch64_neon; + sDownsampleFunc.pfHalfAverageWidthx16 = DyadicBilinearDownsampler_AArch64_neon; + sDownsampleFunc.pfOneThirdDownsampler = DyadicBilinearOneThirdDownsampler_AArch64_neon; + sDownsampleFunc.pfQuarterDownsampler = DyadicBilinearQuarterDownsampler_AArch64_neon; + sDownsampleFunc.pfGeneralRatioChroma = GeneralBilinearAccurateDownsamplerWrap_AArch64_neon; + sDownsampleFunc.pfGeneralRatioLuma = GeneralBilinearAccurateDownsamplerWrap_AArch64_neon; + } +#endif +} + +EResult CDownsampling::Process (int32_t iType, SPixMap* pSrcPixMap, SPixMap* pDstPixMap) { + int32_t iSrcWidthY = pSrcPixMap->sRect.iRectWidth; + int32_t iSrcHeightY = pSrcPixMap->sRect.iRectHeight; + int32_t iDstWidthY = pDstPixMap->sRect.iRectWidth; + int32_t iDstHeightY = pDstPixMap->sRect.iRectHeight; + + int32_t iSrcWidthUV = iSrcWidthY >> 1; + int32_t iSrcHeightUV = iSrcHeightY >> 1; + int32_t iDstWidthUV = iDstWidthY >> 1; + int32_t iDstHeightUV = iDstHeightY >> 1; + + if (iSrcWidthY <= iDstWidthY || iSrcHeightY <= iDstHeightY) { + return RET_INVALIDPARAM; + } + if ((iSrcWidthY >> 1) > MAX_SAMPLE_WIDTH || (iSrcHeightY >> 1) > MAX_SAMPLE_HEIGHT || m_bNoSampleBuffer) { + if ((iSrcWidthY >> 1) == iDstWidthY && (iSrcHeightY >> 1) == iDstHeightY) { + // use half average functions + DownsampleHalfAverage ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0], + (uint8_t*)pSrcPixMap->pPixel[0], pSrcPixMap->iStride[0], iSrcWidthY, iSrcHeightY); + DownsampleHalfAverage ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1], + (uint8_t*)pSrcPixMap->pPixel[1], pSrcPixMap->iStride[1], iSrcWidthUV, iSrcHeightUV); + DownsampleHalfAverage ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2], + 
(uint8_t*)pSrcPixMap->pPixel[2], pSrcPixMap->iStride[2], iSrcWidthUV, iSrcHeightUV); + } else if ((iSrcWidthY >> 2) == iDstWidthY && (iSrcHeightY >> 2) == iDstHeightY) { + + m_pfDownsample.pfQuarterDownsampler ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0], + (uint8_t*)pSrcPixMap->pPixel[0], pSrcPixMap->iStride[0], iSrcWidthY, iSrcHeightY); + + m_pfDownsample.pfQuarterDownsampler ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1], + (uint8_t*)pSrcPixMap->pPixel[1], pSrcPixMap->iStride[1], iSrcWidthUV, iSrcHeightUV); + + m_pfDownsample.pfQuarterDownsampler ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2], + (uint8_t*)pSrcPixMap->pPixel[2], pSrcPixMap->iStride[2], iSrcWidthUV, iSrcHeightUV); + + } else if ((iSrcWidthY / 3) == iDstWidthY && (iSrcHeightY / 3) == iDstHeightY) { + + m_pfDownsample.pfOneThirdDownsampler ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0], + (uint8_t*)pSrcPixMap->pPixel[0], pSrcPixMap->iStride[0], iSrcWidthY, iDstHeightY); + + m_pfDownsample.pfOneThirdDownsampler ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1], + (uint8_t*)pSrcPixMap->pPixel[1], pSrcPixMap->iStride[1], iSrcWidthUV, iDstHeightUV); + + m_pfDownsample.pfOneThirdDownsampler ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2], + (uint8_t*)pSrcPixMap->pPixel[2], pSrcPixMap->iStride[2], iSrcWidthUV, iDstHeightUV); + + } else { + m_pfDownsample.pfGeneralRatioLuma ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0], iDstWidthY, iDstHeightY, + (uint8_t*)pSrcPixMap->pPixel[0], pSrcPixMap->iStride[0], iSrcWidthY, iSrcHeightY); + + m_pfDownsample.pfGeneralRatioChroma ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1], iDstWidthUV, iDstHeightUV, + (uint8_t*)pSrcPixMap->pPixel[1], pSrcPixMap->iStride[1], iSrcWidthUV, iSrcHeightUV); + + m_pfDownsample.pfGeneralRatioChroma ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2], iDstWidthUV, iDstHeightUV, + (uint8_t*)pSrcPixMap->pPixel[2], pSrcPixMap->iStride[2], iSrcWidthUV, 
iSrcHeightUV); + } + } else { + + int32_t iIdx = 0; + int32_t iHalfSrcWidth = iSrcWidthY >> 1; + int32_t iHalfSrcHeight = iSrcHeightY >> 1; + uint8_t* pSrcY = (uint8_t*)pSrcPixMap->pPixel[0]; + uint8_t* pSrcU = (uint8_t*)pSrcPixMap->pPixel[1]; + uint8_t* pSrcV = (uint8_t*)pSrcPixMap->pPixel[2]; + int32_t iSrcStrideY = pSrcPixMap->iStride[0]; + int32_t iSrcStrideU = pSrcPixMap->iStride[1]; + int32_t iSrcStrideV = pSrcPixMap->iStride[2]; + + int32_t iDstStrideY = pDstPixMap->iStride[0]; + int32_t iDstStrideU = pDstPixMap->iStride[1]; + int32_t iDstStrideV = pDstPixMap->iStride[2]; + + uint8_t* pDstY = (uint8_t*)m_pSampleBuffer[iIdx][0]; + uint8_t* pDstU = (uint8_t*)m_pSampleBuffer[iIdx][1]; + uint8_t* pDstV = (uint8_t*)m_pSampleBuffer[iIdx][2]; + iIdx++; + do { + if ((iHalfSrcWidth == iDstWidthY) && (iHalfSrcHeight == iDstHeightY)) { //end + // use half average functions + DownsampleHalfAverage ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0], + (uint8_t*)pSrcY, iSrcStrideY, iSrcWidthY, iSrcHeightY); + DownsampleHalfAverage ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1], + (uint8_t*)pSrcU, iSrcStrideU, iSrcWidthUV, iSrcHeightUV); + DownsampleHalfAverage ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2], + (uint8_t*)pSrcV, iSrcStrideV, iSrcWidthUV, iSrcHeightUV); + break; + } else if ((iHalfSrcWidth > iDstWidthY) && (iHalfSrcHeight > iDstHeightY)){ + // use half average functions + iDstStrideY = WELS_ALIGN (iHalfSrcWidth, 32); + iDstStrideU = WELS_ALIGN (iHalfSrcWidth >> 1, 32); + iDstStrideV = WELS_ALIGN (iHalfSrcWidth >> 1, 32); + DownsampleHalfAverage ((uint8_t*)pDstY, iDstStrideY, + (uint8_t*)pSrcY, iSrcStrideY, iSrcWidthY, iSrcHeightY); + DownsampleHalfAverage ((uint8_t*)pDstU, iDstStrideU, + (uint8_t*)pSrcU, iSrcStrideU, iSrcWidthUV, iSrcHeightUV); + DownsampleHalfAverage ((uint8_t*)pDstV, iDstStrideV, + (uint8_t*)pSrcV, iSrcStrideV, iSrcWidthUV, iSrcHeightUV); + + pSrcY = (uint8_t*)pDstY; + pSrcU = (uint8_t*)pDstU; + pSrcV = 
(uint8_t*)pDstV; + + + iSrcWidthY = iHalfSrcWidth; + iSrcWidthUV = iHalfSrcWidth >> 1; + iSrcHeightY = iHalfSrcHeight; + iSrcHeightUV = iHalfSrcHeight >> 1; + + iSrcStrideY = iDstStrideY; + iSrcStrideU = iDstStrideU; + iSrcStrideV = iDstStrideV; + + iHalfSrcWidth >>= 1; + iHalfSrcHeight >>= 1; + + iIdx = iIdx % 2; + pDstY = (uint8_t*)m_pSampleBuffer[iIdx][0]; + pDstU = (uint8_t*)m_pSampleBuffer[iIdx][1]; + pDstV = (uint8_t*)m_pSampleBuffer[iIdx][2]; + iIdx++; + } else { + m_pfDownsample.pfGeneralRatioLuma ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0], iDstWidthY, iDstHeightY, + (uint8_t*)pSrcY, iSrcStrideY, iSrcWidthY, iSrcHeightY); + + m_pfDownsample.pfGeneralRatioChroma ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1], iDstWidthUV, iDstHeightUV, + (uint8_t*)pSrcU, iSrcStrideU, iSrcWidthUV, iSrcHeightUV); + + m_pfDownsample.pfGeneralRatioChroma ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2], iDstWidthUV, iDstHeightUV, + (uint8_t*)pSrcV, iSrcStrideV, iSrcWidthUV, iSrcHeightUV); + break; + } + } while (true); + } + return RET_SUCCESS; +} + +void CDownsampling::DownsampleHalfAverage (uint8_t* pDst, int32_t iDstStride, + uint8_t* pSrc, int32_t iSrcStride, int32_t iSrcWidth, int32_t iSrcHeight) { + if ((iSrcStride & 31) == 0) { + assert ((iDstStride & 15) == 0); + m_pfDownsample.pfHalfAverageWidthx32 (pDst, iDstStride, + pSrc, iSrcStride, WELS_ALIGN (iSrcWidth & ~1, 32), iSrcHeight); + } else { + assert ((iSrcStride & 15) == 0); + assert ((iDstStride & 7) == 0); + m_pfDownsample.pfHalfAverageWidthx16 (pDst, iDstStride, + pSrc, iSrcStride, WELS_ALIGN (iSrcWidth & ~1, 16), iSrcHeight); + } +} + + +WELSVP_NAMESPACE_END diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/downsample/downsample.h b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/downsample/downsample.h new file mode 100644 index 000000000..ca5c4bd49 --- /dev/null +++ 
b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/downsample/downsample.h @@ -0,0 +1,201 @@ +/*! + * \copy + * Copyright (c) 2011-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * \file : downsample.h + * + * \brief : downsample class of wels video processor class + * + * \date : 2011/03/33 + * + * \description : 1. 
rewrite the package code of downsample class + * + ************************************************************************************* + */ + +#ifndef WELSVP_DOWNSAMPLE_H +#define WELSVP_DOWNSAMPLE_H + +#include "util.h" +#include "WelsFrameWork.h" +#include "IWelsVP.h" +#include "macros.h" + +WELSVP_NAMESPACE_BEGIN + + +typedef void (HalveDownsampleFunc) (uint8_t* pDst, const int32_t kiDstStride, + uint8_t* pSrc, const int32_t kiSrcStride, + const int32_t kiSrcWidth, const int32_t kiSrcHeight); + +typedef void (SpecificDownsampleFunc) (uint8_t* pDst, const int32_t kiDstStride, + uint8_t* pSrc, const int32_t kiSrcStride, + const int32_t kiSrcWidth, const int32_t kiHeight); + +typedef void (GeneralDownsampleFunc) (uint8_t* pDst, const int32_t kiDstStride, const int32_t kiDstWidth, + const int32_t kiDstHeight, + uint8_t* pSrc, const int32_t kiSrcStride, const int32_t kiSrcWidth, const int32_t kiSrcHeight); + +typedef HalveDownsampleFunc* PHalveDownsampleFunc; +typedef SpecificDownsampleFunc* PSpecificDownsampleFunc; +typedef GeneralDownsampleFunc* PGeneralDownsampleFunc; + +HalveDownsampleFunc DyadicBilinearDownsampler_c; +GeneralDownsampleFunc GeneralBilinearFastDownsampler_c; +GeneralDownsampleFunc GeneralBilinearAccurateDownsampler_c; +SpecificDownsampleFunc DyadicBilinearOneThirdDownsampler_c; +SpecificDownsampleFunc DyadicBilinearQuarterDownsampler_c; + +typedef struct { + PHalveDownsampleFunc pfHalfAverageWidthx32; + PHalveDownsampleFunc pfHalfAverageWidthx16; + PSpecificDownsampleFunc pfOneThirdDownsampler; + PSpecificDownsampleFunc pfQuarterDownsampler; + PGeneralDownsampleFunc pfGeneralRatioLuma; + PGeneralDownsampleFunc pfGeneralRatioChroma; +} SDownsampleFuncs; + + +#ifdef X86_ASM +WELSVP_EXTERN_C_BEGIN +// used for scr width is multipler of 8 pixels +HalveDownsampleFunc DyadicBilinearDownsamplerWidthx8_sse; +// iSrcWidth= x16 pixels +HalveDownsampleFunc DyadicBilinearDownsamplerWidthx16_sse; +// iSrcWidth= x32 pixels +HalveDownsampleFunc 
DyadicBilinearDownsamplerWidthx32_sse; +// used for scr width is multipler of 16 pixels +HalveDownsampleFunc DyadicBilinearDownsamplerWidthx16_ssse3; +// iSrcWidth= x32 pixels +HalveDownsampleFunc DyadicBilinearDownsamplerWidthx32_ssse3; + +GeneralDownsampleFunc GeneralBilinearFastDownsamplerWrap_sse2; +GeneralDownsampleFunc GeneralBilinearAccurateDownsamplerWrap_sse2; +GeneralDownsampleFunc GeneralBilinearFastDownsamplerWrap_ssse3; +GeneralDownsampleFunc GeneralBilinearAccurateDownsamplerWrap_sse41; +#ifdef HAVE_AVX2 +GeneralDownsampleFunc GeneralBilinearFastDownsamplerWrap_avx2; +GeneralDownsampleFunc GeneralBilinearAccurateDownsamplerWrap_avx2; +#endif + +SpecificDownsampleFunc DyadicBilinearOneThirdDownsampler_ssse3; +SpecificDownsampleFunc DyadicBilinearOneThirdDownsampler_sse4; +SpecificDownsampleFunc DyadicBilinearQuarterDownsampler_sse; +SpecificDownsampleFunc DyadicBilinearQuarterDownsampler_ssse3; +SpecificDownsampleFunc DyadicBilinearQuarterDownsampler_sse4; + +void GeneralBilinearFastDownsampler_sse2 (uint8_t* pDst, const int32_t kiDstStride, const int32_t kiDstWidth, + const int32_t kiDstHeight, uint8_t* pSrc, const int32_t kiSrcStride, const uint32_t kuiScaleX, + const uint32_t kuiScaleY); +void GeneralBilinearAccurateDownsampler_sse2 (uint8_t* pDst, const int32_t kiDstStride, const int32_t kiDstWidth, + const int32_t kiDstHeight, uint8_t* pSrc, const int32_t kiSrcStride, const uint32_t kuiScaleX, + const uint32_t kuiScaleY); +void GeneralBilinearFastDownsampler_ssse3 (uint8_t* pDst, int32_t iDstStride, int32_t iDstWidth, + int32_t iDstHeight, uint8_t* pSrc, int32_t iSrcStride, uint32_t uiScaleX, + uint32_t uiScaleY); +void GeneralBilinearAccurateDownsampler_sse41 (uint8_t* pDst, int32_t iDstStride, int32_t iDstWidth, + int32_t iDstHeight, uint8_t* pSrc, int32_t iSrcStride, uint32_t uiScaleX, + uint32_t uiScaleY); +#ifdef HAVE_AVX2 +void GeneralBilinearFastDownsampler_avx2 (uint8_t* pDst, int32_t iDstStride, int32_t iDstWidth, + int32_t iDstHeight, 
uint8_t* pSrc, int32_t iSrcStride, uint32_t uiScaleX, + uint32_t uiScaleY); +void GeneralBilinearAccurateDownsampler_avx2 (uint8_t* pDst, int32_t iDstStride, int32_t iDstWidth, + int32_t iDstHeight, uint8_t* pSrc, int32_t iSrcStride, uint32_t uiScaleX, + uint32_t uiScaleY); +#endif + +WELSVP_EXTERN_C_END +#endif + +#ifdef HAVE_NEON +WELSVP_EXTERN_C_BEGIN +// iSrcWidth no limitation +HalveDownsampleFunc DyadicBilinearDownsampler_neon; +// iSrcWidth = x32 pixels +HalveDownsampleFunc DyadicBilinearDownsamplerWidthx32_neon; + +GeneralDownsampleFunc GeneralBilinearAccurateDownsamplerWrap_neon; + +SpecificDownsampleFunc DyadicBilinearOneThirdDownsampler_neon; + +SpecificDownsampleFunc DyadicBilinearQuarterDownsampler_neon; + +void GeneralBilinearAccurateDownsampler_neon (uint8_t* pDst, const int32_t kiDstStride, const int32_t kiDstWidth, + const int32_t kiDstHeight, + uint8_t* pSrc, const int32_t kiSrcStride, const uint32_t kuiScaleX, const uint32_t kuiScaleY); + +WELSVP_EXTERN_C_END +#endif + +#ifdef HAVE_NEON_AARCH64 +WELSVP_EXTERN_C_BEGIN +// iSrcWidth no limitation +HalveDownsampleFunc DyadicBilinearDownsampler_AArch64_neon; +// iSrcWidth = x32 pixels +HalveDownsampleFunc DyadicBilinearDownsamplerWidthx32_AArch64_neon; + +GeneralDownsampleFunc GeneralBilinearAccurateDownsamplerWrap_AArch64_neon; + +SpecificDownsampleFunc DyadicBilinearOneThirdDownsampler_AArch64_neon; + +SpecificDownsampleFunc DyadicBilinearQuarterDownsampler_AArch64_neon; + +void GeneralBilinearAccurateDownsampler_AArch64_neon (uint8_t* pDst, const int32_t kiDstStride, + const int32_t kiDstWidth, const int32_t kiDstHeight, + uint8_t* pSrc, const int32_t kiSrcStride, const uint32_t kuiScaleX, const uint32_t kuiScaleY); + +WELSVP_EXTERN_C_END +#endif + + +class CDownsampling : public IStrategy { + public: + CDownsampling (int32_t iCpuFlag); + ~CDownsampling(); + + EResult Process (int32_t iType, SPixMap* pSrc, SPixMap* pDst); + + private: + void InitDownsampleFuncs (SDownsampleFuncs& sDownsampleFunc, 
int32_t iCpuFlag); + + void DownsampleHalfAverage (uint8_t* pDst, int32_t iDstStride, + uint8_t* pSrc, int32_t iSrcStride, int32_t iSrcWidth, int32_t iSrcHeight); + bool AllocateSampleBuffer(); + void FreeSampleBuffer(); + private: + SDownsampleFuncs m_pfDownsample; + int32_t m_iCPUFlag; + uint8_t *m_pSampleBuffer[2][3]; + bool m_bNoSampleBuffer; +}; + +WELSVP_NAMESPACE_END + +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/downsample/downsamplefuncs.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/downsample/downsamplefuncs.cpp new file mode 100644 index 000000000..ed5e7572b --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/downsample/downsamplefuncs.cpp @@ -0,0 +1,300 @@ +/*! + * \copy + * Copyright (c) 2008-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * downsample_yuv.c
+ *
+ * Abstract
+ * Implementation for source yuv data downsampling used before spatial encoding.
+ *
+ * History
+ * 10/24/2008 Created
+ *
+ *****************************************************************************/
+
+#include "downsample.h"
+
+
+WELSVP_NAMESPACE_BEGIN
+
+
+/*!
+ * \brief  Downsample one plane by 2 in both directions (plain C version).
+ *
+ * Each destination pixel comes from a 2x2 source cell: the two pixels of each
+ * source row are averaged with rounding, then the two row averages are
+ * averaged with rounding again.
+ *
+ * \param pDst         destination plane
+ * \param kiDstStride  offset between consecutive destination rows
+ * \param pSrc         source plane
+ * \param kiSrcStride  offset between consecutive source rows
+ * \param kiSrcWidth   source width; the destination width is kiSrcWidth >> 1
+ * \param kiSrcHeight  source height; the destination height is kiSrcHeight >> 1
+ */
+void DyadicBilinearDownsampler_c (uint8_t* pDst, const int32_t kiDstStride,
+                                  uint8_t* pSrc, const int32_t kiSrcStride,
+                                  const int32_t kiSrcWidth, const int32_t kiSrcHeight)
+
+{
+  uint8_t* pDstLine = pDst;
+  uint8_t* pSrcLine = pSrc;
+  const int32_t kiSrcStridex2 = kiSrcStride << 1;
+  const int32_t kiDstWidth = kiSrcWidth >> 1;
+  const int32_t kiDstHeight = kiSrcHeight >> 1;
+
+  for (int32_t j = 0; j < kiDstHeight; j ++) {
+    for (int32_t i = 0; i < kiDstWidth; i ++) {
+      const int32_t kiSrcX = i << 1;
+      // rounded horizontal average of the upper and of the lower source row
+      const int32_t kiTempRow1 = (pSrcLine[kiSrcX] + pSrcLine[kiSrcX + 1] + 1) >> 1;
+      const int32_t kiTempRow2 = (pSrcLine[kiSrcX + kiSrcStride] + pSrcLine[kiSrcX + kiSrcStride + 1] + 1) >> 1;
+
+      // rounded vertical average of the two row results
+      pDstLine[i] = (uint8_t) ((kiTempRow1 + kiTempRow2 + 1) >> 1);
+    }
+    pDstLine += kiDstStride;
+    pSrcLine += kiSrcStridex2;
+  }
+}
+
+/*!
+ * \brief  Downsample one plane by 4 in both directions (plain C version).
+ *
+ * The source is walked in steps of 4 columns and 4 rows, but only the
+ * top-left 2x2 pixels of each step are read and averaged (same two-stage
+ * rounded average as DyadicBilinearDownsampler_c).
+ *
+ * \param kiSrcWidth   source width; the destination width is kiSrcWidth >> 2
+ * \param kiSrcHeight  source height; the destination height is kiSrcHeight >> 2
+ */
+void DyadicBilinearQuarterDownsampler_c (uint8_t* pDst, const int32_t kiDstStride,
+    uint8_t* pSrc, const int32_t kiSrcStride,
+    const int32_t kiSrcWidth, const int32_t kiSrcHeight)
+
+{
+  uint8_t* pDstLine = pDst;
+  uint8_t* pSrcLine = pSrc;
+  const int32_t kiSrcStridex4 = kiSrcStride << 2;
+  const int32_t kiDstWidth = kiSrcWidth >> 2;
+  const int32_t kiDstHeight = kiSrcHeight >> 2;
+
+  for (int32_t j = 0; j < kiDstHeight; j ++) {
+    for (int32_t i = 0; i < kiDstWidth; i ++) {
+      const int32_t kiSrcX = i << 2;
+      const int32_t kiTempRow1 = (pSrcLine[kiSrcX] + pSrcLine[kiSrcX + 1] + 1) >> 1;
+      const int32_t kiTempRow2 = (pSrcLine[kiSrcX + kiSrcStride] + pSrcLine[kiSrcX + kiSrcStride + 1] + 1) >> 1;
+
+      pDstLine[i] = (uint8_t) ((kiTempRow1 + kiTempRow2 + 1) >> 1);
+    }
+    pDstLine += kiDstStride;
+    pSrcLine += kiSrcStridex4;
+  }
+}
+
+/*!
+ * \brief  Downsample one plane by 3 in both directions (plain C version).
+ *
+ * The source is walked in steps of 3 columns and 3 rows; only the top-left
+ * 2x2 pixels of each step are read and averaged.
+ *
+ * \param kiSrcWidth   source width; the destination width is kiSrcWidth / 3
+ * \param kiDstHeight  number of DESTINATION rows to produce. Unlike the two
+ *                     functions above, the last argument is not a source height.
+ */
+void DyadicBilinearOneThirdDownsampler_c (uint8_t* pDst, const int32_t kiDstStride,
+    uint8_t* pSrc, const int32_t kiSrcStride,
+    const int32_t kiSrcWidth, const int32_t kiDstHeight)
+
+{
+  uint8_t* pDstLine = pDst;
+  uint8_t* pSrcLine = pSrc;
+  const int32_t kiSrcStridex3 = kiSrcStride * 3;
+  const int32_t kiDstWidth = kiSrcWidth / 3;
+
+  for (int32_t j = 0; j < kiDstHeight; j ++) {
+    for (int32_t i = 0; i < kiDstWidth; i ++) {
+      const int32_t kiSrcX = i * 3;
+      const int32_t kiTempRow1 = (pSrcLine[kiSrcX] + pSrcLine[kiSrcX + 1] + 1) >> 1;
+      const int32_t kiTempRow2 = (pSrcLine[kiSrcX + kiSrcStride] + pSrcLine[kiSrcX + kiSrcStride + 1] + 1) >> 1;
+
+      pDstLine[i] = (uint8_t) ((kiTempRow1 + kiTempRow2 + 1) >> 1);
+    }
+    pDstLine += kiDstStride;
+    pSrcLine += kiSrcStridex3;
+  }
+}
+
+/*!
+ * \brief  Arbitrary-ratio bilinear resampler using 32-bit arithmetic.
+ *
+ * Source positions are tracked in fixed point: 16 fractional bits
+ * horizontally and 15 vertically. The four bilinear weights are each
+ * shifted down by 16 bits before being multiplied with the pixel values, so
+ * the sum is accumulated in a uint32_t. The last column of every row and the
+ * whole last row are copied from the nearest source sample without
+ * interpolation.
+ * NOTE(review): presumably that keeps the "+1" / "+stride" neighbour reads
+ * inside the source plane - confirm against the callers' buffer layout.
+ *
+ * \param pDst / kiDstStride / kiDstWidth / kiDstHeight   destination plane and its geometry
+ * \param pSrc / kiSrcStride / kiSrcWidth / kiSrcHeight   source plane and its geometry
+ */
+void GeneralBilinearFastDownsampler_c (uint8_t* pDst, const int32_t kiDstStride, const int32_t kiDstWidth,
+                                       const int32_t kiDstHeight,
+                                       uint8_t* pSrc, const int32_t kiSrcStride, const int32_t kiSrcWidth, const int32_t kiSrcHeight) {
+  const uint32_t kuiScaleBitWidth = 16, kuiScaleBitHeight = 15;
+  const uint32_t kuiScaleWidth = (1 << kuiScaleBitWidth), kuiScaleHeight = (1 << kuiScaleBitHeight);
+  // source/destination ratio expressed in the fixed-point formats above
+  int32_t fScalex = WELS_ROUND ((float)kiSrcWidth / (float)kiDstWidth * kuiScaleWidth);
+  int32_t fScaley = WELS_ROUND ((float)kiSrcHeight / (float)kiDstHeight * kuiScaleHeight);
+  uint32_t x;
+  int32_t iYInverse, iXInverse;
+
+  uint8_t* pByDst = pDst;
+  uint8_t* pByLineDst = pDst;
+
+  // start at 0.5 in fixed point
+  iYInverse = 1 << (kuiScaleBitHeight - 1);
+  for (int32_t i = 0; i < kiDstHeight - 1; i++) {
+    int32_t iYy = iYInverse >> kuiScaleBitHeight;       // integer source row
+    int32_t fv = iYInverse & (kuiScaleHeight - 1);      // vertical fraction
+
+    uint8_t* pBySrc = pSrc + iYy * kiSrcStride;
+
+    pByDst = pByLineDst;
+    iXInverse = 1 << (kuiScaleBitWidth - 1);
+    for (int32_t j = 0; j < kiDstWidth - 1; j++) {
+      int32_t iXx = iXInverse >> kuiScaleBitWidth;      // integer source column
+      int32_t iFu = iXInverse & (kuiScaleWidth - 1);    // horizontal fraction
+
+      uint8_t* pByCurrent = pBySrc + iXx;
+      uint8_t a, b, c, d;
+
+      // 2x2 neighbourhood: a b / c d
+      a = *pByCurrent;
+      b = * (pByCurrent + 1);
+      c = * (pByCurrent + kiSrcStride);
+      d = * (pByCurrent + kiSrcStride + 1);
+
+      x = (((uint32_t) (kuiScaleWidth - 1 - iFu)) * (kuiScaleHeight - 1 - fv) >> kuiScaleBitWidth) * a;
+      x += (((uint32_t) (iFu)) * (kuiScaleHeight - 1 - fv) >> kuiScaleBitWidth) * b;
+      x += (((uint32_t) (kuiScaleWidth - 1 - iFu)) * (fv) >> kuiScaleBitWidth) * c;
+      x += (((uint32_t) (iFu)) * (fv) >> kuiScaleBitWidth) * d;
+      // drop the remaining fractional bits, rounding on the final bit
+      x >>= (kuiScaleBitHeight - 1);
+      x += 1;
+      x >>= 1;
+      //x = (((__int64)(SCALE_BIG - 1 - iFu))*(SCALE_BIG - 1 - fv)*a + ((__int64)iFu)*(SCALE_BIG - 1 -fv)*b + ((__int64)(SCALE_BIG - 1 -iFu))*fv*c +
+      // ((__int64)iFu)*fv*d + (1 << (2*SCALE_BIT_BIG-1)) ) >> (2*SCALE_BIT_BIG);
+      x = WELS_CLAMP (x, 0, 255);
+      *pByDst++ = (uint8_t)x;
+
+      iXInverse += fScalex;
+    }
+    // last column of the row: nearest source sample, no interpolation
+    *pByDst = * (pBySrc + (iXInverse >> kuiScaleBitWidth));
+    pByLineDst += kiDstStride;
+    iYInverse += fScaley;
+  }
+
+  // last row special
+  {
+    int32_t iYy = iYInverse >> kuiScaleBitHeight;
+    uint8_t* pBySrc = pSrc + iYy * kiSrcStride;
+
+    pByDst = pByLineDst;
+    iXInverse = 1 << (kuiScaleBitWidth - 1);
+    for (int32_t j = 0; j < kiDstWidth; j++) {
+      int32_t iXx = iXInverse >> kuiScaleBitWidth;
+      *pByDst++ = * (pBySrc + iXx);
+
+      iXInverse += fScalex;
+    }
+  }
+}
+
+/*!
+ * \brief  Arbitrary-ratio bilinear resampler using 64-bit accumulation.
+ *
+ * Same traversal as GeneralBilinearFastDownsampler_c, but both axes use 15
+ * fractional bits and the full weight products are summed in an int64_t
+ * before a single rounded shift by 2 * kiScaleBit.
+ * The last column of every row and the whole last row are copied from the
+ * nearest source sample without interpolation.
+ */
+void GeneralBilinearAccurateDownsampler_c (uint8_t* pDst, const int32_t kiDstStride, const int32_t kiDstWidth,
+    const int32_t kiDstHeight,
+    uint8_t* pSrc, const int32_t kiSrcStride, const int32_t kiSrcWidth, const int32_t kiSrcHeight) {
+  const int32_t kiScaleBit = 15;
+  const int32_t kiScale = (1 << kiScaleBit);
+  int32_t iScalex = WELS_ROUND ((float)kiSrcWidth / (float)kiDstWidth * kiScale);
+  int32_t iScaley = WELS_ROUND ((float)kiSrcHeight / (float)kiDstHeight * kiScale);
+  int64_t x;
+  int32_t iYInverse, iXInverse;
+
+  uint8_t* pByDst = pDst;
+  uint8_t* pByLineDst = pDst;
+
+  // start at 0.5 in fixed point
+  iYInverse = 1 << (kiScaleBit - 1);
+  for (int32_t i = 0; i < kiDstHeight - 1; i++) {
+    int32_t iYy = iYInverse >> kiScaleBit;
+    int32_t iFv = iYInverse & (kiScale - 1);
+
+    uint8_t* pBySrc = pSrc + iYy * kiSrcStride;
+
+    pByDst = pByLineDst;
+    iXInverse = 1 << (kiScaleBit - 1);
+    for (int32_t j = 0; j < kiDstWidth - 1; j++) {
+      int32_t iXx = iXInverse >> kiScaleBit;
+      int32_t iFu = iXInverse & (kiScale - 1);
+
+      uint8_t* pByCurrent = pBySrc + iXx;
+      uint8_t a, b, c, d;
+
+      a = *pByCurrent;
+      b = * (pByCurrent + 1);
+      c = * (pByCurrent + kiSrcStride);
+      d = * (pByCurrent + kiSrcStride + 1);
+
+      // weighted sum of the four neighbours plus half an output unit for rounding
+      x = (((int64_t) (kiScale - 1 - iFu)) * (kiScale - 1 - iFv) * a + ((int64_t)iFu) * (kiScale - 1 - iFv) * b + ((int64_t) (
+             kiScale - 1 - iFu)) * iFv * c +
+           ((int64_t)iFu) * iFv * d + (int64_t) (1 << (2 * kiScaleBit - 1))) >> (2 * kiScaleBit);
+      x = WELS_CLAMP (x, 0, 255);
+      *pByDst++ = (uint8_t)x;
+
+      iXInverse += iScalex;
+    }
+    // last column of the row: nearest source sample, no interpolation
+    *pByDst = * (pBySrc + (iXInverse >> kiScaleBit));
+    pByLineDst += kiDstStride;
+    iYInverse += iScaley;
+  }
+
+  // last row special
+  {
+    int32_t iYy = iYInverse >> kiScaleBit;
+    uint8_t* pBySrc = pSrc + iYy * kiSrcStride;
+
+    pByDst = pByLineDst;
+    iXInverse = 1 << (kiScaleBit - 1);
+    for (int32_t j = 0; j < kiDstWidth; j++) {
+      int32_t iXx = iXInverse >> kiScaleBit;
+      *pByDst++ = * (pBySrc + iXx);
+
+      iXInverse += iScalex;
+    }
+  }
+}
+
+#if defined(X86_ASM) || defined(HAVE_NEON) || defined(HAVE_NEON_AARCH64)
+/*!
+ * \brief  Adapter between the (source size, destination size) interface used
+ *         by the C resamplers and routines that take precomputed scale factors.
+ *
+ * Converts the source/destination ratio of each axis into fixed point with
+ * the requested number of fractional bits and forwards everything to \p func.
+ *
+ * \param kiScaleBitWidth   fractional bits for the horizontal scale factor
+ * \param kiScaleBitHeight  fractional bits for the vertical scale factor
+ * \param func              routine that receives the two scale factors
+ */
+static void GeneralBilinearDownsamplerWrap (uint8_t* pDst, const int32_t kiDstStride, const int32_t kiDstWidth,
+    const int32_t kiDstHeight,
+    uint8_t* pSrc, const int32_t kiSrcStride, const int32_t kiSrcWidth, const int32_t kiSrcHeight,
+    const int32_t kiScaleBitWidth, const int32_t kiScaleBitHeight,
+    void (*func) (uint8_t* pDst, int32_t iDstStride, int32_t iDstWidth, int32_t iDstHeight,
+                  uint8_t* pSrc, int32_t iSrcStride, uint32_t uiScaleX, uint32_t uiScaleY)) {
+  const uint32_t kuiScaleWidth = (1 << kiScaleBitWidth), kuiScaleHeight = (1 << kiScaleBitHeight);
+
+  uint32_t uiScalex = WELS_ROUND ((float)kiSrcWidth / (float)kiDstWidth * kuiScaleWidth);
+  uint32_t uiScaley = WELS_ROUND ((float)kiSrcHeight / (float)kiDstHeight * kuiScaleHeight);
+
+  func (pDst, kiDstStride, kiDstWidth, kiDstHeight, pSrc, kiSrcStride, uiScalex, uiScaley);
+}
+
+// Defines GeneralBilinearFastDownsamplerWrap_<suffix>, which forwards to
+// GeneralBilinearFastDownsampler_<suffix> with 16 horizontal / 15 vertical
+// fractional bits (the same split GeneralBilinearFastDownsampler_c uses).
+#define DEFINE_GENERAL_BILINEAR_FAST_DOWNSAMPLER_WRAP(suffix) \
+  void GeneralBilinearFastDownsamplerWrap_ ## suffix ( \
+      uint8_t* pDst, const int32_t kiDstStride, const int32_t kiDstWidth, const int32_t kiDstHeight, \
+      uint8_t* pSrc, const int32_t kiSrcStride, const int32_t kiSrcWidth, const int32_t kiSrcHeight) { \
+    GeneralBilinearDownsamplerWrap (pDst, kiDstStride, kiDstWidth, kiDstHeight, \
+        pSrc, kiSrcStride, kiSrcWidth, kiSrcHeight, 16, 15, GeneralBilinearFastDownsampler_ ## suffix); \
+  }
+
+// Defines GeneralBilinearAccurateDownsamplerWrap_<suffix>, which forwards to
+// GeneralBilinearAccurateDownsampler_<suffix> with 15 fractional bits on both
+// axes (the same precision GeneralBilinearAccurateDownsampler_c uses).
+#define DEFINE_GENERAL_BILINEAR_ACCURATE_DOWNSAMPLER_WRAP(suffix) \
+  void GeneralBilinearAccurateDownsamplerWrap_ ## suffix ( \
+      uint8_t* pDst, const int32_t kiDstStride, const int32_t kiDstWidth, const int32_t kiDstHeight, \
+      uint8_t* pSrc, const int32_t kiSrcStride, const int32_t kiSrcWidth, const int32_t kiSrcHeight) { \
+    GeneralBilinearDownsamplerWrap (pDst, kiDstStride, kiDstWidth, kiDstHeight, \
+        pSrc, kiSrcStride, kiSrcWidth, kiSrcHeight, 15, 15, GeneralBilinearAccurateDownsampler_ ## suffix); \
+  }
+#endif
+
+// Wrapper instantiations, one per variant selected by the build macros.
+#ifdef X86_ASM
+DEFINE_GENERAL_BILINEAR_FAST_DOWNSAMPLER_WRAP (sse2)
+DEFINE_GENERAL_BILINEAR_ACCURATE_DOWNSAMPLER_WRAP (sse2)
+DEFINE_GENERAL_BILINEAR_FAST_DOWNSAMPLER_WRAP (ssse3)
+DEFINE_GENERAL_BILINEAR_ACCURATE_DOWNSAMPLER_WRAP (sse41)
+#ifdef HAVE_AVX2
+DEFINE_GENERAL_BILINEAR_FAST_DOWNSAMPLER_WRAP (avx2)
+DEFINE_GENERAL_BILINEAR_ACCURATE_DOWNSAMPLER_WRAP (avx2)
+#endif
+#endif //X86_ASM
+
+#ifdef HAVE_NEON
+DEFINE_GENERAL_BILINEAR_ACCURATE_DOWNSAMPLER_WRAP (neon)
+#endif
+
+#ifdef HAVE_NEON_AARCH64
+DEFINE_GENERAL_BILINEAR_ACCURATE_DOWNSAMPLER_WRAP (AArch64_neon)
+#endif
+WELSVP_NAMESPACE_END
diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/imagerotate/imagerotate.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/imagerotate/imagerotate.cpp
new file mode 100644
index 000000000..610a3c8a9
--- /dev/null
+++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/imagerotate/imagerotate.cpp
@@ -0,0 +1,92 @@
+/*!
+ * \copy
+ * Copyright (c) 2013, Cisco Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include "imagerotate.h" + +WELSVP_NAMESPACE_BEGIN + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +CImageRotating::CImageRotating (int32_t iCpuFlag) { + m_iCPUFlag = iCpuFlag; + m_eMethod = METHOD_IMAGE_ROTATE; + WelsMemset (&m_pfRotateImage, 0, sizeof (m_pfRotateImage)); + InitImageRotateFuncs (m_pfRotateImage, m_iCPUFlag); +} + +CImageRotating::~CImageRotating() { +} + +void CImageRotating::InitImageRotateFuncs (SImageRotateFuncs& sImageRotateFuncs, int32_t iCpuFlag) { + sImageRotateFuncs.pfImageRotate90D = ImageRotate90D_c; + sImageRotateFuncs.pfImageRotate180D = ImageRotate180D_c; + sImageRotateFuncs.pfImageRotate270D = ImageRotate270D_c; +} +EResult CImageRotating::ProcessImageRotate (int32_t iType, uint8_t* pSrc, uint32_t uiBytesPerPixel, uint32_t iWidth, + uint32_t iHeight, uint8_t* pDst) { + if (iType == 90) { + m_pfRotateImage.pfImageRotate90D (pSrc, uiBytesPerPixel, iWidth, iHeight, pDst); + } else if (iType == 180) { + m_pfRotateImage.pfImageRotate180D (pSrc, uiBytesPerPixel, iWidth, iHeight, pDst); + } else if (iType == 270) { + m_pfRotateImage.pfImageRotate270D (pSrc, uiBytesPerPixel, iWidth, iHeight, pDst); + } else { + return RET_NOTSUPPORTED; + } + return RET_SUCCESS; +} + +EResult CImageRotating::Process (int32_t iType, SPixMap* pSrc, SPixMap* pDst) { + EResult eReturn = RET_INVALIDPARAM; + + if ((pSrc->eFormat == 
VIDEO_FORMAT_RGBA) || + (pSrc->eFormat == VIDEO_FORMAT_BGRA) || + (pSrc->eFormat == VIDEO_FORMAT_ABGR) || + (pSrc->eFormat == VIDEO_FORMAT_ARGB)) { + eReturn = ProcessImageRotate (iType, (uint8_t*)pSrc->pPixel[0], pSrc->iSizeInBits * 8, pSrc->sRect.iRectWidth, + pSrc->sRect.iRectHeight, (uint8_t*)pDst->pPixel[0]); + } else if (pSrc->eFormat == VIDEO_FORMAT_I420) { + ProcessImageRotate (iType, (uint8_t*)pSrc->pPixel[0], pSrc->iSizeInBits * 8, pSrc->sRect.iRectWidth, + pSrc->sRect.iRectHeight, (uint8_t*)pDst->pPixel[0]); + ProcessImageRotate (iType, (uint8_t*)pSrc->pPixel[1], pSrc->iSizeInBits * 8, (pSrc->sRect.iRectWidth >> 1), + (pSrc->sRect.iRectHeight >> 1), (uint8_t*)pDst->pPixel[1]); + eReturn = ProcessImageRotate (iType, (uint8_t*)pSrc->pPixel[2], pSrc->iSizeInBits * 8, (pSrc->sRect.iRectWidth >> 1), + (pSrc->sRect.iRectHeight >> 1), (uint8_t*)pDst->pPixel[2]); + } else { + eReturn = RET_NOTSUPPORTED; + } + + return eReturn; +} + + +WELSVP_NAMESPACE_END diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/imagerotate/imagerotate.h b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/imagerotate/imagerotate.h new file mode 100644 index 000000000..98619b605 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/imagerotate/imagerotate.h @@ -0,0 +1,85 @@ +/*! + * \copy + * Copyright (c) 2011-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * \file : imagerotate.h
+ *
+ * \brief : image rotate class of wels video processor class
+ *
+ * \date : 2011/04/06
+ *
+ * \description : declares the rotate routine signature, the table of
+ *                rotate routines and the CImageRotating strategy class
+ *
+ *************************************************************************************
+ */
+
+#ifndef WELSVP_IMAGEROTATE_H
+#define WELSVP_IMAGEROTATE_H
+
+#include "util.h"
+#include "WelsFrameWork.h"
+#include "IWelsVP.h"
+
+WELSVP_NAMESPACE_BEGIN
+
+// Signature shared by all rotate routines: source buffer, bytes per pixel,
+// source width and height in pixels, destination buffer.
+typedef void (ImageRotateFunc) (uint8_t* pSrc, uint32_t uiBytesPerPixel, uint32_t iWidth, uint32_t iHeight,
+                                uint8_t* pDst);
+
+typedef ImageRotateFunc* ImageRotateFuncPtr;
+
+// Plain C rotate routines for 90, 180 and 270 degrees.
+ImageRotateFunc ImageRotate90D_c;
+ImageRotateFunc ImageRotate180D_c;
+ImageRotateFunc ImageRotate270D_c;
+
+// Table with one rotate routine per supported angle.
+typedef struct {
+  ImageRotateFuncPtr pfImageRotate90D;
+  ImageRotateFuncPtr pfImageRotate180D;
+  ImageRotateFuncPtr pfImageRotate270D;
+} SImageRotateFuncs;
+
+// Strategy object that rotates a picture by 90, 180 or 270 degrees.
+class CImageRotating : public IStrategy {
+ public:
+  CImageRotating (int32_t iCpuFlag);
+  ~CImageRotating();
+
+  // iType is the rotation angle in degrees; pSrc is read, pDst is written.
+  EResult Process (int32_t iType, SPixMap* pSrc, SPixMap* pDst);
+
+ private:
+  // Fills pf with the rotate routines to use.
+  void InitImageRotateFuncs (SImageRotateFuncs& pf, int32_t iCpuFlag);
+  // Rotates a single plane by iType degrees through the function table.
+  EResult ProcessImageRotate (int32_t iType, uint8_t* pSrc, uint32_t uiBytesPerPixel, uint32_t iWidth, uint32_t iHeight,
+                              uint8_t* pDst);
+
+ private:
+  SImageRotateFuncs m_pfRotateImage;  // rotate routines selected at construction
+  int32_t m_iCPUFlag;                 // CPU feature flags passed to the constructor
+};
+
+WELSVP_NAMESPACE_END
+
+#endif
diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/imagerotate/imagerotatefuncs.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/imagerotate/imagerotatefuncs.cpp
new file mode 100644
index 000000000..e4a72fdfe
--- /dev/null
+++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/imagerotate/imagerotatefuncs.cpp
@@ -0,0 +1,65 @@
+/*!
+ * \copy
+ * Copyright (c) 2011-2013, Cisco Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ * + * image_rotate.c + * + * Created on 11-2-21. + * + */ + +#include "imagerotate.h" + +WELSVP_NAMESPACE_BEGIN + +void ImageRotate90D_c (uint8_t* pSrc, uint32_t uiBytesPerPixel, uint32_t iWidth, uint32_t iHeight, uint8_t* pDst) { + for (uint32_t j = 0; j < iHeight; j++) { + for (uint32_t i = 0; i < iWidth; i++) { + for (uint32_t n = 0; n < uiBytesPerPixel; n++) + pDst[ (i * iHeight + iHeight - 1 - j)*uiBytesPerPixel + n] = pSrc[ (iWidth * j + i) * uiBytesPerPixel + n]; + } + } +} +void ImageRotate180D_c (uint8_t* pSrc, uint32_t uiBytesPerPixel, uint32_t iWidth, uint32_t iHeight, uint8_t* pDst) { + for (uint32_t j = 0; j < iHeight; j++) { + for (uint32_t i = 0; i < iWidth; i++) { + for (uint32_t n = 0; n < uiBytesPerPixel; n++) + pDst[ ((iHeight - 1 - j)*iWidth + iWidth - 1 - i)*uiBytesPerPixel + n] = pSrc[ (iWidth * j + i) * uiBytesPerPixel + n]; + } + } +} +void ImageRotate270D_c (uint8_t* pSrc, uint32_t uiBytesPerPixel, uint32_t iWidth, uint32_t iHeight, uint8_t* pDst) { + for (uint32_t j = 0; j < iWidth; j++) { + for (uint32_t i = 0; i < iHeight; i++) { + for (uint32_t n = 0; n < uiBytesPerPixel; n++) + pDst[ ((iWidth - 1 - j)*iHeight + i)*uiBytesPerPixel + n] = pSrc[ (iWidth * i + j) * uiBytesPerPixel + n]; + } + } +} +WELSVP_NAMESPACE_END diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/scenechangedetection/SceneChangeDetection.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/scenechangedetection/SceneChangeDetection.cpp new file mode 100644 index 000000000..59d19eb58 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/scenechangedetection/SceneChangeDetection.cpp @@ -0,0 +1,53 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include "SceneChangeDetection.h" +#include "cpu.h" + +WELSVP_NAMESPACE_BEGIN + +IStrategy* BuildSceneChangeDetection (EMethods eMethod, int32_t iCpuFlag) { + switch (eMethod) { + case METHOD_SCENE_CHANGE_DETECTION_VIDEO: + return new CSceneChangeDetection (eMethod, iCpuFlag); + break; + case METHOD_SCENE_CHANGE_DETECTION_SCREEN: + return new CSceneChangeDetection (eMethod, iCpuFlag); + break; + default: + // not support yet + return NULL; + } +} + +WELSVP_NAMESPACE_END + diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/scenechangedetection/SceneChangeDetection.h b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/scenechangedetection/SceneChangeDetection.h new file mode 100644 index 000000000..78c225ee7 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/scenechangedetection/SceneChangeDetection.h @@ -0,0 +1,270 @@ +/*! + * \copy + * Copyright (c) 2011-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * \file : SceneChangeDetection.h + * + * \brief : scene change detection class of wels video processor class + * + * \date : 2011/03/14 + * + * \description : 1. rewrite the package code of scene change detection class + * + ************************************************************************************* + */ + +#ifndef WELSVP_SCENECHANGEDETECTION_H +#define WELSVP_SCENECHANGEDETECTION_H + +#include "util.h" +#include "memory.h" +#include "cpu.h" +#include "WelsFrameWork.h" +#include "IWelsVP.h" +#include "common.h" + +#define HIGH_MOTION_BLOCK_THRESHOLD 320 +#define SCENE_CHANGE_MOTION_RATIO_LARGE_VIDEO 0.85f +#define SCENE_CHANGE_MOTION_RATIO_MEDIUM 0.50f +#define SCENE_CHANGE_MOTION_RATIO_LARGE_SCREEN 0.80f + +WELSVP_NAMESPACE_BEGIN + +typedef struct { + int32_t iWidth; + int32_t iHeight; + int32_t iBlock8x8Width; + int32_t iBlock8x8Height; + uint8_t* pRefY; + uint8_t* pCurY; + int32_t iRefStride; + int32_t iCurStride; + uint8_t* pStaticBlockIdc; +} SLocalParam; + +class CSceneChangeDetectorVideo { + public: + CSceneChangeDetectorVideo (SSceneChangeResult& sParam, int32_t iCpuFlag) : m_sParam (sParam) { + m_pfSad = WelsSampleSad8x8_c; +#ifdef X86_ASM + if (iCpuFlag & WELS_CPU_SSE2) { + m_pfSad = WelsSampleSad8x8_sse21; + } +#endif +#ifdef HAVE_NEON + if (iCpuFlag & WELS_CPU_NEON) { + m_pfSad = WelsProcessingSampleSad8x8_neon; + } +#endif + +#ifdef HAVE_NEON_AARCH64 + if (iCpuFlag & WELS_CPU_NEON) { + m_pfSad = 
WelsProcessingSampleSad8x8_AArch64_neon; + } +#endif + +#ifdef HAVE_MMI + if (iCpuFlag & WELS_CPU_MMI) { + m_pfSad = WelsSampleSad8x8_mmi; + } +#endif + + m_fSceneChangeMotionRatioLarge = SCENE_CHANGE_MOTION_RATIO_LARGE_VIDEO; + m_fSceneChangeMotionRatioMedium = SCENE_CHANGE_MOTION_RATIO_MEDIUM; + } + virtual ~CSceneChangeDetectorVideo() { + } + void operator() (SLocalParam& sLocalParam) { + int32_t iRefRowStride = 0, iCurRowStride = 0; + uint8_t* pRefY = sLocalParam.pRefY; + uint8_t* pCurY = sLocalParam.pCurY; + uint8_t* pRefTmp = NULL, *pCurTmp = NULL; + + iRefRowStride = sLocalParam.iRefStride << 3; + iCurRowStride = sLocalParam.iCurStride << 3; + + for (int32_t j = 0; j < sLocalParam.iBlock8x8Height; j++) { + pRefTmp = pRefY; + pCurTmp = pCurY; + for (int32_t i = 0; i < sLocalParam.iBlock8x8Width; i++) { + int32_t iSad = m_pfSad (pCurTmp, sLocalParam.iCurStride, pRefTmp, sLocalParam.iRefStride); + m_sParam.iMotionBlockNum += iSad > HIGH_MOTION_BLOCK_THRESHOLD; + pRefTmp += 8; + pCurTmp += 8; + } + pRefY += iRefRowStride; + pCurY += iCurRowStride; + } + } + float GetSceneChangeMotionRatioLarge() const { + return m_fSceneChangeMotionRatioLarge; + } + float GetSceneChangeMotionRatioMedium() const { + return m_fSceneChangeMotionRatioMedium; + } + protected: + SadFuncPtr m_pfSad; + SSceneChangeResult& m_sParam; + float m_fSceneChangeMotionRatioLarge; + float m_fSceneChangeMotionRatioMedium; +}; + +class CSceneChangeDetectorScreen : public CSceneChangeDetectorVideo { + public: + CSceneChangeDetectorScreen (SSceneChangeResult& sParam, int32_t iCpuFlag) : CSceneChangeDetectorVideo (sParam, + iCpuFlag) { + m_fSceneChangeMotionRatioLarge = SCENE_CHANGE_MOTION_RATIO_LARGE_SCREEN; + m_fSceneChangeMotionRatioMedium = SCENE_CHANGE_MOTION_RATIO_MEDIUM; + } + virtual ~CSceneChangeDetectorScreen() { + } + void operator() (SLocalParam& sLocalParam) { + bool bScrollDetectFlag = m_sParam.sScrollResult.bScrollDetectFlag; + int32_t iScrollMvX = m_sParam.sScrollResult.iScrollMvX; + 
int32_t iScrollMvY = m_sParam.sScrollResult.iScrollMvY; + + int32_t iRefRowStride = 0, iCurRowStride = 0; + uint8_t* pRefY = sLocalParam.pRefY; + uint8_t* pCurY = sLocalParam.pCurY; + uint8_t* pRefTmp = NULL, *pCurTmp = NULL; + int32_t iWidth = sLocalParam.iWidth; + int32_t iHeight = sLocalParam.iHeight; + + iRefRowStride = sLocalParam.iRefStride << 3; + iCurRowStride = sLocalParam.iCurStride << 3; + + for (int32_t j = 0; j < sLocalParam.iBlock8x8Height; j++) { + pRefTmp = pRefY; + pCurTmp = pCurY; + for (int32_t i = 0; i < sLocalParam.iBlock8x8Width; i++) { + int32_t iBlockPointX = i << 3; + int32_t iBlockPointY = j << 3; + uint8_t uiBlockIdcTmp = NO_STATIC; + int32_t iSad = m_pfSad (pCurTmp, sLocalParam.iCurStride, pRefTmp, sLocalParam.iRefStride); + if (iSad == 0) { + uiBlockIdcTmp = COLLOCATED_STATIC; + } else if (bScrollDetectFlag && (!iScrollMvX || !iScrollMvY) && (iBlockPointX + iScrollMvX >= 0) + && (iBlockPointX + iScrollMvX <= iWidth - 8) && + (iBlockPointY + iScrollMvY >= 0) && (iBlockPointY + iScrollMvY <= iHeight - 8)) { + uint8_t* pRefTmpScroll = pRefTmp + iScrollMvY * sLocalParam.iRefStride + iScrollMvX; + int32_t iSadScroll = m_pfSad (pCurTmp, sLocalParam.iCurStride, pRefTmpScroll, sLocalParam.iRefStride); + + if (iSadScroll == 0) { + uiBlockIdcTmp = SCROLLED_STATIC; + } else { + m_sParam.iFrameComplexity += iSad; + m_sParam.iMotionBlockNum += iSad > HIGH_MOTION_BLOCK_THRESHOLD; + } + } else { + m_sParam.iFrameComplexity += iSad; + m_sParam.iMotionBlockNum += iSad > HIGH_MOTION_BLOCK_THRESHOLD; + } + * (sLocalParam.pStaticBlockIdc) ++ = uiBlockIdcTmp; + pRefTmp += 8; + pCurTmp += 8; + } + pRefY += iRefRowStride; + pCurY += iCurRowStride; + } + } +}; + +template +class CSceneChangeDetection : public IStrategy { + public: + CSceneChangeDetection (EMethods eMethod, int32_t iCpuFlag): m_cDetector (m_sSceneChangeParam, iCpuFlag) { + m_eMethod = eMethod; + WelsMemset (&m_sSceneChangeParam, 0, sizeof (m_sSceneChangeParam)); + } + + ~CSceneChangeDetection() 
{ + } + + EResult Process (int32_t iType, SPixMap* pSrcPixMap, SPixMap* pRefPixMap) { + EResult eReturn = RET_INVALIDPARAM; + + m_sLocalParam.iWidth = pSrcPixMap->sRect.iRectWidth; + m_sLocalParam.iHeight = pSrcPixMap->sRect.iRectHeight; + m_sLocalParam.iBlock8x8Width = m_sLocalParam.iWidth >> 3; + m_sLocalParam.iBlock8x8Height = m_sLocalParam.iHeight >> 3; + m_sLocalParam.pRefY = (uint8_t*)pRefPixMap->pPixel[0]; + m_sLocalParam.pCurY = (uint8_t*)pSrcPixMap->pPixel[0]; + m_sLocalParam.iRefStride = pRefPixMap->iStride[0]; + m_sLocalParam.iCurStride = pSrcPixMap->iStride[0]; + m_sLocalParam.pStaticBlockIdc = m_sSceneChangeParam.pStaticBlockIdc; + + int32_t iBlock8x8Num = m_sLocalParam.iBlock8x8Width * m_sLocalParam.iBlock8x8Height; + int32_t iSceneChangeThresholdLarge = WelsStaticCast (int32_t, + m_cDetector.GetSceneChangeMotionRatioLarge() * iBlock8x8Num + 0.5f + PESN); + int32_t iSceneChangeThresholdMedium = WelsStaticCast (int32_t, + m_cDetector.GetSceneChangeMotionRatioMedium() * iBlock8x8Num + 0.5f + PESN); + + m_sSceneChangeParam.iMotionBlockNum = 0; + m_sSceneChangeParam.iFrameComplexity = 0; + m_sSceneChangeParam.eSceneChangeIdc = SIMILAR_SCENE; + + m_cDetector (m_sLocalParam); + + if (m_sSceneChangeParam.iMotionBlockNum >= iSceneChangeThresholdLarge) { + m_sSceneChangeParam.eSceneChangeIdc = LARGE_CHANGED_SCENE; + } else if (m_sSceneChangeParam.iMotionBlockNum >= iSceneChangeThresholdMedium) { + m_sSceneChangeParam.eSceneChangeIdc = MEDIUM_CHANGED_SCENE; + } + + eReturn = RET_SUCCESS; + + return eReturn; + } + + EResult Get (int32_t iType, void* pParam) { + if (pParam == NULL) { + return RET_INVALIDPARAM; + } + * (SSceneChangeResult*)pParam = m_sSceneChangeParam; + return RET_SUCCESS; + } + + EResult Set (int32_t iType, void* pParam) { + if (pParam == NULL) { + return RET_INVALIDPARAM; + } + m_sSceneChangeParam = * (SSceneChangeResult*)pParam; + return RET_SUCCESS; + } + private: + SSceneChangeResult m_sSceneChangeParam; + SLocalParam m_sLocalParam; + T 
m_cDetector; +}; + +IStrategy* BuildSceneChangeDetection (EMethods eMethod, int32_t iCpuFlag); + +WELSVP_NAMESPACE_END + +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/scrolldetection/ScrollDetection.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/scrolldetection/ScrollDetection.cpp new file mode 100644 index 000000000..d067e8e64 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/scrolldetection/ScrollDetection.cpp @@ -0,0 +1,115 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + + +#include "ScrollDetection.h" +#include "ScrollDetectionFuncs.h" +#include "cpu.h" + +WELSVP_NAMESPACE_BEGIN + +EResult CScrollDetection::Process (int32_t iType, SPixMap* pSrcPixMap, SPixMap* pRefPixMap) { + if (pRefPixMap->pPixel[0] == NULL || pSrcPixMap->pPixel[0] == NULL || + pRefPixMap->sRect.iRectWidth != pSrcPixMap->sRect.iRectWidth + || pRefPixMap->sRect.iRectHeight != pSrcPixMap->sRect.iRectHeight) { + return RET_INVALIDPARAM; + } + + if (!m_sScrollDetectionParam.bMaskInfoAvailable) + ScrollDetectionWithoutMask (pSrcPixMap, pRefPixMap); + else + ScrollDetectionWithMask (pSrcPixMap, pRefPixMap); + + return RET_SUCCESS; +} + +EResult CScrollDetection::Set (int32_t iType, void* pParam) { + if (pParam == NULL) { + return RET_INVALIDPARAM; + } + m_sScrollDetectionParam = * ((SScrollDetectionParam*)pParam); + return RET_SUCCESS; +} + +EResult CScrollDetection::Get (int32_t iType, void* pParam) { + if (pParam == NULL) { + return RET_INVALIDPARAM; + } + * ((SScrollDetectionParam*)pParam) = m_sScrollDetectionParam; + return RET_SUCCESS; +} + +void CScrollDetection::ScrollDetectionWithMask (SPixMap* pSrcPixMap, SPixMap* pRefPixMap) { + int32_t iStartX, iStartY, iWidth, iHeight; + + iStartX = m_sScrollDetectionParam.sMaskRect.iRectLeft; + iStartY = m_sScrollDetectionParam.sMaskRect.iRectTop; + iWidth = m_sScrollDetectionParam.sMaskRect.iRectWidth; + iHeight = m_sScrollDetectionParam.sMaskRect.iRectHeight; + + iWidth /= 2; + iStartX += iWidth / 2; + + m_sScrollDetectionParam.iScrollMvX = 0; + m_sScrollDetectionParam.iScrollMvY = 0; + m_sScrollDetectionParam.bScrollDetectFlag = false; + + if (iStartX >= 0 && iWidth > MINIMUM_DETECT_WIDTH && iHeight > 2 * CHECK_OFFSET) { + ScrollDetectionCore (pSrcPixMap, pRefPixMap, iWidth, iHeight, iStartX, iStartY, m_sScrollDetectionParam); + } +} + +void CScrollDetection::ScrollDetectionWithoutMask (SPixMap* pSrcPixMap, SPixMap* pRefPixMap) { + int32_t iStartX, iStartY, iWidth, iHeight; + + const int32_t kiPicBorderWidth 
= pSrcPixMap->sRect.iRectHeight >> 4; + const int32_t kiRegionWidth = (int) (pSrcPixMap->sRect.iRectWidth - (kiPicBorderWidth << 1)) / 3; + const int32_t kiRegionHeight = (pSrcPixMap->sRect.iRectHeight * 7) >> 3; + const int32_t kiHieghtStride = (int) pSrcPixMap->sRect.iRectHeight * 5 / 24; + + for (int32_t i = 0; i < REGION_NUMBER; i++) { + iStartX = kiPicBorderWidth + (i % 3) * kiRegionWidth; + iStartY = -pSrcPixMap->sRect.iRectHeight * 7 / 48 + (int) (i / 3) * (kiHieghtStride); + iWidth = kiRegionWidth; + iHeight = kiRegionHeight; + + iWidth /= 2; + iStartX += iWidth / 2; + + ScrollDetectionCore (pSrcPixMap, pRefPixMap, iWidth, iHeight, iStartX, iStartY, m_sScrollDetectionParam); + + if (m_sScrollDetectionParam.bScrollDetectFlag && m_sScrollDetectionParam.iScrollMvY) + break; + } +} + +WELSVP_NAMESPACE_END diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/scrolldetection/ScrollDetection.h b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/scrolldetection/ScrollDetection.h new file mode 100644 index 000000000..758b149ad --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/scrolldetection/ScrollDetection.h @@ -0,0 +1,68 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * \file : ScrollDectection.h + * + * \brief : scroll detection class of wels video processor class + * + * \date : 2011/04/26 + * + * \description : rewrite the package code of scroll detection class + * + ************************************************************************************* + */ + +#include "util.h" +#include "memory.h" +#include "WelsFrameWork.h" +#include "IWelsVP.h" + +WELSVP_NAMESPACE_BEGIN + +class CScrollDetection : public IStrategy { + public: + CScrollDetection (int32_t iCpuFlag) { + m_eMethod = METHOD_SCROLL_DETECTION; + WelsMemset (&m_sScrollDetectionParam, 0, sizeof (m_sScrollDetectionParam)); + } + ~CScrollDetection() { + } + EResult Process (int32_t iType, SPixMap* pSrcPixMap, SPixMap* pRefPixMap); + EResult Set (int32_t iType, void* pParam); + EResult Get (int32_t iType, void* pParam); + + private: + void ScrollDetectionWithMask (SPixMap* pSrcPixMap, SPixMap* pRefPixMap); + void ScrollDetectionWithoutMask (SPixMap* pSrcPixMap, SPixMap* pRefPixMap); + private: + SScrollDetectionParam m_sScrollDetectionParam; +}; + +WELSVP_NAMESPACE_END diff --git 
a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/scrolldetection/ScrollDetectionFuncs.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/scrolldetection/ScrollDetectionFuncs.cpp new file mode 100644 index 000000000..e7c9d2ced --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/scrolldetection/ScrollDetectionFuncs.cpp @@ -0,0 +1,200 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include "ScrollDetection.h" +#include "ScrollDetectionFuncs.h" +#include "ls_defines.h" + +WELSVP_NAMESPACE_BEGIN + +int32_t CheckLine (uint8_t* pData, int32_t iWidth) { + int32_t iQualified = 0; + int32_t iColorMap[8] = {0}; + int32_t iChangedTimes = 0; + int32_t iColorCounts = 0; + + RECORD_COLOR (pData[0], iColorMap); + + for (int32_t i = 1; i < iWidth; i++) { + RECORD_COLOR (pData[i], iColorMap); + iChangedTimes += (pData[i] != pData[i - 1]); + } + for (int32_t i = 0; i < 8; i++) + for (int32_t j = 0; j < 32; j++) + iColorCounts += ((iColorMap[i] >> j) & 1); + + switch (iColorCounts) { + case 1: + iQualified = 0; + break; + case 2: + case 3: + iQualified = (iChangedTimes > 3); + break; + default: + iQualified = 1; + break; + } + return iQualified; +} + +int32_t SelectTestLine (uint8_t* pY, int32_t iWidth, int32_t iHeight, int32_t iPicHeight, + int32_t iStride, int32_t iOffsetX, int32_t iOffsetY) { + const int32_t kiHalfHeight = iHeight >> 1; + const int32_t kiMidPos = iOffsetY + kiHalfHeight; + int32_t TestPos = kiMidPos; + int32_t iOffsetAbs; + uint8_t* pTmp; + + for (iOffsetAbs = 0; iOffsetAbs < kiHalfHeight; iOffsetAbs++) { + TestPos = kiMidPos + iOffsetAbs; + if (TestPos < iPicHeight) { + pTmp = pY + TestPos * iStride + iOffsetX; + if (CheckLine (pTmp, iWidth)) break; + } + TestPos = kiMidPos - iOffsetAbs; + if (TestPos >= 0) { + pTmp = pY + TestPos * iStride + iOffsetX; + if (CheckLine (pTmp, iWidth)) break; + } + } + if (iOffsetAbs == kiHalfHeight) + TestPos = -1; + return TestPos; +} + +/* + * compare pixel line between previous and current one + * return: 0 for totally equal, otherwise 1 + */ +int32_t CompareLine (uint8_t* pYSrc, uint8_t* pYRef, const int32_t kiWidth) { + int32_t iCmp = 1; + + if (LD32 (pYSrc) != LD32 (pYRef)) return 1; + if (LD32 (pYSrc + 4) != LD32 (pYRef + 4)) return 1; + if (LD32 (pYSrc + 8) != LD32 (pYRef + 8)) return 1; + if (kiWidth > 12) + iCmp = WelsMemcmp (pYSrc + 12, pYRef + 12, kiWidth - 12); + return iCmp; +} + 
+void ScrollDetectionCore (SPixMap* pSrcPixMap, SPixMap* pRefPixMap, int32_t iWidth, int32_t iHeight, + int32_t iOffsetX, int32_t iOffsetY, SScrollDetectionParam& sScrollDetectionParam) { + bool bScrollDetected = 0; + uint8_t* pYLine; + uint8_t* pYTmp; + int32_t iTestPos, iSearchPos = 0, iOffsetAbs, iMaxAbs; + int32_t iPicHeight = pRefPixMap->sRect.iRectHeight; + int32_t iMinHeight = WELS_MAX (iOffsetY, 0); + int32_t iMaxHeight = WELS_MIN (iOffsetY + iHeight - 1, iPicHeight - 1) ; //offset_y + height - 1;// + uint8_t* pYRef, *pYSrc; + int32_t iYStride; + + pYRef = (uint8_t*)pRefPixMap->pPixel[0]; + pYSrc = (uint8_t*)pSrcPixMap->pPixel[0]; + iYStride = pRefPixMap->iStride[0]; + + iTestPos = SelectTestLine (pYSrc, iWidth, iHeight, iPicHeight, iYStride, iOffsetX, iOffsetY); + + if (iTestPos == -1) { + sScrollDetectionParam.bScrollDetectFlag = 0; + return; + } + pYLine = pYSrc + iYStride * iTestPos + iOffsetX; + iMaxAbs = WELS_MIN (WELS_MAX (iTestPos - iMinHeight - 1, iMaxHeight - iTestPos), MAX_SCROLL_MV_Y); + iSearchPos = iTestPos; + for (iOffsetAbs = 0; iOffsetAbs <= iMaxAbs; iOffsetAbs++) { + iSearchPos = iTestPos + iOffsetAbs; + if (iSearchPos <= iMaxHeight) { + pYTmp = pYRef + iSearchPos * iYStride + iOffsetX; + if (!CompareLine (pYLine, pYTmp, iWidth)) { + uint8_t* pYUpper, *pYLineUpper; + int32_t iCheckedLines; + int32_t iLowOffset = WELS_MIN (iMaxHeight - iSearchPos, CHECK_OFFSET); + int32_t i; + + iCheckedLines = WELS_MIN (iTestPos - iMinHeight + iLowOffset, 2 * CHECK_OFFSET); + pYUpper = pYTmp - (iCheckedLines - iLowOffset) * iYStride; + pYLineUpper = pYLine - (iCheckedLines - iLowOffset) * iYStride; + + for (i = 0; i < iCheckedLines; i ++) { + if (CompareLine (pYLineUpper, pYUpper, iWidth)) { + break; + } + pYUpper += iYStride; + pYLineUpper += iYStride; + } + if (i == iCheckedLines) { + bScrollDetected = 1; + break; + } + } + } + + iSearchPos = iTestPos - iOffsetAbs - 1; + if (iSearchPos >= iMinHeight) { + pYTmp = pYRef + iSearchPos * iYStride + iOffsetX; 
+ if (!CompareLine (pYLine, pYTmp, iWidth)) { + uint8_t* pYUpper, *pYLineUpper; + int32_t iCheckedLines; + int32_t iUpOffset = WELS_MIN (iSearchPos - iMinHeight, CHECK_OFFSET); + int32_t i; + + pYUpper = pYTmp - iUpOffset * iYStride; + pYLineUpper = pYLine - iUpOffset * iYStride; + iCheckedLines = WELS_MIN (iMaxHeight - iTestPos + iUpOffset, 2 * CHECK_OFFSET); + + for (i = 0; i < iCheckedLines; i ++) { + if (CompareLine (pYLineUpper, pYUpper, iWidth)) { + break; + } + pYUpper += iYStride; + pYLineUpper += iYStride; + } + if (i == iCheckedLines) { + bScrollDetected = 1; + break; + } + } + } + } + + if (!bScrollDetected) { + sScrollDetectionParam.bScrollDetectFlag = 0; + } else { + sScrollDetectionParam.bScrollDetectFlag = 1; + sScrollDetectionParam.iScrollMvY = iSearchPos - iTestPos; // pre_pos - cur_pos, change to mv + sScrollDetectionParam.iScrollMvX = 0; + } +} + +WELSVP_NAMESPACE_END diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/scrolldetection/ScrollDetectionFuncs.h b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/scrolldetection/ScrollDetectionFuncs.h new file mode 100644 index 000000000..27d7dfc90 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/scrolldetection/ScrollDetectionFuncs.h @@ -0,0 +1,61 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * \file : ScrollDetectionFuncs.h + * + * \brief : scroll detection class of wels video processor class + * + * \date : 2011/04/26 + * + * \description : rewrite the package code of scroll detection class + * + ************************************************************************************* + */ + +WELSVP_NAMESPACE_BEGIN + +#define MINIMUM_DETECT_WIDTH 50 // no less than 16 +#define CHECK_OFFSET 25 +#define MAX_SCROLL_MV_Y 511 +#define REGION_NUMBER 9 +#define RECORD_COLOR(a, x) \ +{ \ + int32_t _t = (uint8_t)(a); \ + x[_t>>5] |= (1 << (_t&31)); \ +} + +int32_t CheckLine (uint8_t* pData, int32_t iWidth); +int32_t SelectTestLine (uint8_t* pY, int32_t iWidth, int32_t iHeight, int32_t iPicHeight, + int32_t iStride, int32_t iOffsetX, int32_t iOffsetY); +int32_t CompareLine (uint8_t* pYSrc, uint8_t* pYRef, const int32_t kiWidth); +void ScrollDetectionCore (SPixMap* pSrcPixMap, SPixMap* pRefPixMap, int32_t iWidth, int32_t iHeight, + int32_t iOffsetX, int32_t iOffsetY, SScrollDetectionParam& sScrollDetectionParam); + +WELSVP_NAMESPACE_END diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/vaacalc/vaacalcfuncs.cpp 
b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/vaacalc/vaacalcfuncs.cpp new file mode 100644 index 000000000..4aabda273 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/vaacalc/vaacalcfuncs.cpp @@ -0,0 +1,598 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include "util.h" + +WELSVP_NAMESPACE_BEGIN + +void VAACalcSadSsd_c (const uint8_t* pCurData, const uint8_t* pRefData, int32_t iPicWidth, int32_t iPicHeight, + int32_t iPicStride, + int32_t* pFrameSad, int32_t* pSad8x8, int32_t* pSum16x16, int32_t* psqsum16x16, int32_t* psqdiff16x16) { + const uint8_t* tmp_ref = pRefData; + const uint8_t* tmp_cur = pCurData; + int32_t iMbWidth = (iPicWidth >> 4); + int32_t mb_height = (iPicHeight >> 4); + int32_t mb_index = 0; + int32_t pic_stride_x8 = iPicStride << 3; + int32_t step = (iPicStride << 4) - iPicWidth; + + *pFrameSad = 0; + for (int32_t i = 0; i < mb_height; i ++) { + for (int32_t j = 0; j < iMbWidth; j ++) { + int32_t k, l; + int32_t l_sad, l_sqdiff, l_sum, l_sqsum; + const uint8_t* tmp_cur_row; + const uint8_t* tmp_ref_row; + + pSum16x16[mb_index] = 0; + psqsum16x16[mb_index] = 0; + psqdiff16x16[mb_index] = 0; + + l_sad = l_sqdiff = l_sum = l_sqsum = 0; + tmp_cur_row = tmp_cur; + tmp_ref_row = tmp_ref; + for (k = 0; k < 8; k ++) { + for (l = 0; l < 8; l ++) { + int32_t diff = WELS_ABS (tmp_cur_row[l] - tmp_ref_row[l]); + l_sad += diff; + l_sqdiff += diff * diff; + l_sum += tmp_cur_row[l]; + l_sqsum += tmp_cur_row[l] * tmp_cur_row[l]; + } + tmp_cur_row += iPicStride; + tmp_ref_row += iPicStride; + } + *pFrameSad += l_sad; + pSad8x8[ (mb_index << 2) + 0] = l_sad; + pSum16x16[mb_index] += l_sum; + psqsum16x16[mb_index] += l_sqsum; + psqdiff16x16[mb_index] += l_sqdiff; + + l_sad = l_sqdiff = l_sum = l_sqsum = 0; + tmp_cur_row = tmp_cur + 8; + tmp_ref_row = tmp_ref + 8; + for (k = 0; k < 8; k ++) { + for (l = 0; l < 8; l ++) { + int32_t diff = WELS_ABS (tmp_cur_row[l] - tmp_ref_row[l]); + l_sad += diff; + l_sqdiff += diff * diff; + l_sum += tmp_cur_row[l]; + l_sqsum += tmp_cur_row[l] * tmp_cur_row[l]; + } + tmp_cur_row += iPicStride; + tmp_ref_row += iPicStride; + } + *pFrameSad += l_sad; + pSad8x8[ (mb_index << 2) + 1] = l_sad; + pSum16x16[mb_index] += l_sum; + psqsum16x16[mb_index] += l_sqsum; + 
psqdiff16x16[mb_index] += l_sqdiff; + + l_sad = l_sqdiff = l_sum = l_sqsum = 0; + tmp_cur_row = tmp_cur + pic_stride_x8; + tmp_ref_row = tmp_ref + pic_stride_x8; + for (k = 0; k < 8; k ++) { + for (l = 0; l < 8; l ++) { + int32_t diff = WELS_ABS (tmp_cur_row[l] - tmp_ref_row[l]); + l_sad += diff; + l_sqdiff += diff * diff; + l_sum += tmp_cur_row[l]; + l_sqsum += tmp_cur_row[l] * tmp_cur_row[l]; + } + tmp_cur_row += iPicStride; + tmp_ref_row += iPicStride; + } + *pFrameSad += l_sad; + pSad8x8[ (mb_index << 2) + 2] = l_sad; + pSum16x16[mb_index] += l_sum; + psqsum16x16[mb_index] += l_sqsum; + psqdiff16x16[mb_index] += l_sqdiff; + + l_sad = l_sqdiff = l_sum = l_sqsum = 0; + tmp_cur_row = tmp_cur + pic_stride_x8 + 8; + tmp_ref_row = tmp_ref + pic_stride_x8 + 8; + for (k = 0; k < 8; k ++) { + for (l = 0; l < 8; l ++) { + int32_t diff = WELS_ABS (tmp_cur_row[l] - tmp_ref_row[l]); + l_sad += diff; + l_sqdiff += diff * diff; + l_sum += tmp_cur_row[l]; + l_sqsum += tmp_cur_row[l] * tmp_cur_row[l]; + } + tmp_cur_row += iPicStride; + tmp_ref_row += iPicStride; + } + *pFrameSad += l_sad; + pSad8x8[ (mb_index << 2) + 3] = l_sad; + pSum16x16[mb_index] += l_sum; + psqsum16x16[mb_index] += l_sqsum; + psqdiff16x16[mb_index] += l_sqdiff; + + + tmp_ref += 16; + tmp_cur += 16; + ++mb_index; + } + tmp_ref += step; + tmp_cur += step; + } +} +void VAACalcSadVar_c (const uint8_t* pCurData, const uint8_t* pRefData, int32_t iPicWidth, int32_t iPicHeight, + int32_t iPicStride, + int32_t* pFrameSad, int32_t* pSad8x8, int32_t* pSum16x16, int32_t* psqsum16x16) { + const uint8_t* tmp_ref = pRefData; + const uint8_t* tmp_cur = pCurData; + int32_t iMbWidth = (iPicWidth >> 4); + int32_t mb_height = (iPicHeight >> 4); + int32_t mb_index = 0; + int32_t pic_stride_x8 = iPicStride << 3; + int32_t step = (iPicStride << 4) - iPicWidth; + + *pFrameSad = 0; + for (int32_t i = 0; i < mb_height; i ++) { + for (int32_t j = 0; j < iMbWidth; j ++) { + int32_t k, l; + int32_t l_sad, l_sum, l_sqsum; + const 
uint8_t* tmp_cur_row; + const uint8_t* tmp_ref_row; + + pSum16x16[mb_index] = 0; + psqsum16x16[mb_index] = 0; + + l_sad = l_sum = l_sqsum = 0; + tmp_cur_row = tmp_cur; + tmp_ref_row = tmp_ref; + for (k = 0; k < 8; k ++) { + for (l = 0; l < 8; l ++) { + int32_t diff = WELS_ABS (tmp_cur_row[l] - tmp_ref_row[l]); + l_sad += diff; + l_sum += tmp_cur_row[l]; + l_sqsum += tmp_cur_row[l] * tmp_cur_row[l]; + } + tmp_cur_row += iPicStride; + tmp_ref_row += iPicStride; + } + *pFrameSad += l_sad; + pSad8x8[ (mb_index << 2) + 0] = l_sad; + pSum16x16[mb_index] += l_sum; + psqsum16x16[mb_index] += l_sqsum; + + l_sad = l_sum = l_sqsum = 0; + tmp_cur_row = tmp_cur + 8; + tmp_ref_row = tmp_ref + 8; + for (k = 0; k < 8; k ++) { + for (l = 0; l < 8; l ++) { + int32_t diff = WELS_ABS (tmp_cur_row[l] - tmp_ref_row[l]); + l_sad += diff; + l_sum += tmp_cur_row[l]; + l_sqsum += tmp_cur_row[l] * tmp_cur_row[l]; + } + tmp_cur_row += iPicStride; + tmp_ref_row += iPicStride; + } + *pFrameSad += l_sad; + pSad8x8[ (mb_index << 2) + 1] = l_sad; + pSum16x16[mb_index] += l_sum; + psqsum16x16[mb_index] += l_sqsum; + + l_sad = l_sum = l_sqsum = 0; + tmp_cur_row = tmp_cur + pic_stride_x8; + tmp_ref_row = tmp_ref + pic_stride_x8; + for (k = 0; k < 8; k ++) { + for (l = 0; l < 8; l ++) { + int32_t diff = WELS_ABS (tmp_cur_row[l] - tmp_ref_row[l]); + l_sad += diff; + l_sum += tmp_cur_row[l]; + l_sqsum += tmp_cur_row[l] * tmp_cur_row[l]; + } + tmp_cur_row += iPicStride; + tmp_ref_row += iPicStride; + } + *pFrameSad += l_sad; + pSad8x8[ (mb_index << 2) + 2] = l_sad; + pSum16x16[mb_index] += l_sum; + psqsum16x16[mb_index] += l_sqsum; + + l_sad = l_sum = l_sqsum = 0; + tmp_cur_row = tmp_cur + pic_stride_x8 + 8; + tmp_ref_row = tmp_ref + pic_stride_x8 + 8; + for (k = 0; k < 8; k ++) { + for (l = 0; l < 8; l ++) { + int32_t diff = WELS_ABS (tmp_cur_row[l] - tmp_ref_row[l]); + l_sad += diff; + l_sum += tmp_cur_row[l]; + l_sqsum += tmp_cur_row[l] * tmp_cur_row[l]; + } + tmp_cur_row += iPicStride; + tmp_ref_row 
+= iPicStride; + } + *pFrameSad += l_sad; + pSad8x8[ (mb_index << 2) + 3] = l_sad; + pSum16x16[mb_index] += l_sum; + psqsum16x16[mb_index] += l_sqsum; + + + tmp_ref += 16; + tmp_cur += 16; + ++mb_index; + } + tmp_ref += step; + tmp_cur += step; + } +} + + +void VAACalcSad_c (const uint8_t* pCurData, const uint8_t* pRefData, int32_t iPicWidth, int32_t iPicHeight, + int32_t iPicStride, + int32_t* pFrameSad, int32_t* pSad8x8) { + const uint8_t* tmp_ref = pRefData; + const uint8_t* tmp_cur = pCurData; + int32_t iMbWidth = (iPicWidth >> 4); + int32_t mb_height = (iPicHeight >> 4); + int32_t mb_index = 0; + int32_t pic_stride_x8 = iPicStride << 3; + int32_t step = (iPicStride << 4) - iPicWidth; + + *pFrameSad = 0; + for (int32_t i = 0; i < mb_height; i ++) { + for (int32_t j = 0; j < iMbWidth; j ++) { + int32_t k, l; + int32_t l_sad; + const uint8_t* tmp_cur_row; + const uint8_t* tmp_ref_row; + + l_sad = 0; + tmp_cur_row = tmp_cur; + tmp_ref_row = tmp_ref; + for (k = 0; k < 8; k ++) { + for (l = 0; l < 8; l ++) { + int32_t diff = WELS_ABS (tmp_cur_row[l] - tmp_ref_row[l]); + l_sad += diff; + } + tmp_cur_row += iPicStride; + tmp_ref_row += iPicStride; + } + *pFrameSad += l_sad; + pSad8x8[ (mb_index << 2) + 0] = l_sad; + + l_sad = 0; + tmp_cur_row = tmp_cur + 8; + tmp_ref_row = tmp_ref + 8; + for (k = 0; k < 8; k ++) { + for (l = 0; l < 8; l ++) { + int32_t diff = WELS_ABS (tmp_cur_row[l] - tmp_ref_row[l]); + l_sad += diff; + } + tmp_cur_row += iPicStride; + tmp_ref_row += iPicStride; + } + *pFrameSad += l_sad; + pSad8x8[ (mb_index << 2) + 1] = l_sad; + + l_sad = 0; + tmp_cur_row = tmp_cur + pic_stride_x8; + tmp_ref_row = tmp_ref + pic_stride_x8; + for (k = 0; k < 8; k ++) { + for (l = 0; l < 8; l ++) { + int32_t diff = WELS_ABS (tmp_cur_row[l] - tmp_ref_row[l]); + l_sad += diff; + } + tmp_cur_row += iPicStride; + tmp_ref_row += iPicStride; + } + *pFrameSad += l_sad; + pSad8x8[ (mb_index << 2) + 2] = l_sad; + + l_sad = 0; + tmp_cur_row = tmp_cur + pic_stride_x8 + 8; + 
tmp_ref_row = tmp_ref + pic_stride_x8 + 8; + for (k = 0; k < 8; k ++) { + for (l = 0; l < 8; l ++) { + int32_t diff = WELS_ABS (tmp_cur_row[l] - tmp_ref_row[l]); + l_sad += diff; + } + tmp_cur_row += iPicStride; + tmp_ref_row += iPicStride; + } + *pFrameSad += l_sad; + pSad8x8[ (mb_index << 2) + 3] = l_sad; + + tmp_ref += 16; + tmp_cur += 16; + ++mb_index; + } + tmp_ref += step; + tmp_cur += step; + } +} + +void VAACalcSadSsdBgd_c (const uint8_t* pCurData, const uint8_t* pRefData, int32_t iPicWidth, int32_t iPicHeight, + int32_t iPicStride, + int32_t* pFrameSad, int32_t* pSad8x8, int32_t* pSum16x16, int32_t* psqsum16x16, int32_t* psqdiff16x16, int32_t* pSd8x8, + uint8_t* pMad8x8) + +{ + const uint8_t* tmp_ref = pRefData; + const uint8_t* tmp_cur = pCurData; + int32_t iMbWidth = (iPicWidth >> 4); + int32_t mb_height = (iPicHeight >> 4); + int32_t mb_index = 0; + int32_t pic_stride_x8 = iPicStride << 3; + int32_t step = (iPicStride << 4) - iPicWidth; + + *pFrameSad = 0; + for (int32_t i = 0; i < mb_height; i ++) { + for (int32_t j = 0; j < iMbWidth; j ++) { + int32_t k, l; + int32_t l_sad, l_sqdiff, l_sum, l_sqsum, l_sd, l_mad; + const uint8_t* tmp_cur_row; + const uint8_t* tmp_ref_row; + + pSum16x16[mb_index] = 0; + psqsum16x16[mb_index] = 0; + psqdiff16x16[mb_index] = 0; + + l_sd = l_mad = l_sad = l_sqdiff = l_sum = l_sqsum = 0; + tmp_cur_row = tmp_cur; + tmp_ref_row = tmp_ref; + for (k = 0; k < 8; k ++) { + for (l = 0; l < 8; l ++) { + int32_t diff = tmp_cur_row[l] - tmp_ref_row[l]; + int32_t abs_diff = WELS_ABS (diff); + + l_sd += diff; + if (abs_diff > l_mad) { + l_mad = abs_diff; + } + l_sad += abs_diff; + l_sqdiff += abs_diff * abs_diff; + l_sum += tmp_cur_row[l]; + l_sqsum += tmp_cur_row[l] * tmp_cur_row[l]; + } + tmp_cur_row += iPicStride; + tmp_ref_row += iPicStride; + } + *pFrameSad += l_sad; + pSad8x8[ (mb_index << 2) + 0] = l_sad; + pSum16x16[mb_index] += l_sum; + psqsum16x16[mb_index] += l_sqsum; + psqdiff16x16[mb_index] += l_sqdiff; + pSd8x8[ (mb_index 
<< 2) + 0] = l_sd; + pMad8x8[ (mb_index << 2) + 0] = l_mad; + + + l_sd = l_mad = l_sad = l_sqdiff = l_sum = l_sqsum = 0; + tmp_cur_row = tmp_cur + 8; + tmp_ref_row = tmp_ref + 8; + for (k = 0; k < 8; k ++) { + for (l = 0; l < 8; l ++) { + int32_t diff = tmp_cur_row[l] - tmp_ref_row[l]; + int32_t abs_diff = WELS_ABS (diff); + + l_sd += diff; + if (abs_diff > l_mad) { + l_mad = abs_diff; + } + l_sad += abs_diff; + l_sqdiff += abs_diff * abs_diff; + l_sum += tmp_cur_row[l]; + l_sqsum += tmp_cur_row[l] * tmp_cur_row[l]; + } + tmp_cur_row += iPicStride; + tmp_ref_row += iPicStride; + } + *pFrameSad += l_sad; + pSad8x8[ (mb_index << 2) + 1] = l_sad; + pSum16x16[mb_index] += l_sum; + psqsum16x16[mb_index] += l_sqsum; + psqdiff16x16[mb_index] += l_sqdiff; + pSd8x8[ (mb_index << 2) + 1] = l_sd; + pMad8x8[ (mb_index << 2) + 1] = l_mad; + + l_sd = l_mad = l_sad = l_sqdiff = l_sum = l_sqsum = 0; + tmp_cur_row = tmp_cur + pic_stride_x8; + tmp_ref_row = tmp_ref + pic_stride_x8; + for (k = 0; k < 8; k ++) { + for (l = 0; l < 8; l ++) { + int32_t diff = tmp_cur_row[l] - tmp_ref_row[l]; + int32_t abs_diff = WELS_ABS (diff); + + l_sd += diff; + if (abs_diff > l_mad) { + l_mad = abs_diff; + } + l_sad += abs_diff; + l_sqdiff += abs_diff * abs_diff; + l_sum += tmp_cur_row[l]; + l_sqsum += tmp_cur_row[l] * tmp_cur_row[l]; + } + tmp_cur_row += iPicStride; + tmp_ref_row += iPicStride; + } + *pFrameSad += l_sad; + pSad8x8[ (mb_index << 2) + 2] = l_sad; + pSum16x16[mb_index] += l_sum; + psqsum16x16[mb_index] += l_sqsum; + psqdiff16x16[mb_index] += l_sqdiff; + pSd8x8[ (mb_index << 2) + 2] = l_sd; + pMad8x8[ (mb_index << 2) + 2] = l_mad; + + l_sd = l_mad = l_sad = l_sqdiff = l_sum = l_sqsum = 0; + tmp_cur_row = tmp_cur + pic_stride_x8 + 8; + tmp_ref_row = tmp_ref + pic_stride_x8 + 8; + for (k = 0; k < 8; k ++) { + for (l = 0; l < 8; l ++) { + int32_t diff = tmp_cur_row[l] - tmp_ref_row[l]; + int32_t abs_diff = WELS_ABS (diff); + + l_sd += diff; + if (abs_diff > l_mad) { + l_mad = abs_diff; + 
} + l_sad += abs_diff; + l_sqdiff += abs_diff * abs_diff; + l_sum += tmp_cur_row[l]; + l_sqsum += tmp_cur_row[l] * tmp_cur_row[l]; + } + tmp_cur_row += iPicStride; + tmp_ref_row += iPicStride; + } + *pFrameSad += l_sad; + pSad8x8[ (mb_index << 2) + 3] = l_sad; + pSum16x16[mb_index] += l_sum; + psqsum16x16[mb_index] += l_sqsum; + psqdiff16x16[mb_index] += l_sqdiff; + pSd8x8[ (mb_index << 2) + 3] = l_sd; + pMad8x8[ (mb_index << 2) + 3] = l_mad; + + tmp_ref += 16; + tmp_cur += 16; + ++mb_index; + } + tmp_ref += step; + tmp_cur += step; + } +} + +void VAACalcSadBgd_c (const uint8_t* pCurData, const uint8_t* pRefData, int32_t iPicWidth, int32_t iPicHeight, + int32_t iPicStride, + int32_t* pFrameSad, int32_t* pSad8x8, int32_t* pSd8x8, uint8_t* pMad8x8) { + const uint8_t* tmp_ref = pRefData; + const uint8_t* tmp_cur = pCurData; + int32_t iMbWidth = (iPicWidth >> 4); + int32_t mb_height = (iPicHeight >> 4); + int32_t mb_index = 0; + int32_t pic_stride_x8 = iPicStride << 3; + int32_t step = (iPicStride << 4) - iPicWidth; + + *pFrameSad = 0; + for (int32_t i = 0; i < mb_height; i ++) { + for (int32_t j = 0; j < iMbWidth; j ++) { + int32_t k, l; + int32_t l_sad, l_sd, l_mad; + const uint8_t* tmp_cur_row; + const uint8_t* tmp_ref_row; + + l_mad = l_sd = l_sad = 0; + tmp_cur_row = tmp_cur; + tmp_ref_row = tmp_ref; + for (k = 0; k < 8; k ++) { + for (l = 0; l < 8; l ++) { + int32_t diff = tmp_cur_row[l] - tmp_ref_row[l]; + int32_t abs_diff = WELS_ABS (diff); + l_sd += diff; + l_sad += abs_diff; + if (abs_diff > l_mad) { + l_mad = abs_diff; + } + } + tmp_cur_row += iPicStride; + tmp_ref_row += iPicStride; + } + *pFrameSad += l_sad; + pSad8x8[ (mb_index << 2) + 0] = l_sad; + pSd8x8[ (mb_index << 2) + 0] = l_sd; + pMad8x8[ (mb_index << 2) + 0] = l_mad; + + l_mad = l_sd = l_sad = 0; + tmp_cur_row = tmp_cur + 8; + tmp_ref_row = tmp_ref + 8; + for (k = 0; k < 8; k ++) { + for (l = 0; l < 8; l ++) { + int32_t diff = tmp_cur_row[l] - tmp_ref_row[l]; + int32_t abs_diff = WELS_ABS (diff); 
+ l_sd += diff; + l_sad += abs_diff; + if (abs_diff > l_mad) { + l_mad = abs_diff; + } + } + tmp_cur_row += iPicStride; + tmp_ref_row += iPicStride; + } + *pFrameSad += l_sad; + pSad8x8[ (mb_index << 2) + 1] = l_sad; + pSd8x8[ (mb_index << 2) + 1] = l_sd; + pMad8x8[ (mb_index << 2) + 1] = l_mad; + + l_mad = l_sd = l_sad = 0; + tmp_cur_row = tmp_cur + pic_stride_x8; + tmp_ref_row = tmp_ref + pic_stride_x8; + for (k = 0; k < 8; k ++) { + for (l = 0; l < 8; l ++) { + int32_t diff = tmp_cur_row[l] - tmp_ref_row[l]; + int32_t abs_diff = WELS_ABS (diff); + l_sd += diff; + l_sad += abs_diff; + if (abs_diff > l_mad) { + l_mad = abs_diff; + } + } + tmp_cur_row += iPicStride; + tmp_ref_row += iPicStride; + } + *pFrameSad += l_sad; + pSad8x8[ (mb_index << 2) + 2] = l_sad; + pSd8x8[ (mb_index << 2) + 2] = l_sd; + pMad8x8[ (mb_index << 2) + 2] = l_mad; + + l_mad = l_sd = l_sad = 0; + tmp_cur_row = tmp_cur + pic_stride_x8 + 8; + tmp_ref_row = tmp_ref + pic_stride_x8 + 8; + for (k = 0; k < 8; k ++) { + for (l = 0; l < 8; l ++) { + int32_t diff = tmp_cur_row[l] - tmp_ref_row[l]; + int32_t abs_diff = WELS_ABS (diff); + l_sd += diff; + l_sad += abs_diff; + if (abs_diff > l_mad) { + l_mad = abs_diff; + } + } + tmp_cur_row += iPicStride; + tmp_ref_row += iPicStride; + } + *pFrameSad += l_sad; + pSad8x8[ (mb_index << 2) + 3] = l_sad; + pSd8x8[ (mb_index << 2) + 3] = l_sd; + pMad8x8[ (mb_index << 2) + 3] = l_mad; + + tmp_ref += 16; + tmp_cur += 16; + ++mb_index; + } + tmp_ref += step; + tmp_cur += step; + } +} + +WELSVP_NAMESPACE_END diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/vaacalc/vaacalculation.cpp b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/vaacalc/vaacalculation.cpp new file mode 100644 index 000000000..bc9120087 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/vaacalc/vaacalculation.cpp @@ -0,0 +1,161 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include "vaacalculation.h" +#include "cpu.h" + +WELSVP_NAMESPACE_BEGIN + + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +CVAACalculation::CVAACalculation (int32_t iCpuFlag) { + m_iCPUFlag = iCpuFlag; + m_eMethod = METHOD_VAA_STATISTICS; + + WelsMemset (&m_sCalcParam, 0, sizeof (m_sCalcParam)); + WelsMemset (&m_sVaaFuncs, 0, sizeof (m_sVaaFuncs)); + InitVaaFuncs (m_sVaaFuncs, m_iCPUFlag); +} + +CVAACalculation::~CVAACalculation() { +} + +void CVAACalculation::InitVaaFuncs (SVaaFuncs& sVaaFuncs, int32_t iCpuFlag) { + sVaaFuncs.pfVAACalcSad = VAACalcSad_c; + sVaaFuncs.pfVAACalcSadBgd = VAACalcSadBgd_c; + sVaaFuncs.pfVAACalcSadSsd = VAACalcSadSsd_c; + sVaaFuncs.pfVAACalcSadSsdBgd = VAACalcSadSsdBgd_c; + sVaaFuncs.pfVAACalcSadVar = VAACalcSadVar_c; +#ifdef X86_ASM + if ((iCpuFlag & WELS_CPU_SSE2) == WELS_CPU_SSE2) { + sVaaFuncs.pfVAACalcSad = VAACalcSad_sse2; + sVaaFuncs.pfVAACalcSadBgd = VAACalcSadBgd_sse2; + sVaaFuncs.pfVAACalcSadSsd = VAACalcSadSsd_sse2; + sVaaFuncs.pfVAACalcSadSsdBgd = VAACalcSadSsdBgd_sse2; + sVaaFuncs.pfVAACalcSadVar = VAACalcSadVar_sse2; + } +#ifdef HAVE_AVX2 + if (iCpuFlag & WELS_CPU_AVX2) { + sVaaFuncs.pfVAACalcSad = VAACalcSad_avx2; + sVaaFuncs.pfVAACalcSadBgd = VAACalcSadBgd_avx2; + sVaaFuncs.pfVAACalcSadSsd = VAACalcSadSsd_avx2; + sVaaFuncs.pfVAACalcSadSsdBgd = VAACalcSadSsdBgd_avx2; + sVaaFuncs.pfVAACalcSadVar = VAACalcSadVar_avx2; + } +#endif +#endif//X86_ASM +#ifdef HAVE_NEON + if ((iCpuFlag & WELS_CPU_NEON) == WELS_CPU_NEON) { + sVaaFuncs.pfVAACalcSad = VAACalcSad_neon; + sVaaFuncs.pfVAACalcSadBgd = VAACalcSadBgd_neon; + sVaaFuncs.pfVAACalcSadSsd = VAACalcSadSsd_neon; + sVaaFuncs.pfVAACalcSadSsdBgd = VAACalcSadSsdBgd_neon; + sVaaFuncs.pfVAACalcSadVar = VAACalcSadVar_neon; + } +#endif//HAVE_NEON + +#ifdef HAVE_NEON_AARCH64 + if ((iCpuFlag & WELS_CPU_NEON) == WELS_CPU_NEON) { + sVaaFuncs.pfVAACalcSad = VAACalcSad_AArch64_neon; + sVaaFuncs.pfVAACalcSadBgd = 
VAACalcSadBgd_AArch64_neon; + sVaaFuncs.pfVAACalcSadSsd = VAACalcSadSsd_AArch64_neon; + sVaaFuncs.pfVAACalcSadSsdBgd = VAACalcSadSsdBgd_AArch64_neon; + sVaaFuncs.pfVAACalcSadVar = VAACalcSadVar_AArch64_neon; + } +#endif//HAVE_NEON_AARCH64 + +#ifdef HAVE_MMI + if ((iCpuFlag & WELS_CPU_MMI) == WELS_CPU_MMI) { + sVaaFuncs.pfVAACalcSad = VAACalcSad_mmi; + sVaaFuncs.pfVAACalcSadBgd = VAACalcSadBgd_mmi; + sVaaFuncs.pfVAACalcSadSsd = VAACalcSadSsd_mmi; + sVaaFuncs.pfVAACalcSadSsdBgd = VAACalcSadSsdBgd_mmi; + sVaaFuncs.pfVAACalcSadVar = VAACalcSadVar_mmi; + } +#endif//HAVE_MMI +} + +EResult CVAACalculation::Process (int32_t iType, SPixMap* pSrcPixMap, SPixMap* pRefPixMap) { + uint8_t* pCurData = (uint8_t*)pSrcPixMap->pPixel[0]; + uint8_t* pRefData = (uint8_t*)pRefPixMap->pPixel[0]; + int32_t iPicWidth = pSrcPixMap->sRect.iRectWidth; + int32_t iPicHeight = pSrcPixMap->sRect.iRectHeight; + int32_t iPicStride = pSrcPixMap->iStride[0]; + + SVAACalcResult* pResult = m_sCalcParam.pCalcResult; + + if (pCurData == NULL || pRefData == NULL) { + return RET_INVALIDPARAM; + } + + pResult->pCurY = pCurData; + pResult->pRefY = pRefData; + if (m_sCalcParam.iCalcBgd) { + if (m_sCalcParam.iCalcSsd) { + m_sVaaFuncs.pfVAACalcSadSsdBgd (pCurData, pRefData, iPicWidth, iPicHeight, iPicStride, &pResult->iFrameSad, + (int32_t*)pResult->pSad8x8, pResult->pSum16x16, pResult->pSumOfSquare16x16, pResult->pSsd16x16, + (int32_t*)pResult->pSumOfDiff8x8, (uint8_t*)pResult->pMad8x8); + } else { + m_sVaaFuncs.pfVAACalcSadBgd (pCurData, pRefData, iPicWidth, iPicHeight, iPicStride, &pResult->iFrameSad, + (int32_t*) (pResult->pSad8x8), (int32_t*) (pResult->pSumOfDiff8x8), (uint8_t*)pResult->pMad8x8); + } + } else { + if (m_sCalcParam.iCalcSsd) { + m_sVaaFuncs.pfVAACalcSadSsd (pCurData, pRefData, iPicWidth, iPicHeight, iPicStride, &pResult->iFrameSad, + (int32_t*)pResult->pSad8x8, pResult->pSum16x16, pResult->pSumOfSquare16x16, pResult->pSsd16x16); + } else { + if (m_sCalcParam.iCalcVar) { + 
m_sVaaFuncs.pfVAACalcSadVar (pCurData, pRefData, iPicWidth, iPicHeight, iPicStride, &pResult->iFrameSad, + (int32_t*)pResult->pSad8x8, pResult->pSum16x16, pResult->pSumOfSquare16x16); + } else { + m_sVaaFuncs.pfVAACalcSad (pCurData, pRefData, iPicWidth, iPicHeight, iPicStride, &pResult->iFrameSad, + (int32_t*)pResult->pSad8x8); + } + } + } + + return RET_SUCCESS; +} + +EResult CVAACalculation::Set (int32_t iType, void* pParam) { + if (pParam == NULL || ((SVAACalcParam*)pParam)->pCalcResult == NULL) { + return RET_INVALIDPARAM; + } + + m_sCalcParam = * (SVAACalcParam*)pParam; + + return RET_SUCCESS; +} + + +WELSVP_NAMESPACE_END diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/vaacalc/vaacalculation.h b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/vaacalc/vaacalculation.h new file mode 100644 index 000000000..876204d20 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/vaacalc/vaacalculation.h @@ -0,0 +1,164 @@ +/*! + * \copy + * Copyright (c) 2011-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * \file : vaacalculation.h + * + * \brief : pVaa calculation class of wels video processor class + * + * \date : 2011/03/18 + * + * \description : 1. rewrite the package code of pVaa calculation class + * + ************************************************************************************* + */ + +#ifndef WELSVP_VAACALCULATION_H +#define WELSVP_VAACALCULATION_H + +#include "util.h" +#include "memory.h" +#include "WelsFrameWork.h" +#include "IWelsVP.h" + +WELSVP_NAMESPACE_BEGIN + +typedef void (VAACalcSadBgdFunc) (const uint8_t* pCurData, const uint8_t* pRefData, int32_t iPicWidth, + int32_t iPicHeight, + int32_t iPicStride, + int32_t* pFrameSad, int32_t* pSad8x8, int32_t* pSd8x8, uint8_t* pMad8x8); + +typedef void (VAACalcSadSsdBgdFunc) (const uint8_t* pCurData, const uint8_t* pRefData, int32_t iPicWidth, + int32_t iPicHeight, + int32_t iPicStride, + int32_t* pFrameSad, int32_t* pSad8x8, int32_t* pSum16x16, int32_t* pSumSquare16x16, + int32_t* pSsd16x16, int32_t* pSd8x8, uint8_t* pMad8x8); + +typedef void (VAACalcSadFunc) (const uint8_t* pCurData, const uint8_t* pRefData, int32_t iPicWidth, int32_t iPicHeight, + int32_t iPicStride, + int32_t* pFrameSad, int32_t* pSad8x8); + +typedef void (VAACalcSadVarFunc) (const uint8_t* pCurData, const uint8_t* pRefData, int32_t iPicWidth, + int32_t iPicHeight, + int32_t iPicStride, + int32_t* pFrameSad, int32_t* pSad8x8, int32_t* pSum16x16, int32_t* pSumSquare16x16); + +typedef void 
(VAACalcSadSsdFunc) (const uint8_t* pCurData, const uint8_t* pRefData, int32_t iPicWidth, + int32_t iPicHeight, + int32_t iPicStride, + int32_t* pFrameSad, int32_t* pSad8x8, int32_t* pSum16x16, int32_t* pSumSquare16x16, int32_t* pSsd16x16); + + +typedef VAACalcSadBgdFunc* PVAACalcSadBgdFunc; +typedef VAACalcSadSsdBgdFunc* PVAACalcSadSsdBgdFunc; +typedef VAACalcSadFunc* PVAACalcSadFunc; +typedef VAACalcSadVarFunc* PVAACalcSadVarFunc; +typedef VAACalcSadSsdFunc* PVAACalcSadSsdFunc; + +typedef struct TagVaaFuncs { + PVAACalcSadBgdFunc pfVAACalcSadBgd; + PVAACalcSadSsdBgdFunc pfVAACalcSadSsdBgd; + PVAACalcSadFunc pfVAACalcSad; + PVAACalcSadVarFunc pfVAACalcSadVar; + PVAACalcSadSsdFunc pfVAACalcSadSsd; +} SVaaFuncs; + + +VAACalcSadBgdFunc VAACalcSadBgd_c; +VAACalcSadSsdBgdFunc VAACalcSadSsdBgd_c; +VAACalcSadFunc VAACalcSad_c; +VAACalcSadVarFunc VAACalcSadVar_c; +VAACalcSadSsdFunc VAACalcSadSsd_c; + + +#ifdef X86_ASM +WELSVP_EXTERN_C_BEGIN +VAACalcSadBgdFunc VAACalcSadBgd_sse2; +VAACalcSadSsdBgdFunc VAACalcSadSsdBgd_sse2; +VAACalcSadFunc VAACalcSad_sse2; +VAACalcSadVarFunc VAACalcSadVar_sse2; +VAACalcSadSsdFunc VAACalcSadSsd_sse2; +VAACalcSadBgdFunc VAACalcSadBgd_avx2; +VAACalcSadSsdBgdFunc VAACalcSadSsdBgd_avx2; +VAACalcSadFunc VAACalcSad_avx2; +VAACalcSadVarFunc VAACalcSadVar_avx2; +VAACalcSadSsdFunc VAACalcSadSsd_avx2; +WELSVP_EXTERN_C_END +#endif + +#ifdef HAVE_NEON +WELSVP_EXTERN_C_BEGIN +VAACalcSadBgdFunc VAACalcSadBgd_neon; +VAACalcSadSsdBgdFunc VAACalcSadSsdBgd_neon; +VAACalcSadFunc VAACalcSad_neon; +VAACalcSadVarFunc VAACalcSadVar_neon; +VAACalcSadSsdFunc VAACalcSadSsd_neon; +WELSVP_EXTERN_C_END +#endif + +#ifdef HAVE_NEON_AARCH64 +WELSVP_EXTERN_C_BEGIN +VAACalcSadBgdFunc VAACalcSadBgd_AArch64_neon; +VAACalcSadSsdBgdFunc VAACalcSadSsdBgd_AArch64_neon; +VAACalcSadFunc VAACalcSad_AArch64_neon; +VAACalcSadVarFunc VAACalcSadVar_AArch64_neon; +VAACalcSadSsdFunc VAACalcSadSsd_AArch64_neon; +WELSVP_EXTERN_C_END +#endif + +#ifdef HAVE_MMI +WELSVP_EXTERN_C_BEGIN 
+VAACalcSadBgdFunc VAACalcSadBgd_mmi; +VAACalcSadSsdBgdFunc VAACalcSadSsdBgd_mmi; +VAACalcSadFunc VAACalcSad_mmi; +VAACalcSadVarFunc VAACalcSadVar_mmi; +VAACalcSadSsdFunc VAACalcSadSsd_mmi; +WELSVP_EXTERN_C_END +#endif + +class CVAACalculation : public IStrategy { + public: + CVAACalculation (int32_t iCpuFlag); + ~CVAACalculation(); + + EResult Process (int32_t iType, SPixMap* pCurPixMap, SPixMap* pRefPixMap); + EResult Set (int32_t iType, void* pParam); + + private: + void InitVaaFuncs (SVaaFuncs& sVaaFunc, int32_t iCpuFlag); + + private: + SVaaFuncs m_sVaaFuncs; + int32_t m_iCPUFlag; + SVAACalcParam m_sCalcParam; +}; + +WELSVP_NAMESPACE_END + +#endif diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/x86/denoisefilter.asm b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/x86/denoisefilter.asm new file mode 100644 index 000000000..7d75e72c3 --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/x86/denoisefilter.asm @@ -0,0 +1,284 @@ +;*! +;* \copy +;* Copyright (c) 2010-2013, Cisco Systems +;* All rights reserved. +;* +;* Redistribution and use in source and binary forms, with or without +;* modification, are permitted provided that the following conditions +;* are met: +;* +;* * Redistributions of source code must retain the above copyright +;* notice, this list of conditions and the following disclaimer. +;* +;* * Redistributions in binary form must reproduce the above copyright +;* notice, this list of conditions and the following disclaimer in +;* the documentation and/or other materials provided with the +;* distribution. +;* +;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +;* POSSIBILITY OF SUCH DAMAGE. +;* +;* +;* predenoise.asm +;* +;* Abstract +;* denoise for SVC2.1 +;* History +;* 4/13/2010 Created +;* 7/30/2010 Modified +;* +;* +;*************************************************************************/ +%include "asm_inc.asm" + +;*********************************************************************** +; Constant +;*********************************************************************** +%ifdef X86_32_PICASM +SECTION .text align=16 +%else +SECTION .rodata align=16 +%endif + +sse2_32 times 8 dw 32 +sse2_20 times 8 dw 20 + + + +;*********************************************************************** +; Code +;*********************************************************************** +SECTION .text + +%macro WEIGHT_LINE 9 + movq %2, %9 + punpcklbw %2, %7 + movdqa %8, %2 + + movdqa %1, %6 + psubusb %1, %8 + psubusb %8, %6 + por %8, %1 ; ABS(curPixel - centerPixel); + + movdqa %1, %3 + psubusb %1, %8 + + pmullw %1, %1 + psrlw %1, 5 + pmullw %2, %1 + paddusw %4, %1 + paddusw %5, %2 +%endmacro + +%macro WEIGHT_LINE1_UV 4 + movdqa %2, %1 + punpcklbw %2, %4 + paddw %3, %2 + + movdqa %2, %1 + psrldq %2, 1 + punpcklbw %2, %4 + paddw %3, %2 + + movdqa %2, %1 + psrldq %2, 2 + punpcklbw %2, %4 + psllw %2, 1 + paddw %3, %2 + + movdqa %2, %1 + psrldq %2, 3 + punpcklbw %2, %4 + paddw %3, %2 + + movdqa %2, %1 + psrldq %2, 4 + punpcklbw %2, %4 + paddw %3, %2 +%endmacro + +%macro WEIGHT_LINE2_UV 4 + movdqa %2, %1 + punpcklbw %2, %4 + paddw %3, %2 
+ + movdqa %2, %1 + psrldq %2, 1 + punpcklbw %2, %4 + psllw %2, 1 + paddw %3, %2 + + movdqa %2, %1 + psrldq %2, 2 + punpcklbw %2, %4 + psllw %2, 2 + paddw %3, %2 + + movdqa %2, %1 + psrldq %2, 3 + punpcklbw %2, %4 + psllw %2, 1 + paddw %3, %2 + + movdqa %2, %1 + psrldq %2, 4 + punpcklbw %2, %4 + paddw %3, %2 +%endmacro + +%macro WEIGHT_LINE3_UV 4 + movdqa %2, %1 + punpcklbw %2, %4 + psllw %2, 1 + paddw %3, %2 + + movdqa %2, %1 + psrldq %2, 1 + punpcklbw %2, %4 + psllw %2, 2 + paddw %3, %2 + + movdqa %2, %1 + psrldq %2, 2 + punpcklbw %2, %4 + pmullw %2, [pic(sse2_20)] + paddw %3, %2 + + movdqa %2, %1 + psrldq %2, 3 + punpcklbw %2, %4 + psllw %2, 2 + paddw %3, %2 + + movdqa %2, %1 + psrldq %2, 4 + punpcklbw %2, %4 + psllw %2, 1 + paddw %3, %2 +%endmacro + +;*********************************************************************** +; BilateralLumaFilter8_sse2(uint8_t *pixels, int stride); +;*********************************************************************** +; 1 2 3 +; 4 0 5 +; 6 7 8 +; 0: the center point + +WELS_EXTERN BilateralLumaFilter8_sse2 + + push r3 + %assign push_num 1 + LOAD_2_PARA + PUSH_XMM 8 + + pxor xmm7, xmm7 + + mov r3, r0 + + movq xmm6, [r0] + punpcklbw xmm6, xmm7 +%ifdef X86_32_PICASM + pcmpeqw xmm3, xmm3 + psrlw xmm3, 15 + psllw xmm3, 5 +%else + movdqa xmm3, [sse2_32] +%endif + pxor xmm4, xmm4 ; nTotWeight + pxor xmm5, xmm5 ; nSum + + dec r0 + WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [r0] ; pixel 4 + WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [r0 + 2] ; pixel 5 + + sub r0, r1 + WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [r0] ; pixel 1 + WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [r0 + 1] ; pixel 2 + WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [r0 + 2] ; pixel 3 + + lea r0, [r0 + r1 * 2] + WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [r0] ; pixel 6 + WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [r0 + 1] ; pixel 7 + WEIGHT_LINE xmm1, 
xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [r0 + 2] ; pixel 8 + + pcmpeqw xmm0, xmm0 + psrlw xmm0, 15 + psllw xmm0, 8 + psubusw xmm0, xmm4 + pmullw xmm0, xmm6 + paddusw xmm5, xmm0 + psrlw xmm5, 8 + packuswb xmm5, xmm5 + movq [r3], xmm5 + + + POP_XMM + pop r3 + %assign push_num 0 + + ret + +;*********************************************************************** +; void WaverageChromaFilter8_sse2(uint8_t *pixels, int stride); +;*********************************************************************** +;5x5 filter: +;1 1 2 1 1 +;1 2 4 2 1 +;2 4 20 4 2 +;1 2 4 2 1 +;1 1 2 1 1 + +WELS_EXTERN WaverageChromaFilter8_sse2 + + push r3 + + %assign push_num 1 + + INIT_X86_32_PIC r4 + LOAD_2_PARA + + mov r3, r1 + add r3, r3 + sub r0, r3 ; pixels - 2 * stride + sub r0, 2 + + pxor xmm0, xmm0 + pxor xmm3, xmm3 + + movdqu xmm1, [r0] + WEIGHT_LINE1_UV xmm1, xmm2, xmm3, xmm0 + + movdqu xmm1, [r0 + r1] + WEIGHT_LINE2_UV xmm1, xmm2, xmm3, xmm0 + + add r0, r3 + movdqu xmm1, [r0] + WEIGHT_LINE3_UV xmm1, xmm2, xmm3, xmm0 + + movdqu xmm1, [r0 + r1] + WEIGHT_LINE2_UV xmm1, xmm2, xmm3, xmm0 + + movdqu xmm1, [r0 + r1 * 2] + WEIGHT_LINE1_UV xmm1, xmm2, xmm3, xmm0 + + psrlw xmm3, 6 + packuswb xmm3, xmm3 + movq [r0 + 2], xmm3 + + + DEINIT_X86_32_PIC + pop r3 + + %assign push_num 0 + ret diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/x86/downsample_bilinear.asm b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/x86/downsample_bilinear.asm new file mode 100644 index 000000000..ece7883ec --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/x86/downsample_bilinear.asm @@ -0,0 +1,4686 @@ +;*! +;* \copy +;* Copyright (c) 2009-2013, Cisco Systems +;* All rights reserved. 
+;* +;* Redistribution and use in source and binary forms, with or without +;* modification, are permitted provided that the following conditions +;* are met: +;* +;* * Redistributions of source code must retain the above copyright +;* notice, this list of conditions and the following disclaimer. +;* +;* * Redistributions in binary form must reproduce the above copyright +;* notice, this list of conditions and the following disclaimer in +;* the documentation and/or other materials provided with the +;* distribution. +;* +;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +;* POSSIBILITY OF SUCH DAMAGE. 
+;* +;* +;* upsampling.asm +;* +;* Abstract +;* SIMD for pixel domain down sampling +;* +;* History +;* 10/22/2009 Created +;* +;*************************************************************************/ +%include "asm_inc.asm" + +%ifdef __NASM_VER__ + %use smartalign +%endif + +;*********************************************************************** +; Macros and other preprocessor constants +;*********************************************************************** + + +;*********************************************************************** +; Some constants +;*********************************************************************** + +;*********************************************************************** +; Local Data (Read Only) +;*********************************************************************** + +%ifdef X86_32_PICASM +SECTION .text align=32 +%else +SECTION .rodata align=32 +%endif + +;*********************************************************************** +; Various memory constants (trigonometric values or rounding values) +;*********************************************************************** + +ALIGN 32 +%ifndef X86_32_PICASM +db80h_256: + times 32 db 80h +shufb_0000000088888888: + times 8 db 0 + times 8 db 8 +shufb_000044448888CCCC: + times 4 db 0 + times 4 db 4 + times 4 db 8 + times 4 db 12 +%endif +shufb_mask_low: + db 00h, 80h, 02h, 80h, 04h, 80h, 06h, 80h, 08h, 80h, 0ah, 80h, 0ch, 80h, 0eh, 80h +shufb_mask_high: + db 01h, 80h, 03h, 80h, 05h, 80h, 07h, 80h, 09h, 80h, 0bh, 80h, 0dh, 80h, 0fh, 80h +add_extra_half: + dd 16384,0,0,0 + +shufb_mask_quarter: +db 00h, 04h, 08h, 0ch, 80h, 80h, 80h, 80h, 01h, 05h, 09h, 0dh, 80h, 80h, 80h, 80h + +shufb_mask_onethird_low_1: +db 00h, 03h, 06h, 09h, 0ch, 0fh, 80h, 80h, 80h, 80h, 80h, 80h, 80h, 80h, 80h, 80h +shufb_mask_onethird_low_2: +db 80h, 80h, 80h, 80h, 80h, 80h, 02h, 05h, 08h, 0bh, 0eh, 80h, 80h, 80h, 80h, 80h +shufb_mask_onethird_low_3: +db 80h, 80h, 80h, 80h, 80h, 80h, 80h, 80h, 80h, 80h, 80h, 01h, 
04h, 07h, 0ah, 0dh + +shufb_mask_onethird_high_1: +db 01h, 04h, 07h, 0ah, 0dh, 80h, 80h, 80h, 80h, 80h, 80h, 80h, 80h, 80h, 80h, 80h +shufb_mask_onethird_high_2: +db 80h, 80h, 80h, 80h, 80h, 00h, 03h, 06h, 09h, 0ch, 0fh, 80h, 80h, 80h, 80h, 80h +shufb_mask_onethird_high_3: +db 80h, 80h, 80h, 80h, 80h, 80h, 80h, 80h, 80h, 80h, 80h, 02h, 05h, 08h, 0bh, 0eh + +;*********************************************************************** +; Code +;*********************************************************************** + +SECTION .text + +;*********************************************************************** +; void DyadicBilinearDownsamplerWidthx32_sse( unsigned char* pDst, const int iDstStride, +; unsigned char* pSrc, const int iSrcStride, +; const int iSrcWidth, const int iSrcHeight ); +;*********************************************************************** +WELS_EXTERN DyadicBilinearDownsamplerWidthx32_sse +%ifdef X86_32 + push r6 + %assign push_num 1 +%else + %assign push_num 0 +%endif + LOAD_6_PARA + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r4, r4d + SIGN_EXTENSION r5, r5d + +%ifndef X86_32 + push r12 + mov r12, r4 +%endif + sar r5, $01 ; iSrcHeight >> 1 + +.yloops1: +%ifdef X86_32 + mov r4, arg5 +%else + mov r4, r12 +%endif + sar r4, $01 ; iSrcWidth >> 1 + mov r6, r4 ; iDstWidth restored at ebx + sar r4, $04 ; (iSrcWidth >> 1) / 16 ; loop count = num_of_mb + neg r6 ; - (iSrcWidth >> 1) + ; each loop = source bandwidth: 32 bytes +.xloops1: + ; 1st part horizonal loop: x16 bytes + ; mem hi<- ->lo + ;1st Line Src: mm0: d D c C b B a A mm1: h H g G f F e E + ;2nd Line Src: mm2: l L k K j J i I mm3: p P o O n N m M + ;=> target: + ;: H G F E D C B A, P O N M L K J I + ;: h g f e d c b a, p o n m l k j i + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + movq mm0, [r2] ; 1st pSrc line + movq mm1, [r2+8] ; 1st pSrc line + 8 + movq mm2, [r2+r3] ; 2nd pSrc line + movq mm3, [r2+r3+8] ; 2nd pSrc line + 8 + + ; to handle mm0, mm1, mm2, mm3 + pshufw mm4, mm0, 
0d8h ; d D b B c C a A ; 11011000 B + pshufw mm5, mm4, 04eh ; c C a A d D b B ; 01001110 B + punpcklbw mm4, mm5 ; d c D C b a B A + pshufw mm4, mm4, 0d8h ; d c b a D C B A ; 11011000 B: mm4 + + pshufw mm5, mm1, 0d8h ; h H f F g G e E ; 11011000 B + pshufw mm6, mm5, 04eh ; g G e E h H f F ; 01001110 B + punpcklbw mm5, mm6 ; h g H G f e F E + pshufw mm5, mm5, 0d8h ; h g f e H G F E ; 11011000 B: mm5 + + pshufw mm6, mm2, 0d8h ; l L j J k K i I ; 11011000 B + pshufw mm7, mm6, 04eh ; k K i I l L j J ; 01001110 B + punpcklbw mm6, mm7 ; l k L K j i J I + pshufw mm6, mm6, 0d8h ; l k j i L K J I ; 11011000 B: mm6 + + pshufw mm7, mm3, 0d8h ; p P n N o O m M ; 11011000 B + pshufw mm0, mm7, 04eh ; o O m M p P n N ; 01001110 B + punpcklbw mm7, mm0 ; p o P O n m N M + pshufw mm7, mm7, 0d8h ; p o n m P O N M ; 11011000 B: mm7 + + ; to handle mm4, mm5, mm6, mm7 + movq mm0, mm4 ; + punpckldq mm0, mm5 ; H G F E D C B A + punpckhdq mm4, mm5 ; h g f e d c b a + + movq mm1, mm6 + punpckldq mm1, mm7 ; P O N M L K J I + punpckhdq mm6, mm7 ; p o n m l k j i + + ; avg within MB horizon width (16 x 2 lines) + pavgb mm0, mm4 ; (A+a+1)>>1, .., (H+h+1)>>1, temp_row1 + pavgb mm1, mm6 ; (I+i+1)>>1, .., (P+p+1)>>1, temp_row2 + pavgb mm0, mm1 ; (temp_row1+temp_row2+1)>>1, pending here and wait another horizonal part done then write memory once + + ; 2nd part horizonal loop: x16 bytes + ; mem hi<- ->lo + ;1st Line Src: mm0: d D c C b B a A mm1: h H g G f F e E + ;2nd Line Src: mm2: l L k K j J i I mm3: p P o O n N m M + ;=> target: + ;: H G F E D C B A, P O N M L K J I + ;: h g f e d c b a, p o n m l k j i + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + movq mm1, [r2+16] ; 1st pSrc line + 16 + movq mm2, [r2+24] ; 1st pSrc line + 24 + movq mm3, [r2+r3+16] ; 2nd pSrc line + 16 + movq mm4, [r2+r3+24] ; 2nd pSrc line + 24 + + ; to handle mm1, mm2, mm3, mm4 + pshufw mm5, mm1, 0d8h ; d D b B c C a A ; 11011000 B + pshufw mm6, mm5, 04eh ; c C a A d D b B ; 01001110 B + punpcklbw mm5, mm6 ; d c D C b a B A + 
pshufw mm5, mm5, 0d8h ; d c b a D C B A ; 11011000 B: mm5 + + pshufw mm6, mm2, 0d8h ; h H f F g G e E ; 11011000 B + pshufw mm7, mm6, 04eh ; g G e E h H f F ; 01001110 B + punpcklbw mm6, mm7 ; h g H G f e F E + pshufw mm6, mm6, 0d8h ; h g f e H G F E ; 11011000 B: mm6 + + pshufw mm7, mm3, 0d8h ; l L j J k K i I ; 11011000 B + pshufw mm1, mm7, 04eh ; k K i I l L j J ; 01001110 B + punpcklbw mm7, mm1 ; l k L K j i J I + pshufw mm7, mm7, 0d8h ; l k j i L K J I ; 11011000 B: mm7 + + pshufw mm1, mm4, 0d8h ; p P n N o O m M ; 11011000 B + pshufw mm2, mm1, 04eh ; o O m M p P n N ; 01001110 B + punpcklbw mm1, mm2 ; p o P O n m N M + pshufw mm1, mm1, 0d8h ; p o n m P O N M ; 11011000 B: mm1 + + ; to handle mm5, mm6, mm7, mm1 + movq mm2, mm5 + punpckldq mm2, mm6 ; H G F E D C B A + punpckhdq mm5, mm6 ; h g f e d c b a + + movq mm3, mm7 + punpckldq mm3, mm1 ; P O N M L K J I + punpckhdq mm7, mm1 ; p o n m l k j i + + ; avg within MB horizon width (16 x 2 lines) + pavgb mm2, mm5 ; (A+a+1)>>1, .., (H+h+1)>>1, temp_row1 + pavgb mm3, mm7 ; (I+i+1)>>1, .., (P+p+1)>>1, temp_row2 + pavgb mm2, mm3 ; (temp_row1+temp_row2+1)>>1, done in another 2nd horizonal part + + movq [r0 ], mm0 + movq [r0+8], mm2 + + ; next SMB + lea r2, [r2+32] + lea r0, [r0+16] + + dec r4 + jg near .xloops1 + + ; next line + lea r2, [r2+2*r3] ; next end of lines + lea r2, [r2+2*r6] ; reset to base 0 [- 2 * iDstWidth] + lea r0, [r0+r1] + lea r0, [r0+r6] ; reset to base 0 [- iDstWidth] + + dec r5 + jg near .yloops1 + + WELSEMMS +%ifndef X86_32 + pop r12 +%endif + LOAD_6_PARA_POP +%ifdef X86_32 + pop r6 +%endif + ret + +;*********************************************************************** +; void DyadicBilinearDownsamplerWidthx16_sse( unsigned char* pDst, const int iDstStride, +; unsigned char* pSrc, const int iSrcStride, +; const int iSrcWidth, const int iSrcHeight ); +;*********************************************************************** +WELS_EXTERN DyadicBilinearDownsamplerWidthx16_sse +%ifdef X86_32 + 
push r6 + %assign push_num 1 +%else + %assign push_num 0 +%endif + LOAD_6_PARA + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r4, r4d + SIGN_EXTENSION r5, r5d + +%ifndef X86_32 + push r12 + mov r12, r4 +%endif + sar r5, $01 ; iSrcHeight >> 1 + +.yloops2: +%ifdef X86_32 + mov r4, arg5 +%else + mov r4, r12 +%endif + sar r4, $01 ; iSrcWidth >> 1 + mov r6, r4 ; iDstWidth restored at ebx + sar r4, $03 ; (iSrcWidth >> 1) / 8 ; loop count = num_of_mb + neg r6 ; - (iSrcWidth >> 1) + ; each loop = source bandwidth: 16 bytes +.xloops2: + ; 1st part horizonal loop: x16 bytes + ; mem hi<- ->lo + ;1st Line Src: mm0: d D c C b B a A mm1: h H g G f F e E + ;2nd Line Src: mm2: l L k K j J i I mm3: p P o O n N m M + ;=> target: + ;: H G F E D C B A, P O N M L K J I + ;: h g f e d c b a, p o n m l k j i + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + movq mm0, [r2] ; 1st pSrc line + movq mm1, [r2+8] ; 1st pSrc line + 8 + movq mm2, [r2+r3] ; 2nd pSrc line + movq mm3, [r2+r3+8] ; 2nd pSrc line + 8 + + ; to handle mm0, mm1, mm2, mm3 + pshufw mm4, mm0, 0d8h ; d D b B c C a A ; 11011000 B + pshufw mm5, mm4, 04eh ; c C a A d D b B ; 01001110 B + punpcklbw mm4, mm5 ; d c D C b a B A + pshufw mm4, mm4, 0d8h ; d c b a D C B A ; 11011000 B: mm4 + + pshufw mm5, mm1, 0d8h ; h H f F g G e E ; 11011000 B + pshufw mm6, mm5, 04eh ; g G e E h H f F ; 01001110 B + punpcklbw mm5, mm6 ; h g H G f e F E + pshufw mm5, mm5, 0d8h ; h g f e H G F E ; 11011000 B: mm5 + + pshufw mm6, mm2, 0d8h ; l L j J k K i I ; 11011000 B + pshufw mm7, mm6, 04eh ; k K i I l L j J ; 01001110 B + punpcklbw mm6, mm7 ; l k L K j i J I + pshufw mm6, mm6, 0d8h ; l k j i L K J I ; 11011000 B: mm6 + + pshufw mm7, mm3, 0d8h ; p P n N o O m M ; 11011000 B + pshufw mm0, mm7, 04eh ; o O m M p P n N ; 01001110 B + punpcklbw mm7, mm0 ; p o P O n m N M + pshufw mm7, mm7, 0d8h ; p o n m P O N M ; 11011000 B: mm7 + + ; to handle mm4, mm5, mm6, mm7 + movq mm0, mm4 ; + punpckldq mm0, mm5 ; H G F E D C B A + punpckhdq mm4, mm5 
; h g f e d c b a + + movq mm1, mm6 + punpckldq mm1, mm7 ; P O N M L K J I + punpckhdq mm6, mm7 ; p o n m l k j i + + ; avg within MB horizon width (16 x 2 lines) + pavgb mm0, mm4 ; (A+a+1)>>1, .., (H+h+1)>>1, temp_row1 + pavgb mm1, mm6 ; (I+i+1)>>1, .., (P+p+1)>>1, temp_row2 + pavgb mm0, mm1 ; (temp_row1+temp_row2+1)>>1, pending here and wait another horizonal part done then write memory once + + movq [r0 ], mm0 + + ; next SMB + lea r2, [r2+16] + lea r0, [r0+8] + + dec r4 + jg near .xloops2 + + ; next line + lea r2, [r2+2*r3] ; next end of lines + lea r2, [r2+2*r6] ; reset to base 0 [- 2 * iDstWidth] + lea r0, [r0+r1] + lea r0, [r0+r6] ; reset to base 0 [- iDstWidth] + + dec r5 + jg near .yloops2 + + WELSEMMS +%ifndef X86_32 + pop r12 +%endif + LOAD_6_PARA_POP +%ifdef X86_32 + pop r6 +%endif + ret + +;*********************************************************************** +; void DyadicBilinearDownsamplerWidthx8_sse( unsigned char* pDst, const int iDstStride, +; unsigned char* pSrc, const int iSrcStride, +; const int iSrcWidth, const int iSrcHeight ); +;*********************************************************************** +WELS_EXTERN DyadicBilinearDownsamplerWidthx8_sse +%ifdef X86_32 + push r6 + %assign push_num 1 +%else + %assign push_num 0 +%endif + LOAD_6_PARA + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r4, r4d + SIGN_EXTENSION r5, r5d + +%ifndef X86_32 + push r12 + mov r12, r4 +%endif + sar r5, $01 ; iSrcHeight >> 1 + +.yloops3: +%ifdef X86_32 + mov r4, arg5 +%else + mov r4, r12 +%endif + sar r4, $01 ; iSrcWidth >> 1 + mov r6, r4 ; iDstWidth restored at ebx + sar r4, $02 ; (iSrcWidth >> 1) / 4 ; loop count = num_of_mb + neg r6 ; - (iSrcWidth >> 1) + ; each loop = source bandwidth: 8 bytes +.xloops3: + ; 1st part horizonal loop: x8 bytes + ; mem hi<- ->lo + ;1st Line Src: mm0: d D c C b B a A + ;2nd Line Src: mm1: h H g G f F e E + ;=> target: + ;: H G F E D C B A + ;: h g f e d c b a + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + movq 
mm0, [r2] ; 1st pSrc line + movq mm1, [r2+r3] ; 2nd pSrc line + + ; to handle mm0, mm1, mm2, mm3 + pshufw mm2, mm0, 0d8h ; d D b B c C a A ; 11011000 B + pshufw mm3, mm2, 04eh ; c C a A d D b B ; 01001110 B + punpcklbw mm2, mm3 ; d c D C b a B A + pshufw mm2, mm2, 0d8h ; d c b a D C B A ; 11011000 B: mm4 + + pshufw mm4, mm1, 0d8h ; h H f F g G e E ; 11011000 B + pshufw mm5, mm4, 04eh ; g G e E h H f F ; 01001110 B + punpcklbw mm4, mm5 ; h g H G f e F E + pshufw mm4, mm4, 0d8h ; h g f e H G F E ; 11011000 B: mm5 + + ; to handle mm2, mm4 + movq mm0, mm2 ; + punpckldq mm0, mm4 ; H G F E D C B A + punpckhdq mm2, mm4 ; h g f e d c b a + + ; avg within MB horizon width (16 x 2 lines) + pavgb mm0, mm2 ; (H+h+1)>>1, .., (A+a+1)>>1, temp_row1, 2 + pshufw mm1, mm0, 04eh ; 01001110 B + pavgb mm0, mm1 ; (temp_row1+temp_row2+1)>>1, pending here and wait another horizonal part done then write memory once + + movd [r0], mm0 + + ; next unit + lea r2, [r2+8] + lea r0, [r0+4] + + dec r4 + jg near .xloops3 + + ; next line + lea r2, [r2+2*r3] ; next end of lines + lea r2, [r2+2*r6] ; reset to base 0 [- 2 * iDstWidth] + lea r0, [r0+r1] + lea r0, [r0+r6] ; reset to base 0 [- iDstWidth] + + dec r5 + jg near .yloops3 + + WELSEMMS +%ifndef X86_32 + pop r12 +%endif + LOAD_6_PARA_POP +%ifdef X86_32 + pop r6 +%endif + ret + + + +;*********************************************************************** +; void DyadicBilinearDownsamplerWidthx32_ssse3( unsigned char* pDst, const int iDstStride, +; unsigned char* pSrc, const int iSrcStride, +; const int iSrcWidth, const int iSrcHeight ); +;*********************************************************************** +WELS_EXTERN DyadicBilinearDownsamplerWidthx32_ssse3 +%ifdef X86_32 + push r6 + %assign push_num 1 +%else + %assign push_num 0 +%endif + LOAD_6_PARA + PUSH_XMM 4 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r4, r4d + SIGN_EXTENSION r5, r5d + +%ifndef X86_32 + push r12 + mov r12, r4 +%endif + sar r5, $01 ; iSrcHeight >> 
1 + + WELS_DB1 xmm3 + WELS_Zero xmm2 + sar r4, $01 ; iSrcWidth >> 1 + add r0, r4 ; pDst += iSrcWidth >> 1 + +.yloops4: +%ifdef X86_32 + mov r4, arg5 +%else + mov r4, r12 +%endif + sar r4, $01 ; iSrcWidth >> 1 + neg r4 ; -(iSrcWidth >> 1) + mov r6, r4 + align 16 + ; each loop = source bandwidth: 32 bytes +.xloops4: + movdqa xmm0, [r2+r3] + movdqa xmm1, [r2+r3+16] + pavgb xmm0, [r2] ; avg vertical pixels 0-15 + pavgb xmm1, [r2+16] ; avg vertical pixels 16-31 + add r2, 32 ; pSrc += 32 + pmaddubsw xmm0, xmm3 ; pairwise horizontal sum neighboring pixels 0-15 + pmaddubsw xmm1, xmm3 ; pairwise horizontal sum neighboring pixels 16-31 + pavgw xmm0, xmm2 ; (sum + 1) >> 1 + pavgw xmm1, xmm2 ; (sum + 1) >> 1 + packuswb xmm0, xmm1 ; pack words to bytes + movdqa [r0+r4], xmm0 ; store results + add r4, 16 + jl .xloops4 + + ; next line + lea r2, [r2+2*r3] ; next end of lines + lea r2, [r2+2*r6] ; reset to base 0 [- 2 * iDstWidth] + lea r0, [r0+r1] + + sub r5, 1 + jg .yloops4 + +%ifndef X86_32 + pop r12 +%endif + + POP_XMM + LOAD_6_PARA_POP +%ifdef X86_32 + pop r6 +%endif + ret + +;*********************************************************************** +; void DyadicBilinearDownsamplerWidthx16_ssse3( unsigned char* pDst, const int iDstStride, +; unsigned char* pSrc, const int iSrcStride, +; const int iSrcWidth, const int iSrcHeight ); +;*********************************************************************** +WELS_EXTERN DyadicBilinearDownsamplerWidthx16_ssse3 +%ifdef X86_32 + push r6 + %assign push_num 1 +%else + %assign push_num 0 +%endif + LOAD_6_PARA + PUSH_XMM 4 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r4, r4d + SIGN_EXTENSION r5, r5d + +%ifndef X86_32 + push r12 + mov r12, r4 +%endif + sar r5, $01 ; iSrcHeight >> 1 + WELS_DB1 xmm3 + WELS_Zero xmm2 + add r2, r4 ; pSrc += iSrcWidth + sar r4, $01 ; iSrcWidth >> 1 + add r0, r4 ; pDst += iSrcWidth >> 1 + +.yloops5: +%ifdef X86_32 + mov r4, arg5 +%else + mov r4, r12 +%endif + sar r4, $01 ; iSrcWidth >> 1 + 
neg r4 ; -(iSrcWidth >> 1) + lea r6, [r2+r3] ; pSrc + iSrcStride + align 16 + ; each loop = source bandwidth: 16 bytes +.xloops5: + movdqa xmm0, [r2+2*r4] + pavgb xmm0, [r6+2*r4] ; avg vertical pixels + pmaddubsw xmm0, xmm3 ; pairwise horizontal sum neighboring pixels + pavgw xmm0, xmm2 ; (sum + 1) >> 1 + packuswb xmm0, xmm0 ; pack words to bytes + movlps [r0+r4], xmm0 ; store results + add r4, 8 + jl .xloops5 + + ; next line + lea r2, [r2+2*r3] ; next end of lines + lea r0, [r0+r1] + + sub r5, 1 + jg .yloops5 + +%ifndef X86_32 + pop r12 +%endif + + POP_XMM + LOAD_6_PARA_POP +%ifdef X86_32 + pop r6 +%endif + ret + + +%ifdef X86_32 +;************************************************************************************************************** +;int GeneralBilinearAccurateDownsampler_sse2( unsigned char* pDst, const int iDstStride, const int iDstWidth, const int iDstHeight, +; unsigned char* pSrc, const int iSrcStride, +; unsigned int uiScaleX, unsigned int uiScaleY ); +;{ +;************************************************************************************************************** + +WELS_EXTERN GeneralBilinearAccurateDownsampler_sse2 + push ebp + push esi + push edi + push ebx +%define pushsize 16 +%define localsize 16 +%define pDstData esp + pushsize + localsize + 4 +%define dwDstStride esp + pushsize + localsize + 8 +%define dwDstWidth esp + pushsize + localsize + 12 +%define dwDstHeight esp + pushsize + localsize + 16 +%define pSrcData esp + pushsize + localsize + 20 +%define dwSrcStride esp + pushsize + localsize + 24 +%define uiScaleX esp + pushsize + localsize + 28 +%define uiScaleY esp + pushsize + localsize + 32 +%define tmpHeight esp + 0 +%define yInverse esp + 4 +%define xInverse esp + 8 +%define dstStep esp + 12 + sub esp, localsize + + pxor xmm0, xmm0 + mov eax, [uiScaleX] + and eax, 32767 + mov ebx, eax + neg ebx + and ebx, 32767 + movd xmm1, eax ; uinc(uiScaleX mod 32767) + movd xmm2, ebx ; -uinc + psllq xmm1, 32 + por xmm1, xmm2 ; 0 0 uinc -uinc 
(dword) + pshufd xmm7, xmm1, 01000100b ; xmm7: uinc -uinc uinc -uinc + + mov eax, [uiScaleY] + and eax, 32767 + mov ebx, eax + neg ebx + and ebx, 32767 + movd xmm6, eax ; vinc(uiScaleY mod 32767) + movd xmm2, ebx ; -vinc + psllq xmm6, 32 + por xmm6, xmm2 ; 0 0 vinc -vinc (dword) + pshufd xmm6, xmm6, 01010000b ; xmm6: vinc vinc -vinc -vinc + + mov edx, 40003fffh + movd xmm5, edx + punpcklwd xmm5, xmm0 ; 16384 16383 + pshufd xmm5, xmm5, 01000100b ; xmm5: 16384 16383 16384 16383 + + +DOWNSAMPLE: + + mov eax, [dwDstHeight] + mov edi, [pDstData] + mov edx, [dwDstStride] + mov ecx, [dwDstWidth] + sub edx, ecx + mov [dstStep], edx ; stride - width + dec eax + mov [tmpHeight], eax + mov eax, 16384 + mov [yInverse], eax + + pshufd xmm4, xmm5, 01010000b ; initial v to 16384 16384 16383 16383 + +HEIGHT: + mov eax, [yInverse] + mov esi, [pSrcData] + shr eax, 15 + mul dword [dwSrcStride] + add esi, eax ; get current row address + mov ebp, esi + add ebp, [dwSrcStride] + + mov eax, 16384 + mov [xInverse], eax + mov ecx, [dwDstWidth] + dec ecx + + movdqa xmm3, xmm5 ; initial u to 16384 16383 16384 16383 + +WIDTH: + mov eax, [xInverse] + shr eax, 15 + + movd xmm1, [esi+eax] ; xxxxxxba + movd xmm2, [ebp+eax] ; xxxxxxdc + pxor xmm0, xmm0 + punpcklwd xmm1, xmm2 ; xxxxdcba + punpcklbw xmm1, xmm0 ; 0d0c0b0a + punpcklwd xmm1, xmm0 ; 000d000c000b000a + + movdqa xmm2, xmm4 ; xmm2: vv(1-v)(1-v) tmpv + pmaddwd xmm2, xmm3 ; mul u(1-u)u(1-u) on xmm2 + movdqa xmm0, xmm2 + pmuludq xmm2, xmm1 + psrlq xmm0, 32 + psrlq xmm1, 32 + pmuludq xmm0, xmm1 + paddq xmm2, xmm0 + pshufd xmm1, xmm2, 00001110b + paddq xmm2, xmm1 + psrlq xmm2, 29 + + movd eax, xmm2 + inc eax + shr eax, 1 + mov [edi], al + inc edi + + mov eax, [uiScaleX] + add [xInverse], eax + + paddw xmm3, xmm7 ; inc u + psllw xmm3, 1 + psrlw xmm3, 1 + + loop WIDTH + +WIDTH_END: + mov eax, [xInverse] + shr eax, 15 + mov cl, [esi+eax] + mov [edi], cl + inc edi + + mov eax, [uiScaleY] + add [yInverse], eax + add edi, [dstStep] + + paddw xmm4, 
xmm6 ; inc v + psllw xmm4, 1 + psrlw xmm4, 1 + + dec dword [tmpHeight] + jg HEIGHT + + +LAST_ROW: + mov eax, [yInverse] + mov esi, [pSrcData] + shr eax, 15 + mul dword [dwSrcStride] + add esi, eax ; get current row address + + mov eax, 16384 + mov [xInverse], eax + mov ecx, [dwDstWidth] + +LAST_ROW_WIDTH: + mov eax, [xInverse] + shr eax, 15 + + mov al, [esi+eax] + mov [edi], al + inc edi + + mov eax, [uiScaleX] + add [xInverse], eax + + loop LAST_ROW_WIDTH + +LAST_ROW_END: + + add esp, localsize + pop ebx + pop edi + pop esi + pop ebp +%undef pushsize +%undef localsize +%undef pSrcData +%undef dwSrcWidth +%undef dwSrcHeight +%undef dwSrcStride +%undef pDstData +%undef dwDstWidth +%undef dwDstHeight +%undef dwDstStride +%undef uiScaleX +%undef uiScaleY +%undef tmpHeight +%undef yInverse +%undef xInverse +%undef dstStep + ret + + + + +;************************************************************************************************************** +;int GeneralBilinearFastDownsampler_sse2( unsigned char* pDst, const int iDstStride, const int iDstWidth, const int iDstHeight, +; unsigned char* pSrc, const int iSrcStride, +; unsigned int uiScaleX, unsigned int uiScaleY ); +;{ +;************************************************************************************************************** + +WELS_EXTERN GeneralBilinearFastDownsampler_sse2 + push ebp + push esi + push edi + push ebx +%define pushsize 16 +%define localsize 16 +%define pDstData esp + pushsize + localsize + 4 +%define dwDstStride esp + pushsize + localsize + 8 +%define dwDstWidth esp + pushsize + localsize + 12 +%define dwDstHeight esp + pushsize + localsize + 16 +%define pSrcData esp + pushsize + localsize + 20 +%define dwSrcStride esp + pushsize + localsize + 24 +%define uiScaleX esp + pushsize + localsize + 28 +%define uiScaleY esp + pushsize + localsize + 32 +%define tmpHeight esp + 0 +%define yInverse esp + 4 +%define xInverse esp + 8 +%define dstStep esp + 12 + sub esp, localsize + + pxor xmm0, xmm0 + mov 
edx, 65535 + mov eax, [uiScaleX] + and eax, edx + mov ebx, eax + neg ebx + and ebx, 65535 + movd xmm1, eax ; uinc(uiScaleX mod 65536) + movd xmm2, ebx ; -uinc + psllq xmm1, 32 + por xmm1, xmm2 ; 0 uinc 0 -uinc + pshuflw xmm7, xmm1, 10001000b ; xmm7: uinc -uinc uinc -uinc + + mov eax, [uiScaleY] + and eax, 32767 + mov ebx, eax + neg ebx + and ebx, 32767 + movd xmm6, eax ; vinc(uiScaleY mod 32767) + movd xmm2, ebx ; -vinc + psllq xmm6, 32 + por xmm6, xmm2 ; 0 vinc 0 -vinc + pshuflw xmm6, xmm6, 10100000b ; xmm6: vinc vinc -vinc -vinc + + mov edx, 80007fffh ; 32768 32767 + movd xmm5, edx + pshuflw xmm5, xmm5, 01000100b ; 32768 32767 32768 32767 + mov ebx, 16384 + + +FAST_DOWNSAMPLE: + + mov eax, [dwDstHeight] + mov edi, [pDstData] + mov edx, [dwDstStride] + mov ecx, [dwDstWidth] + sub edx, ecx + mov [dstStep], edx ; stride - width + dec eax + mov [tmpHeight], eax + mov eax, 16384 + mov [yInverse], eax + + pshuflw xmm4, xmm5, 01010000b + psrlw xmm4, 1 ; initial v to 16384 16384 16383 16383 + +FAST_HEIGHT: + mov eax, [yInverse] + mov esi, [pSrcData] + shr eax, 15 + mul dword [dwSrcStride] + add esi, eax ; get current row address + mov ebp, esi + add ebp, [dwSrcStride] + + mov eax, 32768 + mov [xInverse], eax + mov ecx, [dwDstWidth] + dec ecx + + movdqa xmm3, xmm5 ; initial u to 32768 32767 32768 32767 + +FAST_WIDTH: + mov eax, [xInverse] + shr eax, 16 + + movd xmm1, [esi+eax] ; xxxxxxba + movd xmm2, [ebp+eax] ; xxxxxxdc + punpcklwd xmm1, xmm2 ; xxxxdcba + punpcklbw xmm1, xmm0 ; 0d0c0b0a + + movdqa xmm2, xmm4 ; xmm2: vv(1-v)(1-v) tmpv + pmulhuw xmm2, xmm3 ; mul u(1-u)u(1-u) on xmm2 + pmaddwd xmm2, xmm1 + pshufd xmm1, xmm2, 00000001b + paddd xmm2, xmm1 + movd xmm1, ebx + paddd xmm2, xmm1 + psrld xmm2, 15 + + packuswb xmm2, xmm0 + movd eax, xmm2 + mov [edi], al + inc edi + + mov eax, [uiScaleX] + add [xInverse], eax + + paddw xmm3, xmm7 ; inc u + + loop FAST_WIDTH + +FAST_WIDTH_END: + mov eax, [xInverse] + shr eax, 16 + mov cl, [esi+eax] + mov [edi], cl + inc edi + + mov 
eax, [uiScaleY] + add [yInverse], eax + add edi, [dstStep] + + paddw xmm4, xmm6 ; inc v + psllw xmm4, 1 + psrlw xmm4, 1 + + dec dword [tmpHeight] + jg FAST_HEIGHT + + +FAST_LAST_ROW: + mov eax, [yInverse] + mov esi, [pSrcData] + shr eax, 15 + mul dword [dwSrcStride] + add esi, eax ; get current row address + + mov eax, 32768 + mov [xInverse], eax + mov ecx, [dwDstWidth] + +FAST_LAST_ROW_WIDTH: + mov eax, [xInverse] + shr eax, 16 + + mov al, [esi+eax] + mov [edi], al + inc edi + + mov eax, [uiScaleX] + add [xInverse], eax + + loop FAST_LAST_ROW_WIDTH + +FAST_LAST_ROW_END: + + add esp, localsize + pop ebx + pop edi + pop esi + pop ebp +%undef pushsize +%undef localsize +%undef pSrcData +%undef dwSrcWidth +%undef dwSrcHeight +%undef dwSrcStride +%undef pDstData +%undef dwDstStride +%undef uiScaleX +%undef uiScaleY +%undef tmpHeight +%undef yInverse +%undef xInverse +%undef dstStep + ret + +%elifdef WIN64 + +;************************************************************************************************************** +;int GeneralBilinearAccurateDownsampler_sse2( unsigned char* pDst, const int iDstStride, const int iDstWidth, const int iDstHeight, +; unsigned char* pSrc, const int iSrcStride, +; unsigned int uiScaleX, unsigned int uiScaleY ); +;{ +;************************************************************************************************************** + +WELS_EXTERN GeneralBilinearAccurateDownsampler_sse2 + push r12 + push r13 + push r14 + push r15 + push rsi + push rdi + push rbx + push rbp + %assign push_num 8 + LOAD_7_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r2, r2d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r5, r5d + SIGN_EXTENSION r6, r6d + + pxor xmm0, xmm0 + mov r12d, r6d + and r12d, 32767 + mov r13d, r12d + neg r13d + and r13d, 32767 + movd xmm1, r12d ; uinc(uiScaleX mod 32767) + movd xmm2, r13d ; -uinc + psllq xmm1, 32 + por xmm1, xmm2 ; 0 0 uinc -uinc (dword) + pshufd xmm7, xmm1, 01000100b ; xmm7: uinc -uinc uinc -uinc + + mov r12, 
arg8 + SIGN_EXTENSION r12, r12d + mov rbp, r12 + and r12d, 32767 + mov r13d, r12d + neg r13d + and r13d, 32767 + movd xmm6, r12d ; vinc(uiScaleY mod 32767) + movd xmm2, r13d ; -vinc + psllq xmm6, 32 + por xmm6, xmm2 ; 0 0 vinc -vinc (dword) + pshufd xmm6, xmm6, 01010000b ; xmm6: vinc vinc -vinc -vinc + + mov r12d, 40003fffh + movd xmm5, r12d + punpcklwd xmm5, xmm0 ; 16384 16383 + pshufd xmm5, xmm5, 01000100b ; xmm5: 16384 16383 16384 16383 + +DOWNSAMPLE: + sub r1, r2 ; stride - width + dec r3 + mov r14,16384 + pshufd xmm4, xmm5, 01010000b ; initial v to 16384 16384 16383 16383 + +HEIGHT: + ;mov r12, r4 + mov r12, r14 + shr r12, 15 + imul r12, r5 + add r12, r4 ; get current row address + mov r13, r12 + add r13, r5 + + mov r15, 16384 + mov rsi, r2 + dec rsi + movdqa xmm3, xmm5 ; initial u to 16384 16383 16384 16383 + +WIDTH: + mov rdi, r15 + shr rdi, 15 + + movd xmm1, [r12+rdi] ; xxxxxxba + movd xmm2, [r13+rdi] ; xxxxxxdc + pxor xmm0, xmm0 + punpcklwd xmm1, xmm2 ; xxxxdcba + punpcklbw xmm1, xmm0 ; 0d0c0b0a + punpcklwd xmm1, xmm0 ; 000d000c000b000a + + movdqa xmm2, xmm4 ; xmm2: vv(1-v)(1-v) tmpv + pmaddwd xmm2, xmm3 ; mul u(1-u)u(1-u) on xmm2 + movdqa xmm0, xmm2 + pmuludq xmm2, xmm1 + psrlq xmm0, 32 + psrlq xmm1, 32 + pmuludq xmm0, xmm1 + paddq xmm2, xmm0 + pshufd xmm1, xmm2, 00001110b + paddq xmm2, xmm1 + psrlq xmm2, 29 + + movd ebx, xmm2 + inc ebx + shr ebx, 1 + mov [r0], bl + inc r0 + + add r15, r6 + paddw xmm3, xmm7 ; inc u + psllw xmm3, 1 + psrlw xmm3, 1 + + dec rsi + jg WIDTH + +WIDTH_END: + shr r15, 15 + mov bl, [r12+r15] + mov [r0],bl + inc r0 + add r14, rbp + add r0, r1 + + paddw xmm4, xmm6 ; inc v + psllw xmm4, 1 + psrlw xmm4, 1 + + dec r3 + jg HEIGHT + +LAST_ROW: + shr r14, 15 + imul r14, r5 + add r4, r14 + mov r15, 16384 + +LAST_ROW_WIDTH: + mov rdi, r15 + shr rdi, 15 + mov bl, [r4+rdi] + mov [r0],bl + inc r0 + + add r15, r6 + dec r2 + jg LAST_ROW_WIDTH + +LAST_ROW_END: + + POP_XMM + pop rbp + pop rbx + pop rdi + pop rsi + pop r15 + pop r14 + pop r13 + pop 
r12 + ret + +;************************************************************************************************************** +;int GeneralBilinearFastDownsampler_sse2( unsigned char* pDst, const int iDstStride, const int iDstWidth, const int iDstHeight, +; unsigned char* pSrc, const int iSrcStride, +; unsigned int uiScaleX, unsigned int uiScaleY ); +;{ +;************************************************************************************************************** + +WELS_EXTERN GeneralBilinearFastDownsampler_sse2 + push r12 + push r13 + push r14 + push r15 + push rsi + push rdi + push rbx + push rbp + %assign push_num 8 + LOAD_7_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r2, r2d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r5, r5d + SIGN_EXTENSION r6, r6d + + pxor xmm0, xmm0 + mov r12d, r6d + and r12d, 65535 + mov r13d, r12d + neg r13d + and r13d, 65535 + movd xmm1, r12d ; uinc(uiScaleX mod 65536) + movd xmm2, r13d ; -uinc + psllq xmm1, 32 + por xmm1, xmm2 ; 0 uinc 0 -uinc + pshuflw xmm7, xmm1, 10001000b ; xmm7: uinc -uinc uinc -uinc + + mov r12, arg8 + SIGN_EXTENSION r12, r12d + mov rbp, r12 + and r12d, 32767 + mov r13d, r12d + neg r13d + and r13d, 32767 + movd xmm6, r12d ; vinc(uiScaleY mod 32767) + movd xmm2, r13d ; -vinc + psllq xmm6, 32 + por xmm6, xmm2 ; 0 vinc 0 -vinc + pshuflw xmm6, xmm6, 10100000b ; xmm6: vinc vinc -vinc -vinc + + mov r12d, 80007fffh ; 32768 32767 + movd xmm5, r12d + pshuflw xmm5, xmm5, 01000100b ; 32768 32767 32768 32767 + +FAST_DOWNSAMPLE: + sub r1, r2 ; stride - width + dec r3 + mov r14,16384 + + pshuflw xmm4, xmm5, 01010000b + psrlw xmm4, 1 ; initial v to 16384 16384 16383 16383 + +FAST_HEIGHT: + mov r12, r14 + shr r12, 15 + imul r12, r5 + add r12, r4 ; get current row address + mov r13, r12 + add r13, r5 + + mov r15, 32768 + mov rsi, r2 + dec rsi + + movdqa xmm3, xmm5 ; initial u to 32768 32767 32768 32767 + +FAST_WIDTH: + mov rdi, r15 + shr rdi, 16 + + movd xmm1, [r12+rdi] ; xxxxxxba + movd xmm2, [r13+rdi] ; xxxxxxdc + 
punpcklwd xmm1, xmm2 ; xxxxdcba + punpcklbw xmm1, xmm0 ; 0d0c0b0a + + movdqa xmm2, xmm4 ; xmm2: vv(1-v)(1-v) tmpv + pmulhuw xmm2, xmm3 ; mul u(1-u)u(1-u) on xmm2 + pmaddwd xmm2, xmm1 + pshufd xmm1, xmm2, 00000001b + paddd xmm2, xmm1 + movdqa xmm1, [add_extra_half] + paddd xmm2, xmm1 + psrld xmm2, 15 + + packuswb xmm2, xmm0 + movd ebx, xmm2 + mov [r0], bl + inc r0 + + add r15, r6 + + paddw xmm3, xmm7 ; inc u + dec rsi + jg FAST_WIDTH + +FAST_WIDTH_END: + shr r15, 16 + mov bl, [r12+r15] + mov [r0],bl + inc r0 + add r14, rbp + add r0, r1 + + paddw xmm4, xmm6 ; inc v + psllw xmm4, 1 + psrlw xmm4, 1 + + dec r3 + jg FAST_HEIGHT + + +FAST_LAST_ROW: + shr r14, 15 + imul r14, r5 + add r4, r14 + mov r15, 32768 + +FAST_LAST_ROW_WIDTH: + mov rdi, r15 + shr rdi, 16 + mov bl, [r4+rdi] + mov [r0],bl + inc r0 + + add r15, r6 + dec r2 + jg FAST_LAST_ROW_WIDTH + +FAST_LAST_ROW_END: + + POP_XMM + pop rbp + pop rbx + pop rdi + pop rsi + pop r15 + pop r14 + pop r13 + pop r12 + ret + +%elifdef UNIX64 + +;************************************************************************************************************** +;int GeneralBilinearAccurateDownsampler_sse2( unsigned char* pDst, const int iDstStride, const int iDstWidth, const int iDstHeight, +; unsigned char* pSrc, const int iSrcStride, +; unsigned int uiScaleX, unsigned int uiScaleY ); +;{ +;************************************************************************************************************** + +WELS_EXTERN GeneralBilinearAccurateDownsampler_sse2 + push r12 + push r13 + push r14 + push r15 + push rbx + push rbp + %assign push_num 6 + LOAD_7_PARA + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r2, r2d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r5, r5d + SIGN_EXTENSION r6, r6d + + pxor xmm0, xmm0 + mov r12d, r6d + and r12d, 32767 + mov r13d, r12d + neg r13d + and r13d, 32767 + movd xmm1, r12d ; uinc(uiScaleX mod 32767) + movd xmm2, r13d ; -uinc + psllq xmm1, 32 + por xmm1, xmm2 ; 0 0 uinc -uinc (dword) + pshufd xmm7, xmm1, 01000100b ; 
xmm7: uinc -uinc uinc -uinc + + mov r12, arg8 + SIGN_EXTENSION r12, r12d + mov rbp, r12 + and r12d, 32767 + mov r13d, r12d + neg r13d + and r13d, 32767 + movd xmm6, r12d ; vinc(uiScaleY mod 32767) + movd xmm2, r13d ; -vinc + psllq xmm6, 32 + por xmm6, xmm2 ; 0 0 vinc -vinc (dword) + pshufd xmm6, xmm6, 01010000b ; xmm6: vinc vinc -vinc -vinc + + mov r12d, 40003fffh + movd xmm5, r12d + punpcklwd xmm5, xmm0 ; 16384 16383 + pshufd xmm5, xmm5, 01000100b ; xmm5: 16384 16383 16384 16383 + +DOWNSAMPLE: + sub r1, r2 ; stride - width + dec r3 + mov r14,16384 + pshufd xmm4, xmm5, 01010000b ; initial v to 16384 16384 16383 16383 + +HEIGHT: + ;mov r12, r4 + mov r12, r14 + shr r12, 15 + imul r12, r5 + add r12, r4 ; get current row address + mov r13, r12 + add r13, r5 + + mov r15, 16384 + mov rax, r2 + dec rax + movdqa xmm3, xmm5 ; initial u to 16384 16383 16384 16383 + +WIDTH: + mov r11, r15 + shr r11, 15 + + movd xmm1, [r12+r11] ; xxxxxxba + movd xmm2, [r13+r11] ; xxxxxxdc + pxor xmm0, xmm0 + punpcklwd xmm1, xmm2 ; xxxxdcba + punpcklbw xmm1, xmm0 ; 0d0c0b0a + punpcklwd xmm1, xmm0 ; 000d000c000b000a + + movdqa xmm2, xmm4 ; xmm2: vv(1-v)(1-v) tmpv + pmaddwd xmm2, xmm3 ; mul u(1-u)u(1-u) on xmm2 + movdqa xmm0, xmm2 + pmuludq xmm2, xmm1 + psrlq xmm0, 32 + psrlq xmm1, 32 + pmuludq xmm0, xmm1 + paddq xmm2, xmm0 + pshufd xmm1, xmm2, 00001110b + paddq xmm2, xmm1 + psrlq xmm2, 29 + + movd ebx, xmm2 + inc ebx + shr ebx, 1 + mov [r0], bl + inc r0 + + add r15, r6 + paddw xmm3, xmm7 ; inc u + psllw xmm3, 1 + psrlw xmm3, 1 + + dec rax + jg WIDTH + +WIDTH_END: + shr r15, 15 + mov bl, [r12+r15] + mov [r0],bl + inc r0 + add r14, rbp + add r0, r1 + + paddw xmm4, xmm6 ; inc v + psllw xmm4, 1 + psrlw xmm4, 1 + + dec r3 + jg HEIGHT + +LAST_ROW: + shr r14, 15 + imul r14, r5 + add r4, r14 + mov r15, 16384 + +LAST_ROW_WIDTH: + mov r11, r15 + shr r11, 15 + mov bl, [r4+r11] + mov [r0],bl + inc r0 + + add r15, r6 + dec r2 + jg LAST_ROW_WIDTH + +LAST_ROW_END: + + pop rbp + pop rbx + pop r15 + pop r14 + 
pop r13 + pop r12 + ret + +;************************************************************************************************************** +;int GeneralBilinearFastDownsampler_sse2( unsigned char* pDst, const int iDstStride, const int iDstWidth, const int iDstHeight, +; unsigned char* pSrc, const int iSrcStride, +; unsigned int uiScaleX, unsigned int uiScaleY ); +;{ +;************************************************************************************************************** + +WELS_EXTERN GeneralBilinearFastDownsampler_sse2 + push r12 + push r13 + push r14 + push r15 + push rbx + push rbp + %assign push_num 6 + LOAD_7_PARA + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r2, r2d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r5, r5d + SIGN_EXTENSION r6, r6d + + pxor xmm0, xmm0 + mov r12d, r6d + and r12d, 65535 + mov r13d, r12d + neg r13d + and r13d, 65535 + movd xmm1, r12d ; uinc(uiScaleX mod 65536) + movd xmm2, r13d ; -uinc + psllq xmm1, 32 + por xmm1, xmm2 ; 0 uinc 0 -uinc + pshuflw xmm7, xmm1, 10001000b ; xmm7: uinc -uinc uinc -uinc + + mov r12, arg8 + SIGN_EXTENSION r12, r12d + mov rbp, r12 + and r12d, 32767 + mov r13d, r12d + neg r13d + and r13d, 32767 + movd xmm6, r12d ; vinc(uiScaleY mod 32767) + movd xmm2, r13d ; -vinc + psllq xmm6, 32 + por xmm6, xmm2 ; 0 vinc 0 -vinc + pshuflw xmm6, xmm6, 10100000b ; xmm6: vinc vinc -vinc -vinc + + mov r12d, 80007fffh ; 32768 32767 + movd xmm5, r12d + pshuflw xmm5, xmm5, 01000100b ; 32768 32767 32768 32767 + +FAST_DOWNSAMPLE: + sub r1, r2 ; stride - width + dec r3 + mov r14,16384 + + pshuflw xmm4, xmm5, 01010000b + psrlw xmm4, 1 ; initial v to 16384 16384 16383 16383 + +FAST_HEIGHT: + mov r12, r14 + shr r12, 15 + imul r12, r5 + add r12, r4 ; get current row address + mov r13, r12 + add r13, r5 + + mov r15, 32768 + mov rax, r2 + dec rax + + movdqa xmm3, xmm5 ; initial u to 32768 32767 32768 32767 + +FAST_WIDTH: + mov r11, r15 + shr r11, 16 + + movd xmm1, [r12+r11] ; xxxxxxba + movd xmm2, [r13+r11] ; xxxxxxdc + punpcklwd xmm1, xmm2 
; xxxxdcba + punpcklbw xmm1, xmm0 ; 0d0c0b0a + + movdqa xmm2, xmm4 ; xmm2: vv(1-v)(1-v) tmpv + pmulhuw xmm2, xmm3 ; mul u(1-u)u(1-u) on xmm2 + pmaddwd xmm2, xmm1 + pshufd xmm1, xmm2, 00000001b + paddd xmm2, xmm1 + movdqa xmm1, [add_extra_half] + paddd xmm2, xmm1 + psrld xmm2, 15 + + packuswb xmm2, xmm0 + movd ebx, xmm2 + mov [r0], bl + inc r0 + + add r15, r6 + + paddw xmm3, xmm7 ; inc u + dec rax + jg FAST_WIDTH + +FAST_WIDTH_END: + shr r15, 16 + mov bl, [r12+r15] + mov [r0],bl + inc r0 + add r14, rbp + add r0, r1 + + paddw xmm4, xmm6 ; inc v + psllw xmm4, 1 + psrlw xmm4, 1 + + dec r3 + jg FAST_HEIGHT + + +FAST_LAST_ROW: + shr r14, 15 + imul r14, r5 + add r4, r14 + mov r15, 32768 + +FAST_LAST_ROW_WIDTH: + mov r11, r15 + shr r11, 16 + mov bl, [r4+r11] + mov [r0],bl + inc r0 + + add r15, r6 + dec r2 + jg FAST_LAST_ROW_WIDTH + +FAST_LAST_ROW_END: + + pop rbp + pop rbx + pop r15 + pop r14 + pop r13 + pop r12 + ret +%endif + +;*********************************************************************** +; void DyadicBilinearOneThirdDownsampler_ssse3( unsigned char* pDst, const int iDstStride, +; unsigned char* pSrc, const int iSrcStride, +; const int iSrcWidth, const int iSrcHeight ); +;*********************************************************************** +WELS_EXTERN DyadicBilinearOneThirdDownsampler_ssse3 +%ifdef X86_32 + push r6 + %assign push_num 1 +%else + %assign push_num 0 +%endif + LOAD_6_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r4, r4d + SIGN_EXTENSION r5, r5d +%ifdef X86_32_PICASM + %define i_height dword arg6 +%else + %define i_height r5 +%endif + INIT_X86_32_PIC_NOPRESERVE r5 + +%ifndef X86_32 + push r12 + mov r12, r4 +%endif + + mov r6, r1 ;Save the tailer for the unasigned size + imul r6, i_height + add r6, r0 + movdqa xmm7, [r6] + +.yloops_onethird_sse3: +%ifdef X86_32 + mov r4, arg5 +%else + mov r4, r12 +%endif + + mov r6, r0 ;save base address + ; each loop = source bandwidth: 48 bytes +.xloops_onethird_sse3: + ; 
1st part horizonal loop: x48 bytes + ; mem hi<- ->lo + ;1st Line Src: xmm0: F * e E * d D * c C * b B * a A + ; xmm2: k K * j J * i I * h H * g G * f + ; xmm2: * p P * o O * n N * m M * l L * + ; + ;2nd Line Src: xmm2: F' * e' E' * d' D' * c' C' * b' B' * a' A' + ; xmm1: k' K' * j' J' * i' I' * h' H' * g' G' * f' + ; xmm1: * p' P' * o' O' * n' N' * m' M' * l' L' * + ;=> target: + ;: P O N M L K J I H G F E D C B A + ;: p o n m l k j i h g f e d c b a + ;: P' .. A' + ;: p' .. a' + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;1st line + movdqa xmm0, [r2] ;F * e E * d D * c C * b B * a A + movdqa xmm1, xmm0 + movdqa xmm5, [pic(shufb_mask_onethird_low_1)] + movdqa xmm6, [pic(shufb_mask_onethird_high_1)] + pshufb xmm0, xmm5 ;0 0 0 0 0 0 0 0 0 0 F E D C B A -> xmm0 + pshufb xmm1, xmm6 ;0 0 0 0 0 0 0 0 0 0 0 e d c b a -> xmm1 + + movdqa xmm2, [r2+16] ;k K * j J * i I * h H * g G * f + movdqa xmm3, xmm2 + movdqa xmm5, [pic(shufb_mask_onethird_low_2)] + movdqa xmm6, [pic(shufb_mask_onethird_high_2)] + pshufb xmm2, xmm5 ;0 0 0 0 0 K J I H G 0 0 0 0 0 0 -> xmm2 + pshufb xmm3, xmm6 ;0 0 0 0 0 k j i h g f 0 0 0 0 0 -> xmm3 + + paddusb xmm0, xmm2 ;0 0 0 0 0 K J I H G F E D C B A -> xmm0 + paddusb xmm1, xmm3 ;0 0 0 0 0 k j i h g f e d c b a -> xmm1 + + movdqa xmm2, [r2+32] ;* p P * o O * n N * m M * l L * + movdqa xmm3, xmm2 + movdqa xmm5, [pic(shufb_mask_onethird_low_3)] + movdqa xmm6, [pic(shufb_mask_onethird_high_3)] + pshufb xmm2, xmm5 ;P O N M L 0 0 0 0 0 0 0 0 0 0 0 -> xmm2 + pshufb xmm3, xmm6 ;p o n m l 0 0 0 0 0 0 0 0 0 0 0 -> xmm3 + + paddusb xmm0, xmm2 ;P O N M L K J I H G F E D C B A -> xmm0 + paddusb xmm1, xmm3 ;p o n m l k j i h g f e d c b a -> xmm1 + pavgb xmm0, xmm1 ;1st line average -> xmm0 + + ;2nd line + movdqa xmm2, [r2+r3] ;F' * e' E' * d' D' * c' C' * b' B' * a' A' + movdqa xmm3, xmm2 + movdqa xmm5, [pic(shufb_mask_onethird_low_1)] + movdqa xmm6, [pic(shufb_mask_onethird_high_1)] + pshufb xmm2, xmm5 ;0 0 0 0 0 0 0 0 0 0 F' E' D' C' B' A' -> xmm2 + pshufb 
xmm3, xmm6 ;0 0 0 0 0 0 0 0 0 0 0 e' d' c' b' a' -> xmm3 + + movdqa xmm1, [r2+r3+16] ;k' K' * j' J' * i' I' * h' H' * g' G' * f' + movdqa xmm4, xmm1 + movdqa xmm5, [pic(shufb_mask_onethird_low_2)] + movdqa xmm6, [pic(shufb_mask_onethird_high_2)] + pshufb xmm1, xmm5 ;0 0 0 0 0 K' J' I' H' G' 0 0 0 0 0 0 -> xmm1 + pshufb xmm4, xmm6 ;0 0 0 0 0 k' j' i' h' g' f' 0 0 0 0 0 -> xmm4 + + paddusb xmm2, xmm1 ;0 0 0 0 0 K' J' I' H' G' F' E' D' C' B' A' -> xmm2 + paddusb xmm3, xmm4 ;0 0 0 0 0 k' j' i' h' g' f' e' d' c' b' a' -> xmm3 + + movdqa xmm1, [r2+r3+32] ; * p' P' * o' O' * n' N' * m' M' * l' L' * + movdqa xmm4, xmm1 + movdqa xmm5, [pic(shufb_mask_onethird_low_3)] + movdqa xmm6, [pic(shufb_mask_onethird_high_3)] + pshufb xmm1, xmm5 ;P' O' N' M' L' 0 0 0 0 0 0 0 0 0 0 0 -> xmm1 + pshufb xmm4, xmm6 ;p' o' n' m' l' 0 0 0 0 0 0 0 0 0 0 0 -> xmm4 + + paddusb xmm2, xmm1 ;P' O' N' M' L' K' J' I' H' G' F' E' D' C' B' A' -> xmm2 + paddusb xmm3, xmm4 ;p' o' n' m' l' k' j' i' h' g' f' e' d' c' b' a' -> xmm3 + pavgb xmm2, xmm3 ;2nd line average -> xmm2 + + pavgb xmm0, xmm2 ; bytes-average(1st line , 2nd line ) + + ; write pDst + movdqa [r0], xmm0 ;write result in dst + + ; next SMB + lea r2, [r2+48] ;current src address + lea r0, [r0+16] ;current dst address + + sub r4, 48 ;xloops counter + cmp r4, 0 + jg near .xloops_onethird_sse3 + + sub r6, r0 ;offset = base address - current address + lea r2, [r2+2*r3] ; + lea r2, [r2+r3] ; + lea r2, [r2+2*r6] ;current line + 3 lines + lea r2, [r2+r6] + lea r0, [r0+r1] + lea r0, [r0+r6] ;current dst lien + 1 line + + dec i_height + jg near .yloops_onethird_sse3 + + movdqa [r0], xmm7 ;restore the tailer for the unasigned size + +%ifndef X86_32 + pop r12 +%endif + + DEINIT_X86_32_PIC + POP_XMM + LOAD_6_PARA_POP +%ifdef X86_32 + pop r6 +%endif + ret +%undef i_height + +;*********************************************************************** +; void DyadicBilinearOneThirdDownsampler_sse4( unsigned char* pDst, const int iDstStride, +; unsigned char* 
pSrc, const int iSrcStride, +; const int iSrcWidth, const int iSrcHeight ); +;*********************************************************************** +WELS_EXTERN DyadicBilinearOneThirdDownsampler_sse4 +%ifdef X86_32 + push r6 + %assign push_num 1 +%else + %assign push_num 0 +%endif + LOAD_6_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r4, r4d + SIGN_EXTENSION r5, r5d +%ifdef X86_32_PICASM + %define i_height dword arg6 +%else + %define i_height r5 +%endif + INIT_X86_32_PIC_NOPRESERVE r5 + +%ifndef X86_32 + push r12 + mov r12, r4 +%endif + + mov r6, r1 ;Save the tailer for the unasigned size + imul r6, i_height + add r6, r0 + movdqa xmm7, [r6] + +.yloops_onethird_sse4: +%ifdef X86_32 + mov r4, arg5 +%else + mov r4, r12 +%endif + + mov r6, r0 ;save base address + ; each loop = source bandwidth: 48 bytes +.xloops_onethird_sse4: + ; 1st part horizonal loop: x48 bytes + ; mem hi<- ->lo + ;1st Line Src: xmm0: F * e E * d D * c C * b B * a A + ; xmm2: k K * j J * i I * h H * g G * f + ; xmm2: * p P * o O * n N * m M * l L * + ; + ;2nd Line Src: xmm2: F' * e' E' * d' D' * c' C' * b' B' * a' A' + ; xmm1: k' K' * j' J' * i' I' * h' H' * g' G' * f' + ; xmm1: * p' P' * o' O' * n' N' * m' M' * l' L' * + ;=> target: + ;: P O N M L K J I H G F E D C B A + ;: p o n m l k j i h g f e d c b a + ;: P' .. A' + ;: p' .. 
a' + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;1st line + movntdqa xmm0, [r2] ;F * e E * d D * c C * b B * a A + movdqa xmm1, xmm0 + movdqa xmm5, [pic(shufb_mask_onethird_low_1)] + movdqa xmm6, [pic(shufb_mask_onethird_high_1)] + pshufb xmm0, xmm5 ;0 0 0 0 0 0 0 0 0 0 F E D C B A -> xmm0 + pshufb xmm1, xmm6 ;0 0 0 0 0 0 0 0 0 0 0 e d c b a -> xmm1 + + movntdqa xmm2, [r2+16] ;k K * j J * i I * h H * g G * f + movdqa xmm3, xmm2 + movdqa xmm5, [pic(shufb_mask_onethird_low_2)] + movdqa xmm6, [pic(shufb_mask_onethird_high_2)] + pshufb xmm2, xmm5 ;0 0 0 0 0 K J I H G 0 0 0 0 0 0 -> xmm2 + pshufb xmm3, xmm6 ;0 0 0 0 0 k j i h g f 0 0 0 0 0 -> xmm3 + + paddusb xmm0, xmm2 ;0 0 0 0 0 K J I H G F E D C B A -> xmm0 + paddusb xmm1, xmm3 ;0 0 0 0 0 k j i h g f e d c b a -> xmm1 + + movntdqa xmm2, [r2+32] ;* p P * o O * n N * m M * l L * + movdqa xmm3, xmm2 + movdqa xmm5, [pic(shufb_mask_onethird_low_3)] + movdqa xmm6, [pic(shufb_mask_onethird_high_3)] + pshufb xmm2, xmm5 ;P O N M L 0 0 0 0 0 0 0 0 0 0 0 -> xmm2 + pshufb xmm3, xmm6 ;p o n m l 0 0 0 0 0 0 0 0 0 0 0 -> xmm3 + + paddusb xmm0, xmm2 ;P O N M L K J I H G F E D C B A -> xmm0 + paddusb xmm1, xmm3 ;p o n m l k j i h g f e d c b a -> xmm1 + pavgb xmm0, xmm1 ;1st line average -> xmm0 + + ;2nd line + movntdqa xmm2, [r2+r3] ;F' * e' E' * d' D' * c' C' * b' B' * a' A' + movdqa xmm3, xmm2 + movdqa xmm5, [pic(shufb_mask_onethird_low_1)] + movdqa xmm6, [pic(shufb_mask_onethird_high_1)] + pshufb xmm2, xmm5 ;0 0 0 0 0 0 0 0 0 0 F' E' D' C' B' A' -> xmm2 + pshufb xmm3, xmm6 ;0 0 0 0 0 0 0 0 0 0 0 e' d' c' b' a' -> xmm3 + + movntdqa xmm1, [r2+r3+16] ;k' K' * j' J' * i' I' * h' H' * g' G' * f' + movdqa xmm4, xmm1 + movdqa xmm5, [pic(shufb_mask_onethird_low_2)] + movdqa xmm6, [pic(shufb_mask_onethird_high_2)] + pshufb xmm1, xmm5 ;0 0 0 0 0 K' J' I' H' G' 0 0 0 0 0 0 -> xmm1 + pshufb xmm4, xmm6 ;0 0 0 0 0 k' j' i' h' g' f' 0 0 0 0 0 -> xmm4 + + paddusb xmm2, xmm1 ;0 0 0 0 0 K' J' I' H' G' F' E' D' C' B' A' -> xmm2 + paddusb xmm3, 
xmm4 ;0 0 0 0 0 k' j' i' h' g' f' e' d' c' b' a' -> xmm3 + + movntdqa xmm1, [r2+r3+32] ; * p' P' * o' O' * n' N' * m' M' * l' L' * + movdqa xmm4, xmm1 + movdqa xmm5, [pic(shufb_mask_onethird_low_3)] + movdqa xmm6, [pic(shufb_mask_onethird_high_3)] + pshufb xmm1, xmm5 ;P' O' N' M' L' 0 0 0 0 0 0 0 0 0 0 0 -> xmm1 + pshufb xmm4, xmm6 ;p' o' n' m' l' 0 0 0 0 0 0 0 0 0 0 0 -> xmm4 + + paddusb xmm2, xmm1 ;P' O' N' M' L' K' J' I' H' G' F' E' D' C' B' A' -> xmm2 + paddusb xmm3, xmm4 ;p' o' n' m' l' k' j' i' h' g' f' e' d' c' b' a' -> xmm3 + pavgb xmm2, xmm3 ;2nd line average -> xmm2 + + pavgb xmm0, xmm2 ; bytes-average(1st line , 2nd line ) + + ; write pDst + movdqa [r0], xmm0 ;write result in dst + + ; next SMB + lea r2, [r2+48] ;current src address + lea r0, [r0+16] ;current dst address + + sub r4, 48 ;xloops counter + cmp r4, 0 + jg near .xloops_onethird_sse4 + + sub r6, r0 ;offset = base address - current address + lea r2, [r2+2*r3] ; + lea r2, [r2+r3] ; + lea r2, [r2+2*r6] ;current line + 3 lines + lea r2, [r2+r6] + lea r0, [r0+r1] + lea r0, [r0+r6] ;current dst lien + 1 line + + dec i_height + jg near .yloops_onethird_sse4 + + movdqa [r0], xmm7 ;restore the tailer for the unasigned size + +%ifndef X86_32 + pop r12 +%endif + + DEINIT_X86_32_PIC + POP_XMM + LOAD_6_PARA_POP +%ifdef X86_32 + pop r6 +%endif + ret +%undef i_height + +;*********************************************************************** +; void DyadicBilinearQuarterDownsampler_sse( unsigned char* pDst, const int iDstStride, +; unsigned char* pSrc, const int iSrcStride, +; const int iSrcWidth, const int iSrcHeight ); +;*********************************************************************** +WELS_EXTERN DyadicBilinearQuarterDownsampler_sse +%ifdef X86_32 + push r6 + %assign push_num 1 +%else + %assign push_num 0 +%endif + LOAD_6_PARA + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r4, r4d + SIGN_EXTENSION r5, r5d + +%ifndef X86_32 + push r12 + mov r12, r4 +%endif + sar r5, $02 ; 
iSrcHeight >> 2 + + mov r6, r1 ;Save the tailer for the unasigned size + imul r6, r5 + add r6, r0 + movq xmm7, [r6] + +.yloops_quarter_sse: +%ifdef X86_32 + mov r4, arg5 +%else + mov r4, r12 +%endif + + mov r6, r0 ;save base address + ; each loop = source bandwidth: 32 bytes +.xloops_quarter_sse: + ; 1st part horizonal loop: x16 bytes + ; mem hi<- ->lo + ;1st Line Src: mm0: d D c C b B a A mm1: h H g G f F e E + ;2nd Line Src: mm2: l L k K j J i I mm3: p P o O n N m M + ; + ;=> target: + ;: G E C A, + ;: + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + movq mm0, [r2] ; 1st pSrc line + movq mm1, [r2+8] ; 1st pSrc line + 8 + movq mm2, [r2+r3] ; 2nd pSrc line + movq mm3, [r2+r3+8] ; 2nd pSrc line + 8 + + pshufw mm0, mm0, 0d8h ; x X x X c C a A + pshufw mm1, mm1, 0d8h ; x X x X g G e E + pshufw mm2, mm2, 0d8h ; x X x X k K i I + pshufw mm3, mm3, 0d8h ; x X x X o O m M + + punpckldq mm0, mm1 ; g G e E c C a A + punpckldq mm2, mm3 ; o O m M k K i I + + ; to handle mm0,mm2 + pshufw mm4, mm0, 0d8h ;g G c C e E a A + pshufw mm5, mm4, 04eh ;e E a A g G c C + punpcklbw mm4, mm5 ;g e G E c a C A -> mm4 + pshufw mm4, mm4, 0d8h ;g e c a G E C A -> mm4 + + pshufw mm5, mm2, 0d8h ;o O k K m M i I + pshufw mm6, mm5, 04eh ;m M i I o O k K + punpcklbw mm5, mm6 ;o m O M k i K I + pshufw mm5, mm5, 0d8h ;o m k i O M K I -> mm5 + + ; to handle mm4, mm5 + movq mm0, mm4 + punpckldq mm0, mm6 ;x x x x G E C A + punpckhdq mm4, mm6 ;x x x x g e c a + + movq mm1, mm5 + punpckldq mm1, mm6 ;x x x x O M K I + punpckhdq mm5, mm6 ;x x x x o m k i + + ; avg within MB horizon width (8 x 2 lines) + pavgb mm0, mm4 ; (A+a+1)>>1, .., (H+h+1)>>1, temp_row1 + pavgb mm1, mm5 ; (I+i+1)>>1, .., (P+p+1)>>1, temp_row2 + pavgb mm0, mm1 ; (temp_row1+temp_row2+1)>>1, pending here and wait another horizonal part done then write memory once + + ; 2nd part horizonal loop: x16 bytes + movq mm1, [r2+16] ; 1st pSrc line + 16 + movq mm2, [r2+24] ; 1st pSrc line + 24 + movq mm3, [r2+r3+16] ; 2nd pSrc line + 16 + movq mm4, 
[r2+r3+24] ; 2nd pSrc line + 24 + + pshufw mm1, mm1, 0d8h + pshufw mm2, mm2, 0d8h + pshufw mm3, mm3, 0d8h + pshufw mm4, mm4, 0d8h + + punpckldq mm1, mm2 + punpckldq mm3, mm4 + + ; to handle mm1, mm3 + pshufw mm4, mm1, 0d8h + pshufw mm5, mm4, 04eh + punpcklbw mm4, mm5 + pshufw mm4, mm4, 0d8h + + pshufw mm5, mm3, 0d8h + pshufw mm6, mm5, 04eh + punpcklbw mm5, mm6 + pshufw mm5, mm5, 0d8h + + ; to handle mm4, mm5 + movq mm2, mm4 + punpckldq mm2, mm6 + punpckhdq mm4, mm6 + + movq mm3, mm5 + punpckldq mm3, mm6 + punpckhdq mm5, mm6 + + ; avg within MB horizon width (8 x 2 lines) + pavgb mm2, mm4 ; (A+a+1)>>1, .., (H+h+1)>>1, temp_row1 + pavgb mm3, mm5 ; (I+i+1)>>1, .., (P+p+1)>>1, temp_row2 + pavgb mm2, mm3 ; (temp_row1+temp_row2+1)>>1, done in another 2nd horizonal part + + movd [r0 ], mm0 + movd [r0+4], mm2 + + ; next SMB + lea r2, [r2+32] + lea r0, [r0+8] + + sub r4, 32 + cmp r4, 0 + jg near .xloops_quarter_sse + + sub r6, r0 + ; next line + lea r2, [r2+4*r3] ; next 4 end of lines + lea r2, [r2+4*r6] ; reset to base 0 [- 4 * iDstWidth] + lea r0, [r0+r1] + lea r0, [r0+r6] ; reset to base 0 [- iDstWidth] + + dec r5 + jg near .yloops_quarter_sse + + movq [r0], xmm7 ;restored the tailer for the unasigned size + + WELSEMMS +%ifndef X86_32 + pop r12 +%endif + LOAD_6_PARA_POP +%ifdef X86_32 + pop r6 +%endif + ret + +;*********************************************************************** +; void DyadicBilinearQuarterDownsampler_ssse3( unsigned char* pDst, const int iDstStride, +; unsigned char* pSrc, const int iSrcStride, +; const int iSrcWidth, const int iSrcHeight ); +;*********************************************************************** +WELS_EXTERN DyadicBilinearQuarterDownsampler_ssse3 + ;push ebx + ;push edx + ;push esi + ;push edi + ;push ebp + + ;mov edi, [esp+24] ; pDst + ;mov edx, [esp+28] ; iDstStride + ;mov esi, [esp+32] ; pSrc + ;mov ecx, [esp+36] ; iSrcStride + ;mov ebp, [esp+44] ; iSrcHeight +%ifdef X86_32 + push r6 + %assign push_num 1 +%else + %assign 
push_num 0 +%endif + LOAD_6_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r4, r4d + SIGN_EXTENSION r5, r5d + +%ifndef X86_32 + push r12 + mov r12, r4 +%endif + sar r5, $02 ; iSrcHeight >> 2 + + mov r6, r1 ;Save the tailer for the unasigned size + imul r6, r5 + add r6, r0 + movq xmm7, [r6] + + INIT_X86_32_PIC_NOPRESERVE r4 + movdqa xmm6, [pic(shufb_mask_quarter)] + DEINIT_X86_32_PIC + +.yloops_quarter_sse3: + ;mov eax, [esp+40] ; iSrcWidth + ;sar eax, $02 ; iSrcWidth >> 2 + ;mov ebx, eax ; iDstWidth restored at ebx + ;sar eax, $04 ; (iSrcWidth >> 2) / 16 ; loop count = num_of_mb + ;neg ebx ; - (iSrcWidth >> 2) +%ifdef X86_32 + mov r4, arg5 +%else + mov r4, r12 +%endif + + mov r6, r0 + ; each loop = source bandwidth: 32 bytes +.xloops_quarter_sse3: + ; 1st part horizonal loop: x32 bytes + ; mem hi<- ->lo + ;1st Line Src: xmm0: h H g G f F e E d D c C b B a A + ; xmm1: p P o O n N m M l L k K j J i I + ;2nd Line Src: xmm2: h H g G f F e E d D c C b B a A + ; xmm3: p P o O n N m M l L k K j J i I + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + movdqa xmm0, [r2] ; 1st_src_line + movdqa xmm1, [r2+16] ; 1st_src_line + 16 + movdqa xmm2, [r2+r3] ; 2nd_src_line + movdqa xmm3, [r2+r3+16] ; 2nd_src_line + 16 + + pshufb xmm0, xmm6 ;1st line: 0 0 0 0 g e c a 0 0 0 0 G E C A + pshufb xmm1, xmm6 ;1st line: 0 0 0 0 o m k i 0 0 0 0 O M K I + pshufb xmm2, xmm6 ;2nd line: 0 0 0 0 g e c a 0 0 0 0 G E C A + pshufb xmm3, xmm6 ;2nd line: 0 0 0 0 o m k i 0 0 0 0 O M K I + + movdqa xmm4, xmm0 + movdqa xmm5, xmm2 + punpckldq xmm0, xmm1 ;1st line: 0 0 0 0 0 0 0 0 O M K I G E C A -> xmm0 + punpckhdq xmm4, xmm1 ;1st line: 0 0 0 0 0 0 0 0 o m k i g e c a -> xmm4 + punpckldq xmm2, xmm3 ;2nd line: 0 0 0 0 0 0 0 0 O M K I G E C A -> xmm2 + punpckhdq xmm5, xmm3 ;2nd line: 0 0 0 0 0 0 0 0 o m k i g e c a -> xmm5 + + pavgb xmm0, xmm4 + pavgb xmm2, xmm5 + pavgb xmm0, xmm2 ;average + + ; write pDst + movq [r0], xmm0 + + ; next SMB + lea r2, [r2+32] + lea r0, [r0+8] + 
+ sub r4, 32 + cmp r4, 0 + jg near .xloops_quarter_sse3 + + sub r6, r0 + ; next line + lea r2, [r2+4*r3] ; next end of lines + lea r2, [r2+4*r6] ; reset to base 0 [- 4 * iDstWidth] + lea r0, [r0+r1] + lea r0, [r0+r6] ; reset to base 0 [- iDstWidth] + + dec r5 + jg near .yloops_quarter_sse3 + + movq [r0], xmm7 ;restored the tailer for the unasigned size + +%ifndef X86_32 + pop r12 +%endif + + POP_XMM + LOAD_6_PARA_POP +%ifdef X86_32 + pop r6 +%endif + ret + +;*********************************************************************** +; void DyadicBilinearQuarterDownsampler_sse4( unsigned char* pDst, const int iDstStride, +; unsigned char* pSrc, const int iSrcStride, +; const int iSrcWidth, const int iSrcHeight ); +;*********************************************************************** +WELS_EXTERN DyadicBilinearQuarterDownsampler_sse4 +%ifdef X86_32 + push r6 + %assign push_num 1 +%else + %assign push_num 0 +%endif + LOAD_6_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r4, r4d + SIGN_EXTENSION r5, r5d + +%ifndef X86_32 + push r12 + mov r12, r4 +%endif + sar r5, $02 ; iSrcHeight >> 2 + + mov r6, r1 ;Save the tailer for the unasigned size + imul r6, r5 + add r6, r0 + movq xmm7, [r6] + + INIT_X86_32_PIC_NOPRESERVE r4 + movdqa xmm6, [pic(shufb_mask_quarter)] ;mask + DEINIT_X86_32_PIC + +.yloops_quarter_sse4: +%ifdef X86_32 + mov r4, arg5 +%else + mov r4, r12 +%endif + + mov r6, r0 + ; each loop = source bandwidth: 32 bytes +.xloops_quarter_sse4: + ; 1st part horizonal loop: x16 bytes + ; mem hi<- ->lo + ;1st Line Src: xmm0: h H g G f F e E d D c C b B a A + ; xmm1: p P o O n N m M l L k K j J i I + ;2nd Line Src: xmm2: h H g G f F e E d D c C b B a A + ; xmm3: p P o O n N m M l L k K j J i I + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + movntdqa xmm0, [r2] ; 1st_src_line + movntdqa xmm1, [r2+16] ; 1st_src_line + 16 + movntdqa xmm2, [r2+r3] ; 2nd_src_line + movntdqa xmm3, [r2+r3+16] ; 2nd_src_line + 16 + + pshufb xmm0, xmm6 ;1st line: 
0 0 0 0 g e c a 0 0 0 0 G E C A + pshufb xmm1, xmm6 ;1st line: 0 0 0 0 o m k i 0 0 0 0 O M K I + pshufb xmm2, xmm6 ;2nd line: 0 0 0 0 g e c a 0 0 0 0 G E C A + pshufb xmm3, xmm6 ;2nd line: 0 0 0 0 o m k i 0 0 0 0 O M K I + + movdqa xmm4, xmm0 + movdqa xmm5, xmm2 + punpckldq xmm0, xmm1 ;1st line: 0 0 0 0 0 0 0 0 O M K I G E C A -> xmm0 + punpckhdq xmm4, xmm1 ;1st line: 0 0 0 0 0 0 0 0 o m k i g e c a -> xmm4 + punpckldq xmm2, xmm3 ;2nd line: 0 0 0 0 0 0 0 0 O M K I G E C A -> xmm2 + punpckhdq xmm5, xmm3 ;2nd line: 0 0 0 0 0 0 0 0 o m k i g e c a -> xmm5 + + pavgb xmm0, xmm4 + pavgb xmm2, xmm5 + pavgb xmm0, xmm2 ;average + + ; write pDst + movq [r0], xmm0 + + ; next SMB + lea r2, [r2+32] + lea r0, [r0+8] + + sub r4, 32 + cmp r4, 0 + jg near .xloops_quarter_sse4 + + sub r6, r0 + lea r2, [r2+4*r3] ; next end of lines + lea r2, [r2+4*r6] ; reset to base 0 [- 2 * iDstWidth] + lea r0, [r0+r1] + lea r0, [r0+r6] ; reset to base 0 [- iDstWidth] + + dec r5 + jg near .yloops_quarter_sse4 + + movq [r0], xmm7 ;restore the tailer for the unasigned size + +%ifndef X86_32 + pop r12 +%endif + + POP_XMM + LOAD_6_PARA_POP +%ifdef X86_32 + pop r6 +%endif + ret + +; xpos_int=%1 xpos_frac=%2 inc_int+1=%3 inc_frac=%4 tmp=%5 +%macro SSE2_BilinearIncXposuw 5 + movdqa %5, %2 + paddw %2, %4 + paddusw %5, %4 + pcmpeqw %5, %2 + paddb %1, %3 + paddb %1, %5 ; subtract 1 if no carry +%endmacro + +; outl=%1 outh=%2 in=%3 +%macro SSE2_UnpckXFracuw 3 + pcmpeqw %1, %1 + pxor %1, %3 + movdqa %2, %1 + punpcklwd %1, %3 + punpckhwd %2, %3 +%endmacro + +; [in:xfrac out:xyfrac0]=%1 [out:xyfrac1]=%2 yfrac0=%3 yfrac1=%4 +%macro SSE2_BilinearFastCalcXYFrac 4 + movdqa %2, %1 + pmulhuw %1, %3 + pmulhuw %2, %4 +%endmacro + +; [in:dwordsl out:bytes] dwordsh=%2 zero=%3 +%macro SSE2_BilinearFastPackDwordsToBytes 3 + psrld %1, 14 + psrld %2, 14 + packssdw %1, %2 + pavgw %1, %3 + packuswb %1, %1 +%endmacro + +%macro SSSE3_BilinearFastDownsample2xOrLess_8px 0 + movdqa xmm_tmp0, xmm_xpos_int + pshufb xmm_tmp0, xmm_0 + 
psubb xmm_xpos_int, xmm_tmp0 + SSE2_UnpckXFracuw xmm_tmp0, xmm_tmp1, xmm_xpos_frac + mov r_tmp0, i_xpos + lea i_xpos, [i_xpos + 8 * i_scalex] + shr r_tmp0, 16 + lddqu xmm_tmp4, [p_src_row0 + r_tmp0] + pshufb xmm_tmp4, xmm_xpos_int + movdqa xmm_tmp5, xmm_tmp4 + punpcklbw xmm_tmp4, xmm_0 + punpckhbw xmm_tmp5, xmm_0 + SSE2_BilinearFastCalcXYFrac xmm_tmp0, xmm_tmp2, xmm_yfrac0, xmm_yfrac1 + SSE2_BilinearFastCalcXYFrac xmm_tmp1, xmm_tmp3, xmm_yfrac0, xmm_yfrac1 + pmaddwd xmm_tmp0, xmm_tmp4 + pmaddwd xmm_tmp1, xmm_tmp5 + lddqu xmm_tmp4, [p_src_row1 + r_tmp0] + pshufb xmm_tmp4, xmm_xpos_int + movdqa xmm_tmp5, xmm_tmp4 + punpcklbw xmm_tmp4, xmm_0 + punpckhbw xmm_tmp5, xmm_0 + pmaddwd xmm_tmp2, xmm_tmp4 + pmaddwd xmm_tmp3, xmm_tmp5 + paddd xmm_tmp0, xmm_tmp2 + paddd xmm_tmp1, xmm_tmp3 + SSE2_BilinearFastPackDwordsToBytes xmm_tmp0, xmm_tmp1, xmm_0 + movlps [p_dst], xmm_tmp0 + add p_dst, 8 + SSE2_BilinearIncXposuw xmm_xpos_int, xmm_xpos_frac, xmm_xpos_int_inc, xmm_xpos_frac_inc, xmm_tmp0 +%endmacro + +%macro SSSE3_BilinearFastDownsample4xOrLess_8px 0 + movdqa xmm_tmp0, xmm_xpos_int + pshufb xmm_tmp0, xmm_shufb_0000000088888888 + psubb xmm_xpos_int, xmm_tmp0 + SSE2_UnpckXFracuw xmm_tmp0, xmm_tmp1, xmm_xpos_frac + mov r_tmp0, i_xpos + shr r_tmp0, 16 + lddqu xmm_tmp3, [p_src_row0 + r_tmp0] + lddqu xmm_tmp4, [p_src_row1 + r_tmp0] + movdqa xmm_tmp2, xmm_xpos_int + punpcklbw xmm_tmp2, xmm_db80h + pshufb xmm_tmp3, xmm_tmp2 + pshufb xmm_tmp4, xmm_tmp2 + SSE2_BilinearFastCalcXYFrac xmm_tmp0, xmm_tmp2, xmm_yfrac0, xmm_yfrac1 + pmaddwd xmm_tmp0, xmm_tmp3 + pmaddwd xmm_tmp2, xmm_tmp4 + paddd xmm_tmp0, xmm_tmp2 + lea r_tmp0, [i_xpos + 4 * i_scalex] + lea i_xpos, [i_xpos + 8 * i_scalex] + shr r_tmp0, 16 + lddqu xmm_tmp3, [p_src_row0 + r_tmp0] + lddqu xmm_tmp4, [p_src_row1 + r_tmp0] + movdqa xmm_tmp2, xmm_xpos_int + punpckhbw xmm_tmp2, xmm_db80h + pshufb xmm_tmp3, xmm_tmp2 + pshufb xmm_tmp4, xmm_tmp2 + SSE2_BilinearFastCalcXYFrac xmm_tmp1, xmm_tmp2, xmm_yfrac0, xmm_yfrac1 + pmaddwd 
xmm_tmp1, xmm_tmp3 + pmaddwd xmm_tmp2, xmm_tmp4 + paddd xmm_tmp1, xmm_tmp2 + SSE2_BilinearFastPackDwordsToBytes xmm_tmp0, xmm_tmp1, xmm_0 + movlps [p_dst], xmm_tmp0 + add p_dst, 8 + SSE2_BilinearIncXposuw xmm_xpos_int, xmm_xpos_frac, xmm_xpos_int_inc, xmm_xpos_frac_inc, xmm_tmp0 +%endmacro + +%macro SSE2_GeneralBilinearFastDownsample_8px 0 + mov r_tmp0, i_xpos + shr r_tmp0, 16 + movd xmm_tmp3, [p_src_row0 + r_tmp0] + movd xmm_tmp4, [p_src_row1 + r_tmp0] + lea r_tmp0, [i_xpos + i_scalex] + shr r_tmp0, 16 + pinsrw xmm_tmp3, [p_src_row0 + r_tmp0], 1 + pinsrw xmm_tmp4, [p_src_row1 + r_tmp0], 1 + lea r_tmp0, [i_xpos + 2 * i_scalex] + lea i_xpos, [i_xpos + 4 * i_scalex] + shr r_tmp0, 16 + pinsrw xmm_tmp3, [p_src_row0 + r_tmp0], 2 + pinsrw xmm_tmp4, [p_src_row1 + r_tmp0], 2 + mov r_tmp0, i_xpos + sub r_tmp0, i_scalex + shr r_tmp0, 16 + pinsrw xmm_tmp3, [p_src_row0 + r_tmp0], 3 + pinsrw xmm_tmp4, [p_src_row1 + r_tmp0], 3 + punpcklbw xmm_tmp3, xmm_0 + punpcklbw xmm_tmp4, xmm_0 + movdqa xmm_tmp0, xmm_xfrac0 + SSE2_BilinearFastCalcXYFrac xmm_tmp0, xmm_tmp2, xmm_yfrac0, xmm_yfrac1 + pmaddwd xmm_tmp0, xmm_tmp3 + pmaddwd xmm_tmp2, xmm_tmp4 + paddd xmm_tmp0, xmm_tmp2 + mov r_tmp0, i_xpos + shr r_tmp0, 16 + movd xmm_tmp3, [p_src_row0 + r_tmp0] + movd xmm_tmp4, [p_src_row1 + r_tmp0] + lea r_tmp0, [i_xpos + i_scalex] + shr r_tmp0, 16 + pinsrw xmm_tmp3, [p_src_row0 + r_tmp0], 1 + pinsrw xmm_tmp4, [p_src_row1 + r_tmp0], 1 + lea r_tmp0, [i_xpos + 2 * i_scalex] + lea i_xpos, [i_xpos + 4 * i_scalex] + shr r_tmp0, 16 + pinsrw xmm_tmp3, [p_src_row0 + r_tmp0], 2 + pinsrw xmm_tmp4, [p_src_row1 + r_tmp0], 2 + mov r_tmp0, i_xpos + sub r_tmp0, i_scalex + shr r_tmp0, 16 + pinsrw xmm_tmp3, [p_src_row0 + r_tmp0], 3 + pinsrw xmm_tmp4, [p_src_row1 + r_tmp0], 3 + punpcklbw xmm_tmp3, xmm_0 + punpcklbw xmm_tmp4, xmm_0 + movdqa xmm_tmp1, xmm_xfrac1 + SSE2_BilinearFastCalcXYFrac xmm_tmp1, xmm_tmp2, xmm_yfrac0, xmm_yfrac1 + pmaddwd xmm_tmp1, xmm_tmp3 + pmaddwd xmm_tmp2, xmm_tmp4 + paddd xmm_tmp1, xmm_tmp2 
+ SSE2_BilinearFastPackDwordsToBytes xmm_tmp0, xmm_tmp1, xmm_0 + movlps [p_dst], xmm_tmp0 + add p_dst, 8 + paddw xmm_xfrac0, xmm_xfrac_inc + paddw xmm_xfrac1, xmm_xfrac_inc +%endmacro + +; xpos_int=%1 xpos_frac=%2 inc_int=%3 inc_frac=%4 7FFFh=%5 tmp=%6 +%macro SSE2_BilinearIncXposw 6 + pxor %6, %6 + paddw %2, %4 + pcmpgtw %6, %2 + paddb %1, %3 + psubb %1, %6 ; add carry + pand %2, %5 +%endmacro + +; outl=%1 outh=%2 in=%3 7FFFh=%4 +%macro SSE2_UnpckXFracw 4 + movdqa %1, %3 + pxor %1, %4 + movdqa %2, %1 + punpcklwd %1, %3 + punpckhwd %2, %3 +%endmacro + +; res>>29=%1 data0=%2 data1=%3 frac0=%4 frac1=%5 tmp=%6 +%macro SSE41_LinearAccurateInterpolateVerticalDwords 6 + pshufd %1, %2, 10110001b + pshufd %6, %3, 10110001b + pmuludq %1, %4 + pmuludq %6, %5 + paddq %1, %6 + pmuludq %2, %4 + pmuludq %3, %5 + paddq %2, %3 + psllq %1, 3 + psrlq %2, 29 + blendps %1, %2, 0101b +%endmacro + +%macro SSE41_BilinearAccurateDownsample2xOrLess_8px 0 + movdqa xmm_tmp0, xmm_xpos_int + pshufb xmm_tmp0, xmm_0 + psubb xmm_xpos_int, xmm_tmp0 + SSE2_UnpckXFracw xmm_tmp0, xmm_tmp1, xmm_xpos_frac, xmm_7fff + mov r_tmp0, i_xpos + lea i_xpos, [i_xpos + 8 * i_scalex] + shr r_tmp0, 16 + lddqu xmm_tmp4, [p_src_row0 + r_tmp0] + pshufb xmm_tmp4, xmm_xpos_int + movdqa xmm_tmp5, xmm_tmp4 + punpcklbw xmm_tmp4, xmm_0 + punpckhbw xmm_tmp5, xmm_0 + pmaddwd xmm_tmp4, xmm_tmp0 + pmaddwd xmm_tmp5, xmm_tmp1 + lddqu xmm_tmp2, [p_src_row1 + r_tmp0] + pshufb xmm_tmp2, xmm_xpos_int + movdqa xmm_tmp3, xmm_tmp2 + punpcklbw xmm_tmp2, xmm_0 + punpckhbw xmm_tmp3, xmm_0 + pmaddwd xmm_tmp2, xmm_tmp0 + pmaddwd xmm_tmp3, xmm_tmp1 + SSE41_LinearAccurateInterpolateVerticalDwords xmm_tmp0, xmm_tmp4, xmm_tmp2, xmm_yfrac0, xmm_yfrac1, xmm_tmp1 + SSE41_LinearAccurateInterpolateVerticalDwords xmm_tmp1, xmm_tmp5, xmm_tmp3, xmm_yfrac0, xmm_yfrac1, xmm_tmp2 + packssdw xmm_tmp0, xmm_tmp1 + pavgw xmm_tmp0, xmm_0 + packuswb xmm_tmp0, xmm_tmp0 + movlps [p_dst], xmm_tmp0 + add p_dst, 8 + SSE2_BilinearIncXposw xmm_xpos_int, xmm_xpos_frac, 
xmm_xpos_int_inc, xmm_xpos_frac_inc, xmm_7fff, xmm_tmp0 +%endmacro + +%macro SSE41_BilinearAccurateDownsample4xOrLess_8px 0 + movdqa xmm_tmp0, xmm_xpos_int + pshufb xmm_tmp0, xmm_shufb_0000000088888888 + psubb xmm_xpos_int, xmm_tmp0 + SSE2_UnpckXFracw xmm_tmp0, xmm_tmp1, xmm_xpos_frac, xmm_7fff + mov r_tmp0, i_xpos + shr r_tmp0, 16 + movdqa xmm_tmp3, xmm_xpos_int + punpcklbw xmm_tmp3, xmm_db80h + lddqu xmm_tmp4, [p_src_row0 + r_tmp0] + lddqu xmm_tmp2, [p_src_row1 + r_tmp0] + lea r_tmp0, [i_xpos + 4 * i_scalex] + lea i_xpos, [i_xpos + 8 * i_scalex] + shr r_tmp0, 16 + pshufb xmm_tmp4, xmm_tmp3 + pshufb xmm_tmp2, xmm_tmp3 + pmaddwd xmm_tmp4, xmm_tmp0 + pmaddwd xmm_tmp2, xmm_tmp0 + SSE41_LinearAccurateInterpolateVerticalDwords xmm_tmp0, xmm_tmp4, xmm_tmp2, xmm_yfrac0, xmm_yfrac1, xmm_tmp3 + movdqa xmm_tmp2, xmm_xpos_int + punpckhbw xmm_tmp2, xmm_db80h + lddqu xmm_tmp4, [p_src_row0 + r_tmp0] + lddqu xmm_tmp3, [p_src_row1 + r_tmp0] + pshufb xmm_tmp4, xmm_tmp2 + pshufb xmm_tmp3, xmm_tmp2 + pmaddwd xmm_tmp4, xmm_tmp1 + pmaddwd xmm_tmp3, xmm_tmp1 + SSE41_LinearAccurateInterpolateVerticalDwords xmm_tmp1, xmm_tmp4, xmm_tmp3, xmm_yfrac0, xmm_yfrac1, xmm_tmp2 + packssdw xmm_tmp0, xmm_tmp1 + pavgw xmm_tmp0, xmm_0 + packuswb xmm_tmp0, xmm_tmp0 + movlps [p_dst], xmm_tmp0 + add p_dst, 8 + SSE2_BilinearIncXposw xmm_xpos_int, xmm_xpos_frac, xmm_xpos_int_inc, xmm_xpos_frac_inc, xmm_7fff, xmm_tmp0 +%endmacro + +%macro SSE41_GeneralBilinearAccurateDownsample_8px 0 + mov r_tmp0, i_xpos + shr r_tmp0, 16 + movd xmm_tmp4, [p_src_row0 + r_tmp0] + movd xmm_tmp2, [p_src_row1 + r_tmp0] + lea r_tmp0, [i_xpos + 1 * i_scalex] + shr r_tmp0, 16 + pinsrw xmm_tmp4, [p_src_row0 + r_tmp0], 1 + pinsrw xmm_tmp2, [p_src_row1 + r_tmp0], 1 + lea r_tmp0, [i_xpos + 2 * i_scalex] + lea i_xpos, [i_xpos + 4 * i_scalex] + shr r_tmp0, 16 + pinsrw xmm_tmp4, [p_src_row0 + r_tmp0], 2 + pinsrw xmm_tmp2, [p_src_row1 + r_tmp0], 2 + mov r_tmp0, i_xpos + sub r_tmp0, i_scalex + shr r_tmp0, 16 + pinsrw xmm_tmp4, [p_src_row0 
+ r_tmp0], 3 + pinsrw xmm_tmp2, [p_src_row1 + r_tmp0], 3 + punpcklbw xmm_tmp4, xmm_0 + punpcklbw xmm_tmp2, xmm_0 + pmaddwd xmm_tmp4, xmm_xfrac0 + pmaddwd xmm_tmp2, xmm_xfrac0 + SSE41_LinearAccurateInterpolateVerticalDwords xmm_tmp0, xmm_tmp4, xmm_tmp2, xmm_yfrac0, xmm_yfrac1, xmm_tmp3 + mov r_tmp0, i_xpos + shr r_tmp0, 16 + movd xmm_tmp4, [p_src_row0 + r_tmp0] + movd xmm_tmp3, [p_src_row1 + r_tmp0] + lea r_tmp0, [i_xpos + 1 * i_scalex] + shr r_tmp0, 16 + pinsrw xmm_tmp4, [p_src_row0 + r_tmp0], 1 + pinsrw xmm_tmp3, [p_src_row1 + r_tmp0], 1 + lea r_tmp0, [i_xpos + 2 * i_scalex] + lea i_xpos, [i_xpos + 4 * i_scalex] + shr r_tmp0, 16 + pinsrw xmm_tmp4, [p_src_row0 + r_tmp0], 2 + pinsrw xmm_tmp3, [p_src_row1 + r_tmp0], 2 + mov r_tmp0, i_xpos + sub r_tmp0, i_scalex + shr r_tmp0, 16 + pinsrw xmm_tmp4, [p_src_row0 + r_tmp0], 3 + pinsrw xmm_tmp3, [p_src_row1 + r_tmp0], 3 + punpcklbw xmm_tmp4, xmm_0 + punpcklbw xmm_tmp3, xmm_0 + pmaddwd xmm_tmp4, xmm_xfrac1 + pmaddwd xmm_tmp3, xmm_xfrac1 + SSE41_LinearAccurateInterpolateVerticalDwords xmm_tmp1, xmm_tmp4, xmm_tmp3, xmm_yfrac0, xmm_yfrac1, xmm_tmp2 + packssdw xmm_tmp0, xmm_tmp1 + pavgw xmm_tmp0, xmm_0 + packuswb xmm_tmp0, xmm_tmp0 + movlps [p_dst], xmm_tmp0 + add p_dst, 8 + paddw xmm_xfrac0, xmm_xfrac_inc + paddw xmm_xfrac1, xmm_xfrac_inc + pand xmm_xfrac0, xmm_7fff + pand xmm_xfrac1, xmm_7fff +%endmacro + +; downsample_8px_macro=%1 b_fast=%2 +%macro SSE2_GeneralBilinearDownsampler_loop 2 +%%height: + mov p_src_row0, i_ypos + shr p_src_row0, 15 + imul p_src_row0, i_src_stride + add p_src_row0, p_src + mov p_src_row1, p_src_row0 + add p_src_row1, i_src_stride + movd xmm_tmp1, i_yposd +%if %2 + pshuflw xmm_tmp1, xmm_tmp1, 0 + psllw xmm_tmp1, 1 + psrlw xmm_tmp1, 1 +%else + pslld xmm_tmp1, 17 + psrld xmm_tmp1, 17 +%endif +%ifdef X86_32 + pshufd xmm_tmp1, xmm_tmp1, 0 + pcmpeqw xmm_tmp0, xmm_tmp0 +%if %2 + psrlw xmm_tmp0, 1 +%else + psrld xmm_tmp0, 17 +%endif + pxor xmm_tmp0, xmm_tmp1 + movdqa xmm_yfrac0, xmm_tmp0 + movdqa 
xmm_yfrac1, xmm_tmp1 +%else + pshufd xmm_yfrac1, xmm_tmp1, 0 + pcmpeqw xmm_yfrac0, xmm_yfrac0 +%if %2 + psrlw xmm_yfrac0, 1 +%else + psrld xmm_yfrac0, 17 +%endif + pxor xmm_yfrac0, xmm_yfrac1 +%endif + + mov i_xpos, 1 << 15 + mov i_width_cnt, i_dst_width + sub i_width_cnt, 1 + +%ifdef xmm_xpos_int + movdqa xmm_xpos_int, xmm_xpos_int_begin + movdqa xmm_xpos_frac, xmm_xpos_frac_begin +%else + movdqa xmm_xfrac0, xmm_xfrac0_begin + movdqa xmm_xfrac1, xmm_xfrac1_begin +%endif + +%%width: + %1 + sub i_width_cnt, 8 + jg %%width + + lea p_dst, [p_dst + i_width_cnt + 1] + imul i_width_cnt, i_scalex + add i_xpos, i_width_cnt + shr i_xpos, 16 + movzx r_tmp0, byte [p_src_row0 + i_xpos] + mov [p_dst - 1], r_tmp0b +%ifdef X86_32 + mov r_tmp0, i_scaleyd + add i_yposd, r_tmp0 +%else + add i_yposd, i_scaleyd +%endif + add p_dst, i_dst_stride_less_width + sub i_dst_height, 1 + jg %%height +%endmacro + +;************************************************************************************************************** +;void GeneralBilinearFastDownsampler_ssse3 (uint8_t* pDst, int32_t iDstStride, int32_t iDstWidth, +; int32_t iDstHeight, uint8_t* pSrc, int32_t iSrcStride, uint32_t uiScaleX, +; uint32_t uiScaleY); +; +;************************************************************************************************************** + +WELS_EXTERN GeneralBilinearFastDownsampler_ssse3 + %assign push_num 0 +%ifndef X86_32 + push r12 + push r13 + push rbx + push rbp + %assign push_num 4 +%ifdef WIN64 + push rdi + push rsi + %assign push_num push_num + 2 +%endif +%endif + LOAD_7_PARA + PUSH_XMM 16 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r2, r2d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r5, r5d + ZERO_EXTENSION r6d + sub r1, r2 ; dst_stride - dst_width +%ifdef X86_32 + movd xmm0, arg8 + movd xmm1, esp + and esp, -16 +%ifdef X86_32_PICASM + sub esp, 8 * 4 + 9 * 16 +%else + sub esp, 8 * 4 + 7 * 16 +%endif + movd [esp], xmm1 + %define p_dst r0 + %define i_dst_stride_less_width [esp + 1 * 4] + %define 
i_dst_width [esp + 2 * 4] + %define i_dst_height dword [esp + 3 * 4] + %define p_src [esp + 4 * 4] + %define i_src_stride [esp + 5 * 4] + %define i_scalex r6 + %define i_scalexd r6d + %define i_scaleyd [esp + 6 * 4] + %define i_xpos r2 + %define i_ypos dword [esp + 7 * 4] + %define i_yposd dword [esp + 7 * 4] + %define p_src_row0 r3 + %define p_src_row1 r4 + %define i_width_cnt r5 + %define r_tmp0 r1 + %define r_tmp0b r1b + %define xmm_xpos_frac xmm1 + %define xmm_xpos_frac_inc [esp + 8 * 4] + %define xmm_xpos_int xmm3 + %define xmm_xpos_int_inc [esp + 8 * 4 + 1 * 16] + %define xmm_yfrac0 [esp + 8 * 4 + 2 * 16] + %define xmm_yfrac1 [esp + 8 * 4 + 3 * 16] + %define xmm_tmp0 xmm7 + %define xmm_tmp1 xmm0 + %define xmm_tmp2 xmm2 + %define xmm_tmp3 xmm4 + %define xmm_tmp4 xmm5 + %define xmm_tmp5 xmm6 + %define xmm_0 [esp + 8 * 4 + 4 * 16] + %define xmm_xpos_int_begin [esp + 8 * 4 + 5 * 16] + %define xmm_xpos_frac_begin [esp + 8 * 4 + 6 * 16] +%ifdef X86_32_PICASM + %define xmm_db80h [esp + 8 * 4 + 7 * 16] + %define xmm_shufb_0000000088888888 [esp + 8 * 4 + 8 * 16] + pxor xmm_tmp4, xmm_tmp4 + pcmpeqb xmm_tmp5, xmm_tmp5 + psubb xmm_tmp4, xmm_tmp5 + movdqa xmm_tmp3, xmm_tmp4 + psllw xmm_tmp3, 3 + pslldq xmm_tmp3, 8 + movdqa xmm_shufb_0000000088888888, xmm_tmp3 + psllw xmm_tmp4, 7 + movdqa xmm_db80h, xmm_tmp4 +%else + %define xmm_db80h [db80h_256] + %define xmm_shufb_0000000088888888 [shufb_0000000088888888] +%endif + mov i_dst_stride_less_width, r1 + mov i_dst_width, r2 + mov i_dst_height, r3 + mov p_src, r4 + mov i_src_stride, r5 + movd i_scaleyd, xmm0 + pxor xmm_tmp0, xmm_tmp0 + movdqa xmm_0, xmm_tmp0 +%else + %define p_dst r0 + %define i_dst_stride_less_width r1 + %define i_dst_width r2 + %define i_dst_height r3 + %define p_src r4 + %define i_src_stride r5 + %define i_scalex r6 + %define i_scalexd r6d + %define i_scaleyd dword arg8d + %define i_xpos r12 + %define i_ypos r13 + %define i_yposd r13d + %define p_src_row0 rbp +%ifdef WIN64 + %define p_src_row1 rsi + %define 
i_width_cnt rdi +%else + %define p_src_row1 r11 + %define i_width_cnt rax +%endif + %define r_tmp0 rbx + %define r_tmp0b bl + %define xmm_0 xmm0 + %define xmm_xpos_frac xmm1 + %define xmm_xpos_frac_inc xmm8 + %define xmm_xpos_int xmm3 + %define xmm_xpos_int_inc xmm10 + %define xmm_yfrac0 xmm11 + %define xmm_yfrac1 xmm12 + %define xmm_tmp0 xmm7 + %define xmm_tmp1 xmm2 + %define xmm_tmp2 xmm9 + %define xmm_tmp3 xmm4 + %define xmm_tmp4 xmm5 + %define xmm_tmp5 xmm6 + %define xmm_xpos_int_begin xmm14 + %define xmm_xpos_frac_begin xmm15 + %define xmm_db80h [db80h_256] + %define xmm_shufb_0000000088888888 [shufb_0000000088888888] + pxor xmm_0, xmm_0 +%endif + + sub i_dst_height, 1 + je .final_row + jl .done + + mov i_ypos, 1 << 14 + movd xmm_xpos_frac, i_scalexd + pshufd xmm_xpos_frac, xmm_xpos_frac, 0 + movdqa xmm_tmp0, xmm_xpos_frac + pslld xmm_tmp0, 2 + pslldq xmm_xpos_frac, 4 + paddd xmm_tmp0, xmm_xpos_frac + movdqa xmm_tmp1, xmm_xpos_frac + pslldq xmm_tmp1, 4 + paddd xmm_xpos_frac, xmm_tmp1 + paddd xmm_tmp0, xmm_tmp1 + pslldq xmm_tmp1, 4 + paddd xmm_xpos_frac, xmm_tmp1 + paddd xmm_tmp0, xmm_tmp1 + pcmpeqw xmm_tmp1, xmm_tmp1 + psrld xmm_tmp1, 31 + pslld xmm_tmp1, 15 + paddd xmm_xpos_frac, xmm_tmp1 + paddd xmm_tmp0, xmm_tmp1 + movdqa xmm_xpos_int, xmm_xpos_frac + movdqa xmm_tmp1, xmm_tmp0 + psrld xmm_xpos_int, 16 + psrld xmm_tmp1, 16 + packssdw xmm_xpos_int, xmm_tmp1 + packuswb xmm_xpos_int, xmm_xpos_int + movdqa xmm_tmp1, xmm_xpos_int + pcmpeqw xmm_tmp2, xmm_tmp2 + psubb xmm_tmp1, xmm_tmp2 + punpcklbw xmm_xpos_int, xmm_tmp1 + pslld xmm_xpos_frac, 16 + pslld xmm_tmp0, 16 + psrad xmm_xpos_frac, 16 + psrad xmm_tmp0, 16 + packssdw xmm_xpos_frac, xmm_tmp0 + movd xmm_tmp0, i_scalexd + pslld xmm_tmp0, 3 + movdqa xmm_tmp1, xmm_tmp0 + punpcklwd xmm_tmp0, xmm_tmp0 + pshufd xmm_tmp0, xmm_tmp0, 0 + movdqa xmm_xpos_frac_inc, xmm_tmp0 + psrld xmm_tmp1, 16 + psubw xmm_tmp1, xmm_tmp2 + pxor xmm_tmp2, xmm_tmp2 + pshufb xmm_tmp1, xmm_tmp2 + movdqa xmm_xpos_int_inc, xmm_tmp1 + movdqa 
xmm_xpos_int_begin, xmm_xpos_int + movdqa xmm_xpos_frac_begin, xmm_xpos_frac + + cmp i_scalex, 4 << 16 + ja .scalex_above4 + cmp i_scalex, 2 << 16 + ja .scalex_above2_beloweq4 + SSE2_GeneralBilinearDownsampler_loop SSSE3_BilinearFastDownsample2xOrLess_8px, 1 + jmp .final_row +%ifdef X86_32 + %undef xmm_yfrac0 + %xdefine xmm_yfrac0 xmm_tmp5 + %undef xmm_tmp5 +%endif +.scalex_above2_beloweq4: + SSE2_GeneralBilinearDownsampler_loop SSSE3_BilinearFastDownsample4xOrLess_8px, 1 + jmp .final_row +.scalex_above4: +%xdefine xmm_xfrac0 xmm_xpos_frac +%xdefine xmm_xfrac1 xmm_xpos_int +%xdefine xmm_xfrac0_begin xmm_xpos_int_begin +%xdefine xmm_xfrac1_begin xmm_xpos_frac_begin +%xdefine xmm_xfrac_inc xmm_xpos_frac_inc +%undef xmm_xpos_int +%undef xmm_xpos_frac +%undef xmm_xpos_int_begin +%undef xmm_xpos_frac_begin +%undef xmm_xpos_int_inc +%undef xmm_xpos_frac_inc + SSE2_UnpckXFracuw xmm_tmp0, xmm_xfrac1, xmm_xfrac0 + movdqa xmm_xfrac0, xmm_tmp0 + movdqa xmm_xfrac0_begin, xmm_xfrac0 + movdqa xmm_xfrac1_begin, xmm_xfrac1 + pcmpeqw xmm_tmp0, xmm_tmp0 + pmullw xmm_tmp0, xmm_xfrac_inc + punpcklwd xmm_tmp0, xmm_xfrac_inc + movdqa xmm_xfrac_inc, xmm_tmp0 + SSE2_GeneralBilinearDownsampler_loop SSE2_GeneralBilinearFastDownsample_8px, 1 + +.final_row: + mov p_src_row0, i_ypos + shr p_src_row0, 15 + imul p_src_row0, i_src_stride + add p_src_row0, p_src + mov i_xpos, 1 << 15 + mov i_width_cnt, i_dst_width + +.final_row_width: + mov r_tmp0, i_xpos + shr r_tmp0, 16 + movzx r_tmp0, byte [p_src_row0 + r_tmp0] + mov [p_dst], r_tmp0b + add p_dst, 1 + add i_xpos, i_scalex + sub i_width_cnt, 1 + jg .final_row_width + +.done: +%ifdef X86_32 + mov esp, [esp] +%endif + POP_XMM + LOAD_7_PARA_POP +%ifndef X86_32 +%ifdef WIN64 + pop rsi + pop rdi +%endif + pop rbp + pop rbx + pop r13 + pop r12 +%endif + ret +%undef p_dst +%undef i_dst_stride_less_width +%undef i_dst_width +%undef i_dst_height +%undef p_src +%undef i_src_stride +%undef i_scalex +%undef i_scalexd +%undef i_scaleyd +%undef i_xpos +%undef 
i_ypos +%undef i_yposd +%undef p_src_row0 +%undef p_src_row1 +%undef i_width_cnt +%undef r_tmp0 +%undef r_tmp0b +%undef xmm_0 +%undef xmm_xpos_frac +%undef xmm_xpos_frac_inc +%undef xmm_xpos_int +%undef xmm_xpos_int_inc +%undef xmm_yfrac0 +%undef xmm_yfrac1 +%undef xmm_tmp0 +%undef xmm_tmp1 +%undef xmm_tmp2 +%undef xmm_tmp3 +%undef xmm_tmp4 +%undef xmm_tmp5 +%undef xmm_xpos_int_begin +%undef xmm_xpos_frac_begin +%undef xmm_xfrac0 +%undef xmm_xfrac1 +%undef xmm_xfrac0_begin +%undef xmm_xfrac1_begin +%undef xmm_xfrac_inc +%undef xmm_db80h +%undef xmm_shufb_0000000088888888 + +;************************************************************************************************************** +;void GeneralBilinearAccurateDownsampler_sse41 (uint8_t* pDst, int32_t iDstStride, int32_t iDstWidth, +; int32_t iDstHeight, uint8_t* pSrc, int32_t iSrcStride, uint32_t uiScaleX, +; uint32_t uiScaleY); +; +;************************************************************************************************************** + +WELS_EXTERN GeneralBilinearAccurateDownsampler_sse41 + %assign push_num 0 +%ifndef X86_32 + push r12 + push r13 + push rbx + push rbp + %assign push_num 4 +%ifdef WIN64 + push rdi + push rsi + %assign push_num push_num + 2 +%endif +%endif + LOAD_7_PARA + PUSH_XMM 16 + SIGN_EXTENSION r1, r1d + SIGN_EXTENSION r2, r2d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r5, r5d + ZERO_EXTENSION r6d + sub r1, r2 ; dst_stride - dst_width + add r6, r6 ; 2 * scalex +%ifdef X86_32 + movd xmm0, arg8 + movd xmm1, esp + and esp, -16 +%ifdef X86_32_PICASM + sub esp, 8 * 4 + 10 * 16 +%else + sub esp, 8 * 4 + 8 * 16 +%endif + movd [esp], xmm1 + %define p_dst r0 + %define i_dst_stride_less_width [esp + 1 * 4] + %define i_dst_width [esp + 2 * 4] + %define i_dst_height dword [esp + 3 * 4] + %define p_src [esp + 4 * 4] + %define i_src_stride [esp + 5 * 4] + %define i_scalex r6 + %define i_scalexd r6d + %define i_scaleyd [esp + 6 * 4] + %define i_xpos r2 + %define i_ypos dword [esp + 7 * 4] + 
%define i_yposd dword [esp + 7 * 4] + %define p_src_row0 r3 + %define p_src_row1 r4 + %define i_width_cnt r5 + %define r_tmp0 r1 + %define r_tmp0b r1b + %define xmm_xpos_frac xmm1 + %define xmm_xpos_frac_inc [esp + 8 * 4] + %define xmm_xpos_int xmm3 + %define xmm_xpos_int_inc [esp + 8 * 4 + 1 * 16] + %define xmm_yfrac0 [esp + 8 * 4 + 2 * 16] + %define xmm_yfrac1 [esp + 8 * 4 + 3 * 16] + %define xmm_tmp0 xmm7 + %define xmm_tmp1 xmm0 + %define xmm_tmp2 xmm2 + %define xmm_tmp3 xmm4 + %define xmm_tmp4 xmm5 + %define xmm_tmp5 xmm6 + %define xmm_0 [esp + 8 * 4 + 4 * 16] + %define xmm_7fff [esp + 8 * 4 + 5 * 16] + %define xmm_xpos_int_begin [esp + 8 * 4 + 6 * 16] + %define xmm_xpos_frac_begin [esp + 8 * 4 + 7 * 16] +%ifdef X86_32_PICASM + %define xmm_db80h [esp + 8 * 4 + 8 * 16] + %define xmm_shufb_0000000088888888 [esp + 8 * 4 + 9 * 16] + pxor xmm_tmp4, xmm_tmp4 + pcmpeqb xmm_tmp5, xmm_tmp5 + psubb xmm_tmp4, xmm_tmp5 + movdqa xmm_tmp3, xmm_tmp4 + psllw xmm_tmp3, 3 + pslldq xmm_tmp3, 8 + movdqa xmm_shufb_0000000088888888, xmm_tmp3 + psllw xmm_tmp4, 7 + movdqa xmm_db80h, xmm_tmp4 +%else + %define xmm_db80h [db80h_256] + %define xmm_shufb_0000000088888888 [shufb_0000000088888888] +%endif + mov i_dst_stride_less_width, r1 + mov i_dst_width, r2 + mov i_dst_height, r3 + mov p_src, r4 + mov i_src_stride, r5 + movd i_scaleyd, xmm0 + pxor xmm_tmp5, xmm_tmp5 + movdqa xmm_0, xmm_tmp5 + pcmpeqw xmm_tmp5, xmm_tmp5 + psrlw xmm_tmp5, 1 + movdqa xmm_7fff, xmm_tmp5 +%else + %define p_dst r0 + %define i_dst_stride_less_width r1 + %define i_dst_width r2 + %define i_dst_height r3 + %define p_src r4 + %define i_src_stride r5 + %define i_scalex r6 + %define i_scalexd r6d + %define i_scaleyd dword arg8d + %define i_xpos r12 + %define i_ypos r13 + %define i_yposd r13d + %define p_src_row0 rbp +%ifdef WIN64 + %define p_src_row1 rsi + %define i_width_cnt rdi +%else + %define p_src_row1 r11 + %define i_width_cnt rax +%endif + %define r_tmp0 rbx + %define r_tmp0b bl + %define xmm_0 xmm0 + %define 
xmm_xpos_frac xmm1 + %define xmm_xpos_frac_inc xmm8 + %define xmm_xpos_int xmm3 + %define xmm_xpos_int_inc xmm10 + %define xmm_yfrac0 xmm11 + %define xmm_yfrac1 xmm12 + %define xmm_tmp0 xmm7 + %define xmm_tmp1 xmm2 + %define xmm_tmp2 xmm9 + %define xmm_tmp3 xmm4 + %define xmm_tmp4 xmm5 + %define xmm_tmp5 xmm6 + %define xmm_7fff xmm13 + %define xmm_xpos_int_begin xmm14 + %define xmm_xpos_frac_begin xmm15 + %define xmm_db80h [db80h_256] + %define xmm_shufb_0000000088888888 [shufb_0000000088888888] + pxor xmm_0, xmm_0 + pcmpeqw xmm_7fff, xmm_7fff + psrlw xmm_7fff, 1 +%endif + + sub i_dst_height, 1 + je .final_row + jl .done + + mov i_ypos, 1 << 14 + movd xmm_xpos_frac, i_scalexd + pshufd xmm_xpos_frac, xmm_xpos_frac, 0 + movdqa xmm_tmp0, xmm_xpos_frac + pslld xmm_tmp0, 2 + pslldq xmm_xpos_frac, 4 + paddd xmm_tmp0, xmm_xpos_frac + movdqa xmm_tmp1, xmm_xpos_frac + pslldq xmm_tmp1, 4 + paddd xmm_xpos_frac, xmm_tmp1 + paddd xmm_tmp0, xmm_tmp1 + pslldq xmm_tmp1, 4 + paddd xmm_xpos_frac, xmm_tmp1 + paddd xmm_tmp0, xmm_tmp1 + pcmpeqw xmm_tmp1, xmm_tmp1 + psrld xmm_tmp1, 31 + pslld xmm_tmp1, 15 + paddd xmm_xpos_frac, xmm_tmp1 + paddd xmm_tmp0, xmm_tmp1 + movdqa xmm_xpos_int, xmm_xpos_frac + movdqa xmm_tmp1, xmm_tmp0 + psrld xmm_xpos_int, 16 + psrld xmm_tmp1, 16 + packssdw xmm_xpos_int, xmm_tmp1 + packuswb xmm_xpos_int, xmm_xpos_int + movdqa xmm_tmp1, xmm_xpos_int + pcmpeqw xmm_tmp2, xmm_tmp2 + psubb xmm_tmp1, xmm_tmp2 + punpcklbw xmm_xpos_int, xmm_tmp1 + pslld xmm_xpos_frac, 16 + pslld xmm_tmp0, 16 + psrad xmm_xpos_frac, 16 + psrad xmm_tmp0, 16 + packssdw xmm_xpos_frac, xmm_tmp0 + psrlw xmm_xpos_frac, 1 + movd xmm_tmp0, i_scalexd + pslld xmm_tmp0, 3 + movdqa xmm_tmp1, xmm_tmp0 + punpcklwd xmm_tmp0, xmm_tmp0 + pshufd xmm_tmp0, xmm_tmp0, 0 + psrlw xmm_tmp0, 1 + movdqa xmm_xpos_frac_inc, xmm_tmp0 + psrld xmm_tmp1, 16 + pxor xmm_tmp2, xmm_tmp2 + pshufb xmm_tmp1, xmm_tmp2 + movdqa xmm_xpos_int_inc, xmm_tmp1 + movdqa xmm_xpos_int_begin, xmm_xpos_int + movdqa xmm_xpos_frac_begin, 
xmm_xpos_frac + + cmp i_scalex, 4 << 16 + ja .scalex_above4 + cmp i_scalex, 2 << 16 + ja .scalex_above2_beloweq4 + SSE2_GeneralBilinearDownsampler_loop SSE41_BilinearAccurateDownsample2xOrLess_8px, 0 + jmp .final_row +%ifdef X86_32 + %undef xmm_yfrac0 + %xdefine xmm_yfrac0 xmm_tmp5 + %undef xmm_tmp5 +%endif +.scalex_above2_beloweq4: + SSE2_GeneralBilinearDownsampler_loop SSE41_BilinearAccurateDownsample4xOrLess_8px, 0 + jmp .final_row +.scalex_above4: +%xdefine xmm_xfrac0 xmm_xpos_frac +%xdefine xmm_xfrac1 xmm_xpos_int +%xdefine xmm_xfrac0_begin xmm_xpos_int_begin +%xdefine xmm_xfrac1_begin xmm_xpos_frac_begin +%xdefine xmm_xfrac_inc xmm_xpos_frac_inc +%undef xmm_xpos_int +%undef xmm_xpos_frac +%undef xmm_xpos_int_begin +%undef xmm_xpos_frac_begin +%undef xmm_xpos_int_inc +%undef xmm_xpos_frac_inc + SSE2_UnpckXFracw xmm_tmp0, xmm_xfrac1, xmm_xfrac0, xmm_7fff + movdqa xmm_xfrac0, xmm_tmp0 + movdqa xmm_xfrac0_begin, xmm_xfrac0 + movdqa xmm_xfrac1_begin, xmm_xfrac1 + pcmpeqw xmm_tmp0, xmm_tmp0 + pmullw xmm_tmp0, xmm_xfrac_inc + punpcklwd xmm_tmp0, xmm_xfrac_inc + movdqa xmm_xfrac_inc, xmm_tmp0 + SSE2_GeneralBilinearDownsampler_loop SSE41_GeneralBilinearAccurateDownsample_8px, 0 + +.final_row: + mov p_src_row0, i_ypos + shr p_src_row0, 15 + imul p_src_row0, i_src_stride + add p_src_row0, p_src + mov i_xpos, 1 << 15 + mov i_width_cnt, i_dst_width + +.final_row_width: + mov r_tmp0, i_xpos + shr r_tmp0, 16 + movzx r_tmp0, byte [p_src_row0 + r_tmp0] + mov [p_dst], r_tmp0b + add p_dst, 1 + add i_xpos, i_scalex + sub i_width_cnt, 1 + jg .final_row_width + +.done: +%ifdef X86_32 + mov esp, [esp] +%endif + POP_XMM + LOAD_7_PARA_POP +%ifndef X86_32 +%ifdef WIN64 + pop rsi + pop rdi +%endif + pop rbp + pop rbx + pop r13 + pop r12 +%endif + ret +%undef p_dst +%undef i_dst_stride_less_width +%undef i_dst_width +%undef i_dst_height +%undef p_src +%undef i_src_stride +%undef i_scalex +%undef i_scalexd +%undef i_scaleyd +%undef i_xpos +%undef i_ypos +%undef i_yposd +%undef p_src_row0 
+%undef p_src_row1 +%undef i_width_cnt +%undef r_tmp0 +%undef r_tmp0b +%undef xmm_0 +%undef xmm_xpos_frac +%undef xmm_xpos_frac_inc +%undef xmm_xpos_int +%undef xmm_xpos_int_inc +%undef xmm_yfrac0 +%undef xmm_yfrac1 +%undef xmm_tmp0 +%undef xmm_tmp1 +%undef xmm_tmp2 +%undef xmm_tmp3 +%undef xmm_tmp4 +%undef xmm_tmp5 +%undef xmm_7fff +%undef xmm_xpos_int_begin +%undef xmm_xpos_frac_begin +%undef xmm_xfrac0 +%undef xmm_xfrac1 +%undef xmm_xfrac0_begin +%undef xmm_xfrac1_begin +%undef xmm_xfrac_inc +%undef xmm_db80h +%undef xmm_shufb_0000000088888888 + +%ifdef HAVE_AVX2 +; xpos_int=%1 xpos_frac=%2 inc_int+1=%3 inc_frac=%4 tmp=%5 +%macro AVX2_BilinearIncXposuw 5 + vpaddusw %5, %2, %4 + vpaddw %2, %2, %4 + vpcmpeqw %5, %5, %2 + vpaddb %1, %1, %3 + vpaddb %1, %1, %5 ; subtract 1 if no carry +%endmacro + +; outl=%1 outh=%2 in=%3 FFFFh/7FFFh=%4 +%macro AVX2_UnpckXFrac 4 + vpxor %1, %3, %4 + vpunpckhwd %2, %1, %3 + vpunpcklwd %1, %1, %3 +%endmacro + +; out0=%1 out1=%2 xfrac=%3 yfrac0=%4 yfrac1=%5 +%macro AVX2_BilinearFastCalcXYFrac 5 + vpmulhuw %2, %3, %5 + vpmulhuw %1, %3, %4 +%endmacro + +; [in:dwordsl out:bytes] dwordsh=%2 zero=%3 +%macro AVX2_BilinearFastPackDwordsToBytes 3 + vpsrld %1, %1, 14 + vpsrld %2, %2, 14 + vpackssdw %1, %1, %2 + vpavgw %1, %1, %3 + vpackuswb %1, %1, %1 +%endmacro + +%macro AVX2_BilinearFastDownsample2xOrLess_16px 0 + vpshufb ymm_tmp0, ymm_xpos_int, ymm_0 + vpsubb ymm_xpos_int, ymm_xpos_int, ymm_tmp0 + AVX2_UnpckXFrac ymm_tmp0, ymm_tmp1, ymm_xpos_frac, ymm_ffff + mov r_tmp0, i_xpos + shr r_tmp0, 16 + vmovdqu xmm_tmp4, [p_src_row0 + r_tmp0] + vmovdqu xmm_tmp5, [p_src_row1 + r_tmp0] + lea r_tmp0, [i_xpos + 4 * i_scalex2] + lea i_xpos, [i_xpos + 8 * i_scalex2] + shr r_tmp0, 16 + vinserti128 ymm_tmp4, ymm_tmp4, [p_src_row0 + r_tmp0], 1 + vinserti128 ymm_tmp5, ymm_tmp5, [p_src_row1 + r_tmp0], 1 + vpshufb ymm_tmp4, ymm_tmp4, ymm_xpos_int + vpshufb ymm_tmp5, ymm_tmp5, ymm_xpos_int + AVX2_BilinearFastCalcXYFrac ymm_tmp0, ymm_tmp2, ymm_tmp0, ymm_yfrac0, 
ymm_yfrac1 + vpunpcklbw ymm_tmp3, ymm_tmp4, ymm_0 + vpmaddwd ymm_tmp0, ymm_tmp0, ymm_tmp3 + vpunpcklbw ymm_tmp3, ymm_tmp5, ymm_0 + vpmaddwd ymm_tmp2, ymm_tmp2, ymm_tmp3 + vpaddd ymm_tmp0, ymm_tmp0, ymm_tmp2 + AVX2_BilinearFastCalcXYFrac ymm_tmp1, ymm_tmp3, ymm_tmp1, ymm_yfrac0, ymm_yfrac1 + vpunpckhbw ymm_tmp2, ymm_tmp4, ymm_0 + vpmaddwd ymm_tmp1, ymm_tmp1, ymm_tmp2 + vpunpckhbw ymm_tmp2, ymm_tmp5, ymm_0 + vpmaddwd ymm_tmp3, ymm_tmp3, ymm_tmp2 + vpaddd ymm_tmp1, ymm_tmp1, ymm_tmp3 + AVX2_BilinearFastPackDwordsToBytes ymm_tmp0, ymm_tmp1, ymm_0 + vmovlps [p_dst], xmm_tmp0 + vextracti128 [p_dst + 8], ymm_tmp0, 1 + add p_dst, 16 + AVX2_BilinearIncXposuw ymm_xpos_int, ymm_xpos_frac, ymm_xpos_int_inc, ymm_xpos_frac_inc, ymm_tmp0 +%endmacro + +%macro AVX2_BilinearFastDownsample4xOrLess_16px 0 + vbroadcasti128 ymm_tmp0, xmm_shufb_0000000088888888 + vpshufb ymm_tmp0, ymm_xpos_int, ymm_tmp0 + vpsubb ymm_xpos_int, ymm_xpos_int, ymm_tmp0 + AVX2_UnpckXFrac ymm_tmp0, ymm_tmp1, ymm_xpos_frac, ymm_ffff + mov r_tmp0, i_xpos + shr r_tmp0, 16 + vmovdqu xmm_tmp4, [p_src_row0 + r_tmp0] + vmovdqu xmm_tmp3, [p_src_row1 + r_tmp0] + lea r_tmp0, [i_xpos + 4 * i_scalex2] + shr r_tmp0, 16 + vinserti128 ymm_tmp4, ymm_tmp4, [p_src_row0 + r_tmp0], 1 + vinserti128 ymm_tmp3, ymm_tmp3, [p_src_row1 + r_tmp0], 1 + lea r_tmp0, [i_xpos + 2 * i_scalex2] + lea i_xpos, [r_tmp0 + 4 * i_scalex2] + shr r_tmp0, 16 + vpunpcklbw ymm_tmp2, ymm_xpos_int, ymm_ffff + vpshufb ymm_tmp4, ymm_tmp4, ymm_tmp2 + vpshufb ymm_tmp3, ymm_tmp3, ymm_tmp2 + AVX2_BilinearFastCalcXYFrac ymm_tmp0, ymm_tmp2, ymm_tmp0, ymm_yfrac0, ymm_yfrac1 + vpmaddwd ymm_tmp0, ymm_tmp0, ymm_tmp4 + vpmaddwd ymm_tmp2, ymm_tmp2, ymm_tmp3 + vpaddd ymm_tmp0, ymm_tmp0, ymm_tmp2 + vmovdqu xmm_tmp4, [p_src_row0 + r_tmp0] + vmovdqu xmm_tmp3, [p_src_row1 + r_tmp0] + mov r_tmp0, i_xpos + lea i_xpos, [i_xpos + 2 * i_scalex2] + shr r_tmp0, 16 + vinserti128 ymm_tmp4, ymm_tmp4, [p_src_row0 + r_tmp0], 1 + vinserti128 ymm_tmp3, ymm_tmp3, [p_src_row1 + r_tmp0], 1 + 
vpunpckhbw ymm_tmp2, ymm_xpos_int, ymm_ffff + vpshufb ymm_tmp4, ymm_tmp4, ymm_tmp2 + vpshufb ymm_tmp3, ymm_tmp3, ymm_tmp2 + AVX2_BilinearFastCalcXYFrac ymm_tmp1, ymm_tmp2, ymm_tmp1, ymm_yfrac0, ymm_yfrac1 + vpmaddwd ymm_tmp1, ymm_tmp1, ymm_tmp4 + vpmaddwd ymm_tmp2, ymm_tmp2, ymm_tmp3 + vpaddd ymm_tmp1, ymm_tmp1, ymm_tmp2 + AVX2_BilinearFastPackDwordsToBytes ymm_tmp0, ymm_tmp1, ymm_0 + vmovlps [p_dst], xmm_tmp0 + vextracti128 [p_dst + 8], ymm_tmp0, 1 + add p_dst, 16 + AVX2_BilinearIncXposuw ymm_xpos_int, ymm_xpos_frac, ymm_xpos_int_inc, ymm_xpos_frac_inc, ymm_tmp0 +%endmacro + +%macro AVX2_BilinearFastDownsample8xOrLess_16px 0 + vbroadcasti128 ymm_tmp0, xmm_shufb_000044448888CCCC + vpshufb ymm_tmp0, ymm_xpos_int, ymm_tmp0 + vpsubb ymm_xpos_int, ymm_xpos_int, ymm_tmp0 + mov r_tmp0, i_xpos + shr r_tmp0, 16 + vmovdqu xmm_tmp4, [p_src_row0 + r_tmp0] + vmovdqu xmm_tmp5, [p_src_row1 + r_tmp0] + lea r_tmp0, [i_xpos + 4 * i_scalex2] + add i_xpos, i_scalex2 + shr r_tmp0, 16 + vinserti128 ymm_tmp4, ymm_tmp4, [p_src_row0 + r_tmp0], 1 + vinserti128 ymm_tmp5, ymm_tmp5, [p_src_row1 + r_tmp0], 1 + mov r_tmp0, i_xpos + shr r_tmp0, 16 + vmovdqu xmm_tmp0, [p_src_row0 + r_tmp0] + vmovdqu xmm_tmp1, [p_src_row1 + r_tmp0] + lea r_tmp0, [i_xpos + 4 * i_scalex2] + add i_xpos, i_scalex2 + shr r_tmp0, 16 + vinserti128 ymm_tmp0, ymm_tmp0, [p_src_row0 + r_tmp0], 1 + vinserti128 ymm_tmp1, ymm_tmp1, [p_src_row1 + r_tmp0], 1 + vpunpcklbw ymm_tmp3, ymm_xpos_int, ymm_ffff + vpshufb ymm_tmp4, ymm_tmp4, ymm_tmp3 + vpshufb ymm_tmp5, ymm_tmp5, ymm_tmp3 + vpshufb ymm_tmp0, ymm_tmp0, ymm_tmp3 + vpshufb ymm_tmp1, ymm_tmp1, ymm_tmp3 + vpblendd ymm_tmp4, ymm_tmp4, ymm_tmp0, 11001100b + vpblendd ymm_tmp5, ymm_tmp5, ymm_tmp1, 11001100b + AVX2_UnpckXFrac ymm_tmp0, ymm_tmp1, ymm_xpos_frac, ymm_ffff + AVX2_BilinearFastCalcXYFrac ymm_tmp0, ymm_tmp2, ymm_tmp0, ymm_yfrac0, ymm_yfrac1 + vpmaddwd ymm_tmp0, ymm_tmp0, ymm_tmp4 + vpmaddwd ymm_tmp2, ymm_tmp2, ymm_tmp5 + vpaddd ymm_tmp0, ymm_tmp0, ymm_tmp2 + mov r_tmp0, 
i_xpos + shr r_tmp0, 16 + vmovdqu xmm_tmp4, [p_src_row0 + r_tmp0] + vmovdqu xmm_tmp5, [p_src_row1 + r_tmp0] + lea r_tmp0, [i_xpos + 4 * i_scalex2] + add i_xpos, i_scalex2 + shr r_tmp0, 16 + vinserti128 ymm_tmp4, ymm_tmp4, [p_src_row0 + r_tmp0], 1 + vinserti128 ymm_tmp5, ymm_tmp5, [p_src_row1 + r_tmp0], 1 + mov r_tmp0, i_xpos + lea i_xpos, [i_xpos + 4 * i_scalex2] + shr r_tmp0, 16 + vmovdqu xmm_tmp2, [p_src_row0 + r_tmp0] + vmovdqu xmm_tmp3, [p_src_row1 + r_tmp0] + mov r_tmp0, i_xpos + add i_xpos, i_scalex2 + shr r_tmp0, 16 + vinserti128 ymm_tmp2, ymm_tmp2, [p_src_row0 + r_tmp0], 1 + vinserti128 ymm_tmp3, ymm_tmp3, [p_src_row1 + r_tmp0], 1 + vpshufb ymm_tmp4, ymm_tmp4, ymm_xpos_int + vpshufb ymm_tmp5, ymm_tmp5, ymm_xpos_int + vpshufb ymm_tmp2, ymm_tmp2, ymm_xpos_int + vpshufb ymm_tmp3, ymm_tmp3, ymm_xpos_int + vpblendd ymm_tmp4, ymm_tmp4, ymm_tmp2, 10001000b + vpblendd ymm_tmp5, ymm_tmp5, ymm_tmp3, 10001000b + vpunpckhbw ymm_tmp4, ymm_tmp4, ymm_0 + vpunpckhbw ymm_tmp5, ymm_tmp5, ymm_0 + AVX2_BilinearFastCalcXYFrac ymm_tmp1, ymm_tmp3, ymm_tmp1, ymm_yfrac0, ymm_yfrac1 + vpmaddwd ymm_tmp1, ymm_tmp1, ymm_tmp4 + vpmaddwd ymm_tmp3, ymm_tmp3, ymm_tmp5 + vpaddd ymm_tmp1, ymm_tmp1, ymm_tmp3 + AVX2_BilinearFastPackDwordsToBytes ymm_tmp0, ymm_tmp1, ymm_0 + vmovlps [p_dst], xmm_tmp0 + vextracti128 [p_dst + 8], ymm_tmp0, 1 + add p_dst, 16 + AVX2_BilinearIncXposuw ymm_xpos_int, ymm_xpos_frac, ymm_xpos_int_inc, ymm_xpos_frac_inc, ymm_tmp0 +%endmacro + +%macro AVX2_GeneralBilinearFastDownsample_16px 0 + mov r_tmp0, i_xpos + shr r_tmp0, 16 + vpbroadcastd ymm_tmp4, [p_src_row0 + r_tmp0] + vpbroadcastd ymm_tmp5, [p_src_row1 + r_tmp0] + lea r_tmp0, [i_xpos + 1 * i_scalex] + shr r_tmp0, 16 + vpbroadcastd ymm_tmp0, [p_src_row0 + r_tmp0] + vpunpcklwd ymm_tmp4, ymm_tmp4, ymm_tmp0 + vpbroadcastd ymm_tmp0, [p_src_row1 + r_tmp0] + vpunpcklwd ymm_tmp5, ymm_tmp5, ymm_tmp0 + lea r_tmp0, [i_xpos + 2 * i_scalex] + lea i_xpos, [i_xpos + 4 * i_scalex] + shr r_tmp0, 16 + vpbroadcastd ymm_tmp0, 
[p_src_row0 + r_tmp0] + vpblendd ymm_tmp4, ymm_tmp4, ymm_tmp0, 00100010b + vpbroadcastd ymm_tmp0, [p_src_row1 + r_tmp0] + vpblendd ymm_tmp5, ymm_tmp5, ymm_tmp0, 00100010b + mov r_tmp0, i_xpos + sub r_tmp0, i_scalex + shr r_tmp0, 16 + vpbroadcastd ymm_tmp0, [p_src_row0 + r_tmp0 - 2] + vpblendw ymm_tmp4, ymm_tmp4, ymm_tmp0, 1000b + vpbroadcastd ymm_tmp0, [p_src_row1 + r_tmp0 - 2] + vpblendw ymm_tmp5, ymm_tmp5, ymm_tmp0, 1000b + mov r_tmp0, i_xpos + shr r_tmp0, 16 + vpbroadcastd ymm_tmp2, [p_src_row0 + r_tmp0] + vpbroadcastd ymm_tmp3, [p_src_row1 + r_tmp0] + lea r_tmp0, [i_xpos + 1 * i_scalex] + shr r_tmp0, 16 + vpbroadcastd ymm_tmp0, [p_src_row0 + r_tmp0] + vpunpcklwd ymm_tmp2, ymm_tmp2, ymm_tmp0 + vpbroadcastd ymm_tmp0, [p_src_row1 + r_tmp0] + vpunpcklwd ymm_tmp3, ymm_tmp3, ymm_tmp0 + lea r_tmp0, [i_xpos + 2 * i_scalex] + lea i_xpos, [i_xpos + 4 * i_scalex] + shr r_tmp0, 16 + vpbroadcastd ymm_tmp0, [p_src_row0 + r_tmp0] + vpblendd ymm_tmp2, ymm_tmp2, ymm_tmp0, 00100010b + vpbroadcastd ymm_tmp0, [p_src_row1 + r_tmp0] + vpblendd ymm_tmp3, ymm_tmp3, ymm_tmp0, 00100010b + mov r_tmp0, i_xpos + sub r_tmp0, i_scalex + shr r_tmp0, 16 + vpbroadcastd ymm_tmp0, [p_src_row0 + r_tmp0 - 2] + vpblendw ymm_tmp2, ymm_tmp2, ymm_tmp0, 1000b + vpbroadcastd ymm_tmp0, [p_src_row1 + r_tmp0 - 2] + vpblendw ymm_tmp3, ymm_tmp3, ymm_tmp0, 1000b + mov r_tmp0, i_xpos + shr r_tmp0, 16 + vmovd xmm_tmp0, [p_src_row0 + r_tmp0] + vmovd xmm_tmp1, [p_src_row1 + r_tmp0] + lea r_tmp0, [i_xpos + i_scalex] + shr r_tmp0, 16 + vpinsrw xmm_tmp0, [p_src_row0 + r_tmp0], 1 + vpinsrw xmm_tmp1, [p_src_row1 + r_tmp0], 1 + lea r_tmp0, [i_xpos + 2 * i_scalex] + lea i_xpos, [i_xpos + 4 * i_scalex] + shr r_tmp0, 16 + vpinsrw xmm_tmp0, [p_src_row0 + r_tmp0], 2 + vpinsrw xmm_tmp1, [p_src_row1 + r_tmp0], 2 + mov r_tmp0, i_xpos + sub r_tmp0, i_scalex + shr r_tmp0, 16 + vpinsrw xmm_tmp0, [p_src_row0 + r_tmp0], 3 + vpinsrw xmm_tmp1, [p_src_row1 + r_tmp0], 3 + vpblendd ymm_tmp4, ymm_tmp4, ymm_tmp0, 00001111b + vpblendd 
ymm_tmp5, ymm_tmp5, ymm_tmp1, 00001111b + mov r_tmp0, i_xpos + shr r_tmp0, 16 + vmovd xmm_tmp0, [p_src_row0 + r_tmp0] + vmovd xmm_tmp1, [p_src_row1 + r_tmp0] + lea r_tmp0, [i_xpos + i_scalex] + shr r_tmp0, 16 + vpinsrw xmm_tmp0, [p_src_row0 + r_tmp0], 1 + vpinsrw xmm_tmp1, [p_src_row1 + r_tmp0], 1 + lea r_tmp0, [i_xpos + 2 * i_scalex] + lea i_xpos, [i_xpos + 4 * i_scalex] + shr r_tmp0, 16 + vpinsrw xmm_tmp0, [p_src_row0 + r_tmp0], 2 + vpinsrw xmm_tmp1, [p_src_row1 + r_tmp0], 2 + mov r_tmp0, i_xpos + sub r_tmp0, i_scalex + shr r_tmp0, 16 + vpinsrw xmm_tmp0, [p_src_row0 + r_tmp0], 3 + vpinsrw xmm_tmp1, [p_src_row1 + r_tmp0], 3 + vpblendd ymm_tmp2, ymm_tmp2, ymm_tmp0, 00001111b + vpblendd ymm_tmp3, ymm_tmp3, ymm_tmp1, 00001111b + vpunpcklbw ymm_tmp4, ymm_tmp4, ymm_0 + vpunpcklbw ymm_tmp5, ymm_tmp5, ymm_0 + AVX2_BilinearFastCalcXYFrac ymm_tmp0, ymm_tmp1, ymm_xfrac0, ymm_yfrac0, ymm_yfrac1 + vpmaddwd ymm_tmp0, ymm_tmp0, ymm_tmp4 + vpmaddwd ymm_tmp1, ymm_tmp1, ymm_tmp5 + vpaddd ymm_tmp0, ymm_tmp0, ymm_tmp1 + vpunpcklbw ymm_tmp4, ymm_tmp2, ymm_0 + vpunpcklbw ymm_tmp5, ymm_tmp3, ymm_0 + AVX2_BilinearFastCalcXYFrac ymm_tmp1, ymm_tmp2, ymm_xfrac1, ymm_yfrac0, ymm_yfrac1 + vpmaddwd ymm_tmp1, ymm_tmp1, ymm_tmp4 + vpmaddwd ymm_tmp2, ymm_tmp2, ymm_tmp5 + vpaddd ymm_tmp1, ymm_tmp1, ymm_tmp2 + AVX2_BilinearFastPackDwordsToBytes ymm_tmp0, ymm_tmp1, ymm_0 + vpermq ymm_tmp0, ymm_tmp0, 0010b + vmovdqu [p_dst], xmm_tmp0 + add p_dst, 16 + vpaddw ymm_xfrac0, ymm_xfrac0, ymm_xfrac_inc + vpaddw ymm_xfrac1, ymm_xfrac1, ymm_xfrac_inc +%endmacro + +; xpos_int=%1 xpos_frac=%2 inc_int=%3 inc_frac=%4 7FFFh=%5 tmp=%6,%7 +%macro AVX2_BilinearIncXposw 7 + vpaddb %1, %1, %3 + vpaddw %6, %2, %4 + vpcmpgtw %7, %2, %6 + vpsubb %1, %1, %7 ; add carry + vpand %2, %6, %5 +%endmacro + +; res>>29=%1 data0=%2 data1=%3 frac0=%4 frac1=%5 tmp=%6 +%macro AVX2_LinearAccurateInterpolateVerticalDwords 6 + vpshufd %1, %2, 10110001b + vpshufd %6, %3, 10110001b + vpmuludq %1, %1, %4 + vpmuludq %6, %6, %5 + vpaddq %1, 
%1, %6 + vpmuludq %2, %2, %4 + vpmuludq %3, %3, %5 + vpaddq %2, %2, %3 + vpsllq %1, %1, 3 + vpsrlq %2, %2, 29 + vpblendd %1, %1, %2, 01010101b +%endmacro + +%macro AVX2_BilinearAccurateDownsample2xOrLess_16px 0 + vpshufb ymm_tmp0, ymm_xpos_int, ymm_0 + vpsubb ymm_xpos_int, ymm_xpos_int, ymm_tmp0 + AVX2_UnpckXFrac ymm_tmp0, ymm_tmp1, ymm_xpos_frac, ymm_7fff + mov r_tmp0, i_xpos + shr r_tmp0, 16 + vmovdqu xmm_tmp4, [p_src_row0 + r_tmp0] + vmovdqu xmm_tmp5, [p_src_row1 + r_tmp0] + lea r_tmp0, [i_xpos + 4 * i_scalex2] + lea i_xpos, [i_xpos + 8 * i_scalex2] + shr r_tmp0, 16 + vinserti128 ymm_tmp4, ymm_tmp4, [p_src_row0 + r_tmp0], 1 + vinserti128 ymm_tmp5, ymm_tmp5, [p_src_row1 + r_tmp0], 1 + vpshufb ymm_tmp4, ymm_tmp4, ymm_xpos_int + vpshufb ymm_tmp5, ymm_tmp5, ymm_xpos_int + vpunpcklbw ymm_tmp2, ymm_tmp4, ymm_0 + vpunpcklbw ymm_tmp3, ymm_tmp5, ymm_0 + vpunpckhbw ymm_tmp4, ymm_tmp4, ymm_0 + vpunpckhbw ymm_tmp5, ymm_tmp5, ymm_0 + vpmaddwd ymm_tmp2, ymm_tmp2, ymm_tmp0 + vpmaddwd ymm_tmp3, ymm_tmp3, ymm_tmp0 + vpmaddwd ymm_tmp4, ymm_tmp4, ymm_tmp1 + vpmaddwd ymm_tmp5, ymm_tmp5, ymm_tmp1 + AVX2_LinearAccurateInterpolateVerticalDwords ymm_tmp0, ymm_tmp2, ymm_tmp3, ymm_yfrac0, ymm_yfrac1, ymm_tmp1 + AVX2_LinearAccurateInterpolateVerticalDwords ymm_tmp1, ymm_tmp4, ymm_tmp5, ymm_yfrac0, ymm_yfrac1, ymm_tmp2 + vpackssdw ymm_tmp0, ymm_tmp0, ymm_tmp1 + vpavgw ymm_tmp0, ymm_tmp0, ymm_0 + vpackuswb ymm_tmp0, ymm_tmp0, ymm_tmp0 + vmovlps [p_dst], xmm_tmp0 + vextracti128 [p_dst + 8], ymm_tmp0, 1 + add p_dst, 16 + AVX2_BilinearIncXposw ymm_xpos_int, ymm_xpos_frac, ymm_xpos_int_inc, ymm_xpos_frac_inc, ymm_7fff, ymm_tmp0, ymm_tmp1 +%endmacro + +%macro AVX2_BilinearAccurateDownsample4xOrLess_16px 0 + vbroadcasti128 ymm_tmp0, xmm_shufb_0000000088888888 + vpshufb ymm_tmp0, ymm_xpos_int, ymm_tmp0 + vpsubb ymm_xpos_int, ymm_xpos_int, ymm_tmp0 + AVX2_UnpckXFrac ymm_tmp0, ymm_tmp1, ymm_xpos_frac, ymm_7fff + mov r_tmp0, i_xpos + shr r_tmp0, 16 + vmovdqu xmm_tmp4, [p_src_row0 + r_tmp0] + vmovdqu 
xmm_tmp2, [p_src_row1 + r_tmp0] + lea r_tmp0, [i_xpos + 4 * i_scalex2] + shr r_tmp0, 16 + vinserti128 ymm_tmp4, ymm_tmp4, [p_src_row0 + r_tmp0], 1 + vinserti128 ymm_tmp2, ymm_tmp2, [p_src_row1 + r_tmp0], 1 + lea r_tmp0, [i_xpos + 2 * i_scalex2] + lea i_xpos, [r_tmp0 + 4 * i_scalex2] + shr r_tmp0, 16 + vpunpcklbw ymm_tmp3, ymm_xpos_int, ymm_db80h + vpshufb ymm_tmp4, ymm_tmp4, ymm_tmp3 + vpshufb ymm_tmp2, ymm_tmp2, ymm_tmp3 + vpmaddwd ymm_tmp4, ymm_tmp4, ymm_tmp0 + vpmaddwd ymm_tmp2, ymm_tmp2, ymm_tmp0 + AVX2_LinearAccurateInterpolateVerticalDwords ymm_tmp0, ymm_tmp4, ymm_tmp2, ymm_yfrac0, ymm_yfrac1, ymm_tmp3 + vmovdqu xmm_tmp4, [p_src_row0 + r_tmp0] + vmovdqu xmm_tmp2, [p_src_row1 + r_tmp0] + mov r_tmp0, i_xpos + lea i_xpos, [i_xpos + 2 * i_scalex2] + shr r_tmp0, 16 + vinserti128 ymm_tmp4, ymm_tmp4, [p_src_row0 + r_tmp0], 1 + vinserti128 ymm_tmp2, ymm_tmp2, [p_src_row1 + r_tmp0], 1 + vpunpckhbw ymm_tmp3, ymm_xpos_int, ymm_db80h + vpshufb ymm_tmp4, ymm_tmp4, ymm_tmp3 + vpshufb ymm_tmp2, ymm_tmp2, ymm_tmp3 + vpmaddwd ymm_tmp4, ymm_tmp4, ymm_tmp1 + vpmaddwd ymm_tmp2, ymm_tmp2, ymm_tmp1 + AVX2_LinearAccurateInterpolateVerticalDwords ymm_tmp1, ymm_tmp4, ymm_tmp2, ymm_yfrac0, ymm_yfrac1, ymm_tmp3 + vpackssdw ymm_tmp0, ymm_tmp0, ymm_tmp1 + vpavgw ymm_tmp0, ymm_tmp0, ymm_0 + vpackuswb ymm_tmp0, ymm_tmp0, ymm_tmp0 + vmovlps [p_dst], xmm_tmp0 + vextracti128 [p_dst + 8], ymm_tmp0, 1 + add p_dst, 16 + AVX2_BilinearIncXposw ymm_xpos_int, ymm_xpos_frac, ymm_xpos_int_inc, ymm_xpos_frac_inc, ymm_7fff, ymm_tmp0, ymm_tmp1 +%endmacro + +%macro AVX2_BilinearAccurateDownsample8xOrLess_16px 0 + vbroadcasti128 ymm_tmp0, xmm_shufb_000044448888CCCC + vpshufb ymm_tmp0, ymm_xpos_int, ymm_tmp0 + vpsubb ymm_xpos_int, ymm_xpos_int, ymm_tmp0 + mov r_tmp0, i_xpos + shr r_tmp0, 16 + vmovdqu xmm_tmp4, [p_src_row0 + r_tmp0] + vmovdqu xmm_tmp5, [p_src_row1 + r_tmp0] + lea r_tmp0, [i_xpos + 4 * i_scalex2] + add i_xpos, i_scalex2 + shr r_tmp0, 16 + vinserti128 ymm_tmp4, ymm_tmp4, [p_src_row0 + r_tmp0], 
1 + vinserti128 ymm_tmp5, ymm_tmp5, [p_src_row1 + r_tmp0], 1 + mov r_tmp0, i_xpos + shr r_tmp0, 16 + vmovdqu xmm_tmp0, [p_src_row0 + r_tmp0] + vmovdqu xmm_tmp1, [p_src_row1 + r_tmp0] + lea r_tmp0, [i_xpos + 4 * i_scalex2] + add i_xpos, i_scalex2 + shr r_tmp0, 16 + vinserti128 ymm_tmp0, ymm_tmp0, [p_src_row0 + r_tmp0], 1 + vinserti128 ymm_tmp1, ymm_tmp1, [p_src_row1 + r_tmp0], 1 + vpunpcklbw ymm_tmp3, ymm_xpos_int, ymm_db80h + vpshufb ymm_tmp4, ymm_tmp4, ymm_tmp3 + vpshufb ymm_tmp5, ymm_tmp5, ymm_tmp3 + vpshufb ymm_tmp0, ymm_tmp0, ymm_tmp3 + vpshufb ymm_tmp1, ymm_tmp1, ymm_tmp3 + vpblendd ymm_tmp4, ymm_tmp4, ymm_tmp0, 11001100b + vpblendd ymm_tmp5, ymm_tmp5, ymm_tmp1, 11001100b + AVX2_UnpckXFrac ymm_tmp0, ymm_tmp1, ymm_xpos_frac, ymm_7fff + vpmaddwd ymm_tmp4, ymm_tmp4, ymm_tmp0 + vpmaddwd ymm_tmp5, ymm_tmp5, ymm_tmp0 + AVX2_LinearAccurateInterpolateVerticalDwords ymm_tmp0, ymm_tmp4, ymm_tmp5, ymm_yfrac0, ymm_yfrac1, ymm_tmp3 + mov r_tmp0, i_xpos + shr r_tmp0, 16 + vmovdqu xmm_tmp4, [p_src_row0 + r_tmp0] + vmovdqu xmm_tmp5, [p_src_row1 + r_tmp0] + lea r_tmp0, [i_xpos + 4 * i_scalex2] + add i_xpos, i_scalex2 + shr r_tmp0, 16 + vinserti128 ymm_tmp4, ymm_tmp4, [p_src_row0 + r_tmp0], 1 + vinserti128 ymm_tmp5, ymm_tmp5, [p_src_row1 + r_tmp0], 1 + mov r_tmp0, i_xpos + lea i_xpos, [i_xpos + 4 * i_scalex2] + shr r_tmp0, 16 + vmovdqu xmm_tmp2, [p_src_row0 + r_tmp0] + vmovdqu xmm_tmp3, [p_src_row1 + r_tmp0] + mov r_tmp0, i_xpos + add i_xpos, i_scalex2 + shr r_tmp0, 16 + vinserti128 ymm_tmp2, ymm_tmp2, [p_src_row0 + r_tmp0], 1 + vinserti128 ymm_tmp3, ymm_tmp3, [p_src_row1 + r_tmp0], 1 + vpshufb ymm_tmp4, ymm_tmp4, ymm_xpos_int + vpshufb ymm_tmp5, ymm_tmp5, ymm_xpos_int + vpshufb ymm_tmp2, ymm_tmp2, ymm_xpos_int + vpshufb ymm_tmp3, ymm_tmp3, ymm_xpos_int + vpblendd ymm_tmp4, ymm_tmp4, ymm_tmp2, 10001000b + vpblendd ymm_tmp5, ymm_tmp5, ymm_tmp3, 10001000b + vpunpckhbw ymm_tmp4, ymm_tmp4, ymm_0 + vpunpckhbw ymm_tmp5, ymm_tmp5, ymm_0 + vpmaddwd ymm_tmp4, ymm_tmp4, ymm_tmp1 + 
vpmaddwd ymm_tmp5, ymm_tmp5, ymm_tmp1 + AVX2_LinearAccurateInterpolateVerticalDwords ymm_tmp1, ymm_tmp4, ymm_tmp5, ymm_yfrac0, ymm_yfrac1, ymm_tmp3 + vpackssdw ymm_tmp0, ymm_tmp0, ymm_tmp1 + vpavgw ymm_tmp0, ymm_tmp0, ymm_0 + vpackuswb ymm_tmp0, ymm_tmp0, ymm_tmp0 + vmovlps [p_dst], xmm_tmp0 + vextracti128 [p_dst + 8], ymm_tmp0, 1 + add p_dst, 16 + AVX2_BilinearIncXposw ymm_xpos_int, ymm_xpos_frac, ymm_xpos_int_inc, ymm_xpos_frac_inc, ymm_7fff, ymm_tmp0, ymm_tmp1 +%endmacro + +%macro AVX2_GeneralBilinearAccurateDownsample_16px 0 + mov r_tmp0, i_xpos + shr r_tmp0, 16 + vpbroadcastd ymm_tmp4, [p_src_row0 + r_tmp0] + vpbroadcastd ymm_tmp5, [p_src_row1 + r_tmp0] + lea r_tmp0, [i_xpos + 1 * i_scalex] + shr r_tmp0, 16 + vpbroadcastd ymm_tmp0, [p_src_row0 + r_tmp0] + vpunpcklwd ymm_tmp4, ymm_tmp4, ymm_tmp0 + vpbroadcastd ymm_tmp0, [p_src_row1 + r_tmp0] + vpunpcklwd ymm_tmp5, ymm_tmp5, ymm_tmp0 + lea r_tmp0, [i_xpos + 2 * i_scalex] + lea i_xpos, [i_xpos + 4 * i_scalex] + shr r_tmp0, 16 + vpbroadcastd ymm_tmp0, [p_src_row0 + r_tmp0] + vpblendd ymm_tmp4, ymm_tmp4, ymm_tmp0, 00100010b + vpbroadcastd ymm_tmp0, [p_src_row1 + r_tmp0] + vpblendd ymm_tmp5, ymm_tmp5, ymm_tmp0, 00100010b + mov r_tmp0, i_xpos + sub r_tmp0, i_scalex + shr r_tmp0, 16 + vpbroadcastd ymm_tmp0, [p_src_row0 + r_tmp0 - 2] + vpblendw ymm_tmp4, ymm_tmp4, ymm_tmp0, 1000b + vpbroadcastd ymm_tmp0, [p_src_row1 + r_tmp0 - 2] + vpblendw ymm_tmp5, ymm_tmp5, ymm_tmp0, 1000b + mov r_tmp0, i_xpos + shr r_tmp0, 16 + vpbroadcastd ymm_tmp2, [p_src_row0 + r_tmp0] + vpbroadcastd ymm_tmp3, [p_src_row1 + r_tmp0] + lea r_tmp0, [i_xpos + 1 * i_scalex] + shr r_tmp0, 16 + vpbroadcastd ymm_tmp0, [p_src_row0 + r_tmp0] + vpunpcklwd ymm_tmp2, ymm_tmp2, ymm_tmp0 + vpbroadcastd ymm_tmp0, [p_src_row1 + r_tmp0] + vpunpcklwd ymm_tmp3, ymm_tmp3, ymm_tmp0 + lea r_tmp0, [i_xpos + 2 * i_scalex] + lea i_xpos, [i_xpos + 4 * i_scalex] + shr r_tmp0, 16 + vpbroadcastd ymm_tmp0, [p_src_row0 + r_tmp0] + vpblendd ymm_tmp2, ymm_tmp2, ymm_tmp0, 
00100010b + vpbroadcastd ymm_tmp0, [p_src_row1 + r_tmp0] + vpblendd ymm_tmp3, ymm_tmp3, ymm_tmp0, 00100010b + mov r_tmp0, i_xpos + sub r_tmp0, i_scalex + shr r_tmp0, 16 + vpbroadcastd ymm_tmp0, [p_src_row0 + r_tmp0 - 2] + vpblendw ymm_tmp2, ymm_tmp2, ymm_tmp0, 1000b + vpbroadcastd ymm_tmp0, [p_src_row1 + r_tmp0 - 2] + vpblendw ymm_tmp3, ymm_tmp3, ymm_tmp0, 1000b + mov r_tmp0, i_xpos + shr r_tmp0, 16 + vmovd xmm_tmp0, [p_src_row0 + r_tmp0] + vmovd xmm_tmp1, [p_src_row1 + r_tmp0] + lea r_tmp0, [i_xpos + i_scalex] + shr r_tmp0, 16 + vpinsrw xmm_tmp0, [p_src_row0 + r_tmp0], 1 + vpinsrw xmm_tmp1, [p_src_row1 + r_tmp0], 1 + lea r_tmp0, [i_xpos + 2 * i_scalex] + lea i_xpos, [i_xpos + 4 * i_scalex] + shr r_tmp0, 16 + vpinsrw xmm_tmp0, [p_src_row0 + r_tmp0], 2 + vpinsrw xmm_tmp1, [p_src_row1 + r_tmp0], 2 + mov r_tmp0, i_xpos + sub r_tmp0, i_scalex + shr r_tmp0, 16 + vpinsrw xmm_tmp0, [p_src_row0 + r_tmp0], 3 + vpinsrw xmm_tmp1, [p_src_row1 + r_tmp0], 3 + vpblendd ymm_tmp4, ymm_tmp4, ymm_tmp0, 00001111b + vpblendd ymm_tmp5, ymm_tmp5, ymm_tmp1, 00001111b + mov r_tmp0, i_xpos + shr r_tmp0, 16 + vmovd xmm_tmp0, [p_src_row0 + r_tmp0] + vmovd xmm_tmp1, [p_src_row1 + r_tmp0] + lea r_tmp0, [i_xpos + i_scalex] + shr r_tmp0, 16 + vpinsrw xmm_tmp0, [p_src_row0 + r_tmp0], 1 + vpinsrw xmm_tmp1, [p_src_row1 + r_tmp0], 1 + lea r_tmp0, [i_xpos + 2 * i_scalex] + lea i_xpos, [i_xpos + 4 * i_scalex] + shr r_tmp0, 16 + vpinsrw xmm_tmp0, [p_src_row0 + r_tmp0], 2 + vpinsrw xmm_tmp1, [p_src_row1 + r_tmp0], 2 + mov r_tmp0, i_xpos + sub r_tmp0, i_scalex + shr r_tmp0, 16 + vpinsrw xmm_tmp0, [p_src_row0 + r_tmp0], 3 + vpinsrw xmm_tmp1, [p_src_row1 + r_tmp0], 3 + vpblendd ymm_tmp2, ymm_tmp2, ymm_tmp0, 00001111b + vpblendd ymm_tmp3, ymm_tmp3, ymm_tmp1, 00001111b + vpunpcklbw ymm_tmp4, ymm_tmp4, ymm_0 + vpunpcklbw ymm_tmp5, ymm_tmp5, ymm_0 + vpmaddwd ymm_tmp4, ymm_tmp4, ymm_xfrac0 + vpmaddwd ymm_tmp5, ymm_tmp5, ymm_xfrac0 + AVX2_LinearAccurateInterpolateVerticalDwords ymm_tmp0, ymm_tmp4, ymm_tmp5, 
ymm_yfrac0, ymm_yfrac1, ymm_tmp1 + vpunpcklbw ymm_tmp4, ymm_tmp2, ymm_0 + vpunpcklbw ymm_tmp5, ymm_tmp3, ymm_0 + vpmaddwd ymm_tmp4, ymm_tmp4, ymm_xfrac1 + vpmaddwd ymm_tmp5, ymm_tmp5, ymm_xfrac1 + AVX2_LinearAccurateInterpolateVerticalDwords ymm_tmp1, ymm_tmp4, ymm_tmp5, ymm_yfrac0, ymm_yfrac1, ymm_tmp2 + vpackssdw ymm_tmp0, ymm_tmp0, ymm_tmp1 + vpavgw ymm_tmp0, ymm_tmp0, ymm_0 + vpackuswb ymm_tmp0, ymm_tmp0, ymm_tmp0 + vextracti128 [p_dst], ymm_tmp0, 1 + vmovlps [p_dst + 8], xmm_tmp0 + add p_dst, 16 + vpaddw ymm_xfrac0, ymm_xfrac0, ymm_xfrac_inc + vpaddw ymm_xfrac1, ymm_xfrac1, ymm_xfrac_inc + vpand ymm_xfrac0, ymm_xfrac0, ymm_7fff + vpand ymm_xfrac1, ymm_xfrac1, ymm_7fff +%endmacro + +; downsample_16px_macro=%1 b_fast=%2 +%macro AVX2_GeneralBilinearDownsampler_loop 2 +%%height: + mov p_src_row0, i_ypos + shr p_src_row0, 15 + imul p_src_row0, i_src_stride + add p_src_row0, p_src + mov p_src_row1, p_src_row0 + add p_src_row1, i_src_stride +%ifdef X86_32 +%if %2 + vpbroadcastw ymm_tmp1, i_ypos + vpsllw ymm_tmp1, ymm_tmp1, 1 + vpsrlw ymm_tmp1, ymm_tmp1, 1 + vpcmpeqw ymm_tmp0, ymm_tmp0, ymm_tmp0 + vpsrlw ymm_tmp0, ymm_tmp0, 1 +%else + vpbroadcastd ymm_tmp1, i_ypos + vpslld ymm_tmp1, ymm_tmp1, 17 + vpsrld ymm_tmp1, ymm_tmp1, 17 + vpcmpeqw ymm_tmp0, ymm_tmp0, ymm_tmp0 + vpsrld ymm_tmp0, ymm_tmp0, 17 +%endif + vpxor ymm_tmp0, ymm_tmp0, ymm_tmp1 + vmovdqa ymm_yfrac0, ymm_tmp0 + vmovdqa ymm_yfrac1, ymm_tmp1 +%else + vmovd xmm_tmp0, i_yposd + vpbroadcastw ymm_yfrac1, xmm_tmp0 +%if %2 + vpsllw ymm_yfrac1, ymm_yfrac1, 1 + vpsrlw ymm_yfrac1, ymm_yfrac1, 1 + vpcmpeqw ymm_yfrac0, ymm_yfrac0, ymm_yfrac0 + vpsrlw ymm_yfrac0, ymm_yfrac0, 1 +%else + vpslld ymm_yfrac1, ymm_yfrac1, 17 + vpsrld ymm_yfrac1, ymm_yfrac1, 17 + vpcmpeqw ymm_yfrac0, ymm_yfrac0, ymm_yfrac0 + vpsrld ymm_yfrac0, ymm_yfrac0, 17 +%endif + vpxor ymm_yfrac0, ymm_yfrac0, ymm_yfrac1 +%endif + + mov i_xpos, 1 << 15 + mov i_width_cnt, i_dst_width + sub i_width_cnt, 1 + +%ifdef ymm_xpos_int + vmovdqa ymm_xpos_int, 
ymm_xpos_int_begin + vmovdqa ymm_xpos_frac, ymm_xpos_frac_begin +%else + vmovdqa ymm_xfrac0, ymm_xfrac0_begin + vmovdqa ymm_xfrac1, ymm_xfrac1_begin +%endif + +%%width: + %1 + sub i_width_cnt, 16 + jg %%width + + lea p_dst, [p_dst + i_width_cnt + 1] +%ifdef i_scalex2 + mov r_tmp0, i_scalex2 + shr r_tmp0, 1 + imul i_width_cnt, r_tmp0 +%else + imul i_width_cnt, i_scalex +%endif + add i_xpos, i_width_cnt + shr i_xpos, 16 + movzx r_tmp0, byte [p_src_row0 + i_xpos] + mov [p_dst - 1], r_tmp0b +%ifdef X86_32 + mov r_tmp0, i_scaleyd + add i_yposd, r_tmp0 +%else + add i_yposd, i_scaleyd +%endif + add p_dst, i_dst_stride_less_width + sub i_dst_height, 1 + jg %%height +%endmacro
+
+;**************************************************************************************************************
+;void GeneralBilinearFastDownsampler_avx2 (uint8_t* pDst, int32_t iDstStride, int32_t iDstWidth,
+;    int32_t iDstHeight, uint8_t* pSrc, int32_t iSrcStride, uint32_t uiScaleX,
+;    uint32_t uiScaleY);
+;
+; AVX2 bilinear downsampler, "fast" variant.
+;   pDst / iDstStride      destination plane and its row pitch in bytes
+;   iDstWidth / iDstHeight destination size in pixels
+;   pSrc / iSrcStride      source plane and its row pitch in bytes
+;   uiScaleX               horizontal step per destination pixel; x positions are
+;                          shifted right by 16 to obtain a source column
+;   uiScaleY               vertical step added to i_ypos once per destination row;
+;                          the source row is i_ypos >> 15
+;
+; The first iDstHeight - 1 rows are produced by AVX2_GeneralBilinearDownsampler_loop
+; with a 16-pixel kernel selected from the horizontal step (<= 2x, <= 4x, <= 8x,
+; above 8x).  The last row is written at .final_row by point-sampling a single
+; source row.  iDstHeight == 1 goes straight to .final_row; iDstHeight <= 0
+; returns without writing anything.
+;**************************************************************************************************************
+
+WELS_EXTERN GeneralBilinearFastDownsampler_avx2
+    %assign push_num 0
+%ifndef X86_32
+    ; 64-bit: save the registers aliased below as i_xpos / i_ypos / r_tmp0 / p_src_row0
+    push r12
+    push r13
+    push rbx
+    push rbp
+    %assign push_num 4
+%ifdef WIN64
+    ; WIN64 additionally uses rdi / rsi (i_width_cnt / p_src_row1)
+    push rdi
+    push rsi
+    %assign push_num push_num + 2
+%endif
+%endif
+    LOAD_7_PARA
+    PUSH_XMM 16
+    SIGN_EXTENSION r1, r1d
+    SIGN_EXTENSION r2, r2d
+    SIGN_EXTENSION r3, r3d
+    SIGN_EXTENSION r5, r5d
+    ZERO_EXTENSION r6d
+    sub r1, r2                              ; dst_stride - dst_width
+%ifdef X86_32
+    ; 32-bit: not enough registers, so scalars and several vectors live on the stack.
+    ; Keep the caller's esp in xmm1, align esp to 32 bytes, reserve the frame and
+    ; store the old esp at [esp] so that .done can restore it.
+    vmovd xmm0, arg8
+    vmovd xmm1, esp
+    and esp, -32
+%ifdef X86_32_PICASM
+    sub esp, 8 * 4 + 9 * 32                 ; one extra 32-byte slot for the shuffle masks
+%else
+    sub esp, 8 * 4 + 8 * 32
+%endif
+    vmovd [esp], xmm1
+    %define p_dst r0
+    %define i_dst_stride_less_width [esp + 1 * 4]
+    %define i_dst_width [esp + 2 * 4]
+    %define i_dst_height dword [esp + 3 * 4]
+    %define p_src [esp + 4 * 4]
+    %define i_src_stride [esp + 5 * 4]
+    %define i_scalex r6
+    %define i_scalexd r6d
+    %define i_scaleyd [esp + 6 * 4]
+    %define i_xpos r2
+    %define i_ypos [esp + 7 * 4]
+    %define i_yposd dword [esp + 7 * 4]
+    %define p_src_row0 r3
+    %define p_src_row1 r4
+    %define i_width_cnt r5
+    %define r_tmp0 r1
+    %define r_tmp0b r1b
+    %define ymm_xpos_frac ymm1
+    %define ymm_xpos_frac_inc [esp + 8 * 4]
+    %define ymm_xpos_int ymm3
+    %define ymm_xpos_int_inc [esp + 8 * 4 + 1 * 32]
+    %define ymm_yfrac0 [esp + 8 * 4 + 2 * 32]
+    %define ymm_yfrac1 [esp + 8 * 4 + 3 * 32]
+    %define xmm_tmp0 xmm7
+    %define ymm_tmp0 ymm7
+    %define xmm_tmp1 xmm0
+    %define ymm_tmp1 ymm0
+    %define xmm_tmp2 xmm2
+    %define ymm_tmp2 ymm2
+    %define xmm_tmp3 xmm4
+    %define ymm_tmp3 ymm4
+    %define xmm_tmp4 xmm5
+    %define ymm_tmp4 ymm5
+    %define xmm_tmp5 xmm6
+    %define ymm_tmp5 ymm6
+    %define ymm_0 [esp + 8 * 4 + 4 * 32]
+    %define ymm_ffff [esp + 8 * 4 + 5 * 32]
+    %define ymm_xpos_int_begin [esp + 8 * 4 + 6 * 32]
+    %define ymm_xpos_frac_begin [esp + 8 * 4 + 7 * 32]
+%ifdef X86_32_PICASM
+    ; PIC build: build the two byte-shuffle masks in the stack frame instead of
+    ; referencing the shufb_* data symbols.
+    %define xmm_shufb_0000000088888888 [esp + 8 * 4 + 8 * 32]
+    %define xmm_shufb_000044448888CCCC [esp + 8 * 4 + 8 * 32 + 16]
+    vpxor ymm_tmp4, ymm_tmp4, ymm_tmp4
+    vpcmpeqb ymm_tmp5, ymm_tmp5, ymm_tmp5
+    vpsubb ymm_tmp4, ymm_tmp4, ymm_tmp5     ; 0 - (-1): every byte = 01h
+    vpsllw ymm_tmp3, ymm_tmp4, 3            ; every byte = 08h
+    vpslldq ymm_tmp3, ymm_tmp3, 8           ; low 8 bytes 00h, high 8 bytes 08h
+    vmovdqa xmm_shufb_0000000088888888, xmm_tmp3
+    vpsllq ymm_tmp5, ymm_tmp4, 34           ; per qword: 00 00 00 00 04 04 04 04
+    vpaddb ymm_tmp5, ymm_tmp5, ymm_tmp3     ; 0,0,0,0,4,4,4,4,8,8,8,8,C,C,C,C
+    vmovdqa xmm_shufb_000044448888CCCC, xmm_tmp5
+%else
+    %define xmm_shufb_0000000088888888 [shufb_0000000088888888]
+    %define xmm_shufb_000044448888CCCC [shufb_000044448888CCCC]
+%endif
+    ; spill the scalar arguments into their stack slots
+    mov i_dst_stride_less_width, r1
+    mov i_dst_width, r2
+    mov i_dst_height, r3
+    mov p_src, r4
+    mov i_src_stride, r5
+    vmovd i_scaleyd, xmm0
+    vpxor xmm0, xmm0, xmm0
+    vmovdqa ymm_0, ymm0                     ; all-zero constant
+    vpcmpeqw ymm_tmp0, ymm_tmp0, ymm_tmp0
+    vmovdqa ymm_ffff, ymm_tmp0              ; all-ones constant
+%else
+    ; 64-bit: everything stays in registers
+    %define p_dst r0
+    %define i_dst_stride_less_width r1
+    %define i_dst_width r2
+    %define i_dst_height r3
+    %define p_src r4
+    %define i_src_stride r5
+    %define i_scalex r6
+    %define i_scalexd r6d
+    %define i_scaleyd dword arg8d
+    %define i_xpos r12
+    %define i_ypos r13
+    %define i_yposd r13d
+    %define p_src_row0 rbp
+%ifdef WIN64
+    %define p_src_row1 rsi
+    %define i_width_cnt rdi
+%else
+    %define p_src_row1 r11
+    %define i_width_cnt rax
+%endif
+    %define r_tmp0 rbx
+    %define r_tmp0b bl
+    %define ymm_0 ymm0
+    %define ymm_xpos_frac ymm1
+    %define ymm_xpos_frac_inc ymm2
+    %define ymm_xpos_int ymm3
+    %define ymm_xpos_int_inc ymm4
+    %define ymm_yfrac0 ymm5
+    %define ymm_yfrac1 ymm6
+    %define xmm_tmp0 xmm7
+    %define ymm_tmp0 ymm7
+    %define xmm_tmp1 xmm8
+    %define ymm_tmp1 ymm8
+    %define xmm_tmp2 xmm9
+    %define ymm_tmp2 ymm9
+    %define xmm_tmp3 xmm10
+    %define ymm_tmp3 ymm10
+    %define xmm_tmp4 xmm11
+    %define ymm_tmp4 ymm11
+    %define xmm_tmp5 xmm12
+    %define ymm_tmp5 ymm12
+    %define ymm_ffff ymm13
+    %define ymm_xpos_int_begin ymm14
+    %define ymm_xpos_frac_begin ymm15
+    %define xmm_shufb_0000000088888888 [shufb_0000000088888888]
+    %define xmm_shufb_000044448888CCCC [shufb_000044448888CCCC]
+    vpxor ymm_0, ymm_0, ymm_0
+    vpcmpeqw ymm_ffff, ymm_ffff, ymm_ffff
+%endif
+
+    ; Seed the vertical position BEFORE the height test.  .final_row derives the
+    ; source row from i_ypos, and the single-row case (iDstHeight == 1) branches
+    ; there directly.  Initialising i_ypos only after the branch left that path
+    ; reading an uninitialised register (64-bit) or stack slot (32-bit).
+    mov i_yposd, 1 << 14
+    sub i_dst_height, 1                     ; rows handled by the vector loop
+    je .final_row                           ; exactly one row: only the final row
+    jl .done                                ; no rows at all
+
+    ; Build the per-lane starting x positions for 16 output pixels from
+    ; multiples of i_scalex plus a 1 << 15 offset, then split them into packed
+    ; integer column offsets (ymm_xpos_int) and 16-bit fractions (ymm_xpos_frac).
+    vmovd xmm_tmp0, i_scalexd
+    vpbroadcastd ymm_tmp0, xmm_tmp0
+    vpslld ymm_tmp1, ymm_tmp0, 2
+    vpslld ymm_tmp2, ymm_tmp0, 3
+    vpaddd ymm_tmp3, ymm_tmp1, ymm_tmp2
+    vpxor ymm_tmp4, ymm_tmp4, ymm_tmp4
+    vpblendd ymm_tmp1, ymm_tmp4, ymm_tmp1, 11110000b
+    vpblendd ymm_tmp2, ymm_tmp2, ymm_tmp3, 11110000b
+    vpaddd ymm_tmp3, ymm_tmp0, ymm_tmp0
+    vpblendd ymm_tmp3, ymm_tmp4, ymm_tmp3, 11001100b
+    vpblendd ymm_tmp0, ymm_tmp4, ymm_tmp0, 10101010b
+    vpaddd ymm_tmp0, ymm_tmp3, ymm_tmp0
+    vpaddd ymm_tmp1, ymm_tmp1, ymm_tmp0
+    vpaddd ymm_tmp2, ymm_tmp2, ymm_tmp0
+    vpcmpeqw ymm_tmp3, ymm_tmp3, ymm_tmp3
+    vpsrld ymm_tmp3, ymm_tmp3, 31
+    vpslld ymm_tmp3, ymm_tmp3, 15           ; 1 << 15 in every dword
+    vpaddd ymm_tmp1, ymm_tmp1, ymm_tmp3
+    vpaddd ymm_tmp2, ymm_tmp2, ymm_tmp3
+    vpsrld ymm_xpos_int, ymm_tmp1, 16
+    vpsrld ymm_tmp0, ymm_tmp2, 16
+    vpackssdw ymm_xpos_int, ymm_xpos_int, ymm_tmp0
+    vpermq ymm_xpos_int, ymm_xpos_int, 11011000b
+    vpackuswb ymm_xpos_int, ymm_xpos_int, ymm_xpos_int
+    vpcmpeqw ymm_tmp3, ymm_tmp3, ymm_tmp3
+    vpsubb ymm_tmp0, ymm_xpos_int, ymm_tmp3 ; column + 1
+    vpunpcklbw ymm_xpos_int, ymm_xpos_int, ymm_tmp0
+    vpslld ymm_tmp1, ymm_tmp1, 16
+    vpsrld ymm_tmp1, ymm_tmp1, 16
+    vpslld ymm_tmp2, ymm_tmp2, 16
+    vpsrld ymm_tmp2, ymm_tmp2, 16
+    vpackusdw ymm_xpos_frac, ymm_tmp1, ymm_tmp2
+    vpermq ymm_xpos_frac, ymm_xpos_frac, 11011000b
+    ; per-iteration increments for a 16-pixel step (i_scalex << 4)
+    vmovd xmm_tmp0, i_scalexd
+    vpslld xmm_tmp0, xmm_tmp0, 4
+    vpbroadcastw ymm_tmp1, xmm_tmp0
+    vmovdqa ymm_xpos_frac_inc, ymm_tmp1
+    vpsrld xmm_tmp0, xmm_tmp0, 16
+    vpsubw ymm_tmp0, ymm_tmp0, ymm_tmp3
+    vpbroadcastb ymm_tmp0, xmm_tmp0
+    vmovdqa ymm_xpos_int_inc, ymm_tmp0
+    vmovdqa ymm_xpos_int_begin, ymm_xpos_int
+    vmovdqa ymm_xpos_frac_begin, ymm_xpos_frac
+
+    ; Select the kernel from the horizontal step.  In the three bounded cases the
+    ; step register is doubled for the loop (visible to the macro as i_scalex2)
+    ; and halved again afterwards so that .final_row sees the original value.
+    cmp i_scalex, 4 << 16
+    ja .scalex_above4
+    cmp i_scalex, 2 << 16
+    ja .scalex_above2_beloweq4
+    add i_scalex, i_scalex
+%xdefine i_scalex2 i_scalex
+%undef i_scalex
+    AVX2_GeneralBilinearDownsampler_loop AVX2_BilinearFastDownsample2xOrLess_16px, 1
+    shr i_scalex2, 1
+%xdefine i_scalex i_scalex2
+%undef i_scalex2
+    jmp .final_row
+.scalex_above2_beloweq4:
+    add i_scalex, i_scalex
+%xdefine i_scalex2 i_scalex
+%undef i_scalex
+    AVX2_GeneralBilinearDownsampler_loop AVX2_BilinearFastDownsample4xOrLess_16px, 1
+    shr i_scalex2, 1
+%xdefine i_scalex i_scalex2
+%undef i_scalex2
+    jmp .final_row
+.scalex_above4:
+    cmp i_scalex, 8 << 16
+    ja .scalex_above8
+    add i_scalex, i_scalex
+%xdefine i_scalex2 i_scalex
+%undef i_scalex
+    AVX2_GeneralBilinearDownsampler_loop AVX2_BilinearFastDownsample8xOrLess_16px, 1
+    shr i_scalex2, 1
+%xdefine i_scalex i_scalex2
+%undef i_scalex2
+    jmp .final_row
+.scalex_above8:
+    ; General kernel: the position registers/slots are re-purposed under the
+    ; ymm_xfrac* names, so the ymm_xpos_* aliases are retired here.
+%xdefine ymm_xfrac0 ymm_xpos_frac
+%xdefine ymm_xfrac1 ymm_xpos_int
+%xdefine ymm_xfrac0_begin ymm_xpos_int_begin
+%xdefine ymm_xfrac1_begin ymm_xpos_frac_begin
+%xdefine ymm_xfrac_inc ymm_xpos_frac_inc
+%undef ymm_xpos_int
+%undef ymm_xpos_frac
+%undef ymm_xpos_int_begin
+%undef ymm_xpos_frac_begin
+%undef ymm_xpos_int_inc
+%undef ymm_xpos_frac_inc
+    AVX2_UnpckXFrac ymm_tmp0, ymm_xfrac1, ymm_xfrac0, ymm_ffff
+    vpermq ymm_xfrac0, ymm_tmp0, 01001110b
+    vpermq ymm_xfrac1, ymm_xfrac1, 01001110b
+    vmovdqa ymm_xfrac0_begin, ymm_xfrac0
+    vmovdqa ymm_xfrac1_begin, ymm_xfrac1
+    vpcmpeqw ymm_tmp0, ymm_tmp0, ymm_tmp0
+    vpmullw ymm_tmp0, ymm_tmp0, ymm_xfrac_inc       ; -inc
+    vpunpcklwd ymm_tmp0, ymm_tmp0, ymm_xfrac_inc    ; interleave (-inc, +inc)
+    vmovdqa ymm_xfrac_inc, ymm_tmp0
+    AVX2_GeneralBilinearDownsampler_loop AVX2_GeneralBilinearFastDownsample_16px, 1
+
+.final_row:
+    ; Last destination row: no vertical blend, one source row is point-sampled.
+    mov p_src_row0, i_ypos
+    shr p_src_row0, 15                      ; source row index
+    imul p_src_row0, i_src_stride
+    add p_src_row0, p_src
+    mov i_xpos, 1 << 15
+    mov i_width_cnt, i_dst_width
+
+.final_row_width:
+    mov r_tmp0, i_xpos
+    shr r_tmp0, 16                          ; source column index
+    movzx r_tmp0, byte [p_src_row0 + r_tmp0]
+    mov [p_dst], r_tmp0b
+    add p_dst, 1
+    add i_xpos, i_scalex
+    sub i_width_cnt, 1
+    jg .final_row_width
+
+.done:
+    vzeroupper
+%ifdef X86_32
+    mov esp, [esp]                          ; restore the esp saved in the prologue
+%endif
+    POP_XMM
+    LOAD_7_PARA_POP
+%ifndef X86_32
+%ifdef WIN64
+    pop rsi
+    pop rdi
+%endif
+    pop rbp
+    pop rbx
+    pop r13
+    pop r12
+%endif
+    ret
+%undef p_dst
+%undef i_dst_stride_less_width
+%undef i_dst_width
+%undef i_dst_height
+%undef p_src
+%undef i_src_stride
+%undef i_scalex
+%undef i_scalexd
+%undef i_scaleyd
+%undef i_xpos
+%undef i_ypos
+%undef i_yposd
+%undef p_src_row0
+%undef p_src_row1
+%undef i_width_cnt
+%undef r_tmp0
+%undef r_tmp0b
+%undef ymm_xpos_frac
+%undef ymm_xpos_frac_inc
+%undef ymm_xpos_int
+%undef ymm_xpos_int_inc
+%undef ymm_yfrac0
+%undef ymm_yfrac1
+%undef xmm_tmp0
+%undef ymm_tmp0
+%undef xmm_tmp1
+%undef ymm_tmp1
+%undef xmm_tmp2
+%undef ymm_tmp2
+%undef xmm_tmp3
+%undef ymm_tmp3
+%undef xmm_tmp4
+%undef ymm_tmp4
+%undef xmm_tmp5
+%undef ymm_tmp5
+%undef ymm_ffff
+%undef ymm_0
+%undef ymm_xpos_int_begin
+%undef ymm_xpos_frac_begin
+%undef ymm_xfrac0
+%undef ymm_xfrac1
+%undef ymm_xfrac0_begin
+%undef ymm_xfrac1_begin
+%undef ymm_xfrac_inc
+%undef xmm_shufb_0000000088888888
+%undef xmm_shufb_000044448888CCCC
+
+;**************************************************************************************************************
+;void GeneralBilinearAccurateDownsampler_avx2 (uint8_t* pDst, int32_t iDstStride, int32_t iDstWidth,
+;    int32_t iDstHeight, uint8_t* pSrc, int32_t iSrcStride, uint32_t uiScaleX,
+;    uint32_t uiScaleY);
+;
+; AVX2 bilinear downsampler, "accurate" variant.
+;   pDst / iDstStride      destination plane and its row pitch in bytes
+;   iDstWidth / iDstHeight destination size in pixels
+;   pSrc / iSrcStride      source plane and its row pitch in bytes
+;   uiScaleX               horizontal step per destination pixel; it is doubled on
+;                          entry, after which x positions are shifted right by 16
+;                          to obtain a source column
+;   uiScaleY               vertical step added to i_ypos once per destination row;
+;                          the source row is i_ypos >> 15
+;
+; The first iDstHeight - 1 rows are produced by AVX2_GeneralBilinearDownsampler_loop
+; with a 16-pixel kernel selected from the (doubled) horizontal step.  The last
+; row is written at .final_row by point-sampling a single source row.
+; iDstHeight == 1 goes straight to .final_row; iDstHeight <= 0 returns without
+; writing anything.
+;**************************************************************************************************************
+
+WELS_EXTERN GeneralBilinearAccurateDownsampler_avx2
+    %assign push_num 0
+%ifndef X86_32
+    ; 64-bit: save the registers aliased below as i_xpos / i_ypos / r_tmp0 / p_src_row0
+    push r12
+    push r13
+    push rbx
+    push rbp
+    %assign push_num 4
+%ifdef WIN64
+    ; WIN64 additionally uses rdi / rsi (i_width_cnt / p_src_row1)
+    push rdi
+    push rsi
+    %assign push_num push_num + 2
+%endif
+%endif
+    LOAD_7_PARA
+    PUSH_XMM 16
+    SIGN_EXTENSION r1, r1d
+    SIGN_EXTENSION r2, r2d
+    SIGN_EXTENSION r3, r3d
+    SIGN_EXTENSION r5, r5d
+    ZERO_EXTENSION r6d
+    sub r1, r2                              ; dst_stride - dst_width
+    add r6, r6                              ; 2 * scalex
+%ifdef X86_32
+    ; 32-bit: not enough registers, so scalars and several vectors live on the stack.
+    ; Keep the caller's esp in xmm1, align esp to 32 bytes, reserve the frame and
+    ; store the old esp at [esp] so that .done can restore it.
+    vmovd xmm0, arg8
+    vmovd xmm1, esp
+    and esp, -32
+%ifdef X86_32_PICASM
+    sub esp, 8 * 4 + 10 * 32                ; two extra 32-byte slots for constants
+%else
+    sub esp, 8 * 4 + 8 * 32
+%endif
+    vmovd [esp], xmm1
+    %define p_dst r0
+    %define i_dst_stride_less_width [esp + 1 * 4]
+    %define i_dst_width [esp + 2 * 4]
+    %define i_dst_height dword [esp + 3 * 4]
+    %define p_src [esp + 4 * 4]
+    %define i_src_stride [esp + 5 * 4]
+    %define i_scalex r6
+    %define i_scalexd r6d
+    %define i_scaleyd [esp + 6 * 4]
+    %define i_xpos r2
+    %define i_ypos [esp + 7 * 4]
+    %define i_yposd dword [esp + 7 * 4]
+    %define p_src_row0 r3
+    %define p_src_row1 r4
+    %define i_width_cnt r5
+    %define r_tmp0 r1
+    %define r_tmp0b r1b
+    %define ymm_xpos_frac ymm1
+    %define ymm_xpos_frac_inc [esp + 8 * 4]
+    %define ymm_xpos_int ymm3
+    %define ymm_xpos_int_inc [esp + 8 * 4 + 1 * 32]
+    %define ymm_yfrac0 [esp + 8 * 4 + 2 * 32]
+    %define ymm_yfrac1 [esp + 8 * 4 + 3 * 32]
+    %define xmm_tmp0 xmm7
+    %define ymm_tmp0 ymm7
+    %define xmm_tmp1 xmm0
+    %define ymm_tmp1 ymm0
+    %define xmm_tmp2 xmm2
+    %define ymm_tmp2 ymm2
+    %define xmm_tmp3 xmm4
+    %define ymm_tmp3 ymm4
+    %define xmm_tmp4 xmm5
+    %define ymm_tmp4 ymm5
+    %define xmm_tmp5 xmm6
+    %define ymm_tmp5 ymm6
+    %define ymm_0 [esp + 8 * 4 + 4 * 32]
+    %define ymm_7fff [esp + 8 * 4 + 5 * 32]
+    %define ymm_xpos_int_begin [esp + 8 * 4 + 6 * 32]
+    %define ymm_xpos_frac_begin [esp + 8 * 4 + 7 * 32]
+%ifdef X86_32_PICASM
+    ; PIC build: build the 80h byte constant and the two byte-shuffle masks in
+    ; the stack frame instead of referencing the data symbols.
+    %define ymm_db80h [esp + 8 * 4 + 8 * 32]
+    %define xmm_shufb_0000000088888888 [esp + 8 * 4 + 9 * 32]
+    %define xmm_shufb_000044448888CCCC [esp + 8 * 4 + 9 * 32 + 16]
+    vpxor ymm_tmp4, ymm_tmp4, ymm_tmp4
+    vpcmpeqb ymm_tmp5, ymm_tmp5, ymm_tmp5
+    vpsubb ymm_tmp4, ymm_tmp4, ymm_tmp5     ; 0 - (-1): every byte = 01h
+    vpsllw ymm_tmp3, ymm_tmp4, 3            ; every byte = 08h
+    vpslldq ymm_tmp3, ymm_tmp3, 8           ; low 8 bytes 00h, high 8 bytes 08h
+    vmovdqa xmm_shufb_0000000088888888, xmm_tmp3
+    vpsllq ymm_tmp5, ymm_tmp4, 34           ; per qword: 00 00 00 00 04 04 04 04
+    vpaddb ymm_tmp5, ymm_tmp5, ymm_tmp3     ; 0,0,0,0,4,4,4,4,8,8,8,8,C,C,C,C
+    vmovdqa xmm_shufb_000044448888CCCC, xmm_tmp5
+    vpsllw ymm_tmp4, ymm_tmp4, 7            ; every byte = 80h
+    vmovdqa ymm_db80h, ymm_tmp4
+%else
+    %define ymm_db80h [db80h_256]
+    %define xmm_shufb_0000000088888888 [shufb_0000000088888888]
+    %define xmm_shufb_000044448888CCCC [shufb_000044448888CCCC]
+%endif
+    ; spill the scalar arguments into their stack slots
+    mov i_dst_stride_less_width, r1
+    mov i_dst_width, r2
+    mov i_dst_height, r3
+    mov p_src, r4
+    mov i_src_stride, r5
+    vmovd i_scaleyd, xmm0
+    vpxor xmm0, xmm0, xmm0
+    vmovdqa ymm_0, ymm0                     ; all-zero constant
+    vpcmpeqw ymm0, ymm0, ymm0
+    vpsrlw ymm0, ymm0, 1
+    vmovdqa ymm_7fff, ymm0                  ; 7FFFh in every word
+%else
+    ; 64-bit: everything stays in registers
+    %define p_dst r0
+    %define i_dst_stride_less_width r1
+    %define i_dst_width r2
+    %define i_dst_height r3
+    %define p_src r4
+    %define i_src_stride r5
+    %define i_scalex r6
+    %define i_scalexd r6d
+    %define i_scaleyd dword arg8d
+    %define i_xpos r12
+    %define i_ypos r13
+    %define i_yposd r13d
+    %define p_src_row0 rbp
+%ifdef WIN64
+    %define p_src_row1 rsi
+    %define i_width_cnt rdi
+%else
+    %define p_src_row1 r11
+    %define i_width_cnt rax
+%endif
+    %define r_tmp0 rbx
+    %define r_tmp0b bl
+    %define ymm_0 ymm0
+    %define ymm_xpos_frac ymm1
+    %define ymm_xpos_int ymm3
+    %define ymm_xpos_frac_inc ymm2
+    %define ymm_xpos_int_inc ymm4
+    %define ymm_yfrac0 ymm5
+    %define ymm_yfrac1 ymm6
+    %define xmm_tmp0 xmm7
+    %define ymm_tmp0 ymm7
+    %define xmm_tmp1 xmm8
+    %define ymm_tmp1 ymm8
+    %define xmm_tmp2 xmm9
+    %define ymm_tmp2 ymm9
+    %define xmm_tmp3 xmm10
+    %define ymm_tmp3 ymm10
+    %define xmm_tmp4 xmm11
+    %define ymm_tmp4 ymm11
+    %define xmm_tmp5 xmm12
+    %define ymm_tmp5 ymm12
+    %define ymm_7fff ymm13
+    %define ymm_xpos_int_begin ymm14
+    %define ymm_xpos_frac_begin ymm15
+    %define ymm_db80h [db80h_256]
+    %define xmm_shufb_0000000088888888 [shufb_0000000088888888]
+    %define xmm_shufb_000044448888CCCC [shufb_000044448888CCCC]
+    vpxor ymm_0, ymm_0, ymm_0
+    vpcmpeqw ymm_7fff, ymm_7fff, ymm_7fff
+    vpsrlw ymm_7fff, ymm_7fff, 1
+%endif
+
+    ; Seed the vertical position BEFORE the height test.  .final_row derives the
+    ; source row from i_ypos, and the single-row case (iDstHeight == 1) branches
+    ; there directly.  Initialising i_ypos only after the branch left that path
+    ; reading an uninitialised register (64-bit) or stack slot (32-bit).
+    mov i_yposd, 1 << 14
+    sub i_dst_height, 1                     ; rows handled by the vector loop
+    je .final_row                           ; exactly one row: only the final row
+    jl .done                                ; no rows at all
+
+    ; Build the per-lane starting x positions for 16 output pixels from
+    ; multiples of i_scalex plus a 1 << 15 offset, then split them into packed
+    ; integer column offsets (ymm_xpos_int) and fractions (ymm_xpos_frac).
+    vmovd xmm_tmp0, i_scalexd
+    vpbroadcastd ymm_tmp0, xmm_tmp0
+    vpslld ymm_tmp1, ymm_tmp0, 2
+    vpslld ymm_tmp2, ymm_tmp0, 3
+    vpaddd ymm_tmp3, ymm_tmp1, ymm_tmp2
+    vpxor ymm_tmp4, ymm_tmp4, ymm_tmp4
+    vpblendd ymm_tmp1, ymm_tmp4, ymm_tmp1, 11110000b
+    vpblendd ymm_tmp2, ymm_tmp2, ymm_tmp3, 11110000b
+    vpaddd ymm_tmp3, ymm_tmp0, ymm_tmp0
+    vpblendd ymm_tmp3, ymm_tmp4, ymm_tmp3, 11001100b
+    vpblendd ymm_tmp0, ymm_tmp4, ymm_tmp0, 10101010b
+    vpaddd ymm_tmp0, ymm_tmp3, ymm_tmp0
+    vpaddd ymm_tmp1, ymm_tmp1, ymm_tmp0
+    vpaddd ymm_tmp2, ymm_tmp2, ymm_tmp0
+    vpcmpeqw ymm_tmp3, ymm_tmp3, ymm_tmp3
+    vpsrld ymm_tmp3, ymm_tmp3, 31
+    vpslld ymm_tmp3, ymm_tmp3, 15           ; 1 << 15 in every dword
+    vpaddd ymm_tmp1, ymm_tmp1, ymm_tmp3
+    vpaddd ymm_tmp2, ymm_tmp2, ymm_tmp3
+    vpsrld ymm_xpos_int, ymm_tmp1, 16
+    vpsrld ymm_tmp0, ymm_tmp2, 16
+    vpackssdw ymm_xpos_int, ymm_xpos_int, ymm_tmp0
+    vpermq ymm_xpos_int, ymm_xpos_int, 11011000b
+    vpackuswb ymm_xpos_int, ymm_xpos_int, ymm_xpos_int
+    vpcmpeqw ymm_tmp3, ymm_tmp3, ymm_tmp3
+    vpsubb ymm_tmp0, ymm_xpos_int, ymm_tmp3 ; column + 1
+    vpunpcklbw ymm_xpos_int, ymm_xpos_int, ymm_tmp0
+    vpslld ymm_tmp1, ymm_tmp1, 16
+    vpsrld ymm_tmp1, ymm_tmp1, 16
+    vpslld ymm_tmp2, ymm_tmp2, 16
+    vpsrld ymm_tmp2, ymm_tmp2, 16
+    vpackusdw ymm_xpos_frac, ymm_tmp1, ymm_tmp2
+    vpermq ymm_xpos_frac, ymm_xpos_frac, 11011000b
+    vpsrlw ymm_xpos_frac, ymm_xpos_frac, 1  ; keep 15 fraction bits
+    ; per-iteration increments for a 16-pixel step (i_scalex << 4)
+    vmovd xmm_tmp0, i_scalexd
+    vpslld xmm_tmp0, xmm_tmp0, 4
+    vpbroadcastw ymm_tmp1, xmm_tmp0
+    vpsrlw ymm_tmp1, ymm_tmp1, 1
+    vmovdqa ymm_xpos_frac_inc, ymm_tmp1
+    vpsrld xmm_tmp0, xmm_tmp0, 16
+    vpsubw ymm_tmp0, ymm_tmp0, ymm_tmp3
+    vpbroadcastb ymm_tmp0, xmm_tmp0
+    vmovdqa ymm_xpos_int_inc, ymm_tmp0
+    vmovdqa ymm_xpos_int_begin, ymm_xpos_int
+    vmovdqa ymm_xpos_frac_begin, ymm_xpos_frac
+
+    ; Select the kernel from the horizontal step.  In the three bounded cases the
+    ; step register is doubled for the loop (visible to the macro as i_scalex2)
+    ; and halved again afterwards so that .final_row sees the pre-loop value.
+    cmp i_scalex, 4 << 16
+    ja .scalex_above4
+    cmp i_scalex, 2 << 16
+    ja .scalex_above2_beloweq4
+    add i_scalex, i_scalex
+%xdefine i_scalex2 i_scalex
+%undef i_scalex
+    AVX2_GeneralBilinearDownsampler_loop AVX2_BilinearAccurateDownsample2xOrLess_16px, 0
+    shr i_scalex2, 1
+%xdefine i_scalex i_scalex2
+%undef i_scalex2
+    jmp .final_row
+.scalex_above2_beloweq4:
+    add i_scalex, i_scalex
+%xdefine i_scalex2 i_scalex
+%undef i_scalex
+    AVX2_GeneralBilinearDownsampler_loop AVX2_BilinearAccurateDownsample4xOrLess_16px, 0
+    shr i_scalex2, 1
+%xdefine i_scalex i_scalex2
+%undef i_scalex2
+    jmp .final_row
+.scalex_above4:
+    cmp i_scalex, 8 << 16
+    ja .scalex_above8
+    add i_scalex, i_scalex
+%xdefine i_scalex2 i_scalex
+%undef i_scalex
+    AVX2_GeneralBilinearDownsampler_loop AVX2_BilinearAccurateDownsample8xOrLess_16px, 0
+    shr i_scalex2, 1
+%xdefine i_scalex i_scalex2
+%undef i_scalex2
+    jmp .final_row
+.scalex_above8:
+    ; General kernel: the position registers/slots are re-purposed under the
+    ; ymm_xfrac* names, so the ymm_xpos_* aliases are retired here.
+%xdefine ymm_xfrac0 ymm_xpos_frac
+%xdefine ymm_xfrac1 ymm_xpos_int
+%xdefine ymm_xfrac0_begin ymm_xpos_int_begin
+%xdefine ymm_xfrac1_begin ymm_xpos_frac_begin
+%xdefine ymm_xfrac_inc ymm_xpos_frac_inc
+%undef ymm_xpos_int
+%undef ymm_xpos_frac
+%undef ymm_xpos_int_begin
+%undef ymm_xpos_frac_begin
+%undef ymm_xpos_int_inc
+%undef ymm_xpos_frac_inc
+    AVX2_UnpckXFrac ymm_tmp0, ymm_xfrac1, ymm_xfrac0, ymm_7fff
+    vpermq ymm_xfrac0, ymm_tmp0, 01001110b
+    vpermq ymm_xfrac1, ymm_xfrac1, 01001110b
+    vmovdqa ymm_xfrac0_begin, ymm_xfrac0
+    vmovdqa ymm_xfrac1_begin, ymm_xfrac1
+    vpcmpeqw ymm_tmp0, ymm_tmp0, ymm_tmp0
+    vpmullw ymm_tmp0, ymm_tmp0, ymm_xfrac_inc       ; -inc
+    vpunpcklwd ymm_tmp0, ymm_tmp0, ymm_xfrac_inc    ; interleave (-inc, +inc)
+    vmovdqa ymm_xfrac_inc, ymm_tmp0
+    AVX2_GeneralBilinearDownsampler_loop AVX2_GeneralBilinearAccurateDownsample_16px, 0
+
+.final_row:
+    ; Last destination row: no vertical blend, one source row is point-sampled.
+    mov p_src_row0, i_ypos
+    shr p_src_row0, 15                      ; source row index
+    imul p_src_row0, i_src_stride
+    add p_src_row0, p_src
+    mov i_xpos, 1 << 15
+    mov i_width_cnt, i_dst_width
+
+.final_row_width:
+    mov r_tmp0, i_xpos
+    shr r_tmp0, 16                          ; source column index
+    movzx r_tmp0, byte [p_src_row0 + r_tmp0]
+    mov [p_dst], r_tmp0b
+    add p_dst, 1
+    add i_xpos, i_scalex
+    sub i_width_cnt, 1
+    jg .final_row_width
+
+.done:
+    vzeroupper
+%ifdef X86_32
+    mov esp, [esp]                          ; restore the esp saved in the prologue
+%endif
+    POP_XMM
+    LOAD_7_PARA_POP
+%ifndef X86_32
+%ifdef WIN64
+    pop rsi
+    pop rdi
+%endif
+    pop rbp
+    pop rbx
+    pop r13
+    pop r12
+%endif
+    ret
+%undef p_dst
+%undef i_dst_stride_less_width
+%undef i_dst_width
+%undef i_dst_height
+%undef p_src
+%undef i_src_stride
+%undef i_scalex
+%undef i_scalexd
+%undef i_scaleyd
+%undef i_xpos
+%undef i_ypos
+%undef i_yposd
+%undef p_src_row0
+%undef p_src_row1
+%undef i_width_cnt
+%undef r_tmp0
+%undef r_tmp0b
+%undef ymm_xpos_frac
+%undef ymm_xpos_frac_inc
+%undef ymm_xpos_int
+%undef ymm_xpos_int_inc
+%undef ymm_yfrac0
+%undef ymm_yfrac1
+%undef xmm_tmp0
+%undef ymm_tmp0
+%undef xmm_tmp1
+%undef ymm_tmp1
+%undef xmm_tmp2
+%undef ymm_tmp2
+%undef xmm_tmp3
+%undef ymm_tmp3
+%undef xmm_tmp4
+%undef ymm_tmp4
+%undef xmm_tmp5
+%undef ymm_tmp5
+%undef ymm_0
+%undef ymm_7fff
+%undef ymm_xpos_int_begin
+%undef ymm_xpos_frac_begin
+%undef ymm_xfrac0
+%undef ymm_xfrac1
+%undef ymm_xfrac0_begin
+%undef ymm_xfrac1_begin
+%undef ymm_xfrac_inc
+%undef ymm_db80h
+%undef xmm_shufb_0000000088888888
+%undef xmm_shufb_000044448888CCCC
+
+%endif
diff --git a/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/x86/vaa.asm
b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/x86/vaa.asm new file mode 100644 index 000000000..9b728576b --- /dev/null +++ b/TMessagesProj/jni/third_party/openh264/src/codec/processing/src/x86/vaa.asm @@ -0,0 +1,3563 @@ +;*! +;* \copy +;* Copyright (c) 2010-2013, Cisco Systems +;* All rights reserved. +;* +;* Redistribution and use in source and binary forms, with or without +;* modification, are permitted provided that the following conditions +;* are met: +;* +;* * Redistributions of source code must retain the above copyright +;* notice, this list of conditions and the following disclaimer. +;* +;* * Redistributions in binary form must reproduce the above copyright +;* notice, this list of conditions and the following disclaimer in +;* the documentation and/or other materials provided with the +;* distribution. +;* +;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +;* POSSIBILITY OF SUCH DAMAGE. 
+;* +;* +;* vaa.asm +;* +;* Abstract +;* sse2 for pVaa routines +;* +;* History +;* 04/14/2010 Created +;* 06/07/2010 Added AnalysisVaaInfoIntra_sse2(ssse3) +;* 06/10/2010 Tune rc_sad_frame_sse2 and got about 40% improvement +;* 08/11/2010 Added abs_difference_mbrow_sse2 & sum_sqrsum_mbrow_sse2 +;* +;*************************************************************************/ +%include "asm_inc.asm" + + +;*********************************************************************** +; Macros and other preprocessor constants +;*********************************************************************** +%macro SUM_SQR_SSE2 3 ; dst, pSrc, zero + movdqa %1, %2 + punpcklbw %1, %3 + punpckhbw %2, %3 + pmaddwd %1, %1 + pmaddwd %2, %2 + paddd %1, %2 + pshufd %2, %1, 04Eh ; 01001110 B + paddd %1, %2 + pshufd %2, %1, 0B1h ; 10110001 B + paddd %1, %2 +%endmacro ; END OF SUM_SQR_SSE2 + +%macro WELS_SAD_16x2_SSE2 3 ;esi :%1 edi:%2 ebx:%3 + movdqa xmm1, [%1] + movdqa xmm2, [%2] + movdqa xmm3, [%1+%3] + movdqa xmm4, [%2+%3] + psadbw xmm1, xmm2 + psadbw xmm3, xmm4 + paddd xmm6, xmm1 + paddd xmm6, xmm3 + lea %1, [%1+%3*2] + lea %2, [%2+%3*2] +%endmacro + +; by comparing it outperforms than phaddw(SSSE3) sets +%macro SUM_WORD_8x2_SSE2 2 ; dst(pSrc), tmp + ; @sum_8x2 begin + pshufd %2, %1, 04Eh ; 01001110 B + paddw %1, %2 + pshuflw %2, %1, 04Eh ; 01001110 B + paddw %1, %2 + pshuflw %2, %1, 0B1h ; 10110001 B + paddw %1, %2 + ; end of @sum_8x2 +%endmacro ; END of SUM_WORD_8x2_SSE2 + +%macro WELS_SAD_SUM_SQSUM_16x1_SSE2 3 ;esi:%1,edi:%2,ebx:%3 + movdqa xmm1, [%1] + movdqa xmm2, [%2] + movdqa xmm3, xmm1 + psadbw xmm3, xmm2 + paddd xmm6, xmm3 + + movdqa xmm3, xmm1 + psadbw xmm3, xmm0 + paddd xmm5, xmm3 + + movdqa xmm2, xmm1 + punpcklbw xmm1, xmm0 + punpckhbw xmm2, xmm0 + pmaddwd xmm1, xmm1 + pmaddwd xmm2, xmm2 + paddd xmm4, xmm1 + paddd xmm4, xmm2 + + add %1, %3 + add %2, %3 +%endmacro + +%macro WELS_SAD_SUM_SQSUM_SQDIFF_16x1_SSE2 3 ;esi:%1 edi:%2 ebx:%3 + movdqa xmm1, [%1] + movdqa xmm2, [%2] + 
movdqa xmm3, xmm1 + psadbw xmm3, xmm2 + paddd xmm7, xmm3 ; sad + + movdqa xmm3, xmm1 + pmaxub xmm3, xmm2 + pminub xmm2, xmm1 + psubb xmm3, xmm2 ; diff + + movdqa xmm2, xmm1 + psadbw xmm2, xmm0 + paddd xmm6, xmm2 ; sum + + movdqa xmm2, xmm1 + punpcklbw xmm1, xmm0 + punpckhbw xmm2, xmm0 + pmaddwd xmm1, xmm1 + pmaddwd xmm2, xmm2 + paddd xmm5, xmm1 + paddd xmm5, xmm2 ; sqsum + + movdqa xmm1, xmm3 + punpcklbw xmm1, xmm0 + punpckhbw xmm3, xmm0 + pmaddwd xmm1, xmm1 + pmaddwd xmm3, xmm3 + paddd xmm4, xmm1 + paddd xmm4, xmm3 ; sqdiff + + add %1, %3 + add %2, %3 +%endmacro + +%macro WELS_SAD_SD_MAD_16x1_SSE2 7 ;esi:%5 edi:%6 ebx:%7 +%define sad_reg %1 +%define sum_cur_reg %2 +%define sum_ref_reg %3 +%define mad_reg %4 + movdqa xmm1, [%5] + movdqa xmm2, [%6] + movdqa xmm3, xmm1 + psadbw xmm3, xmm0 + paddd sum_cur_reg, xmm3 ; sum_cur + movdqa xmm3, xmm2 + psadbw xmm3, xmm0 + paddd sum_ref_reg, xmm3 ; sum_ref + + movdqa xmm3, xmm1 + pmaxub xmm3, xmm2 + pminub xmm2, xmm1 + psubb xmm3, xmm2 ; abs diff + pmaxub mad_reg, xmm3 ; max abs diff + + psadbw xmm3, xmm0 + paddd sad_reg, xmm3 ; sad + + add %5, %7 + add %6, %7 +%endmacro + + +%macro WELS_MAX_REG_SSE2 1 ; xmm1, xmm2, xmm3 can be used +%define max_reg %1 + movdqa xmm1, max_reg + psrldq xmm1, 4 + pmaxub max_reg, xmm1 + movdqa xmm1, max_reg + psrldq xmm1, 2 + pmaxub max_reg, xmm1 + movdqa xmm1, max_reg + psrldq xmm1, 1 + pmaxub max_reg, xmm1 +%endmacro + +%macro WELS_SAD_BGD_SQDIFF_16x1_SSE2 7 ;esi:%5 edi:%6 ebx:%7 +%define sad_reg %1 +%define sum_reg %2 +%define mad_reg %3 +%define sqdiff_reg %4 + movdqa xmm1, [%5] + movdqa xmm2, xmm1 + movdqa xmm3, xmm1 + punpcklbw xmm2, xmm0 + punpckhbw xmm3, xmm0 + pmaddwd xmm2, xmm2 + pmaddwd xmm3, xmm3 + paddd xmm2, xmm3 + movdqa xmm3, xmm2 + psllq xmm2, 32 + psrlq xmm3, 32 + psllq xmm3, 32 + paddd xmm2, xmm3 + paddd sad_reg, xmm2 ; sqsum + + movdqa xmm2, [%6] + movdqa xmm3, xmm1 + psadbw xmm3, xmm0 + paddd sum_reg, xmm3 ; sum_cur + movdqa xmm3, xmm2 + psadbw xmm3, xmm0 + pslldq xmm3, 4 + 
paddd sum_reg, xmm3 ; sum_ref + + movdqa xmm3, xmm1 + pmaxub xmm3, xmm2 + pminub xmm2, xmm1 + psubb xmm3, xmm2 ; abs diff + pmaxub mad_reg, xmm3 ; max abs diff + + movdqa xmm1, xmm3 + psadbw xmm3, xmm0 + paddd sad_reg, xmm3 ; sad + + movdqa xmm3, xmm1 + punpcklbw xmm1, xmm0 + punpckhbw xmm3, xmm0 + pmaddwd xmm1, xmm1 + pmaddwd xmm3, xmm3 + paddd sqdiff_reg, xmm1 + paddd sqdiff_reg, xmm3 ; sqdiff + + add %5, %7 + add %6, %7 +%endmacro + + +;*********************************************************************** +; Code +;*********************************************************************** + +SECTION .text + +%ifdef X86_32 + +;*********************************************************************** +; void SampleVariance16x16_sse2( uint8_t * y_ref, int32_t y_ref_stride, uint8_t * y_src, int32_t y_src_stride,SMotionTextureUnit* pMotionTexture ); +;*********************************************************************** +WELS_EXTERN SampleVariance16x16_sse2 + push esi + push edi + push ebx + + sub esp, 16 + %define SUM [esp] + %define SUM_CUR [esp+4] + %define SQR [esp+8] + %define SQR_CUR [esp+12] + %define PUSH_SIZE 28 ; 12 + 16 + + mov edi, [esp+PUSH_SIZE+4] ; y_ref + mov edx, [esp+PUSH_SIZE+8] ; y_ref_stride + mov esi, [esp+PUSH_SIZE+12] ; y_src + mov eax, [esp+PUSH_SIZE+16] ; y_src_stride + mov ecx, 010h ; height = 16 + + pxor xmm7, xmm7 + movdqu SUM, xmm7 + +.hloops: + movdqa xmm0, [edi] ; y_ref + movdqa xmm1, [esi] ; y_src + movdqa xmm2, xmm0 ; store first for future process + movdqa xmm3, xmm1 + ; sum += diff; + movdqa xmm4, xmm0 + psadbw xmm4, xmm1 ; 2 parts, [0,..,15], [64,..,79] + ; to be continued for sum + pshufd xmm5, xmm4, 0C6h ; 11000110 B + paddw xmm4, xmm5 + movd ebx, xmm4 + add SUM, ebx + + ; sqr += diff * diff; + pmaxub xmm0, xmm1 + pminub xmm1, xmm2 + psubb xmm0, xmm1 ; diff + SUM_SQR_SSE2 xmm1, xmm0, xmm7 ; dst, pSrc, zero + movd ebx, xmm1 + add SQR, ebx + + ; sum_cur += y_src[x]; + movdqa xmm0, xmm3 ; cur_orig + movdqa xmm1, xmm0 + punpcklbw 
xmm0, xmm7 + punpckhbw xmm1, xmm7 + paddw xmm0, xmm1 ; 8x2 + SUM_WORD_8x2_SSE2 xmm0, xmm1 + movd ebx, xmm0 + and ebx, 0ffffh + add SUM_CUR, ebx + + ; sqr_cur += y_src[x] * y_src[x]; + SUM_SQR_SSE2 xmm0, xmm3, xmm7 ; dst, pSrc, zero + movd ebx, xmm0 + add SQR_CUR, ebx + + lea edi, [edi+edx] + lea esi, [esi+eax] + dec ecx + jnz near .hloops + + mov ebx, 0 + mov bx, word SUM + sar ebx, 8 + imul ebx, ebx + mov ecx, SQR + sar ecx, 8 + sub ecx, ebx + mov edi, [esp+PUSH_SIZE+20] ; pMotionTexture + mov [edi], cx ; to store uiMotionIndex + mov ebx, 0 + mov bx, word SUM_CUR + sar ebx, 8 + imul ebx, ebx + mov ecx, SQR_CUR + sar ecx, 8 + sub ecx, ebx + mov [edi+2], cx ; to store uiTextureIndex + + %undef SUM + %undef SUM_CUR + %undef SQR + %undef SQR_CUR + %undef PUSH_SIZE + + add esp, 16 + pop ebx + pop edi + pop esi + + ret + + + +;************************************************************************************************************* +;void VAACalcSad_sse2( const uint8_t *cur_data, const uint8_t *ref_data, int32_t iPicWidth, int32_t iPicHeight +; int32_t iPicStride, int32_t *psadframe, int32_t *psad8x8) +;************************************************************************************************************* + + +WELS_EXTERN VAACalcSad_sse2 +%define cur_data esp + pushsize + 4 +%define ref_data esp + pushsize + 8 +%define iPicWidth esp + pushsize + 12 +%define iPicHeight esp + pushsize + 16 +%define iPicStride esp + pushsize + 20 +%define psadframe esp + pushsize + 24 +%define psad8x8 esp + pushsize + 28 +%define pushsize 12 + push esi + push edi + push ebx + mov esi, [cur_data] + mov edi, [ref_data] + mov ebx, [iPicStride] + mov edx, [psad8x8] + mov eax, ebx + + shr dword [iPicWidth], 4 ; iPicWidth/16 + shr dword [iPicHeight], 4 ; iPicHeight/16 + shl eax, 4 ; iPicStride*16 + pxor xmm0, xmm0 + pxor xmm7, xmm7 ; iFrameSad +height_loop: + mov ecx, dword [iPicWidth] + push esi + push edi +width_loop: + pxor xmm6, xmm6 ; + WELS_SAD_16x2_SSE2 esi,edi,ebx + 
WELS_SAD_16x2_SSE2 esi,edi,ebx + WELS_SAD_16x2_SSE2 esi,edi,ebx + WELS_SAD_16x2_SSE2 esi,edi,ebx + paddd xmm7, xmm6 + movd [edx], xmm6 + psrldq xmm6, 8 + movd [edx+4], xmm6 + + pxor xmm6, xmm6 + WELS_SAD_16x2_SSE2 esi,edi,ebx + WELS_SAD_16x2_SSE2 esi,edi,ebx + WELS_SAD_16x2_SSE2 esi,edi,ebx + WELS_SAD_16x2_SSE2 esi,edi,ebx + paddd xmm7, xmm6 + movd [edx+8], xmm6 + psrldq xmm6, 8 + movd [edx+12], xmm6 + + add edx, 16 + sub esi, eax + sub edi, eax + add esi, 16 + add edi, 16 + + dec ecx + jnz width_loop + + pop edi + pop esi + add esi, eax + add edi, eax + + dec dword [iPicHeight] + jnz height_loop + + mov edx, [psadframe] + movdqa xmm5, xmm7 + psrldq xmm7, 8 + paddd xmm7, xmm5 + movd [edx], xmm7 + +%undef cur_data +%undef ref_data +%undef iPicWidth +%undef iPicHeight +%undef iPicStride +%undef psadframe +%undef psad8x8 +%undef pushsize + pop ebx + pop edi + pop esi + ret + +%else ;64-bit + +;*********************************************************************** +; void SampleVariance16x16_sse2( uint8_t * y_ref, int32_t y_ref_stride, uint8_t * y_src, int32_t y_src_stride,SMotionTextureUnit* pMotionTexture ); +;*********************************************************************** +WELS_EXTERN SampleVariance16x16_sse2 + %define SUM r10;[esp] + %define SUM_CUR r11;[esp+4] + %define SQR r13;[esp+8] + %define SQR_CUR r15;[esp+12] + + push r12 + push r13 + push r14 + push r15 + %assign push_num 4 + LOAD_5_PARA + PUSH_XMM 8 + SIGN_EXTENSION r1,r1d + SIGN_EXTENSION r3,r3d + + mov r12,010h + pxor xmm7, xmm7 + movq SUM, xmm7 + movq SUM_CUR,xmm7 + movq SQR,xmm7 + movq SQR_CUR,xmm7 + +.hloops: + mov r14,0 + movdqa xmm0, [r0] ; y_ref + movdqa xmm1, [r2] ; y_src + movdqa xmm2, xmm0 ; store first for future process + movdqa xmm3, xmm1 + ; sum += diff; + movdqa xmm4, xmm0 + psadbw xmm4, xmm1 ; 2 parts, [0,..,15], [64,..,79] + ; to be continued for sum + pshufd xmm5, xmm4, 0C6h ; 11000110 B + paddw xmm4, xmm5 + movd r14d, xmm4 + add SUM, r14 + + ; sqr += diff * diff; + pmaxub 
xmm0, xmm1 + pminub xmm1, xmm2 + psubb xmm0, xmm1 ; diff + SUM_SQR_SSE2 xmm1, xmm0, xmm7 ; dst, pSrc, zero + movd r14d, xmm1 + add SQR, r14 + + ; sum_cur += y_src[x]; + movdqa xmm0, xmm3 ; cur_orig + movdqa xmm1, xmm0 + punpcklbw xmm0, xmm7 + punpckhbw xmm1, xmm7 + paddw xmm0, xmm1 ; 8x2 + SUM_WORD_8x2_SSE2 xmm0, xmm1 + movd r14d, xmm0 + and r14, 0ffffh + add SUM_CUR, r14 + + ; sqr_cur += y_src[x] * y_src[x]; + SUM_SQR_SSE2 xmm0, xmm3, xmm7 ; dst, pSrc, zero + movd r14d, xmm0 + add SQR_CUR, r14 + + lea r0, [r0+r1] + lea r2, [r2+r3] + dec r12 + jnz near .hloops + + mov r0, SUM + sar r0, 8 + imul r0, r0 + mov r1, SQR + sar r1, 8 + sub r1, r0 + mov [r4], r1w ; to store uiMotionIndex + mov r0, SUM_CUR + sar r0, 8 + imul r0, r0 + mov r1, SQR_CUR + sar r1, 8 + sub r1, r0 + mov [r4+2], r1w ; to store uiTextureIndex + + POP_XMM + LOAD_5_PARA_POP + pop r15 + pop r14 + pop r13 + pop r12 + + + %assign push_num 0 + + ret + + +;************************************************************************************************************* +;void VAACalcSad_sse2( const uint8_t *cur_data, const uint8_t *ref_data, int32_t iPicWidth, int32_t iPicHeight +; int32_t iPicStride, int32_t *psadframe, int32_t *psad8x8) +;************************************************************************************************************* + + +WELS_EXTERN VAACalcSad_sse2 +%define cur_data r0 +%define ref_data r1 +%define iPicWidth r2 +%define iPicHeight r3 +%define iPicStride r4 +%define psadframe r5 +%define psad8x8 r6 + + push r12 + push r13 + %assign push_num 2 + LOAD_7_PARA + PUSH_XMM 8 + SIGN_EXTENSION r2,r2d + SIGN_EXTENSION r3,r3d + SIGN_EXTENSION r4,r4d + + mov r12,r4 + shr r2, 4 ; iPicWidth/16 + shr r3, 4 ; iPicHeight/16 + + shl r12, 4 ; iPicStride*16 + pxor xmm0, xmm0 + pxor xmm7, xmm7 ; iFrameSad +height_loop: + mov r13, r2 + push r0 + push r1 +width_loop: + pxor xmm6, xmm6 + WELS_SAD_16x2_SSE2 r0,r1,r4 + WELS_SAD_16x2_SSE2 r0,r1,r4 + WELS_SAD_16x2_SSE2 r0,r1,r4 + WELS_SAD_16x2_SSE2 
r0,r1,r4 + paddd xmm7, xmm6 + movd [r6], xmm6 + psrldq xmm6, 8 + movd [r6+4], xmm6 + + pxor xmm6, xmm6 + WELS_SAD_16x2_SSE2 r0,r1,r4 + WELS_SAD_16x2_SSE2 r0,r1,r4 + WELS_SAD_16x2_SSE2 r0,r1,r4 + WELS_SAD_16x2_SSE2 r0,r1,r4 + paddd xmm7, xmm6 + movd [r6+8], xmm6 + psrldq xmm6, 8 + movd [r6+12], xmm6 + + add r6, 16 + sub r0, r12 + sub r1, r12 + add r0, 16 + add r1, 16 + + dec r13 + jnz width_loop + + pop r1 + pop r0 + add r0, r12 + add r1, r12 + + dec r3 + jnz height_loop + + ;mov r13, [psadframe] + movdqa xmm5, xmm7 + psrldq xmm7, 8 + paddd xmm7, xmm5 + movd [psadframe], xmm7 + +%undef cur_data +%undef ref_data +%undef iPicWidth +%undef iPicHeight +%undef iPicStride +%undef psadframe +%undef psad8x8 +%undef pushsize + POP_XMM + LOAD_7_PARA_POP + pop r13 + pop r12 + %assign push_num 0 + ret + +%endif + + +%ifdef X86_32 +;************************************************************************************************************* +;void VAACalcSadVar_sse2( const uint8_t *cur_data, const uint8_t *ref_data, int32_t iPicWidth, int32_t iPicHeight +; int32_t iPicStride, int32_t *psadframe, int32_t *psad8x8, int32_t *psum16x16, int32_t *psqsum16x16) +;************************************************************************************************************* + + +WELS_EXTERN VAACalcSadVar_sse2 +%define localsize 8 +%define cur_data esp + pushsize + localsize + 4 +%define ref_data esp + pushsize + localsize + 8 +%define iPicWidth esp + pushsize + localsize + 12 +%define iPicHeight esp + pushsize + localsize + 16 +%define iPicStride esp + pushsize + localsize + 20 +%define psadframe esp + pushsize + localsize + 24 +%define psad8x8 esp + pushsize + localsize + 28 +%define psum16x16 esp + pushsize + localsize + 32 +%define psqsum16x16 esp + pushsize + localsize + 36 +%define tmp_esi esp + 0 +%define tmp_edi esp + 4 +%define pushsize 16 + push ebp + push esi + push edi + push ebx + sub esp, localsize + mov esi, [cur_data] + mov edi, [ref_data] + mov ebx, [iPicStride] + mov edx, 
[psad8x8] + mov eax, ebx + + shr dword [iPicWidth], 4 ; iPicWidth/16 + shr dword [iPicHeight], 4 ; iPicHeight/16 + shl eax, 4 ; iPicStride*16 + pxor xmm0, xmm0 + pxor xmm7, xmm7 ; iFrameSad +var_height_loop: + mov ecx, dword [iPicWidth] ; ecx counts the 16-pixel-wide blocks left in this block row + mov [tmp_esi], esi ; save both row-start pointers; they are reloaded after the width loop + mov [tmp_edi], edi +var_width_loop: + pxor xmm6, xmm6 ; hiQuad_loQuad pSad8x8 + pxor xmm5, xmm5 ; pSum16x16 + pxor xmm4, xmm4 ; sqsum_16x16 + WELS_SAD_SUM_SQSUM_16x1_SSE2 esi,edi,ebx + WELS_SAD_SUM_SQSUM_16x1_SSE2 esi,edi,ebx + WELS_SAD_SUM_SQSUM_16x1_SSE2 esi,edi,ebx + WELS_SAD_SUM_SQSUM_16x1_SSE2 esi,edi,ebx + WELS_SAD_SUM_SQSUM_16x1_SSE2 esi,edi,ebx + WELS_SAD_SUM_SQSUM_16x1_SSE2 esi,edi,ebx + WELS_SAD_SUM_SQSUM_16x1_SSE2 esi,edi,ebx + WELS_SAD_SUM_SQSUM_16x1_SSE2 esi,edi,ebx + paddd xmm7, xmm6 ; fold the first eight rows' two SAD lanes into the frame accumulator + movd [edx], xmm6 + psrldq xmm6, 8 + movd [edx+4], xmm6 + + pxor xmm6, xmm6 + WELS_SAD_SUM_SQSUM_16x1_SSE2 esi,edi,ebx + WELS_SAD_SUM_SQSUM_16x1_SSE2 esi,edi,ebx + WELS_SAD_SUM_SQSUM_16x1_SSE2 esi,edi,ebx + WELS_SAD_SUM_SQSUM_16x1_SSE2 esi,edi,ebx + WELS_SAD_SUM_SQSUM_16x1_SSE2 esi,edi,ebx + WELS_SAD_SUM_SQSUM_16x1_SSE2 esi,edi,ebx + WELS_SAD_SUM_SQSUM_16x1_SSE2 esi,edi,ebx + WELS_SAD_SUM_SQSUM_16x1_SSE2 esi,edi,ebx + paddd xmm7, xmm6 ; same for the second eight rows + movd [edx+8], xmm6 + psrldq xmm6, 8 + movd [edx+12], xmm6 + + mov ebp, [psum16x16] + movdqa xmm1, xmm5 + psrldq xmm1, 8 + paddd xmm5, xmm1 + movd [ebp], xmm5 ; store the combined low dword as this block's psum16x16 entry + add dword [psum16x16], 4 ; advance the pointer held in the argument slot itself + + movdqa xmm5, xmm4 + psrldq xmm5, 8 + paddd xmm4, xmm5 + movdqa xmm3, xmm4 + psrldq xmm3, 4 + paddd xmm4, xmm3 ; low dword of xmm4 now holds the sum of its four dword lanes + + mov ebp, [psqsum16x16] + movd [ebp], xmm4 + add dword [psqsum16x16], 4 + + add edx, 16 ; four dwords were written to psad8x8 for this block + sub esi, eax ; eax = iPicStride*16; presumably undoes the row advance done by the 16 macro calls (macro body not visible here, verify) + sub edi, eax + add esi, 16 ; next 16-pixel column + add edi, 16 + + dec ecx + jnz var_width_loop + + mov esi, [tmp_esi] + mov edi, [tmp_edi] + add esi, eax ; move both row starts down by iPicStride*16 + add edi, eax + + dec dword [iPicHeight] ; the stack copy of iPicHeight (already shifted right by 4) is the outer loop counter + jnz var_height_loop + + mov edx, [psadframe] + movdqa xmm5, xmm7 + psrldq xmm7, 8 + paddd xmm7, xmm5 + movd [edx], xmm7 ; low quadword lane plus high quadword lane of the accumulator, stored at psadframe + + add esp, localsize + pop ebx + pop edi + pop esi + pop ebp +%undef cur_data +%undef 
ref_data +%undef iPicWidth +%undef iPicHeight +%undef iPicStride +%undef psadframe +%undef psad8x8 +%undef psum16x16 +%undef psqsum16x16 +%undef tmp_esi +%undef tmp_edi +%undef pushsize +%undef localsize + ret + +%else ;64-bit + +;************************************************************************************************************* +;void VAACalcSadVar_sse2( const uint8_t *cur_data, const uint8_t *ref_data, int32_t iPicWidth, int32_t iPicHeight +; int32_t iPicStride, int32_t *psadframe, int32_t *psad8x8, int32_t *psum16x16, int32_t *psqsum16x16) +;************************************************************************************************************* +; 64-bit variant of the routine above: same outputs, with arguments taken through the argN macros and the loop state kept in r11-r15. + +WELS_EXTERN VAACalcSadVar_sse2 +%define cur_data arg1 ;r0 +%define ref_data arg2 ;r1 +%define iPicWidth arg3 ;r2 +%define iPicHeight arg4 ;r3 +%define iPicStride arg5 +%define psadframe arg6 +%define psad8x8 arg7 +%define psum16x16 arg8 +%define psqsum16x16 arg9 + + push r12 + push r13 + push r14 + push r15 + %assign push_num 4 + PUSH_XMM 8 + +%ifdef WIN64 + mov r4, arg5 ;iPicStride + mov r5, arg6 ;psadframe (arg6 per the defines above; psad8x8 is arg7) +%endif + mov r14,arg7 ; r14 = psad8x8 + SIGN_EXTENSION r2,r2d + SIGN_EXTENSION r3,r3d + SIGN_EXTENSION r4,r4d + + mov r13,r4 + shr r2,4 ; iPicWidth/16 + shr r3,4 ; iPicHeight/16 + + shl r13,4 ; iPicStride*16 + pxor xmm0, xmm0 + pxor xmm7, xmm7 ; iFrameSad +var_height_loop: + push r2 ; save the blocks-per-row count; r2 itself is counted down by the width loop + %assign push_num push_num+1 + mov r11, r0 ; remember both row starts; they are copied back after the width loop + mov r12, r1 +var_width_loop: + pxor xmm6, xmm6 ; hiQuad_loQuad pSad8x8 + pxor xmm5, xmm5 ; pSum16x16 + pxor xmm4, xmm4 ; sqsum_16x16 + WELS_SAD_SUM_SQSUM_16x1_SSE2 r0,r1,r4 + WELS_SAD_SUM_SQSUM_16x1_SSE2 r0,r1,r4 + WELS_SAD_SUM_SQSUM_16x1_SSE2 r0,r1,r4 + WELS_SAD_SUM_SQSUM_16x1_SSE2 r0,r1,r4 + WELS_SAD_SUM_SQSUM_16x1_SSE2 r0,r1,r4 + WELS_SAD_SUM_SQSUM_16x1_SSE2 r0,r1,r4 + WELS_SAD_SUM_SQSUM_16x1_SSE2 r0,r1,r4 + WELS_SAD_SUM_SQSUM_16x1_SSE2 r0,r1,r4 + paddd xmm7, xmm6 ; fold the first eight rows' two SAD lanes into the frame accumulator + movd [r14], xmm6 + psrldq xmm6, 8 + movd [r14+4], xmm6 + + pxor xmm6, xmm6 + WELS_SAD_SUM_SQSUM_16x1_SSE2 r0,r1,r4 + WELS_SAD_SUM_SQSUM_16x1_SSE2 r0,r1,r4 
+ WELS_SAD_SUM_SQSUM_16x1_SSE2 r0,r1,r4 + WELS_SAD_SUM_SQSUM_16x1_SSE2 r0,r1,r4 + WELS_SAD_SUM_SQSUM_16x1_SSE2 r0,r1,r4 + WELS_SAD_SUM_SQSUM_16x1_SSE2 r0,r1,r4 + WELS_SAD_SUM_SQSUM_16x1_SSE2 r0,r1,r4 + WELS_SAD_SUM_SQSUM_16x1_SSE2 r0,r1,r4 + paddd xmm7, xmm6 + movd [r14+8], xmm6 + psrldq xmm6, 8 + movd [r14+12], xmm6 + + mov r15, psum16x16 + movdqa xmm1, xmm5 + psrldq xmm1, 8 + paddd xmm5, xmm1 + movd [r15], xmm5 ; store the combined low dword as this block's psum16x16 entry + add qword psum16x16, 4 ; the slot holds a 64-bit pointer (it is loaded whole into r15 above); a dword add would drop the carry into its upper half + + movdqa xmm5, xmm4 + psrldq xmm5, 8 + paddd xmm4, xmm5 + movdqa xmm3, xmm4 + psrldq xmm3, 4 + paddd xmm4, xmm3 ; low dword of xmm4 now holds the sum of its four dword lanes + + mov r15, psqsum16x16 + movd [r15], xmm4 + add qword psqsum16x16, 4 ; full-width pointer advance, for the same reason as psum16x16 + + add r14,16 ; four dwords were written to psad8x8 for this block + sub r0, r13 ; r13 = iPicStride*16 + sub r1, r13 + add r0, 16 ; next 16-pixel column + add r1, 16 + + dec r2 + jnz var_width_loop + + pop r2 ; restore the blocks-per-row count saved at var_height_loop + %assign push_num push_num-1 + mov r0, r11 + mov r1, r12 + add r0, r13 ; move both row starts down by iPicStride*16 + add r1, r13 + dec r3 + jnz var_height_loop + + mov r15, psadframe + movdqa xmm5, xmm7 + psrldq xmm7, 8 + paddd xmm7, xmm5 + movd [r15], xmm7 ; low quadword lane plus high quadword lane of the accumulator, stored at psadframe + + POP_XMM + pop r15 + pop r14 + pop r13 + pop r12 +%assign push_num 0 +%undef cur_data +%undef ref_data +%undef iPicWidth +%undef iPicHeight +%undef iPicStride +%undef psadframe +%undef psad8x8 +%undef psum16x16 +%undef psqsum16x16 +%undef tmp_esi +%undef tmp_edi +%undef pushsize +%undef localsize + ret + +%endif + +%ifdef X86_32 + +;************************************************************************************************************* +;void VAACalcSadSsd_sse2(const uint8_t *cur_data, const uint8_t *ref_data, int32_t iPicWidth, int32_t iPicHeight, +; int32_t iPicStride,int32_t *psadframe, int32_t *psad8x8, int32_t *psum16x16, int32_t *psqsum16x16, int32_t *psqdiff16x16) +;************************************************************************************************************* + + +WELS_EXTERN VAACalcSadSsd_sse2 +%define localsize 12 +%define cur_data esp + pushsize + localsize + 4 +%define ref_data esp + pushsize + localsize + 8 +%define iPicWidth esp + pushsize + localsize + 12 +%define iPicHeight esp + pushsize + 
localsize + 16 +%define iPicStride esp + pushsize + localsize + 20 +%define psadframe esp + pushsize + localsize + 24 +%define psad8x8 esp + pushsize + localsize + 28 +%define psum16x16 esp + pushsize + localsize + 32 +%define psqsum16x16 esp + pushsize + localsize + 36 +%define psqdiff16x16 esp + pushsize + localsize + 40 +%define tmp_esi esp + 0 +%define tmp_edi esp + 4 +%define tmp_sadframe esp + 8 +%define pushsize 16 + push ebp + push esi + push edi + push ebx + sub esp, localsize ; reserve 12 bytes of scratch: tmp_esi, tmp_edi and tmp_sadframe + + mov ecx, [iPicWidth] ; NOTE(review): this value is overwritten by the next load and never read + mov ecx, [iPicHeight] ; NOTE(review): ecx is reloaded at sqdiff_height_loop before any visible read + mov esi, [cur_data] + mov edi, [ref_data] + mov ebx, [iPicStride] + mov edx, [psad8x8] + mov eax, ebx + + shr dword [iPicWidth], 4 ; iPicWidth/16 + shr dword [iPicHeight], 4 ; iPicHeight/16 + shl eax, 4 ; iPicStride*16 + mov ecx, [iPicWidth] ; NOTE(review): overwritten by the next load + mov ecx, [iPicHeight] ; NOTE(review): overwritten at sqdiff_height_loop before any visible read + pxor xmm0, xmm0 + movd [tmp_sadframe], xmm0 ; zero the frame-SAD scratch slot +sqdiff_height_loop: + mov ecx, dword [iPicWidth] ; ecx counts the 16-pixel-wide blocks left in this block row + mov [tmp_esi], esi ; save both row-start pointers; they are reloaded after the width loop + mov [tmp_edi], edi +sqdiff_width_loop: + pxor xmm7, xmm7 ; hiQuad_loQuad pSad8x8 + pxor xmm6, xmm6 ; pSum16x16 + pxor xmm5, xmm5 ; sqsum_16x16 four dword + pxor xmm4, xmm4 ; sqdiff_16x16 four Dword + WELS_SAD_SUM_SQSUM_SQDIFF_16x1_SSE2 esi,edi,ebx + WELS_SAD_SUM_SQSUM_SQDIFF_16x1_SSE2 esi,edi,ebx + WELS_SAD_SUM_SQSUM_SQDIFF_16x1_SSE2 esi,edi,ebx + WELS_SAD_SUM_SQSUM_SQDIFF_16x1_SSE2 esi,edi,ebx + WELS_SAD_SUM_SQSUM_SQDIFF_16x1_SSE2 esi,edi,ebx + WELS_SAD_SUM_SQSUM_SQDIFF_16x1_SSE2 esi,edi,ebx + WELS_SAD_SUM_SQSUM_SQDIFF_16x1_SSE2 esi,edi,ebx + WELS_SAD_SUM_SQSUM_SQDIFF_16x1_SSE2 esi,edi,ebx + movdqa xmm1, xmm7 + movd [edx], xmm7 + psrldq xmm7, 8 + paddd xmm1, xmm7 + movd [edx+4], xmm7 + movd ebp, xmm1 ; ebp = sum of the two values just stored at psad8x8 + add [tmp_sadframe], ebp ; the frame total is accumulated in the stack scratch slot + + pxor xmm7, xmm7 + WELS_SAD_SUM_SQSUM_SQDIFF_16x1_SSE2 esi,edi,ebx + WELS_SAD_SUM_SQSUM_SQDIFF_16x1_SSE2 esi,edi,ebx + WELS_SAD_SUM_SQSUM_SQDIFF_16x1_SSE2 esi,edi,ebx + WELS_SAD_SUM_SQSUM_SQDIFF_16x1_SSE2 esi,edi,ebx + WELS_SAD_SUM_SQSUM_SQDIFF_16x1_SSE2 esi,edi,ebx + WELS_SAD_SUM_SQSUM_SQDIFF_16x1_SSE2 esi,edi,ebx + 
WELS_SAD_SUM_SQSUM_SQDIFF_16x1_SSE2 esi,edi,ebx + WELS_SAD_SUM_SQSUM_SQDIFF_16x1_SSE2 esi,edi,ebx + movdqa xmm1, xmm7 + movd [edx+8], xmm7 + psrldq xmm7, 8 + paddd xmm1, xmm7 + movd [edx+12], xmm7 + movd ebp, xmm1 ; ebp = sum of the two values just stored at psad8x8 + add [tmp_sadframe], ebp ; accumulate into the frame total kept on the stack + + mov ebp, [psum16x16] + movdqa xmm1, xmm6 + psrldq xmm1, 8 + paddd xmm6, xmm1 + movd [ebp], xmm6 ; store this block's psum16x16 entry + add dword [psum16x16], 4 ; advance the pointer held in the argument slot itself + + mov ebp, [psqsum16x16] + pshufd xmm6, xmm5, 14 ;00001110 + paddd xmm6, xmm5 + pshufd xmm5, xmm6, 1 ;00000001 + paddd xmm5, xmm6 ; low dword of xmm5 now holds the sum of the four dword lanes + movd [ebp], xmm5 + add dword [psqsum16x16], 4 + + mov ebp, [psqdiff16x16] + pshufd xmm5, xmm4, 14 ; 00001110 + paddd xmm5, xmm4 + pshufd xmm4, xmm5, 1 ; 00000001 + paddd xmm4, xmm5 ; same four-lane reduction for the sqdiff accumulator + movd [ebp], xmm4 + add dword [psqdiff16x16], 4 + + add edx, 16 ; four dwords were written to psad8x8 for this block + sub esi, eax ; eax = iPicStride*16 + sub edi, eax + add esi, 16 ; next 16-pixel column + add edi, 16 + + dec ecx + jnz sqdiff_width_loop + + mov esi, [tmp_esi] + mov edi, [tmp_edi] + add esi, eax ; move both row starts down by iPicStride*16 + add edi, eax + + dec dword [iPicHeight] ; the stack copy of iPicHeight (already shifted right by 4) is the outer loop counter + jnz sqdiff_height_loop + + mov ebx, [tmp_sadframe] + mov eax, [psadframe] + mov [eax], ebx ; publish the accumulated frame total through psadframe + + add esp, localsize + pop ebx + pop edi + pop esi + pop ebp +%undef cur_data +%undef ref_data +%undef iPicWidth +%undef iPicHeight +%undef iPicStride +%undef psadframe +%undef psad8x8 +%undef psum16x16 +%undef psqsum16x16 +%undef psqdiff16x16 +%undef tmp_esi +%undef tmp_edi +%undef tmp_sadframe +%undef pushsize +%undef localsize + ret + +%else + + +;************************************************************************************************************* +;void VAACalcSadSsd_sse2(const uint8_t *cur_data, const uint8_t *ref_data, int32_t iPicWidth, int32_t iPicHeight, +; int32_t iPicStride,int32_t *psadframe, int32_t *psad8x8, int32_t *psum16x16, int32_t *psqsum16x16, int32_t *psqdiff16x16) +;************************************************************************************************************* +; 64-bit variant: arguments are taken through the argN macros and the frame total is accumulated in xmm8. + +WELS_EXTERN VAACalcSadSsd_sse2 +%define localsize 12 +%define cur_data arg1;r0 +%define ref_data arg2;r1 +%define iPicWidth arg3;r2 +%define 
iPicHeight arg4;r3 +%define iPicStride arg5; +%define psadframe arg6; +%define psad8x8 arg7; +%define psum16x16 arg8; +%define psqsum16x16 arg9; +%define psqdiff16x16 arg10 + + push r12 + push r13 + push r14 + push r15 + %assign push_num 4 + PUSH_XMM 10 + +%ifdef WIN64 + mov r4,arg5 +%endif + mov r14,arg7 ; r14 = psad8x8 (arg7) + SIGN_EXTENSION r2,r2d + SIGN_EXTENSION r3,r3d + SIGN_EXTENSION r4,r4d + + mov r13,r4 + shr r2,4 ; iPicWidth/16 + shr r3,4 ; iPicHeight/16 + shl r13,4 ; iPicStride*16 + pxor xmm0, xmm0 + pxor xmm8, xmm8 ;framesad + pxor xmm9, xmm9 +sqdiff_height_loop: + ;mov ecx, dword [iPicWidth] + ;mov r14,r2 + push r2 ; save the blocks-per-row count; r2 itself is counted down by the width loop + %assign push_num push_num +1 + mov r10, r0 ; remember both row starts; they are copied back after the width loop + mov r11, r1 +sqdiff_width_loop: + pxor xmm7, xmm7 ; hiQuad_loQuad pSad8x8 + pxor xmm6, xmm6 ; pSum16x16 + pxor xmm5, xmm5 ; sqsum_16x16 four dword + pxor xmm4, xmm4 ; sqdiff_16x16 four Dword + WELS_SAD_SUM_SQSUM_SQDIFF_16x1_SSE2 r0,r1,r4 + WELS_SAD_SUM_SQSUM_SQDIFF_16x1_SSE2 r0,r1,r4 + WELS_SAD_SUM_SQSUM_SQDIFF_16x1_SSE2 r0,r1,r4 + WELS_SAD_SUM_SQSUM_SQDIFF_16x1_SSE2 r0,r1,r4 + WELS_SAD_SUM_SQSUM_SQDIFF_16x1_SSE2 r0,r1,r4 + WELS_SAD_SUM_SQSUM_SQDIFF_16x1_SSE2 r0,r1,r4 + WELS_SAD_SUM_SQSUM_SQDIFF_16x1_SSE2 r0,r1,r4 + WELS_SAD_SUM_SQSUM_SQDIFF_16x1_SSE2 r0,r1,r4 + movdqa xmm1, xmm7 + movd [r14], xmm7 + psrldq xmm7, 8 + paddd xmm1, xmm7 + movd [r14+4], xmm7 + movd r15d, xmm1 ; route the pair sum through r15d so that only the low dword reaches xmm9 + movd xmm9, r15d + paddd xmm8,xmm9 ; accumulate into the frame total + + + pxor xmm7, xmm7 + WELS_SAD_SUM_SQSUM_SQDIFF_16x1_SSE2 r0,r1,r4 + WELS_SAD_SUM_SQSUM_SQDIFF_16x1_SSE2 r0,r1,r4 + WELS_SAD_SUM_SQSUM_SQDIFF_16x1_SSE2 r0,r1,r4 + WELS_SAD_SUM_SQSUM_SQDIFF_16x1_SSE2 r0,r1,r4 + WELS_SAD_SUM_SQSUM_SQDIFF_16x1_SSE2 r0,r1,r4 + WELS_SAD_SUM_SQSUM_SQDIFF_16x1_SSE2 r0,r1,r4 + WELS_SAD_SUM_SQSUM_SQDIFF_16x1_SSE2 r0,r1,r4 + WELS_SAD_SUM_SQSUM_SQDIFF_16x1_SSE2 r0,r1,r4 + movdqa xmm1, xmm7 + movd [r14+8], xmm7 + psrldq xmm7, 8 + paddd xmm1, xmm7 + movd [r14+12], xmm7 + movd r15d, xmm1 ; same low-dword transfer for the second eight rows + movd xmm9, r15d + paddd xmm8,xmm9 + + mov r15, psum16x16 + movdqa xmm1, xmm6 + psrldq xmm1, 8 + paddd 
xmm6, xmm1 + movd [r15], xmm6 ; store this block's psum16x16 entry + add qword psum16x16, 4 ; the slot holds a 64-bit pointer (it is loaded whole into r15); a dword add would drop the carry into its upper half + + mov r15, psqsum16x16 + pshufd xmm6, xmm5, 14 ;00001110 + paddd xmm6, xmm5 + pshufd xmm5, xmm6, 1 ;00000001 + paddd xmm5, xmm6 ; low dword of xmm5 now holds the sum of the four dword lanes + movd [r15], xmm5 + add qword psqsum16x16, 4 ; full-width pointer advance, as for psum16x16 + + mov r15, psqdiff16x16 + pshufd xmm5, xmm4, 14 ; 00001110 + paddd xmm5, xmm4 + pshufd xmm4, xmm5, 1 ; 00000001 + paddd xmm4, xmm5 ; same four-lane reduction for the sqdiff accumulator + movd [r15], xmm4 + add qword psqdiff16x16, 4 ; full-width pointer advance, as for psum16x16 + + add r14,16 ; four dwords were written to psad8x8 for this block + sub r0, r13 ; r13 = iPicStride*16 + sub r1, r13 + add r0, 16 ; next 16-pixel column + add r1, 16 + + dec r2 + jnz sqdiff_width_loop + + pop r2 ; restore the blocks-per-row count saved at sqdiff_height_loop + %assign push_num push_num -1 + + mov r0, r10 + mov r1, r11 + add r0, r13 ; move both row starts down by iPicStride*16 + add r1, r13 + + dec r3 + jnz sqdiff_height_loop + + mov r13, psadframe + movd [r13], xmm8 ; publish the accumulated frame total through psadframe + + POP_XMM + pop r15 + pop r14 + pop r13 + pop r12 + %assign push_num 0 + +%undef cur_data +%undef ref_data +%undef iPicWidth +%undef iPicHeight +%undef iPicStride +%undef psadframe +%undef psad8x8 +%undef psum16x16 +%undef psqsum16x16 +%undef psqdiff16x16 +%undef tmp_esi +%undef tmp_edi +%undef tmp_sadframe +%undef pushsize +%undef localsize + ret + + + +%endif + +%ifdef X86_32 +;************************************************************************************************************* +;void VAACalcSadBgd_sse2(const uint8_t *cur_data, const uint8_t *ref_data, int32_t iPicWidth, int32_t iPicHeight, +; int32_t iPicStride, int32_t *psadframe, int32_t *psad8x8, int32_t *p_sd8x8, uint8_t *p_mad8x8) +;************************************************************************************************************* + + +WELS_EXTERN VAACalcSadBgd_sse2 +%define localsize 12 +%define cur_data esp + pushsize + localsize + 4 +%define ref_data esp + pushsize + localsize + 8 +%define iPicWidth esp + pushsize + localsize + 12 +%define iPicHeight esp + pushsize + localsize + 16 +%define iPicStride esp + pushsize + localsize + 20 +%define psadframe esp + pushsize + localsize + 24 +%define psad8x8 esp + pushsize + localsize + 28 +%define p_sd8x8 esp + pushsize + localsize + 32 +%define p_mad8x8 esp + 
pushsize + localsize + 36 +%define tmp_esi esp + 0 +%define tmp_edi esp + 4 +%define tmp_ecx esp + 8 +%define pushsize 16 + push ebp + push esi + push edi + push ebx + sub esp, localsize ; reserve 12 bytes of scratch: tmp_esi, tmp_edi and tmp_ecx + mov esi, [cur_data] + mov edi, [ref_data] + mov ebx, [iPicStride] + mov eax, ebx + + shr dword [iPicWidth], 4 ; iPicWidth/16 + shr dword [iPicHeight], 4 ; iPicHeight/16 + shl eax, 4 ; iPicStride*16 + xor ebp, ebp ; ebp accumulates the frame total that is stored through psadframe at the end + pxor xmm0, xmm0 +bgd_height_loop: + mov ecx, dword [iPicWidth] ; ecx counts the 16-pixel-wide blocks left in this block row + mov [tmp_esi], esi ; save both row-start pointers; they are reloaded after the width loop + mov [tmp_edi], edi +bgd_width_loop: + pxor xmm7, xmm7 ; pSad8x8 + pxor xmm6, xmm6 ; sum_cur_8x8 + pxor xmm5, xmm5 ; sum_ref_8x8 + pxor xmm4, xmm4 ; pMad8x8 + WELS_SAD_SD_MAD_16x1_SSE2 xmm7, xmm6, xmm5, xmm4 ,esi ,edi, ebx + WELS_SAD_SD_MAD_16x1_SSE2 xmm7, xmm6, xmm5, xmm4 ,esi ,edi, ebx + WELS_SAD_SD_MAD_16x1_SSE2 xmm7, xmm6, xmm5, xmm4 ,esi ,edi, ebx + WELS_SAD_SD_MAD_16x1_SSE2 xmm7, xmm6, xmm5, xmm4 ,esi ,edi, ebx + WELS_SAD_SD_MAD_16x1_SSE2 xmm7, xmm6, xmm5, xmm4 ,esi ,edi, ebx + WELS_SAD_SD_MAD_16x1_SSE2 xmm7, xmm6, xmm5, xmm4 ,esi ,edi, ebx + WELS_SAD_SD_MAD_16x1_SSE2 xmm7, xmm6, xmm5, xmm4 ,esi ,edi, ebx + WELS_SAD_SD_MAD_16x1_SSE2 xmm7, xmm6, xmm5, xmm4 ,esi ,edi, ebx + + + mov edx, [p_mad8x8] + WELS_MAX_REG_SSE2 xmm4 + + ;movdqa xmm1, xmm4 + ;punpcklbw xmm1, xmm0 + ;punpcklwd xmm1, xmm0 + ;movd [edx], xmm1 + ;punpckhbw xmm4, xmm0 + ;punpcklwd xmm4, xmm0 + ;movd [edx+4], xmm4 + ;add edx, 8 + ;mov [p_mad8x8], edx + mov [tmp_ecx], ecx ; spill the width counter: ecx is used as scratch for the byte stores that follow + movhlps xmm1, xmm4 ; bring the high quadword of xmm4 down so its low byte can be read through ecx + movd ecx, xmm4 + mov [edx], cl ; one byte per quadword of xmm4 goes to p_mad8x8 + movd ecx, xmm1 + mov [edx+1],cl + add edx, 2 + mov [p_mad8x8], edx ; write the advanced pointer back to the argument slot + + + pslldq xmm7, 4 ; shift the first eight rows' accumulators up by one dword before the second eight rows are added + pslldq xmm6, 4 + pslldq xmm5, 4 + + + pxor xmm4, xmm4 ; pMad8x8 + WELS_SAD_SD_MAD_16x1_SSE2 xmm7, xmm6, xmm5, xmm4 ,esi ,edi, ebx + WELS_SAD_SD_MAD_16x1_SSE2 xmm7, xmm6, xmm5, xmm4 ,esi ,edi, ebx + WELS_SAD_SD_MAD_16x1_SSE2 xmm7, xmm6, xmm5, xmm4 ,esi ,edi, ebx + WELS_SAD_SD_MAD_16x1_SSE2 xmm7, xmm6, xmm5, xmm4 ,esi ,edi, ebx + WELS_SAD_SD_MAD_16x1_SSE2 xmm7, xmm6, xmm5, xmm4 ,esi ,edi, ebx + 
WELS_SAD_SD_MAD_16x1_SSE2 xmm7, xmm6, xmm5, xmm4 ,esi ,edi, ebx + WELS_SAD_SD_MAD_16x1_SSE2 xmm7, xmm6, xmm5, xmm4 ,esi ,edi, ebx + WELS_SAD_SD_MAD_16x1_SSE2 xmm7, xmm6, xmm5, xmm4 ,esi ,edi, ebx + + mov edx, [p_mad8x8] + WELS_MAX_REG_SSE2 xmm4 + + ;movdqa xmm1, xmm4 + ;punpcklbw xmm1, xmm0 + ;punpcklwd xmm1, xmm0 + ;movd [edx], xmm1 + ;punpckhbw xmm4, xmm0 + ;punpcklwd xmm4, xmm0 + ;movd [edx+4], xmm4 + ;add edx, 8 + ;mov [p_mad8x8], edx + movhlps xmm1, xmm4 + movd ecx, xmm4 ; ecx is still scratch here; the width counter lives in tmp_ecx until it is reloaded further down + mov [edx], cl + movd ecx, xmm1 + mov [edx+1],cl + add edx, 2 + mov [p_mad8x8], edx ; write the advanced pointer back to the argument slot + + ; data in xmm7, xmm6, xmm5: D1 D3 D0 D2 + + mov edx, [psad8x8] + pshufd xmm1, xmm7, 10001101b ; D3 D2 D1 D0 + movdqa [edx], xmm1 ; movdqa is the aligned form, so psad8x8 must be 16-byte aligned at this point + add edx, 16 + mov [psad8x8], edx ; sad8x8 + + paddd xmm1, xmm7 ; D1+3 D3+2 D0+1 D2+0 + pshufd xmm2, xmm1, 00000011b + paddd xmm1, xmm2 + movd edx, xmm1 + add ebp, edx ; sad frame + + mov edx, [p_sd8x8] + psubd xmm6, xmm5 ; per the register comments at the loop top: sum_cur_8x8 minus sum_ref_8x8 + pshufd xmm1, xmm6, 10001101b + movdqa [edx], xmm1 ; aligned store, same alignment requirement as psad8x8 + add edx, 16 + mov [p_sd8x8], edx + + + add edx, 16 ; NOTE(review): edx is reloaded from memory before every explicit use visible here, so this add looks dead unless the row macro reads edx; verify + sub esi, eax ; eax = iPicStride*16 + sub edi, eax + add esi, 16 ; next 16-pixel column + add edi, 16 + + mov ecx, [tmp_ecx] ; restore the width counter spilled before the byte stores + dec ecx + jnz bgd_width_loop + + mov esi, [tmp_esi] + mov edi, [tmp_edi] + add esi, eax ; move both row starts down by iPicStride*16 + add edi, eax + + dec dword [iPicHeight] ; the stack copy of iPicHeight (already shifted right by 4) is the outer loop counter + jnz bgd_height_loop + + mov edx, [psadframe] + mov [edx], ebp ; publish the accumulated frame total through psadframe + + add esp, localsize + pop ebx + pop edi + pop esi + pop ebp +%undef cur_data +%undef ref_data +%undef iPicWidth +%undef iPicHeight +%undef iPicStride +%undef psadframe +%undef psad8x8 +%undef p_sd8x8 +%undef p_mad8x8 +%undef tmp_esi +%undef tmp_edi +%undef pushsize +%undef localsize + ret + + + +;************************************************************************************************************* +;void VAACalcSadSsdBgd_sse2(const uint8_t *cur_data, const uint8_t *ref_data, int32_t iPicWidth, int32_t iPicHeight, +; int32_t iPicStride, int32_t *psadframe, int32_t *psad8x8, int32_t *psum16x16, int32_t *psqsum16x16, +; int32_t *psqdiff16x16, int32_t *p_sd8x8, uint8_t *p_mad8x8) 
+;************************************************************************************************************* +; x86-32 variant. The width counter is spilled to tmp_ecx inside the loop and the frame total is kept in tmp_sadframe. + +WELS_EXTERN VAACalcSadSsdBgd_sse2 +%define localsize 16 +%define cur_data esp + pushsize + localsize + 4 +%define ref_data esp + pushsize + localsize + 8 +%define iPicWidth esp + pushsize + localsize + 12 +%define iPicHeight esp + pushsize + localsize + 16 +%define iPicStride esp + pushsize + localsize + 20 +%define psadframe esp + pushsize + localsize + 24 +%define psad8x8 esp + pushsize + localsize + 28 +%define psum16x16 esp + pushsize + localsize + 32 +%define psqsum16x16 esp + pushsize + localsize + 36 +%define psqdiff16x16 esp + pushsize + localsize + 40 +%define p_sd8x8 esp + pushsize + localsize + 44 +%define p_mad8x8 esp + pushsize + localsize + 48 +%define tmp_esi esp + 0 +%define tmp_edi esp + 4 +%define tmp_sadframe esp + 8 +%define tmp_ecx esp + 12 +%define pushsize 16 + push ebp + push esi + push edi + push ebx + sub esp, localsize ; reserve 16 bytes of scratch for the four tmp_ slots defined above + mov esi, [cur_data] + mov edi, [ref_data] + mov ebx, [iPicStride] + mov eax, ebx + + shr dword [iPicWidth], 4 ; iPicWidth/16 + shr dword [iPicHeight], 4 ; iPicHeight/16 + shl eax, 4 ; iPicStride*16 + pxor xmm0, xmm0 + movd [tmp_sadframe], xmm0 ; zero the frame-total scratch slot +sqdiff_bgd_height_loop: + mov ecx, dword [iPicWidth] ; ecx counts the 16-pixel-wide blocks left in this block row + mov [tmp_esi], esi ; save both row-start pointers; they are reloaded after the width loop + mov [tmp_edi], edi +sqdiff_bgd_width_loop: + pxor xmm7, xmm7 ; pSad8x8 interleaves sqsum16x16: sqsum1 sad1 sqsum0 sad0 + pxor xmm6, xmm6 ; sum_8x8 interleaves cur and pRef in Dword, Sref1 Scur1 Sref0 Scur0 + pxor xmm5, xmm5 ; pMad8x8 + pxor xmm4, xmm4 ; sqdiff_16x16 four Dword + WELS_SAD_BGD_SQDIFF_16x1_SSE2 xmm7, xmm6, xmm5, xmm4, esi , edi , ebx + WELS_SAD_BGD_SQDIFF_16x1_SSE2 xmm7, xmm6, xmm5, xmm4, esi , edi , ebx + WELS_SAD_BGD_SQDIFF_16x1_SSE2 xmm7, xmm6, xmm5, xmm4, esi , edi , ebx + WELS_SAD_BGD_SQDIFF_16x1_SSE2 xmm7, xmm6, xmm5, xmm4, esi , edi , ebx + WELS_SAD_BGD_SQDIFF_16x1_SSE2 xmm7, xmm6, xmm5, xmm4, esi , edi , ebx + WELS_SAD_BGD_SQDIFF_16x1_SSE2 xmm7, xmm6, xmm5, xmm4, esi , 
edi , ebx + WELS_SAD_BGD_SQDIFF_16x1_SSE2 xmm7, xmm6, xmm5, xmm4, esi , edi , ebx + WELS_SAD_BGD_SQDIFF_16x1_SSE2 xmm7, xmm6, xmm5, xmm4, esi , edi , ebx + + mov edx, [psad8x8] + movdqa xmm2, xmm7 + pshufd xmm1, xmm2, 00001110b ; bring the high quadword of the accumulator down to the low lanes + movd [edx], xmm2 + movd [edx+4], xmm1 + add edx, 8 + mov [psad8x8], edx ; sad8x8 + + paddd xmm1, xmm2 + movd edx, xmm1 + add [tmp_sadframe], edx ; iFrameSad + + mov edx, [psum16x16] + movdqa xmm1, xmm6 + pshufd xmm2, xmm1, 00001110b + paddd xmm1, xmm2 + movd [edx], xmm1 ; sum + + mov edx, [p_sd8x8] + pshufd xmm1, xmm6, 11110101b ; Sref1 Sref1 Sref0 Sref0 + psubd xmm6, xmm1 ; 00 diff1 00 diff0 + pshufd xmm1, xmm6, 00001000b ; xx xx diff1 diff0 + movq [edx], xmm1 + add edx, 8 + mov [p_sd8x8], edx + + mov edx, [p_mad8x8] + WELS_MAX_REG_SSE2 xmm5 + ;movdqa xmm1, xmm5 + ;punpcklbw xmm1, xmm0 + ;punpcklwd xmm1, xmm0 + ;movd [edx], xmm1 + ;punpckhbw xmm5, xmm0 + ;punpcklwd xmm5, xmm0 + ;movd [edx+4], xmm5 + ;add edx, 8 + ;mov [p_mad8x8], edx + mov [tmp_ecx], ecx ; spill the width counter: ecx is used as scratch for the byte stores that follow + movhlps xmm1, xmm5 + movd ecx, xmm5 + mov [edx], cl ; one byte per quadword of xmm5 goes to p_mad8x8 + movd ecx, xmm1 + mov [edx+1],cl + add edx, 2 + mov [p_mad8x8], edx ; write the advanced pointer back to the argument slot + + psrlq xmm7, 32 + psllq xmm7, 32 ; clear sad + pxor xmm6, xmm6 ; sum_8x8 interleaves cur and pRef in Dword, Sref1 Scur1 Sref0 Scur0 + pxor xmm5, xmm5 ; pMad8x8 + WELS_SAD_BGD_SQDIFF_16x1_SSE2 xmm7, xmm6, xmm5, xmm4, esi , edi , ebx + WELS_SAD_BGD_SQDIFF_16x1_SSE2 xmm7, xmm6, xmm5, xmm4, esi , edi , ebx + WELS_SAD_BGD_SQDIFF_16x1_SSE2 xmm7, xmm6, xmm5, xmm4, esi , edi , ebx + WELS_SAD_BGD_SQDIFF_16x1_SSE2 xmm7, xmm6, xmm5, xmm4, esi , edi , ebx + WELS_SAD_BGD_SQDIFF_16x1_SSE2 xmm7, xmm6, xmm5, xmm4, esi , edi , ebx + WELS_SAD_BGD_SQDIFF_16x1_SSE2 xmm7, xmm6, xmm5, xmm4, esi , edi , ebx + WELS_SAD_BGD_SQDIFF_16x1_SSE2 xmm7, xmm6, xmm5, xmm4, esi , edi , ebx + WELS_SAD_BGD_SQDIFF_16x1_SSE2 xmm7, xmm6, xmm5, xmm4, esi , edi , ebx + + mov edx, [psad8x8] + movdqa xmm2, xmm7 + pshufd xmm1, xmm2, 00001110b + movd [edx], xmm2 + movd [edx+4], xmm1 + add edx, 8 + mov 
[psad8x8], edx ; sad8x8 + + paddd xmm1, xmm2 + movd edx, xmm1 + add [tmp_sadframe], edx ; iFrameSad + + mov edx, [psum16x16] + movdqa xmm1, xmm6 + pshufd xmm2, xmm1, 00001110b + paddd xmm1, xmm2 + movd ebp, xmm1 ; sum + add [edx], ebp ; add to the value stored for the first eight rows; the pointer was not advanced in between + add edx, 4 + mov [psum16x16], edx + + mov edx, [psqsum16x16] + psrlq xmm7, 32 ; move the upper dword of each quadword (the sqsum lanes per the comment at the loop top) into the low dword + pshufd xmm2, xmm7, 00001110b + paddd xmm2, xmm7 + movd [edx], xmm2 ; sqsum + add edx, 4 + mov [psqsum16x16], edx + + mov edx, [p_sd8x8] + pshufd xmm1, xmm6, 11110101b ; Sref1 Sref1 Sref0 Sref0 + psubd xmm6, xmm1 ; 00 diff1 00 diff0 + pshufd xmm1, xmm6, 00001000b ; xx xx diff1 diff0 + movq [edx], xmm1 + add edx, 8 + mov [p_sd8x8], edx + + mov edx, [p_mad8x8] + WELS_MAX_REG_SSE2 xmm5 + ;movdqa xmm1, xmm5 + ;punpcklbw xmm1, xmm0 + ;punpcklwd xmm1, xmm0 + ;movd [edx], xmm1 + ;punpckhbw xmm5, xmm0 + ;punpcklwd xmm5, xmm0 + ;movd [edx+4], xmm5 + ;add edx, 8 + ;mov [p_mad8x8], edx + movhlps xmm1, xmm5 + movd ecx, xmm5 ; ecx is still scratch here; the width counter lives in tmp_ecx until it is reloaded further down + mov [edx], cl + movd ecx, xmm1 + mov [edx+1],cl + add edx, 2 + mov [p_mad8x8], edx + + mov edx, [psqdiff16x16] + pshufd xmm1, xmm4, 00001110b + paddd xmm4, xmm1 + pshufd xmm1, xmm4, 00000001b + paddd xmm4, xmm1 ; low dword of xmm4 now holds the sum of its four dword lanes + movd [edx], xmm4 + add edx, 4 + mov [psqdiff16x16], edx + + add edx, 16 ; NOTE(review): edx is reloaded from memory before every explicit use visible here, so this add looks dead unless the row macro reads edx; verify + sub esi, eax ; eax = iPicStride*16 + sub edi, eax + add esi, 16 ; next 16-pixel column + add edi, 16 + + mov ecx, [tmp_ecx] ; restore the width counter spilled before the byte stores + dec ecx + jnz sqdiff_bgd_width_loop + + mov esi, [tmp_esi] + mov edi, [tmp_edi] + add esi, eax ; move both row starts down by iPicStride*16 + add edi, eax + + dec dword [iPicHeight] ; the stack copy of iPicHeight (already shifted right by 4) is the outer loop counter + jnz sqdiff_bgd_height_loop + + mov edx, [psadframe] + mov ebp, [tmp_sadframe] + mov [edx], ebp ; publish the accumulated frame total through psadframe + + add esp, localsize + pop ebx + pop edi + pop esi + pop ebp +%undef cur_data +%undef ref_data +%undef iPicWidth +%undef iPicHeight +%undef iPicStride +%undef psadframe +%undef psad8x8 +%undef psum16x16 +%undef psqsum16x16 +%undef psqdiff16x16 +%undef p_sd8x8 +%undef p_mad8x8 +%undef tmp_esi +%undef tmp_edi +%undef pushsize +%undef localsize + ret +%else + 
+;************************************************************************************************************* +;void VAACalcSadBgd_sse2(const uint8_t *cur_data, const uint8_t *ref_data, int32_t iPicWidth, int32_t iPicHeight, +; int32_t iPicStride, int32_t *psadframe, int32_t *psad8x8, int32_t *p_sd8x8, uint8_t *p_mad8x8) +;************************************************************************************************************* +; 64-bit variant. cur_data is walked through r15 so that r0 can serve as scratch; the frame total is accumulated in xmm8. + +WELS_EXTERN VAACalcSadBgd_sse2 +%define cur_data arg1; +%define ref_data arg2; +%define iPicWidth arg3; +%define iPicHeight arg4; +%define iPicStride arg5; +%define psadframe arg6; +%define psad8x8 arg7; +%define p_sd8x8 arg8; +%define p_mad8x8 arg9; + + push r12 + push r13 + push r14 + push r15 +%assign push_num 4 + PUSH_XMM 10 +%ifdef WIN64 + mov r4,arg5 + ; mov r5,arg6 +%endif + mov r14,arg7 ; NOTE(review): r14 is reloaded from p_mad8x8 before its first visible use, so this load appears unused; verify + SIGN_EXTENSION r2,r2d + SIGN_EXTENSION r3,r3d + SIGN_EXTENSION r4,r4d + + + mov r13,r4 + mov r15,r0 ; from here on r15 carries the cur_data pointer and r0 is free as scratch + shr r2,4 ; iPicWidth/16 + shr r3,4 ; iPicHeight/16 + shl r13,4 ; iPicStride*16 + pxor xmm0, xmm0 + pxor xmm8, xmm8 ; frame total accumulator + pxor xmm9, xmm9 +bgd_height_loop: + ;mov ecx, dword [iPicWidth] + push r2 ; save the blocks-per-row count; r2 itself is counted down by the width loop + %assign push_num push_num+1 + mov r10, r15 ; remember both row starts; they are copied back after the width loop + mov r11, r1 +bgd_width_loop: + pxor xmm7, xmm7 ; pSad8x8 + pxor xmm6, xmm6 ; sum_cur_8x8 + pxor xmm5, xmm5 ; sum_ref_8x8 + pxor xmm4, xmm4 ; pMad8x8 + WELS_SAD_SD_MAD_16x1_SSE2 xmm7, xmm6, xmm5, xmm4 ,r15 ,r1, r4 + WELS_SAD_SD_MAD_16x1_SSE2 xmm7, xmm6, xmm5, xmm4 ,r15 ,r1, r4 + WELS_SAD_SD_MAD_16x1_SSE2 xmm7, xmm6, xmm5, xmm4 ,r15 ,r1, r4 + WELS_SAD_SD_MAD_16x1_SSE2 xmm7, xmm6, xmm5, xmm4 ,r15 ,r1, r4 + WELS_SAD_SD_MAD_16x1_SSE2 xmm7, xmm6, xmm5, xmm4 ,r15 ,r1, r4 + WELS_SAD_SD_MAD_16x1_SSE2 xmm7, xmm6, xmm5, xmm4 ,r15 ,r1, r4 + WELS_SAD_SD_MAD_16x1_SSE2 xmm7, xmm6, xmm5, xmm4 ,r15 ,r1, r4 + WELS_SAD_SD_MAD_16x1_SSE2 xmm7, xmm6, xmm5, xmm4 ,r15 ,r1, r4 + + + mov r14, p_mad8x8 + WELS_MAX_REG_SSE2 xmm4 + + ;mov [tmp_ecx], ecx + movhlps xmm1, xmm4 ; bring the high quadword of xmm4 down so its low byte can be read through r0d + movd r0d, xmm4 + + + mov [r14], r0b ; one byte per quadword of xmm4 goes to p_mad8x8 + movd r0d, xmm1 + mov [r14+1],r0b + add r14, 2 ; r14 keeps the advanced pointer in a register; it is written back to p_mad8x8 after the second eight rows + ;mov p_mad8x8, r14 + 
+ + pslldq xmm7, 4 ; shift the first eight rows' accumulators up by one dword before the second eight rows are added + pslldq xmm6, 4 + pslldq xmm5, 4 + + + pxor xmm4, xmm4 ; pMad8x8 + WELS_SAD_SD_MAD_16x1_SSE2 xmm7, xmm6, xmm5, xmm4 ,r15 ,r1, r4 + WELS_SAD_SD_MAD_16x1_SSE2 xmm7, xmm6, xmm5, xmm4 ,r15 ,r1, r4 + WELS_SAD_SD_MAD_16x1_SSE2 xmm7, xmm6, xmm5, xmm4 ,r15 ,r1, r4 + WELS_SAD_SD_MAD_16x1_SSE2 xmm7, xmm6, xmm5, xmm4 ,r15 ,r1, r4 + WELS_SAD_SD_MAD_16x1_SSE2 xmm7, xmm6, xmm5, xmm4 ,r15 ,r1, r4 + WELS_SAD_SD_MAD_16x1_SSE2 xmm7, xmm6, xmm5, xmm4 ,r15 ,r1, r4 + WELS_SAD_SD_MAD_16x1_SSE2 xmm7, xmm6, xmm5, xmm4 ,r15 ,r1, r4 + WELS_SAD_SD_MAD_16x1_SSE2 xmm7, xmm6, xmm5, xmm4 ,r15 ,r1, r4 + + ;mov r14, [p_mad8x8] + WELS_MAX_REG_SSE2 xmm4 + + movhlps xmm1, xmm4 + movd r0d, xmm4 + mov [r14], r0b ; r14 still points just past the two bytes written for the first eight rows + movd r0d, xmm1 + mov [r14+1],r0b + add r14, 2 + mov p_mad8x8, r14 ; write the pointer back once, after all four byte stores for this block + + ; data in xmm7, xmm6, xmm5: D1 D3 D0 D2 + + mov r14, psad8x8 + pshufd xmm1, xmm7, 10001101b ; D3 D2 D1 D0 + movdqa [r14], xmm1 ; movdqa is the aligned form, so psad8x8 must be 16-byte aligned at this point + add r14, 16 + mov psad8x8, r14 ; sad8x8 + + paddd xmm1, xmm7 ; D1+3 D3+2 D0+1 D2+0 + pshufd xmm2, xmm1, 00000011b + paddd xmm1, xmm2 + movd r14d, xmm1 ; route the block total through r14d so that only the low dword reaches xmm9 + movd xmm9, r14d + paddd xmm8, xmm9 ; sad frame + + mov r14, p_sd8x8 + psubd xmm6, xmm5 ; per the register comments at the loop top: sum_cur_8x8 minus sum_ref_8x8 + pshufd xmm1, xmm6, 10001101b + movdqa [r14], xmm1 ; aligned store, same alignment requirement as psad8x8 + add r14, 16 + mov p_sd8x8, r14 + + + ;add edx, 16 + sub r15, r13 ; r13 = iPicStride*16 + sub r1, r13 + add r15, 16 ; next 16-pixel column + add r1, 16 + + + dec r2 + jnz bgd_width_loop + pop r2 ; restore the blocks-per-row count saved at bgd_height_loop +%assign push_num push_num-1 + mov r15, r10 + mov r1, r11 + add r15, r13 ; move both row starts down by iPicStride*16 + add r1, r13 + + dec r3 + jnz bgd_height_loop + + mov r13, psadframe + movd [r13], xmm8 ; publish the accumulated frame total through psadframe + + POP_XMM + pop r15 + pop r14 + pop r13 + pop r12 +%assign push_num 0 +%undef cur_data +%undef ref_data +%undef iPicWidth +%undef iPicHeight +%undef iPicStride +%undef psadframe +%undef psad8x8 +%undef p_sd8x8 +%undef p_mad8x8 +%undef tmp_esi +%undef tmp_edi +%undef pushsize +%undef localsize + ret + + + +;************************************************************************************************************* +;void VAACalcSadSsdBgd_sse2(const uint8_t *cur_data, const uint8_t 
*ref_data, int32_t iPicWidth, int32_t iPicHeight, +; int32_t iPicStride, int32_t *psadframe, int32_t *psad8x8, int32_t *psum16x16, int32_t *psqsum16x16, +; int32_t *psqdiff16x16, int32_t *p_sd8x8, uint8_t *p_mad8x8) +;************************************************************************************************************* +; 64-bit variant. Every output pointer is reloaded from its argN slot, advanced in r14 and written back; the frame total is accumulated in xmm8. + +WELS_EXTERN VAACalcSadSsdBgd_sse2 +%define cur_data arg1; +%define ref_data arg2; +%define iPicWidth arg3; +%define iPicHeight arg4; +%define iPicStride arg5; +%define psadframe arg6; +%define psad8x8 arg7; +%define psum16x16 arg8; +%define psqsum16x16 arg9; +%define psqdiff16x16 arg10; +%define p_sd8x8 arg11 +%define p_mad8x8 arg12 + + push r12 + push r13 + push r14 + push r15 +%assign push_num 4 + PUSH_XMM 10 +%ifdef WIN64 + mov r4,arg5 + ;mov r5,arg6 +%endif + SIGN_EXTENSION r2,r2d + SIGN_EXTENSION r3,r3d + SIGN_EXTENSION r4,r4d + + mov r13,r4 + shr r2, 4 ; iPicWidth/16 + shr r3, 4 ; iPicHeight/16 + shl r13, 4 ; iPicStride*16 + pxor xmm0, xmm0 + pxor xmm8, xmm8 ; frame total accumulator + pxor xmm9, xmm9 + + +sqdiff_bgd_height_loop: + mov r10, r0 ; remember both row starts; they are copied back after the width loop + mov r11, r1 + push r2 ; save the blocks-per-row count; r2 itself is counted down by the width loop +%assign push_num push_num+1 +sqdiff_bgd_width_loop: + + pxor xmm7, xmm7 ; pSad8x8 interleaves sqsum16x16: sqsum1 sad1 sqsum0 sad0 + pxor xmm6, xmm6 ; sum_8x8 interleaves cur and pRef in Dword, Sref1 Scur1 Sref0 Scur0 + pxor xmm5, xmm5 ; pMad8x8 + pxor xmm4, xmm4 ; sqdiff_16x16 four Dword + WELS_SAD_BGD_SQDIFF_16x1_SSE2 xmm7, xmm6, xmm5, xmm4, r0 , r1 , r4 + WELS_SAD_BGD_SQDIFF_16x1_SSE2 xmm7, xmm6, xmm5, xmm4, r0 , r1 , r4 + WELS_SAD_BGD_SQDIFF_16x1_SSE2 xmm7, xmm6, xmm5, xmm4, r0 , r1 , r4 + WELS_SAD_BGD_SQDIFF_16x1_SSE2 xmm7, xmm6, xmm5, xmm4, r0 , r1 , r4 + WELS_SAD_BGD_SQDIFF_16x1_SSE2 xmm7, xmm6, xmm5, xmm4, r0 , r1 , r4 + WELS_SAD_BGD_SQDIFF_16x1_SSE2 xmm7, xmm6, xmm5, xmm4, r0 , r1 , r4 + WELS_SAD_BGD_SQDIFF_16x1_SSE2 xmm7, xmm6, xmm5, xmm4, r0 , r1 , r4 + WELS_SAD_BGD_SQDIFF_16x1_SSE2 xmm7, xmm6, xmm5, xmm4, r0 , r1 , r4 + + mov r14, psad8x8 + movdqa xmm2, xmm7 + pshufd xmm1, xmm2, 
00001110b + movd [r14], xmm2 + movd [r14+4], xmm1 + add r14, 8 + mov psad8x8, r14 ; sad8x8 + + paddd xmm1, xmm2 + movd r14d, xmm1 ; route the pair sum through r14d so that only the low dword reaches xmm9 + movd xmm9,r14d + paddd xmm8, xmm9 ; iFrameSad + + mov r14, psum16x16 + movdqa xmm1, xmm6 + pshufd xmm2, xmm1, 00001110b + paddd xmm1, xmm2 + movd [r14], xmm1 ; sum + + mov r14, p_sd8x8 + pshufd xmm1, xmm6, 11110101b ; Sref1 Sref1 Sref0 Sref0 + psubd xmm6, xmm1 ; 00 diff1 00 diff0 + pshufd xmm1, xmm6, 00001000b ; xx xx diff1 diff0 + movq [r14], xmm1 + add r14, 8 + mov p_sd8x8, r14 + + mov r14, p_mad8x8 + WELS_MAX_REG_SSE2 xmm5 + + movhlps xmm1, xmm5 + push r0 ; r0 holds the cur_data pointer here, so it is saved around its use as byte-store scratch + movd r0d, xmm5 + mov [r14], r0b ; one byte per quadword of xmm5 goes to p_mad8x8 + movd r0d, xmm1 + mov [r14+1],r0b + pop r0 + add r14, 2 + mov p_mad8x8, r14 ; argument slots are only accessed outside the push and pop pair above, where push_num is accurate + + psrlq xmm7, 32 + psllq xmm7, 32 ; clear sad + pxor xmm6, xmm6 ; sum_8x8 interleaves cur and pRef in Dword, Sref1 Scur1 Sref0 Scur0 + pxor xmm5, xmm5 ; pMad8x8 + WELS_SAD_BGD_SQDIFF_16x1_SSE2 xmm7, xmm6, xmm5, xmm4, r0 , r1 , r4 + WELS_SAD_BGD_SQDIFF_16x1_SSE2 xmm7, xmm6, xmm5, xmm4, r0 , r1 , r4 + WELS_SAD_BGD_SQDIFF_16x1_SSE2 xmm7, xmm6, xmm5, xmm4, r0 , r1 , r4 + WELS_SAD_BGD_SQDIFF_16x1_SSE2 xmm7, xmm6, xmm5, xmm4, r0 , r1 , r4 + WELS_SAD_BGD_SQDIFF_16x1_SSE2 xmm7, xmm6, xmm5, xmm4, r0 , r1 , r4 + WELS_SAD_BGD_SQDIFF_16x1_SSE2 xmm7, xmm6, xmm5, xmm4, r0 , r1 , r4 + WELS_SAD_BGD_SQDIFF_16x1_SSE2 xmm7, xmm6, xmm5, xmm4, r0 , r1 , r4 + WELS_SAD_BGD_SQDIFF_16x1_SSE2 xmm7, xmm6, xmm5, xmm4, r0 , r1 , r4 + + mov r14, psad8x8 + movdqa xmm2, xmm7 + pshufd xmm1, xmm2, 00001110b + movd [r14], xmm2 + movd [r14+4], xmm1 + add r14, 8 + mov psad8x8, r14 ; sad8x8 + + paddd xmm1, xmm2 + movd r14d, xmm1 + movd xmm9, r14d + paddd xmm8, xmm9 ; iFrameSad + + mov r14, psum16x16 + movdqa xmm1, xmm6 + pshufd xmm2, xmm1, 00001110b + paddd xmm1, xmm2 + movd r15d, xmm1 ; sum + add [r14], r15d ; add to the value stored for the first eight rows; the pointer was not advanced in between + add r14, 4 + mov psum16x16, r14 + + mov r14, psqsum16x16 + psrlq xmm7, 32 ; move the upper dword of each quadword (the sqsum lanes per the comment at the loop top) into the low dword + pshufd xmm2, xmm7, 00001110b + paddd xmm2, xmm7 + movd [r14], xmm2 ; sqsum + add r14, 4 + mov psqsum16x16, r14 + + mov r14, 
p_sd8x8 + pshufd xmm1, xmm6, 11110101b ; Sref1 Sref1 Sref0 Sref0 + psubd xmm6, xmm1 ; 00 diff1 00 diff0 + pshufd xmm1, xmm6, 00001000b ; xx xx diff1 diff0 + movq [r14], xmm1 + add r14, 8 + mov p_sd8x8, r14 + + mov r14, p_mad8x8 + WELS_MAX_REG_SSE2 xmm5 + + + movhlps xmm1, xmm5 + push r0 ; r0 holds the cur_data pointer here, so it is saved around its use as byte-store scratch + movd r0d, xmm5 + mov [r14], r0b + movd r0d, xmm1 + mov [r14+1],r0b + pop r0 + add r14, 2 + mov p_mad8x8, r14 + + mov r14, psqdiff16x16 + pshufd xmm1, xmm4, 00001110b + paddd xmm4, xmm1 + pshufd xmm1, xmm4, 00000001b + paddd xmm4, xmm1 ; low dword of xmm4 now holds the sum of its four dword lanes + movd [r14], xmm4 + add r14, 4 + mov psqdiff16x16, r14 + + add r14, 16 ; NOTE(review): r14 is reloaded from an argument slot before every explicit use visible here, so this add looks dead unless the row macro reads r14; verify + sub r0, r13 ; r13 = iPicStride*16 + sub r1, r13 + add r0, 16 ; next 16-pixel column + add r1, 16 + + dec r2 + jnz sqdiff_bgd_width_loop + pop r2 ; restore the blocks-per-row count saved at sqdiff_bgd_height_loop + %assign push_num push_num-1 + mov r0, r10 + mov r1, r11 + add r0, r13 ; move both row starts down by iPicStride*16 + add r1, r13 + + dec r3 + jnz sqdiff_bgd_height_loop + + mov r14, psadframe + movd [r14], xmm8 ; publish the accumulated frame total through psadframe + + POP_XMM + pop r15 + pop r14 + pop r13 + pop r12 +%assign push_num 0 +%undef cur_data +%undef ref_data +%undef iPicWidth +%undef iPicHeight +%undef iPicStride +%undef psadframe +%undef psad8x8 +%undef psum16x16 +%undef psqsum16x16 +%undef psqdiff16x16 +%undef p_sd8x8 +%undef p_mad8x8 +%undef tmp_esi +%undef tmp_edi +%undef pushsize +%undef localsize + ret +%endif + +%ifdef X86_32 +%define ptrword dword +%else +%define ptrword qword +%endif + +%define xmm_width 16 +%define ymm_width 32 +; PUSHM: push every argument in the order given, then add the argument count to push_num. +%macro PUSHM 1-* + %rep %0 + push %1 + %rotate 1 + %endrep + %assign push_num push_num + %0 +%endmacro +; POPM: pop the arguments in reverse order (last listed first), then subtract the argument count from push_num. +%macro POPM 1-* + %rep %0 + %rotate -1 + pop %1 + %endrep + %assign push_num push_num - %0 +%endmacro + +%ifdef X86_32 +%define stack_alloc_min 4 +%else +%define stack_alloc_min 8 +%endif + +; Allocate aligned stack space. +; address_out=%1 size=%2 alignment=%3 +%macro STACK_ALLOC 3 +%if (%3) & ((%3) - 1) + %error non-power-of-2 alignment requested. 
+%endif
+; stack_alloc_align: the requested alignment expressed in stack_alloc_min-sized slots,
+; rounded up; 1 when the requested alignment is not positive.
+%if (%3) > 0
+ %assign stack_alloc_align ((%3) + stack_alloc_min - 1) / stack_alloc_min
+%else
+ %assign stack_alloc_align 1
+%endif
+ ; Slots to reserve: the size rounded up to whole slots, plus (stack_alloc_align - 1)
+ ; slots of slack so the returned address can be rounded down to the alignment.
+ %assign stack_alloc_num ((%2) + stack_alloc_min - 1) / stack_alloc_min + stack_alloc_align - 1
+ ; push_num is kept in slots, the same unit PUSHM/POPM use (one per push/pop).
+ %assign push_num push_num + stack_alloc_num
+ ; NOTE(review): r7 is treated as the stack pointer here (other code in this file reads
+ ; [r7] right after a push) -- confirm against the register aliases in the include file.
+ sub r7, stack_alloc_min * stack_alloc_num
+%if stack_alloc_align == 1
+ mov %1, r7
+%else
+ ; Step past the slack, then clear the low bits to round down to the alignment.
+ lea %1, [r7 + stack_alloc_min * (stack_alloc_align - 1)]
+ and %1, -(stack_alloc_min * stack_alloc_align)
+%endif
+%endmacro
+
+; Deallocate stack space allocated with STACK_ALLOC.
+; Uses the assemble-time value of stack_alloc_num left by the STACK_ALLOC expansion that
+; precedes it in the source, so release an allocation before making another one.
+%macro STACK_DEALLOC 0
+ add r7, stack_alloc_min * stack_alloc_num
+ %assign push_num push_num - stack_alloc_num
+%endmacro
+
+; The code from here on is assembled only when HAVE_AVX2 is defined.
+%ifdef HAVE_AVX2
+; Max unsigned byte per quadword
+; out=%1 in=%2 tmp=%3
+; Folds each quadword onto itself with shifts of 32, 16 and 8 bits, taking the byte-wise
+; maximum after each shift. Only the least significant byte of each quadword of out holds
+; the maximum of all eight bytes; the other bytes hold partial maxima.
+%macro AVX2_Maxubq 3
+ vpsrlq %3, %2, 32
+ vpmaxub %1, %2, %3
+ vpsrlq %3, %1, 16
+ vpmaxub %1, %1, %3
+ vpsrlq %3, %1, 8
+ vpmaxub %1, %1, %3
+%endmacro
+
+; Max unsigned byte per quadword. 2 register input.
+; Results interleaved as least significant byte of even/odd doublewords.
+; out=%1 in_a=%2 in_b=%3 tmp=%4
+; The two blends plus the dword swap pair the two halves of every input quadword:
+; after the first vpmaxub, even dwords hold max(low, high dword) of in_b's quadwords and
+; odd dwords hold the same for in_a. The 16- and 8-bit shifts then reduce each dword to
+; its least significant byte.
+%macro AVX2_Maxubq2 4
+ vpblendd %4, %2, %3, 10101010b
+ vpshufd %4, %4, 10110001b
+ vpblendd %1, %2, %3, 01010101b
+ vpmaxub %1, %4, %1
+ vpsrld %4, %1, 16
+ vpmaxub %1, %1, %4
+ vpsrld %4, %1, 8
+ vpmaxub %1, %1, %4
+%endmacro
+
+; Sum of squared bytes, produced as dwords.
+; res=%1 src=%2 zero=%3 tmp=%4 add_to_res=%5
+; Bytes are widened to words by interleaving with the zero register; vpmaddwd of a
+; register with itself squares the words and adds adjacent pairs. With add_to_res == 0
+; the first product overwrites res, so res does not need to be cleared beforehand.
+%macro AVX2_Sqsumbdw 5
+ vpunpcklbw %4, %2, %3
+%if %5
+ vpmaddwd %4, %4, %4
+ vpaddd %1, %1, %4
+%else
+ vpmaddwd %1, %4, %4
+%endif
+ vpunpckhbw %4, %2, %3
+ vpmaddwd %4, %4, %4
+ vpaddd %1, %1, %4
+%endmacro
+
+; Sum of bytes per quadword: a SAD against the zero register.
+; res=%1 src=%2 zero=%3 tmp=%4 add_to_res=%5
+; tmp is only written when add_to_res is non-zero.
+%macro AVX2_Sumbdw 5
+%if %5
+ vpsadbw %4, %2, %3
+ vpaddd %1, %1, %4
+%else
+ vpsadbw %1, %2, %3
+%endif
+%endmacro
+
+; Byte-wise absolute difference: (a -sat b) | (b -sat a), one of which is always zero.
+; res=%1 a=%2 b=%3 a=%4 tmp=%5
+; a is passed twice: %2 feeds the first subtraction and %4 the second. This lets tmp
+; alias %2 as long as %4 names another copy of a (e.g. a memory operand).
+%macro AVX2_AbsDiffub 5
+ vpsubusb %5, %2, %3
+ vpsubusb %1, %3, %4
+ vpor %1, %5, %1
+%endmacro
+
+; Sum of absolute byte differences per quadword.
+; sad=%1 cur_data=%2 ref_data=%3 tmp=%4 accumulate_results=%5
+%macro AVX2_Sadbdw 5
+%if %5
+ vpsadbw %4, %2, %3
+ vpaddd %1, %1, %4
+%else
+ vpsadbw %1, %2, %3
+%endif
+%endmacro
+
+; SAD of cur vs ref plus byte sum and squared-byte sum of cur, all from register data.
+; sad=%1 sum_cur=%2 sqsum_cur=%3 cur_data=%4 ref_data=%5 zero=%6 tmp=%7 accumulate_results=%8
+%macro AVX2_SadSumSqsumbdw 8
+ AVX2_Sadbdw %1, %4, %5, %7, %8
+ AVX2_Sumbdw %2, %4, %6, %7, %8
+ AVX2_Sqsumbdw %3, %4, %6, %7, %8
+%endmacro
+
+; SAD of one row: cur is loaded unaligned into tmp, ref is used as a memory operand.
+; sad=%1 pCur=%2 pRef=%3 tmp=%4 accumulate_results=%5
+; tmp doubles as the data register and as the scratch register of AVX2_Sadbdw.
+%macro AVX2_Sad 5
+ vmovdqu %4, [%2]
+ AVX2_Sadbdw %1, %4, [%3], %4, %5
+%endmacro
+
+; Memory-operand front end for AVX2_SadSumSqsumbdw: cur is loaded into the first tmp.
+; sad=%1 sum_cur=%2 sqsum_cur=%3 pCur=%4 pRef=%5 zero=%6 tmp=%7,%8 accumulate_results=%9
+%macro AVX2_SadSumSqsum 9
+ vmovdqu %7, [%4]
+ AVX2_SadSumSqsumbdw %1, %2, %3, %7, [%5], %6, %8, %9
+%endmacro
+
+; As AVX2_SadSumSqsum, and additionally accumulates the squared absolute differences.
+; sad=%1 sum_cur=%2 sqsum_cur=%3 sqdiff=%4 pCur=%5 pRef=%6 zero=%7 tmp=%8,%9,%10 accumulate_results=%11
+; The register holding the ref row (%9) is overwritten with |cur - ref| before squaring.
+%macro AVX2_SadSumSqsumSqdiff 11
+ vmovdqu %8, [%5]
+ vmovdqu %9, [%6]
+ AVX2_SadSumSqsumbdw %1, %2, %3, %8, %9, %7, %10, %11
+ AVX2_AbsDiffub %9, %8, %9, %8, %10
+ AVX2_Sqsumbdw %4, %9, %7, %10, %11
+%endmacro
+
+; sad=%1 sum_cur=%2 sum_ref=%3 mad=%4 pCur=%5 pRef=%6 zero=%7 tmp=%8,%9,%10
accumulate_results=%11
+; Loads one row of cur and ref, accumulates the per-quadword byte sums of each and their
+; SAD, and tracks the byte-wise maximum absolute difference in mad.
+%macro AVX2_SadSdMad 11
+ vmovdqu %8, [%5]
+ vmovdqu %9, [%6]
+ AVX2_Sumbdw %2, %8, %7, %10, %11
+ AVX2_Sumbdw %3, %9, %7, %10, %11
+ AVX2_Sadbdw %1, %8, %9, %10, %11
+%if %11
+ ; Accumulating: mad = max(mad, |cur - ref|); the ref register is reused for the difference.
+ AVX2_AbsDiffub %9, %8, %9, %8, %10
+ vpmaxub %4, %4, %9
+%else
+ ; First row: mad is initialised with |cur - ref|.
+ AVX2_AbsDiffub %4, %8, %9, %8, %10
+%endif
+%endmacro
+
+; sad=%1 sum_cur=%2 sum_ref=%3 mad=%4 sqdiff=%5 sqsum_cur=%6 pCur=%7 pRef=%8 zero=%9 tmp=%10,%11,%12 accumulate_results=%13
+; Combines the SAD, the byte sums of cur and ref, the squared-byte sum of cur, the running
+; maximum absolute difference and the squared differences for one row.
+; When the caller passes the literal 0 as the third temporary (%12), the first branch is
+; used: it needs only two temporaries, reloading the cur row from [%7] after %10 has been
+; used as scratch and passing [%7] to AVX2_AbsDiffub as the second copy of cur.
+%macro AVX2_SadBgdSqdiff 13
+%ifidn %12, 0
+ vmovdqu %10, [%7]
+ AVX2_Sumbdw %2, %10, %9, %11, %13
+ AVX2_Sqsumbdw %6, %10, %9, %11, %13
+ vmovdqu %11, [%8]
+ AVX2_Sadbdw %1, %10, %11, %10, %13
+ AVX2_Sumbdw %3, %11, %9, %10, %13
+ vmovdqu %10, [%7]
+%if %13
+ AVX2_AbsDiffub %11, %10, %11, [%7], %10
+ vpmaxub %4, %4, %11
+ AVX2_Sqsumbdw %5, %11, %9, %10, %13
+%else
+ AVX2_AbsDiffub %4, %10, %11, [%7], %10
+ AVX2_Sqsumbdw %5, %4, %9, %10, %13
+%endif
+%else
+ vmovdqu %10, [%7]
+ vmovdqu %11, [%8]
+ AVX2_Sadbdw %1, %10, %11, %12, %13
+ AVX2_Sumbdw %2, %10, %9, %12, %13
+ AVX2_Sumbdw %3, %11, %9, %12, %13
+ AVX2_Sqsumbdw %6, %10, %9, %12, %13
+%if %13
+ AVX2_AbsDiffub %11, %10, %11, %10, %12
+ vpmaxub %4, %4, %11
+ AVX2_Sqsumbdw %5, %11, %9, %10, %13
+%else
+ AVX2_AbsDiffub %4, %10, %11, %10, %12
+ AVX2_Sqsumbdw %5, %4, %9, %10, %13
+%endif
+%endif
+%endmacro
+
+; Store the low dword of every quadword of data (the per-8-pixel accumulators) as dwords.
+; p_dst=%1 mmreg_prefix=%2 data=%3 tmp=%4 second_blocks=%5
+; The shuffle packs dwords 0 and 2 of each 128-bit lane into that lane's low 8 bytes.
+; x prefix: 8 bytes are written at p_dst + 8 * second_blocks.
+; y prefix, second_blocks == 0: one 32-byte store; bytes 8..15 and 24..31 written here are
+; the same locations the second_blocks != 0 path writes.
+; y prefix, second_blocks != 0: 8 bytes at p_dst + 8 and 8 bytes at p_dst + 24.
+%macro AVX2_Store8x8Accdw 5
+ vpshufd %2%4, %2%3, 1000b
+%ifidni %2, x
+ vmovlps [%1 + 8 * %5], x%4
+%elif %5 == 0
+ vmovdqu [%1], %2%4
+%else
+ vmovlps [%1 + 8], x%4
+ vextracti128 x%4, %2%4, 1
+ vmovlps [%1 + 24], x%4
+%endif
+%endmacro
+
+; Store the least significant byte of every quadword of data as bytes.
+; p_dst=%1 mmreg_prefix=%2 data=%3 tmp=%4 second_blocks=%5
+; The two unpacks put byte 0 of the low and of the high quadword of each lane next to
+; each other in that lane's low word.
+; second_blocks == 0: 4 bytes are written at p_dst + 0 (and p_dst + 4 for the upper lane
+; of a y register). Otherwise only the 2 result bytes are written, at p_dst + 2 (and
+; p_dst + 6), which lie inside the 4-byte stores of the second_blocks == 0 path.
+%macro AVX2_Store8x8Accb 5
+ vpunpckhqdq %2%4, %2%3, %2%3
+ vpunpcklbw %2%4, %2%3, %2%4
+%if %5 == 0
+ vmovd [%1 + 0], x%4
+%ifidni %2, y
+ vextracti128 x%4, %2%4, 1
+ vmovd [%1 + 4], x%4
+%endif
+%else
+ vpextrw [%1 + 2], x%4, 0
+%ifidni %2, y
+ vextracti128 x%4, %2%4, 1
+ vpextrw [%1 + 6], x%4, 0
+%endif
+%endif
+%endmacro
+
+; Store the byte results held in the even and odd dwords of a ymm register with a single
+; 16-byte store.
+; p_dst=%1 data=%2 tmp=%3,%4 second_blocks=%5
+; After the unpacks, dwords 0 and 2 of each lane carry two result bytes in their low word.
+; The upper lane is shifted into the odd dword positions and blended with the lower lane.
+; With second_blocks set, the new bytes are moved to the high word of every dword and the
+; low words are taken from what is already at [p_dst], so the earlier results survive.
+%macro AVX2_Store2x8x8Accb 5
+ vpunpckhqdq y%3, y%2, y%2
+ vpunpcklbw y%3, y%2, y%3
+ vextracti128 x%4, y%3, 1
+ vpsllq x%4, x%4, 32
+ vpblendd x%4, x%3, x%4, 1010b
+%if %5
+ vpslld x%4, x%4, 16
+ vpblendw x%4, x%4, [%1], 01010101b
+%endif
+ vmovdqu [%1], x%4
+%endmacro
+
+; Store dword 0 of each 128-bit lane of data as consecutive dwords at p_dst.
+; p_dst=%1 mmreg_prefix=%2 data=%3 tmp=%4 add_to_dst=%5
+; x prefix: one dword; y prefix: two dwords (p_dst + 0 and p_dst + 4).
+; With add_to_dst set, the values already at p_dst are added before storing.
+; The data register is modified on every path except x with add_to_dst == 0.
+%macro AVX2_Store16x16Accdw 5
+%ifidni %2, x
+%if %5
+ vmovd x%4, [%1 + 0]
+ vpaddd x%3, x%4, x%3
+%endif
+ vmovd [%1 + 0], x%3
+%elif %5 == 0
+ vmovd [%1 + 0], x%3
+ vextracti128 x%3, %2%3, 1
+ vmovd [%1 + 4], x%3
+%else
+ vextracti128 x%4, %2%3, 1
+ vpunpckldq x%4, x%3, x%4
+ vmovq x%3, [%1 + 0]
+ vpaddd x%3, x%3, x%4
+ vmovlps [%1 + 0], x%3
+%endif
+%endmacro
+
+; Store two interleaved sets of dword results to two destinations.
+; p_dst1=%1 p_dst2=%2 i_dst_offset=%3 gpr_tmp=%4 mmreg_prefix=%5 data=%6 mm_tmp=%7 add_to_dst=%8
+; The low quadword of each lane belongs to p_dst1 and the high quadword to p_dst2.
+; x prefix: dword 0 goes to [p_dst1 + offset] and dword 2 to [p_dst2 + offset].
+; y prefix: even dwords are kept from the low lane and odd dwords taken from the high
+; lane, then the low 8 bytes go to p_dst1 and the high 8 bytes to p_dst2.
+; Each destination pointer is first copied into gpr_tmp and addressed through it.
+; With add_to_dst set, the current memory value is added first; for p_dst2 it is loaded
+; with a broadcast so that it lines up with the element that is stored back.
+%macro AVX2_Store2x16x16Accdw 8
+%ifidni %5, x
+ mov %4, %1
+%if %8 == 0
+ vmovd [%4 + %3], x%6
+ mov %4, %2
+ vpextrd [%4 + %3], x%6, 2
+%else
+ vmovd x%7, [%4 + %3]
+ vpaddd x%7, x%7, x%6
+ vmovd [%4 + %3], x%7
+ mov %4, %2
+ vpbroadcastd x%7, [%4 + %3]
+ vpaddd x%7, x%7, x%6
+ vpextrd [%4 + %3], x%7, 2
+%endif
+%else
+ vextracti128 x%7, %5%6, 1
+ vpblendd x%6, x%6, x%7, 1010b
+ mov %4, %1
+%if %8 == 0
+ vmovlps [%4 + %3], x%6
+ mov %4, %2
+ vmovhps [%4 + %3], x%6
+%else
+ vmovq x%7, [%4 + %3]
+ vpaddd x%7, x%7, x%6
+ vmovlps [%4 + %3], x%7
+ mov %4, %2
+ vpbroadcastq x%7, [%4 + %3]
+ vpaddd x%7, x%7, x%6
+ vmovhps [%4 + %3], x%7
+%endif
+%endif
+%endmacro
+
+
+; SAD of 8 rows of one column (32 pixels wide for y, 16 for x), stored per 8x8 block and
+; added to the frame accumulator.
+; x/y-mm_prefix=%1 mm_clobber=%2,%3,%4,%5,%6 b_second_blocks=%7
+; Expects the caller to have defined p_cur, p_ref, i_stride, i_stride3, i_xcnt, xcnt_unit,
+; p_sad8x8, mm_sadframe and, unless i_stride5 and i_stride7 are both defined, r_tmp.
+%macro AVX2_CalcSad_8Lines 7
+%define mm_tmp0 %2
+%define mm_sad %3
+%define mm_sad2 %4
+%define mm_sad3 %5
+%define mm_sad4 %6
+%define b_second_blocks %7
+; Use the caller's precomputed 5 * i_stride register when there is one; otherwise
+; compute it into r_tmp.
+%ifdef i_stride5
+ %define i_stride5_ i_stride5
+%else
+ lea r_tmp, [5 * i_stride]
+ %define i_stride5_ r_tmp
+%endif
+ ; Use multiple accumulators to shorten dependency chains and enable more parallelism.
+ ; Rows 0-3 initialise the four accumulators (last argument 0); rows 4-7 add to them.
+ AVX2_Sad %1 %+ mm_sad, p_cur, p_ref, %1 %+ mm_tmp0, 0
+ AVX2_Sad %1 %+ mm_sad2, p_cur + 1 * i_stride, p_ref + 1 * i_stride, %1 %+ mm_tmp0, 0
+ AVX2_Sad %1 %+ mm_sad3, p_cur + 2 * i_stride, p_ref + 2 * i_stride, %1 %+ mm_tmp0, 0
+ AVX2_Sad %1 %+ mm_sad4, p_cur + 1 * i_stride3, p_ref + 1 * i_stride3, %1 %+ mm_tmp0, 0
+ AVX2_Sad %1 %+ mm_sad, p_cur + 4 * i_stride, p_ref + 4 * i_stride, %1 %+ mm_tmp0, 1
+ AVX2_Sad %1 %+ mm_sad2, p_cur + 1 * i_stride5_, p_ref + 1 * i_stride5_, %1 %+ mm_tmp0, 1
+; 7 * i_stride: use the caller's register when defined, otherwise compute it into r_tmp.
+; r_tmp may be reused here because the row-5 access above was its last use as 5 * i_stride.
+%ifdef i_stride7
+ %define i_stride7_ i_stride7
+%else
+ lea r_tmp, [i_stride + 2 * i_stride3]
+ %define i_stride7_ r_tmp
+%endif
+ AVX2_Sad %1 %+ mm_sad3, p_cur + 2 * i_stride3, p_ref + 2 * i_stride3, %1 %+ mm_tmp0, 1
+ AVX2_Sad %1 %+ mm_sad4, p_cur + 1 * i_stride7_, p_ref + 1 * i_stride7_, %1 %+ mm_tmp0, 1
+%undef i_stride5_
+%undef i_stride7_
+ ; Increment addresses for the next iteration. Doing this early is beneficial on Haswell.
+ add p_cur, %1 %+ mm_width
+ add p_ref, %1 %+ mm_width
+ ; Collapse accumulators.
+ vpaddd %1 %+ mm_sad, %1 %+ mm_sad, %1 %+ mm_sad2
+ vpaddd %1 %+ mm_sad3, %1 %+ mm_sad3, %1 %+ mm_sad4
+ vpaddd %1 %+ mm_sad, %1 %+ mm_sad, %1 %+ mm_sad3
+ ; xcnt_unit * i_xcnt is the byte offset of this column relative to p_sad8x8.
+ AVX2_Store8x8Accdw p_sad8x8 + xcnt_unit * i_xcnt, %1, mm_sad, mm_tmp0, b_second_blocks
+ ; The frame total is accumulated at ymm width for both the x and the y variant.
+ vpaddd y %+ mm_sadframe, y %+ mm_sadframe, y %+ mm_sad
+%undef mm_tmp0
+%undef mm_sad
+%undef mm_sad2
+%undef mm_sad3
+%undef mm_sad4
+%undef b_second_blocks
+%endmacro
+
+;*************************************************************************************************************
+;void VAACalcSad_avx2( const uint8_t *cur_data, const uint8_t *ref_data, int32_t iPicWidth, int32_t iPicHeight
+; int32_t iPicStride, int32_t *psadframe, int32_t *psad8x8)
+;*************************************************************************************************************
+; The picture is walked in bands of 16 rows (iPicHeight / 16 iterations), each band as an
+; upper and a lower pass of 8 rows. Within a pass, columns are 32 pixels wide (ymm), with
+; one final 16-pixel column (xmm) when the width, rounded down to a multiple of 16, is
+; not a multiple of 32. Each pass stores per-8x8 SADs through p_sad8x8; the total over
+; all processed pixels is written to *psadframe.
+
+WELS_EXTERN VAACalcSad_avx2
+%define p_sadframe ptrword arg6
+%define p_sad8x8 ptrword arg7
+%ifdef X86_32
+%define saveregs r5, r6
+%else
+%define saveregs rbx, rbp, r12
+%endif
+
+%assign push_num 0
+ LOAD_5_PARA
+ PUSH_XMM 7
+ SIGN_EXTENSION r2, r2d
+ SIGN_EXTENSION r3, r3d
+ SIGN_EXTENSION r4, r4d
+ PUSHM saveregs
+
+%define mm_zero mm0
+%define mm_sadframe mm6
+ ; Clear the frame accumulator up front so the early exits to .done store a SAD of 0.
+ vpxor x %+ mm_zero, x %+ mm_zero, x %+ mm_zero
+ vmovdqa y %+ mm_sadframe, y %+ mm_zero
+
+ and r2, -16 ; iPicWidth &= -16
+ jle .done ; bail if iPicWidth < 16
+ sar r3, 4 ; iPicHeight / 16
+ jle .done ; bail if iPicHeight < 16
+ shr r2, 2 ; iPicWidth / 4
+
+%define p_cur r0
+%define p_ref r1
+%define i_xcnt r2
+%define i_ycnt ptrword arg4
+%define i_stride r4
+; The column counter i_xcnt is kept in units of xcnt_unit (4) pixels.
+%define xcnt_unit 4
+%ifdef X86_32
+ ; Store the band count (r3 after the shift) back into the arg4 slot; r3 then holds
+ ; 3 * stride. i_stride5 / i_stride7 stay undefined, so the 8-line macro uses r_tmp.
+ mov i_ycnt, r3
+ mov r5, p_sad8x8
+ %define i_stride3 r3
+ %undef p_sad8x8
+ %define p_sad8x8 r5
+ %define r_tmp r6
+ lea i_stride3, [3 * i_stride]
+%else
+ ; Here 3x, 5x and 7x the stride are all precomputed into registers.
+ mov rbp, p_sad8x8
+ %define i_stride3 rbx
+ %define i_stride5 r12
+ %define i_stride7 r6
+ %undef p_sad8x8
+ %define p_sad8x8 rbp
+ lea i_stride3, [3 * i_stride]
+ lea i_stride5, [5 * i_stride]
+ lea i_stride7, [i_stride + 2 * i_stride3]
+%endif
+
+ ;
offset pointer so as to compensate for the i_xcnt offset below.
+ sub p_sad8x8, 4 * 16 / xcnt_unit
+
+ ; Keep the positive column count on the stack; it is re-read through i_xcnt_load
+ ; after each pass has run the counter up to its end value.
+ push i_xcnt
+%assign push_num push_num + 1
+%define i_xcnt_load ptrword [r7]
+
+.height_loop:
+ ; use end-of-line pointers so as to enable use of a negative counter as index.
+ lea p_sad8x8, [p_sad8x8 + xcnt_unit * i_xcnt]
+ ; use a negative loop counter so as to enable counting toward zero and indexing with the same counter.
+ neg i_xcnt
+ ; The counter starts at -(width / 4) + 4 and advances by 8 per 32-pixel column.
+ ; It reaches exactly 0 when one 16-pixel column is left and overshoots to a positive
+ ; value when the 32-pixel columns covered the whole row.
+ add i_xcnt, 16 / xcnt_unit
+ jz .width_loop_upper8_remaining16
+.width_loop_upper8:
+ AVX2_CalcSad_8Lines y, mm1, mm2, mm3, mm4, mm5, 0
+ add i_xcnt, 32 / xcnt_unit
+ jl .width_loop_upper8
+ jg .width_loop_upper8_end
+.width_loop_upper8_remaining16:
+ AVX2_CalcSad_8Lines x, mm1, mm2, mm3, mm4, mm5, 0
+.width_loop_upper8_end:
+ ; Move down 8 rows and back to the start of the row: i_xcnt is reloaded as
+ ; -(width / 4), and xcnt_unit * i_xcnt is the negated row width in bytes.
+ lea p_cur, [p_cur + 8 * i_stride]
+ lea p_ref, [p_ref + 8 * i_stride]
+ xor i_xcnt, i_xcnt
+ sub i_xcnt, i_xcnt_load
+ lea p_cur, [p_cur + xcnt_unit * i_xcnt]
+ lea p_ref, [p_ref + xcnt_unit * i_xcnt]
+ add i_xcnt, 16 / xcnt_unit
+ jz .width_loop_lower8_remaining16
+.width_loop_lower8:
+ AVX2_CalcSad_8Lines y, mm1, mm2, mm3, mm4, mm5, 1
+ add i_xcnt, 32 / xcnt_unit
+ jl .width_loop_lower8
+ jg .width_loop_lower8_end
+.width_loop_lower8_remaining16:
+ AVX2_CalcSad_8Lines x, mm1, mm2, mm3, mm4, mm5, 1
+.width_loop_lower8_end:
+ lea p_cur, [p_cur + 8 * i_stride]
+ lea p_ref, [p_ref + 8 * i_stride]
+ xor i_xcnt, i_xcnt
+ sub i_xcnt, i_xcnt_load
+ lea p_cur, [p_cur + xcnt_unit * i_xcnt]
+ lea p_ref, [p_ref + xcnt_unit * i_xcnt]
+ ; Back to the positive count expected at the top of .height_loop.
+ neg i_xcnt
+ sub i_ycnt, 1
+ jnz .height_loop
+
+ pop i_xcnt
+%assign push_num push_num - 1
+%undef i_xcnt_load
+
+.done:
+ ; Reduce the frame accumulator: fold the upper lane onto the lower lane, then the high
+ ; quadword onto the low quadword, and store the low dword to *psadframe.
+ mov r6, p_sadframe
+ vextracti128 xmm2, y %+ mm_sadframe, 1
+ vpaddd xmm2, x %+ mm_sadframe, xmm2
+ vpunpckhqdq xmm1, xmm2, xmm2
+ vpaddd xmm2, xmm2, xmm1
+ vmovd [r6], xmm2
+ vzeroupper
+
+ POPM saveregs
+ POP_XMM
+ LOAD_5_PARA_POP
+%undef p_cur
+%undef p_ref
+%undef i_xcnt
+%undef i_ycnt
+%undef i_stride
+%undef r_tmp
+%undef xcnt_unit
+%undef i_stride3
+%undef
i_stride5
+%undef i_stride7
+%undef mm_sadframe
+%undef mm_zero
+%undef saveregs
+%undef p_sadframe
+%undef p_sad8x8
+ ret
+
+
+; SAD, byte sum and squared-byte sum of 8 rows of one column (32 pixels wide for y, 16 for x).
+; x/y-mm_prefix=%1 mm_clobber=%2,%3,%4,%5,%6 b_second_blocks=%7
+; Expects the caller to have defined p_cur, p_ref, i_stride, i_stride3, i_xcnt, r_tmp,
+; mm_zero, mm_sadframe, p_sad8x8, p_sum16x16 and p_sqsum16x16.
+%macro AVX2_CalcSadVar_8Lines 7
+%define mm_tmp0 %2
+%define mm_tmp1 %3
+%define mm_sad %4
+%define mm_sum %5
+%define mm_sqsum %6
+%define b_second_blocks %7
+ ; Unroll for better performance on Haswell.
+ ; Avoid unrolling for the 16 px case so as to reduce the code footprint.
+%ifidni %1, y
+ ; r_tmp holds 5 * i_stride for row 5 and is reloaded with 7 * i_stride before row 7.
+ ; Row 0 initialises the accumulators (last argument 0); rows 1-7 add to them.
+ lea r_tmp, [5 * i_stride]
+ AVX2_SadSumSqsum %1 %+ mm_sad, %1 %+ mm_sum, %1 %+ mm_sqsum, p_cur, p_ref, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ mm_tmp1, 0
+ AVX2_SadSumSqsum %1 %+ mm_sad, %1 %+ mm_sum, %1 %+ mm_sqsum, p_cur + 1 * i_stride, p_ref + 1 * i_stride, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ mm_tmp1, 1
+ AVX2_SadSumSqsum %1 %+ mm_sad, %1 %+ mm_sum, %1 %+ mm_sqsum, p_cur + 2 * i_stride, p_ref + 2 * i_stride, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ mm_tmp1, 1
+ AVX2_SadSumSqsum %1 %+ mm_sad, %1 %+ mm_sum, %1 %+ mm_sqsum, p_cur + 1 * i_stride3, p_ref + 1 * i_stride3, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ mm_tmp1, 1
+ AVX2_SadSumSqsum %1 %+ mm_sad, %1 %+ mm_sum, %1 %+ mm_sqsum, p_cur + 4 * i_stride, p_ref + 4 * i_stride, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ mm_tmp1, 1
+ AVX2_SadSumSqsum %1 %+ mm_sad, %1 %+ mm_sum, %1 %+ mm_sqsum, p_cur + r_tmp, p_ref + r_tmp, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ mm_tmp1, 1
+ lea r_tmp, [i_stride + 2 * i_stride3]
+ AVX2_SadSumSqsum %1 %+ mm_sad, %1 %+ mm_sum, %1 %+ mm_sqsum, p_cur + 2 * i_stride3, p_ref + 2 * i_stride3, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ mm_tmp1, 1
+ AVX2_SadSumSqsum %1 %+ mm_sad, %1 %+ mm_sum, %1 %+ mm_sqsum, p_cur + r_tmp, p_ref + r_tmp, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ mm_tmp1, 1
+ ; Increment addresses for the next iteration. Doing this early is beneficial on Haswell.
+ add p_cur, %1 %+ mm_width + add p_ref, %1 %+ mm_width +%else + vpxor x %+ mm_sad, x %+ mm_sad, x %+ mm_sad + vpxor x %+ mm_sum, x %+ mm_sum, x %+ mm_sum + vpxor x %+ mm_sqsum, x %+ mm_sqsum, x %+ mm_sqsum + lea r_tmp, [8 * i_stride] + add p_cur, r_tmp + add p_ref, r_tmp + neg r_tmp +%%loop: + AVX2_SadSumSqsum %1 %+ mm_sad, %1 %+ mm_sum, %1 %+ mm_sqsum, p_cur + r_tmp, p_ref + r_tmp, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ mm_tmp1, 1 + add r_tmp, i_stride + jl %%loop + ; Increment addresses for the next iteration. Doing this early is beneficial on Haswell. + lea r_tmp, [8 * i_stride - %1 %+ mm_width] + sub p_cur, r_tmp + sub p_ref, r_tmp +%endif + AVX2_Store8x8Accdw p_sad8x8 + 4 * i_xcnt, %1, mm_sad, mm_tmp1, b_second_blocks + vpaddd y %+ mm_sadframe, y %+ mm_sadframe, y %+ mm_sad + vpunpcklqdq %1 %+ mm_tmp0, %1 %+ mm_sum, %1 %+ mm_sqsum + vpunpckhqdq %1 %+ mm_tmp1, %1 %+ mm_sum, %1 %+ mm_sqsum + vpaddd %1 %+ mm_tmp0, %1 %+ mm_tmp0, %1 %+ mm_tmp1 + vpshufd %1 %+ mm_tmp1, %1 %+ mm_tmp0, 10110001b + vpaddd %1 %+ mm_tmp0, %1 %+ mm_tmp0, %1 %+ mm_tmp1 + AVX2_Store2x16x16Accdw p_sum16x16, p_sqsum16x16, i_xcnt, r_tmp, %1, mm_tmp0, mm_tmp1, b_second_blocks +%undef mm_tmp0 +%undef mm_tmp1 +%undef mm_sad +%undef mm_sum +%undef mm_sqsum +%undef b_second_blocks +%endmacro + +;************************************************************************************************************* +;void VAACalcSadVar_avx2( const uint8_t *cur_data, const uint8_t *ref_data, int32_t iPicWidth, int32_t iPicHeight +; int32_t iPicStride, int32_t *psadframe, int32_t *psad8x8, int32_t *psum16x16, int32_t *psqsum16x16) +;************************************************************************************************************* + +WELS_EXTERN VAACalcSadVar_avx2 +%define p_sadframe ptrword arg6 +%define p_sad8x8 ptrword arg7 +%define p_sum16x16 ptrword arg8 +%define p_sqsum16x16 ptrword arg9 +%ifdef X86_32 +%define saveregs r5, r6 +%else +%define saveregs rbx, rbp, r12, r13 +%endif + +%assign push_num 
0 + LOAD_5_PARA + PUSH_XMM 7 + SIGN_EXTENSION r2, r2d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r4, r4d + PUSHM saveregs + +%define mm_zero mm0 +%define mm_sadframe mm6 + vpxor x %+ mm_zero, x %+ mm_zero, x %+ mm_zero + vmovdqa y %+ mm_sadframe, y %+ mm_zero + + and r2, -16 ; iPicWidth &= -16 + jle .done ; bail if iPicWidth < 16 + sar r3, 4 ; iPicHeight / 16 + jle .done ; bail if iPicHeight < 16 + shr r2, 2 ; iPicWidth / 4 + +%define p_cur r0 +%define p_ref r1 +%define i_xcnt r2 +%define i_ycnt ptrword arg4 +%define i_stride r4 +%define r_tmp r6 +%define xcnt_unit 4 +%ifdef X86_32 + mov i_ycnt, r3 + mov r3, p_sad8x8 + %undef p_sad8x8 + %define p_sad8x8 r3 + %define i_stride3 r5 +%else + mov rbp, p_sad8x8 + mov r12, p_sum16x16 + mov r13, p_sqsum16x16 + %undef p_sad8x8 + %undef p_sum16x16 + %undef p_sqsum16x16 + %define p_sad8x8 rbp + %define p_sum16x16 r12 + %define p_sqsum16x16 r13 + %define i_stride3 rbx +%endif + lea i_stride3, [3 * i_stride] + + ; offset pointers so as to compensate for the i_xcnt offset below. + sub p_sad8x8, 4 * 16 / xcnt_unit + sub p_sum16x16, 1 * 16 / xcnt_unit + sub p_sqsum16x16, 1 * 16 / xcnt_unit + + ; use a negative loop counter so as to enable counting toward zero and indexing with the same counter. + neg i_xcnt + +.height_loop: + push i_xcnt +%assign push_num push_num + 1 +%define i_xcnt_load ptrword [r7] + ; use end-of-line pointers so as to enable use of a negative counter as index. 
+ lea r_tmp, [xcnt_unit * i_xcnt] + sub p_sad8x8, r_tmp + sub p_sum16x16, i_xcnt + sub p_sqsum16x16, i_xcnt + add i_xcnt, 16 / xcnt_unit + jz .width_loop_upper8_remaining16 +.width_loop_upper8: + AVX2_CalcSadVar_8Lines y, mm1, mm2, mm3, mm4, mm5, 0 + add i_xcnt, 32 / xcnt_unit + jl .width_loop_upper8 + jg .width_loop_upper8_end +.width_loop_upper8_remaining16: + AVX2_CalcSadVar_8Lines x, mm1, mm2, mm3, mm4, mm5, 0 +.width_loop_upper8_end: + lea p_cur, [p_cur + 8 * i_stride] + lea p_ref, [p_ref + 8 * i_stride] + mov i_xcnt, i_xcnt_load + lea p_cur, [p_cur + xcnt_unit * i_xcnt] + lea p_ref, [p_ref + xcnt_unit * i_xcnt] + add i_xcnt, 16 / xcnt_unit + jz .width_loop_lower8_remaining16 +.width_loop_lower8: + AVX2_CalcSadVar_8Lines y, mm1, mm2, mm3, mm4, mm5, 1 + add i_xcnt, 32 / xcnt_unit + jl .width_loop_lower8 + jg .width_loop_lower8_end +.width_loop_lower8_remaining16: + AVX2_CalcSadVar_8Lines x, mm1, mm2, mm3, mm4, mm5, 1 +.width_loop_lower8_end: + lea p_cur, [p_cur + 8 * i_stride] + lea p_ref, [p_ref + 8 * i_stride] +%undef i_xcnt_load + pop i_xcnt + %assign push_num push_num - 1 + lea p_cur, [p_cur + xcnt_unit * i_xcnt] + lea p_ref, [p_ref + xcnt_unit * i_xcnt] + sub i_ycnt, 1 + jnz .height_loop + +.done: + mov r_tmp, p_sadframe + vextracti128 xmm2, y %+ mm_sadframe, 1 + vpaddd xmm2, x %+ mm_sadframe, xmm2 + vpunpckhqdq xmm1, xmm2, xmm2 + vpaddd xmm2, xmm2, xmm1 + vmovd [r_tmp], xmm2 + vzeroupper + + POPM saveregs + POP_XMM + LOAD_5_PARA_POP +%undef p_cur +%undef p_ref +%undef i_xcnt +%undef i_ycnt +%undef i_stride +%undef i_stride3 +%undef r_tmp +%undef xcnt_unit +%undef mm_sadframe +%undef mm_zero +%undef saveregs +%undef p_sadframe +%undef p_sad8x8 +%undef p_sum16x16 +%undef p_sqsum16x16 + ret + + +; x/y-mm_prefix=%1 mm_clobber=%2,%3,%4,%5,%6,%7,%8 b_second_blocks=%9 +%macro AVX2_CalcSadSsd_8Lines 9 +%define mm_tmp0 %2 +%define mm_tmp1 %3 +%define mm_tmp2 %4 +%define mm_sad %5 +%define mm_sum %6 +%define mm_sqsum %7 +%define mm_sqdiff %8 +%define 
b_second_blocks %9 + ; Unroll for better performance on Haswell. + ; Avoid unrolling for the 16 px case so as to reduce the code footprint. +%ifidni %1, y +%ifdef i_stride5 + lea r_tmp, [i_stride + 2 * i_stride3] + %define i_stride5_ i_stride5 +%else + lea r_tmp, [5 * i_stride] + %define i_stride5_ r_tmp +%endif + AVX2_SadSumSqsumSqdiff %1 %+ mm_sad, %1 %+ mm_sum, %1 %+ mm_sqsum, %1 %+ mm_sqdiff, p_cur, p_ref, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ mm_tmp1, %1 %+ mm_tmp2, 0 + AVX2_SadSumSqsumSqdiff %1 %+ mm_sad, %1 %+ mm_sum, %1 %+ mm_sqsum, %1 %+ mm_sqdiff, p_cur + 1 * i_stride, p_ref + 1 * i_stride, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ mm_tmp1, %1 %+ mm_tmp2, 1 + AVX2_SadSumSqsumSqdiff %1 %+ mm_sad, %1 %+ mm_sum, %1 %+ mm_sqsum, %1 %+ mm_sqdiff, p_cur + 2 * i_stride, p_ref + 2 * i_stride, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ mm_tmp1, %1 %+ mm_tmp2, 1 + AVX2_SadSumSqsumSqdiff %1 %+ mm_sad, %1 %+ mm_sum, %1 %+ mm_sqsum, %1 %+ mm_sqdiff, p_cur + 1 * i_stride3, p_ref + 1 * i_stride3, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ mm_tmp1, %1 %+ mm_tmp2, 1 + AVX2_SadSumSqsumSqdiff %1 %+ mm_sad, %1 %+ mm_sum, %1 %+ mm_sqsum, %1 %+ mm_sqdiff, p_cur + 4 * i_stride, p_ref + 4 * i_stride, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ mm_tmp1, %1 %+ mm_tmp2, 1 + AVX2_SadSumSqsumSqdiff %1 %+ mm_sad, %1 %+ mm_sum, %1 %+ mm_sqsum, %1 %+ mm_sqdiff, p_cur + 1 * i_stride5_, p_ref + 1 * i_stride5_, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ mm_tmp1, %1 %+ mm_tmp2, 1 +%ifndef i_stride5 + lea r_tmp, [i_stride + 2 * i_stride3] +%endif +%undef i_stride5_ + AVX2_SadSumSqsumSqdiff %1 %+ mm_sad, %1 %+ mm_sum, %1 %+ mm_sqsum, %1 %+ mm_sqdiff, p_cur + 2 * i_stride3, p_ref + 2 * i_stride3, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ mm_tmp1, %1 %+ mm_tmp2, 1 + AVX2_SadSumSqsumSqdiff %1 %+ mm_sad, %1 %+ mm_sum, %1 %+ mm_sqsum, %1 %+ mm_sqdiff, p_cur + r_tmp, p_ref + r_tmp, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ mm_tmp1, %1 %+ mm_tmp2, 1 + ; Increment addresses for the next iteration. Doing this early is beneficial on Haswell. 
+ add p_cur, %1 %+ mm_width + add p_ref, %1 %+ mm_width +%else + vpxor x %+ mm_sad, x %+ mm_sad, x %+ mm_sad + vpxor x %+ mm_sum, x %+ mm_sum, x %+ mm_sum + vpxor x %+ mm_sqsum, x %+ mm_sqsum, x %+ mm_sqsum + vpxor x %+ mm_sqdiff, x %+ mm_sqdiff, x %+ mm_sqdiff + lea r_tmp, [8 * i_stride] + add p_cur, r_tmp + add p_ref, r_tmp + neg r_tmp +%%loop: + AVX2_SadSumSqsumSqdiff %1 %+ mm_sad, %1 %+ mm_sum, %1 %+ mm_sqsum, %1 %+ mm_sqdiff, p_cur + r_tmp, p_ref + r_tmp, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ mm_tmp1, %1 %+ mm_tmp2, 1 + add r_tmp, i_stride + jl %%loop + ; Increment addresses for the next iteration. Doing this early is beneficial on Haswell. + lea r_tmp, [8 * i_stride - %1 %+ mm_width] + sub p_cur, r_tmp + sub p_ref, r_tmp +%endif + mov r_tmp, p_sad8x8 + AVX2_Store8x8Accdw r_tmp + 4 * i_xcnt, %1, mm_sad, mm_tmp1, b_second_blocks +%ifdef X86_32 + vpaddd y %+ mm_tmp1, y %+ mm_sad, sadframe_acc + vmovdqa sadframe_acc, y %+ mm_tmp1 +%else + vpaddd sadframe_acc, sadframe_acc, y %+ mm_sad +%endif + mov r_tmp, i_xcnt + add r_tmp, p_sum16x16 + vpunpckhqdq %1 %+ mm_tmp1, %1 %+ mm_sum, %1 %+ mm_sum + vpaddd %1 %+ mm_tmp0, %1 %+ mm_sum, %1 %+ mm_tmp1 + AVX2_Store16x16Accdw r_tmp, %1, mm_tmp0, mm_tmp1, b_second_blocks + vpunpcklqdq %1 %+ mm_tmp0, %1 %+ mm_sqsum, %1 %+ mm_sqdiff + vpunpckhqdq %1 %+ mm_tmp1, %1 %+ mm_sqsum, %1 %+ mm_sqdiff + vpaddd %1 %+ mm_tmp0, %1 %+ mm_tmp0, %1 %+ mm_tmp1 + vpshufd %1 %+ mm_tmp1, %1 %+ mm_tmp0, 10110001b + vpaddd %1 %+ mm_tmp0, %1 %+ mm_tmp0, %1 %+ mm_tmp1 + AVX2_Store2x16x16Accdw p_sqsum16x16, p_sqdiff16x16, i_xcnt, r_tmp, %1, mm_tmp0, mm_tmp1, b_second_blocks +%undef mm_tmp0 +%undef mm_tmp1 +%undef mm_tmp2 +%undef mm_sad +%undef mm_sum +%undef mm_sqsum +%undef mm_sqdiff +%undef b_second_blocks +%endmacro + +;************************************************************************************************************* +;void VAACalcSadSsd_avx2(const uint8_t *cur_data, const uint8_t *ref_data, int32_t iPicWidth, int32_t iPicHeight, +; 
int32_t iPicStride,int32_t *psadframe, int32_t *psad8x8, int32_t *psum16x16, int32_t *psqsum16x16, int32_t *psqdiff16x16) +;************************************************************************************************************* + +WELS_EXTERN VAACalcSadSsd_avx2 +%define p_sadframe ptrword arg6 +%define p_sad8x8 ptrword arg7 +%define p_sum16x16 ptrword arg8 +%define p_sqsum16x16 ptrword arg9 +%define p_sqdiff16x16 ptrword arg10 +%ifdef X86_32 +%define saveregs r5, r6 +%else +%define saveregs rbx, rbp, r12, r13, r14, r15 +%endif + +%assign push_num 0 + LOAD_5_PARA + PUSH_XMM 9 + SIGN_EXTENSION r2, r2d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r4, r4d + PUSHM saveregs + +%define mm_zero mm0 + vpxor x %+ mm_zero, x %+ mm_zero, x %+ mm_zero + +%ifdef X86_32 + STACK_ALLOC r5, ymm_width, ymm_width + %define sadframe_acc_addr r5 + %define sadframe_acc [sadframe_acc_addr] +%else + %define sadframe_acc ymm8 + %define xsadframe_acc xmm8 +%endif + vmovdqa sadframe_acc, y %+ mm_zero + + and r2, -16 ; iPicWidth &= -16 + jle .done ; bail if iPicWidth < 16 + sar r3, 4 ; iPicHeight / 16 + jle .done ; bail if iPicHeight < 16 + shr r2, 2 ; iPicWidth / 4 + +%define p_cur r0 +%define p_ref r1 +%define i_xcnt r2 +%define i_ycnt ptrword arg4 +%define i_stride r4 +%define r_tmp r6 +%define xcnt_unit 4 +%ifdef X86_32 + mov i_ycnt, r3 + %define i_stride3 r3 +%else + mov r12, p_sad8x8 + mov r13, p_sum16x16 + mov r14, p_sqsum16x16 + mov r15, p_sqdiff16x16 + %undef p_sad8x8 + %undef p_sum16x16 + %undef p_sqsum16x16 + %undef p_sqdiff16x16 + %define p_sad8x8 r12 + %define p_sum16x16 r13 + %define p_sqsum16x16 r14 + %define p_sqdiff16x16 r15 + %define i_stride3 rbx + %define i_stride5 rbp + lea i_stride5, [5 * i_stride] +%endif + lea i_stride3, [3 * i_stride] + + ; offset pointers so as to compensate for i_xcnt offset below. 
+ sub p_sad8x8, 4 * 16 / xcnt_unit + sub p_sum16x16, 1 * 16 / xcnt_unit + sub p_sqsum16x16, 1 * 16 / xcnt_unit + sub p_sqdiff16x16, 1 * 16 / xcnt_unit + + ; use a negative loop counter so as to enable counting toward zero and indexing with the same counter. + neg i_xcnt + +.height_loop: + push i_xcnt +%assign push_num push_num + 1 +%define i_xcnt_load ptrword [r7] + ; use end-of-line pointers so as to enable use of a negative counter as index. + lea r_tmp, [xcnt_unit * i_xcnt] + sub p_sad8x8, r_tmp + sub p_sum16x16, i_xcnt + sub p_sqsum16x16, i_xcnt + sub p_sqdiff16x16, i_xcnt + add i_xcnt, 16 / xcnt_unit + jz .width_loop_upper8_remaining16 +.width_loop_upper8: + AVX2_CalcSadSsd_8Lines y, mm1, mm2, mm3, mm4, mm5, mm6, mm7, 0 + add i_xcnt, 32 / xcnt_unit + jl .width_loop_upper8 + jg .width_loop_upper8_end +.width_loop_upper8_remaining16: + AVX2_CalcSadSsd_8Lines x, mm1, mm2, mm3, mm4, mm5, mm6, mm7, 0 +.width_loop_upper8_end: + lea p_cur, [p_cur + 8 * i_stride] + lea p_ref, [p_ref + 8 * i_stride] + mov i_xcnt, i_xcnt_load + lea p_cur, [p_cur + xcnt_unit * i_xcnt] + lea p_ref, [p_ref + xcnt_unit * i_xcnt] + add i_xcnt, 16 / xcnt_unit + jz .width_loop_lower8_remaining16 +.width_loop_lower8: + AVX2_CalcSadSsd_8Lines y, mm1, mm2, mm3, mm4, mm5, mm6, mm7, 1 + add i_xcnt, 32 / xcnt_unit + jl .width_loop_lower8 + jg .width_loop_lower8_end +.width_loop_lower8_remaining16: + AVX2_CalcSadSsd_8Lines x, mm1, mm2, mm3, mm4, mm5, mm6, mm7, 1 +.width_loop_lower8_end: + lea p_cur, [p_cur + 8 * i_stride] + lea p_ref, [p_ref + 8 * i_stride] +%undef i_xcnt_load + pop i_xcnt + %assign push_num push_num - 1 + lea p_cur, [p_cur + xcnt_unit * i_xcnt] + lea p_ref, [p_ref + xcnt_unit * i_xcnt] + sub i_ycnt, 1 + jnz .height_loop + +.done: + mov r_tmp, p_sadframe +%ifdef X86_32 + vmovdqa xmm2, sadframe_acc + vpaddd xmm2, xmm2, [sadframe_acc_addr + xmm_width] +%else + vextracti128 xmm2, sadframe_acc, 1 + vpaddd xmm2, xsadframe_acc, xmm2 +%endif + vpunpckhqdq xmm1, xmm2, xmm2 + vpaddd xmm2, 
xmm2, xmm1 + vmovd [r_tmp], xmm2 + vzeroupper +%ifdef X86_32 + STACK_DEALLOC +%endif + POPM saveregs + POP_XMM + LOAD_5_PARA_POP +%undef p_cur +%undef p_ref +%undef i_xcnt +%undef i_ycnt +%undef i_stride +%undef i_stride3 +%undef i_stride5 +%undef r_tmp +%undef xcnt_unit +%undef sadframe_acc +%undef sadframe_acc_addr +%undef xsadframe_acc +%undef mm_zero +%undef saveregs +%undef p_sadframe +%undef p_sad8x8 +%undef p_sum16x16 +%undef p_sqsum16x16 +%undef p_sqdiff16x16 + ret + + +; x/y-mm_prefix=%1 mm_clobber=%2,%3,%4,%5,%6,%7,%8 b_second_blocks=%9 +%macro AVX2_CalcSadBgd_8Lines 9 +%define mm_tmp0 %2 +%define mm_tmp1 %3 +%define mm_tmp2 %8 +%define mm_mad %4 +%define mm_sumcur %5 +%define mm_sumref %6 +%define mm_sad %7 +%define b_second_blocks %9 + ; Unroll for better performance on Haswell. + ; Avoid unrolling for the 16 px case so as to reduce the code footprint. +%ifidni %1, y + lea r_tmp, [5 * i_stride] + AVX2_SadSdMad %1 %+ mm_sad, %1 %+ mm_sumcur, %1 %+ mm_sumref, %1 %+ mm_mad, p_cur, p_ref, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ mm_tmp1, %1 %+ mm_tmp2, 0 + AVX2_SadSdMad %1 %+ mm_sad, %1 %+ mm_sumcur, %1 %+ mm_sumref, %1 %+ mm_mad, p_cur + 1 * i_stride, p_ref + 1 * i_stride, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ mm_tmp1, %1 %+ mm_tmp2, 1 + AVX2_SadSdMad %1 %+ mm_sad, %1 %+ mm_sumcur, %1 %+ mm_sumref, %1 %+ mm_mad, p_cur + 2 * i_stride, p_ref + 2 * i_stride, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ mm_tmp1, %1 %+ mm_tmp2, 1 + AVX2_SadSdMad %1 %+ mm_sad, %1 %+ mm_sumcur, %1 %+ mm_sumref, %1 %+ mm_mad, p_cur + 1 * i_stride3, p_ref + 1 * i_stride3, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ mm_tmp1, %1 %+ mm_tmp2, 1 + AVX2_SadSdMad %1 %+ mm_sad, %1 %+ mm_sumcur, %1 %+ mm_sumref, %1 %+ mm_mad, p_cur + 4 * i_stride, p_ref + 4 * i_stride, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ mm_tmp1, %1 %+ mm_tmp2, 1 + AVX2_SadSdMad %1 %+ mm_sad, %1 %+ mm_sumcur, %1 %+ mm_sumref, %1 %+ mm_mad, p_cur + r_tmp, p_ref + r_tmp, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ mm_tmp1, %1 %+ mm_tmp2, 1 + lea r_tmp, 
[i_stride + 2 * i_stride3] + AVX2_SadSdMad %1 %+ mm_sad, %1 %+ mm_sumcur, %1 %+ mm_sumref, %1 %+ mm_mad, p_cur + 2 * i_stride3, p_ref + 2 * i_stride3, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ mm_tmp1, %1 %+ mm_tmp2, 1 + AVX2_SadSdMad %1 %+ mm_sad, %1 %+ mm_sumcur, %1 %+ mm_sumref, %1 %+ mm_mad, p_cur + r_tmp, p_ref + r_tmp, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ mm_tmp1, %1 %+ mm_tmp2, 1 + ; Increment addresses for the next iteration. Doing this early is beneficial on Haswell. + add p_cur, %1 %+ mm_width + add p_ref, %1 %+ mm_width +%else + vpxor x %+ mm_sad, x %+ mm_sad, x %+ mm_sad + vpxor x %+ mm_sumcur, x %+ mm_sumcur, x %+ mm_sumcur + vpxor x %+ mm_sumref, x %+ mm_sumref, x %+ mm_sumref + vpxor x %+ mm_mad, x %+ mm_mad, x %+ mm_mad + lea r_tmp, [8 * i_stride] + add p_cur, r_tmp + add p_ref, r_tmp + neg r_tmp +%%loop: + AVX2_SadSdMad %1 %+ mm_sad, %1 %+ mm_sumcur, %1 %+ mm_sumref, %1 %+ mm_mad, p_cur + r_tmp, p_ref + r_tmp, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ mm_tmp1, %1 %+ mm_tmp2, 1 + add r_tmp, i_stride + jl %%loop + ; Increment addresses for the next iteration. Doing this early is beneficial on Haswell. + lea r_tmp, [8 * i_stride - %1 %+ mm_width] + sub p_cur, r_tmp + sub p_ref, r_tmp +%endif + mov r_tmp, p_sad8x8 + AVX2_Store8x8Accdw r_tmp + 4 * i_xcnt, %1, mm_sad, mm_tmp1, b_second_blocks +%ifdef X86_32 + vpaddd y %+ mm_tmp1, y %+ mm_sad, sadframe_acc + vmovdqa sadframe_acc, y %+ mm_tmp1 +%else + vpaddd sadframe_acc, sadframe_acc, y %+ mm_sad +%endif + mov r_tmp, p_sd8x8 + vpsubd %1 %+ mm_tmp0, %1 %+ mm_sumcur, %1 %+ mm_sumref + AVX2_Store8x8Accdw r_tmp + 4 * i_xcnt, %1, mm_tmp0, mm_tmp1, b_second_blocks + ; Coalesce store and horizontal reduction of MAD accumulator for even and + ; odd iterations so as to enable more parallelism. 
+%ifidni %1, y + test i_xcnt, 32 / xcnt_unit + jz %%preserve_mad + mov r_tmp, p_mad8x8 + AVX2_Maxubq2 y %+ mm_mad, y %+ mm_mad, prev_mad, y %+ mm_tmp0 + AVX2_Store2x8x8Accb r_tmp + i_xcnt - 8, mm_mad, mm_tmp0, mm_tmp1, b_second_blocks +%%preserve_mad: + vmovdqa prev_mad, y %+ mm_mad +%else + mov r_tmp, p_mad8x8 + AVX2_Maxubq %1 %+ mm_mad, %1 %+ mm_mad, %1 %+ mm_tmp0 + AVX2_Store8x8Accb r_tmp + i_xcnt, %1, mm_mad, mm_tmp0, b_second_blocks +%endif +%undef mm_tmp0 +%undef mm_tmp1 +%undef mm_tmp2 +%undef mm_mad +%undef mm_sumcur +%undef mm_sumref +%undef mm_sad +%undef b_second_blocks +%endmacro + +; Store remaining MAD accumulator for width & 32 cases. +; width/xcnt_unit=%1 mm_tmp=%2,%3 b_second_blocks=%4 +%macro AVX2_StoreRemainingSingleMad 4 + test %1, 32 / xcnt_unit + jz %%skip + mov r_tmp, p_mad8x8 + vmovdqa y%2, prev_mad + AVX2_Maxubq y%2, y%2, y%3 + AVX2_Store8x8Accb r_tmp + i_xcnt - 8, y, %2, %3, %4 +%%skip: +%endmacro + +;************************************************************************************************************* +;void VAACalcSadBgd_avx2(const uint8_t *cur_data, const uint8_t *ref_data, int32_t iPicWidth, int32_t iPicHeight, +; int32_t iPicStride, int32_t *psadframe, int32_t *psad8x8, int32_t *p_sd8x8, uint8_t *p_mad8x8) +;************************************************************************************************************* + +WELS_EXTERN VAACalcSadBgd_avx2 +%define p_sadframe arg6 +%define p_sad8x8 arg7 +%define p_sd8x8 arg8 +%define p_mad8x8 arg9 +%ifdef X86_32 +%define saveregs r5, r6 +%else +%define saveregs rbx, rbp, r12, r13 +%endif + +%assign push_num 0 + LOAD_5_PARA + PUSH_XMM 10 + SIGN_EXTENSION r2, r2d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r4, r4d + PUSHM saveregs + +%define mm_zero mm0 + vpxor x %+ mm_zero, x %+ mm_zero, x %+ mm_zero + +%ifdef X86_32 + STACK_ALLOC r5, 2 * ymm_width, ymm_width + %define sadframe_acc_addr r5 + %define sadframe_acc [sadframe_acc_addr] + %define prev_mad [r5 + ymm_width] +%else + %define 
sadframe_acc ymm8 + %define xsadframe_acc xmm8 + %define prev_mad ymm9 +%endif + vmovdqa sadframe_acc, y %+ mm_zero + + and r2, -16 ; iPicWidth &= -16 + jle .done ; bail if iPicWidth < 16 + sar r3, 4 ; iPicHeight / 16 + jle .done ; bail if iPicHeight < 16 + shr r2, 2 ; iPicWidth / 4 + +%define p_cur r0 +%define p_ref r1 +%define i_xcnt r2 +%define i_ycnt ptrword arg4 +%define i_stride r4 +%define r_tmp r6 +%define xcnt_unit 4 +%ifdef X86_32 + mov i_ycnt, r3 + %define i_stride3 r3 +%else + mov rbp, p_sad8x8 + mov r12, p_sd8x8 + mov r13, p_mad8x8 + %undef p_sad8x8 + %undef p_sd8x8 + %undef p_mad8x8 + %define p_sad8x8 rbp + %define p_sd8x8 r12 + %define p_mad8x8 r13 + %define i_stride3 rbx +%endif + lea i_stride3, [3 * i_stride] + + ; offset pointers to compensate for the i_xcnt offset below. + mov r_tmp, i_xcnt + and r_tmp, 64 / xcnt_unit - 1 + sub p_mad8x8, r_tmp + shl r_tmp, 2 + sub p_sad8x8, r_tmp + sub p_sd8x8, r_tmp + +.height_loop: + push i_xcnt +%assign push_num push_num + 1 +%define i_xcnt_load ptrword [r7] + ; use end-of-line pointers so as to enable use of a negative counter as index. + lea r_tmp, [xcnt_unit * i_xcnt] + add p_sad8x8, r_tmp + add p_sd8x8, r_tmp + add p_mad8x8, i_xcnt + and i_xcnt, -(64 / xcnt_unit) + jz .width_loop_upper8_64x_end + ; use a negative loop counter to enable counting toward zero and indexing with the same counter. + neg i_xcnt +.width_loop_upper8: + AVX2_CalcSadBgd_8Lines y, mm1, mm2, mm3, mm4, mm5, mm6, mm7, 0 + add i_xcnt, 32 / xcnt_unit + jl .width_loop_upper8 + jg .width_loop_upper8_32x_end +.width_loop_upper8_64x_end: + test i_xcnt_load, 32 / xcnt_unit + jnz .width_loop_upper8 +.width_loop_upper8_32x_end: + AVX2_StoreRemainingSingleMad i_xcnt_load, mm1, mm2, 0 + test i_xcnt_load, 16 / xcnt_unit + jz .width_loop_upper8_end + ; remaining 16. 
+ AVX2_CalcSadBgd_8Lines x, mm1, mm2, mm3, mm4, mm5, mm6, mm7, 0 +.width_loop_upper8_end: + lea p_cur, [p_cur + 8 * i_stride] + lea p_ref, [p_ref + 8 * i_stride] + mov i_xcnt, i_xcnt_load + lea r_tmp, [xcnt_unit * i_xcnt] + sub p_cur, r_tmp + sub p_ref, r_tmp + and i_xcnt, -(64 / xcnt_unit) + jz .width_loop_lower8_64x_end + neg i_xcnt +.width_loop_lower8: + AVX2_CalcSadBgd_8Lines y, mm1, mm2, mm3, mm4, mm5, mm6, mm7, 1 + add i_xcnt, 32 / xcnt_unit + jl .width_loop_lower8 + jg .width_loop_lower8_32x_end +.width_loop_lower8_64x_end: + test i_xcnt_load, 32 / xcnt_unit + jnz .width_loop_lower8 +.width_loop_lower8_32x_end: + AVX2_StoreRemainingSingleMad i_xcnt_load, mm1, mm2, 1 + test i_xcnt_load, 16 / xcnt_unit + jz .width_loop_lower8_end + ; remaining 16. + AVX2_CalcSadBgd_8Lines x, mm1, mm2, mm3, mm4, mm5, mm6, mm7, 1 +.width_loop_lower8_end: + lea p_cur, [p_cur + 8 * i_stride] + lea p_ref, [p_ref + 8 * i_stride] + pop i_xcnt +%undef i_xcnt_load + %assign push_num push_num - 1 + lea r_tmp, [xcnt_unit * i_xcnt] + sub p_cur, r_tmp + sub p_ref, r_tmp + sub i_ycnt, 1 + jnz .height_loop + +.done: + mov r_tmp, p_sadframe +%ifdef X86_32 + vmovdqa xmm2, sadframe_acc + vpaddd xmm2, xmm2, [sadframe_acc_addr + xmm_width] +%else + vextracti128 xmm2, sadframe_acc, 1 + vpaddd xmm2, xsadframe_acc, xmm2 +%endif + vpunpckhqdq xmm1, xmm2, xmm2 + vpaddd xmm2, xmm2, xmm1 + vmovd [r_tmp], xmm2 + vzeroupper +%ifdef X86_32 + STACK_DEALLOC +%endif + POPM saveregs + POP_XMM + LOAD_5_PARA_POP +%undef p_cur +%undef p_ref +%undef i_xcnt +%undef i_ycnt +%undef i_stride +%undef i_stride3 +%undef r_tmp +%undef xcnt_unit +%undef sadframe_acc +%undef sadframe_acc_addr +%undef xsadframe_acc +%undef prev_mad +%undef mm_zero +%undef saveregs +%undef p_sadframe +%undef p_sad8x8 +%undef p_sd8x8 +%undef p_mad8x8 + ret + + +; x/y-mm_prefix=%1 mm_clobber=%2,%3,%4,%5,%6,%7,%8,%9,%10 b_second_blocks=%11 +%macro AVX2_CalcSadSsdBgd_8Lines 11 +%define mm_tmp0 %2 +%define mm_tmp1 %3 +%define mm_sad %4 +%define 
mm_sum %5 +%define mm_sumref %6 +%define mm_mad %7 +%define mm_sqsum %8 +%define mm_sqdiff %9 +%ifidn %10, 0 +%define tmp2 0 +%else +%define tmp2 %1 %+ %10 +%endif +%define b_second_blocks %11 + ; Unroll for better performance on Haswell. + ; Avoid unrolling for the 16 px case so as to reduce the code footprint. +%ifidni %1, y + lea r_tmp, [5 * i_stride] + AVX2_SadBgdSqdiff %1 %+ mm_sad, %1 %+ mm_sum, %1 %+ mm_sumref, %1 %+ mm_mad, %1 %+ mm_sqdiff, %1 %+ mm_sqsum, p_cur, p_ref, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ mm_tmp1, tmp2, 0 + AVX2_SadBgdSqdiff %1 %+ mm_sad, %1 %+ mm_sum, %1 %+ mm_sumref, %1 %+ mm_mad, %1 %+ mm_sqdiff, %1 %+ mm_sqsum, p_cur + 1 * i_stride, p_ref + 1 * i_stride, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ mm_tmp1, tmp2, 1 + AVX2_SadBgdSqdiff %1 %+ mm_sad, %1 %+ mm_sum, %1 %+ mm_sumref, %1 %+ mm_mad, %1 %+ mm_sqdiff, %1 %+ mm_sqsum, p_cur + 2 * i_stride, p_ref + 2 * i_stride, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ mm_tmp1, tmp2, 1 + AVX2_SadBgdSqdiff %1 %+ mm_sad, %1 %+ mm_sum, %1 %+ mm_sumref, %1 %+ mm_mad, %1 %+ mm_sqdiff, %1 %+ mm_sqsum, p_cur + 1 * i_stride3, p_ref + 1 * i_stride3, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ mm_tmp1, tmp2, 1 + AVX2_SadBgdSqdiff %1 %+ mm_sad, %1 %+ mm_sum, %1 %+ mm_sumref, %1 %+ mm_mad, %1 %+ mm_sqdiff, %1 %+ mm_sqsum, p_cur + 4 * i_stride, p_ref + 4 * i_stride, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ mm_tmp1, tmp2, 1 + AVX2_SadBgdSqdiff %1 %+ mm_sad, %1 %+ mm_sum, %1 %+ mm_sumref, %1 %+ mm_mad, %1 %+ mm_sqdiff, %1 %+ mm_sqsum, p_cur + r_tmp, p_ref + r_tmp, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ mm_tmp1, tmp2, 1 + lea r_tmp, [i_stride + 2 * i_stride3] + AVX2_SadBgdSqdiff %1 %+ mm_sad, %1 %+ mm_sum, %1 %+ mm_sumref, %1 %+ mm_mad, %1 %+ mm_sqdiff, %1 %+ mm_sqsum, p_cur + 2 * i_stride3, p_ref + 2 * i_stride3, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ mm_tmp1, tmp2, 1 + AVX2_SadBgdSqdiff %1 %+ mm_sad, %1 %+ mm_sum, %1 %+ mm_sumref, %1 %+ mm_mad, %1 %+ mm_sqdiff, %1 %+ mm_sqsum, p_cur + r_tmp, p_ref + r_tmp, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ 
mm_tmp1, tmp2, 1 + ; Increment addresses for the next iteration. Doing this early is beneficial on Haswell. + add p_cur, %1 %+ mm_width + add p_ref, %1 %+ mm_width +%else + vpxor x %+ mm_sad, x %+ mm_sad, x %+ mm_sad + vpxor x %+ mm_sum, x %+ mm_sum, x %+ mm_sum + vpxor x %+ mm_sumref, x %+ mm_sumref, x %+ mm_sumref + vpxor x %+ mm_mad, x %+ mm_mad, x %+ mm_mad + vpxor x %+ mm_sqsum, x %+ mm_sqsum, x %+ mm_sqsum + vpxor x %+ mm_sqdiff, x %+ mm_sqdiff, x %+ mm_sqdiff + lea r_tmp, [8 * i_stride] + add p_cur, r_tmp + add p_ref, r_tmp + neg r_tmp +%%loop: + AVX2_SadBgdSqdiff %1 %+ mm_sad, %1 %+ mm_sum, %1 %+ mm_sumref, %1 %+ mm_mad, %1 %+ mm_sqdiff, %1 %+ mm_sqsum, p_cur + r_tmp, p_ref + r_tmp, %1 %+ mm_zero, %1 %+ mm_tmp0, %1 %+ mm_tmp1, tmp2, 1 + add r_tmp, i_stride + jl %%loop + ; Increment addresses for the next iteration. Doing this early is beneficial on Haswell. + lea r_tmp, [8 * i_stride - %1 %+ mm_width] + sub p_cur, r_tmp + sub p_ref, r_tmp +%endif + mov r_tmp, p_sad8x8 + AVX2_Store8x8Accdw r_tmp + 4 * i_xcnt, %1, mm_sad, mm_tmp1, b_second_blocks +%ifdef X86_32 + vpaddd y %+ mm_tmp1, y %+ mm_sad, sadframe_acc + vmovdqa sadframe_acc, y %+ mm_tmp1 +%else + vpaddd sadframe_acc, sadframe_acc, y %+ mm_sad +%endif + mov r_tmp, i_xcnt + add r_tmp, p_sum16x16 + vpunpckhqdq %1 %+ mm_tmp1, %1 %+ mm_sum, %1 %+ mm_sum + vpaddd %1 %+ mm_tmp0, %1 %+ mm_sum, %1 %+ mm_tmp1 + AVX2_Store16x16Accdw r_tmp, %1, mm_tmp0, mm_tmp1, b_second_blocks + mov r_tmp, p_sd8x8 + vpsubd %1 %+ mm_sum, %1 %+ mm_sum, %1 %+ mm_sumref + AVX2_Store8x8Accdw r_tmp + 4 * i_xcnt, %1, mm_sum, mm_tmp0, b_second_blocks + ; Coalesce store and horizontal reduction of MAD accumulator for even and + ; odd iterations so as to enable more parallelism. 
+%ifidni %1, y + test i_xcnt, 32 / xcnt_unit + jz %%preserve_mad + mov r_tmp, p_mad8x8 + AVX2_Maxubq2 y %+ mm_mad, y %+ mm_mad, prev_mad, y %+ mm_tmp0 + AVX2_Store2x8x8Accb r_tmp + i_xcnt - 8, mm_mad, mm_tmp0, mm_tmp1, b_second_blocks +%%preserve_mad: + vmovdqa prev_mad, y %+ mm_mad +%else + mov r_tmp, p_mad8x8 + AVX2_Maxubq %1 %+ mm_mad, %1 %+ mm_mad, %1 %+ mm_tmp0 + AVX2_Store8x8Accb r_tmp + i_xcnt, %1, mm_mad, mm_tmp0, b_second_blocks +%endif + vpunpcklqdq %1 %+ mm_tmp0, %1 %+ mm_sqsum, %1 %+ mm_sqdiff + vpunpckhqdq %1 %+ mm_tmp1, %1 %+ mm_sqsum, %1 %+ mm_sqdiff + vpaddd %1 %+ mm_tmp0, %1 %+ mm_tmp0, %1 %+ mm_tmp1 + vpshufd %1 %+ mm_tmp1, %1 %+ mm_tmp0, 10110001b + vpaddd %1 %+ mm_tmp0, %1 %+ mm_tmp0, %1 %+ mm_tmp1 + AVX2_Store2x16x16Accdw p_sqsum16x16, p_sqdiff16x16, i_xcnt, r_tmp, %1, mm_tmp0, mm_tmp1, b_second_blocks +%undef mm_tmp0 +%undef mm_tmp1 +%undef mm_sqsum +%undef mm_sqdiff +%undef mm_mad +%undef mm_sum +%undef mm_sumref +%undef mm_sad +%undef tmp2 +%undef b_second_blocks +%endmacro + +;************************************************************************************************************* +;void VAACalcSadSsdBgd_avx2(const uint8_t *cur_data, const uint8_t *ref_data, int32_t iPicWidth, int32_t iPicHeight, +; int32_t iPicStride, int32_t *psadframe, int32_t *psad8x8, int32_t *psum16x16, int32_t *psqsum16x16, +; int32_t *psqdiff16x16, int32_t *p_sd8x8, uint8_t *p_mad8x8) +;************************************************************************************************************* + +WELS_EXTERN VAACalcSadSsdBgd_avx2 +%define p_sadframe arg6 +%define p_sad8x8 arg7 +%define p_sum16x16 arg8 +%define p_sqsum16x16 arg9 +%define p_sqdiff16x16 arg10 +%define p_sd8x8 arg11 +%define p_mad8x8 arg12 +%ifdef X86_32 +%define saveregs r5, r6 +%else +%define saveregs rbx, rbp, r12, r13, r14, r15 +%endif + +%assign push_num 0 + LOAD_5_PARA + PUSH_XMM 12 + SIGN_EXTENSION r2, r2d + SIGN_EXTENSION r3, r3d + SIGN_EXTENSION r4, r4d + PUSHM saveregs + +%ifdef X86_32 + 
STACK_ALLOC r5, 3 * ymm_width, ymm_width + %define mm8 0 + %define sadframe_acc_addr r5 + %define sadframe_acc [sadframe_acc_addr] + %define prev_mad [r5 + ymm_width] + %define ymm_zero [r5 + 2 * ymm_width] + %define xmm_zero ymm_zero + vpxor xmm0, xmm0, xmm0 + vmovdqa sadframe_acc, ymm0 + vmovdqa ymm_zero, ymm0 +%else + %define sadframe_acc ymm9 + %define xsadframe_acc xmm9 + %define prev_mad ymm10 + %define ymm_zero ymm11 + %define xmm_zero xmm11 + vpxor xmm_zero, xmm_zero, xmm_zero + vpxor xsadframe_acc, xsadframe_acc, xsadframe_acc +%endif + + and r2, -16 ; iPicWidth &= -16 + jle .done ; bail if iPicWidth < 16 + sar r3, 4 ; iPicHeight / 16 + jle .done ; bail if iPicHeight < 16 + shr r2, 2 ; iPicWidth / 4 + +%define p_cur r0 +%define p_ref r1 +%define i_xcnt r2 +%define i_ycnt ptrword arg4 +%define i_stride r4 +%define r_tmp r6 +%define xcnt_unit 4 +%ifdef X86_32 + mov i_ycnt, r3 + %define i_stride3 r3 +%else + mov rbp, p_sad8x8 + mov r12, p_sum16x16 + mov r13, p_sqsum16x16 + mov r14, p_sqdiff16x16 + mov r15, p_sd8x8 + %undef p_sad8x8 + %undef p_sum16x16 + %undef p_sqsum16x16 + %undef p_sqdiff16x16 + %undef p_sd8x8 + %define p_sad8x8 rbp + %define p_sum16x16 r12 + %define p_sqsum16x16 r13 + %define p_sqdiff16x16 r14 + %define p_sd8x8 r15 + %define i_stride3 rbx +%endif + lea i_stride3, [3 * i_stride] + + ; offset pointers so as to compensate for the i_xcnt offset below. + mov r_tmp, i_xcnt + and r_tmp, 64 / xcnt_unit - 1 + sub p_sum16x16, r_tmp + sub p_sqsum16x16, r_tmp + sub p_sqdiff16x16, r_tmp + sub p_mad8x8, r_tmp + shl r_tmp, 2 + sub p_sad8x8, r_tmp + sub p_sd8x8, r_tmp + +.height_loop: + push i_xcnt +%assign push_num push_num + 1 +%define i_xcnt_load ptrword [r7] + ; use end-of-line pointers so as to enable use of a negative counter as index. 
+ lea r_tmp, [xcnt_unit * i_xcnt] + add p_sad8x8, r_tmp + add p_sum16x16, i_xcnt + add p_sqsum16x16, i_xcnt + add p_sqdiff16x16, i_xcnt + add p_sd8x8, r_tmp + add p_mad8x8, i_xcnt + and i_xcnt, -(64 / xcnt_unit) + jz .width_loop_upper8_64x_end + ; use a negative loop counter to enable counting toward zero and indexing with the same counter. + neg i_xcnt +.width_loop_upper8: + AVX2_CalcSadSsdBgd_8Lines y, mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7, mm8, 0 + add i_xcnt, 32 / xcnt_unit + jl .width_loop_upper8 + jg .width_loop_upper8_32x_end +.width_loop_upper8_64x_end: + test i_xcnt_load, 32 / xcnt_unit + jnz .width_loop_upper8 +.width_loop_upper8_32x_end: + AVX2_StoreRemainingSingleMad i_xcnt_load, mm1, mm2, 0 + test i_xcnt_load, 16 / xcnt_unit + jz .width_loop_upper8_end + ; remaining 16. + AVX2_CalcSadSsdBgd_8Lines x, mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7, mm8, 0 +.width_loop_upper8_end: + lea p_cur, [p_cur + 8 * i_stride] + lea p_ref, [p_ref + 8 * i_stride] + mov i_xcnt, i_xcnt_load + lea r_tmp, [xcnt_unit * i_xcnt] + sub p_cur, r_tmp + sub p_ref, r_tmp + and i_xcnt, -(64 / xcnt_unit) + jz .width_loop_lower8_64x_end + neg i_xcnt +.width_loop_lower8: + AVX2_CalcSadSsdBgd_8Lines y, mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7, mm8, 1 + add i_xcnt, 32 / xcnt_unit + jl .width_loop_lower8 + jg .width_loop_lower8_32x_end +.width_loop_lower8_64x_end: + test i_xcnt_load, 32 / xcnt_unit + jnz .width_loop_lower8 +.width_loop_lower8_32x_end: + AVX2_StoreRemainingSingleMad i_xcnt_load, mm1, mm2, 1 + test i_xcnt_load, 16 / xcnt_unit + jz .width_loop_lower8_end + ; remaining 16. 
+ AVX2_CalcSadSsdBgd_8Lines x, mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7, mm8, 1 +.width_loop_lower8_end: + lea p_cur, [p_cur + 8 * i_stride] + lea p_ref, [p_ref + 8 * i_stride] + pop i_xcnt +%undef i_xcnt_load + %assign push_num push_num - 1 + lea r_tmp, [xcnt_unit * i_xcnt] + sub p_cur, r_tmp + sub p_ref, r_tmp + sub i_ycnt, 1 + jnz .height_loop + +.done: + mov r_tmp, p_sadframe +%ifdef X86_32 + vmovdqa xmm2, sadframe_acc + vpaddd xmm2, xmm2, [sadframe_acc_addr + xmm_width] +%else + vextracti128 xmm2, sadframe_acc, 1 + vpaddd xmm2, xsadframe_acc, xmm2 +%endif + vpunpckhqdq xmm1, xmm2, xmm2 + vpaddd xmm2, xmm2, xmm1 + vmovd [r_tmp], xmm2 + vzeroupper +%ifdef X86_32 + STACK_DEALLOC +%endif + POPM saveregs + POP_XMM + LOAD_5_PARA_POP +%undef p_cur +%undef p_ref +%undef i_xcnt +%undef i_ycnt +%undef i_stride +%undef i_stride3 +%undef r_tmp +%undef xcnt_unit +%undef mm8 +%undef sadframe_acc +%undef sadframe_acc_addr +%undef xsadframe_acc +%undef prev_mad +%undef ymm_zero +%undef xmm_zero +%undef saveregs +%undef p_sadframe +%undef p_sad8x8 +%undef p_sum16x16 +%undef p_sqsum16x16 +%undef p_sqdiff16x16 +%undef p_sd8x8 +%undef p_mad8x8 + ret + +%endif + diff --git a/TMessagesProj/jni/voip/CMakeLists.txt b/TMessagesProj/jni/voip/CMakeLists.txt index 27f609718..3c7db474c 100644 --- a/TMessagesProj/jni/voip/CMakeLists.txt +++ b/TMessagesProj/jni/voip/CMakeLists.txt @@ -47,7 +47,7 @@ set_target_properties(tgvoip PROPERTIES target_compile_definitions(tgvoip PUBLIC HAVE_PTHREAD __STDC_LIMIT_MACROS BSD=1 USE_KISS_FFT TGVOIP_NO_VIDEO NULL=0 SOCKLEN_T=socklen_t LOCALE_NOT_USED _LARGEFILE_SOURCE=1 _FILE_OFFSET_BITS=64 restrict= __EMX__ OPUS_BUILD FIXED_POINT USE_ALLOCA HAVE_LRINT HAVE_LRINTF) target_compile_definitions(tgvoip PUBLIC - RTC_DISABLE_TRACE_EVENTS WEBRTC_OPUS_SUPPORT_120MS_PTIME=1 BWE_TEST_LOGGING_COMPILE_TIME_ENABLE=0 ABSL_ALLOCATOR_NOTHROW=1 RTC_ENABLE_VP9 WEBRTC_POSIX WEBRTC_LINUX WEBRTC_ANDROID NDEBUG WEBRTC_HAVE_USRSCTP WEBRTC_HAVE_SCTP WEBRTC_APM_DEBUG_DUMP=0 
WEBRTC_USE_BUILTIN_ISAC_FLOAT WEBRTC_OPUS_VARIABLE_COMPLEXITY=0 HAVE_NETINET_IN_H WEBRTC_INCLUDE_INTERNAL_AUDIO_DEVICE __Userspace__ SCTP_SIMPLE_ALLOCATOR SCTP_PROCESS_LEVEL_LOCKS __Userspace_os_Linux) + RTC_DISABLE_TRACE_EVENTS WEBRTC_OPUS_SUPPORT_120MS_PTIME=1 BWE_TEST_LOGGING_COMPILE_TIME_ENABLE=0 ABSL_ALLOCATOR_NOTHROW=1 RTC_ENABLE_VP9 WEBRTC_POSIX WEBRTC_LINUX WEBRTC_ANDROID WEBRTC_USE_H264 NDEBUG WEBRTC_HAVE_USRSCTP WEBRTC_HAVE_SCTP WEBRTC_APM_DEBUG_DUMP=0 WEBRTC_USE_BUILTIN_ISAC_FLOAT WEBRTC_OPUS_VARIABLE_COMPLEXITY=0 HAVE_NETINET_IN_H WEBRTC_INCLUDE_INTERNAL_AUDIO_DEVICE __Userspace__ SCTP_SIMPLE_ALLOCATOR SCTP_PROCESS_LEVEL_LOCKS __Userspace_os_Linux) target_include_directories(tgvoip PUBLIC ./ voip @@ -76,6 +76,134 @@ elseif(${ANDROID_ABI} STREQUAL "x86_64") HAVE_SSE2) endif() +#openh264 +add_library(openh264 STATIC + third_party/openh264/src/codec/encoder/core/src/au_set.cpp + third_party/openh264/src/codec/encoder/core/src/deblocking.cpp + third_party/openh264/src/codec/encoder/core/src/decode_mb_aux.cpp + third_party/openh264/src/codec/encoder/core/src/encode_mb_aux.cpp + third_party/openh264/src/codec/encoder/core/src/encoder_data_tables.cpp + third_party/openh264/src/codec/encoder/core/src/encoder_ext.cpp + third_party/openh264/src/codec/encoder/core/src/encoder.cpp + third_party/openh264/src/codec/encoder/core/src/get_intra_predictor.cpp + third_party/openh264/src/codec/encoder/core/src/md.cpp + third_party/openh264/src/codec/encoder/core/src/mv_pred.cpp + third_party/openh264/src/codec/encoder/core/src/nal_encap.cpp + third_party/openh264/src/codec/encoder/core/src/paraset_strategy.cpp + third_party/openh264/src/codec/encoder/core/src/picture_handle.cpp + third_party/openh264/src/codec/encoder/core/src/ratectl.cpp + third_party/openh264/src/codec/encoder/core/src/ref_list_mgr_svc.cpp + third_party/openh264/src/codec/encoder/core/src/sample.cpp + third_party/openh264/src/codec/encoder/core/src/set_mb_syn_cabac.cpp + 
third_party/openh264/src/codec/encoder/core/src/set_mb_syn_cavlc.cpp + third_party/openh264/src/codec/encoder/core/src/slice_multi_threading.cpp + third_party/openh264/src/codec/encoder/core/src/svc_base_layer_md.cpp + third_party/openh264/src/codec/encoder/core/src/svc_enc_slice_segment.cpp + third_party/openh264/src/codec/encoder/core/src/svc_encode_mb.cpp + third_party/openh264/src/codec/encoder/core/src/svc_encode_slice.cpp + third_party/openh264/src/codec/encoder/core/src/svc_mode_decision.cpp + third_party/openh264/src/codec/encoder/core/src/svc_motion_estimate.cpp + third_party/openh264/src/codec/encoder/core/src/svc_set_mb_syn_cabac.cpp + third_party/openh264/src/codec/encoder/core/src/svc_set_mb_syn_cavlc.cpp + third_party/openh264/src/codec/encoder/core/src/wels_preprocess.cpp + third_party/openh264/src/codec/encoder/core/src/wels_task_base.cpp + third_party/openh264/src/codec/encoder/core/src/wels_task_encoder.cpp + third_party/openh264/src/codec/encoder/core/src/wels_task_management.cpp + third_party/openh264/src/codec/encoder/plus/src/welsEncoderExt.cpp + third_party/openh264/src/codec/common/src/welsCodecTrace.cpp + third_party/openh264/src/codec/common/src/common_tables.cpp + third_party/openh264/src/codec/common/src/copy_mb.cpp + third_party/openh264/src/codec/common/src/cpu.cpp + third_party/openh264/src/codec/common/src/crt_util_safe_x.cpp + third_party/openh264/src/codec/common/src/deblocking_common.cpp + third_party/openh264/src/codec/common/src/expand_pic.cpp + third_party/openh264/src/codec/common/src/intra_pred_common.cpp + third_party/openh264/src/codec/common/src/mc.cpp + third_party/openh264/src/codec/common/src/memory_align.cpp + third_party/openh264/src/codec/common/src/sad_common.cpp + third_party/openh264/src/codec/common/src/WelsTaskThread.cpp + third_party/openh264/src/codec/common/src/WelsThread.cpp + third_party/openh264/src/codec/common/src/WelsThreadLib.cpp + third_party/openh264/src/codec/common/src/WelsThreadPool.cpp + 
third_party/openh264/src/codec/common/src/utils.cpp + third_party/openh264/src/codec/processing/src/adaptivequantization/AdaptiveQuantization.cpp + third_party/openh264/src/codec/processing/src/backgrounddetection/BackgroundDetection.cpp + third_party/openh264/src/codec/processing/src/common/memory.cpp + third_party/openh264/src/codec/processing/src/common/WelsFrameWork.cpp + third_party/openh264/src/codec/processing/src/common/WelsFrameWorkEx.cpp + third_party/openh264/src/codec/processing/src/complexityanalysis/ComplexityAnalysis.cpp + third_party/openh264/src/codec/processing/src/denoise/denoise.cpp + third_party/openh264/src/codec/processing/src/denoise/denoise_filter.cpp + third_party/openh264/src/codec/processing/src/downsample/downsample.cpp + third_party/openh264/src/codec/processing/src/downsample/downsamplefuncs.cpp + third_party/openh264/src/codec/processing/src/imagerotate/imagerotate.cpp + third_party/openh264/src/codec/processing/src/imagerotate/imagerotatefuncs.cpp + third_party/openh264/src/codec/processing/src/scenechangedetection/SceneChangeDetection.cpp + third_party/openh264/src/codec/processing/src/scrolldetection/ScrollDetection.cpp + third_party/openh264/src/codec/processing/src/scrolldetection/ScrollDetectionFuncs.cpp + third_party/openh264/src/codec/processing/src/vaacalc/vaacalcfuncs.cpp + third_party/openh264/src/codec/processing/src/vaacalc/vaacalculation.cpp) +target_compile_options(openh264 PUBLIC + -Wall -finline-functions -fno-strict-aliasing -O3 -frtti -Wno-unknown-pragmas -funroll-loops -fexceptions -fno-math-errno) +set_target_properties(openh264 PROPERTIES + ANDROID_ARM_MODE arm) +target_compile_definitions(openh264 PRIVATE + ) +target_compile_definitions(openh264 PUBLIC + ) +target_include_directories(openh264 PUBLIC + third_party/openh264/src/codec/encoder/core/inc + third_party/openh264/src/codec/encoder/plus/inc + third_party/openh264/src/codec/decoder/plus/inc + third_party/openh264/src/codec/common/inc + 
third_party/openh264/src/codec/api/svc + third_party/openh264/src/codec/processing/interface + third_party/openh264/src/codec/processing/src/common) + +if (${ANDROID_ABI} STREQUAL "armeabi-v7a") + target_sources(openh264 PRIVATE + third_party/openh264/src/codec/encoder/core/arm/intra_pred_neon.S + third_party/openh264/src/codec/encoder/core/arm/intra_pred_sad_3_opt_neon.S + third_party/openh264/src/codec/encoder/core/arm/memory_neon.S + third_party/openh264/src/codec/encoder/core/arm/pixel_neon.S + third_party/openh264/src/codec/encoder/core/arm/reconstruct_neon.S + third_party/openh264/src/codec/encoder/core/arm/svc_motion_estimation.S + third_party/openh264/src/codec/common/arm/copy_mb_neon.S + third_party/openh264/src/codec/common/arm/deblocking_neon.S + third_party/openh264/src/codec/common/arm/expand_picture_neon.S + third_party/openh264/src/codec/common/arm/intra_pred_common_neon.S + third_party/openh264/src/codec/common/arm/mc_neon.S + third_party/openh264/src/codec/processing/src/arm/adaptive_quantization.S + third_party/openh264/src/codec/processing/src/arm/down_sample_neon.S + third_party/openh264/src/codec/processing/src/arm/pixel_sad_neon.S + third_party/openh264/src/codec/processing/src/arm/vaa_calc_neon.S) + target_include_directories(openh264 PUBLIC + third_party/openh264/src/codec/common/arm) + target_compile_definitions(openh264 PUBLIC + HAVE_NEON=1) +elseif(${ANDROID_ABI} STREQUAL "arm64-v8a") + target_sources(openh264 PRIVATE + third_party/openh264/src/codec/encoder/core/arm64/intra_pred_aarch64_neon.S + third_party/openh264/src/codec/encoder/core/arm64/intra_pred_sad_3_opt_aarch64_neon.S + third_party/openh264/src/codec/encoder/core/arm64/memory_aarch64_neon.S + third_party/openh264/src/codec/encoder/core/arm64/pixel_aarch64_neon.S + third_party/openh264/src/codec/encoder/core/arm64/reconstruct_aarch64_neon.S + third_party/openh264/src/codec/encoder/core/arm64/svc_motion_estimation_aarch64_neon.S + 
third_party/openh264/src/codec/common/arm64/copy_mb_aarch64_neon.S + third_party/openh264/src/codec/common/arm64/deblocking_aarch64_neon.S + third_party/openh264/src/codec/common/arm64/expand_picture_aarch64_neon.S + third_party/openh264/src/codec/common/arm64/intra_pred_common_aarch64_neon.S + third_party/openh264/src/codec/common/arm64/mc_aarch64_neon.S + third_party/openh264/src/codec/processing/src/arm64/adaptive_quantization_aarch64_neon.S + third_party/openh264/src/codec/processing/src/arm64/down_sample_aarch64_neon.S + third_party/openh264/src/codec/processing/src/arm64/pixel_sad_aarch64_neon.S + third_party/openh264/src/codec/processing/src/arm64/vaa_calc_aarch64_neon.S) + target_include_directories(openh264 PUBLIC + third_party/openh264/src/codec/common/arm64) + target_compile_definitions(openh264 PUBLIC + HAVE_NEON_AARCH64=1) +endif() + #rnnoise add_library(rnnoise STATIC voip/rnnoise/src/celt_lpc.c @@ -116,7 +244,7 @@ target_compile_definitions(rnnoise PRIVATE pitch_search=rnnoise_pitch_search remove_doubling=rnnoise_remove_doubling) target_compile_definitions(rnnoise PUBLIC - RTC_DISABLE_TRACE_EVENTS WEBRTC_OPUS_SUPPORT_120MS_PTIME=1 BWE_TEST_LOGGING_COMPILE_TIME_ENABLE=0 ABSL_ALLOCATOR_NOTHROW=1 RTC_ENABLE_VP9 WEBRTC_POSIX WEBRTC_LINUX WEBRTC_ANDROID NDEBUG WEBRTC_HAVE_USRSCTP WEBRTC_HAVE_SCTP WEBRTC_APM_DEBUG_DUMP=0 WEBRTC_USE_BUILTIN_ISAC_FLOAT WEBRTC_OPUS_VARIABLE_COMPLEXITY=0 HAVE_NETINET_IN_H WEBRTC_INCLUDE_INTERNAL_AUDIO_DEVICE __Userspace__ SCTP_SIMPLE_ALLOCATOR SCTP_PROCESS_LEVEL_LOCKS __Userspace_os_Linux) + RTC_DISABLE_TRACE_EVENTS WEBRTC_OPUS_SUPPORT_120MS_PTIME=1 BWE_TEST_LOGGING_COMPILE_TIME_ENABLE=0 ABSL_ALLOCATOR_NOTHROW=1 RTC_ENABLE_VP9 WEBRTC_POSIX WEBRTC_LINUX WEBRTC_ANDROID WEBRTC_USE_H264 NDEBUG WEBRTC_HAVE_USRSCTP WEBRTC_HAVE_SCTP WEBRTC_APM_DEBUG_DUMP=0 WEBRTC_USE_BUILTIN_ISAC_FLOAT WEBRTC_OPUS_VARIABLE_COMPLEXITY=0 HAVE_NETINET_IN_H WEBRTC_INCLUDE_INTERNAL_AUDIO_DEVICE __Userspace__ SCTP_SIMPLE_ALLOCATOR SCTP_PROCESS_LEVEL_LOCKS 
__Userspace_os_Linux) target_include_directories(rnnoise PUBLIC voip/rnnoise/include) @@ -471,7 +599,7 @@ target_compile_options(tgcalls_tp PUBLIC set_target_properties(tgcalls_tp PROPERTIES ANDROID_ARM_MODE arm) target_compile_definitions(tgcalls_tp PUBLIC - RTC_DISABLE_TRACE_EVENTS WEBRTC_OPUS_SUPPORT_120MS_PTIME=1 BWE_TEST_LOGGING_COMPILE_TIME_ENABLE=0 ABSL_ALLOCATOR_NOTHROW=1 HAVE_PTHREAD RTC_ENABLE_VP9 WEBRTC_POSIX WEBRTC_LINUX WEBRTC_ANDROID NDEBUG WEBRTC_HAVE_USRSCTP WEBRTC_HAVE_SCTP WEBRTC_APM_DEBUG_DUMP=0 WEBRTC_USE_BUILTIN_ISAC_FLOAT WEBRTC_OPUS_VARIABLE_COMPLEXITY=0 HAVE_NETINET_IN_H WEBRTC_INCLUDE_INTERNAL_AUDIO_DEVICE __Userspace__ SCTP_SIMPLE_ALLOCATOR SCTP_PROCESS_LEVEL_LOCKS __Userspace_os_Linux HAVE_WEBRTC_VIDEO __ANDROID__) + RTC_DISABLE_TRACE_EVENTS WEBRTC_OPUS_SUPPORT_120MS_PTIME=1 BWE_TEST_LOGGING_COMPILE_TIME_ENABLE=0 ABSL_ALLOCATOR_NOTHROW=1 HAVE_PTHREAD RTC_ENABLE_VP9 WEBRTC_POSIX WEBRTC_LINUX WEBRTC_ANDROID WEBRTC_USE_H264 NDEBUG WEBRTC_HAVE_USRSCTP WEBRTC_HAVE_SCTP WEBRTC_APM_DEBUG_DUMP=0 WEBRTC_USE_BUILTIN_ISAC_FLOAT WEBRTC_OPUS_VARIABLE_COMPLEXITY=0 HAVE_NETINET_IN_H WEBRTC_INCLUDE_INTERNAL_AUDIO_DEVICE __Userspace__ SCTP_SIMPLE_ALLOCATOR SCTP_PROCESS_LEVEL_LOCKS __Userspace_os_Linux HAVE_WEBRTC_VIDEO __ANDROID__) target_include_directories(tgcalls_tp PUBLIC ./ voip @@ -1999,7 +2127,7 @@ target_compile_options(tgcalls PUBLIC set_target_properties(tgcalls PROPERTIES ANDROID_ARM_MODE arm) target_compile_definitions(tgcalls PUBLIC - RTC_DISABLE_TRACE_EVENTS WEBRTC_OPUS_SUPPORT_120MS_PTIME=1 BWE_TEST_LOGGING_COMPILE_TIME_ENABLE=0 ABSL_ALLOCATOR_NOTHROW=1 WEBRTC_NS_FLOAT HAVE_PTHREAD RTC_ENABLE_VP9 WEBRTC_POSIX WEBRTC_LINUX WEBRTC_ANDROID NDEBUG WEBRTC_HAVE_USRSCTP WEBRTC_HAVE_SCTP WEBRTC_APM_DEBUG_DUMP=0 WEBRTC_USE_BUILTIN_ISAC_FLOAT WEBRTC_OPUS_VARIABLE_COMPLEXITY=0 HAVE_NETINET_IN_H WEBRTC_INCLUDE_INTERNAL_AUDIO_DEVICE HAVE_WEBRTC_VIDEO) + RTC_DISABLE_TRACE_EVENTS WEBRTC_OPUS_SUPPORT_120MS_PTIME=1 BWE_TEST_LOGGING_COMPILE_TIME_ENABLE=0 
ABSL_ALLOCATOR_NOTHROW=1 WEBRTC_NS_FLOAT HAVE_PTHREAD RTC_ENABLE_VP9 WEBRTC_POSIX WEBRTC_LINUX WEBRTC_ANDROID WEBRTC_USE_H264 NDEBUG WEBRTC_HAVE_USRSCTP WEBRTC_HAVE_SCTP WEBRTC_APM_DEBUG_DUMP=0 WEBRTC_USE_BUILTIN_ISAC_FLOAT WEBRTC_OPUS_VARIABLE_COMPLEXITY=0 HAVE_NETINET_IN_H WEBRTC_INCLUDE_INTERNAL_AUDIO_DEVICE HAVE_WEBRTC_VIDEO) target_include_directories(tgcalls PUBLIC ./ voip @@ -2087,6 +2215,7 @@ add_library(voipandroid STATIC voip/tgcalls/platform/android/VideoCapturerInterfaceImpl.cpp voip/webrtc/modules/audio_device/android/audio_manager.cc voip/webrtc/modules/audio_device/android/audio_record_jni.cc + voip/webrtc/modules/audio_device/android/audio_screen_record_jni.cc voip/webrtc/modules/audio_device/android/audio_track_jni.cc voip/webrtc/modules/audio_device/android/build_info.cc voip/webrtc/modules/audio_device/android/opensles_common.cc @@ -2171,6 +2300,7 @@ add_library(voipandroid STATIC voip/webrtc/sdk/android/src/jni/video_track.cc voip/webrtc/sdk/android/src/jni/vp8_codec.cc voip/webrtc/sdk/android/src/jni/vp9_codec.cc + voip/webrtc/sdk/android/src/jni/h264_codec.cc voip/webrtc/sdk/android/src/jni/wrapped_native_i420_buffer.cc voip/webrtc/sdk/android/src/jni/yuv_helper.cc voip/org_telegram_messenger_voip_Instance.cpp) @@ -2179,7 +2309,7 @@ target_compile_options(voipandroid PUBLIC set_target_properties(voipandroid PROPERTIES ANDROID_ARM_MODE arm) target_compile_definitions(voipandroid PUBLIC - RTC_DISABLE_TRACE_EVENTS WEBRTC_OPUS_SUPPORT_120MS_PTIME=1 BWE_TEST_LOGGING_COMPILE_TIME_ENABLE=0 ABSL_ALLOCATOR_NOTHROW=1 WEBRTC_NS_FLOAT HAVE_PTHREAD RTC_ENABLE_VP9 WEBRTC_POSIX WEBRTC_LINUX WEBRTC_ANDROID NDEBUG WEBRTC_HAVE_USRSCTP WEBRTC_HAVE_SCTP WEBRTC_APM_DEBUG_DUMP=0 WEBRTC_USE_BUILTIN_ISAC_FLOAT WEBRTC_OPUS_VARIABLE_COMPLEXITY=0 HAVE_NETINET_IN_H WEBRTC_INCLUDE_INTERNAL_AUDIO_DEVICE HAVE_WEBRTC_VIDEO) + RTC_DISABLE_TRACE_EVENTS WEBRTC_OPUS_SUPPORT_120MS_PTIME=1 BWE_TEST_LOGGING_COMPILE_TIME_ENABLE=0 ABSL_ALLOCATOR_NOTHROW=1 WEBRTC_NS_FLOAT HAVE_PTHREAD 
RTC_ENABLE_VP9 WEBRTC_POSIX WEBRTC_LINUX WEBRTC_ANDROID WEBRTC_USE_H264 NDEBUG WEBRTC_HAVE_USRSCTP WEBRTC_HAVE_SCTP WEBRTC_APM_DEBUG_DUMP=0 WEBRTC_USE_BUILTIN_ISAC_FLOAT WEBRTC_OPUS_VARIABLE_COMPLEXITY=0 HAVE_NETINET_IN_H WEBRTC_INCLUDE_INTERNAL_AUDIO_DEVICE HAVE_WEBRTC_VIDEO) target_include_directories(voipandroid PUBLIC ./ voip diff --git a/TMessagesProj/jni/voip/tgcalls/group/GroupInstanceCustomImpl.cpp b/TMessagesProj/jni/voip/tgcalls/group/GroupInstanceCustomImpl.cpp index 3d06cfdd3..1d9166e19 100644 --- a/TMessagesProj/jni/voip/tgcalls/group/GroupInstanceCustomImpl.cpp +++ b/TMessagesProj/jni/voip/tgcalls/group/GroupInstanceCustomImpl.cpp @@ -34,6 +34,7 @@ #include "audio/audio_state.h" #include "modules/audio_coding/neteq/default_neteq_factory.h" #include "modules/audio_coding/include/audio_coding_module.h" +#include "common_audio/include/audio_util.h" #include "AudioFrame.h" #include "ThreadLocalObject.h" @@ -45,6 +46,7 @@ #include "CodecSelectHelper.h" #include "StreamingPart.h" #include "AudioDeviceHelper.h" +#include "FakeAudioDeviceModule.h" #include #include @@ -134,6 +136,134 @@ static void addDefaultFeedbackParams(cricket::VideoCodec *codec) { codec->AddFeedbackParam(cricket::FeedbackParam(cricket::kRtcpFbParamNack, cricket::kRtcpFbNackParamPli)); } +struct H264FormatParameters { + std::string profileLevelId; + std::string packetizationMode; + std::string levelAssymetryAllowed; +}; + +H264FormatParameters parseH264FormatParameters(webrtc::SdpVideoFormat const &format) { /* Copies the "profile-level-id", "packetization-mode" and "level-asymmetry-allowed" entries of format.parameters into the result; fields for absent keys stay empty. */ + H264FormatParameters result; + + for (const auto &parameter : format.parameters) { + if (parameter.first == "profile-level-id") { + result.profileLevelId = parameter.second; + } else if (parameter.first == "packetization-mode") { + result.packetizationMode = parameter.second; + } else if (parameter.first == "level-asymmetry-allowed") { + result.levelAssymetryAllowed = parameter.second; + } + } + + return result; +} + +static int getH264ProfileLevelIdPriority(std::string const 
&profileLevelId) { + if (profileLevelId == cricket::kH264ProfileLevelConstrainedHigh) { + return 0; + } else if (profileLevelId == cricket::kH264ProfileLevelConstrainedBaseline) { + return 1; + } else { + return 2; + } +} + +static int getH264PacketizationModePriority(std::string const &packetizationMode) { + if (packetizationMode == "1") { + return 0; + } else { + return 1; + } +} + +static int getH264LevelAssymetryAllowedPriority(std::string const &levelAssymetryAllowed) { + if (levelAssymetryAllowed == "1") { + return 0; + } else { + return 1; + } +} + +static std::vector<webrtc::SdpVideoFormat> filterSupportedVideoFormats(std::vector<webrtc::SdpVideoFormat> const &formats) { /* Keeps every VP8 format, one VP9 format (the first with profile-id "0", else the first listed) and the single most preferred H264 format; formats of any other codec are dropped. */ + std::vector<webrtc::SdpVideoFormat> filteredFormats; + + std::vector<std::string> filterCodecNames = { + cricket::kVp8CodecName, + cricket::kVp9CodecName, + cricket::kH264CodecName + }; + + std::vector<webrtc::SdpVideoFormat> vp9Formats; + std::vector<webrtc::SdpVideoFormat> h264Formats; + + for (const auto &format : formats) { + if (std::find(filterCodecNames.begin(), filterCodecNames.end(), format.name) == filterCodecNames.end()) { + continue; + } + + if (format.name == cricket::kVp9CodecName) { + vp9Formats.push_back(format); + } else if (format.name == cricket::kH264CodecName) { + h264Formats.push_back(format); + } else { + filteredFormats.push_back(format); + } + } + + if (!vp9Formats.empty()) { + bool added = false; + for (const auto &format : vp9Formats) { + if (added) { + break; + } + for (const auto &parameter : format.parameters) { + if (parameter.first == "profile-id") { + if (parameter.second == "0") { + filteredFormats.push_back(format); + added = true; + break; + } + } + } + } + + if (!added) { + filteredFormats.push_back(vp9Formats[0]); + } + } + + if (!h264Formats.empty()) { /* Preference order: profile-level-id (constrained high, then constrained baseline, then anything else), then packetization-mode "1", then level-asymmetry-allowed "1". */ + std::sort(h264Formats.begin(), h264Formats.end(), [](const webrtc::SdpVideoFormat &lhs, const webrtc::SdpVideoFormat &rhs) { + auto lhsParameters = parseH264FormatParameters(lhs); + auto rhsParameters = parseH264FormatParameters(rhs); + + int lhsLevelIdPriority = getH264ProfileLevelIdPriority(lhsParameters.profileLevelId); + int 
lhsPacketizationModePriority = getH264PacketizationModePriority(lhsParameters.packetizationMode); + int lhsLevelAssymetryAllowedPriority = getH264LevelAssymetryAllowedPriority(lhsParameters.levelAssymetryAllowed); + + int rhsLevelIdPriority = getH264ProfileLevelIdPriority(rhsParameters.profileLevelId); + int rhsPacketizationModePriority = getH264PacketizationModePriority(rhsParameters.packetizationMode); + int rhsLevelAssymetryAllowedPriority = getH264LevelAssymetryAllowedPriority(rhsParameters.levelAssymetryAllowed); + + if (lhsLevelIdPriority != rhsLevelIdPriority) { + return lhsLevelIdPriority < rhsLevelIdPriority; + } + if (lhsPacketizationModePriority != rhsPacketizationModePriority) { + return lhsPacketizationModePriority < rhsPacketizationModePriority; + } + if (lhsLevelAssymetryAllowedPriority != rhsLevelAssymetryAllowedPriority) { + return lhsLevelAssymetryAllowedPriority < rhsLevelAssymetryAllowedPriority; + } + + return false; /* Equal priorities: neither format sorts first. std::sort needs a strict weak ordering, so comp(a, a) must be false. */ + }); + + filteredFormats.push_back(h264Formats[0]); + } + + return filteredFormats; +} + static std::vector assignPayloadTypes(std::vector const &formats) { if (formats.empty()) { return {}; @@ -148,7 +278,8 @@ static std::vector assignPayloadTypes(std::vector filterCodecNames = { cricket::kVp8CodecName, - cricket::kVp9CodecName + cricket::kVp9CodecName, + cricket::kH264CodecName, }; for (const auto &codecName : filterCodecNames) { @@ -216,6 +347,11 @@ struct VideoSsrcs { } }; +struct InternalGroupLevelValue { + GroupLevelValue value; + int64_t timestamp = 0; +}; + struct ChannelId { uint32_t networkSsrc = 0; uint32_t actualSsrc = 0; @@ -464,7 +600,8 @@ private: class VideoSinkImpl : public rtc::VideoSinkInterface { public: - VideoSinkImpl() { + VideoSinkImpl(std::string const &endpointId) : + _endpointId(endpointId) { } virtual ~VideoSinkImpl() { @@ -472,7 +609,22 @@ public: virtual void OnFrame(const webrtc::VideoFrame& frame) override { std::unique_lock lock{ _mutex }; - //_lastFrame = frame; + /*int64_t timestamp = 
rtc::TimeMillis(); + if (_lastFrame) { + if (_lastFrame->video_frame_buffer()->width() != frame.video_frame_buffer()->width()) { + int64_t deltaTime = std::abs(_lastFrameSizeChangeTimestamp - timestamp); + if (deltaTime < 200) { + RTC_LOG(LS_WARNING) << "VideoSinkImpl: frequent frame size change detected for " << _endpointId << ": " << _lastFrameSizeChangeHeight << " -> " << _lastFrame->video_frame_buffer()->height() << " -> " << frame.video_frame_buffer()->height() << " in " << deltaTime << " ms"; + } + + _lastFrameSizeChangeHeight = _lastFrame->video_frame_buffer()->height(); + _lastFrameSizeChangeTimestamp = timestamp; + } + } else { + _lastFrameSizeChangeHeight = 0; + _lastFrameSizeChangeTimestamp = timestamp; + } + _lastFrame = frame;*/ for (int i = (int)(_sinks.size()) - 1; i >= 0; i--) { auto strong = _sinks[i].lock(); if (!strong) { @@ -510,6 +662,9 @@ private: std::vector>> _sinks; absl::optional _lastFrame; std::mutex _mutex; + int64_t _lastFrameSizeChangeTimestamp = 0; + int _lastFrameSizeChangeHeight = 0; + std::string _endpointId; }; @@ -525,9 +680,11 @@ struct NoiseSuppressionConfiguration { #if USE_RNNOISE class AudioCapturePostProcessor : public webrtc::CustomProcessing { public: - AudioCapturePostProcessor(std::function updated, std::shared_ptr noiseSuppressionConfiguration) : + AudioCapturePostProcessor(std::function updated, std::shared_ptr noiseSuppressionConfiguration, std::vector *externalAudioSamples, webrtc::Mutex *externalAudioSamplesMutex) : _updated(updated), - _noiseSuppressionConfiguration(noiseSuppressionConfiguration) { + _noiseSuppressionConfiguration(noiseSuppressionConfiguration), + _externalAudioSamples(externalAudioSamples), + _externalAudioSamplesMutex(externalAudioSamplesMutex) { int frameSize = rnnoise_get_frame_size(); _frameSamples.resize(frameSize); @@ -633,6 +790,24 @@ private: }); } } + + _externalAudioSamplesMutex->Lock(); + if (!_externalAudioSamples->empty()) { + float *bufferData = buffer->channels()[0]; + int 
takenSamples = 0; + for (int i = 0; i < _externalAudioSamples->size() && i < _frameSamples.size(); i++) { + float sample = (*_externalAudioSamples)[i]; + sample += bufferData[i]; + sample = std::min(sample, 32768.f); + sample = std::max(sample, -32768.f); + bufferData[i] = sample; + takenSamples++; + } + if (takenSamples != 0) { + _externalAudioSamples->erase(_externalAudioSamples->begin(), _externalAudioSamples->begin() + takenSamples); + } + } + _externalAudioSamplesMutex->Unlock(); } virtual std::string ToString() const override { @@ -652,9 +827,64 @@ private: float _peak = 0; VadHistory _history; SparseVad _vad; + + std::vector *_externalAudioSamples = nullptr; + webrtc::Mutex *_externalAudioSamplesMutex = nullptr; }; #endif +class ExternalAudioRecorder : public FakeAudioDeviceModule::Recorder { +public: + ExternalAudioRecorder(std::vector *externalAudioSamples, webrtc::Mutex *externalAudioSamplesMutex) : + _externalAudioSamples(externalAudioSamples), + _externalAudioSamplesMutex(externalAudioSamplesMutex) { + _samples.resize(480); + } + + virtual ~ExternalAudioRecorder() { + } + + virtual AudioFrame Record() override { + AudioFrame result; + + _externalAudioSamplesMutex->Lock(); + if (!_externalAudioSamples->empty() && _externalAudioSamples->size() >= 480) { + size_t takenSamples = std::min(_samples.size(), _externalAudioSamples->size()); + webrtc::FloatS16ToS16(_externalAudioSamples->data(), takenSamples, _samples.data()); + + result.num_samples = takenSamples; + + if (takenSamples != 0) { + _externalAudioSamples->erase(_externalAudioSamples->begin(), _externalAudioSamples->begin() + takenSamples); + } + } else { + result.num_samples = 0; + } + _externalAudioSamplesMutex->Unlock(); + + result.audio_samples = _samples.data(); + result.bytes_per_sample = 2; + result.num_channels = 1; + result.samples_per_sec = 48000; + result.elapsed_time_ms = 0; + result.ntp_time_ms = 0; + + return result; + } + + virtual int32_t WaitForUs() override { + 
_externalAudioSamplesMutex->Lock(); + _externalAudioSamplesMutex->Unlock(); + + return 1000; + } + +private: + std::vector *_externalAudioSamples = nullptr; + webrtc::Mutex *_externalAudioSamplesMutex = nullptr; + std::vector _samples; +}; + class IncomingAudioChannel : public sigslot::has_slots<> { public: IncomingAudioChannel( @@ -724,9 +954,10 @@ public: outgoingAudioDescription.reset(); incomingAudioDescription.reset(); - std::unique_ptr audioLevelSink(new AudioSinkImpl(std::move(onAudioLevelUpdated), _ssrc, std::move(onAudioFrame))); - - _audioChannel->media_channel()->SetRawAudioSink(ssrc.networkSsrc, std::move(audioLevelSink)); + if (_ssrc.actualSsrc != 1) { + std::unique_ptr audioLevelSink(new AudioSinkImpl(std::move(onAudioLevelUpdated), _ssrc, std::move(onAudioFrame))); + _audioChannel->media_channel()->SetRawAudioSink(ssrc.networkSsrc, std::move(audioLevelSink)); + } _audioChannel->Enable(true); }); @@ -785,7 +1016,7 @@ public: _call(call), _requestedMinQuality(minQuality), _requestedMaxQuality(maxQuality) { - _videoSink.reset(new VideoSinkImpl()); + _videoSink.reset(new VideoSinkImpl(_endpointId)); _threads->getWorkerThread()->Invoke(RTC_FROM_HERE, [this, rtpTransport, &availableVideoFormats, &description, randomIdGenerator]() mutable { uint32_t mid = randomIdGenerator->GenerateId(); @@ -991,7 +1222,7 @@ public: _requestMediaChannelDescriptions(descriptor.requestMediaChannelDescriptions), _requestBroadcastPart(descriptor.requestBroadcastPart), _videoCapture(descriptor.videoCapture), - _videoCaptureSink(new VideoSinkImpl()), + _videoCaptureSink(new VideoSinkImpl("VideoCapture")), _getVideoSource(descriptor.getVideoSource), _disableIncomingChannels(descriptor.disableIncomingChannels), _useDummyChannel(descriptor.useDummyChannel), @@ -1012,6 +1243,9 @@ public: _threads->getWorkerThread()->Invoke(RTC_FROM_HERE, [this] { _workerThreadSafery = webrtc::PendingTaskSafetyFlag::Create(); }); + _threads->getNetworkThread()->Invoke(RTC_FROM_HERE, [this] { + 
_networkThreadSafery = webrtc::PendingTaskSafetyFlag::Create(); + }); if (_videoCapture) { assert(!_getVideoSource); @@ -1020,6 +1254,8 @@ public: generateSsrcs(); _noiseSuppressionConfiguration = std::make_shared(descriptor.initialEnableNoiseSuppression); + + _externalAudioRecorder.reset(new ExternalAudioRecorder(&_externalAudioSamples, &_externalAudioSamplesMutex)); } ~GroupInstanceCustomInternal() { @@ -1092,29 +1328,34 @@ public: strong->receiveDataChannelMessage(message); } }); + }, + [=](uint32_t ssrc, uint8_t audioLevel, bool isSpeech) { + threads->getMediaThread()->PostTask(RTC_FROM_HERE, [weak, ssrc, audioLevel, isSpeech]() { + if (const auto strong = weak.lock()) { + strong->updateSsrcAudioLevel(ssrc, audioLevel, isSpeech); + } + }); }, threads); })); + std::unique_ptr audioProcessor = nullptr; if (_videoContentType != VideoContentType::Screencast) { PlatformInterface::SharedInstance()->configurePlatformAudio(); + + #if USE_RNNOISE + audioProcessor = std::make_unique([weak, threads = _threads](GroupLevelValue const &level) { + threads->getMediaThread()->PostTask(RTC_FROM_HERE, [weak, level](){ + auto strong = weak.lock(); + if (!strong) { + return; + } + strong->_myAudioLevel = level; + }); + }, _noiseSuppressionConfiguration, &_externalAudioSamples, &_externalAudioSamplesMutex); + #endif } -#if USE_RNNOISE - auto processor = std::make_unique([weak, threads = _threads](GroupLevelValue const &level) { - threads->getMediaThread()->PostTask(RTC_FROM_HERE, [weak, level](){ - auto strong = weak.lock(); - if (!strong) { - return; - } - strong->_myAudioLevel = level; - }); - }, _noiseSuppressionConfiguration); -#endif - - _threads->getWorkerThread()->Invoke(RTC_FROM_HERE, [this -#if USE_RNNOISE - , processor = std::move(processor) -#endif + _threads->getWorkerThread()->Invoke(RTC_FROM_HERE, [this, audioProcessor = std::move(audioProcessor) ]() mutable { cricket::MediaEngineDependencies mediaDeps; mediaDeps.task_queue_factory = _taskQueueFactory.get(); @@ 
-1124,14 +1365,12 @@ public: mediaDeps.video_encoder_factory = PlatformInterface::SharedInstance()->makeVideoEncoderFactory(_platformContext); mediaDeps.video_decoder_factory = PlatformInterface::SharedInstance()->makeVideoDecoderFactory(_platformContext); - #if USE_RNNOISE - if (_audioLevelsUpdated) { + if (_audioLevelsUpdated && audioProcessor) { webrtc::AudioProcessingBuilder builder; - builder.SetCapturePostProcessing(std::move(processor)); + builder.SetCapturePostProcessing(std::move(audioProcessor)); mediaDeps.audio_processing = builder.Create(); } - #endif _audioDeviceModule = createAudioDeviceModule(); if (!_audioDeviceModule) { @@ -1139,7 +1378,7 @@ public: } mediaDeps.adm = _audioDeviceModule; - _availableVideoFormats = mediaDeps.video_encoder_factory->GetSupportedFormats(); + _availableVideoFormats = filterSupportedVideoFormats(mediaDeps.video_encoder_factory->GetSupportedFormats()); std::unique_ptr mediaEngine = cricket::CreateMediaEngine(std::move(mediaDeps)); @@ -1172,9 +1411,6 @@ public: _videoBitrateAllocatorFactory = webrtc::CreateBuiltinVideoBitrateAllocatorFactory(); - configureVideoParams(); - createOutgoingVideoChannel(); - if (_audioLevelsUpdated) { beginLevelsTimer(100); } @@ -1187,6 +1423,10 @@ public: addIncomingAudioChannel(ChannelId(1), true); } + if (_videoContentType == VideoContentType::Screencast) { + setIsMuted(false); + } + /*if (_videoContentType != VideoContentType::Screencast) { createOutgoingAudioChannel(); }*/ @@ -1234,7 +1474,6 @@ public: return; } - _videoSourceGroups.clear(); cricket::StreamParams videoSendStreamParams; std::vector simulcastGroupSsrcs; @@ -1255,7 +1494,6 @@ public: GroupJoinPayloadVideoSourceGroup payloadSimulcastGroup; payloadSimulcastGroup.semantics = "SIM"; payloadSimulcastGroup.ssrcs = simulcastGroupSsrcs; - _videoSourceGroups.push_back(payloadSimulcastGroup); } for (auto fidGroup : fidGroups) { @@ -1264,7 +1502,6 @@ public: GroupJoinPayloadVideoSourceGroup payloadFidGroup; payloadFidGroup.semantics = 
"FID"; payloadFidGroup.ssrcs = fidGroup.ssrcs; - _videoSourceGroups.push_back(payloadFidGroup); } videoSendStreamParams.cname = "cname"; @@ -1326,6 +1563,44 @@ public: rtpParameters.encodings[i].active = _outgoingVideoConstraint >= 720; } } + } else if (rtpParameters.encodings.size() == 2) { + for (int i = 0; i < (int)rtpParameters.encodings.size(); i++) { + if (i == 0) { + rtpParameters.encodings[i].min_bitrate_bps = 50000; + rtpParameters.encodings[i].max_bitrate_bps = 100000; + rtpParameters.encodings[i].scale_resolution_down_by = 2.0; + } else if (i == 1) { + rtpParameters.encodings[i].min_bitrate_bps = 200000; + rtpParameters.encodings[i].max_bitrate_bps = 900000 + 100000; + } + } + } else { + rtpParameters.encodings[0].max_bitrate_bps = (800000 + 100000) * 2; + } + + _outgoingVideoChannel->media_channel()->SetRtpSendParameters(_outgoingVideoSsrcs.simulcastLayers[0].ssrc, rtpParameters); + }); + } else { + _threads->getWorkerThread()->Invoke(RTC_FROM_HERE, [this]() { + webrtc::RtpParameters rtpParameters = _outgoingVideoChannel->media_channel()->GetRtpSendParameters(_outgoingVideoSsrcs.simulcastLayers[0].ssrc); + if (rtpParameters.encodings.size() == 3) { + for (int i = 0; i < (int)rtpParameters.encodings.size(); i++) { + if (i == 0) { + rtpParameters.encodings[i].min_bitrate_bps = 50000; + rtpParameters.encodings[i].max_bitrate_bps = 60000; + rtpParameters.encodings[i].scale_resolution_down_by = 4.0; + rtpParameters.encodings[i].active = _outgoingVideoConstraint >= 180; + } else if (i == 1) { + rtpParameters.encodings[i].max_bitrate_bps = 100000; + rtpParameters.encodings[i].max_bitrate_bps = 110000; + rtpParameters.encodings[i].scale_resolution_down_by = 2.0; + rtpParameters.encodings[i].active = _outgoingVideoConstraint >= 360; + } else if (i == 2) { + rtpParameters.encodings[i].min_bitrate_bps = 300000; + rtpParameters.encodings[i].max_bitrate_bps = 800000 + 100000; + rtpParameters.encodings[i].active = _outgoingVideoConstraint >= 720; + } + } } else if 
(rtpParameters.encodings.size() == 2) { for (int i = 0; i < (int)rtpParameters.encodings.size(); i++) { if (i == 0) { @@ -1377,13 +1652,12 @@ public: } void createOutgoingAudioChannel() { - if (_outgoingAudioChannel - || _videoContentType == VideoContentType::Screencast) { + if (_outgoingAudioChannel) { return; } cricket::AudioOptions audioOptions; - if (_disableOutgoingAudioProcessing) { + if (_disableOutgoingAudioProcessing || _videoContentType == VideoContentType::Screencast) { audioOptions.echo_cancellation = false; audioOptions.noise_suppression = false; audioOptions.auto_gain_control = false; @@ -1453,6 +1727,31 @@ public: void stop() { } + void updateSsrcAudioLevel(uint32_t ssrc, uint8_t audioLevel, bool isSpeech) { + float mappedLevel = ((float)audioLevel) / (float)(0x7f); + mappedLevel = (fabs(1.0f - mappedLevel)) * 1.0f; + + auto it = _audioLevels.find(ChannelId(ssrc)); + if (it != _audioLevels.end()) { + it->second.value.level = fmax(it->second.value.level, mappedLevel); + if (isSpeech) { + it->second.value.voice = true; + } + it->second.timestamp = rtc::TimeMillis(); + } else { + InternalGroupLevelValue updated; + updated.value.level = mappedLevel; + updated.value.voice = isSpeech; + updated.timestamp = rtc::TimeMillis(); + _audioLevels.insert(std::make_pair(ChannelId(ssrc), std::move(updated))); + } + + auto audioChannel = _incomingAudioChannels.find(ChannelId(ssrc)); + if (audioChannel != _incomingAudioChannels.end()) { + audioChannel->second->updateActivity(); + } + } + void beginLevelsTimer(int timeoutMs) { const auto weak = std::weak_ptr(shared_from_this()); _threads->getMediaThread()->PostDelayedTask(RTC_FROM_HERE, [weak]() { @@ -1461,10 +1760,13 @@ public: return; } + int64_t timestamp = rtc::TimeMillis(); + int64_t maxSampleTimeout = 400; + GroupLevelsUpdate levelsUpdate; levelsUpdate.updates.reserve(strong->_audioLevels.size() + 1); for (auto &it : strong->_audioLevels) { - if (it.second.level > 0.001f) { + if (it.second.value.level > 0.001f && 
it.second.timestamp > timestamp - maxSampleTimeout) { uint32_t effectiveSsrc = it.first.actualSsrc; if (std::find_if(levelsUpdate.updates.begin(), levelsUpdate.updates.end(), [&](GroupLevelUpdate const &item) { return item.ssrc == effectiveSsrc; @@ -1473,25 +1775,33 @@ public: } levelsUpdate.updates.push_back(GroupLevelUpdate{ effectiveSsrc, - it.second, + it.second.value, }); - if (it.second.level > 0.001f) { + if (it.second.value.level > 0.001f) { auto audioChannel = strong->_incomingAudioChannels.find(it.first); if (audioChannel != strong->_incomingAudioChannels.end()) { audioChannel->second->updateActivity(); } } + + it.second.value.level *= 0.5f; + it.second.value.voice = false; } } + auto myAudioLevel = strong->_myAudioLevel; myAudioLevel.isMuted = strong->_isMuted; levelsUpdate.updates.push_back(GroupLevelUpdate{ 0, myAudioLevel }); - strong->_audioLevels.clear(); if (strong->_audioLevelsUpdated) { strong->_audioLevelsUpdated(levelsUpdate); } + bool isSpeech = myAudioLevel.voice && !myAudioLevel.isMuted; + strong->_networkManager->perform(RTC_FROM_HERE, [isSpeech = isSpeech](GroupNetworkManager *networkManager) { + networkManager->setOutgoingVoiceActivity(isSpeech); + }); + strong->beginLevelsTimer(100); }, timeoutMs); } @@ -1793,6 +2103,9 @@ public: } void configureVideoParams() { + if (!_sharedVideoInformation) { + return; + } if (_selectedPayloadType) { // Already configured. 
return; @@ -1859,6 +2172,10 @@ public: codecName = cricket::kVp9CodecName; break; } + case VideoCodecName::H264: { + codecName = cricket::kH264CodecName; + break; + } default: { break; } @@ -1871,6 +2188,18 @@ public: cricket::kVp8CodecName, cricket::kVp9CodecName }; + + bool enableH264 = false; + for (const auto &payloadType : _sharedVideoInformation->payloadTypes) { + if (payloadType.name == cricket::kH264CodecName) { + enableH264 = true; + break; + } + } + if (enableH264) { + defaultCodecPriorities.insert(defaultCodecPriorities.begin(), cricket::kH264CodecName); + } + for (const auto &name : defaultCodecPriorities) { if (std::find(codecPriorities.begin(), codecPriorities.end(), name) == codecPriorities.end()) { codecPriorities.push_back(name); @@ -1921,7 +2250,7 @@ public: if (_videoContentType == VideoContentType::Screencast) { preferences.max_bitrate_bps = std::max(preferences.min_bitrate_bps, (1020 + 32) * 1000); } else { - preferences.max_bitrate_bps = std::max(preferences.min_bitrate_bps, (700 + 32) * 1000); + preferences.max_bitrate_bps = std::max(preferences.min_bitrate_bps, (1020 + 32) * 1000); } } else { preferences.min_bitrate_bps = 32000; @@ -2356,6 +2685,32 @@ public: for (int layerIndex = 0; layerIndex < numVideoSimulcastLayers; layerIndex++) { _outgoingVideoSsrcs.simulcastLayers.push_back(VideoSsrcs::SimulcastLayer(outgoingVideoSsrcBase + layerIndex * 2 + 0, outgoingVideoSsrcBase + layerIndex * 2 + 1)); } + + _videoSourceGroups.clear(); + + std::vector simulcastGroupSsrcs; + std::vector fidGroups; + for (const auto &layer : _outgoingVideoSsrcs.simulcastLayers) { + simulcastGroupSsrcs.push_back(layer.ssrc); + + cricket::SsrcGroup fidGroup(cricket::kFidSsrcGroupSemantics, { layer.ssrc, layer.fidSsrc }); + fidGroups.push_back(fidGroup); + } + if (simulcastGroupSsrcs.size() > 1) { + cricket::SsrcGroup simulcastGroup(cricket::kSimSsrcGroupSemantics, simulcastGroupSsrcs); + + GroupJoinPayloadVideoSourceGroup payloadSimulcastGroup; + 
payloadSimulcastGroup.semantics = "SIM"; + payloadSimulcastGroup.ssrcs = simulcastGroupSsrcs; + _videoSourceGroups.push_back(payloadSimulcastGroup); + } + + for (auto fidGroup : fidGroups) { + GroupJoinPayloadVideoSourceGroup payloadFidGroup; + payloadFidGroup.semantics = "FID"; + payloadFidGroup.ssrcs = fidGroup.ssrcs; + _videoSourceGroups.push_back(payloadFidGroup); + } } void emitJoinPayload(std::function completion) { @@ -2428,6 +2783,23 @@ public: #endif // WEBRTC_IOS } + void addExternalAudioSamples(std::vector &&samples) { + if (samples.size() % 2 != 0) { + return; + } + _externalAudioSamplesMutex.Lock(); + + size_t previousSize = _externalAudioSamples.size(); + _externalAudioSamples.resize(_externalAudioSamples.size() + samples.size() / 2); + webrtc::S16ToFloatS16((const int16_t *)samples.data(), samples.size() / 2, _externalAudioSamples.data() + previousSize); + + if (_externalAudioSamples.size() > 2 * 48000) { + _externalAudioSamples.erase(_externalAudioSamples.begin(), _externalAudioSamples.begin() + (_externalAudioSamples.size() - 2 * 48000)); + } + + _externalAudioSamplesMutex.Unlock(); + } + void setJoinResponsePayload(std::string const &payload) { RTC_LOG(LS_INFO) << formatTimestampMillis(rtc::TimeMillis()) << ": " << "setJoinResponsePayload"; @@ -2478,6 +2850,9 @@ public: networkManager->setRemoteParams(remoteIceParameters, iceCandidates, fingerprint.get()); }); + configureVideoParams(); + createOutgoingVideoChannel(); + adjustBitratePreferences(true); if (!_pendingRequestedVideo.empty()) { @@ -2554,6 +2929,12 @@ public: } void setIsMuted(bool isMuted) { + if (_videoContentType == VideoContentType::Screencast) { + if (isMuted) { + return; + } + } + if (_isMuted == isMuted) { return; } @@ -2630,7 +3011,7 @@ public: const auto weak = std::weak_ptr(shared_from_this()); std::function onAudioSinkUpdate; - if (_audioLevelsUpdated) { + /*if (_audioLevelsUpdated) { onAudioSinkUpdate = [weak, ssrc = ssrc, threads = _threads](AudioSinkImpl::Update update) { 
threads->getProcessThread()->PostTask(RTC_FROM_HERE, [weak, ssrc, update, threads]() { bool voice = update.vad->update(update.buffer.get()); @@ -2646,7 +3027,7 @@ public: }); }); }; - } + }*/ std::unique_ptr channel(new IncomingAudioChannel( _channelManager.get(), @@ -2857,6 +3238,14 @@ private: if (const auto result = check(_createAudioDeviceModule(_taskQueueFactory.get()))) { return result; } + } else if (_videoContentType == VideoContentType::Screencast) { +#ifdef WEBRTC_ANDROID + return check(create(webrtc::AudioDeviceModule::kAndroidScreenAudio)); +#else + FakeAudioDeviceModule::Options options; + options.num_channels = 1; + return check(FakeAudioDeviceModule::Creator(nullptr, _externalAudioRecorder, options)(_taskQueueFactory.get())); +#endif } return check(create(webrtc::AudioDeviceModule::kPlatformDefaultAudio)); } @@ -2921,7 +3310,7 @@ private: int _pendingOutgoingVideoConstraint = -1; int _pendingOutgoingVideoConstraintRequestId = 0; - std::map _audioLevels; + std::map _audioLevels; GroupLevelValue _myAudioLevel; bool _isMuted = true; @@ -2948,6 +3337,10 @@ private: absl::optional _currentRequestedBroadcastPart; int64_t _lastBroadcastPartReceivedTimestamp = 0; + std::vector _externalAudioSamples; + webrtc::Mutex _externalAudioSamplesMutex; + std::shared_ptr _externalAudioRecorder; + bool _isRtcConnected = false; bool _isBroadcastConnected = false; absl::optional _broadcastEnabledUntilRtcIsConnectedAtTimestamp; @@ -2955,6 +3348,7 @@ private: GroupNetworkState _effectiveNetworkState; rtc::scoped_refptr _workerThreadSafery; + rtc::scoped_refptr _networkThreadSafery; std::shared_ptr _platformContext; }; @@ -3062,6 +3456,12 @@ void GroupInstanceCustomImpl::setAudioInputDevice(std::string id) { }); } +void GroupInstanceCustomImpl::addExternalAudioSamples(std::vector &&samples) { + _internal->perform(RTC_FROM_HERE, [samples = std::move(samples)](GroupInstanceCustomInternal *internal) mutable { + internal->addExternalAudioSamples(std::move(samples)); + }); +} + 
void GroupInstanceCustomImpl::addIncomingVideoOutput(std::string const &endpointId, std::weak_ptr> sink) { _internal->perform(RTC_FROM_HERE, [endpointId, sink](GroupInstanceCustomInternal *internal) mutable { internal->addIncomingVideoOutput(endpointId, sink); diff --git a/TMessagesProj/jni/voip/tgcalls/group/GroupInstanceCustomImpl.h b/TMessagesProj/jni/voip/tgcalls/group/GroupInstanceCustomImpl.h index e11c00762..5397bc8ea 100644 --- a/TMessagesProj/jni/voip/tgcalls/group/GroupInstanceCustomImpl.h +++ b/TMessagesProj/jni/voip/tgcalls/group/GroupInstanceCustomImpl.h @@ -36,6 +36,7 @@ public: void setVideoSource(std::function getVideoSource); void setAudioOutputDevice(std::string id); void setAudioInputDevice(std::string id); + void addExternalAudioSamples(std::vector &&samples); void addIncomingVideoOutput(std::string const &endpointId, std::weak_ptr> sink); diff --git a/TMessagesProj/jni/voip/tgcalls/group/GroupInstanceImpl.h b/TMessagesProj/jni/voip/tgcalls/group/GroupInstanceImpl.h index bff07d9bb..548ebc57c 100644 --- a/TMessagesProj/jni/voip/tgcalls/group/GroupInstanceImpl.h +++ b/TMessagesProj/jni/voip/tgcalls/group/GroupInstanceImpl.h @@ -88,7 +88,8 @@ enum class VideoContentType { enum class VideoCodecName { VP8, - VP9 + VP9, + H264 }; class RequestMediaChannelDescriptionTask { @@ -177,6 +178,7 @@ public: virtual void setVideoSource(std::function getVideoSource) = 0; virtual void setAudioOutputDevice(std::string id) = 0; virtual void setAudioInputDevice(std::string id) = 0; + virtual void addExternalAudioSamples(std::vector &&samples) = 0; virtual void addIncomingVideoOutput(std::string const &endpointId, std::weak_ptr> sink) = 0; diff --git a/TMessagesProj/jni/voip/tgcalls/group/GroupNetworkManager.cpp b/TMessagesProj/jni/voip/tgcalls/group/GroupNetworkManager.cpp index 0d13513e5..167046858 100644 --- a/TMessagesProj/jni/voip/tgcalls/group/GroupNetworkManager.cpp +++ b/TMessagesProj/jni/voip/tgcalls/group/GroupNetworkManager.cpp @@ -14,6 +14,8 @@ #include 
"pc/dtls_srtp_transport.h" #include "pc/dtls_transport.h" #include "media/sctp/sctp_transport_factory.h" +#include "modules/rtp_rtcp/source/rtp_utility.h" +#include "modules/rtp_rtcp/source/byte_io.h" #include "platform/PlatformInterface.h" #include "StaticThreads.h" @@ -43,11 +45,13 @@ public: SctpDataChannelProviderInterfaceImpl( cricket::DtlsTransport *transportChannel, std::function onStateChanged, + std::function onTerminated, std::function onMessageReceived, std::shared_ptr threads ) : _threads(std::move(threads)), _onStateChanged(onStateChanged), + _onTerminated(onTerminated), _onMessageReceived(onMessageReceived) { assert(_threads->getNetworkThread()->IsCurrent()); @@ -56,6 +60,7 @@ public: _sctpTransport = _sctpTransportFactory->CreateSctpTransport(transportChannel); _sctpTransport->SignalReadyToSendData.connect(this, &SctpDataChannelProviderInterfaceImpl::sctpReadyToSendData); _sctpTransport->SignalDataReceived.connect(this, &SctpDataChannelProviderInterfaceImpl::sctpDataReceived); + _sctpTransport->SignalClosedAbruptly.connect(this, &SctpDataChannelProviderInterfaceImpl::sctpClosedAbruptly); webrtc::InternalDataChannelInit dataChannelInit; dataChannelInit.id = 0; @@ -133,6 +138,14 @@ public: _dataChannel->OnTransportReady(true); } + void sctpClosedAbruptly() { + assert(_threads->getNetworkThread()->IsCurrent()); + + if (_onTerminated) { + _onTerminated(); + } + } + void sctpDataReceived(const cricket::ReceiveDataParams& params, const rtc::CopyOnWriteBuffer& buffer) { assert(_threads->getNetworkThread()->IsCurrent()); @@ -180,6 +193,7 @@ public: private: std::shared_ptr _threads; std::function _onStateChanged; + std::function _onTerminated; std::function _onMessageReceived; std::unique_ptr _sctpTransportFactory; @@ -191,6 +205,279 @@ private: }; +enum { + kRtcpExpectedVersion = 2, + kRtcpMinHeaderLength = 4, + kRtcpMinParseLength = 8, + + kRtpExpectedVersion = 2, + kRtpMinParseLength = 12 +}; + +static void 
updateHeaderWithVoiceActivity(rtc::CopyOnWriteBuffer *packet, const uint8_t* ptrRTPDataExtensionEnd, const uint8_t* ptr, bool voiceActivity) { + while (ptrRTPDataExtensionEnd - ptr > 0) { + // 0 + // 0 1 2 3 4 5 6 7 + // +-+-+-+-+-+-+-+-+ + // | ID | len | + // +-+-+-+-+-+-+-+-+ + + // Note that 'len' is the header extension element length, which is the + // number of bytes - 1. + const int id = (*ptr & 0xf0) >> 4; + const int len = (*ptr & 0x0f); + ptr++; + + if (id == 0) { + // Padding byte, skip ignoring len. + continue; + } + + if (id == 15) { + RTC_LOG(LS_VERBOSE) + << "RTP extension header 15 encountered. Terminate parsing."; + return; + } + + if (ptrRTPDataExtensionEnd - ptr < (len + 1)) { + RTC_LOG(LS_WARNING) << "Incorrect one-byte extension len: " << (len + 1) + << ", bytes left in buffer: " + << (ptrRTPDataExtensionEnd - ptr); + return; + } + + if (id == 1) { // kAudioLevelUri + uint8_t audioLevel = ptr[0] & 0x7f; + bool parsedVoiceActivity = (ptr[0] & 0x80) != 0; + + if (parsedVoiceActivity != voiceActivity) { + ptrdiff_t byteOffset = ptr - packet->data(); + uint8_t *mutableBytes = packet->MutableData(); + uint8_t audioActivityBit = voiceActivity ? 0x80 : 0; + mutableBytes[byteOffset] = audioLevel | audioActivityBit; + } + return; + } + + ptr += (len + 1); + } +} + +static void readHeaderVoiceActivity(const uint8_t* ptrRTPDataExtensionEnd, const uint8_t* ptr, bool &didRead, uint8_t &audioLevel, bool &voiceActivity) { + while (ptrRTPDataExtensionEnd - ptr > 0) { + // 0 + // 0 1 2 3 4 5 6 7 + // +-+-+-+-+-+-+-+-+ + // | ID | len | + // +-+-+-+-+-+-+-+-+ + + // Note that 'len' is the header extension element length, which is the + // number of bytes - 1. + const int id = (*ptr & 0xf0) >> 4; + const int len = (*ptr & 0x0f); + ptr++; + + if (id == 0) { + // Padding byte, skip ignoring len. + continue; + } + + if (id == 15) { + RTC_LOG(LS_VERBOSE) + << "RTP extension header 15 encountered. 
Terminate parsing."; + return; + } + + if (ptrRTPDataExtensionEnd - ptr < (len + 1)) { + RTC_LOG(LS_WARNING) << "Incorrect one-byte extension len: " << (len + 1) + << ", bytes left in buffer: " + << (ptrRTPDataExtensionEnd - ptr); + return; + } + + if (id == 1) { // kAudioLevelUri + didRead = true; + audioLevel = ptr[0] & 0x7f; + voiceActivity = (ptr[0] & 0x80) != 0; + + return; + } + + ptr += (len + 1); + } +} + + +static void maybeUpdateRtpVoiceActivity(rtc::CopyOnWriteBuffer *packet, bool voiceActivity) { + const uint8_t *_ptrRTPDataBegin = packet->data(); + const uint8_t *_ptrRTPDataEnd = packet->data() + packet->size(); + + const ptrdiff_t length = _ptrRTPDataEnd - _ptrRTPDataBegin; + if (length < kRtpMinParseLength) { + return; + } + + // Version + const uint8_t V = _ptrRTPDataBegin[0] >> 6; + // eXtension + const bool X = ((_ptrRTPDataBegin[0] & 0x10) == 0) ? false : true; + const uint8_t CC = _ptrRTPDataBegin[0] & 0x0f; + + const uint8_t PT = _ptrRTPDataBegin[1] & 0x7f; + + const uint8_t* ptr = &_ptrRTPDataBegin[4]; + + ptr += 4; + + ptr += 4; + + if (V != kRtpExpectedVersion) { + return; + } + + const size_t CSRCocts = CC * 4; + + if ((ptr + CSRCocts) > _ptrRTPDataEnd) { + return; + } + + if (PT != 111) { + return; + } + + for (uint8_t i = 0; i < CC; ++i) { + ptr += 4; + } + + if (X) { + /* RTP header extension, RFC 3550. + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | defined by profile | length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | header extension | + | .... 
| + */ + const ptrdiff_t remain = _ptrRTPDataEnd - ptr; + if (remain < 4) { + return; + } + + uint16_t definedByProfile = webrtc::ByteReader::ReadBigEndian(ptr); + ptr += 2; + + // in 32 bit words + size_t XLen = webrtc::ByteReader::ReadBigEndian(ptr); + ptr += 2; + XLen *= 4; // in bytes + + if (static_cast(remain) < (4 + XLen)) { + return; + } + static constexpr uint16_t kRtpOneByteHeaderExtensionId = 0xBEDE; + if (definedByProfile == kRtpOneByteHeaderExtensionId) { + const uint8_t* ptrRTPDataExtensionEnd = ptr + XLen; + updateHeaderWithVoiceActivity(packet, ptrRTPDataExtensionEnd, ptr, voiceActivity); + } + } +} + +static void maybeReadRtpVoiceActivity(rtc::CopyOnWriteBuffer *packet, bool &didRead, uint32_t &ssrc, uint8_t &audioLevel, bool &voiceActivity) { + const uint8_t *_ptrRTPDataBegin = packet->data(); + const uint8_t *_ptrRTPDataEnd = packet->data() + packet->size(); + + const ptrdiff_t length = _ptrRTPDataEnd - _ptrRTPDataBegin; + if (length < kRtpMinParseLength) { + return; + } + + // Version + const uint8_t V = _ptrRTPDataBegin[0] >> 6; + // eXtension + const bool X = ((_ptrRTPDataBegin[0] & 0x10) == 0) ? false : true; + const uint8_t CC = _ptrRTPDataBegin[0] & 0x0f; + + const uint8_t PT = _ptrRTPDataBegin[1] & 0x7f; + + const uint8_t* ptr = &_ptrRTPDataBegin[4]; + + ptr += 4; + + ssrc = webrtc::ByteReader::ReadBigEndian(ptr); + ptr += 4; + + if (V != kRtpExpectedVersion) { + return; + } + + const size_t CSRCocts = CC * 4; + + if ((ptr + CSRCocts) > _ptrRTPDataEnd) { + return; + } + + if (PT != 111) { + return; + } + + for (uint8_t i = 0; i < CC; ++i) { + ptr += 4; + } + + if (X) { + /* RTP header extension, RFC 3550. + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | defined by profile | length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | header extension | + | .... 
| + */ + const ptrdiff_t remain = _ptrRTPDataEnd - ptr; + if (remain < 4) { + return; + } + + uint16_t definedByProfile = webrtc::ByteReader::ReadBigEndian(ptr); + ptr += 2; + + // in 32 bit words + size_t XLen = webrtc::ByteReader::ReadBigEndian(ptr); + ptr += 2; + XLen *= 4; // in bytes + + if (static_cast(remain) < (4 + XLen)) { + return; + } + static constexpr uint16_t kRtpOneByteHeaderExtensionId = 0xBEDE; + if (definedByProfile == kRtpOneByteHeaderExtensionId) { + const uint8_t* ptrRTPDataExtensionEnd = ptr + XLen; + readHeaderVoiceActivity(ptrRTPDataExtensionEnd, ptr, didRead, audioLevel, voiceActivity); + } + } +} + +class WrappedDtlsSrtpTransport : public webrtc::DtlsSrtpTransport { +public: + bool _voiceActivity = false; + +public: + WrappedDtlsSrtpTransport(bool rtcp_mux_enabled) : + webrtc::DtlsSrtpTransport(rtcp_mux_enabled) { + + } + + virtual ~WrappedDtlsSrtpTransport() { + } + + bool SendRtpPacket(rtc::CopyOnWriteBuffer *packet, const rtc::PacketOptions& options, int flags) override { + maybeUpdateRtpVoiceActivity(packet, _voiceActivity); + return webrtc::DtlsSrtpTransport::SendRtpPacket(packet, options, flags); + } +}; + webrtc::CryptoOptions GroupNetworkManager::getDefaulCryptoOptions() { auto options = webrtc::CryptoOptions(); options.srtp.enable_aes128_sha1_80_crypto_cipher = false; @@ -203,30 +490,32 @@ GroupNetworkManager::GroupNetworkManager( std::function transportMessageReceived, std::function dataChannelStateUpdated, std::function dataChannelMessageReceived, + std::function audioActivityUpdated, std::shared_ptr threads) : _threads(std::move(threads)), _stateUpdated(std::move(stateUpdated)), _transportMessageReceived(std::move(transportMessageReceived)), _dataChannelStateUpdated(dataChannelStateUpdated), -_dataChannelMessageReceived(dataChannelMessageReceived) { +_dataChannelMessageReceived(dataChannelMessageReceived), +_audioActivityUpdated(audioActivityUpdated) { assert(_threads->getNetworkThread()->IsCurrent()); - + _localIceParameters = 
PeerIceParameters(rtc::CreateRandomString(cricket::ICE_UFRAG_LENGTH), rtc::CreateRandomString(cricket::ICE_PWD_LENGTH)); - + _localCertificate = rtc::RTCCertificateGenerator::GenerateCertificate(rtc::KeyParams(rtc::KT_ECDSA), absl::nullopt); _networkMonitorFactory = PlatformInterface::SharedInstance()->createNetworkMonitorFactory(); - + _socketFactory.reset(new rtc::BasicPacketSocketFactory(_threads->getNetworkThread())); _networkManager = std::make_unique(_networkMonitorFactory.get()); _asyncResolverFactory = std::make_unique(); - - _dtlsSrtpTransport = std::make_unique(true); + + _dtlsSrtpTransport = std::make_unique(true); _dtlsSrtpTransport->SetDtlsTransports(nullptr, nullptr); _dtlsSrtpTransport->SetActiveResetSrtpParams(false); _dtlsSrtpTransport->SignalReadyToSend.connect(this, &GroupNetworkManager::DtlsReadyToSend); _dtlsSrtpTransport->SignalRtpPacketReceived.connect(this, &GroupNetworkManager::RtpPacketReceived_n); - + resetDtlsSrtpTransport(); } @@ -284,13 +573,19 @@ void GroupNetworkManager::resetDtlsSrtpTransport() { _dtlsTransport->SetDtlsRole(rtc::SSLRole::SSL_SERVER); _dtlsTransport->SetLocalCertificate(_localCertificate); - + _dtlsSrtpTransport->SetDtlsTransports(_dtlsTransport.get(), nullptr); } void GroupNetworkManager::start() { _transportChannel->MaybeStartGathering(); + restartDataChannel(); +} + +void GroupNetworkManager::restartDataChannel() { + _dataChannelStateUpdated(false); + const auto weak = std::weak_ptr(shared_from_this()); _dataChannelInterface.reset(new SctpDataChannelProviderInterfaceImpl( _dtlsTransport.get(), @@ -302,6 +597,14 @@ void GroupNetworkManager::start() { } strong->_dataChannelStateUpdated(state); }, + [weak, threads = _threads]() { + assert(threads->getNetworkThread()->IsCurrent()); + const auto strong = weak.lock(); + if (!strong) { + return; + } + strong->restartDataChannel(); + }, [weak, threads = _threads](std::string const &message) { assert(threads->getNetworkThread()->IsCurrent()); const auto strong = 
weak.lock(); @@ -312,26 +615,28 @@ void GroupNetworkManager::start() { }, _threads )); + + _dataChannelInterface->updateIsConnected(_isConnected); } void GroupNetworkManager::stop() { _transportChannel->SignalIceTransportStateChanged.disconnect(this); _transportChannel->SignalReadPacket.disconnect(this); - + _dtlsTransport->SignalWritableState.disconnect(this); _dtlsTransport->SignalReceivingState.disconnect(this); - + _dtlsSrtpTransport->SetDtlsTransports(nullptr, nullptr); - + _dataChannelInterface.reset(); _dtlsTransport.reset(); _transportChannel.reset(); _portAllocator.reset(); - + _localIceParameters = PeerIceParameters(rtc::CreateRandomString(cricket::ICE_UFRAG_LENGTH), rtc::CreateRandomString(cricket::ICE_PWD_LENGTH)); - + _localCertificate = rtc::RTCCertificateGenerator::GenerateCertificate(rtc::KeyParams(rtc::KT_ECDSA), absl::nullopt); - + resetDtlsSrtpTransport(); } @@ -373,6 +678,12 @@ void GroupNetworkManager::sendDataChannelMessage(std::string const &message) { } } +void GroupNetworkManager::setOutgoingVoiceActivity(bool isSpeech) { + if (_dtlsSrtpTransport) { + ((WrappedDtlsSrtpTransport *)_dtlsSrtpTransport.get())->_voiceActivity = isSpeech; + } +} + webrtc::RtpTransport *GroupNetworkManager::getRtpTransport() { return _dtlsSrtpTransport.get(); } @@ -448,6 +759,17 @@ void GroupNetworkManager::transportPacketReceived(rtc::PacketTransportInternal * } void GroupNetworkManager::RtpPacketReceived_n(rtc::CopyOnWriteBuffer *packet, int64_t packet_time_us, bool isUnresolved) { + bool didRead = false; + uint32_t ssrc = 0; + uint8_t audioLevel = 0; + bool isSpeech = false; + maybeReadRtpVoiceActivity(packet, didRead, ssrc, audioLevel, isSpeech); + if (didRead && ssrc != 0) { + if (_audioActivityUpdated) { + _audioActivityUpdated(ssrc, audioLevel, isSpeech); + } + } + if (_transportMessageReceived) { _transportMessageReceived(*packet, isUnresolved); } diff --git a/TMessagesProj/jni/voip/tgcalls/group/GroupNetworkManager.h 
b/TMessagesProj/jni/voip/tgcalls/group/GroupNetworkManager.h index d62289f39..eacefae0b 100644 --- a/TMessagesProj/jni/voip/tgcalls/group/GroupNetworkManager.h +++ b/TMessagesProj/jni/voip/tgcalls/group/GroupNetworkManager.h @@ -61,6 +61,7 @@ public: std::function transportMessageReceived, std::function dataChannelStateUpdated, std::function dataChannelMessageReceived, + std::function audioActivityUpdated, std::shared_ptr threads); ~GroupNetworkManager(); @@ -73,10 +74,13 @@ public: void sendDataChannelMessage(std::string const &message); + void setOutgoingVoiceActivity(bool isSpeech); + webrtc::RtpTransport *getRtpTransport(); private: void resetDtlsSrtpTransport(); + void restartDataChannel(); void checkConnectionTimeout(); void candidateGathered(cricket::IceTransportInternal *transport, const cricket::Candidate &candidate); void candidateGatheringState(cricket::IceTransportInternal *transport); @@ -98,6 +102,7 @@ private: std::function _transportMessageReceived; std::function _dataChannelStateUpdated; std::function _dataChannelMessageReceived; + std::function _audioActivityUpdated; std::unique_ptr _networkMonitorFactory; std::unique_ptr _socketFactory; diff --git a/TMessagesProj/jni/voip/webrtc/modules/audio_device/android/audio_record_jni.cc b/TMessagesProj/jni/voip/webrtc/modules/audio_device/android/audio_record_jni.cc index a3aa85565..e057cc258 100644 --- a/TMessagesProj/jni/voip/webrtc/modules/audio_device/android/audio_record_jni.cc +++ b/TMessagesProj/jni/voip/webrtc/modules/audio_device/android/audio_record_jni.cc @@ -54,8 +54,6 @@ AudioRecordJni::JavaAudioRecord::JavaAudioRecord( enable_built_in_aec_(native_reg->GetMethodId("enableBuiltInAEC", "(Z)Z")), enable_built_in_ns_(native_reg->GetMethodId("enableBuiltInNS", "(Z)Z")) {} -AudioRecordJni::JavaAudioRecord::~JavaAudioRecord() {} - int AudioRecordJni::JavaAudioRecord::InitRecording(int sample_rate, size_t channels) { return audio_record_->CallIntMethod(init_recording_, @@ -108,7 +106,7 @@ 
AudioRecordJni::AudioRecordJni(AudioManager* audio_manager) j_audio_record_.reset( new JavaAudioRecord(j_native_registration_.get(), j_native_registration_->NewObject( - "", "(J)V", PointerTojlong(this)))); + "", "(JZ)V", PointerTojlong(this), false))); // Detach from this thread since we want to use the checker to verify calls // from the Java based audio thread. thread_checker_java_.Detach(); diff --git a/TMessagesProj/jni/voip/webrtc/modules/audio_device/android/audio_record_jni.h b/TMessagesProj/jni/voip/webrtc/modules/audio_device/android/audio_record_jni.h index c445360d6..32ee8fb47 100644 --- a/TMessagesProj/jni/voip/webrtc/modules/audio_device/android/audio_record_jni.h +++ b/TMessagesProj/jni/voip/webrtc/modules/audio_device/android/audio_record_jni.h @@ -49,7 +49,6 @@ class AudioRecordJni { public: JavaAudioRecord(NativeRegistration* native_registration, std::unique_ptr audio_track); - ~JavaAudioRecord(); int InitRecording(int sample_rate, size_t channels); bool StartRecording(); diff --git a/TMessagesProj/jni/voip/webrtc/modules/audio_device/android/audio_screen_record_jni.cc b/TMessagesProj/jni/voip/webrtc/modules/audio_device/android/audio_screen_record_jni.cc new file mode 100644 index 000000000..e32922dbe --- /dev/null +++ b/TMessagesProj/jni/voip/webrtc/modules/audio_device/android/audio_screen_record_jni.cc @@ -0,0 +1,279 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_device/android/audio_screen_record_jni.h" + +#include +#include + +#include "modules/audio_device/android/audio_common.h" +#include "rtc_base/arraysize.h" +#include "rtc_base/checks.h" +#include "rtc_base/format_macros.h" +#include "rtc_base/logging.h" +#include "rtc_base/platform_thread.h" +#include "rtc_base/time_utils.h" +#include "system_wrappers/include/metrics.h" + +namespace webrtc { + +namespace { +// Scoped class which logs its time of life as a UMA statistic. It generates +// a histogram which measures the time it takes for a method/scope to execute. +class ScopedHistogramTimer { + public: + explicit ScopedHistogramTimer(const std::string& name) + : histogram_name_(name), start_time_ms_(rtc::TimeMillis()) {} + ~ScopedHistogramTimer() { + const int64_t life_time_ms = rtc::TimeSince(start_time_ms_); + RTC_HISTOGRAM_COUNTS_1000(histogram_name_, life_time_ms); + RTC_LOG(INFO) << histogram_name_ << ": " << life_time_ms; + } + + private: + const std::string histogram_name_; + int64_t start_time_ms_; +}; +} // namespace + +// AudioRecordJni::JavaAudioRecord implementation. 
+AudioScreenRecordJni::JavaAudioRecord::JavaAudioRecord( + NativeRegistration* native_reg, + std::unique_ptr audio_record) + : audio_record_(std::move(audio_record)), + init_recording_(native_reg->GetMethodId("initRecording", "(II)I")), + start_recording_(native_reg->GetMethodId("startRecording", "()Z")), + stop_recording_(native_reg->GetMethodId("stopRecording", "()Z")), + enable_built_in_aec_(native_reg->GetMethodId("enableBuiltInAEC", "(Z)Z")), + enable_built_in_ns_(native_reg->GetMethodId("enableBuiltInNS", "(Z)Z")) {} + +int AudioScreenRecordJni::JavaAudioRecord::InitRecording(int sample_rate, + size_t channels) { + return audio_record_->CallIntMethod(init_recording_, + static_cast(sample_rate), + static_cast(channels)); +} + +bool AudioScreenRecordJni::JavaAudioRecord::StartRecording() { + return audio_record_->CallBooleanMethod(start_recording_); +} + +bool AudioScreenRecordJni::JavaAudioRecord::StopRecording() { + return audio_record_->CallBooleanMethod(stop_recording_); +} + +bool AudioScreenRecordJni::JavaAudioRecord::EnableBuiltInAEC(bool enable) { + return audio_record_->CallBooleanMethod(enable_built_in_aec_, + static_cast(enable)); +} + +bool AudioScreenRecordJni::JavaAudioRecord::EnableBuiltInNS(bool enable) { + return audio_record_->CallBooleanMethod(enable_built_in_ns_, + static_cast(enable)); +} + +// AudioRecordJni implementation. 
+AudioScreenRecordJni::AudioScreenRecordJni(AudioManager* audio_manager) + : j_environment_(JVM::GetInstance()->environment()), + audio_manager_(audio_manager), + audio_parameters_(audio_manager->GetRecordAudioParameters()), + total_delay_in_milliseconds_(0), + direct_buffer_address_(nullptr), + direct_buffer_capacity_in_bytes_(0), + frames_per_buffer_(0), + initialized_(false), + recording_(false), + audio_device_buffer_(nullptr) { + RTC_LOG(INFO) << "ctor"; + RTC_DCHECK(audio_parameters_.is_valid()); + RTC_CHECK(j_environment_); + JNINativeMethod native_methods[] = { + {"nativeCacheDirectBufferAddress", "(Ljava/nio/ByteBuffer;J)V", + reinterpret_cast( + &webrtc::AudioScreenRecordJni::CacheDirectBufferAddress)}, + {"nativeDataIsRecorded", "(IJ)V", + reinterpret_cast(&webrtc::AudioScreenRecordJni::DataIsRecorded)}}; + j_native_registration_ = j_environment_->RegisterNatives( + "org/webrtc/voiceengine/WebRtcAudioRecord", native_methods, + arraysize(native_methods)); + j_audio_record_.reset( + new JavaAudioRecord(j_native_registration_.get(), + j_native_registration_->NewObject( + "", "(JZ)V", PointerTojlong(this), true))); + // Detach from this thread since we want to use the checker to verify calls + // from the Java based audio thread. 
+ thread_checker_java_.Detach(); +} + +AudioScreenRecordJni::~AudioScreenRecordJni() { + RTC_LOG(INFO) << "dtor"; + RTC_DCHECK(thread_checker_.IsCurrent()); + Terminate(); +} + +int32_t AudioScreenRecordJni::Init() { + RTC_LOG(INFO) << "Init"; + RTC_DCHECK(thread_checker_.IsCurrent()); + return 0; +} + +int32_t AudioScreenRecordJni::Terminate() { + RTC_LOG(INFO) << "Terminate"; + RTC_DCHECK(thread_checker_.IsCurrent()); + StopRecording(); + return 0; +} + +int32_t AudioScreenRecordJni::InitRecording() { + RTC_LOG(INFO) << "InitRecording"; + RTC_DCHECK(thread_checker_.IsCurrent()); + RTC_DCHECK(!initialized_); + RTC_DCHECK(!recording_); + ScopedHistogramTimer timer("WebRTC.Audio.InitRecordingDurationMs"); + int frames_per_buffer = j_audio_record_->InitRecording( + audio_parameters_.sample_rate(), audio_parameters_.channels()); + if (frames_per_buffer < 0) { + direct_buffer_address_ = nullptr; + RTC_LOG(LS_ERROR) << "InitRecording failed"; + return -1; + } + frames_per_buffer_ = static_cast(frames_per_buffer); + RTC_LOG(INFO) << "frames_per_buffer: " << frames_per_buffer_; + const size_t bytes_per_frame = audio_parameters_.channels() * sizeof(int16_t); + RTC_CHECK_EQ(direct_buffer_capacity_in_bytes_, + frames_per_buffer_ * bytes_per_frame); + RTC_CHECK_EQ(frames_per_buffer_, audio_parameters_.frames_per_10ms_buffer()); + initialized_ = true; + return 0; +} + +int32_t AudioScreenRecordJni::StartRecording() { + RTC_LOG(INFO) << "StartRecording"; + RTC_DCHECK(thread_checker_.IsCurrent()); + RTC_DCHECK(!recording_); + if (!initialized_) { + RTC_DLOG(LS_WARNING) + << "Recording can not start since InitRecording must succeed first"; + return 0; + } + ScopedHistogramTimer timer("WebRTC.Audio.StartRecordingDurationMs"); + if (!j_audio_record_->StartRecording()) { + RTC_LOG(LS_ERROR) << "StartRecording failed"; + return -1; + } + recording_ = true; + return 0; +} + +int32_t AudioScreenRecordJni::StopRecording() { + RTC_LOG(INFO) << "StopRecording"; + 
RTC_DCHECK(thread_checker_.IsCurrent()); + if (!initialized_ || !recording_) { + return 0; + } + if (!j_audio_record_->StopRecording()) { + RTC_LOG(LS_ERROR) << "StopRecording failed"; + return -1; + } + // If we don't detach here, we will hit a RTC_DCHECK in OnDataIsRecorded() + // next time StartRecording() is called since it will create a new Java + // thread. + thread_checker_java_.Detach(); + initialized_ = false; + recording_ = false; + direct_buffer_address_ = nullptr; + return 0; +} + +void AudioScreenRecordJni::AttachAudioBuffer(AudioDeviceBuffer* audioBuffer) { + RTC_LOG(INFO) << "AttachAudioBuffer"; + RTC_DCHECK(thread_checker_.IsCurrent()); + audio_device_buffer_ = audioBuffer; + const int sample_rate_hz = audio_parameters_.sample_rate(); + RTC_LOG(INFO) << "SetRecordingSampleRate(" << sample_rate_hz << ")"; + audio_device_buffer_->SetRecordingSampleRate(sample_rate_hz); + const size_t channels = audio_parameters_.channels(); + RTC_LOG(INFO) << "SetRecordingChannels(" << channels << ")"; + audio_device_buffer_->SetRecordingChannels(channels); + total_delay_in_milliseconds_ = + audio_manager_->GetDelayEstimateInMilliseconds(); + RTC_DCHECK_GT(total_delay_in_milliseconds_, 0); + RTC_LOG(INFO) << "total_delay_in_milliseconds: " + << total_delay_in_milliseconds_; +} + +int32_t AudioScreenRecordJni::EnableBuiltInAEC(bool enable) { + RTC_LOG(INFO) << "EnableBuiltInAEC(" << enable << ")"; + RTC_DCHECK(thread_checker_.IsCurrent()); + return j_audio_record_->EnableBuiltInAEC(enable) ? 0 : -1; +} + +int32_t AudioScreenRecordJni::EnableBuiltInAGC(bool enable) { + // TODO(henrika): possibly remove when no longer used by any client. + RTC_CHECK_NOTREACHED(); +} + +int32_t AudioScreenRecordJni::EnableBuiltInNS(bool enable) { + RTC_LOG(INFO) << "EnableBuiltInNS(" << enable << ")"; + RTC_DCHECK(thread_checker_.IsCurrent()); + return j_audio_record_->EnableBuiltInNS(enable) ? 
0 : -1; +} + +JNI_FUNCTION_ALIGN +void JNICALL AudioScreenRecordJni::CacheDirectBufferAddress(JNIEnv* env, + jobject obj, + jobject byte_buffer, + jlong nativeAudioRecord) { + webrtc::AudioScreenRecordJni* this_object = + reinterpret_cast(nativeAudioRecord); + this_object->OnCacheDirectBufferAddress(env, byte_buffer); +} + +void AudioScreenRecordJni::OnCacheDirectBufferAddress(JNIEnv* env, + jobject byte_buffer) { + RTC_LOG(INFO) << "OnCacheDirectBufferAddress"; + RTC_DCHECK(thread_checker_.IsCurrent()); + RTC_DCHECK(!direct_buffer_address_); + direct_buffer_address_ = env->GetDirectBufferAddress(byte_buffer); + jlong capacity = env->GetDirectBufferCapacity(byte_buffer); + RTC_LOG(INFO) << "direct buffer capacity: " << capacity; + direct_buffer_capacity_in_bytes_ = static_cast(capacity); +} + +JNI_FUNCTION_ALIGN +void JNICALL AudioScreenRecordJni::DataIsRecorded(JNIEnv* env, + jobject obj, + jint length, + jlong nativeAudioRecord) { + webrtc::AudioScreenRecordJni* this_object = + reinterpret_cast(nativeAudioRecord); + this_object->OnDataIsRecorded(length); +} + +// This method is called on a high-priority thread from Java. The name of +// the thread is 'AudioRecordThread'. +void AudioScreenRecordJni::OnDataIsRecorded(int length) { + RTC_DCHECK(thread_checker_java_.IsCurrent()); + if (!audio_device_buffer_) { + RTC_LOG(LS_ERROR) << "AttachAudioBuffer has not been called"; + return; + } + audio_device_buffer_->SetRecordedBuffer(direct_buffer_address_, + frames_per_buffer_); + // We provide one (combined) fixed delay estimate for the APM and use the + // |playDelayMs| parameter only. Components like the AEC only sees the sum + // of |playDelayMs| and |recDelayMs|, hence the distributions does not matter. 
+ audio_device_buffer_->SetVQEData(total_delay_in_milliseconds_, 0); + if (audio_device_buffer_->DeliverRecordedData() == -1) { + RTC_LOG(INFO) << "AudioDeviceBuffer::DeliverRecordedData failed"; + } +} + +} // namespace webrtc diff --git a/TMessagesProj/jni/voip/webrtc/modules/audio_device/android/audio_screen_record_jni.h b/TMessagesProj/jni/voip/webrtc/modules/audio_device/android/audio_screen_record_jni.h new file mode 100644 index 000000000..2a163e668 --- /dev/null +++ b/TMessagesProj/jni/voip/webrtc/modules/audio_device/android/audio_screen_record_jni.h @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_DEVICE_ANDROID_AUDIO_SCREEN_RECORD_JNI_H_ +#define MODULES_AUDIO_DEVICE_ANDROID_AUDIO_SCREEN_RECORD_JNI_H_ + +#include + +#include + +#include "api/sequence_checker.h" +#include "modules/audio_device/android/audio_manager.h" +#include "modules/audio_device/audio_device_generic.h" +#include "modules/audio_device/include/audio_device_defines.h" +#include "modules/utility/include/helpers_android.h" +#include "modules/utility/include/jvm_android.h" + +namespace webrtc { + +// Implements 16-bit mono PCM audio input support for Android using the Java +// AudioRecord interface. Most of the work is done by its Java counterpart in +// WebRtcAudioRecord.java. This class is created and lives on a thread in +// C++-land, but recorded audio buffers are delivered on a high-priority +// thread managed by the Java class. +// +// The Java class makes use of AudioEffect features (mainly AEC) which are +// first available in Jelly Bean. 
If it is instantiated running against earlier +// SDKs, the AEC provided by the APM in WebRTC must be used and enabled +// separately instead. +// +// An instance must be created and destroyed on one and the same thread. +// All public methods must also be called on the same thread. A thread checker +// will RTC_DCHECK if any method is called on an invalid thread. +// +// This class uses JvmThreadConnector to attach to a Java VM if needed +// and detach when the object goes out of scope. Additional thread checking +// guarantees that no other (possibly non attached) thread is used. +class AudioScreenRecordJni { + public: + // Wraps the Java specific parts of the AudioRecordJni into one helper class. + class JavaAudioRecord { + public: + JavaAudioRecord(NativeRegistration* native_registration, + std::unique_ptr audio_track); + + int InitRecording(int sample_rate, size_t channels); + bool StartRecording(); + bool StopRecording(); + bool EnableBuiltInAEC(bool enable); + bool EnableBuiltInNS(bool enable); + + private: + std::unique_ptr audio_record_; + jmethodID init_recording_; + jmethodID start_recording_; + jmethodID stop_recording_; + jmethodID enable_built_in_aec_; + jmethodID enable_built_in_ns_; + }; + + explicit AudioScreenRecordJni(AudioManager* audio_manager); + ~AudioScreenRecordJni(); + + int32_t Init(); + int32_t Terminate(); + + int32_t InitRecording(); + bool RecordingIsInitialized() const { return initialized_; } + + int32_t StartRecording(); + int32_t StopRecording(); + bool Recording() const { return recording_; } + + void AttachAudioBuffer(AudioDeviceBuffer* audioBuffer); + + int32_t EnableBuiltInAEC(bool enable); + int32_t EnableBuiltInAGC(bool enable); + int32_t EnableBuiltInNS(bool enable); + + private: + // Called from Java side so we can cache the address of the Java-manged + // |byte_buffer| in |direct_buffer_address_|. The size of the buffer + // is also stored in |direct_buffer_capacity_in_bytes_|. 
+ // This method will be called by the WebRtcAudioRecord constructor, i.e., + // on the same thread that this object is created on. + static void JNICALL CacheDirectBufferAddress(JNIEnv* env, + jobject obj, + jobject byte_buffer, + jlong nativeAudioRecord); + void OnCacheDirectBufferAddress(JNIEnv* env, jobject byte_buffer); + + // Called periodically by the Java based WebRtcAudioRecord object when + // recording has started. Each call indicates that there are |length| new + // bytes recorded in the memory area |direct_buffer_address_| and it is + // now time to send these to the consumer. + // This method is called on a high-priority thread from Java. The name of + // the thread is 'AudioRecordThread'. + static void JNICALL DataIsRecorded(JNIEnv* env, + jobject obj, + jint length, + jlong nativeAudioRecord); + void OnDataIsRecorded(int length); + + // Stores thread ID in constructor. + SequenceChecker thread_checker_; + + // Stores thread ID in first call to OnDataIsRecorded() from high-priority + // thread in Java. Detached during construction of this object. + SequenceChecker thread_checker_java_; + + // Calls JavaVM::AttachCurrentThread() if this thread is not attached at + // construction. + // Also ensures that DetachCurrentThread() is called at destruction. + JvmThreadConnector attach_thread_if_needed_; + + // Wraps the JNI interface pointer and methods associated with it. + std::unique_ptr j_environment_; + + // Contains factory method for creating the Java object. + std::unique_ptr j_native_registration_; + + // Wraps the Java specific parts of the AudioRecordJni class. + std::unique_ptr j_audio_record_; + + // Raw pointer to the audio manger. + const AudioManager* audio_manager_; + + // Contains audio parameters provided to this class at construction by the + // AudioManager. + const AudioParameters audio_parameters_; + + // Delay estimate of the total round-trip delay (input + output). 
+ // Fixed value set once in AttachAudioBuffer() and it can take one out of two + // possible values. See audio_common.h for details. + int total_delay_in_milliseconds_; + + // Cached copy of address to direct audio buffer owned by |j_audio_record_|. + void* direct_buffer_address_; + + // Number of bytes in the direct audio buffer owned by |j_audio_record_|. + size_t direct_buffer_capacity_in_bytes_; + + // Number audio frames per audio buffer. Each audio frame corresponds to + // one sample of PCM mono data at 16 bits per sample. Hence, each audio + // frame contains 2 bytes (given that the Java layer only supports mono). + // Example: 480 for 48000 Hz or 441 for 44100 Hz. + size_t frames_per_buffer_; + + bool initialized_; + + bool recording_; + + // Raw pointer handle provided to us in AttachAudioBuffer(). Owned by the + // AudioDeviceModuleImpl class and called by AudioDeviceModule::Create(). + AudioDeviceBuffer* audio_device_buffer_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_DEVICE_ANDROID_AUDIO_RECORD_JNI_H_ diff --git a/TMessagesProj/jni/voip/webrtc/modules/audio_device/audio_device_impl.cc b/TMessagesProj/jni/voip/webrtc/modules/audio_device/audio_device_impl.cc index 84460ff83..c55703047 100644 --- a/TMessagesProj/jni/voip/webrtc/modules/audio_device/audio_device_impl.cc +++ b/TMessagesProj/jni/voip/webrtc/modules/audio_device/audio_device_impl.cc @@ -33,6 +33,7 @@ #include "modules/audio_device/android/audio_device_template.h" #include "modules/audio_device/android/audio_manager.h" #include "modules/audio_device/android/audio_record_jni.h" +#include "modules/audio_device/android/audio_screen_record_jni.h" #include "modules/audio_device/android/audio_track_jni.h" #include "modules/audio_device/android/opensles_player.h" #include "modules/audio_device/android/opensles_recorder.h" @@ -210,6 +211,10 @@ int32_t AudioDeviceModuleImpl::CreatePlatformSpecificObjects() { // Java audio for both input and output audio. 
audio_device_.reset(new AudioDeviceTemplate( audio_layer, audio_manager)); + } else if (audio_layer == kAndroidScreenAudio) { + // Java audio for both input and output audio. + audio_device_.reset(new AudioDeviceTemplate( + audio_layer, audio_manager)); } else if (audio_layer == kAndroidOpenSLESAudio) { // OpenSL ES based audio for both input and output audio. audio_device_.reset( diff --git a/TMessagesProj/jni/voip/webrtc/modules/audio_device/include/audio_device.h b/TMessagesProj/jni/voip/webrtc/modules/audio_device/include/audio_device.h index f82029eb5..6228568fc 100644 --- a/TMessagesProj/jni/voip/webrtc/modules/audio_device/include/audio_device.h +++ b/TMessagesProj/jni/voip/webrtc/modules/audio_device/include/audio_device.h @@ -34,6 +34,7 @@ class AudioDeviceModule : public rtc::RefCountInterface { kAndroidAAudioAudio, kAndroidJavaInputAndAAudioOutputAudio, kDummyAudio, + kAndroidScreenAudio }; enum WindowsDeviceType { diff --git a/TMessagesProj/jni/voip/webrtc/modules/video_coding/codecs/h264/h264_color_space.h b/TMessagesProj/jni/voip/webrtc/modules/video_coding/codecs/h264/h264_color_space.h index aec76efba..f83b9b493 100644 --- a/TMessagesProj/jni/voip/webrtc/modules/video_coding/codecs/h264/h264_color_space.h +++ b/TMessagesProj/jni/voip/webrtc/modules/video_coding/codecs/h264/h264_color_space.h @@ -19,7 +19,7 @@ #include "api/video/color_space.h" extern "C" { -#include "third_party/ffmpeg/libavcodec/avcodec.h" +#include "libavcodec/avcodec.h" } // extern "C" namespace webrtc { diff --git a/TMessagesProj/jni/voip/webrtc/modules/video_coding/codecs/h264/h264_decoder_impl.cc b/TMessagesProj/jni/voip/webrtc/modules/video_coding/codecs/h264/h264_decoder_impl.cc index 6f37b52fd..5dd9df8b3 100644 --- a/TMessagesProj/jni/voip/webrtc/modules/video_coding/codecs/h264/h264_decoder_impl.cc +++ b/TMessagesProj/jni/voip/webrtc/modules/video_coding/codecs/h264/h264_decoder_impl.cc @@ -21,9 +21,9 @@ #include extern "C" { -#include 
"third_party/ffmpeg/libavcodec/avcodec.h" -#include "third_party/ffmpeg/libavformat/avformat.h" -#include "third_party/ffmpeg/libavutil/imgutils.h" +#include "libavcodec/avcodec.h" +#include "libavformat/avformat.h" +#include "libavutil/imgutils.h" } // extern "C" #include "api/video/color_space.h" diff --git a/TMessagesProj/jni/voip/webrtc/modules/video_coding/codecs/h264/h264_decoder_impl.h b/TMessagesProj/jni/voip/webrtc/modules/video_coding/codecs/h264/h264_decoder_impl.h index 47af12c8c..9bb89050c 100644 --- a/TMessagesProj/jni/voip/webrtc/modules/video_coding/codecs/h264/h264_decoder_impl.h +++ b/TMessagesProj/jni/voip/webrtc/modules/video_coding/codecs/h264/h264_decoder_impl.h @@ -36,7 +36,7 @@ // passed to ffmpeg. extern "C" { -#include "third_party/ffmpeg/libavcodec/avcodec.h" +#include "libavcodec/avcodec.h" } // extern "C" #include "common_video/h264/h264_bitstream_parser.h" diff --git a/TMessagesProj/jni/voip/webrtc/sdk/android/generated_openh264_jni/OpenH264Decoder_jni.h b/TMessagesProj/jni/voip/webrtc/sdk/android/generated_openh264_jni/OpenH264Decoder_jni.h new file mode 100644 index 000000000..a601fad2d --- /dev/null +++ b/TMessagesProj/jni/voip/webrtc/sdk/android/generated_openh264_jni/OpenH264Decoder_jni.h @@ -0,0 +1,56 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + + +// This file is autogenerated by +// base/android/jni_generator/jni_generator.py +// For +// org/webrtc/OpenH264Decoder + +#ifndef org_webrtc_OpenH264Decoder_JNI +#define org_webrtc_OpenH264Decoder_JNI + +#include + +#include "webrtc/sdk/android/src/jni/jni_generator_helper.h" + + +// Step 1: Forward declarations. + +JNI_REGISTRATION_EXPORT extern const char kClassPath_org_webrtc_OpenH264Decoder[]; +const char kClassPath_org_webrtc_OpenH264Decoder[] = "org/webrtc/OpenH264Decoder"; +// Leaking this jclass as we cannot use LazyInstance from some threads. 
+JNI_REGISTRATION_EXPORT std::atomic g_org_webrtc_OpenH264Decoder_clazz(nullptr); +#ifndef org_webrtc_OpenH264Decoder_clazz_defined +#define org_webrtc_OpenH264Decoder_clazz_defined +inline jclass org_webrtc_OpenH264Decoder_clazz(JNIEnv* env) { + return base::android::LazyGetClass(env, kClassPath_org_webrtc_OpenH264Decoder, + &g_org_webrtc_OpenH264Decoder_clazz); +} +#endif + + +// Step 2: Constants (optional). + + +// Step 3: Method stubs. +namespace webrtc { +namespace jni { + +static jlong JNI_OpenH264Decoder_CreateDecoder(JNIEnv* env); + +JNI_GENERATOR_EXPORT jlong Java_org_webrtc_OpenH264Decoder_nativeCreateDecoder( + JNIEnv* env, + jclass jcaller) { + return JNI_OpenH264Decoder_CreateDecoder(env); +} + + +} // namespace jni +} // namespace webrtc + +// Step 4: Generated test functions (optional). + + +#endif // org_webrtc_OpenH264Decoder_JNI diff --git a/TMessagesProj/jni/voip/webrtc/sdk/android/generated_openh264_jni/OpenH264Encoder_jni.h b/TMessagesProj/jni/voip/webrtc/sdk/android/generated_openh264_jni/OpenH264Encoder_jni.h new file mode 100644 index 000000000..364925c2a --- /dev/null +++ b/TMessagesProj/jni/voip/webrtc/sdk/android/generated_openh264_jni/OpenH264Encoder_jni.h @@ -0,0 +1,56 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + + +// This file is autogenerated by +// base/android/jni_generator/jni_generator.py +// For +// org/webrtc/OpenH264Encoder + +#ifndef org_webrtc_OpenH264Encoder_JNI +#define org_webrtc_OpenH264Encoder_JNI + +#include + +#include "webrtc/sdk/android/src/jni/jni_generator_helper.h" + + +// Step 1: Forward declarations. + +JNI_REGISTRATION_EXPORT extern const char kClassPath_org_webrtc_OpenH264Encoder[]; +const char kClassPath_org_webrtc_OpenH264Encoder[] = "org/webrtc/OpenH264Encoder"; +// Leaking this jclass as we cannot use LazyInstance from some threads. 
+JNI_REGISTRATION_EXPORT std::atomic g_org_webrtc_OpenH264Encoder_clazz(nullptr); +#ifndef org_webrtc_OpenH264Encoder_clazz_defined +#define org_webrtc_OpenH264Encoder_clazz_defined +inline jclass org_webrtc_OpenH264Encoder_clazz(JNIEnv* env) { + return base::android::LazyGetClass(env, kClassPath_org_webrtc_OpenH264Encoder, + &g_org_webrtc_OpenH264Encoder_clazz); +} +#endif + + +// Step 2: Constants (optional). + + +// Step 3: Method stubs. +namespace webrtc { +namespace jni { + +static jlong JNI_OpenH264Encoder_CreateEncoder(JNIEnv* env); + +JNI_GENERATOR_EXPORT jlong Java_org_webrtc_OpenH264Encoder_nativeCreateEncoder( + JNIEnv* env, + jclass jcaller) { + return JNI_OpenH264Encoder_CreateEncoder(env); +} + + +} // namespace jni +} // namespace webrtc + +// Step 4: Generated test functions (optional). + + +#endif // org_webrtc_OpenH264Encoder_JNI diff --git a/TMessagesProj/jni/voip/webrtc/sdk/android/src/jni/h264_codec.cc b/TMessagesProj/jni/voip/webrtc/sdk/android/src/jni/h264_codec.cc new file mode 100644 index 000000000..94b905b16 --- /dev/null +++ b/TMessagesProj/jni/voip/webrtc/sdk/android/src/jni/h264_codec.cc @@ -0,0 +1,30 @@ +/* + * Copyright 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include + +#include "modules/video_coding/codecs/h264/include/h264.h" +#include "sdk/android/generated_openh264_jni/OpenH264Decoder_jni.h" +#include "sdk/android/generated_openh264_jni/OpenH264Encoder_jni.h" +#include "sdk/android/src/jni/jni_helpers.h" + +namespace webrtc { +namespace jni { + +static jlong JNI_OpenH264Encoder_CreateEncoder(JNIEnv* jni) { + return jlongFromPointer(H264Encoder::Create(cricket::VideoCodec(CreateH264Format(H264Profile::kProfileBaseline, H264Level::kLevel3_1,"1"))).release()); +} + +static jlong JNI_OpenH264Decoder_CreateDecoder(JNIEnv* jni) { + return jlongFromPointer(H264Decoder::Create().release()); +} + +} // namespace jni +} // namespace webrtc diff --git a/TMessagesProj/src/main/AndroidManifest.xml b/TMessagesProj/src/main/AndroidManifest.xml index 3c20562d4..96dfececd 100644 --- a/TMessagesProj/src/main/AndroidManifest.xml +++ b/TMessagesProj/src/main/AndroidManifest.xml @@ -1,17 +1,17 @@ + android:largeScreens="true" + android:normalScreens="true" + android:resizeable="true" + android:smallScreens="true" + android:xlargeScreens="true"/> - - + + @@ -107,6 +107,7 @@ android:roundIcon="@mipmap/ic_launcher" android:supportsRtl="false" android:theme="@style/Theme.TMessages.Start" + android:allowAudioPlaybackCapture="true" tools:replace="android:supportsRtl"> - - - - + + + diff --git a/TMessagesProj/src/main/java/androidx/recyclerview/widget/ChatListItemAnimator.java b/TMessagesProj/src/main/java/androidx/recyclerview/widget/ChatListItemAnimator.java index 2e0084164..3d9ef6fea 100644 --- a/TMessagesProj/src/main/java/androidx/recyclerview/widget/ChatListItemAnimator.java +++ b/TMessagesProj/src/main/java/androidx/recyclerview/widget/ChatListItemAnimator.java @@ -8,6 +8,7 @@ import android.animation.ValueAnimator; import android.os.Build; import android.view.View; import android.view.ViewPropertyAnimator; +import android.view.animation.Interpolator; import android.view.animation.OvershootInterpolator; import 
androidx.annotation.NonNull; @@ -18,12 +19,15 @@ import org.telegram.messenger.BuildVars; import org.telegram.messenger.FileLog; import org.telegram.messenger.ImageReceiver; import org.telegram.messenger.MessageObject; +import org.telegram.messenger.SharedConfig; import org.telegram.ui.Cells.BotHelpCell; import org.telegram.ui.Cells.ChatMessageCell; import org.telegram.ui.ChatActivity; import org.telegram.ui.Components.ChatGreetingsView; import org.telegram.ui.Components.CubicBezierInterpolator; import org.telegram.ui.Components.RecyclerListView; +import org.telegram.ui.TextMessageEnterTransition; +import org.telegram.ui.VoiceMessageEnterTransition; import java.util.ArrayList; import java.util.Collections; @@ -32,6 +36,9 @@ import java.util.List; public class ChatListItemAnimator extends DefaultItemAnimator { + public static final long DEFAULT_DURATION = 250; + public static final Interpolator DEFAULT_INTERPOLATOR = new CubicBezierInterpolator(0.19919472913616398, 0.010644531250000006, 0.27920937042459737, 0.91025390625); + private final ChatActivity activity; private final RecyclerListView recyclerListView; @@ -52,7 +59,7 @@ public class ChatListItemAnimator extends DefaultItemAnimator { public ChatListItemAnimator(ChatActivity activity, RecyclerListView listView) { this.activity = activity; this.recyclerListView = listView; - translationInterpolator = CubicBezierInterpolator.DEFAULT; + translationInterpolator = DEFAULT_INTERPOLATOR; if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.LOLLIPOP) { listView.getElevation(); } @@ -277,9 +284,27 @@ public class ChatListItemAnimator extends DefaultItemAnimator { view.setTranslationY(addedItemsHeight); holder.itemView.setScaleX(1); holder.itemView.setScaleY(1); - if (!(holder.itemView instanceof ChatMessageCell && ((ChatMessageCell) holder.itemView).getTransitionParams().ignoreAlpha)) { + ChatMessageCell chatMessageCell = holder.itemView instanceof ChatMessageCell ? 
(ChatMessageCell) holder.itemView : null; + if (!(chatMessageCell != null && chatMessageCell.getTransitionParams().ignoreAlpha)) { holder.itemView.setAlpha(1); } + if (chatMessageCell != null && activity.animatingMessageObjects.contains(chatMessageCell.getMessageObject())) { + activity.animatingMessageObjects.remove(chatMessageCell.getMessageObject()); + if (activity.getChatActivityEnterView().canShowMessageTransition()) { + if (chatMessageCell.getMessageObject().isVoice()) { + if (Math.abs(view.getTranslationY()) < view.getMeasuredHeight() * 3f) { + VoiceMessageEnterTransition transition = new VoiceMessageEnterTransition(chatMessageCell, activity.getChatActivityEnterView(), recyclerListView, activity.messageEnterTransitionContainer); + transition.start(); + } + } else { + if (SharedConfig.getDevicePerformanceClass() != SharedConfig.PERFORMANCE_CLASS_LOW && Math.abs(view.getTranslationY()) < recyclerListView.getMeasuredHeight()) { + TextMessageEnterTransition transition = new TextMessageEnterTransition(chatMessageCell, activity, recyclerListView, activity.messageEnterTransitionContainer); + transition.start(); + } + } + activity.getChatActivityEnterView().startMessageTransition(); + } + } animation.translationY(0).setDuration(getMoveDuration()) .setInterpolator(translationInterpolator) .setListener(new AnimatorListenerAdapter() { @@ -1309,7 +1334,7 @@ public class ChatListItemAnimator extends DefaultItemAnimator { animatorSet.setInterpolator(new OvershootInterpolator()); } else { animatorSet.setStartDelay(currentDelay); - animatorSet.setDuration(220); + animatorSet.setDuration(DEFAULT_DURATION); } animatorSet.addListener(new AnimatorListenerAdapter() { @@ -1388,12 +1413,12 @@ public class ChatListItemAnimator extends DefaultItemAnimator { @Override public long getMoveDuration() { - return 220; + return DEFAULT_DURATION; } @Override public long getChangeDuration() { - return 220; + return DEFAULT_DURATION; } public void runOnAnimationEnd(Runnable runnable) { diff 
--git a/TMessagesProj/src/main/java/org/telegram/messenger/BuildVars.java b/TMessagesProj/src/main/java/org/telegram/messenger/BuildVars.java index b361ea64a..120d77e29 100644 --- a/TMessagesProj/src/main/java/org/telegram/messenger/BuildVars.java +++ b/TMessagesProj/src/main/java/org/telegram/messenger/BuildVars.java @@ -1,9 +1,9 @@ /* - * This is the source code of Telegram for Android v. 5.x.x. + * This is the source code of Telegram for Android v. 7.x.x. * It is licensed under GNU GPL v. 2 or later. * You should have received a copy of the license in this archive (see LICENSE). * - * Copyright Nikolai Kudashov, 2013-2018. + * Copyright Nikolai Kudashov, 2013-2020. */ package org.telegram.messenger; diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/ChatObject.java b/TMessagesProj/src/main/java/org/telegram/messenger/ChatObject.java index 006e47337..f70937ea4 100644 --- a/TMessagesProj/src/main/java/org/telegram/messenger/ChatObject.java +++ b/TMessagesProj/src/main/java/org/telegram/messenger/ChatObject.java @@ -74,6 +74,7 @@ public class ChatObject { public boolean reloadingMembers; public boolean recording; public boolean canStreamVideo; + public int activeVideos; public VideoParticipant videoNotAvailableParticipant; public AccountInstance currentAccount; public int speakingMembersCount; @@ -220,6 +221,90 @@ public class ChatObject { } } + private void onParticipantsLoad(ArrayList loadedParticipants, boolean fromBegin, String reqOffset, String nextOffset, int version, int participantCount) { + SparseArray old = null; + int selfId = getSelfId(); + TLRPC.TL_groupCallParticipant oldSelf = participants.get(selfId); + if (TextUtils.isEmpty(reqOffset)) { + if (participants.size() != 0) { + old = participants; + participants = new SparseArray<>(); + } else { + participants.clear(); + } + sortedParticipants.clear(); + participantsBySources.clear(); + participantsByVideoSources.clear(); + participantsByPresentationSources.clear(); + loadingGuids.clear(); 
+ } + nextLoadOffset = nextOffset; + if (loadedParticipants.isEmpty() || TextUtils.isEmpty(nextLoadOffset)) { + membersLoadEndReached = true; + } + if (TextUtils.isEmpty(reqOffset)) { + call.version = version; + call.participants_count = participantCount; + if (BuildVars.LOGS_ENABLED) { + FileLog.d("new participants count " + call.participants_count); + } + } + long time = SystemClock.elapsedRealtime(); + currentAccount.getNotificationCenter().postNotificationName(NotificationCenter.applyGroupCallVisibleParticipants, time); + boolean hasSelf = false; + for (int a = 0, N = loadedParticipants.size(); a <= N; a++) { + TLRPC.TL_groupCallParticipant participant; + if (a == N) { + if (fromBegin && oldSelf != null && !hasSelf) { + participant = oldSelf; + } else { + continue; + } + } else { + participant = loadedParticipants.get(a); + if (participant.self) { + hasSelf = true; + } + } + TLRPC.TL_groupCallParticipant oldParticipant = participants.get(MessageObject.getPeerId(participant.peer)); + if (oldParticipant != null) { + sortedParticipants.remove(oldParticipant); + processAllSources(oldParticipant, false); + if (oldParticipant.self) { + participant.lastTypingDate = oldParticipant.active_date; + } else { + participant.lastTypingDate = Math.max(participant.active_date, oldParticipant.active_date); + } + if (time != participant.lastVisibleDate) { + participant.active_date = participant.lastTypingDate; + } + } else if (old != null) { + oldParticipant = old.get(MessageObject.getPeerId(participant.peer)); + if (oldParticipant != null) { + if (oldParticipant.self) { + participant.lastTypingDate = oldParticipant.active_date; + } else { + participant.lastTypingDate = Math.max(participant.active_date, oldParticipant.active_date); + } + if (time != participant.lastVisibleDate) { + participant.active_date = participant.lastTypingDate; + } else { + participant.active_date = oldParticipant.active_date; + } + } + } + participants.put(MessageObject.getPeerId(participant.peer), 
participant); + sortedParticipants.add(participant); + processAllSources(participant, true); + } + if (call.participants_count < participants.size()) { + call.participants_count = participants.size(); + } + sortParticipants(); + currentAccount.getNotificationCenter().postNotificationName(NotificationCenter.groupCallUpdated, chatId, call.id, false); + setParticiapantsVolume(); + } + public void loadMembers(boolean fromBegin) { if (fromBegin) { if (reloadingMembers) { @@ -248,87 +333,7 @@ public class ChatObject { TLRPC.TL_phone_groupParticipants groupParticipants = (TLRPC.TL_phone_groupParticipants) response; currentAccount.getMessagesController().putUsers(groupParticipants.users, false); currentAccount.getMessagesController().putChats(groupParticipants.chats, false); - SparseArray old = null; - int selfId = getSelfId(); - TLRPC.TL_groupCallParticipant oldSelf = participants.get(selfId); - if (TextUtils.isEmpty(req.offset)) { - if (participants.size() != 0) { - old = participants; - participants = new SparseArray<>(); - } else { - participants.clear(); - } - sortedParticipants.clear(); - participantsBySources.clear(); - participantsByVideoSources.clear(); - participantsByPresentationSources.clear(); - loadingGuids.clear(); - } - nextLoadOffset = groupParticipants.next_offset; - if (groupParticipants.participants.isEmpty() || TextUtils.isEmpty(nextLoadOffset)) { - membersLoadEndReached = true; - } - if (TextUtils.isEmpty(req.offset)) { - call.version = groupParticipants.version; - call.participants_count = groupParticipants.count; - if (BuildVars.LOGS_ENABLED) { - FileLog.d("new participants count " + call.participants_count); - } - } - long time = SystemClock.elapsedRealtime(); - currentAccount.getNotificationCenter().postNotificationName(NotificationCenter.applyGroupCallVisibleParticipants, time); - boolean hasSelf = false; - for (int a = 0, N = groupParticipants.participants.size(); a <= N; a++) { - TLRPC.TL_groupCallParticipant participant; - if (a == N) { - if 
(fromBegin && oldSelf != null && !hasSelf) { - participant = oldSelf; - } else { - continue; - } - } else { - participant = groupParticipants.participants.get(a); - if (participant.self) { - hasSelf = true; - } - } - TLRPC.TL_groupCallParticipant oldParticipant = participants.get(MessageObject.getPeerId(participant.peer)); - if (oldParticipant != null) { - sortedParticipants.remove(oldParticipant); - processAllSources(oldParticipant, false); - if (oldParticipant.self) { - participant.lastTypingDate = oldParticipant.active_date; - } else { - participant.lastTypingDate = Math.max(participant.active_date, oldParticipant.active_date); - } - if (time != participant.lastVisibleDate) { - participant.active_date = participant.lastTypingDate; - } - } else if (old != null) { - oldParticipant = old.get(MessageObject.getPeerId(participant.peer)); - if (oldParticipant != null) { - if (oldParticipant.self) { - participant.lastTypingDate = oldParticipant.active_date; - } else { - participant.lastTypingDate = Math.max(participant.active_date, oldParticipant.active_date); - } - if (time != participant.lastVisibleDate) { - participant.active_date = participant.lastTypingDate; - } else { - participant.active_date = oldParticipant.active_date; - } - } - } - participants.put(MessageObject.getPeerId(participant.peer), participant); - sortedParticipants.add(participant); - processAllSources(participant, true); - } - if (call.participants_count < participants.size()) { - call.participants_count = participants.size(); - } - sortParticipants(); - currentAccount.getNotificationCenter().postNotificationName(NotificationCenter.groupCallUpdated, chatId, call.id, false); - setParticiapantsVolume(); + onParticipantsLoad(groupParticipants.participants, fromBegin, req.offset, groupParticipants.next_offset, groupParticipants.version, groupParticipants.count); } })); } @@ -485,6 +490,13 @@ public class ChatObject { for (int c = 0; c < 2; c++) { TLRPC.TL_groupCallParticipantVideo data = c == 0 ? 
participant.video : participant.presentation; if (data != null) { + if ((data.flags & 2) != 0 && data.audio_source != 0) { + if (add) { + participantsBySources.put(data.audio_source, participant); + } else { + participantsBySources.remove(data.audio_source); + } + } SparseArray sourcesArray = c == 0 ? participantsByVideoSources : participantsByPresentationSources; for (int a = 0, N = data.source_groups.size(); a < N; a++) { TLRPC.TL_groupCallParticipantVideoSourceGroup sourceGroup = data.source_groups.get(a); @@ -702,6 +714,21 @@ public class ChatObject { } } + public void reloadGroupCall() { + TLRPC.TL_phone_getGroupCall req = new TLRPC.TL_phone_getGroupCall(); + req.call = getInputGroupCall(); + req.limit = 100; + currentAccount.getConnectionsManager().sendRequest(req, (response, error) -> AndroidUtilities.runOnUIThread(() -> { + if (response instanceof TLRPC.TL_phone_groupCall) { + TLRPC.TL_phone_groupCall phoneGroupCall = (TLRPC.TL_phone_groupCall) response; + call = phoneGroupCall.call; + currentAccount.getMessagesController().putUsers(phoneGroupCall.users, false); + currentAccount.getMessagesController().putChats(phoneGroupCall.chats, false); + onParticipantsLoad(phoneGroupCall.participants, true, "", phoneGroupCall.participants_next_offset, phoneGroupCall.call.version, phoneGroupCall.call.participants_count); + } + })); + } + private void loadGroupCall() { if (loadingGroupCall || SystemClock.elapsedRealtime() - lastGroupCallReloadTime < 30000) { return; @@ -959,7 +986,7 @@ public class ChatObject { return true; } - public void processGroupCallUpdate(AccountInstance accountInstance, TLRPC.TL_updateGroupCall update) { + public void processGroupCallUpdate(TLRPC.TL_updateGroupCall update) { if (call.version < update.call.version) { nextLoadOffset = null; loadMembers(true); @@ -978,6 +1005,9 @@ public class ChatObject { } public static boolean videoIsActive(TLRPC.TL_groupCallParticipant participant, boolean presentation, ChatObject.Call call) { + if (participant 
== null) { + return false; + } VoIPService service = VoIPService.getSharedInstance(); if (service == null) { return false; @@ -1008,9 +1038,15 @@ public class ChatObject { canStreamVideo = selfParticipant != null && selfParticipant.video_joined; boolean allowedVideoCount; boolean hasAnyVideo = false; - for (int i = 0; i < sortedParticipants.size(); i++) { + activeVideos = 0; + for (int i = 0, N = sortedParticipants.size(); i < N; i++) { TLRPC.TL_groupCallParticipant participant = sortedParticipants.get(i); - if (videoIsActive(participant, false, this) || videoIsActive(participant, true, this)) { + boolean cameraActive = videoIsActive(participant, false, this); + boolean screenActive = videoIsActive(participant, true, this); + if (!participant.self && (cameraActive || screenActive)) { + activeVideos++; + } + if (cameraActive || screenActive) { hasAnyVideo = true; if (canStreamVideo) { if (participant.videoIndex == 0) { @@ -1066,8 +1102,20 @@ public class ChatObject { } }; Collections.sort(sortedParticipants, comparator); + TLRPC.TL_groupCallParticipant lastParticipant = sortedParticipants.isEmpty() ? 
null : sortedParticipants.get(sortedParticipants.size() - 1); + if (videoIsActive(lastParticipant, false, this) || videoIsActive(lastParticipant, true, this)) { + if (call.unmuted_video_count > activeVideos) { + activeVideos = call.unmuted_video_count; + VoIPService voIPService = VoIPService.getSharedInstance(); + if (voIPService != null && voIPService.groupCall == this) { + if (voIPService.getVideoState(false) == Instance.VIDEO_STATE_ACTIVE || voIPService.getVideoState(true) == Instance.VIDEO_STATE_ACTIVE) { + activeVideos--; + } + } + } + } - if (sortedParticipants.size() > MAX_PARTICIPANTS_COUNT && (!ChatObject.canManageCalls(chat) || sortedParticipants.get(sortedParticipants.size() - 1).raise_hand_rating == 0)) { + if (sortedParticipants.size() > MAX_PARTICIPANTS_COUNT && (!ChatObject.canManageCalls(chat) || lastParticipant.raise_hand_rating == 0)) { for (int a = MAX_PARTICIPANTS_COUNT, N = sortedParticipants.size(); a < N; a++) { TLRPC.TL_groupCallParticipant p = sortedParticipants.get(MAX_PARTICIPANTS_COUNT); if (p.raise_hand_rating != 0) { @@ -1080,7 +1128,7 @@ public class ChatObject { } checkOnlineParticipants(); - if (!canStreamVideo && hasAnyVideo) { + if (!canStreamVideo && hasAnyVideo && videoNotAvailableParticipant != null) { visibleVideoParticipants.add(videoNotAvailableParticipant); } @@ -1152,6 +1200,17 @@ public class ChatObject { } } + public boolean canRecordVideo() { + if (!canStreamVideo) { + return false; + } + VoIPService voIPService = VoIPService.getSharedInstance(); + if (voIPService != null && voIPService.groupCall == this && (voIPService.getVideoState(false) == Instance.VIDEO_STATE_ACTIVE || voIPService.getVideoState(true) == Instance.VIDEO_STATE_ACTIVE)) { + return true; + } + return activeVideos < call.unmuted_video_limit; + } + public void saveActiveDates() { for (int a = 0, N = sortedParticipants.size(); a < N; a++) { TLRPC.TL_groupCallParticipant p = sortedParticipants.get(a); diff --git 
a/TMessagesProj/src/main/java/org/telegram/messenger/Emoji.java b/TMessagesProj/src/main/java/org/telegram/messenger/Emoji.java index 7c3001cf6..d3cd97598 100644 --- a/TMessagesProj/src/main/java/org/telegram/messenger/Emoji.java +++ b/TMessagesProj/src/main/java/org/telegram/messenger/Emoji.java @@ -308,7 +308,8 @@ public class Emoji { @Override public void setAlpha(int alpha) { - + placeholderPaint.setAlpha(alpha); + paint.setAlpha(alpha); } @Override @@ -572,6 +573,19 @@ public class Emoji { return size; } } + + @Override + public void draw(Canvas canvas, CharSequence text, int start, int end, float x, int top, int y, int bottom, Paint paint) { + boolean restoreAlpha = false; + if (paint.getAlpha() != 255) { + restoreAlpha = true; + getDrawable().setAlpha(paint.getAlpha()); + } + super.draw(canvas, text, start, end, x, top, y, bottom, paint); + if (restoreAlpha) { + getDrawable().setAlpha(255); + } + } } public static void addRecentEmoji(String code) { diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/FileLoadOperation.java b/TMessagesProj/src/main/java/org/telegram/messenger/FileLoadOperation.java index 806965e0f..b9159d141 100644 --- a/TMessagesProj/src/main/java/org/telegram/messenger/FileLoadOperation.java +++ b/TMessagesProj/src/main/java/org/telegram/messenger/FileLoadOperation.java @@ -1514,6 +1514,16 @@ public class FileLoadOperation { } fileReadStream.seek(fileOffset); fileReadStream.readFully(cdnCheckBytes, 0, availableSize); + + if (encryptFile) { + int offset = fileOffset / 16; + encryptIv[15] = (byte) (offset & 0xff); + encryptIv[14] = (byte) ((offset >> 8) & 0xff); + encryptIv[13] = (byte) ((offset >> 16) & 0xff); + encryptIv[12] = (byte) ((offset >> 24) & 0xff); + Utilities.aesCtrDecryptionByteArray(cdnCheckBytes, encryptKey, encryptIv, 0, availableSize, 0); + } + byte[] sha256 = Utilities.computeSHA256(cdnCheckBytes, 0, availableSize); if (!Arrays.equals(sha256, hash.hash)) { if (BuildVars.LOGS_ENABLED) { diff --git 
a/TMessagesProj/src/main/java/org/telegram/messenger/LocaleController.java b/TMessagesProj/src/main/java/org/telegram/messenger/LocaleController.java index fd424f565..a579d380d 100644 --- a/TMessagesProj/src/main/java/org/telegram/messenger/LocaleController.java +++ b/TMessagesProj/src/main/java/org/telegram/messenger/LocaleController.java @@ -827,6 +827,9 @@ public class LocaleController { String[] localesArr = locales.split("&"); for (String locale : localesArr) { LocaleInfo localeInfo = LocaleInfo.createWithString(locale); + if (localeInfo == null) { + continue; + } localeInfo.shortName = localeInfo.shortName.replace("-", "_"); if (localeInfo != null) { unofficialLanguages.add(localeInfo); diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/MediaController.java b/TMessagesProj/src/main/java/org/telegram/messenger/MediaController.java index c37ae1398..34b3ed41b 100644 --- a/TMessagesProj/src/main/java/org/telegram/messenger/MediaController.java +++ b/TMessagesProj/src/main/java/org/telegram/messenger/MediaController.java @@ -3146,7 +3146,7 @@ public class MediaController implements AudioManager.OnAudioFocusChangeListener, FileLog.e(e); } String name = messageObject.getFileName(); - if (!TextUtils.isEmpty(name) && messageObject.getDuration() >= 20 * 60) { + if (!TextUtils.isEmpty(name) && messageObject.getDuration() >= 10 * 60) { SharedPreferences preferences = ApplicationLoader.applicationContext.getSharedPreferences("media_saved_pos", Activity.MODE_PRIVATE); float pos = preferences.getFloat(name, -1); if (pos > 0 && pos < 0.999f) { diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/MediaDataController.java b/TMessagesProj/src/main/java/org/telegram/messenger/MediaDataController.java index dd876c826..eba125d44 100644 --- a/TMessagesProj/src/main/java/org/telegram/messenger/MediaDataController.java +++ b/TMessagesProj/src/main/java/org/telegram/messenger/MediaDataController.java @@ -42,6 +42,7 @@ import 
androidx.core.graphics.drawable.IconCompat; import org.telegram.SQLite.SQLiteCursor; import org.telegram.SQLite.SQLiteDatabase; +import org.telegram.SQLite.SQLiteException; import org.telegram.SQLite.SQLitePreparedStatement; import org.telegram.messenger.support.SparseLongArray; import org.telegram.tgnet.ConnectionsManager; @@ -5161,7 +5162,7 @@ public class MediaDataController extends BaseController { //---------------- DRAFT END ---------------- - private SparseArray botInfos = new SparseArray<>(); + private HashMap botInfos = new HashMap<>(); private LongSparseArray botKeyboards = new LongSparseArray<>(); private SparseLongArray botKeyboardsByMids = new SparseLongArray(); @@ -5216,9 +5217,27 @@ public class MediaDataController extends BaseController { }); } + private TLRPC.BotInfo loadBotInfoInternal(final int uid, final long dialogId) throws SQLiteException { + TLRPC.BotInfo botInfo = null; + SQLiteCursor cursor = getMessagesStorage().getDatabase().queryFinalized(String.format(Locale.US, "SELECT info FROM bot_info_v2 WHERE uid = %d AND dialogId = %d", uid, dialogId)); + if (cursor.next()) { + NativeByteBuffer data; + + if (!cursor.isNull(0)) { + data = cursor.byteBufferValue(0); + if (data != null) { + botInfo = TLRPC.BotInfo.TLdeserialize(data, data.readInt32(false), false); + data.reuse(); + } + } + } + cursor.dispose(); + return botInfo; + } + public void loadBotInfo(final int uid, final long dialogId, boolean cache, final int classGuid) { if (cache) { - TLRPC.BotInfo botInfo = botInfos.get(uid); + TLRPC.BotInfo botInfo = botInfos.get(uid + "_" + dialogId); if (botInfo != null) { getNotificationCenter().postNotificationName(NotificationCenter.botInfoDidLoad, botInfo, classGuid); return; @@ -5226,21 +5245,7 @@ public class MediaDataController extends BaseController { } getMessagesStorage().getStorageQueue().postRunnable(() -> { try { - TLRPC.BotInfo botInfo = null; - SQLiteCursor cursor = 
getMessagesStorage().getDatabase().queryFinalized(String.format(Locale.US, "SELECT info FROM bot_info_v2 WHERE uid = %d AND dialogId = %d", uid, dialogId)); - if (cursor.next()) { - NativeByteBuffer data; - - if (!cursor.isNull(0)) { - data = cursor.byteBufferValue(0); - if (data != null) { - botInfo = TLRPC.BotInfo.TLdeserialize(data, data.readInt32(false), false); - data.reuse(); - } - } - } - cursor.dispose(); - + TLRPC.BotInfo botInfo = loadBotInfoInternal(uid, dialogId); if (botInfo != null) { final TLRPC.BotInfo botInfoFinal = botInfo; AndroidUtilities.runOnUIThread(() -> getNotificationCenter().postNotificationName(NotificationCenter.botInfoDidLoad, botInfoFinal, classGuid)); @@ -5295,7 +5300,7 @@ public class MediaDataController extends BaseController { if (botInfo == null) { return; } - botInfos.put(botInfo.user_id, botInfo); + botInfos.put(botInfo.user_id + "_" + dialogId, botInfo); getMessagesStorage().getStorageQueue().postRunnable(() -> { try { SQLitePreparedStatement state = getMessagesStorage().getDatabase().executeFast("REPLACE INTO bot_info_v2 VALUES(?, ?, ?)"); @@ -5314,6 +5319,34 @@ public class MediaDataController extends BaseController { }); } + public void updateBotInfo(long dialogId, TLRPC.TL_updateBotCommands update) { + TLRPC.BotInfo botInfo = botInfos.get(update.bot_id + "_" + dialogId); + if (botInfo != null) { + botInfo.commands = update.commands; + getNotificationCenter().postNotificationName(NotificationCenter.botInfoDidLoad, botInfo, 0); + } + getMessagesStorage().getStorageQueue().postRunnable(() -> { + try { + TLRPC.BotInfo info = loadBotInfoInternal(update.bot_id, dialogId); + if (info != null) { + info.commands = update.commands; + } + SQLitePreparedStatement state = getMessagesStorage().getDatabase().executeFast("REPLACE INTO bot_info_v2 VALUES(?, ?, ?)"); + state.requery(); + NativeByteBuffer data = new NativeByteBuffer(info.getObjectSize()); + info.serializeToStream(data); + state.bindInteger(1, info.user_id); + 
state.bindLong(2, dialogId); + state.bindByteBuffer(3, data); + state.step(); + data.reuse(); + state.dispose(); + } catch (Exception e) { + FileLog.e(e); + } + }); + } + //---------------- BOT END ---------------- //---------------- EMOJI START ---------------- diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/MessageObject.java b/TMessagesProj/src/main/java/org/telegram/messenger/MessageObject.java index ad02d1859..7a55d7244 100644 --- a/TMessagesProj/src/main/java/org/telegram/messenger/MessageObject.java +++ b/TMessagesProj/src/main/java/org/telegram/messenger/MessageObject.java @@ -201,6 +201,10 @@ public class MessageObject { " . " }; + public int getEmojiOnlyCount() { + return emojiOnlyCount; + } + public static class SendAnimationData { public float x; public float y; @@ -899,7 +903,7 @@ public class MessageObject { } public MessageObject(int accountNum, TLRPC.Message message, MessageObject replyToMessage, AbstractMap users, AbstractMap chats, SparseArray sUsers, SparseArray sChats, boolean generateLayout, boolean checkMediaExists, long eid) { - Theme.createChatResources(null, true); + Theme.createCommonChatResources(null); currentAccount = accountNum; messageOwner = message; @@ -1892,7 +1896,11 @@ public class MessageObject { id = 0; } else if (object instanceof TLRPC.User) { TLRPC.User user = (TLRPC.User) object; - name = ContactsController.formatName(user.first_name, user.last_name); + if (user.deleted) { + name = LocaleController.getString("HiddenName", R.string.HiddenName); + } else { + name = ContactsController.formatName(user.first_name, user.last_name); + } username = user.username; id = user.id; } else { @@ -2362,7 +2370,7 @@ public class MessageObject { } wantedBotKeyboardWidth = 0; if (messageOwner.reply_markup instanceof TLRPC.TL_replyInlineMarkup || messageOwner.reactions != null && !messageOwner.reactions.results.isEmpty()) { - Theme.createChatResources(null, true); + Theme.createCommonChatResources(null); if (botButtonsLayout == 
null) { botButtonsLayout = new StringBuilder(); } else { @@ -2485,9 +2493,18 @@ public class MessageObject { } } } - messageText = LocaleController.formatString("ActionGroupCallEnded", R.string.ActionGroupCallEnded, time); + + if (messageOwner.peer_id instanceof TLRPC.TL_peerChat || isSupergroup()) { + if (isOut()) { + messageText = LocaleController.formatString("ActionGroupCallEndedByYou", R.string.ActionGroupCallEndedByYou, time); + } else { + messageText = replaceWithLink(LocaleController.formatString("ActionGroupCallEndedBy", R.string.ActionGroupCallEndedBy, time), "un1", fromObject); + } + } else { + messageText = LocaleController.formatString("ActionGroupCallEnded", R.string.ActionGroupCallEnded, time); + } } else { - if (isSupergroup()) { + if (messageOwner.peer_id instanceof TLRPC.TL_peerChat || isSupergroup()) { if (isOut()) { messageText = LocaleController.getString("ActionGroupCallStartedByYou", R.string.ActionGroupCallStartedByYou); } else { @@ -2967,9 +2984,9 @@ public class MessageObject { } else if (messageOwner.translated) { messageText = messageOwner.translatedMessage; } else { - if (messageOwner.message != null) { + if (messageOwner.message != null && messageOwner.message.length() > 200) { try { - messageText = AndroidUtilities.BAD_CHARS_MESSAGE_PATTERN.matcher(messageOwner.message).replaceAll(""); + messageText = AndroidUtilities.BAD_CHARS_MESSAGE_PATTERN.matcher(messageOwner.message).replaceAll("\u200C"); } catch (Throwable e) { messageText = messageOwner.message; } @@ -3321,6 +3338,9 @@ public class MessageObject { if (size.type.equals(photoObject.type)) { photoObject.location = size.location; break; + } else if ("s".equals(photoObject.type) && size instanceof TLRPC.TL_photoStrippedSize) { + photoThumbs.set(a, size); + break; } } } @@ -4681,7 +4701,7 @@ public class MessageObject { int ttl = Math.max(messageOwner.ttl, messageOwner.media.ttl_seconds); return ttl > 0 && ((messageOwner.media instanceof TLRPC.TL_messageMediaPhoto || isVideo() || 
isGif()) && ttl <= 60 || isRoundVideo()); } else if (messageOwner instanceof TLRPC.TL_message) { - return (messageOwner.media instanceof TLRPC.TL_messageMediaPhoto || messageOwner.media instanceof TLRPC.TL_messageMediaDocument) && messageOwner.media.ttl_seconds != 0; + return (messageOwner.media != null && messageOwner.media.ttl_seconds != 0) && (messageOwner.media instanceof TLRPC.TL_messageMediaPhoto || messageOwner.media instanceof TLRPC.TL_messageMediaDocument); } return false; } @@ -4693,7 +4713,7 @@ public class MessageObject { if (messageOwner instanceof TLRPC.TL_message_secret) { return (((messageOwner.media instanceof TLRPC.TL_messageMediaPhoto) || isGif()) && messageOwner.ttl > 0 && messageOwner.ttl <= 60 || isVoice() || isRoundVideo() || isVideo()); } else if (messageOwner instanceof TLRPC.TL_message) { - return (messageOwner.media instanceof TLRPC.TL_messageMediaPhoto || messageOwner.media instanceof TLRPC.TL_messageMediaDocument) && messageOwner.media.ttl_seconds != 0; + return (messageOwner.media != null && messageOwner.media.ttl_seconds != 0) && (messageOwner.media instanceof TLRPC.TL_messageMediaPhoto || messageOwner.media instanceof TLRPC.TL_messageMediaDocument); } return false; } @@ -5338,7 +5358,7 @@ public class MessageObject { } public boolean shouldAnimateSending() { - return isSending() && (type == MessageObject.TYPE_ROUND_VIDEO || isVoice() || isAnyKindOfSticker() && sendAnimationData != null); + return isSending() && (type == MessageObject.TYPE_ROUND_VIDEO || isVoice() || (isAnyKindOfSticker() && sendAnimationData != null) || (messageText != null && sendAnimationData != null)); } public boolean hasAttachedStickers() { diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/MessagesController.java b/TMessagesProj/src/main/java/org/telegram/messenger/MessagesController.java index d34c01f57..f25b71a97 100644 --- a/TMessagesProj/src/main/java/org/telegram/messenger/MessagesController.java +++ 
b/TMessagesProj/src/main/java/org/telegram/messenger/MessagesController.java @@ -1906,6 +1906,7 @@ public class MessagesController extends BaseController implements NotificationCe SharedPreferences.Editor editor = mainPreferences.edit(); editor.putStringSet("pendingSuggestions", pendingSuggestions); editor.commit(); + getNotificationCenter().postNotificationName(NotificationCenter.newSuggestionsAvailable); } else { return; } @@ -3189,6 +3190,7 @@ public class MessagesController extends BaseController implements NotificationCe if (chatFull.call != null) { TLRPC.TL_phone_getGroupCall req = new TLRPC.TL_phone_getGroupCall(); req.call = chatFull.call; + req.limit = 20; getConnectionsManager().sendRequest(req, (response, error) -> AndroidUtilities.runOnUIThread(() -> { if (response != null) { TLRPC.TL_phone_groupCall groupCall = (TLRPC.TL_phone_groupCall) response; @@ -12792,6 +12794,11 @@ public class MessagesController extends BaseController implements NotificationCe updatesOnMainThread.add(baseUpdate); } else if (baseUpdate instanceof TLRPC.TL_updateGroupCallConnection) { + } else if (baseUpdate instanceof TLRPC.TL_updateBotCommands) { + if (updatesOnMainThread == null) { + updatesOnMainThread = new ArrayList<>(); + } + updatesOnMainThread.add(baseUpdate); } else if (baseUpdate instanceof TLRPC.TL_updatePhoneCallSignalingData) { if (updatesOnMainThread == null) { updatesOnMainThread = new ArrayList<>(); @@ -13258,7 +13265,7 @@ public class MessagesController extends BaseController implements NotificationCe updateGroupCall.call = new TLRPC.TL_groupCallDiscarded(); updateGroupCall.call.id = call.call.id; updateGroupCall.call.access_hash = call.call.access_hash; - call.processGroupCallUpdate(getAccountInstance(), updateGroupCall); + call.processGroupCallUpdate(updateGroupCall); if (VoIPService.getSharedInstance() != null) { VoIPService.getSharedInstance().onGroupCallUpdated(updateGroupCall.call); } @@ -13278,7 +13285,7 @@ public class MessagesController extends 
BaseController implements NotificationCe updateGroupCall.call = new TLRPC.TL_groupCallDiscarded(); updateGroupCall.call.id = call.call.id; updateGroupCall.call.access_hash = call.call.access_hash; - call.processGroupCallUpdate(getAccountInstance(), updateGroupCall); + call.processGroupCallUpdate(updateGroupCall); if (VoIPService.getSharedInstance() != null) { VoIPService.getSharedInstance().onGroupCallUpdated(updateGroupCall.call); } @@ -13303,6 +13310,9 @@ public class MessagesController extends BaseController implements NotificationCe chat.default_banned_rights = update.default_banned_rights; AndroidUtilities.runOnUIThread(() -> getNotificationCenter().postNotificationName(NotificationCenter.channelRightsUpdated, chat)); } + } else if (baseUpdate instanceof TLRPC.TL_updateBotCommands) { + TLRPC.TL_updateBotCommands update = (TLRPC.TL_updateBotCommands) baseUpdate; + getMediaDataController().updateBotInfo(MessageObject.getPeerId(update.peer), update); } else if (baseUpdate instanceof TLRPC.TL_updateStickerSets) { TLRPC.TL_updateStickerSets update = (TLRPC.TL_updateStickerSets) baseUpdate; getMediaDataController().loadStickers(MediaDataController.TYPE_IMAGE, false, true); @@ -13356,7 +13366,7 @@ public class MessagesController extends BaseController implements NotificationCe TLRPC.TL_updateGroupCall update = (TLRPC.TL_updateGroupCall) baseUpdate; ChatObject.Call call = groupCalls.get(update.call.id); if (call != null) { - call.processGroupCallUpdate(getAccountInstance(), update); + call.processGroupCallUpdate(update); TLRPC.Chat chat = getChat(call.chatId); if (chat != null) { chat.call_active = update.call instanceof TLRPC.TL_groupCall; diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/MessagesStorage.java b/TMessagesProj/src/main/java/org/telegram/messenger/MessagesStorage.java index fff8c3e27..aa5bc5ead 100644 --- a/TMessagesProj/src/main/java/org/telegram/messenger/MessagesStorage.java +++ 
b/TMessagesProj/src/main/java/org/telegram/messenger/MessagesStorage.java @@ -6481,7 +6481,7 @@ public class MessagesStorage extends BaseController { } } else if (load_type == 1) { long holeMessageId = 0; - cursor = database.queryFinalized(String.format(Locale.US, "SELECT start, end FROM messages_holes WHERE uid = %d AND start >= %d AND start != 1 AND end != 1 OR start < %d AND end > %d ORDER BY start ASC LIMIT 1", dialogId, max_id, max_id, max_id)); + cursor = database.queryFinalized(String.format(Locale.US, "SELECT start, end FROM messages_holes WHERE uid = %d AND (start >= %d AND start != 1 AND end != 1 OR start < %d AND end > %d) ORDER BY start ASC LIMIT 1", dialogId, max_id, max_id, max_id)); if (cursor.next()) { holeMessageId = cursor.intValue(0); if (channelId != 0) { diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/NotificationCenter.java b/TMessagesProj/src/main/java/org/telegram/messenger/NotificationCenter.java index 48f841a06..ed2b51cc5 100644 --- a/TMessagesProj/src/main/java/org/telegram/messenger/NotificationCenter.java +++ b/TMessagesProj/src/main/java/org/telegram/messenger/NotificationCenter.java @@ -605,6 +605,10 @@ public class NotificationCenter { } } + public void removeDelayed(Runnable runnable) { + delayedRunnables.remove(runnable); + } + private static class AllowedNotifications { int[] allowedIds; diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/SendMessagesHelper.java b/TMessagesProj/src/main/java/org/telegram/messenger/SendMessagesHelper.java index 8226c65c5..a71ff3e6c 100644 --- a/TMessagesProj/src/main/java/org/telegram/messenger/SendMessagesHelper.java +++ b/TMessagesProj/src/main/java/org/telegram/messenger/SendMessagesHelper.java @@ -1157,8 +1157,11 @@ public boolean retriedToSend; message.httpLocation = null; message.obj.messageOwner.attachPath = cacheFile.toString(); if (!document.thumbs.isEmpty()) { - message.photoSize = document.thumbs.get(0); - message.locationParent = document; + TLRPC.PhotoSize 
photoSize = document.thumbs.get(0); + if (!(photoSize instanceof TLRPC.TL_photoStrippedSize)) { + message.photoSize = photoSize; + message.locationParent = document; + } } ArrayList messages = new ArrayList<>(); messages.add(messageObject.messageOwner); @@ -2376,8 +2379,11 @@ public boolean retriedToSend; delayedMessage.inputUploadMedia = uploadedDocument; delayedMessage.performMediaUpload = performMediaUpload; if (!document.thumbs.isEmpty()) { - delayedMessage.photoSize = document.thumbs.get(0); - delayedMessage.locationParent = document; + TLRPC.PhotoSize photoSize = document.thumbs.get(0); + if (!(photoSize instanceof TLRPC.TL_photoStrippedSize)) { + delayedMessage.photoSize = photoSize; + delayedMessage.locationParent = document; + } } delayedMessage.videoEditedInfo = videoEditedInfo; } else if (type == 7) { @@ -2406,8 +2412,11 @@ public boolean retriedToSend; delayedMessage.type = 2; delayedMessage.obj = messageObject; if (!document.thumbs.isEmpty()) { - delayedMessage.photoSize = document.thumbs.get(0); - delayedMessage.locationParent = document; + TLRPC.PhotoSize photoSize = document.thumbs.get(0); + if (!(photoSize instanceof TLRPC.TL_photoStrippedSize)) { + delayedMessage.photoSize = photoSize; + delayedMessage.locationParent = document; + } } delayedMessage.parentObject = parentObject; delayedMessage.inputUploadMedia = uploadedDocument; @@ -3795,8 +3804,11 @@ public boolean retriedToSend; delayedMessage.inputUploadMedia = uploadedDocument; delayedMessage.performMediaUpload = performMediaUpload; if (!document.thumbs.isEmpty()) { - delayedMessage.photoSize = document.thumbs.get(0); - delayedMessage.locationParent = document; + TLRPC.PhotoSize photoSize = document.thumbs.get(0); + if (!(photoSize instanceof TLRPC.TL_photoStrippedSize)) { + delayedMessage.photoSize = photoSize; + delayedMessage.locationParent = document; + } } delayedMessage.videoEditedInfo = videoEditedInfo; } else if (type == 6) { @@ -3858,8 +3870,11 @@ public boolean retriedToSend; 
delayedMessage.inputUploadMedia = uploadedMedia; delayedMessage.performMediaUpload = performMediaUpload; if (!document.thumbs.isEmpty()) { - delayedMessage.photoSize = document.thumbs.get(0); - delayedMessage.locationParent = document; + TLRPC.PhotoSize photoSize = document.thumbs.get(0); + if (!(photoSize instanceof TLRPC.TL_photoStrippedSize)) { + delayedMessage.photoSize = photoSize; + delayedMessage.locationParent = document; + } } } } else if (type == 8) { @@ -4520,7 +4535,7 @@ public boolean retriedToSend; putToDelayedMessages(location, message); getFileLoader().uploadFile(location, message.sendRequest == null, false, ConnectionsManager.FileTypeFile); putToUploadingMessages(message.obj); - } else if (media.thumb == null && message.photoSize != null) { + } else if (media.thumb == null && message.photoSize != null && !(message.photoSize instanceof TLRPC.TL_photoStrippedSize)) { String location = FileLoader.getDirectory(FileLoader.MEDIA_DIR_CACHE) + "/" + message.photoSize.location.volume_id + "_" + message.photoSize.location.local_id + ".jpg"; putToDelayedMessages(location, message); getFileLoader().uploadFile(location, false, true, ConnectionsManager.FileTypePhoto); @@ -5437,9 +5452,9 @@ public boolean retriedToSend; private void updateMediaPaths(MessageObject newMsgObj, TLRPC.Message sentMessage, int newMsgId, String originalPath, boolean post) { TLRPC.Message newMsg = newMsgObj.messageOwner; + TLRPC.PhotoSize strippedNew = null; if (newMsg.media != null) { TLRPC.PhotoSize strippedOld = null; - TLRPC.PhotoSize strippedNew = null; TLObject photoObject = null; if (newMsgObj.isLiveLocation() && sentMessage.media instanceof TLRPC.TL_messageMediaGeoLive) { newMsg.media.period = sentMessage.media.period; @@ -5540,6 +5555,11 @@ public boolean retriedToSend; String fileName = size2.location.volume_id + "_" + size2.location.local_id; File cacheFile = new File(FileLoader.getDirectory(FileLoader.MEDIA_DIR_CACHE), fileName + ".jpg"); cacheFile.delete(); + if 
("s".equals(size2.type) && strippedNew != null) { + newMsg.media.photo.sizes.set(b, strippedNew); + ImageLocation location = ImageLocation.getForPhoto(strippedNew, sentMessage.media.photo); + ImageLoader.getInstance().replaceImageInCache(fileName, location.getKey(sentMessage, null, false), location, post); + } } } } @@ -6904,7 +6924,7 @@ public boolean retriedToSend; originalPath = info.uri.toString(); } - if (tempPath != null && (tempPath.endsWith(".gif") || tempPath.endsWith(".webp"))) { + if (tempPath != null && info.ttl <= 0 && (tempPath.endsWith(".gif") || tempPath.endsWith(".webp"))) { continue; } else if (ImageLoader.shouldSendImageAsDocument(info.path, info.uri)) { continue; @@ -7383,7 +7403,7 @@ public boolean retriedToSend; if (forceDocument || ImageLoader.shouldSendImageAsDocument(info.path, info.uri)) { isDocument = true; extension = tempPath != null ? FileLoader.getFileExtension(new File(tempPath)) : ""; - } else if (tempPath != null && (tempPath.endsWith(".gif") || tempPath.endsWith(".webp"))) { + } else if (tempPath != null && (tempPath.endsWith(".gif") || tempPath.endsWith(".webp")) && info.ttl <= 0) { if (tempPath.endsWith(".gif")) { extension = "gif"; } else { diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/SharedConfig.java b/TMessagesProj/src/main/java/org/telegram/messenger/SharedConfig.java index 3d1202f36..2c528033c 100644 --- a/TMessagesProj/src/main/java/org/telegram/messenger/SharedConfig.java +++ b/TMessagesProj/src/main/java/org/telegram/messenger/SharedConfig.java @@ -928,8 +928,7 @@ public class SharedConfig { @Override public void start() { if (loader != null) return; - synchronized (this) - { + synchronized (this) { loader = new WsLoader(); port = ProxyManager.mkPort(); loader.init(bean, port); @@ -945,8 +944,7 @@ public class SharedConfig { if (loader == null) return; ConnectionsManager.setProxySettings(false, address, port, username, password, secret); UIUtil.runOnIoDispatcher(() -> { - synchronized (this) - { + 
synchronized (this) { if (loader == null) return; loader.stop(); @@ -2214,31 +2212,33 @@ public class SharedConfig { } public static void checkSaveToGalleryFiles() { - try { - File telegramPath = EnvUtil.getTelegramPath(); - File imagePath = new File(telegramPath, "images"); - imagePath.mkdirs(); - File videoPath = new File(telegramPath, "videos"); - videoPath.mkdirs(); + Utilities.globalQueue.postRunnable(() -> { + try { + File telegramPath = EnvUtil.getTelegramPath(); + File imagePath = new File(telegramPath, "images"); + imagePath.mkdirs(); + File videoPath = new File(telegramPath, "videos"); + videoPath.mkdirs(); - if (saveToGallery) { - if (imagePath.isDirectory()) { - new File(imagePath, ".nomedia").delete(); - } - if (videoPath.isDirectory()) { - new File(videoPath, ".nomedia").delete(); - } - } else { - if (imagePath.isDirectory()) { - AndroidUtilities.createEmptyFile(new File(imagePath, ".nomedia")); - } - if (videoPath.isDirectory()) { - AndroidUtilities.createEmptyFile(new File(videoPath, ".nomedia")); + if (saveToGallery) { + if (imagePath.isDirectory()) { + new File(imagePath, ".nomedia").delete(); + } + if (videoPath.isDirectory()) { + new File(videoPath, ".nomedia").delete(); + } + } else { + if (imagePath.isDirectory()) { + AndroidUtilities.createEmptyFile(new File(imagePath, ".nomedia")); + } + if (videoPath.isDirectory()) { + AndroidUtilities.createEmptyFile(new File(videoPath, ".nomedia")); + } } + } catch (Throwable e) { + FileLog.e(e); } - } catch (Throwable e) { - FileLog.e(e); - } + }); } public static int getChatSwipeAction(int currentAccount) { diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/UserConfig.java b/TMessagesProj/src/main/java/org/telegram/messenger/UserConfig.java index 8893bd982..82a230927 100644 --- a/TMessagesProj/src/main/java/org/telegram/messenger/UserConfig.java +++ b/TMessagesProj/src/main/java/org/telegram/messenger/UserConfig.java @@ -225,7 +225,7 @@ public class UserConfig extends BaseController { } 
public static boolean isValidAccount(int num) { - return num >= 0 && SharedConfig.activeAccounts.contains(num) && getInstance(num).isClientActivated(); + return num >= 0 && SharedConfig.activeAccounts.contains(num) && getInstance(num).isClientActivated(); } public boolean isClientActivated() { diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/camera/CameraController.java b/TMessagesProj/src/main/java/org/telegram/messenger/camera/CameraController.java index 419077ca7..95f7e7d34 100644 --- a/TMessagesProj/src/main/java/org/telegram/messenger/camera/CameraController.java +++ b/TMessagesProj/src/main/java/org/telegram/messenger/camera/CameraController.java @@ -522,7 +522,7 @@ public class CameraController implements MediaRecorder.OnInfoListener { } Camera.Parameters params = camera.getParameters(); - session.configureRoundCamera(); + session.configureRoundCamera(true); if (configureCallback != null) { configureCallback.run(); } @@ -605,13 +605,12 @@ public class CameraController implements MediaRecorder.OnInfoListener { Camera.Parameters params = camera.getParameters(); params.setFlashMode(session.getCurrentFlashMode().equals(Camera.Parameters.FLASH_MODE_ON) ? 
Camera.Parameters.FLASH_MODE_TORCH : Camera.Parameters.FLASH_MODE_OFF); camera.setParameters(params); + session.onStartRecord(); } catch (Exception e) { FileLog.e(e); } AndroidUtilities.runOnUIThread(() -> { - cameraView.startRecording(path, () -> { - finishRecordingVideo(); - }); + cameraView.startRecording(path, this::finishRecordingVideo); if (onVideoStartRecord != null) { onVideoStartRecord.run(); @@ -812,16 +811,21 @@ public class CameraController implements MediaRecorder.OnInfoListener { } public static Size chooseOptimalSize(List choices, int width, int height, Size aspectRatio) { + List bigEnoughWithAspectRatio = new ArrayList<>(); List bigEnough = new ArrayList<>(); int w = aspectRatio.getWidth(); int h = aspectRatio.getHeight(); for (int a = 0; a < choices.size(); a++) { Size option = choices.get(a); if (option.getHeight() == option.getWidth() * h / w && option.getWidth() >= width && option.getHeight() >= height) { + bigEnoughWithAspectRatio.add(option); + } else if (option.getHeight() * option.getWidth() <= width * height * 4) { bigEnough.add(option); } } - if (bigEnough.size() > 0) { + if (bigEnoughWithAspectRatio.size() > 0) { + return Collections.min(bigEnoughWithAspectRatio, new CompareSizesByArea()); + } else if (bigEnough.size() > 0) { return Collections.min(bigEnough, new CompareSizesByArea()); } else { return Collections.max(choices, new CompareSizesByArea()); diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/camera/CameraSession.java b/TMessagesProj/src/main/java/org/telegram/messenger/camera/CameraSession.java index 4dbd31dbd..6cfcfa79c 100644 --- a/TMessagesProj/src/main/java/org/telegram/messenger/camera/CameraSession.java +++ b/TMessagesProj/src/main/java/org/telegram/messenger/camera/CameraSession.java @@ -48,6 +48,8 @@ public class CameraSession { private boolean flipFront = true; private float currentZoom; private boolean optimizeForBarcode; + private boolean useTorch; + private boolean isRound; public static final int 
ORIENTATION_HYSTERESIS = 5; @@ -59,11 +61,12 @@ public class CameraSession { } }; - public CameraSession(CameraInfo info, Size preview, Size picture, int format) { + public CameraSession(CameraInfo info, Size preview, Size picture, int format, boolean round) { previewSize = preview; pictureSize = picture; pictureFormat = format; cameraInfo = info; + isRound = round; SharedPreferences sharedPreferences = ApplicationLoader.applicationContext.getSharedPreferences("camera", Activity.MODE_PRIVATE); currentFlashMode = sharedPreferences.getString(cameraInfo.frontCamera != 0 ? "flashMode_front" : "flashMode", Camera.Parameters.FLASH_MODE_OFF); @@ -189,7 +192,7 @@ public class CameraSession { return sameTakePictureOrientation; } - protected void configureRoundCamera() { + protected void configureRoundCamera(boolean initial) { try { isVideo = true; Camera camera = cameraInfo.camera; @@ -241,16 +244,17 @@ public class CameraSession { diffOrientation = currentOrientation - displayOrientation; if (params != null) { - if (BuildVars.LOGS_ENABLED) { + if (initial && BuildVars.LOGS_ENABLED) { FileLog.d("set preview size = " + previewSize.getWidth() + " " + previewSize.getHeight()); } params.setPreviewSize(previewSize.getWidth(), previewSize.getHeight()); - if (BuildVars.LOGS_ENABLED) { + if (initial && BuildVars.LOGS_ENABLED) { FileLog.d("set picture size = " + pictureSize.getWidth() + " " + pictureSize.getHeight()); } params.setPictureSize(pictureSize.getWidth(), pictureSize.getHeight()); params.setPictureFormat(pictureFormat); params.setRecordingHint(true); + maxZoom = params.getMaxZoom(); String desiredMode = Camera.Parameters.FOCUS_MODE_CONTINUOUS_VIDEO; if (params.getSupportedFocusModes().contains(desiredMode)) { @@ -281,15 +285,19 @@ public class CameraSession { // } params.setFlashMode(Camera.Parameters.FLASH_MODE_OFF); + params.setZoom((int) (currentZoom * maxZoom)); try { camera.setParameters(params); } catch (Exception e) { + throw new RuntimeException(e); // } if 
(params.getMaxNumMeteringAreas() > 0) { meteringAreaSupported = true; } + + } } } catch (Throwable e) { @@ -390,7 +398,8 @@ public class CameraSession { } catch (Exception e) { // } - params.setFlashMode(currentFlashMode); + params.setFlashMode(useTorch ? Camera.Parameters.FLASH_MODE_TORCH : currentFlashMode); + try { camera.setParameters(params); } catch (Exception e) { @@ -444,9 +453,20 @@ public class CameraSession { return maxZoom; } - protected void setZoom(float value) { + public void onStartRecord() { + isVideo = true; + } + + public void setZoom(float value) { currentZoom = value; - configurePhotoCamera(); + if (isVideo && Camera.Parameters.FLASH_MODE_ON.equals(currentFlashMode)) { + useTorch = true; + } + if (isRound) { + configureRoundCamera(false); + } else { + configurePhotoCamera(); + } } protected void configureRecorder(int quality, MediaRecorder recorder) { @@ -480,6 +500,7 @@ public class CameraSession { protected void stopVideoRecording() { isVideo = false; + useTorch = false; configurePhotoCamera(); } diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/camera/CameraView.java b/TMessagesProj/src/main/java/org/telegram/messenger/camera/CameraView.java index f21a3006d..2b7efead8 100644 --- a/TMessagesProj/src/main/java/org/telegram/messenger/camera/CameraView.java +++ b/TMessagesProj/src/main/java/org/telegram/messenger/camera/CameraView.java @@ -12,7 +12,6 @@ import android.animation.Animator; import android.animation.AnimatorListenerAdapter; import android.animation.ValueAnimator; import android.annotation.SuppressLint; -import android.app.Activity; import android.content.Context; import android.graphics.Bitmap; import android.graphics.Canvas; @@ -48,7 +47,6 @@ import android.view.HapticFeedbackConstants; import android.view.Surface; import android.view.TextureView; import android.view.View; -import android.view.WindowManager; import android.view.animation.DecelerateInterpolator; import android.widget.FrameLayout; import 
android.widget.ImageView; @@ -56,10 +54,10 @@ import android.widget.ImageView; import androidx.core.graphics.ColorUtils; import org.telegram.messenger.AndroidUtilities; -import org.telegram.messenger.ApplicationLoader; import org.telegram.messenger.BuildVars; import org.telegram.messenger.DispatchQueue; import org.telegram.messenger.FileLog; +import org.telegram.messenger.SharedConfig; import org.telegram.messenger.Utilities; import org.telegram.messenger.video.MP4Builder; import org.telegram.messenger.video.Mp4Movie; @@ -75,6 +73,7 @@ import java.nio.FloatBuffer; import java.util.ArrayList; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; import javax.microedition.khronos.egl.EGL10; import javax.microedition.khronos.egl.EGLConfig; @@ -86,7 +85,6 @@ import javax.microedition.khronos.opengles.GL; @SuppressLint("NewApi") public class CameraView extends FrameLayout implements TextureView.SurfaceTextureListener { - private Size previewSize; private Size pictureSize; CameraInfo info; @@ -171,6 +169,9 @@ public class CameraView extends FrameLayout implements TextureView.SurfaceTextur ValueAnimator flipAnimator; boolean flipHalfReached; + private int fpsLimit = -1; + long nextFrameTimeNs; + public void startSwitchingAnimation() { if (flipAnimator != null) { flipAnimator.cancel(); @@ -243,7 +244,6 @@ public class CameraView extends FrameLayout implements TextureView.SurfaceTextur invalidate(); } - public interface CameraViewDelegate { void onCameraCreated(Camera camera); void onCameraInit(); @@ -275,14 +275,14 @@ public class CameraView extends FrameLayout implements TextureView.SurfaceTextur @Override protected void onMeasure(int widthMeasureSpec, int heightMeasureSpec) { - if (pictureSize != null && cameraSession != null) { + if (previewSize != null && cameraSession != null) { int frameWidth, frameHeight; if (cameraSession.getWorldAngle() == 90 || cameraSession.getWorldAngle() == 270) { - 
frameWidth = pictureSize.getWidth(); - frameHeight = pictureSize.getHeight(); + frameWidth = previewSize.getWidth(); + frameHeight = previewSize.getHeight(); } else { - frameWidth = pictureSize.getHeight(); - frameHeight = pictureSize.getWidth(); + frameWidth = previewSize.getHeight(); + frameHeight = previewSize.getWidth(); } float s = Math.max(MeasureSpec.getSize(widthMeasureSpec) / (float) frameWidth , MeasureSpec.getSize(heightMeasureSpec) / (float) frameHeight); blurredStubView.getLayoutParams().width = textureView.getLayoutParams().width = (int) (s * frameWidth); @@ -292,17 +292,17 @@ public class CameraView extends FrameLayout implements TextureView.SurfaceTextur } public float getTextureHeight(float width, float height) { - if (pictureSize == null || cameraSession == null) { + if (previewSize == null || cameraSession == null) { return height; } int frameWidth, frameHeight; if (cameraSession.getWorldAngle() == 90 || cameraSession.getWorldAngle() == 270) { - frameWidth = pictureSize.getWidth(); - frameHeight = pictureSize.getHeight(); + frameWidth = previewSize.getWidth(); + frameHeight = previewSize.getHeight(); } else { - frameWidth = pictureSize.getHeight(); - frameHeight = pictureSize.getWidth(); + frameWidth = previewSize.getHeight(); + frameHeight = previewSize.getWidth(); } float s = Math.max(width / (float) frameWidth , height / (float) frameHeight); return (int) (s * frameHeight); @@ -365,7 +365,7 @@ public class CameraView extends FrameLayout implements TextureView.SurfaceTextur if (cameraThread == null && surface != null) { if (BuildVars.LOGS_ENABLED) { - FileLog.d("start create thread"); + FileLog.d("CameraView " + "start create thread"); } cameraThread = new CameraGLThread(surface); checkPreviewMatrix(); @@ -407,41 +407,31 @@ public class CameraView extends FrameLayout implements TextureView.SurfaceTextur wantedWidth = 1280; wantedHeight = 960; - photoMaxWidth = 1920; - photoMaxHeight = 1440; + if (SharedConfig.getDevicePerformanceClass() == 
SharedConfig.PERFORMANCE_CLASS_LOW) { + photoMaxWidth = 1280; + photoMaxHeight = 960; + } else { + photoMaxWidth = 1920; + photoMaxHeight = 1440; + } } else { aspectRatio = new Size(16, 9); wantedWidth = 1280; wantedHeight = 720; - photoMaxWidth = 1920; - photoMaxHeight = 1080; + if (SharedConfig.getDevicePerformanceClass() == SharedConfig.PERFORMANCE_CLASS_LOW) { + photoMaxWidth = 1280; + photoMaxHeight = 960; + } else { + photoMaxWidth = 1920; + photoMaxHeight = 1080; + } } } - if (textureView.getWidth() > 0 && textureView.getHeight() > 0) { - int width; - if (useMaxPreview) { - width = Math.max(AndroidUtilities.displaySize.x, AndroidUtilities.displaySize.y); - } else { - width = Math.min(AndroidUtilities.displaySize.x, AndroidUtilities.displaySize.y); - } - int height = width * aspectRatio.getHeight() / aspectRatio.getWidth(); - previewSize = CameraController.chooseOptimalSize(info.getPreviewSizes(), width, height, aspectRatio); - } - pictureSize = CameraController.chooseOptimalSize(info.getPictureSizes(), wantedWidth, wantedHeight, aspectRatio); - if (pictureSize.getWidth() >= 1280 && pictureSize.getHeight() >= 1280) { - if (Math.abs(screenSize - size4to3) < 0.1f) { - aspectRatio = new Size(3, 4); - } else { - aspectRatio = new Size(9, 16); - } - org.telegram.messenger.camera.Size pictureSize2 = CameraController.chooseOptimalSize(info.getPictureSizes(), wantedHeight, wantedWidth, aspectRatio); - if (pictureSize2.getWidth() < 1280 || pictureSize2.getHeight() < 1280) { - pictureSize = pictureSize2; - } - } - previewSize = pictureSize; + + previewSize = CameraController.chooseOptimalSize(info.getPreviewSizes(), wantedWidth, wantedHeight, aspectRatio); pictureSize = CameraController.chooseOptimalSize(info.getPictureSizes(), photoMaxWidth, photoMaxHeight, aspectRatio); + requestLayout(); } @@ -709,7 +699,7 @@ public class CameraView extends FrameLayout implements TextureView.SurfaceTextur private boolean initGL() { if (BuildVars.LOGS_ENABLED) { - FileLog.d("start 
init gl"); + FileLog.d("CameraView " + "start init gl"); } egl10 = (EGL10) EGLContext.getEGL(); @@ -718,6 +708,7 @@ public class CameraView extends FrameLayout implements TextureView.SurfaceTextur if (BuildVars.LOGS_ENABLED) { FileLog.e("eglGetDisplay failed " + GLUtils.getEGLErrorString(egl10.eglGetError())); } + eglDisplay = null; finish(); return false; } @@ -760,7 +751,8 @@ public class CameraView extends FrameLayout implements TextureView.SurfaceTextur } int[] attrib_list = {EGL_CONTEXT_CLIENT_VERSION, 2, EGL10.EGL_NONE}; eglContext = egl10.eglCreateContext(eglDisplay, eglConfig, EGL10.EGL_NO_CONTEXT, attrib_list); - if (eglContext == null) { + if (eglContext == null || eglContext == EGL10.EGL_NO_CONTEXT) { + eglContext = null; if (BuildVars.LOGS_ENABLED) { FileLog.e("eglCreateContext failed " + GLUtils.getEGLErrorString(egl10.eglGetError())); } @@ -831,9 +823,6 @@ public class CameraView extends FrameLayout implements TextureView.SurfaceTextur android.opengl.Matrix.setIdentityM(mMVPMatrix, 0); - cameraSurface = new SurfaceTexture(cameraTexture[0]); - cameraSurface.setOnFrameAvailableListener(surfaceTexture -> requestRender()); - createCamera(cameraSurface); if (BuildVars.LOGS_ENABLED) { FileLog.e("gl initied"); } @@ -860,6 +849,10 @@ public class CameraView extends FrameLayout implements TextureView.SurfaceTextur textureBuffer = ByteBuffer.allocateDirect(texData.length * 4).order(ByteOrder.nativeOrder()).asFloatBuffer(); textureBuffer.put(texData).position(0); + cameraSurface = new SurfaceTexture(cameraTexture[0]); + cameraSurface.setOnFrameAvailableListener(surfaceTexture -> requestRender()); + createCamera(cameraSurface); + return true; } @@ -916,6 +909,23 @@ public class CameraView extends FrameLayout implements TextureView.SurfaceTextur } } + final boolean shouldRenderFrame; + synchronized (layoutLock) { + if (fpsLimit <= 0) { + shouldRenderFrame = true; + } else { + final long currentTimeNs = System.nanoTime(); + if (currentTimeNs < nextFrameTimeNs) { + 
shouldRenderFrame = false; + } else { + nextFrameTimeNs += (long) (TimeUnit.SECONDS.toNanos(1) / fpsLimit);; + // The time for the next frame should always be in the future. + nextFrameTimeNs = Math.max(nextFrameTimeNs, currentTimeNs); + shouldRenderFrame = true; + } + } + } + if (currentSession == null || currentSession.cameraInfo.cameraId != cameraId) { return; } @@ -924,6 +934,10 @@ public class CameraView extends FrameLayout implements TextureView.SurfaceTextur videoEncoder.frameAvailable(cameraSurface, cameraId, System.nanoTime()); } + if (!shouldRenderFrame) { + return; + } + cameraSurface.getTransformMatrix(mSTMatrix); egl10.eglQuerySurface(eglDisplay, eglSurface, EGL10.EGL_WIDTH, array); @@ -992,7 +1006,7 @@ public class CameraView extends FrameLayout implements TextureView.SurfaceTextur case DO_REINIT_MESSAGE: { if (!egl10.eglMakeCurrent(eglDisplay, eglSurface, eglSurface, eglContext)) { if (BuildVars.LOGS_ENABLED) { - FileLog.d("eglMakeCurrent failed " + GLUtils.getEGLErrorString(egl10.eglGetError())); + FileLog.d("CameraView " + "eglMakeCurrent failed " + GLUtils.getEGLErrorString(egl10.eglGetError())); } return; } @@ -1018,7 +1032,7 @@ public class CameraView extends FrameLayout implements TextureView.SurfaceTextur } case DO_SETSESSION_MESSAGE: { if (BuildVars.LOGS_ENABLED) { - FileLog.d("set gl rednderer session"); + FileLog.d("CameraView " + "set gl rednderer session"); } CameraSession newSession = (CameraSession) inputMessage.obj; if (currentSession == newSession) { @@ -1119,7 +1133,7 @@ public class CameraView extends FrameLayout implements TextureView.SurfaceTextur return; } if (BuildVars.LOGS_ENABLED) { - FileLog.d("create camera session"); + FileLog.d("CameraView " + "create camera session"); } if (previewSize == null) { updateCameraInfoSize(); @@ -1129,14 +1143,14 @@ public class CameraView extends FrameLayout implements TextureView.SurfaceTextur } surfaceTexture.setDefaultBufferSize(previewSize.getWidth(), previewSize.getHeight()); - 
cameraSession = new CameraSession(info, previewSize, pictureSize, ImageFormat.JPEG); + cameraSession = new CameraSession(info, previewSize, pictureSize, ImageFormat.JPEG, false); cameraThread.setCurrentSession(cameraSession); requestLayout(); CameraController.getInstance().open(cameraSession, surfaceTexture, () -> { if (cameraSession != null) { if (BuildVars.LOGS_ENABLED) { - FileLog.d("camera initied"); + FileLog.d("CameraView " + "camera initied"); } cameraSession.setInitied(); } @@ -1297,8 +1311,7 @@ public class CameraView extends FrameLayout implements TextureView.SurfaceTextur Size pictureSize; int bitrate; - pictureSize = new Size(16, 9); - pictureSize = CameraController.chooseOptimalSize(info.getPictureSizes(), 720, 480, pictureSize); + pictureSize = previewSize; if (Math.min(pictureSize.mHeight, pictureSize.mWidth) >= 720) { bitrate = 3500000; } else { @@ -1359,7 +1372,7 @@ public class CameraView extends FrameLayout implements TextureView.SurfaceTextur zeroTimeStamps++; if (zeroTimeStamps > 1) { if (BuildVars.LOGS_ENABLED) { - FileLog.d("fix timestamp enabled"); + FileLog.d("CameraView " + "fix timestamp enabled"); } timestamp = timestampInternal; } else { @@ -1395,7 +1408,7 @@ public class CameraView extends FrameLayout implements TextureView.SurfaceTextur if (audioFirst == -1) { if (videoFirst == -1) { if (BuildVars.LOGS_ENABLED) { - FileLog.d("video record not yet started"); + FileLog.d("CameraView " + "video record not yet started"); } return; } @@ -1407,7 +1420,7 @@ public class CameraView extends FrameLayout implements TextureView.SurfaceTextur audioFirst = input.offset[a]; ok = true; if (BuildVars.LOGS_ENABLED) { - FileLog.d("detected desync between audio and video " + desyncTime); + FileLog.d("CameraView " + "detected desync between audio and video " + desyncTime); } break; } @@ -1416,18 +1429,18 @@ public class CameraView extends FrameLayout implements TextureView.SurfaceTextur audioFirst = input.offset[a]; ok = true; if (BuildVars.LOGS_ENABLED) 
{ - FileLog.d("found first audio frame at " + a + " timestamp = " + input.offset[a]); + FileLog.d("CameraView " + "found first audio frame at " + a + " timestamp = " + input.offset[a]); } break; } else { if (BuildVars.LOGS_ENABLED) { - FileLog.d("ignore first audio frame at " + a + " timestamp = " + input.offset[a]); + FileLog.d("CameraView " + "ignore first audio frame at " + a + " timestamp = " + input.offset[a]); } } } if (!ok) { if (BuildVars.LOGS_ENABLED) { - FileLog.d("first audio frame not found, removing buffers " + input.results); + FileLog.d("CameraView " + "first audio frame not found, removing buffers " + input.results); } buffersToWrite.remove(input); } else { @@ -1470,7 +1483,7 @@ public class CameraView extends FrameLayout implements TextureView.SurfaceTextur if (a < input.results) { if (!running && input.offset[a] >= videoLast - desyncTime) { if (BuildVars.LOGS_ENABLED) { - FileLog.d("stop audio encoding because of stoped video recording at " + input.offset[a] + " last video " + videoLast); + FileLog.d("CameraView " + "stop audio encoding because of stoped video recording at " + input.offset[a] + " last video " + videoLast); } audioStopedByTime = true; isLast = true; @@ -1541,7 +1554,7 @@ public class CameraView extends FrameLayout implements TextureView.SurfaceTextur if (videoFirst == -1) { videoFirst = timestampNanos / 1000; if (BuildVars.LOGS_ENABLED) { - FileLog.d("first video frame was at " + videoFirst); + FileLog.d("CameraView " + "first video frame was at " + videoFirst); } } videoLast = timestampNanos; @@ -1632,6 +1645,7 @@ public class CameraView extends FrameLayout implements TextureView.SurfaceTextur handler.exit(); AndroidUtilities.runOnUIThread(() -> { + cameraSession.stopVideoRecording(); onRecordingFinishRunnable.run(); }); } @@ -1652,7 +1666,7 @@ public class CameraView extends FrameLayout implements TextureView.SurfaceTextur audioRecorder = new AudioRecord(MediaRecorder.AudioSource.DEFAULT, 44100, AudioFormat.CHANNEL_IN_MONO, 
AudioFormat.ENCODING_PCM_16BIT, bufferSize); audioRecorder.startRecording(); if (BuildVars.LOGS_ENABLED) { - FileLog.d("initied audio record with channels " + audioRecorder.getChannelCount() + " sample rate = " + audioRecorder.getSampleRate() + " bufferSize = " + bufferSize); + FileLog.d("CameraView " + "initied audio record with channels " + audioRecorder.getChannelCount() + " sample rate = " + audioRecorder.getSampleRate() + " bufferSize = " + bufferSize); } Thread thread = new Thread(recorderRunnable); thread.setPriority(Thread.MAX_PRIORITY); @@ -2019,4 +2033,8 @@ public class CameraView extends FrameLayout implements TextureView.SurfaceTextur Looper.myLooper().quit(); } } + + public void setFpsLimit(int fpsLimit) { + this.fpsLimit = fpsLimit; + } } diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/voip/VideoCapturerDevice.java b/TMessagesProj/src/main/java/org/telegram/messenger/voip/VideoCapturerDevice.java index 93e403ffa..7cfd3027b 100644 --- a/TMessagesProj/src/main/java/org/telegram/messenger/voip/VideoCapturerDevice.java +++ b/TMessagesProj/src/main/java/org/telegram/messenger/voip/VideoCapturerDevice.java @@ -214,6 +214,13 @@ public class VideoCapturerDevice { }); } + public static MediaProjection getMediaProjection() { + if (instance[1] == null) { + return null; + } + return ((ScreenCapturerAndroid) instance[1].videoCapturer).getMediaProjection(); + } + private void onAspectRatioRequested(float aspectRatio) { /*if (aspectRatio < 0.0001f) { return; diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/voip/VoIPService.java b/TMessagesProj/src/main/java/org/telegram/messenger/voip/VoIPService.java index ab535872d..b3abc4fee 100755 --- a/TMessagesProj/src/main/java/org/telegram/messenger/voip/VoIPService.java +++ b/TMessagesProj/src/main/java/org/telegram/messenger/voip/VoIPService.java @@ -472,10 +472,8 @@ public class VoIPService extends Service implements SensorEventListener, AudioMa } } unmutedByHold = !micMute && hold; - for 
(int a = 0; a < tgVoip.length; a++) { - if (tgVoip[a] != null) { - tgVoip[a].setMuteMicrophone(mute); - } + if (tgVoip[CAPTURE_DEVICE_CAMERA] != null) { + tgVoip[CAPTURE_DEVICE_CAMERA].setMuteMicrophone(mute); } for (StateListener l : stateListeners) { l.onAudioSettingsChanged(); @@ -1770,7 +1768,7 @@ public class VoIPService extends Service implements SensorEventListener, AudioMa } } - private void startScreenCapture(String json) { + private void startScreenCapture(int ssrc, String json) { if (getSharedInstance() == null || groupCall == null) { return; } @@ -1781,6 +1779,7 @@ public class VoIPService extends Service implements SensorEventListener, AudioMa req.params.data = json; ConnectionsManager.getInstance(currentAccount).sendRequest(req, (response, error) -> { if (response != null) { + AndroidUtilities.runOnUIThread(() -> mySource[CAPTURE_DEVICE_SCREEN] = ssrc); TLRPC.Updates updates = (TLRPC.Updates) response; AndroidUtilities.runOnUIThread(() -> { if (tgVoip[CAPTURE_DEVICE_SCREEN] != null) { @@ -1798,10 +1797,14 @@ public class VoIPService extends Service implements SensorEventListener, AudioMa TLRPC.TL_groupCallParticipant participant = updateGroupCallParticipants.participants.get(b); if (MessageObject.getPeerId(participant.peer) == selfId) { if (participant.presentation != null) { - for (int c = 0, N3 = participant.presentation.source_groups.size(); c < N3; c++) { - TLRPC.TL_groupCallParticipantVideoSourceGroup sourceGroup = participant.presentation.source_groups.get(c); - if (sourceGroup.sources.size() > 0) { - mySource[CAPTURE_DEVICE_SCREEN] = sourceGroup.sources.get(0); + if ((participant.presentation.flags & 2) != 0) { + mySource[CAPTURE_DEVICE_SCREEN] = participant.presentation.audio_source; + } else { + for (int c = 0, N3 = participant.presentation.source_groups.size(); c < N3; c++) { + TLRPC.TL_groupCallParticipantVideoSourceGroup sourceGroup = participant.presentation.source_groups.get(c); + if (sourceGroup.sources.size() > 0) { + 
mySource[CAPTURE_DEVICE_SCREEN] = sourceGroup.sources.get(0); + } } } } @@ -1816,7 +1819,9 @@ public class VoIPService extends Service implements SensorEventListener, AudioMa startGroupCheckShortpoll(); } else { AndroidUtilities.runOnUIThread(() -> { - if ("JOIN_AS_PEER_INVALID".equals(error.text)) { + if ("GROUPCALL_VIDEO_TOO_MUCH".equals(error.text)) { + groupCall.reloadGroupCall(); + } else if ("JOIN_AS_PEER_INVALID".equals(error.text)) { TLRPC.ChatFull chatFull = MessagesController.getInstance(currentAccount).getChatFull(chat.id); if (chatFull != null) { if (chatFull instanceof TLRPC.TL_chatFull) { @@ -1915,9 +1920,9 @@ public class VoIPService extends Service implements SensorEventListener, AudioMa public int audioSsrc; public TLRPC.TL_groupCallParticipant participant; - public RequestedParticipant(TLRPC.TL_groupCallParticipant p) { + public RequestedParticipant(TLRPC.TL_groupCallParticipant p, int ssrc) { participant = p; - audioSsrc = p.source; + audioSsrc = ssrc; } } @@ -1941,7 +1946,7 @@ public class VoIPService extends Service implements SensorEventListener, AudioMa if (participants == null) { participants = new ArrayList<>(); } - participants.add(new RequestedParticipant(p)); + participants.add(new RequestedParticipant(p, unknown[a])); } if (participants != null) { int[] ssrcs = new int[participants.size()]; @@ -1984,7 +1989,7 @@ public class VoIPService extends Service implements SensorEventListener, AudioMa if (type == CAPTURE_DEVICE_CAMERA) { startGroupCall(ssrc, json, true); } else { - startScreenCapture(json); + startScreenCapture(ssrc, json); } }, (uids, levels, voice) -> { if (sharedInstance == null || groupCall == null || type != CAPTURE_DEVICE_CAMERA) { @@ -2156,16 +2161,19 @@ public class VoIPService extends Service implements SensorEventListener, AudioMa continue; } if (participant.muted_by_you) { - instance.setVolume(participant.source, 0); + setParticipantVolume(participant, 0); } else { - instance.setVolume(participant.source, 
ChatObject.getParticipantVolume(participant) / 10000.0); + setParticipantVolume(participant, ChatObject.getParticipantVolume(participant)); } } } } - public void setParticipantVolume(int ssrc, int volume) { - tgVoip[CAPTURE_DEVICE_CAMERA].setVolume(ssrc, volume / 10000.0); + public void setParticipantVolume(TLRPC.TL_groupCallParticipant participant, int volume) { + tgVoip[CAPTURE_DEVICE_CAMERA].setVolume(participant.source, volume / 10000.0); + if (participant.presentation != null && participant.presentation.audio_source != 0) { + tgVoip[CAPTURE_DEVICE_CAMERA].setVolume(participant.presentation.audio_source, volume / 10000.0); + } } public boolean isSwitchingStream() { @@ -2540,6 +2548,10 @@ public class VoIPService extends Service implements SensorEventListener, AudioMa AccountInstance.getInstance(account).getConnectionsManager().sendRequest(req, (response, error) -> { if (response != null) { AccountInstance.getInstance(account).getMessagesController().processUpdates((TLRPC.Updates) response, false); + } else if (error != null) { + if ("GROUPCALL_VIDEO_TOO_MUCH".equals(error.text)) { + groupCall.reloadGroupCall(); + } } if (onComplete != null) { AndroidUtilities.runOnUIThread(onComplete); @@ -2632,6 +2644,7 @@ public class VoIPService extends Service implements SensorEventListener, AudioMa am.setBluetoothScoOn(true); am.setSpeakerphoneOn(false); } + audioRouteToSet = AUDIO_ROUTE_BLUETOOTH; break; case 1: needSwitchToBluetoothAfterScoActivates = false; @@ -2642,6 +2655,7 @@ public class VoIPService extends Service implements SensorEventListener, AudioMa } am.setSpeakerphoneOn(false); am.setBluetoothScoOn(false); + audioRouteToSet = AUDIO_ROUTE_EARPIECE; break; case 0: needSwitchToBluetoothAfterScoActivates = false; @@ -2652,6 +2666,7 @@ public class VoIPService extends Service implements SensorEventListener, AudioMa } am.setBluetoothScoOn(false); am.setSpeakerphoneOn(true); + audioRouteToSet = AUDIO_ROUTE_SPEAKER; break; } updateOutputGainControlState(); @@ -3461,6 
+3476,11 @@ public class VoIPService extends Service implements SensorEventListener, AudioMa am.setBluetoothScoOn(speakerphoneStateToSet); } else { am.setSpeakerphoneOn(speakerphoneStateToSet); + if (speakerphoneStateToSet) { + audioRouteToSet = AUDIO_ROUTE_SPEAKER; + } else { + audioRouteToSet = AUDIO_ROUTE_EARPIECE; + } } updateOutputGainControlState(); audioConfigured = true; @@ -3503,7 +3523,7 @@ public class VoIPService extends Service implements SensorEventListener, AudioMa } if (event.sensor.getType() == Sensor.TYPE_PROXIMITY) { AudioManager am = (AudioManager) getSystemService(AUDIO_SERVICE); - if (isHeadsetPlugged || am.isSpeakerphoneOn() || (isBluetoothHeadsetConnected() && am.isBluetoothScoOn())) { + if (audioRouteToSet != AUDIO_ROUTE_EARPIECE || isHeadsetPlugged || am.isSpeakerphoneOn() || (isBluetoothHeadsetConnected() && am.isBluetoothScoOn())) { return; } boolean newIsNear = event.values[0] < Math.min(event.sensor.getMaximumRange(), 3); diff --git a/TMessagesProj/src/main/java/org/telegram/tgnet/TLRPC.java b/TMessagesProj/src/main/java/org/telegram/tgnet/TLRPC.java index 8075517fe..6b50601f6 100644 --- a/TMessagesProj/src/main/java/org/telegram/tgnet/TLRPC.java +++ b/TMessagesProj/src/main/java/org/telegram/tgnet/TLRPC.java @@ -68,7 +68,7 @@ public class TLRPC { public static final int MESSAGE_FLAG_HAS_BOT_ID = 0x00000800; public static final int MESSAGE_FLAG_EDITED = 0x00008000; - public static final int LAYER = 130; + public static final int LAYER = 131; public static class TL_stats_megagroupStats extends TLObject { public static int constructor = 0xef7ff916; @@ -2015,6 +2015,8 @@ public class TLRPC { public int stream_dc_id; public int record_start_date; public int schedule_date; + public int unmuted_video_count; + public int unmuted_video_limit; public int version; public int duration; @@ -2024,7 +2026,7 @@ public class TLRPC { case 0x7780bcb4: result = new TL_groupCallDiscarded(); break; - case 0x653dbaad: + case 0xd597650c: result = new 
TL_groupCall(); break; } @@ -2057,7 +2059,7 @@ public class TLRPC { } public static class TL_groupCall extends GroupCall { - public static int constructor = 0x653dbaad; + public static int constructor = 0xd597650c; public void readParams(AbstractSerializedData stream, boolean exception) { @@ -2082,6 +2084,10 @@ public class TLRPC { if ((flags & 128) != 0) { schedule_date = stream.readInt32(exception); } + if ((flags & 1024) != 0) { + unmuted_video_count = stream.readInt32(exception); + } + unmuted_video_limit = stream.readInt32(exception); version = stream.readInt32(exception); } @@ -2108,11 +2114,15 @@ public class TLRPC { if ((flags & 128) != 0) { stream.writeInt32(schedule_date); } + if ((flags & 1024) != 0) { + stream.writeInt32(unmuted_video_count); + } + stream.writeInt32(unmuted_video_limit); stream.writeInt32(version); } } - public static class TL_channelBannedRights_layer92 extends TLObject { + public static class TL_channelBannedRights_layer92 extends TLObject { public static int constructor = 0x58cf4249; public int flags; @@ -20439,12 +20449,13 @@ public class TLRPC { } public static class TL_groupCallParticipantVideo extends TLObject { - public static int constructor = 0x78e41663; + public static int constructor = 0x67753ac8; public int flags; public boolean paused; public String endpoint; public ArrayList source_groups = new ArrayList<>(); + public int audio_source; public static TL_groupCallParticipantVideo TLdeserialize(AbstractSerializedData stream, int constructor, boolean exception) { if (TL_groupCallParticipantVideo.constructor != constructor) { @@ -20478,6 +20489,9 @@ public class TLRPC { } source_groups.add(object); } + if ((flags & 2) != 0) { + audio_source = stream.readInt32(exception); + } } public void serializeToStream(AbstractSerializedData stream) { @@ -20491,6 +20505,9 @@ public class TLRPC { for (int a = 0; a < count; a++) { source_groups.get(a).serializeToStream(stream); } + if ((flags & 2) != 0) { + stream.writeInt32(audio_source); + 
} } } @@ -24366,6 +24383,9 @@ public class TLRPC { case 0xb783982: result = new TL_updateGroupCallConnection(); break; + case 0xcf7e0873: + result = new TL_updateBotCommands(); + break; case 0x871fb939: result = new TL_updateGeoLiveViewed(); break; @@ -25004,6 +25024,46 @@ public class TLRPC { } } + public static class TL_updateBotCommands extends Update { + public static int constructor = 0xcf7e0873; + + public Peer peer; + public int bot_id; + public ArrayList commands = new ArrayList<>(); + + public void readParams(AbstractSerializedData stream, boolean exception) { + peer = Peer.TLdeserialize(stream, stream.readInt32(exception), exception); + bot_id = stream.readInt32(exception); + int magic = stream.readInt32(exception); + if (magic != 0x1cb5c415) { + if (exception) { + throw new RuntimeException(String.format("wrong Vector magic, got %x", magic)); + } + return; + } + int count = stream.readInt32(exception); + for (int a = 0; a < count; a++) { + TL_botCommand object = TL_botCommand.TLdeserialize(stream, stream.readInt32(exception), exception); + if (object == null) { + return; + } + commands.add(object); + } + } + + public void serializeToStream(AbstractSerializedData stream) { + stream.writeInt32(constructor); + peer.serializeToStream(stream); + stream.writeInt32(bot_id); + stream.writeInt32(0x1cb5c415); + int count = commands.size(); + stream.writeInt32(count); + for (int a = 0; a < count; a++) { + commands.get(a).serializeToStream(stream); + } + } + } + public static class TL_updateGeoLiveViewed extends Update { public static int constructor = 0x871fb939; @@ -27082,7 +27142,7 @@ public class TLRPC { } public static class TL_account_password extends TLObject { - public static int constructor = 0xad2641f8; + public static int constructor = 0x185b184f; public int flags; public boolean has_recovery; @@ -27096,6 +27156,7 @@ public class TLRPC { public PasswordKdfAlgo new_algo; public SecurePasswordKdfAlgo new_secure_algo; public byte[] secure_random; + public 
int pending_reset_date; public static TL_account_password TLdeserialize(AbstractSerializedData stream, int constructor, boolean exception) { if (TL_account_password.constructor != constructor) { @@ -27133,6 +27194,9 @@ public class TLRPC { new_algo = PasswordKdfAlgo.TLdeserialize(stream, stream.readInt32(exception), exception); new_secure_algo = SecurePasswordKdfAlgo.TLdeserialize(stream, stream.readInt32(exception), exception); secure_random = stream.readByteArray(exception); + if ((flags & 32) != 0) { + pending_reset_date = stream.readInt32(exception); + } } public void serializeToStream(AbstractSerializedData stream) { @@ -27159,6 +27223,9 @@ public class TLRPC { new_algo.serializeToStream(stream); new_secure_algo.serializeToStream(stream); stream.writeByteArray(secure_random); + if ((flags & 32) != 0) { + stream.writeInt32(pending_reset_date); + } } } @@ -39523,6 +39590,70 @@ public class TLRPC { } } + public static abstract class account_ResetPasswordResult extends TLObject { + + public static account_ResetPasswordResult TLdeserialize(AbstractSerializedData stream, int constructor, boolean exception) { + account_ResetPasswordResult result = null; + switch (constructor) { + case 0xe3779861: + result = new TL_account_resetPasswordFailedWait(); + break; + case 0xe9effc7d: + result = new TL_account_resetPasswordRequestedWait(); + break; + case 0xe926d63e: + result = new TL_account_resetPasswordOk(); + break; + } + if (result == null && exception) { + throw new RuntimeException(String.format("can't parse magic %x in account_ResetPasswordResult", constructor)); + } + if (result != null) { + result.readParams(stream, exception); + } + return result; + } + } + + public static class TL_account_resetPasswordFailedWait extends account_ResetPasswordResult { + public static int constructor = 0xe3779861; + + public int retry_date; + + public void readParams(AbstractSerializedData stream, boolean exception) { + retry_date = stream.readInt32(exception); + } + + public void 
serializeToStream(AbstractSerializedData stream) { + stream.writeInt32(constructor); + stream.writeInt32(retry_date); + } + } + + public static class TL_account_resetPasswordRequestedWait extends account_ResetPasswordResult { + public static int constructor = 0xe9effc7d; + + public int until_date; + + public void readParams(AbstractSerializedData stream, boolean exception) { + until_date = stream.readInt32(exception); + } + + public void serializeToStream(AbstractSerializedData stream) { + stream.writeInt32(constructor); + stream.writeInt32(until_date); + } + } + + public static class TL_account_resetPasswordOk extends account_ResetPasswordResult { + public static int constructor = 0xe926d63e; + + + public void serializeToStream(AbstractSerializedData stream) { + stream.writeInt32(constructor); + } + } + public static abstract class messages_Dialogs extends TLObject { public int count; @@ -40056,6 +40187,21 @@ public class TLRPC { } } + public static class TL_auth_checkRecoveryPassword extends TLObject { + public static int constructor = 0xd36bf79; + + public String code; + + public TLObject deserializeResponse(AbstractSerializedData stream, int constructor, boolean exception) { + return Bool.TLdeserialize(stream, constructor, exception); + } + + public void serializeToStream(AbstractSerializedData stream) { + stream.writeInt32(constructor); + stream.writeString(code); + } + } + public static class TL_account_registerDevice extends TLObject { public static int constructor = 0x68976c6f; @@ -40262,6 +40408,32 @@ public class TLRPC { } } + public static class TL_account_resetPassword extends TLObject { + public static int constructor = 0x9308ce1b; + + + public TLObject deserializeResponse(AbstractSerializedData stream, int constructor, boolean exception) { + return account_ResetPasswordResult.TLdeserialize(stream, constructor, exception); + } + + public void serializeToStream(AbstractSerializedData stream) { + stream.writeInt32(constructor); + } + } + + public static 
class TL_account_declinePasswordReset extends TLObject { + public static int constructor = 0x4c9409f6; + + + public TLObject deserializeResponse(AbstractSerializedData stream, int constructor, boolean exception) { + return Bool.TLdeserialize(stream, constructor, exception); + } + + public void serializeToStream(AbstractSerializedData stream) { + stream.writeInt32(constructor); + } + } + public static class TL_users_getFullUser extends TLObject { public static int constructor = 0xca30a5b1; @@ -43072,19 +43244,25 @@ public class TLRPC { } public static class TL_auth_recoverPassword extends TLObject { - public static int constructor = 0x4ea56e92; + public static int constructor = 0x37096c70; - public String code; + public int flags; + public String code; + public TL_account_passwordInputSettings new_settings; - public TLObject deserializeResponse(AbstractSerializedData stream, int constructor, boolean exception) { - return auth_Authorization.TLdeserialize(stream, constructor, exception); - } + public TLObject deserializeResponse(AbstractSerializedData stream, int constructor, boolean exception) { + return auth_Authorization.TLdeserialize(stream, constructor, exception); + } - public void serializeToStream(AbstractSerializedData stream) { - stream.writeInt32(constructor); - stream.writeString(code); - } - } + public void serializeToStream(AbstractSerializedData stream) { + stream.writeInt32(constructor); + stream.writeInt32(flags); + stream.writeString(code); + if ((flags & 1) != 0) { + new_settings.serializeToStream(stream); + } + } + } public static class TL_auth_resendCode extends TLObject { public static int constructor = 0x3ef1a9bf; @@ -46321,9 +46499,10 @@ public class TLRPC { } public static class TL_phone_getGroupCall extends TLObject { - public static int constructor = 0xc7cb017; + public static int constructor = 0x41845db; public TL_inputGroupCall call; + public int limit; public TLObject deserializeResponse(AbstractSerializedData stream, int constructor, 
boolean exception) { return TL_phone_groupCall.TLdeserialize(stream, constructor, exception); @@ -46332,6 +46511,7 @@ public class TLRPC { public void serializeToStream(AbstractSerializedData stream) { stream.writeInt32(constructor); call.serializeToStream(stream); + stream.writeInt32(limit); } } diff --git a/TMessagesProj/src/main/java/org/telegram/ui/ActionBar/ActionBarLayout.java b/TMessagesProj/src/main/java/org/telegram/ui/ActionBar/ActionBarLayout.java index a802fc4dd..27639ced9 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/ActionBar/ActionBarLayout.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/ActionBar/ActionBarLayout.java @@ -24,6 +24,9 @@ import android.graphics.Rect; import android.graphics.drawable.ColorDrawable; import android.graphics.drawable.Drawable; import android.os.Build; +import androidx.annotation.Keep; + +import android.os.SystemClock; import android.view.Gravity; import android.view.HapticFeedbackConstants; import android.view.KeyEvent; @@ -40,6 +43,7 @@ import android.widget.FrameLayout; import androidx.annotation.Keep; import org.telegram.messenger.AndroidUtilities; +import org.telegram.messenger.ApplicationLoader; import org.telegram.messenger.FileLog; import org.telegram.messenger.MessagesController; import org.telegram.messenger.R; @@ -347,7 +351,13 @@ public class ActionBarLayout extends FrameLayout { public void drawHeaderShadow(Canvas canvas, int alpha, int y) { if (headerShadowDrawable != null) { - headerShadowDrawable.setAlpha(alpha); + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.KITKAT) { + if (headerShadowDrawable.getAlpha() != alpha) { + headerShadowDrawable.setAlpha(alpha); + } + } else { + headerShadowDrawable.setAlpha(alpha); + } headerShadowDrawable.setBounds(0, y, getMeasuredWidth(), y + headerShadowDrawable.getIntrinsicHeight()); headerShadowDrawable.draw(canvas); } @@ -358,7 +368,7 @@ public class ActionBarLayout extends FrameLayout { innerTranslationX = value; invalidate(); - if 
(fragmentsStack.size() >= 2) { + if (fragmentsStack.size() >= 2 && containerView.getMeasuredWidth() > 0) { BaseFragment prevFragment = fragmentsStack.get(fragmentsStack.size() - 2); prevFragment.onSlideProgress(false, value / containerView.getMeasuredWidth()); } diff --git a/TMessagesProj/src/main/java/org/telegram/ui/ActionBar/ActionBarMenuSubItem.java b/TMessagesProj/src/main/java/org/telegram/ui/ActionBar/ActionBarMenuSubItem.java index c633cad9e..07496d09f 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/ActionBar/ActionBarMenuSubItem.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/ActionBar/ActionBarMenuSubItem.java @@ -177,6 +177,9 @@ public class ActionBarMenuSubItem extends FrameLayout { } public void updateSelectorBackground(boolean top, boolean bottom) { + if (this.top == top && this.bottom == bottom) { + return; + } this.top = top; this.bottom = bottom; updateBackground(); diff --git a/TMessagesProj/src/main/java/org/telegram/ui/ActionBar/ActionBarPopupWindow.java b/TMessagesProj/src/main/java/org/telegram/ui/ActionBar/ActionBarPopupWindow.java index d3da4c3ad..0c9f7ab7d 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/ActionBar/ActionBarPopupWindow.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/ActionBar/ActionBarPopupWindow.java @@ -147,6 +147,9 @@ public class ActionBarPopupWindow extends PopupWindow { } Object tag = view.getTag(R.id.width_tag); Object tag2 = view.getTag(R.id.object_tag); + if (tag != null) { + view.getLayoutParams().width = LayoutHelper.WRAP_CONTENT; + } measureChildWithMargins(view, widthMeasureSpec, 0, heightMeasureSpec, 0); if (!(tag instanceof Integer) && tag2 == null) { maxWidth = Math.max(maxWidth, view.getMeasuredWidth()); @@ -405,13 +408,20 @@ public class ActionBarPopupWindow extends PopupWindow { lastVisible = child; } + boolean prevGap = false; for (int a = 0; a < count; a++) { View child = linearLayout.getChildAt(a); if (child.getVisibility() != View.VISIBLE) { continue; } + Object tag = 
child.getTag(R.id.object_tag); if (child instanceof ActionBarMenuSubItem) { - ((ActionBarMenuSubItem) child).updateSelectorBackground(child == firstVisible, child == lastVisible); + ((ActionBarMenuSubItem) child).updateSelectorBackground(child == firstVisible || prevGap, child == lastVisible); + } + if (tag != null) { + prevGap = true; + } else { + prevGap = false; } } } diff --git a/TMessagesProj/src/main/java/org/telegram/ui/ActionBar/FloatingActionMode.java b/TMessagesProj/src/main/java/org/telegram/ui/ActionBar/FloatingActionMode.java index 386c190dd..c5c923804 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/ActionBar/FloatingActionMode.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/ActionBar/FloatingActionMode.java @@ -16,6 +16,7 @@ package org.telegram.ui.ActionBar; +import android.accounts.AccountManager; import android.annotation.TargetApi; import android.content.Context; import android.graphics.Point; @@ -31,7 +32,10 @@ import android.view.WindowManager; import android.widget.PopupMenu; import android.widget.PopupWindow; +import org.telegram.messenger.AccountInstance; import org.telegram.messenger.AndroidUtilities; +import org.telegram.messenger.NotificationCenter; +import org.telegram.messenger.UserConfig; import java.util.Arrays; diff --git a/TMessagesProj/src/main/java/org/telegram/ui/ActionBar/FloatingToolbar.java b/TMessagesProj/src/main/java/org/telegram/ui/ActionBar/FloatingToolbar.java index 7deee0e11..3a622ed71 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/ActionBar/FloatingToolbar.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/ActionBar/FloatingToolbar.java @@ -59,7 +59,9 @@ import android.widget.TextView; import org.telegram.messenger.AndroidUtilities; import org.telegram.messenger.LocaleController; +import org.telegram.messenger.NotificationCenter; import org.telegram.messenger.R; +import org.telegram.messenger.UserConfig; import java.util.ArrayList; import java.util.Collections; @@ -347,14 +349,18 @@ public 
final class FloatingToolbar { mDismissAnimation = createExitAnimation(mContentContainer, 150, new AnimatorListenerAdapter() { @Override public void onAnimationEnd(Animator animation) { - mPopupWindow.dismiss(); - mContentContainer.removeAllViews(); + NotificationCenter.getInstance(UserConfig.selectedAccount).doOnIdle(() -> { + mPopupWindow.dismiss(); + mContentContainer.removeAllViews(); + }); } }); mHideAnimation = createExitAnimation(mContentContainer, 0, new AnimatorListenerAdapter() { @Override public void onAnimationEnd(Animator animation) { - mPopupWindow.dismiss(); + NotificationCenter.getInstance(UserConfig.selectedAccount).doOnIdle(() -> { + mPopupWindow.dismiss(); + }); } }); } diff --git a/TMessagesProj/src/main/java/org/telegram/ui/ActionBar/MenuDrawable.java b/TMessagesProj/src/main/java/org/telegram/ui/ActionBar/MenuDrawable.java index 5daf62ece..ca1a0a1a1 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/ActionBar/MenuDrawable.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/ActionBar/MenuDrawable.java @@ -136,6 +136,7 @@ public class MenuDrawable extends Drawable { lastFrameTime = newTime; canvas.save(); + canvas.translate(getIntrinsicWidth() / 2 - AndroidUtilities.dp(9), getIntrinsicHeight() / 2); float endYDiff; float endXDiff; @@ -172,11 +173,12 @@ public class MenuDrawable extends Drawable { canvas.rotate(currentRotation * (reverseAngle ? 
-225 : 135), AndroidUtilities.dp(9), 0); if (miniIcon) { paint.setColor(color1); - canvas.drawLine(AndroidUtilities.dp(2), 0, AndroidUtilities.dp(16) - diffMiddle, 0, paint); - endYDiff = AndroidUtilities.dp(5) * (1 - Math.abs(currentRotation)) - AndroidUtilities.dp(0.25f) * Math.abs(currentRotation); - endXDiff = AndroidUtilities.dp(16) - AndroidUtilities.dp(6.25f) * Math.abs(currentRotation); - startYDiff = AndroidUtilities.dp(5) + AndroidUtilities.dp(1.25f) * Math.abs(currentRotation); - startXDiff = AndroidUtilities.dp(2) + AndroidUtilities.dp(7) * Math.abs(currentRotation); + canvas.drawLine(AndroidUtilities.dpf2(2) * (1 - Math.abs(currentRotation)) + AndroidUtilities.dp(1) * currentRotation, 0, AndroidUtilities.dpf2(16) * (1f - currentRotation) + AndroidUtilities.dp(17) * currentRotation - diffMiddle, 0, paint); + endYDiff = AndroidUtilities.dpf2(5) * (1 - Math.abs(currentRotation)) - AndroidUtilities.dpf2(0.5f) * Math.abs(currentRotation); + endXDiff = AndroidUtilities.dpf2(16) * (1 - Math.abs(currentRotation)) + (AndroidUtilities.dpf2(9)) * Math.abs(currentRotation); + startYDiff = AndroidUtilities.dpf2(5) + AndroidUtilities.dpf2(3.0f) * Math.abs(currentRotation); + startXDiff = AndroidUtilities.dpf2(2) + AndroidUtilities.dpf2(7) * Math.abs(currentRotation); + } else { int color2 = Theme.getColor(Theme.key_actionBarActionModeDefaultIcon); int backColor2 = Theme.getColor(Theme.key_actionBarActionModeDefault); @@ -189,8 +191,13 @@ public class MenuDrawable extends Drawable { startXDiff = AndroidUtilities.dp(9) * Math.abs(currentRotation); } } - canvas.drawLine(startXDiff, -startYDiff, endXDiff - diffUp, -endYDiff, paint); - canvas.drawLine(startXDiff, startYDiff, endXDiff, endYDiff, paint); + if (miniIcon) { + canvas.drawLine(startXDiff, -startYDiff, endXDiff, -endYDiff, paint); + canvas.drawLine(startXDiff, startYDiff, endXDiff, endYDiff, paint); + } else { + canvas.drawLine(startXDiff, -startYDiff, endXDiff - diffUp, -endYDiff, paint); + 
canvas.drawLine(startXDiff, startYDiff, endXDiff, endYDiff, paint); + } if (type != TYPE_DEFAULT && currentRotation != 1.0f || previousType != TYPE_DEFAULT && typeAnimationProgress != 1.0f) { float cx = AndroidUtilities.dp(9 + 8); float cy = -AndroidUtilities.dp(4.5f); diff --git a/TMessagesProj/src/main/java/org/telegram/ui/ActionBar/SimpleTextView.java b/TMessagesProj/src/main/java/org/telegram/ui/ActionBar/SimpleTextView.java index e8c12ff88..dff5b50f7 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/ActionBar/SimpleTextView.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/ActionBar/SimpleTextView.java @@ -338,6 +338,10 @@ public class SimpleTextView extends View implements Drawable.Callback { setLeftDrawable(resId == 0 ? null : getContext().getResources().getDrawable(resId)); } + public Drawable getLeftDrawable() { + return leftDrawable; + } + public void setRightDrawable(int resId) { setRightDrawable(resId == 0 ? null : getContext().getResources().getDrawable(resId)); } diff --git a/TMessagesProj/src/main/java/org/telegram/ui/ActionBar/Theme.java b/TMessagesProj/src/main/java/org/telegram/ui/ActionBar/Theme.java index b881d6fd9..28e2dbcf5 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/ActionBar/Theme.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/ActionBar/Theme.java @@ -172,6 +172,8 @@ public class Theme { public static final int TYPE_MEDIA = 1; public static final int TYPE_PREVIEW = 2; + Drawable transitionDrawable; + int transitionDrawableColor; private int alpha; public MessageDrawable(int type, boolean out, boolean selected) { @@ -336,6 +338,29 @@ public class Theme { return backgroundDrawable[idx2][idx]; } + public Drawable getTransitionDrawable(int color) { + if (transitionDrawable == null) { + Bitmap bitmap = Bitmap.createBitmap(dp(50), dp(40), Bitmap.Config.ARGB_8888); + Canvas canvas = new Canvas(bitmap); + + backupRect.set(getBounds()); + + Paint shadowPaint = new Paint(Paint.ANTI_ALIAS_FLAG); + 
shadowPaint.setColor(0xffffffff); + setBounds(0, 0, bitmap.getWidth(), bitmap.getHeight()); + draw(canvas, shadowPaint); + + transitionDrawable = new NinePatchDrawable(bitmap, getByteBuffer(bitmap.getWidth() / 2 - 1, bitmap.getWidth() / 2 + 1, bitmap.getHeight() / 2 - 1, bitmap.getHeight() / 2 + 1).array(), new Rect(), null); + setBounds(backupRect); + } + if (transitionDrawableColor != color) { + transitionDrawableColor = color; + transitionDrawable.setColorFilter(new PorterDuffColorFilter(color, PorterDuff.Mode.MULTIPLY)); + } + + return transitionDrawable; + } + public Drawable getShadowDrawable() { if (gradientShader == null && !isSelected) { return null; @@ -6427,6 +6452,7 @@ public class Theme { ThemeInfo.fillAccentValues(accent, theme.settings); if (currentTheme == themeInfo && currentTheme.currentAccentId == accent.id) { refreshThemeColors(); + createChatResources(ApplicationLoader.applicationContext, false); NotificationCenter.getGlobalInstance().postNotificationName(NotificationCenter.needSetDayNightTheme, currentTheme, currentNightTheme == currentTheme, null, -1); } PatternsLoader.createLoader(true); @@ -6750,34 +6776,34 @@ public class Theme { Integer gradientToColor3 = colors.get(key_chat_wallpaper_gradient_to3); int defaultBackgroundColor = backgroundColor != null ? backgroundColor : 0; - int backgroundOverrideColor = (int) accent.backgroundOverrideColor; + int backgroundOverrideColor = accent != null ? (int) accent.backgroundOverrideColor : 0; int backColor; - if (backgroundOverrideColor == 0 && accent.backgroundOverrideColor != 0) { + if (backgroundOverrideColor == 0 && accent != null && accent.backgroundOverrideColor != 0) { backColor = 0; } else { backColor = backgroundOverrideColor != 0 ? backgroundOverrideColor : defaultBackgroundColor; } int defaultBackgroundGradient1 = gradientToColor1 != null ? 
gradientToColor1 : 0; - int backgroundGradientOverrideColor1 = (int) accent.backgroundGradientOverrideColor1; + int backgroundGradientOverrideColor1 = accent != null ? (int) accent.backgroundGradientOverrideColor1 : 0; int color1; - if (backgroundGradientOverrideColor1 == 0 && accent.backgroundGradientOverrideColor1 != 0) { + if (backgroundGradientOverrideColor1 == 0 && accent != null && accent.backgroundGradientOverrideColor1 != 0) { color1 = 0; } else { color1 = backgroundGradientOverrideColor1 != 0 ? backgroundGradientOverrideColor1 : defaultBackgroundGradient1; } int defaultBackgroundGradient2 = gradientToColor2 != null ? gradientToColor2 : 0; - int backgroundGradientOverrideColor2 = (int) accent.backgroundGradientOverrideColor2; + int backgroundGradientOverrideColor2 = accent != null ? (int) accent.backgroundGradientOverrideColor2 : 0; int color2; - if (backgroundGradientOverrideColor2 == 0 && accent.backgroundGradientOverrideColor2 != 0) { + if (backgroundGradientOverrideColor2 == 0 && accent != null && accent.backgroundGradientOverrideColor2 != 0) { color2 = 0; } else { color2 = backgroundGradientOverrideColor2 != 0 ? backgroundGradientOverrideColor2 : defaultBackgroundGradient2; } int defaultBackgroundGradient3 = gradientToColor3 != null ? gradientToColor3 : 0; - int backgroundGradientOverrideColor3 = (int) accent.backgroundGradientOverrideColor3; + int backgroundGradientOverrideColor3 = accent != null ? (int) accent.backgroundGradientOverrideColor3 : 0; int color3; - if (backgroundGradientOverrideColor3 == 0 && accent.backgroundGradientOverrideColor3 != 0) { + if (backgroundGradientOverrideColor3 == 0 && accent != null && accent.backgroundGradientOverrideColor3 != 0) { color3 = 0; } else { color3 = backgroundGradientOverrideColor3 != 0 ? 
backgroundGradientOverrideColor3 : defaultBackgroundGradient3; @@ -6787,7 +6813,7 @@ public class Theme { try { Uri data = Uri.parse(wallpaperLink[0]); String bgColor = data.getQueryParameter("bg_color"); - if (!TextUtils.isEmpty(bgColor)) { + if (accent != null && !TextUtils.isEmpty(bgColor)) { accent.backgroundOverrideColor = backColor = Integer.parseInt(bgColor.substring(0, 6), 16) | 0xff000000; if (bgColor.length() >= 13 && AndroidUtilities.isValidWallChar(bgColor.charAt(6))) { accent.backgroundGradientOverrideColor1 = color1 = Integer.parseInt(bgColor.substring(7, 13), 16) | 0xff000000; @@ -6853,7 +6879,7 @@ public class Theme { options.inJustDecodeBounds = false; Bitmap wallpaper = BitmapFactory.decodeFile(wallpaperPath, options); if (wallpaper != null) { - if (color2 != 0) { + if (color2 != 0 && accent != null) { MotionBackgroundDrawable wallpaperDrawable = new MotionBackgroundDrawable(backColor, color1, color2, color3, true); wallpaperDrawable.setPatternBitmap((int) (accent.patternIntensity * 100), wallpaper); wallpaperDrawable.setBounds(0, 0, bitmap.getWidth(), bitmap.getHeight()); @@ -7212,8 +7238,22 @@ public class Theme { dialogs_unarchiveDrawable.commitApplyLayerColors(); } + public static void createCommonDialogResources(Context context) { + if (dialogs_countTextPaint == null) { + dialogs_countTextPaint = new TextPaint(TextPaint.ANTI_ALIAS_FLAG); + dialogs_countTextPaint.setTypeface(AndroidUtilities.getTypeface("fonts/rmedium.ttf")); + + dialogs_countPaint = new Paint(Paint.ANTI_ALIAS_FLAG); + + dialogs_onlineCirclePaint = new Paint(Paint.ANTI_ALIAS_FLAG); + } + + dialogs_countTextPaint.setTextSize(AndroidUtilities.dp(13)); + } + public static void createDialogsResources(Context context) { createCommonResources(context); + createCommonDialogResources(context); if (dialogs_namePaint == null) { Resources resources = context.getResources(); @@ -7236,8 +7276,6 @@ public class Theme { dialogs_messageNamePaint = new TextPaint(TextPaint.ANTI_ALIAS_FLAG); 
dialogs_messageNamePaint.setTypeface(AndroidUtilities.getTypeface("fonts/rmedium.ttf")); dialogs_timePaint = new TextPaint(TextPaint.ANTI_ALIAS_FLAG); - dialogs_countTextPaint = new TextPaint(TextPaint.ANTI_ALIAS_FLAG); - dialogs_countTextPaint.setTypeface(AndroidUtilities.getTypeface("fonts/rmedium.ttf")); dialogs_archiveTextPaint = new TextPaint(TextPaint.ANTI_ALIAS_FLAG); dialogs_archiveTextPaint.setTypeface(AndroidUtilities.getTypeface("fonts/rmedium.ttf")); dialogs_archiveTextPaintSmall = new TextPaint(TextPaint.ANTI_ALIAS_FLAG); @@ -7247,8 +7285,6 @@ public class Theme { dialogs_tabletSeletedPaint = new Paint(); dialogs_pinnedPaint = new Paint(Paint.ANTI_ALIAS_FLAG); - dialogs_onlineCirclePaint = new Paint(Paint.ANTI_ALIAS_FLAG); - dialogs_countPaint = new Paint(Paint.ANTI_ALIAS_FLAG); dialogs_countGrayPaint = new Paint(Paint.ANTI_ALIAS_FLAG); dialogs_errorPaint = new Paint(Paint.ANTI_ALIAS_FLAG); dialogs_actionMessagePaint = new Paint(Paint.ANTI_ALIAS_FLAG); @@ -7297,7 +7333,6 @@ public class Theme { dialogs_messageNamePaint.setTextSize(AndroidUtilities.dp(14)); dialogs_timePaint.setTextSize(AndroidUtilities.dp(13)); - dialogs_countTextPaint.setTextSize(AndroidUtilities.dp(13)); dialogs_archiveTextPaint.setTextSize(AndroidUtilities.dp(13)); dialogs_archiveTextPaintSmall.setTextSize(AndroidUtilities.dp(11)); dialogs_onlinePaint.setTextSize(AndroidUtilities.dp(15)); @@ -7373,7 +7408,7 @@ public class Theme { } } - public static void createChatResources(Context context, boolean fontsOnly) { + public static void createCommonChatResources(Context context) { synchronized (sync) { if (chat_msgTextPaint == null) { chat_msgTextPaint = new TextPaint(Paint.ANTI_ALIAS_FLAG); @@ -7384,9 +7419,16 @@ public class Theme { chat_msgBotButtonPaint = new TextPaint(Paint.ANTI_ALIAS_FLAG); chat_msgBotButtonPaint.setTypeface(AndroidUtilities.getTypeface("fonts/rmedium.ttf")); } + + chat_msgTextPaintOneEmoji.setTextSize(AndroidUtilities.dp(28)); + 
chat_msgTextPaintTwoEmoji.setTextSize(AndroidUtilities.dp(24)); + chat_msgTextPaintThreeEmoji.setTextSize(AndroidUtilities.dp(20)); + chat_msgTextPaint.setTextSize(AndroidUtilities.dp(SharedConfig.fontSize)); + chat_msgGameTextPaint.setTextSize(AndroidUtilities.dp(14)); + chat_msgBotButtonPaint.setTextSize(AndroidUtilities.dp(15)); } - if (!fontsOnly && chat_msgInDrawable == null) { + if (chat_infoPaint == null) { chat_infoPaint = new TextPaint(Paint.ANTI_ALIAS_FLAG); chat_stickerCommentCountPaint = new TextPaint(Paint.ANTI_ALIAS_FLAG); chat_stickerCommentCountPaint.setTypeface(AndroidUtilities.getTypeface("fonts/rmedium.ttf")); @@ -7459,6 +7501,13 @@ public class Theme { chat_contextResult_titleTextPaint.setTypeface(AndroidUtilities.getTypeface("fonts/rmedium.ttf")); chat_contextResult_descriptionTextPaint = new TextPaint(Paint.ANTI_ALIAS_FLAG); chat_composeBackgroundPaint = new Paint(); + } + } + + public static void createChatResources(Context context, boolean fontsOnly) { + createCommonChatResources(context); + + if (!fontsOnly && chat_msgInDrawable == null) { Resources resources = context.getResources(); @@ -7700,13 +7749,6 @@ public class Theme { applyChatTheme(fontsOnly, false); } - chat_msgTextPaintOneEmoji.setTextSize(AndroidUtilities.dp(28)); - chat_msgTextPaintTwoEmoji.setTextSize(AndroidUtilities.dp(24)); - chat_msgTextPaintThreeEmoji.setTextSize(AndroidUtilities.dp(20)); - chat_msgTextPaint.setTextSize(AndroidUtilities.dp(SharedConfig.fontSize)); - chat_msgGameTextPaint.setTextSize(AndroidUtilities.dp(14)); - chat_msgBotButtonPaint.setTextSize(AndroidUtilities.dp(15)); - if (!fontsOnly && chat_botProgressPaint != null) { chat_botProgressPaint.setStrokeWidth(AndroidUtilities.dp(2)); chat_infoPaint.setTextSize(AndroidUtilities.dp(12)); @@ -8927,6 +8969,15 @@ public class Theme { return isPatternWallpaper || "CJz3BZ6YGEYBAAAABboWp6SAv04".equals(selectedBgSlug) || "qeZWES8rGVIEAAAARfWlK1lnfiI".equals(selectedBgSlug); } + public static 
BackgroundGradientDrawable getCurrentGradientWallpaper() { + if (currentTheme.overrideWallpaper != null && currentTheme.overrideWallpaper.color != 0 && currentTheme.overrideWallpaper.gradientColor1 != 0) { + final int[] colors = {currentTheme.overrideWallpaper.color, currentTheme.overrideWallpaper.gradientColor1}; + final GradientDrawable.Orientation orientation = BackgroundGradientDrawable.getGradientOrientation(currentTheme.overrideWallpaper.rotation); + return new BackgroundGradientDrawable(orientation, colors); + } + return null; + } + public static AudioVisualizerDrawable getCurrentAudiVisualizerDrawable() { if (chat_msgAudioVisualizeDrawable == null) { chat_msgAudioVisualizeDrawable = new AudioVisualizerDrawable(); diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Adapters/DialogsAdapter.java b/TMessagesProj/src/main/java/org/telegram/ui/Adapters/DialogsAdapter.java index 5d1466a4c..744aa8e7e 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/Adapters/DialogsAdapter.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Adapters/DialogsAdapter.java @@ -432,15 +432,25 @@ public class DialogsAdapter extends RecyclerListView.SelectionAdapter { private int movement; private float moveProgress; private long lastUpdateTime; - private int x; - private int y; + private int originalX; + private int originalY; + + @Override + protected void afterTextDraw() { + if (arrowDrawable != null) { + Rect bounds = arrowDrawable.getBounds(); + arrowDrawable.setBounds(originalX, originalY, originalX + bounds.width(), originalY + bounds.height()); + } + } @Override protected void onTextDraw() { if (arrowDrawable != null) { Rect bounds = arrowDrawable.getBounds(); int dx = (int) (moveProgress * AndroidUtilities.dp(3)); - arrowDrawable.setBounds(x + dx, y + AndroidUtilities.dp(1), x + dx + bounds.width(), y + AndroidUtilities.dp(1) + bounds.height()); + originalX = bounds.left; + originalY = bounds.top; + arrowDrawable.setBounds(originalX + dx, originalY + 
AndroidUtilities.dp(1), originalX + dx + bounds.width(), originalY + AndroidUtilities.dp(1) + bounds.height()); long newUpdateTime = SystemClock.elapsedRealtime(); long dt = newUpdateTime - lastUpdateTime; @@ -464,15 +474,6 @@ public class DialogsAdapter extends RecyclerListView.SelectionAdapter { getTextView().invalidate(); } } - - @Override - protected void onLayout(boolean changed, int left, int top, int right, int bottom) { - super.onLayout(changed, left, top, right, bottom); - if (arrowDrawable != null) { - x = arrowDrawable.getBounds().left; - y = arrowDrawable.getBounds().top; - } - } }; Drawable drawable = Theme.getThemedDrawable(mContext, R.drawable.greydivider, Theme.key_windowBackgroundGrayShadow); CombinedDrawable combinedDrawable = new CombinedDrawable(new ColorDrawable(Theme.getColor(Theme.key_windowBackgroundGray)), drawable); diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Adapters/DrawerLayoutAdapter.java b/TMessagesProj/src/main/java/org/telegram/ui/Adapters/DrawerLayoutAdapter.java index 79146d6ab..27bef0e3c 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/Adapters/DrawerLayoutAdapter.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Adapters/DrawerLayoutAdapter.java @@ -57,8 +57,8 @@ public class DrawerLayoutAdapter extends RecyclerListView.SelectionAdapter imple public DrawerLayoutAdapter(Context context, SideMenultItemAnimator animator) { mContext = context; itemAnimator = animator; - accountsShown = MessagesController.getGlobalMainSettings().getBoolean("accountsShown", true); - Theme.createDialogsResources(context); + accountsShown = UserConfig.getActivatedAccountsCount() > 1 && MessagesController.getGlobalMainSettings().getBoolean("accountsShown", true); + Theme.createCommonDialogResources(context); resetItems(); try { hasGps = ApplicationLoader.applicationContext.getPackageManager().hasSystemFeature(PackageManager.FEATURE_LOCATION_GPS); @@ -344,7 +344,7 @@ public class DrawerLayoutAdapter extends 
RecyclerListView.SelectionAdapter imple } public void bind(DrawerActionCell actionCell) { - actionCell.setTextAndIcon(text, icon); + actionCell.setTextAndIcon(id, text, icon); } } diff --git a/TMessagesProj/src/main/java/org/telegram/ui/BubbleActivity.java b/TMessagesProj/src/main/java/org/telegram/ui/BubbleActivity.java index c29bb2def..a9a019dc0 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/BubbleActivity.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/BubbleActivity.java @@ -120,7 +120,7 @@ public class BubbleActivity extends Activity implements ActionBarLayout.ActionBa } else if (ArticleViewer.hasInstance() && ArticleViewer.getInstance().isVisible()) { ArticleViewer.getInstance().close(false, true); } - passcodeView.onShow(); + passcodeView.onShow(true, false); SharedConfig.isWaitingForPasscodeEnter = true; drawerLayoutContainer.setAllowOpenDrawer(false, false); passcodeView.setDelegate(() -> { diff --git a/TMessagesProj/src/main/java/org/telegram/ui/CallLogActivity.java b/TMessagesProj/src/main/java/org/telegram/ui/CallLogActivity.java index 76df3940c..cb56f2e50 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/CallLogActivity.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/CallLogActivity.java @@ -352,6 +352,7 @@ public class CallLogActivity extends BaseFragment implements NotificationCenter. private ProfileSearchCell profileSearchCell; private ProgressButton button; + private TLRPC.Chat currentChat; public GroupCallCell(Context context) { super(context); @@ -389,6 +390,10 @@ public class CallLogActivity extends BaseFragment implements NotificationCenter. } }); } + + public void setChat(TLRPC.Chat chat) { + currentChat = chat; + } } @Override @@ -487,9 +492,9 @@ public class CallLogActivity extends BaseFragment implements NotificationCenter. 
presentFragment(new ChatActivity(args), true); } } else if (view instanceof GroupCallCell) { - Integer id = activeGroupCalls.get(position - listViewAdapter.activeStartRow); + GroupCallCell cell = (GroupCallCell) view; Bundle args = new Bundle(); - args.putInt("chat_id", id); + args.putInt("chat_id", cell.currentChat.id); getNotificationCenter().postNotificationName(NotificationCenter.closeChats); presentFragment(new ChatActivity(args), true); } @@ -1079,6 +1084,7 @@ public class CallLogActivity extends BaseFragment implements NotificationCenter. Integer chatId = activeGroupCalls.get(position); TLRPC.Chat chat = getMessagesController().getChat(chatId); GroupCallCell cell = (GroupCallCell) holder.itemView; + cell.setChat(chat); cell.button.setTag(chat.id); String text; if (ChatObject.isChannel(chat) && !chat.megagroup) { diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Cells/AccountSelectCell.java b/TMessagesProj/src/main/java/org/telegram/ui/Cells/AccountSelectCell.java index 150920a5d..4e7977a5a 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/Cells/AccountSelectCell.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Cells/AccountSelectCell.java @@ -92,7 +92,7 @@ public class AccountSelectCell extends FrameLayout { @Override protected void onMeasure(int widthMeasureSpec, int heightMeasureSpec) { - if (checkImageView != null) { + if (checkImageView != null || infoTextView != null && getLayoutParams().width != LayoutHelper.WRAP_CONTENT) { super.onMeasure(MeasureSpec.makeMeasureSpec(MeasureSpec.getSize(widthMeasureSpec), MeasureSpec.EXACTLY), MeasureSpec.makeMeasureSpec(AndroidUtilities.dp(56), MeasureSpec.EXACTLY)); } else { super.onMeasure(MeasureSpec.makeMeasureSpec(MeasureSpec.getSize(widthMeasureSpec), MeasureSpec.AT_MOST), MeasureSpec.makeMeasureSpec(AndroidUtilities.dp(56), MeasureSpec.EXACTLY)); diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Cells/ChatActionCell.java 
b/TMessagesProj/src/main/java/org/telegram/ui/Cells/ChatActionCell.java index eff0f94ba..5ae2c031d 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/Cells/ChatActionCell.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Cells/ChatActionCell.java @@ -128,7 +128,7 @@ public class ChatActionCell extends BaseCell implements DownloadController.FileD } public void setCustomDate(int date, boolean scheduled, boolean inLayout) { - if (customDate == date) { + if (customDate == date || customDate / 3600 == date / 3600) { return; } CharSequence newText; diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Cells/ChatMessageCell.java b/TMessagesProj/src/main/java/org/telegram/ui/Cells/ChatMessageCell.java index 1850b5a44..4d956a8b9 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/Cells/ChatMessageCell.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Cells/ChatMessageCell.java @@ -65,6 +65,8 @@ import android.view.accessibility.AccessibilityNodeProvider; import android.view.animation.Interpolator; import android.widget.Toast; +import androidx.core.graphics.ColorUtils; + import org.telegram.PhoneFormat.PhoneFormat; import org.telegram.messenger.AndroidUtilities; import org.telegram.messenger.ChatObject; @@ -145,10 +147,10 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate return radialProgress; } - boolean voiceTransitionInPorgress; + boolean enterTransitionInPorgress; - public void setVoiceTransitionInProgress(boolean b) { - voiceTransitionInPorgress = b; + public void setEnterTransitionInProgress(boolean b) { + enterTransitionInPorgress = b; invalidate(); } @@ -310,8 +312,8 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate private TLRPC.TL_pollAnswer answer; } - private boolean pinnedTop; - private boolean pinnedBottom; + public boolean pinnedTop; + public boolean pinnedBottom; private boolean drawPinnedTop; private boolean drawPinnedBottom; private MessageObject.GroupedMessages 
currentMessagesGroup; @@ -340,7 +342,7 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate private boolean fullyDraw; private int parentHeight; - private float parentViewTopOffset; + public float parentViewTopOffset; private boolean attachedToWindow; @@ -668,16 +670,16 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate private ImageReceiver locationImageReceiver; - private StaticLayout replyNameLayout; - private StaticLayout replyTextLayout; - private ImageReceiver replyImageReceiver; - private int replyStartX; - private int replyStartY; + public StaticLayout replyNameLayout; + public StaticLayout replyTextLayout; + public ImageReceiver replyImageReceiver; + public int replyStartX; + public int replyStartY; private int replyNameWidth; private int replyNameOffset; private int replyTextWidth; private int replyTextOffset; - private boolean needReplyImage; + public boolean needReplyImage; private boolean replyPressed; private TLRPC.PhotoSize currentReplyPhoto; @@ -756,6 +758,7 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate private final TransitionParams transitionParams = new TransitionParams(); private boolean edited; + private boolean imageDrawn; private Runnable diceFinishCallback = new Runnable() { @Override @@ -791,6 +794,7 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate } }; private SparseArray accessibilityVirtualViewBounds = new SparseArray<>(); + private boolean isRoundVideo; private int currentFocusedVirtualView = -1; public boolean drawFromPinchToZoom; @@ -2228,7 +2232,7 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate PinchToZoomHelper pinchToZoomHelper = delegate == null ? 
null : delegate.getPinchToZoomHelper(); if (currentMessageObject == null || !photoImage.hasNotThumb() || pinchToZoomHelper == null || currentMessageObject.isSticker() || currentMessageObject.isAnimatedEmoji() || (currentMessageObject.isVideo() && !autoPlayingMedia) || - currentMessageObject.isRoundVideo() || currentMessageObject.isAnimatedSticker() || (currentMessageObject.isDocument() && !currentMessageObject.isGif()) || currentMessageObject.needDrawBluredPreview()) { + isRoundVideo || currentMessageObject.isAnimatedSticker() || (currentMessageObject.isDocument() && !currentMessageObject.isGif()) || currentMessageObject.needDrawBluredPreview()) { return false; } return pinchToZoomHelper.checkPinchToZoom(ev, this, photoImage, currentMessageObject); @@ -2361,7 +2365,7 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate infoLayout = new StaticLayout(str, Theme.chat_infoPaint, infoWidth, Layout.Alignment.ALIGN_NORMAL, 1.0f, 0.0f, false); lastTime = duration; } - } else if (currentMessageObject.isRoundVideo()) { + } else if (isRoundVideo) { int duration = 0; TLRPC.Document document = currentMessageObject.getDocument(); for (int a = 0; a < document.attributes.size(); a++) { @@ -2441,7 +2445,6 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate if (parent != parentHeight || parentOffset != this.parentViewTopOffset) { this.parentViewTopOffset = parentOffset; parentHeight = parent; - invalidate(); } if (currentMessageObject != null && (Theme.hasGradientService() && currentMessageObject.shouldDrawWithoutBackground() || drawSideButton != 0 || !botButtons.isEmpty()) || currentBackgroundDrawable != null && currentBackgroundDrawable.getGradientShader() != null) { invalidate(); @@ -2872,7 +2875,7 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate } else { updateButtonState(false, false, false); } - if (currentMessageObject != null && (currentMessageObject.isRoundVideo() || 
currentMessageObject.isVideo())) { + if (currentMessageObject != null && (isRoundVideo || currentMessageObject.isVideo())) { checkVideoPlayback(true); } if (documentAttachType == DOCUMENT_ATTACH_TYPE_VIDEO && autoPlayingMedia) { @@ -2902,6 +2905,7 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate currentMessageObject = null; } lastHeight = AndroidUtilities.displaySize.y; + isRoundVideo = messageObject != null && messageObject.isRoundVideo(); boolean messageIdChanged = currentMessageObject == null || currentMessageObject.getId() != messageObject.getId(); boolean messageChanged = currentMessageObject != messageObject || messageObject.forceUpdate; boolean dataChanged = currentMessageObject != null && currentMessageObject.getId() == messageObject.getId() && lastSendState == MessageObject.MESSAGE_SEND_STATE_EDITING && messageObject.isSent() @@ -5996,7 +6000,7 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate } accessibilityVirtualViewBounds.clear(); } - + private boolean needHide; public void checkVideoPlayback(boolean allowStart) { @@ -6783,13 +6787,15 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate return 0; } + int lastSize; @SuppressLint("DrawAllocation") @Override protected void onLayout(boolean changed, int left, int top, int right, int bottom) { if (currentMessageObject == null || needHide) { return; } - if (changed || !wasLayout) { + int currentSize = getMeasuredHeight() + (getMeasuredWidth() << 16); + if (lastSize != currentSize || !wasLayout) { layoutWidth = getMeasuredWidth(); layoutHeight = getMeasuredHeight() - substractBackgroundHeight; if (timeTextWidth < 0) { @@ -6854,11 +6860,12 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate wasLayout = true; } + lastSize = currentSize; if (currentMessageObject.type == 0) { textY = AndroidUtilities.dp(10) + namesOffset; } - if (currentMessageObject.isRoundVideo()) { + if 
(isRoundVideo) { updatePlayingMessageProgress(); } if (documentAttachType == DOCUMENT_ATTACH_TYPE_AUDIO) { @@ -7129,7 +7136,7 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate radialProgress.setProgressColor(Theme.getColor(Theme.key_chat_mediaProgress)); videoRadialProgress.setProgressColor(Theme.getColor(Theme.key_chat_mediaProgress)); - boolean imageDrawn = false; + imageDrawn = false; radialProgress.setCircleCrossfadeColor(null, 0.0f, 1.0f); if (currentMessageObject.type == 0) { if (currentMessageObject.isOutOwner()) { @@ -7161,296 +7168,37 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate textX += diff - getExtraTimeX(); } } - if (transitionParams.animateChangeProgress != 1.0f && transitionParams.animateMessageText) { - drawMessageText(canvas, transitionParams.animateOutTextBlocks, false, (1.0f - transitionParams.animateChangeProgress)); - drawMessageText(canvas, currentMessageObject.textLayoutBlocks, true, transitionParams.animateChangeProgress); - } else { - drawMessageText(canvas, currentMessageObject.textLayoutBlocks, true, 1.0f); + if (!enterTransitionInPorgress) { + if (transitionParams.animateChangeProgress != 1.0f && transitionParams.animateMessageText) { + canvas.save(); + if (currentBackgroundDrawable != null) { + Rect r = currentBackgroundDrawable.getBounds(); + if (currentMessageObject.isOutOwner() && !mediaBackground && !pinnedBottom) { + canvas.clipRect( + r.left + AndroidUtilities.dp(4), r.top + AndroidUtilities.dp(4), + r.right - AndroidUtilities.dp(10), r.bottom - AndroidUtilities.dp(4) + ); + } else { + canvas.clipRect( + r.left + AndroidUtilities.dp(4), r.top + AndroidUtilities.dp(4), + r.right - AndroidUtilities.dp(4), r.bottom - AndroidUtilities.dp(4) + ); + } + } + drawMessageText(canvas, transitionParams.animateOutTextBlocks, false, (1.0f - transitionParams.animateChangeProgress), false); + drawMessageText(canvas, currentMessageObject.textLayoutBlocks, true, 
transitionParams.animateChangeProgress, false); + canvas.restore(); + } else { + drawMessageText(canvas, currentMessageObject.textLayoutBlocks, true, 1.0f, false); + } } - - if (hasLinkPreview || hasGamePreview || hasInvoicePreview) { - int startY; - int linkX; - if (hasGamePreview) { - startY = AndroidUtilities.dp(14) + namesOffset; - linkX = unmovedTextX - AndroidUtilities.dp(10); - } else if (hasInvoicePreview) { - startY = AndroidUtilities.dp(14) + namesOffset; - linkX = unmovedTextX + AndroidUtilities.dp(1); - } else { - startY = textY + currentMessageObject.textHeight + AndroidUtilities.dp(8); - linkX = unmovedTextX + AndroidUtilities.dp(1); - } - int linkPreviewY = startY; - int smallImageStartY = 0; - - if (!hasInvoicePreview) { - Theme.chat_replyLinePaint.setColor(Theme.getColor(currentMessageObject.isOutOwner() ? Theme.key_chat_outPreviewLine : Theme.key_chat_inPreviewLine)); - canvas.drawRect(linkX, linkPreviewY - AndroidUtilities.dp(3), linkX + AndroidUtilities.dp(2), linkPreviewY + linkPreviewHeight + AndroidUtilities.dp(3), Theme.chat_replyLinePaint); - } - - if (siteNameLayout != null) { - smallImageStartY = linkPreviewY - AndroidUtilities.dp(1); - Theme.chat_replyNamePaint.setColor(Theme.getColor(currentMessageObject.isOutOwner() ? Theme.key_chat_outSiteNameText : Theme.key_chat_inSiteNameText)); - canvas.save(); - int x; - if (siteNameRtl) { - x = backgroundWidth - siteNameWidth - AndroidUtilities.dp(32); - if (isSmallImage) { - x -= AndroidUtilities.dp(48 + 6); - } - } else { - x = (hasInvoicePreview ? 
0 : AndroidUtilities.dp(10)); - } - canvas.translate(linkX + x, linkPreviewY - AndroidUtilities.dp(3)); - siteNameLayout.draw(canvas); - canvas.restore(); - linkPreviewY += siteNameLayout.getLineBottom(siteNameLayout.getLineCount() - 1); - } - if ((hasGamePreview || hasInvoicePreview) && currentMessageObject.textHeight != 0) { - startY += currentMessageObject.textHeight + AndroidUtilities.dp(4); - linkPreviewY += currentMessageObject.textHeight + AndroidUtilities.dp(4); - } - - if (drawPhotoImage && drawInstantView && drawInstantViewType != 9 || drawInstantViewType == 6 && imageBackgroundColor != 0) { - if (linkPreviewY != startY) { - linkPreviewY += AndroidUtilities.dp(2); - } - if (imageBackgroundSideColor != 0) { - int x = linkX + AndroidUtilities.dp(10); - photoImage.setImageCoords(x + (imageBackgroundSideWidth - photoImage.getImageWidth()) / 2, linkPreviewY, photoImage.getImageWidth(), photoImage.getImageHeight()); - rect.set(x, photoImage.getImageY(), x + imageBackgroundSideWidth, photoImage.getImageY2()); - Theme.chat_instantViewPaint.setColor(imageBackgroundSideColor); - canvas.drawRoundRect(rect, AndroidUtilities.dp(4), AndroidUtilities.dp(4), Theme.chat_instantViewPaint); - } else { - photoImage.setImageCoords(linkX + AndroidUtilities.dp(10), linkPreviewY, photoImage.getImageWidth(), photoImage.getImageHeight()); - } - if (imageBackgroundColor != 0) { - rect.set(photoImage.getImageX(), photoImage.getImageY(), photoImage.getImageX2(), photoImage.getImageY2()); - if (imageBackgroundGradientColor1 != 0) { - if (imageBackgroundGradientColor2 != 0) { - if (motionBackgroundDrawable == null) { - motionBackgroundDrawable = new MotionBackgroundDrawable(imageBackgroundColor, imageBackgroundGradientColor1, imageBackgroundGradientColor2, imageBackgroundGradientColor3, true); - if (imageBackgroundIntensity < 0) { - photoImage.setGradientBitmap(motionBackgroundDrawable.getBitmap()); - } - if (!photoImage.hasImageSet()) { - 
motionBackgroundDrawable.setRoundRadius(AndroidUtilities.dp(4)); - } - } - } else { - if (gradientShader == null) { - Rect r = BackgroundGradientDrawable.getGradientPoints(AndroidUtilities.getWallpaperRotation(imageBackgroundGradientRotation, false), (int) rect.width(), (int) rect.height()); - gradientShader = new LinearGradient(r.left, r.top, r.right, r.bottom, new int[]{imageBackgroundColor, imageBackgroundGradientColor1}, null, Shader.TileMode.CLAMP); - } - Theme.chat_instantViewPaint.setShader(gradientShader); - } - } else { - Theme.chat_instantViewPaint.setShader(null); - Theme.chat_instantViewPaint.setColor(imageBackgroundColor); - } - if (motionBackgroundDrawable != null) { - motionBackgroundDrawable.setBounds((int) rect.left, (int) rect.top, (int) rect.right, (int) rect.bottom); - motionBackgroundDrawable.draw(canvas); - } else if (imageBackgroundSideColor != 0) { - canvas.drawRect(photoImage.getImageX(), photoImage.getImageY(), photoImage.getImageX2(), photoImage.getImageY2(), Theme.chat_instantViewPaint); - } else { - canvas.drawRoundRect(rect, AndroidUtilities.dp(4), AndroidUtilities.dp(4), Theme.chat_instantViewPaint); - } - Theme.chat_instantViewPaint.setShader(null); - } - if (drawPhotoImage && drawInstantView && drawInstantViewType != 9) { - if (drawImageButton) { - int size = AndroidUtilities.dp(48); - buttonX = this.buttonX = (int) (photoImage.getImageX() + (photoImage.getImageWidth() - size) / 2.0f); - buttonY = this.buttonY = (int) (photoImage.getImageY() + (photoImage.getImageHeight() - size) / 2.0f); - radialProgress.setProgressRect((int) buttonX, (int) buttonY, (int) buttonX + size, (int) buttonY + size); - } - if (delegate == null || delegate.getPinchToZoomHelper() == null || !delegate.getPinchToZoomHelper().isInOverlayModeFor(this)) { - imageDrawn = photoImage.draw(canvas); - } - } - linkPreviewY += photoImage.getImageHeight() + AndroidUtilities.dp(6); - } - - if (currentMessageObject.isOutOwner()) { - 
Theme.chat_replyNamePaint.setColor(Theme.getColor(Theme.key_chat_messageTextOut)); - Theme.chat_replyTextPaint.setColor(Theme.getColor(Theme.key_chat_messageTextOut)); - } else { - Theme.chat_replyNamePaint.setColor(Theme.getColor(Theme.key_chat_messageTextIn)); - Theme.chat_replyTextPaint.setColor(Theme.getColor(Theme.key_chat_messageTextIn)); - } - if (titleLayout != null) { - if (linkPreviewY != startY) { - linkPreviewY += AndroidUtilities.dp(2); - } - if (smallImageStartY == 0) { - smallImageStartY = linkPreviewY - AndroidUtilities.dp(1); - } - canvas.save(); - canvas.translate(linkX + AndroidUtilities.dp(10) + titleX, linkPreviewY - AndroidUtilities.dp(3)); - titleLayout.draw(canvas); - canvas.restore(); - linkPreviewY += titleLayout.getLineBottom(titleLayout.getLineCount() - 1); - } - - if (authorLayout != null) { - if (linkPreviewY != startY) { - linkPreviewY += AndroidUtilities.dp(2); - } - if (smallImageStartY == 0) { - smallImageStartY = linkPreviewY - AndroidUtilities.dp(1); - } - canvas.save(); - canvas.translate(linkX + AndroidUtilities.dp(10) + authorX, linkPreviewY - AndroidUtilities.dp(3)); - authorLayout.draw(canvas); - canvas.restore(); - linkPreviewY += authorLayout.getLineBottom(authorLayout.getLineCount() - 1); - } - - if (descriptionLayout != null) { - if (linkPreviewY != startY) { - linkPreviewY += AndroidUtilities.dp(2); - } - if (smallImageStartY == 0) { - smallImageStartY = linkPreviewY - AndroidUtilities.dp(1); - } - descriptionY = linkPreviewY - AndroidUtilities.dp(3); - canvas.save(); - canvas.translate(linkX + (hasInvoicePreview ? 
0 : AndroidUtilities.dp(10)) + descriptionX, descriptionY); - if (pressedLink != null && linkBlockNum == -10) { - for (int b = 0; b < urlPath.size(); b++) { - canvas.drawPath(urlPath.get(b), Theme.chat_urlPaint); - } - } - if (delegate.getTextSelectionHelper() != null && getDelegate().getTextSelectionHelper().isSelected(currentMessageObject)) { - delegate.getTextSelectionHelper().drawDescription(currentMessageObject.isOutOwner(), descriptionLayout, canvas); - } - descriptionLayout.draw(canvas); - canvas.restore(); - linkPreviewY += descriptionLayout.getLineBottom(descriptionLayout.getLineCount() - 1); - } - - if (drawPhotoImage && (!drawInstantView || drawInstantViewType == 9)) { - if (linkPreviewY != startY) { - linkPreviewY += AndroidUtilities.dp(2); - } - - if (isSmallImage) { - photoImage.setImageCoords(linkX + backgroundWidth - AndroidUtilities.dp(81), smallImageStartY, photoImage.getImageWidth(), photoImage.getImageHeight()); - } else { - photoImage.setImageCoords(linkX + (hasInvoicePreview ? 
-AndroidUtilities.dp(6.3f) : AndroidUtilities.dp(10)), linkPreviewY, photoImage.getImageWidth(), photoImage.getImageHeight()); - if (drawImageButton) { - int size = AndroidUtilities.dp(48); - buttonX = this.buttonX = (int) (photoImage.getImageX() + (photoImage.getImageWidth() - size) / 2.0f); - buttonY = this.buttonY = (int) (photoImage.getImageY() + (photoImage.getImageHeight() - size) / 2.0f); - radialProgress.setProgressRect((int) buttonX, (int) buttonY, (int) buttonX + size, (int) buttonY + size); - } - } - if (currentMessageObject.isRoundVideo() && MediaController.getInstance().isPlayingMessage(currentMessageObject) && MediaController.getInstance().isVideoDrawingReady() && canvas.isHardwareAccelerated()) { - imageDrawn = true; - drawTime = true; - } else { - if (delegate == null || delegate.getPinchToZoomHelper() == null || !delegate.getPinchToZoomHelper().isInOverlayModeFor(this)) { - imageDrawn = photoImage.draw(canvas); - } - } - } - if (documentAttachType == DOCUMENT_ATTACH_TYPE_VIDEO || documentAttachType == DOCUMENT_ATTACH_TYPE_GIF) { - videoButtonX = (int) (photoImage.getImageX() + AndroidUtilities.dp(8)); - videoButtonY = (int) (photoImage.getImageY() + AndroidUtilities.dp(8)); - videoRadialProgress.setProgressRect(videoButtonX, videoButtonY, videoButtonX + AndroidUtilities.dp(24), videoButtonY + AndroidUtilities.dp(24)); - } - if (photosCountLayout != null && photoImage.getVisible()) { - int x = (int) (photoImage.getImageX() + photoImage.getImageWidth() - AndroidUtilities.dp(8) - photosCountWidth); - int y = (int) (photoImage.getImageY() + photoImage.getImageHeight() - AndroidUtilities.dp(19)); - rect.set(x - AndroidUtilities.dp(4), y - AndroidUtilities.dp(1.5f), x + photosCountWidth + AndroidUtilities.dp(4), y + AndroidUtilities.dp(14.5f)); - int oldAlpha = Theme.chat_timeBackgroundPaint.getAlpha(); - Theme.chat_timeBackgroundPaint.setAlpha((int) (oldAlpha * controlsAlpha)); - Theme.chat_durationPaint.setAlpha((int) (255 * controlsAlpha)); - 
canvas.drawRoundRect(rect, AndroidUtilities.dp(4), AndroidUtilities.dp(4), Theme.chat_timeBackgroundPaint); - Theme.chat_timeBackgroundPaint.setAlpha(oldAlpha); - canvas.save(); - canvas.translate(x, y); - photosCountLayout.draw(canvas); - canvas.restore(); - Theme.chat_durationPaint.setAlpha(255); - } - if (videoInfoLayout != null && (!drawPhotoImage || photoImage.getVisible()) && imageBackgroundSideColor == 0) { - int x; - int y; - if (hasGamePreview || hasInvoicePreview || documentAttachType == DOCUMENT_ATTACH_TYPE_WALLPAPER) { - if (drawPhotoImage) { - x = (int) (photoImage.getImageX() + AndroidUtilities.dp(8.5f)); - y = (int) (photoImage.getImageY() + AndroidUtilities.dp(6)); - int height = AndroidUtilities.dp(documentAttachType == DOCUMENT_ATTACH_TYPE_WALLPAPER ? 14.5f : 16.5f); - rect.set(x - AndroidUtilities.dp(4), y - AndroidUtilities.dp(1.5f), x + durationWidth + AndroidUtilities.dp(4), y + height); - canvas.drawRoundRect(rect, AndroidUtilities.dp(4), AndroidUtilities.dp(4), Theme.chat_timeBackgroundPaint); - } else { - x = linkX; - y = linkPreviewY; - } - } else { - x = (int) (photoImage.getImageX() + photoImage.getImageWidth() - AndroidUtilities.dp(8) - durationWidth); - y = (int) (photoImage.getImageY() + photoImage.getImageHeight() - AndroidUtilities.dp(19)); - rect.set(x - AndroidUtilities.dp(4), y - AndroidUtilities.dp(1.5f), x + durationWidth + AndroidUtilities.dp(4), y + AndroidUtilities.dp(14.5f)); - canvas.drawRoundRect(rect, AndroidUtilities.dp(4), AndroidUtilities.dp(4), Theme.chat_timeBackgroundPaint); - } - - canvas.save(); - canvas.translate(x, y); - if (hasInvoicePreview) { - if (drawPhotoImage) { - Theme.chat_shipmentPaint.setColor(Theme.getColor(Theme.key_chat_previewGameText)); - } else { - if (currentMessageObject.isOutOwner()) { - Theme.chat_shipmentPaint.setColor(Theme.getColor(Theme.key_chat_messageTextOut)); - } else { - Theme.chat_shipmentPaint.setColor(Theme.getColor(Theme.key_chat_messageTextIn)); - } - } - } - 
videoInfoLayout.draw(canvas); - canvas.restore(); - } - - if (drawInstantView) { - Drawable instantDrawable; - int instantY = startY + linkPreviewHeight + AndroidUtilities.dp(10); - Paint backPaint = Theme.chat_instantViewRectPaint; - if (currentMessageObject.isOutOwner()) { - instantDrawable = Theme.chat_msgOutInstantDrawable; - Theme.chat_instantViewPaint.setColor(Theme.getColor(Theme.key_chat_outPreviewInstantText)); - backPaint.setColor(Theme.getColor(Theme.key_chat_outPreviewInstantText)); - } else { - instantDrawable = Theme.chat_msgInInstantDrawable; - Theme.chat_instantViewPaint.setColor(Theme.getColor(Theme.key_chat_inPreviewInstantText)); - backPaint.setColor(Theme.getColor(Theme.key_chat_inPreviewInstantText)); - } - - instantButtonRect.set(linkX, instantY, linkX + instantWidth, instantY + AndroidUtilities.dp(36)); - if (Build.VERSION.SDK_INT >= 21) { - selectorDrawableMaskType[0] = 0; - selectorDrawable[0].setBounds(linkX, instantY, linkX + instantWidth, instantY + AndroidUtilities.dp(36)); - selectorDrawable[0].draw(canvas); - } - canvas.drawRoundRect(instantButtonRect, AndroidUtilities.dp(6), AndroidUtilities.dp(6), backPaint); - if (drawInstantViewType == 0) { - setDrawableBounds(instantDrawable, instantTextLeftX + instantTextX + linkX - AndroidUtilities.dp(15), instantY + AndroidUtilities.dp(11.5f), AndroidUtilities.dp(9), AndroidUtilities.dp(13)); - instantDrawable.draw(canvas); - } - if (instantViewLayout != null) { - canvas.save(); - canvas.translate(linkX + instantTextX, instantY + AndroidUtilities.dp(10.5f)); - instantViewLayout.draw(canvas); - canvas.restore(); - } - } + if (!(enterTransitionInPorgress && !currentMessageObject.isVoice())) { + drawLinkPreview(canvas, 1f); } drawTime = true; } else if (drawPhotoImage) { - if (currentMessageObject.isRoundVideo() && MediaController.getInstance().isPlayingMessage(currentMessageObject) && MediaController.getInstance().isVideoDrawingReady() && canvas.isHardwareAccelerated()) { + if (isRoundVideo && 
MediaController.getInstance().isPlayingMessage(currentMessageObject) && MediaController.getInstance().isVideoDrawingReady() && canvas.isHardwareAccelerated()) { imageDrawn = true; drawTime = true; } else { @@ -7721,7 +7469,7 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate audioVisualizerDrawable.draw(canvas, buttonX + AndroidUtilities.dp(22), buttonY + AndroidUtilities.dp(22), currentMessageObject.isOutOwner()); } - if (!voiceTransitionInPorgress) { + if (!enterTransitionInPorgress) { radialProgress.setBackgroundDrawable(isDrawSelectionBackground() ? currentBackgroundSelectedDrawable : currentBackgroundDrawable); radialProgress.draw(canvas); } @@ -7757,7 +7505,7 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate if (captionLayout != null) { updateCaptionLayout(); } - if ((currentPosition == null || currentMessagesGroup != null && currentMessagesGroup.isDocuments) && !transitionParams.transformGroupToSingleMessage) { + if ((currentPosition == null || currentMessagesGroup != null && currentMessagesGroup.isDocuments) && !transitionParams.transformGroupToSingleMessage && !(enterTransitionInPorgress && currentMessageObject.isVoice())) { drawCaptionLayout(canvas, false, 1f); } @@ -7933,6 +7681,315 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate transitionParams.recordDrawingState(); } + /** Draws the link / game / invoice preview of the current message: accent line, site name, title, author, description, preview image with its photo-count and video-info badges, and the instant-view button. Returns immediately when the message has none of those previews. As a side effect it repositions photoImage and may update buttonX/buttonY, videoButtonX/videoButtonY, descriptionY, instantButtonRect, imageDrawn and drawTime. @param canvas canvas to draw into. @param alpha opacity applied to the reply line, name and text paints, the image background and the preview image; 1f means fully opaque. */ public void drawLinkPreview(Canvas canvas, float alpha) { + if (!hasLinkPreview && !hasGamePreview && !hasInvoicePreview) { + return; + } + int startY; + int linkX; + if (hasGamePreview) { + startY = AndroidUtilities.dp(14) + namesOffset; + linkX = unmovedTextX - AndroidUtilities.dp(10); + } else if (hasInvoicePreview) { + startY = AndroidUtilities.dp(14) + namesOffset; + linkX = unmovedTextX + AndroidUtilities.dp(1); + } else { + startY = textY + currentMessageObject.textHeight + AndroidUtilities.dp(8); + linkX = unmovedTextX + AndroidUtilities.dp(1); + } + int linkPreviewY = startY; 
+ int smallImageStartY = 0; + + if (!hasInvoicePreview) { + Theme.chat_replyLinePaint.setColor(Theme.getColor(currentMessageObject.isOutOwner() ? Theme.key_chat_outPreviewLine : Theme.key_chat_inPreviewLine)); + if (alpha != 1f) { + Theme.chat_replyLinePaint.setAlpha((int) (alpha * Theme.chat_replyLinePaint.getAlpha())); + } + canvas.drawRect(linkX, linkPreviewY - AndroidUtilities.dp(3), linkX + AndroidUtilities.dp(2), linkPreviewY + linkPreviewHeight + AndroidUtilities.dp(3), Theme.chat_replyLinePaint); + } + + if (siteNameLayout != null) { + smallImageStartY = linkPreviewY - AndroidUtilities.dp(1); + Theme.chat_replyNamePaint.setColor(Theme.getColor(currentMessageObject.isOutOwner() ? Theme.key_chat_outSiteNameText : Theme.key_chat_inSiteNameText)); + if (alpha != 1f) { + /* scale the name paint's own alpha; reading the line paint here would apply alpha twice */ Theme.chat_replyNamePaint.setAlpha((int) (alpha * Theme.chat_replyNamePaint.getAlpha())); + } + canvas.save(); + int x; + if (siteNameRtl) { + x = backgroundWidth - siteNameWidth - AndroidUtilities.dp(32); + if (isSmallImage) { + x -= AndroidUtilities.dp(48 + 6); + } + } else { + x = (hasInvoicePreview ? 
0 : AndroidUtilities.dp(10)); + } + canvas.translate(linkX + x, linkPreviewY - AndroidUtilities.dp(3)); + siteNameLayout.draw(canvas); + canvas.restore(); + linkPreviewY += siteNameLayout.getLineBottom(siteNameLayout.getLineCount() - 1); + } + if ((hasGamePreview || hasInvoicePreview) && currentMessageObject.textHeight != 0) { + startY += currentMessageObject.textHeight + AndroidUtilities.dp(4); + linkPreviewY += currentMessageObject.textHeight + AndroidUtilities.dp(4); + } + + if (drawPhotoImage && drawInstantView && drawInstantViewType != 9 || drawInstantViewType == 6 && imageBackgroundColor != 0) { + if (linkPreviewY != startY) { + linkPreviewY += AndroidUtilities.dp(2); + } + if (imageBackgroundSideColor != 0) { + int x = linkX + AndroidUtilities.dp(10); + photoImage.setImageCoords(x + (imageBackgroundSideWidth - photoImage.getImageWidth()) / 2, linkPreviewY, photoImage.getImageWidth(), photoImage.getImageHeight()); + rect.set(x, photoImage.getImageY(), x + imageBackgroundSideWidth, photoImage.getImageY2()); + Theme.chat_instantViewPaint.setColor(ColorUtils.setAlphaComponent(imageBackgroundSideColor, (int) (255 * alpha))); + canvas.drawRoundRect(rect, AndroidUtilities.dp(4), AndroidUtilities.dp(4), Theme.chat_instantViewPaint); + } else { + photoImage.setImageCoords(linkX + AndroidUtilities.dp(10), linkPreviewY, photoImage.getImageWidth(), photoImage.getImageHeight()); + } + if (imageBackgroundColor != 0) { + rect.set(photoImage.getImageX(), photoImage.getImageY(), photoImage.getImageX2(), photoImage.getImageY2()); + if (imageBackgroundGradientColor1 != 0) { + if (imageBackgroundGradientColor2 != 0) { + if (motionBackgroundDrawable == null) { + motionBackgroundDrawable = new MotionBackgroundDrawable(imageBackgroundColor, imageBackgroundGradientColor1, imageBackgroundGradientColor2, imageBackgroundGradientColor3, true); + if (imageBackgroundIntensity < 0) { + photoImage.setGradientBitmap(motionBackgroundDrawable.getBitmap()); + } + if (!photoImage.hasImageSet()) 
{ + motionBackgroundDrawable.setRoundRadius(AndroidUtilities.dp(4)); + } + } + } else { + if (gradientShader == null) { + Rect r = BackgroundGradientDrawable.getGradientPoints(AndroidUtilities.getWallpaperRotation(imageBackgroundGradientRotation, false), (int) rect.width(), (int) rect.height()); + gradientShader = new LinearGradient(r.left, r.top, r.right, r.bottom, new int[]{imageBackgroundColor, imageBackgroundGradientColor1}, null, Shader.TileMode.CLAMP); + } + Theme.chat_instantViewPaint.setShader(gradientShader); + if (alpha != 1f) { + Theme.chat_instantViewPaint.setAlpha((int) (255 * alpha)); + } + } + } else { + Theme.chat_instantViewPaint.setShader(null); + Theme.chat_instantViewPaint.setColor(imageBackgroundColor); + if (alpha != 1f) { + Theme.chat_instantViewPaint.setAlpha((int) (255 * alpha)); + } + } + if (motionBackgroundDrawable != null) { + motionBackgroundDrawable.setBounds((int) rect.left, (int) rect.top, (int) rect.right, (int) rect.bottom); + motionBackgroundDrawable.draw(canvas); + } else if (imageBackgroundSideColor != 0) { + canvas.drawRect(photoImage.getImageX(), photoImage.getImageY(), photoImage.getImageX2(), photoImage.getImageY2(), Theme.chat_instantViewPaint); + } else { + canvas.drawRoundRect(rect, AndroidUtilities.dp(4), AndroidUtilities.dp(4), Theme.chat_instantViewPaint); + } + Theme.chat_instantViewPaint.setShader(null); + Theme.chat_instantViewPaint.setAlpha(255); + } + if (drawPhotoImage && drawInstantView && drawInstantViewType != 9) { + if (drawImageButton) { + int size = AndroidUtilities.dp(48); + buttonX = this.buttonX = (int) (photoImage.getImageX() + (photoImage.getImageWidth() - size) / 2.0f); + buttonY = this.buttonY = (int) (photoImage.getImageY() + (photoImage.getImageHeight() - size) / 2.0f); + radialProgress.setProgressRect((int) buttonX, (int) buttonY, (int) buttonX + size, (int) buttonY + size); + } + if (delegate == null || delegate.getPinchToZoomHelper() == null || 
!delegate.getPinchToZoomHelper().isInOverlayModeFor(this)) { + if (alpha != 1f) { + photoImage.setAlpha(alpha); + imageDrawn = photoImage.draw(canvas); + /* restore full opacity on the same 0..1 scale used for alpha above */ photoImage.setAlpha(1f); + } else { + imageDrawn = photoImage.draw(canvas); + } + + } + } + linkPreviewY += photoImage.getImageHeight() + AndroidUtilities.dp(6); + } + + if (currentMessageObject.isOutOwner()) { + Theme.chat_replyNamePaint.setColor(ColorUtils.setAlphaComponent(Theme.getColor(Theme.key_chat_messageTextOut), (int) (255 * alpha))); + Theme.chat_replyTextPaint.setColor(ColorUtils.setAlphaComponent(Theme.getColor(Theme.key_chat_messageTextOut), (int) (255 * alpha))); + } else { + Theme.chat_replyNamePaint.setColor(ColorUtils.setAlphaComponent(Theme.getColor(Theme.key_chat_messageTextIn), (int) (255 * alpha))); + Theme.chat_replyTextPaint.setColor(ColorUtils.setAlphaComponent(Theme.getColor(Theme.key_chat_messageTextIn), (int) (255 * alpha))); + } + if (titleLayout != null) { + if (linkPreviewY != startY) { + linkPreviewY += AndroidUtilities.dp(2); + } + if (smallImageStartY == 0) { + smallImageStartY = linkPreviewY - AndroidUtilities.dp(1); + } + canvas.save(); + canvas.translate(linkX + AndroidUtilities.dp(10) + titleX, linkPreviewY - AndroidUtilities.dp(3)); + titleLayout.draw(canvas); + canvas.restore(); + linkPreviewY += titleLayout.getLineBottom(titleLayout.getLineCount() - 1); + } + + if (authorLayout != null) { + if (linkPreviewY != startY) { + linkPreviewY += AndroidUtilities.dp(2); + } + if (smallImageStartY == 0) { + smallImageStartY = linkPreviewY - AndroidUtilities.dp(1); + } + canvas.save(); + canvas.translate(linkX + AndroidUtilities.dp(10) + authorX, linkPreviewY - AndroidUtilities.dp(3)); + authorLayout.draw(canvas); + canvas.restore(); + linkPreviewY += authorLayout.getLineBottom(authorLayout.getLineCount() - 1); + } + + if (descriptionLayout != null) { + if (linkPreviewY != startY) { + linkPreviewY += AndroidUtilities.dp(2); + } + if (smallImageStartY == 0) { + smallImageStartY = 
linkPreviewY - AndroidUtilities.dp(1); + } + descriptionY = linkPreviewY - AndroidUtilities.dp(3); + canvas.save(); + canvas.translate(linkX + (hasInvoicePreview ? 0 : AndroidUtilities.dp(10)) + descriptionX, descriptionY); + if (pressedLink != null && linkBlockNum == -10) { + for (int b = 0; b < urlPath.size(); b++) { + canvas.drawPath(urlPath.get(b), Theme.chat_urlPaint); + } + } + /* delegate is treated as nullable elsewhere in this method, so guard it here too */ if (delegate != null && delegate.getTextSelectionHelper() != null && getDelegate().getTextSelectionHelper().isSelected(currentMessageObject)) { + delegate.getTextSelectionHelper().drawDescription(currentMessageObject.isOutOwner(), descriptionLayout, canvas); + } + descriptionLayout.draw(canvas); + canvas.restore(); + linkPreviewY += descriptionLayout.getLineBottom(descriptionLayout.getLineCount() - 1); + } + + if (drawPhotoImage && (!drawInstantView || drawInstantViewType == 9)) { + if (linkPreviewY != startY) { + linkPreviewY += AndroidUtilities.dp(2); + } + + if (isSmallImage) { + photoImage.setImageCoords(linkX + backgroundWidth - AndroidUtilities.dp(81), smallImageStartY, photoImage.getImageWidth(), photoImage.getImageHeight()); + } else { + photoImage.setImageCoords(linkX + (hasInvoicePreview ? 
-AndroidUtilities.dp(6.3f) : AndroidUtilities.dp(10)), linkPreviewY, photoImage.getImageWidth(), photoImage.getImageHeight()); + if (drawImageButton) { + int size = AndroidUtilities.dp(48); + buttonX = this.buttonX = (int) (photoImage.getImageX() + (photoImage.getImageWidth() - size) / 2.0f); + buttonY = this.buttonY = (int) (photoImage.getImageY() + (photoImage.getImageHeight() - size) / 2.0f); + radialProgress.setProgressRect((int) buttonX, (int) buttonY, (int) buttonX + size, (int) buttonY + size); + } + } + if (isRoundVideo && MediaController.getInstance().isPlayingMessage(currentMessageObject) && MediaController.getInstance().isVideoDrawingReady() && canvas.isHardwareAccelerated()) { + imageDrawn = true; + drawTime = true; + } else { + if (delegate == null || delegate.getPinchToZoomHelper() == null || !delegate.getPinchToZoomHelper().isInOverlayModeFor(this)) { + if (alpha != 1f) { + photoImage.setAlpha(alpha); + imageDrawn = photoImage.draw(canvas); + /* restore full opacity on the same 0..1 scale used for alpha above */ photoImage.setAlpha(1f); + } else { + imageDrawn = photoImage.draw(canvas); + } + } + } + } + if (documentAttachType == DOCUMENT_ATTACH_TYPE_VIDEO || documentAttachType == DOCUMENT_ATTACH_TYPE_GIF) { + videoButtonX = (int) (photoImage.getImageX() + AndroidUtilities.dp(8)); + videoButtonY = (int) (photoImage.getImageY() + AndroidUtilities.dp(8)); + videoRadialProgress.setProgressRect(videoButtonX, videoButtonY, videoButtonX + AndroidUtilities.dp(24), videoButtonY + AndroidUtilities.dp(24)); + } + if (photosCountLayout != null && photoImage.getVisible()) { + int x = (int) (photoImage.getImageX() + photoImage.getImageWidth() - AndroidUtilities.dp(8) - photosCountWidth); + int y = (int) (photoImage.getImageY() + photoImage.getImageHeight() - AndroidUtilities.dp(19)); + rect.set(x - AndroidUtilities.dp(4), y - AndroidUtilities.dp(1.5f), x + photosCountWidth + AndroidUtilities.dp(4), y + AndroidUtilities.dp(14.5f)); + int oldAlpha = Theme.chat_timeBackgroundPaint.getAlpha(); + 
Theme.chat_timeBackgroundPaint.setAlpha((int) (oldAlpha * controlsAlpha)); + Theme.chat_durationPaint.setAlpha((int) (255 * controlsAlpha)); + canvas.drawRoundRect(rect, AndroidUtilities.dp(4), AndroidUtilities.dp(4), Theme.chat_timeBackgroundPaint); + Theme.chat_timeBackgroundPaint.setAlpha(oldAlpha); + canvas.save(); + canvas.translate(x, y); + photosCountLayout.draw(canvas); + canvas.restore(); + Theme.chat_durationPaint.setAlpha(255); + } + if (videoInfoLayout != null && (!drawPhotoImage || photoImage.getVisible()) && imageBackgroundSideColor == 0) { + int x; + int y; + if (hasGamePreview || hasInvoicePreview || documentAttachType == DOCUMENT_ATTACH_TYPE_WALLPAPER) { + if (drawPhotoImage) { + x = (int) (photoImage.getImageX() + AndroidUtilities.dp(8.5f)); + y = (int) (photoImage.getImageY() + AndroidUtilities.dp(6)); + int height = AndroidUtilities.dp(documentAttachType == DOCUMENT_ATTACH_TYPE_WALLPAPER ? 14.5f : 16.5f); + rect.set(x - AndroidUtilities.dp(4), y - AndroidUtilities.dp(1.5f), x + durationWidth + AndroidUtilities.dp(4), y + height); + canvas.drawRoundRect(rect, AndroidUtilities.dp(4), AndroidUtilities.dp(4), Theme.chat_timeBackgroundPaint); + } else { + x = linkX; + y = linkPreviewY; + } + } else { + x = (int) (photoImage.getImageX() + photoImage.getImageWidth() - AndroidUtilities.dp(8) - durationWidth); + y = (int) (photoImage.getImageY() + photoImage.getImageHeight() - AndroidUtilities.dp(19)); + rect.set(x - AndroidUtilities.dp(4), y - AndroidUtilities.dp(1.5f), x + durationWidth + AndroidUtilities.dp(4), y + AndroidUtilities.dp(14.5f)); + canvas.drawRoundRect(rect, AndroidUtilities.dp(4), AndroidUtilities.dp(4), Theme.chat_timeBackgroundPaint); + } + + canvas.save(); + canvas.translate(x, y); + if (hasInvoicePreview) { + if (drawPhotoImage) { + Theme.chat_shipmentPaint.setColor(Theme.getColor(Theme.key_chat_previewGameText)); + } else { + if (currentMessageObject.isOutOwner()) { + 
Theme.chat_shipmentPaint.setColor(Theme.getColor(Theme.key_chat_messageTextOut)); + } else { + Theme.chat_shipmentPaint.setColor(Theme.getColor(Theme.key_chat_messageTextIn)); + } + } + } + videoInfoLayout.draw(canvas); + canvas.restore(); + } + + if (drawInstantView) { + Drawable instantDrawable; + int instantY = startY + linkPreviewHeight + AndroidUtilities.dp(10); + Paint backPaint = Theme.chat_instantViewRectPaint; + if (currentMessageObject.isOutOwner()) { + instantDrawable = Theme.chat_msgOutInstantDrawable; + Theme.chat_instantViewPaint.setColor(Theme.getColor(Theme.key_chat_outPreviewInstantText)); + backPaint.setColor(Theme.getColor(Theme.key_chat_outPreviewInstantText)); + } else { + instantDrawable = Theme.chat_msgInInstantDrawable; + Theme.chat_instantViewPaint.setColor(Theme.getColor(Theme.key_chat_inPreviewInstantText)); + backPaint.setColor(Theme.getColor(Theme.key_chat_inPreviewInstantText)); + } + + instantButtonRect.set(linkX, instantY, linkX + instantWidth, instantY + AndroidUtilities.dp(36)); + if (Build.VERSION.SDK_INT >= 21) { + selectorDrawableMaskType[0] = 0; + selectorDrawable[0].setBounds(linkX, instantY, linkX + instantWidth, instantY + AndroidUtilities.dp(36)); + selectorDrawable[0].draw(canvas); + } + canvas.drawRoundRect(instantButtonRect, AndroidUtilities.dp(6), AndroidUtilities.dp(6), backPaint); + if (drawInstantViewType == 0) { + setDrawableBounds(instantDrawable, instantTextLeftX + instantTextX + linkX - AndroidUtilities.dp(15), instantY + AndroidUtilities.dp(11.5f), AndroidUtilities.dp(9), AndroidUtilities.dp(13)); + instantDrawable.draw(canvas); + } + if (instantViewLayout != null) { + canvas.save(); + canvas.translate(linkX + instantTextX, instantY + AndroidUtilities.dp(10.5f)); + instantViewLayout.draw(canvas); + canvas.restore(); + } + } + } + private boolean shouldDrawMenuDrawable() { return currentMessagesGroup == null || (currentPosition.flags & MessageObject.POSITION_FLAG_TOP) != 0; } @@ -8029,11 +8086,10 @@ public class 
ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate canvas.restore(); } - private void drawMessageText(Canvas canvas, ArrayList textLayoutBlocks, boolean origin, float alpha) { + public void drawMessageText(Canvas canvas, ArrayList textLayoutBlocks, boolean origin, float alpha, boolean drawOnlyText) { if (textLayoutBlocks == null || textLayoutBlocks.isEmpty()) { return; } - int oldAlpha = 0; int firstVisibleBlockNum; int lastVisibleBlockNum; if (origin) { @@ -8050,25 +8106,31 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate if (firstVisibleBlockNum >= 0) { int restore = Integer.MIN_VALUE; + int oldAlpha = -1; if (alpha != 1.0f) { - if (currentBackgroundDrawable != null) { - int top = currentBackgroundDrawable.getBounds().top; - int bottom = currentBackgroundDrawable.getBounds().bottom; - - if (getY() < 0) { - top = (int) -getY(); - } - if (getY() + getMeasuredHeight() > parentHeight) { - bottom = (int) (parentHeight - getY()); - } - rect.set( - getCurrentBackgroundLeft(), top, - currentBackgroundDrawable.getBounds().right, bottom - ); + if (drawOnlyText) { + oldAlpha = Theme.chat_msgTextPaint.getAlpha(); + Theme.chat_msgTextPaint.setAlpha((int) (oldAlpha * alpha)); } else { - rect.set(0, 0, getMeasuredWidth(), getMeasuredHeight()); + if (currentBackgroundDrawable != null) { + int top = currentBackgroundDrawable.getBounds().top; + int bottom = currentBackgroundDrawable.getBounds().bottom; + + if (getY() < 0) { + top = (int) -getY(); + } + if (getY() + getMeasuredHeight() > parentHeight) { + bottom = (int) (parentHeight - getY()); + } + rect.set( + getCurrentBackgroundLeft(), top, + currentBackgroundDrawable.getBounds().right, bottom + ); + } else { + rect.set(0, 0, getMeasuredWidth(), getMeasuredHeight()); + } + restore = canvas.saveLayerAlpha(rect, (int) (alpha * 255), Canvas.ALL_SAVE_FLAG); } - restore = canvas.saveLayerAlpha(rect, (int) (alpha * 255), Canvas.ALL_SAVE_FLAG); } for (int a = firstVisibleBlockNum; a 
<= lastVisibleBlockNum; a++) { if (a >= textLayoutBlocks.size()) { @@ -8077,18 +8139,18 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate MessageObject.TextLayoutBlock block = textLayoutBlocks.get(a); canvas.save(); canvas.translate(textX - (block.isRtl() ? (int) Math.ceil(currentMessageObject.textXOffset) : 0), textY + block.textYOffset); - if (pressedLink != null && a == linkBlockNum) { + if (pressedLink != null && a == linkBlockNum && !drawOnlyText) { for (int b = 0; b < urlPath.size(); b++) { canvas.drawPath(urlPath.get(b), Theme.chat_urlPaint); } } - if (a == linkSelectionBlockNum && !urlPathSelection.isEmpty()) { + if (a == linkSelectionBlockNum && !urlPathSelection.isEmpty() && !drawOnlyText) { for (int b = 0; b < urlPathSelection.size(); b++) { canvas.drawPath(urlPathSelection.get(b), Theme.chat_textSearchSelectionPaint); } } - if (delegate.getTextSelectionHelper() != null && transitionParams.animateChangeProgress == 1f) { + if (delegate.getTextSelectionHelper() != null && transitionParams.animateChangeProgress == 1f && !drawOnlyText) { delegate.getTextSelectionHelper().draw(currentMessageObject, block, canvas); } try { @@ -8098,6 +8160,9 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate } canvas.restore(); } + if (oldAlpha >= 0) { + Theme.chat_msgTextPaint.setAlpha(oldAlpha); + } if (restore != Integer.MIN_VALUE) { canvas.restoreToCount(restore); @@ -8726,7 +8791,7 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate if (currentMessageObject.loadedFileSize > 0) { createLoadingProgressLayout(documentAttach); } - } else if (currentMessageObject.isRoundVideo()) { + } else if (isRoundVideo) { if (currentMessageObject.isSecretMedia()) { FileLoader.getInstance(currentAccount).loadFile(currentMessageObject.getDocument(), currentMessageObject, 1, 1); } else { @@ -8781,7 +8846,7 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate 
invalidate(); } } else { - if (currentMessageObject.isOut() && (currentMessageObject.isSending() || currentMessageObject.isEditing())) { + if (currentMessageObject.isOut() && !drawVideoImageButton && (currentMessageObject.isSending() || currentMessageObject.isEditing())) { if (radialProgress.getIcon() != MediaActionDrawable.ICON_CHECK) { delegate.didPressCancelSendButton(this); } @@ -8813,7 +8878,7 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate radialProgress.setIcon(getIconForCurrentState(), true, animated); invalidate(); } else { - if (currentMessageObject.isRoundVideo()) { + if (isRoundVideo) { MessageObject playingMessage = MediaController.getInstance().getPlayingMessageObject(); if (playingMessage == null || !playingMessage.isRoundVideo()) { photoImage.setAllowStartAnimation(true); @@ -9831,8 +9896,11 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate } } - private void setBackgroundTopY() { + public void setBackgroundTopY(boolean fromParent) { for (int a = 0; a < 2; a++) { + if (a == 1 && !fromParent) { + return; + } Theme.MessageDrawable drawable = a == 0 ? currentBackgroundDrawable : currentBackgroundSelectedDrawable; int h = parentHeight; if (h == 0) { @@ -9842,7 +9910,7 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate h = view.getMeasuredHeight(); } } - drawable.setTop((int) (getY() + parentViewTopOffset), h, pinnedTop, pinnedBottom || transitionParams.changePinnedBottomProgress != 1); + drawable.setTop((int) ((fromParent ? 
getY() : getTop()) + parentViewTopOffset), h, pinnedTop, pinnedBottom || transitionParams.changePinnedBottomProgress != 1); } } @@ -9858,7 +9926,6 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate if (currentMessageObject == null) { return; } - if (!wasLayout && !animationRunning) { forceLayout(); return; @@ -9930,7 +9997,7 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate currentBackgroundSelectedDrawable = Theme.chat_msgOutMediaSelectedDrawable; transitionParams.drawPinnedBottomBackground = true; } - setBackgroundTopY(); + setBackgroundTopY(true); if (isDrawSelectionBackground() && (currentPosition == null || getBackground() != null)) { currentBackgroundShadowDrawable = currentBackgroundSelectedDrawable.getShadowDrawable(); } else { @@ -9997,7 +10064,7 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate currentBackgroundSelectedDrawable = Theme.chat_msgInMediaSelectedDrawable; transitionParams.drawPinnedBottomBackground = true; } - setBackgroundTopY(); + setBackgroundTopY(true); if (isDrawSelectionBackground() && (currentPosition == null || getBackground() != null)) { currentBackgroundShadowDrawable = currentBackgroundSelectedDrawable.getShadowDrawable(); } else { @@ -10128,7 +10195,7 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate } } - if (drawBackground && currentBackgroundDrawable != null && (currentPosition == null || isDrawSelectionBackground() && (currentMessageObject.isMusic() || currentMessageObject.isDocument()))) { + if (drawBackground && currentBackgroundDrawable != null && (currentPosition == null || isDrawSelectionBackground() && (currentMessageObject.isMusic() || currentMessageObject.isDocument())) && !(enterTransitionInPorgress && !currentMessageObject.isVoice())) { if (isHighlightedAnimated) { currentBackgroundDrawable.setAlpha((int) (255 * alphaInternal)); currentBackgroundDrawable.draw(canvas); @@ -10300,38 
+10367,8 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate } } if (drawSideButton == 3) { - int height = AndroidUtilities.dp(32); - if (commentLayout != null) { - sideStartY -= AndroidUtilities.dp(18); - height += AndroidUtilities.dp(18); - } - - rect.set(sideStartX, sideStartY, sideStartX + AndroidUtilities.dp(32), sideStartY + height); - Theme.applyServiceShaderMatrix(getMeasuredWidth(), backgroundHeight, getX(), viewTop); - canvas.drawRoundRect(rect, AndroidUtilities.dp(16), AndroidUtilities.dp(16), sideButtonPressed ? Theme.chat_actionBackgroundSelectedPaint : Theme.chat_actionBackgroundPaint); - if (Theme.hasGradientService()) { - canvas.drawRoundRect(rect, AndroidUtilities.dp(16), AndroidUtilities.dp(16), Theme.chat_actionBackgroundGradientDarkenPaint); - } - - setDrawableBounds(Theme.chat_commentStickerDrawable, sideStartX + AndroidUtilities.dp(4), sideStartY + AndroidUtilities.dp(4)); - Theme.chat_commentStickerDrawable.draw(canvas); - - if (commentLayout != null) { - Theme.chat_stickerCommentCountPaint.setColor(Theme.getColor(Theme.key_chat_stickerReplyNameText)); - if (transitionParams.animateComments) { - if (transitionParams.animateCommentsLayout != null) { - canvas.save(); - Theme.chat_stickerCommentCountPaint.setAlpha((int) (255 * (1.0 - transitionParams.animateChangeProgress))); - canvas.translate(sideStartX + (AndroidUtilities.dp(32) - transitionParams.animateTotalCommentWidth) / 2, sideStartY + AndroidUtilities.dp(30)); - transitionParams.animateCommentsLayout.draw(canvas); - canvas.restore(); - } - Theme.chat_stickerCommentCountPaint.setAlpha((int) (255 * transitionParams.animateChangeProgress)); - } - canvas.save(); - canvas.translate(sideStartX + (AndroidUtilities.dp(32) - totalCommentWidth) / 2, sideStartY + AndroidUtilities.dp(30)); - commentLayout.draw(canvas); - canvas.restore(); + if (!(enterTransitionInPorgress && !currentMessageObject.isVoice())) { + drawCommentButton(canvas, 1f); } } else { 
rect.set(sideStartX, sideStartY, sideStartX + AndroidUtilities.dp(32), sideStartY + AndroidUtilities.dp(32)); @@ -10383,14 +10420,14 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate replyStartY = AndroidUtilities.dp(12 + (drawForwardedName && forwardedNameLayout[0] != null ? 36 : 0) + (drawNameLayout && nameLayout != null ? 20 : 0)); } } - if (currentPosition == null && !transitionParams.animateBackgroundBoundsInner) { + if (currentPosition == null && !transitionParams.animateBackgroundBoundsInner && !(enterTransitionInPorgress && !currentMessageObject.isVoice())) { drawNamesLayout(canvas, 1f); } if (!autoPlayingMedia || !MediaController.getInstance().isPlayingMessageAndReadyToDraw(currentMessageObject) && !transitionParams.animateBackgroundBoundsInner) { drawOverlays(canvas); } - if ((drawTime || !mediaBackground) && !forceNotDrawTime && !transitionParams.animateBackgroundBoundsInner) { + if ((drawTime || !mediaBackground) && !forceNotDrawTime && !transitionParams.animateBackgroundBoundsInner && !(enterTransitionInPorgress && !currentMessageObject.isVoice())) { drawTime(canvas, 1f, false); } @@ -10423,6 +10460,67 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate updateSelectionTextPosition(); } + public void drawCommentButton(Canvas canvas, float alpha) { + if (drawSideButton != 3) { + return; + } + int height = AndroidUtilities.dp(32); + if (commentLayout != null) { + sideStartY -= AndroidUtilities.dp(18); + height += AndroidUtilities.dp(18); + } + + rect.set(sideStartX, sideStartY, sideStartX + AndroidUtilities.dp(32), sideStartY + height); + Theme.applyServiceShaderMatrix(getMeasuredWidth(), backgroundHeight, getX(), viewTop); + if (alpha != 1f) { + int oldAlpha = Theme.chat_actionBackgroundPaint.getAlpha(); + Theme.chat_actionBackgroundPaint.setAlpha((int) (alpha * oldAlpha)); + canvas.drawRoundRect(rect, AndroidUtilities.dp(16), AndroidUtilities.dp(16), Theme.chat_actionBackgroundPaint); + 
Theme.chat_actionBackgroundPaint.setAlpha(oldAlpha); + } else { + canvas.drawRoundRect(rect, AndroidUtilities.dp(16), AndroidUtilities.dp(16), sideButtonPressed ? Theme.chat_actionBackgroundSelectedPaint : Theme.chat_actionBackgroundPaint); + } + if (Theme.hasGradientService()) { + if (alpha != 1f) { + int oldAlpha = Theme.chat_actionBackgroundGradientDarkenPaint.getAlpha(); + Theme.chat_actionBackgroundGradientDarkenPaint.setAlpha((int) (alpha * oldAlpha)); + canvas.drawRoundRect(rect, AndroidUtilities.dp(16), AndroidUtilities.dp(16), Theme.chat_actionBackgroundGradientDarkenPaint); + Theme.chat_actionBackgroundGradientDarkenPaint.setAlpha(oldAlpha); + } else { + canvas.drawRoundRect(rect, AndroidUtilities.dp(16), AndroidUtilities.dp(16), Theme.chat_actionBackgroundGradientDarkenPaint); + } + } + + + setDrawableBounds(Theme.chat_commentStickerDrawable, sideStartX + AndroidUtilities.dp(4), sideStartY + AndroidUtilities.dp(4)); + if (alpha != 1f) { + Theme.chat_commentStickerDrawable.setAlpha((int) (255 * alpha)); + Theme.chat_commentStickerDrawable.draw(canvas); + Theme.chat_commentStickerDrawable.setAlpha(255); + } else { + Theme.chat_commentStickerDrawable.draw(canvas); + } + + if (commentLayout != null) { + Theme.chat_stickerCommentCountPaint.setColor(Theme.getColor(Theme.key_chat_stickerReplyNameText)); + Theme.chat_stickerCommentCountPaint.setAlpha((int) (255 * alpha)); + if (transitionParams.animateComments) { + if (transitionParams.animateCommentsLayout != null) { + canvas.save(); + Theme.chat_stickerCommentCountPaint.setAlpha((int) (255 * (1.0 - transitionParams.animateChangeProgress) * alpha)); + canvas.translate(sideStartX + (AndroidUtilities.dp(32) - transitionParams.animateTotalCommentWidth) / 2, sideStartY + AndroidUtilities.dp(30)); + transitionParams.animateCommentsLayout.draw(canvas); + canvas.restore(); + } + Theme.chat_stickerCommentCountPaint.setAlpha((int) (255 * transitionParams.animateChangeProgress)); + } + canvas.save(); + 
canvas.translate(sideStartX + (AndroidUtilities.dp(32) - totalCommentWidth) / 2, sideStartY + AndroidUtilities.dp(30)); + commentLayout.draw(canvas); + canvas.restore(); + } + } + public void drawOutboundsContent(Canvas canvas) { if (transitionParams.animateBackgroundBoundsInner) { if (!transitionParams.transitionBotButtons.isEmpty()) { @@ -10831,7 +10929,8 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate } } } - if (currentPosition == null || currentPosition.minY == 0 && currentPosition.minX == 0) { + forwardNameX = replyStartX - replyTextOffset + AndroidUtilities.dp(10 + (needReplyImage ? 44 : 0)); + if ((currentPosition == null || currentPosition.minY == 0 && currentPosition.minX == 0) && !(enterTransitionInPorgress && !currentMessageObject.isVoice())) { canvas.drawRect(replyStartX, replyStartY, replyStartX + AndroidUtilities.dp(2), replyStartY + AndroidUtilities.dp(35), Theme.chat_replyLinePaint); if (needReplyImage) { replyImageReceiver.setImageCoords(replyStartX + AndroidUtilities.dp(10), replyStartY, AndroidUtilities.dp(35), AndroidUtilities.dp(35)); @@ -10846,7 +10945,7 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate } if (replyTextLayout != null) { canvas.save(); - canvas.translate(forwardNameX = replyStartX - replyTextOffset + AndroidUtilities.dp(10 + (needReplyImage ? 
44 : 0)), replyStartY + AndroidUtilities.dp(19)); + canvas.translate(forwardNameX, replyStartY + AndroidUtilities.dp(19)); replyTextLayout.draw(canvas); canvas.restore(); } @@ -10939,7 +11038,24 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate return backgroundDrawable; } - public Theme.MessageDrawable getCurrentBackgroundDrawable() { + public Theme.MessageDrawable getCurrentBackgroundDrawable(boolean update) { + if (update) { + boolean forceMediaByGroup = currentPosition != null && (currentPosition.flags & MessageObject.POSITION_FLAG_BOTTOM) == 0 && currentMessagesGroup.isDocuments && !drawPinnedBottom; + if (currentMessageObject.isOutOwner()) { + if (!mediaBackground && !drawPinnedBottom && !forceMediaByGroup) { + currentBackgroundDrawable = Theme.chat_msgOutDrawable; + } else { + currentBackgroundDrawable = Theme.chat_msgOutMediaDrawable; + } + } else { + if (!mediaBackground && !drawPinnedBottom && !forceMediaByGroup) { + currentBackgroundDrawable = Theme.chat_msgInDrawable; + } else { + currentBackgroundDrawable = Theme.chat_msgInMediaDrawable; + } + } + } + currentBackgroundDrawable.getBackgroundDrawable(); return currentBackgroundDrawable; } @@ -11081,7 +11197,7 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate if (transitionParams.animateComments && transitionParams.animateCommentsLayout != null) { Theme.chat_replyNamePaint.setAlpha((int) (prevAlpha * transitionParams.animateChangeProgress)); } else { - Theme.chat_replyNamePaint.setAlpha(prevAlpha); + Theme.chat_replyNamePaint.setAlpha((int) (prevAlpha * alpha)); } commentLayout.draw(canvas); canvas.restore(); @@ -11117,7 +11233,13 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate } if (!drawnAvatars) { setDrawableBounds(Theme.chat_commentDrawable, x, y - AndroidUtilities.dp(4) + (pinnedBottom ? 
AndroidUtilities.dp(2) : 0)); - Theme.chat_commentDrawable.draw(canvas); + if (alpha != 1f) { + Theme.chat_commentDrawable.setAlpha((int) (255 * alpha)); + Theme.chat_commentDrawable.draw(canvas); + Theme.chat_commentDrawable.setAlpha(255); + } else { + Theme.chat_commentDrawable.draw(canvas); + } } commentArrowX = endX - AndroidUtilities.dp(44); @@ -11162,7 +11284,7 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate float acx = commentX + aw / 2; float acy = commentY + ah / 2; Theme.chat_commentArrowDrawable.setBounds((int) (acx - aw / 2 * (1.0f - commentProgressAlpha)), (int) (acy - ah / 2 * (1.0f - commentProgressAlpha)), (int) (acx + aw / 2 * (1.0f - commentProgressAlpha)), (int) (acy + ah / 2 * (1.0f - commentProgressAlpha))); - Theme.chat_commentArrowDrawable.setAlpha((int) (255 * (1.0f - commentProgressAlpha))); + Theme.chat_commentArrowDrawable.setAlpha((int) (255 * (1.0f - commentProgressAlpha) * alpha)); Theme.chat_commentArrowDrawable.draw(canvas); } } @@ -13111,13 +13233,17 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate }; public void setSlidingOffset(float offsetX) { - slidingOffsetX = offsetX; - updateTranslation(); + if (slidingOffsetX != offsetX) { + slidingOffsetX = offsetX; + updateTranslation(); + } } public void setAnimationOffsetX(float offsetX) { - animationOffsetX = offsetX; - updateTranslation(); + if (animationOffsetX != offsetX) { + animationOffsetX = offsetX; + updateTranslation(); + } } private void updateTranslation() { @@ -13184,7 +13310,7 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate AccessibilityNodeInfo info = AccessibilityNodeInfo.obtain(ChatMessageCell.this); onInitializeAccessibilityNodeInfo(info); StringBuilder sb = new StringBuilder(); - if (isChat && currentUser != null && !currentMessageObject.isOut()) { + if (isChat && currentUser!=null && !currentMessageObject.isOut()) { sb.append(UserObject.getUserName(currentUser)); 
sb.append('\n'); } @@ -13206,7 +13332,7 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate if (currentMessageObject.isMusic()) { sb.append("\n"); sb.append(LocaleController.formatString("AccDescrMusicInfo", R.string.AccDescrMusicInfo, currentMessageObject.getMusicAuthor(), currentMessageObject.getMusicTitle())); - } else if (currentMessageObject.isVoice() || currentMessageObject.isRoundVideo()) { + } else if (currentMessageObject.isVoice() || isRoundVideo){ sb.append(", "); sb.append(LocaleController.formatDuration(currentMessageObject.getDuration())); if (currentMessageObject.isContentUnread()) { @@ -13686,6 +13812,14 @@ public class ChatMessageCell extends BaseCell implements SeekBar.SeekBarDelegate return 0; } + public int getTextX() { + return textX; + } + + public int getTextY() { + return textY; + } + public class TransitionParams { public float lastDrawingImageX, lastDrawingImageY, lastDrawingImageW, lastDrawingImageH; diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Cells/DividerCell.java b/TMessagesProj/src/main/java/org/telegram/ui/Cells/DividerCell.java index 0c7ecf444..efd3a9d76 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/Cells/DividerCell.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Cells/DividerCell.java @@ -21,8 +21,8 @@ import org.telegram.ui.ActionBar.Theme; public class DividerCell extends View { - boolean forceDarkTheme; - Paint paint; + private boolean forceDarkTheme; + private Paint paint = new Paint(); public DividerCell(Context context) { super(context); @@ -36,16 +36,13 @@ public class DividerCell extends View { @Override protected void onDraw(Canvas canvas) { - Paint localPaint = Theme.dividerPaint; if (forceDarkTheme) { - if (paint == null) { - paint = new Paint(); - paint.setColor(ColorUtils.blendARGB(Color.BLACK, Theme.getColor(Theme.key_voipgroup_dialogBackground), 0.2f)); - } - localPaint = paint; + paint.setColor(ColorUtils.blendARGB(Color.BLACK, 
Theme.getColor(Theme.key_voipgroup_dialogBackground), 0.2f)); + } else { + paint.setColor(Theme.getColor(Theme.key_divider)); } - canvas.drawLine(getPaddingLeft(), getPaddingTop(), getWidth() - getPaddingRight(), getPaddingTop(), localPaint); + canvas.drawLine(getPaddingLeft(), getPaddingTop(), getWidth() - getPaddingRight(), getPaddingTop(), paint); } public void setForceDarkTheme(boolean forceDarkTheme) { diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Cells/DrawerActionCell.java b/TMessagesProj/src/main/java/org/telegram/ui/Cells/DrawerActionCell.java index 7accffa01..90d23a0e0 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/Cells/DrawerActionCell.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Cells/DrawerActionCell.java @@ -9,8 +9,10 @@ package org.telegram.ui.Cells; import android.content.Context; +import android.graphics.Canvas; import android.graphics.PorterDuff; import android.graphics.PorterDuffColorFilter; +import android.graphics.RectF; import android.graphics.drawable.Drawable; import android.util.TypedValue; import android.view.Gravity; @@ -19,12 +21,18 @@ import android.widget.TextView; import org.telegram.messenger.AndroidUtilities; import org.telegram.messenger.FileLog; +import org.telegram.messenger.MessagesController; +import org.telegram.messenger.UserConfig; import org.telegram.ui.ActionBar.Theme; import org.telegram.ui.Components.LayoutHelper; +import java.util.Set; + public class DrawerActionCell extends FrameLayout { private TextView textView; + private int currentId; + private RectF rect = new RectF(); public DrawerActionCell(Context context) { super(context); @@ -39,6 +47,32 @@ public class DrawerActionCell extends FrameLayout { textView.setGravity(Gravity.LEFT | Gravity.CENTER_VERTICAL); textView.setCompoundDrawablePadding(AndroidUtilities.dp(29)); addView(textView, LayoutHelper.createFrame(LayoutHelper.MATCH_PARENT, LayoutHelper.MATCH_PARENT, Gravity.LEFT | Gravity.TOP, 19, 0, 16, 0)); + + setWillNotDraw(false); 
+ } + + @Override + protected void onDraw(Canvas canvas) { + super.onDraw(canvas); + + if (currentId == 8) { + Set suggestions = MessagesController.getInstance(UserConfig.selectedAccount).pendingSuggestions; + if (suggestions.contains("VALIDATE_PHONE_NUMBER") || suggestions.contains("VALIDATE_PASSWORD")) { + int countTop = AndroidUtilities.dp(12.5f); + int countWidth = AndroidUtilities.dp(9); + int countLeft = getMeasuredWidth() - countWidth - AndroidUtilities.dp(25); + + int x = countLeft - AndroidUtilities.dp(5.5f); + rect.set(x, countTop, x + countWidth + AndroidUtilities.dp(14), countTop + AndroidUtilities.dp(23)); + Theme.chat_docBackPaint.setColor(Theme.getColor(Theme.key_chats_archiveBackground)); + canvas.drawRoundRect(rect, 11.5f * AndroidUtilities.density, 11.5f * AndroidUtilities.density, Theme.chat_docBackPaint); + + int w = Theme.dialogs_errorDrawable.getIntrinsicWidth(); + int h = Theme.dialogs_errorDrawable.getIntrinsicHeight(); + Theme.dialogs_errorDrawable.setBounds((int) (rect.centerX() - w / 2), (int) (rect.centerY() - h / 2), (int) (rect.centerX() + w / 2), (int) (rect.centerY() + h / 2)); + Theme.dialogs_errorDrawable.draw(canvas); + } + } } @Override @@ -52,7 +86,8 @@ public class DrawerActionCell extends FrameLayout { textView.setTextColor(Theme.getColor(Theme.key_chats_menuItemText)); } - public void setTextAndIcon(String text, int resId) { + public void setTextAndIcon(int id, String text, int resId) { + currentId = id; try { textView.setText(text); Drawable drawable = getResources().getDrawable(resId).mutate(); diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Cells/DrawerProfileCell.java b/TMessagesProj/src/main/java/org/telegram/ui/Cells/DrawerProfileCell.java index c7c03ce6c..e926e0f70 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/Cells/DrawerProfileCell.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Cells/DrawerProfileCell.java @@ -34,6 +34,7 @@ import org.telegram.messenger.AndroidUtilities; import 
org.telegram.messenger.FileLog; import org.telegram.messenger.ImageLocation; import org.telegram.messenger.ImageReceiver; +import org.telegram.messenger.ApplicationLoader; import org.telegram.messenger.LocaleController; import org.telegram.messenger.R; import org.telegram.messenger.UserObject; @@ -70,7 +71,7 @@ public class DrawerProfileCell extends FrameLayout { private FireworksEffect fireworksEffect; private boolean accountsShown; private int darkThemeBackgroundColor; - + public static boolean switchingTheme; private Bitmap lastBitmap; private TLRPC.User user; diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Cells/SettingsSuggestionCell.java b/TMessagesProj/src/main/java/org/telegram/ui/Cells/SettingsSuggestionCell.java new file mode 100644 index 000000000..e4619a392 --- /dev/null +++ b/TMessagesProj/src/main/java/org/telegram/ui/Cells/SettingsSuggestionCell.java @@ -0,0 +1,126 @@ +package org.telegram.ui.Cells; + +import android.content.Context; +import android.text.SpannableStringBuilder; +import android.text.Spanned; +import android.text.TextUtils; +import android.util.TypedValue; +import android.view.Gravity; +import android.widget.LinearLayout; +import android.widget.TextView; + +import org.telegram.PhoneFormat.PhoneFormat; +import org.telegram.messenger.AndroidUtilities; +import org.telegram.messenger.FileLog; +import org.telegram.messenger.LocaleController; +import org.telegram.messenger.MessagesController; +import org.telegram.messenger.R; +import org.telegram.messenger.UserConfig; +import org.telegram.tgnet.TLRPC; +import org.telegram.ui.ActionBar.Theme; +import org.telegram.ui.Components.LayoutHelper; +import org.telegram.ui.Components.URLSpanNoUnderline; + +public class SettingsSuggestionCell extends LinearLayout { + + public final static int TYPE_PHONE = 0; + public final static int TYPE_PASSWORD = 1; + + private TextView textView; + private TextView detailTextView; + private TextView yesButton; + private TextView noButton; + + private int 
currentType; + + private int currentAccount = UserConfig.selectedAccount; + + public SettingsSuggestionCell(Context context) { + super(context); + setOrientation(VERTICAL); + + textView = new TextView(context); + textView.setTextSize(TypedValue.COMPLEX_UNIT_DIP, 15); + textView.setTypeface(AndroidUtilities.getTypeface("fonts/rmedium.ttf")); + textView.setEllipsize(TextUtils.TruncateAt.END); + textView.setGravity((LocaleController.isRTL ? Gravity.RIGHT : Gravity.LEFT) | Gravity.CENTER_VERTICAL); + textView.setTextColor(Theme.getColor(Theme.key_windowBackgroundWhiteBlueHeader)); + addView(textView, LayoutHelper.createLinear(LayoutHelper.MATCH_PARENT, LayoutHelper.WRAP_CONTENT, (LocaleController.isRTL ? Gravity.RIGHT : Gravity.LEFT) | Gravity.TOP, 21, 15, 21, 0)); + + detailTextView = new TextView(context); + detailTextView.setTextColor(Theme.getColor(Theme.key_windowBackgroundWhiteGrayText2)); + detailTextView.setTextSize(TypedValue.COMPLEX_UNIT_DIP, 13); + detailTextView.setLinkTextColor(Theme.getColor(Theme.key_windowBackgroundWhiteLinkText)); + detailTextView.setHighlightColor(Theme.getColor(Theme.key_windowBackgroundWhiteLinkSelection)); + detailTextView.setMovementMethod(new AndroidUtilities.LinkMovementMethodMy()); + detailTextView.setGravity(LocaleController.isRTL ? Gravity.RIGHT : Gravity.LEFT); + addView(detailTextView, LayoutHelper.createLinear(LayoutHelper.WRAP_CONTENT, LayoutHelper.WRAP_CONTENT, LocaleController.isRTL ? 
Gravity.RIGHT : Gravity.LEFT, 21, 8, 21, 0)); + + LinearLayout linearLayout = new LinearLayout(context); + linearLayout.setOrientation(HORIZONTAL); + addView(linearLayout, LayoutHelper.createLinear(LayoutHelper.MATCH_PARENT, 40, 21, 17, 21, 20)); + + for (int a = 0; a < 2; a++) { + TextView textView = new TextView(context); + textView.setBackground(Theme.createSimpleSelectorRoundRectDrawable(AndroidUtilities.dp(4), Theme.getColor(Theme.key_featuredStickers_addButton), Theme.getColor(Theme.key_featuredStickers_addButtonPressed))); + textView.setLines(1); + textView.setSingleLine(true); + textView.setGravity(Gravity.CENTER_HORIZONTAL); + textView.setEllipsize(TextUtils.TruncateAt.END); + textView.setGravity(Gravity.CENTER); + textView.setTextColor(Theme.getColor(Theme.key_featuredStickers_buttonText)); + textView.setTextSize(TypedValue.COMPLEX_UNIT_DIP, 14); + textView.setTypeface(AndroidUtilities.getTypeface("fonts/rmedium.ttf")); + linearLayout.addView(textView, LayoutHelper.createLinear(0, 40, 0.5f, a == 0 ? 0 : 4, 0, a == 0 ? 
4 : 0, 0)); + if (a == 0) { + yesButton = textView; + yesButton.setOnClickListener(v -> onYesClick(currentType)); + } else { + noButton = textView; + noButton.setOnClickListener(v -> onNoClick(currentType)); + } + } + } + + public void setType(int type) { + currentType = type; + if (type == TYPE_PHONE) { + final TLRPC.User user = MessagesController.getInstance(currentAccount).getUser(UserConfig.getInstance(currentAccount).clientUserId); + textView.setText(LocaleController.formatString("CheckPhoneNumber", R.string.CheckPhoneNumber, PhoneFormat.getInstance().format("+" + user.phone))); + String text = LocaleController.getString("CheckPhoneNumberInfo", R.string.CheckPhoneNumberInfo); + SpannableStringBuilder builder = new SpannableStringBuilder(text); + int index1 = text.indexOf("**"); + int index2 = text.lastIndexOf("**"); + if (index1 >= 0 && index2 >= 0 && index1 != index2) { + builder.replace(index2, index2 + 2, ""); + builder.replace(index1, index1 + 2, ""); + try { + builder.setSpan(new URLSpanNoUnderline(LocaleController.getString("CheckPhoneNumberLearnMoreUrl", R.string.CheckPhoneNumberLearnMoreUrl)), index1, index2 - 2, Spanned.SPAN_EXCLUSIVE_EXCLUSIVE); + } catch (Exception e) { + FileLog.e(e); + } + } + detailTextView.setText(builder); + yesButton.setText(LocaleController.getString("CheckPhoneNumberYes", R.string.CheckPhoneNumberYes)); + noButton.setText(LocaleController.getString("CheckPhoneNumberNo", R.string.CheckPhoneNumberNo)); + } else if (type == TYPE_PASSWORD) { + textView.setText(LocaleController.getString("YourPasswordHeader", R.string.YourPasswordHeader)); + detailTextView.setText(LocaleController.getString("YourPasswordRemember", R.string.YourPasswordRemember)); + yesButton.setText(LocaleController.getString("YourPasswordRememberYes", R.string.YourPasswordRememberYes)); + noButton.setText(LocaleController.getString("YourPasswordRememberNo", R.string.YourPasswordRememberNo)); + } + } + + protected void onYesClick(int type) { + + } + + protected 
void onNoClick(int type) { + + } + + @Override + protected void onMeasure(int widthMeasureSpec, int heightMeasureSpec) { + super.onMeasure(MeasureSpec.makeMeasureSpec(MeasureSpec.getSize(widthMeasureSpec), MeasureSpec.EXACTLY), heightMeasureSpec); + } +} diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Cells/SharedPhotoVideoCell.java b/TMessagesProj/src/main/java/org/telegram/ui/Cells/SharedPhotoVideoCell.java index 1b820a436..38df2cea6 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/Cells/SharedPhotoVideoCell.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Cells/SharedPhotoVideoCell.java @@ -64,13 +64,17 @@ public class SharedPhotoVideoCell extends FrameLayout { private int currentAccount = UserConfig.selectedAccount; + public SharedPhotoVideoCellDelegate getDelegate() { + return delegate; + } + public interface SharedPhotoVideoCellDelegate { void didClickItem(SharedPhotoVideoCell cell, int index, MessageObject messageObject, int a); boolean didLongClickItem(SharedPhotoVideoCell cell, int index, MessageObject messageObject, int a); } - private class PhotoVideoView extends FrameLayout { + public class PhotoVideoView extends FrameLayout { private BackupImageView imageView; private TextView videoTextView; @@ -334,6 +338,13 @@ public class SharedPhotoVideoCell extends FrameLayout { return photoVideoViews[a].imageView; } + public PhotoVideoView getView(int a) { + if (a >= itemsCount) { + return null; + } + return photoVideoViews[a]; + } + public MessageObject getMessageObject(int a) { if (a >= itemsCount) { return null; @@ -341,6 +352,13 @@ public class SharedPhotoVideoCell extends FrameLayout { return messageObjects[a]; } + public int getIndeces(int a) { + if (a >= itemsCount) { + return -1; + } + return indeces[a]; + } + public void setIsFirst(boolean first) { isFirst = first; } diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Cells/TextCell.java b/TMessagesProj/src/main/java/org/telegram/ui/Cells/TextCell.java index 
91c039e7d..f986f6277 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/Cells/TextCell.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Cells/TextCell.java @@ -23,12 +23,13 @@ import org.telegram.messenger.AndroidUtilities; import org.telegram.messenger.LocaleController; import org.telegram.ui.ActionBar.SimpleTextView; import org.telegram.ui.ActionBar.Theme; +import org.telegram.ui.Components.RLottieImageView; public class TextCell extends FrameLayout { public final SimpleTextView textView; public final SimpleTextView valueTextView; - public final ImageView imageView; + public final RLottieImageView imageView; private ImageView valueImageView; private int leftPadding; private boolean needDivider; @@ -59,8 +60,7 @@ public class TextCell extends FrameLayout { valueTextView.setImportantForAccessibility(IMPORTANT_FOR_ACCESSIBILITY_NO); addView(valueTextView); - imageView = new ImageView(context); - imageView.setVisibility(GONE); + imageView = new RLottieImageView(context); imageView.setScaleType(ImageView.ScaleType.CENTER); imageView.setColorFilter(new PorterDuffColorFilter(Theme.getColor(dialog ? 
Theme.key_dialogIcon : Theme.key_windowBackgroundWhiteGrayIcon), PorterDuff.Mode.SRC_IN)); addView(imageView); diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Cells/TextInfoPrivacyCell.java b/TMessagesProj/src/main/java/org/telegram/ui/Cells/TextInfoPrivacyCell.java index 9e414af91..a3c8f436b 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/Cells/TextInfoPrivacyCell.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Cells/TextInfoPrivacyCell.java @@ -51,6 +51,7 @@ public class TextInfoPrivacyCell extends FrameLayout { protected void onDraw(Canvas canvas) { onTextDraw(); super.onDraw(canvas); + afterTextDraw(); } }; textView.setTextSize(TypedValue.COMPLEX_UNIT_DIP, 14); @@ -67,6 +68,10 @@ public class TextInfoPrivacyCell extends FrameLayout { } + protected void afterTextDraw() { + + } + public void setLinkTextColorKey(String key) { linkTextColorKey = key; } diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Cells/TextSelectionHelper.java b/TMessagesProj/src/main/java/org/telegram/ui/Cells/TextSelectionHelper.java index 2f6d78970..3e9b21500 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/Cells/TextSelectionHelper.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Cells/TextSelectionHelper.java @@ -152,8 +152,8 @@ public abstract class TextSelectionHelper 0) { - dy = -selectedView.getTop(); + if (selectedView.getTop() + dy > getParentTopPadding()) { + dy = -selectedView.getTop() + getParentTopPadding(); } } } @@ -759,7 +759,7 @@ public abstract class TextSelectionHelper parentView.getMeasuredHeight() && (multiselect || selectedView.getBottom() > parentView.getMeasuredHeight()); - boolean canScrollUp = event.getY() < ((View) parentView.getParent()).getTop() && (multiselect || selectedView.getTop() < 0); + boolean canScrollUp = event.getY() < ((View) parentView.getParent()).getTop() + getParentTopPadding() && (multiselect || selectedView.getTop() < getParentTopPadding()); if (canScrollDown || canScrollUp) { if (!scrolling) { 
scrolling = true; @@ -2665,4 +2665,8 @@ public abstract class TextSelectionHelper 0 && TextUtils.indexOf(source, '\n') == source.length() - 1) { doneButton.performClick(); return ""; } @@ -139,7 +139,7 @@ public class ChangeBioActivity extends BaseFragment { @Override public void afterTextChanged(Editable s) { - checkTextView.setNumber(70 - Character.codePointCount(s, 0, s.length()), true); + checkTextView.setNumber(70 - Character.codePointCount(s, 0, s.length()), true); } }); diff --git a/TMessagesProj/src/main/java/org/telegram/ui/ChangePhoneActivity.java b/TMessagesProj/src/main/java/org/telegram/ui/ChangePhoneActivity.java index c3babe739..d95545f9a 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/ChangePhoneActivity.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/ChangePhoneActivity.java @@ -68,6 +68,7 @@ import org.telegram.ui.ActionBar.AlertDialog; import org.telegram.ui.ActionBar.BaseFragment; import org.telegram.ui.ActionBar.Theme; import org.telegram.ui.ActionBar.ThemeDescription; +import org.telegram.ui.Cells.SettingsSuggestionCell; import org.telegram.ui.Components.AlertsCreator; import org.telegram.ui.Components.EditTextBoldCursor; import org.telegram.ui.Components.HintEditText; @@ -99,7 +100,7 @@ public class ChangePhoneActivity extends BaseFragment { private final static int done_button = 1; - private class ProgressView extends View { + private static class ProgressView extends View { private Paint paint = new Paint(); private Paint paint2 = new Paint(); @@ -584,7 +585,7 @@ public class ChangePhoneActivity extends BaseFragment { } phoneField.setText(builder); if (start >= 0) { - phoneField.setSelection(start <= phoneField.length() ? 
start : phoneField.length()); + phoneField.setSelection(Math.min(start, phoneField.length())); } phoneField.onTextChange(); ignoreOnPhoneChange = false; @@ -998,10 +999,8 @@ public class ChangePhoneActivity extends BaseFragment { int maxHeight = AndroidUtilities.dp(291); if (scrollHeight - innerHeight < requiredHeight) { setMeasuredDimension(getMeasuredWidth(), innerHeight + requiredHeight); - } else if (scrollHeight > maxHeight) { - setMeasuredDimension(getMeasuredWidth(), maxHeight); } else { - setMeasuredDimension(getMeasuredWidth(), scrollHeight); + setMeasuredDimension(getMeasuredWidth(), Math.min(scrollHeight, maxHeight)); } } } @@ -1432,6 +1431,7 @@ public class ChangePhoneActivity extends BaseFragment { MessagesController.getInstance(currentAccount).putUser(user, false); finishFragment(); NotificationCenter.getInstance(currentAccount).postNotificationName(NotificationCenter.mainUserInfoChanged); + getMessagesController().removeSuggestion(0, "VALIDATE_PHONE_NUMBER"); } else { lastError = error.text; if (currentType == 3 && (nextType == 4 || nextType == 2) || currentType == 2 && (nextType == 4 || nextType == 3) || currentType == 4 && nextType == 2) { diff --git a/TMessagesProj/src/main/java/org/telegram/ui/ChannelAdminLogActivity.java b/TMessagesProj/src/main/java/org/telegram/ui/ChannelAdminLogActivity.java index afcb876fd..cc9b86cbc 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/ChannelAdminLogActivity.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/ChannelAdminLogActivity.java @@ -745,9 +745,6 @@ public class ChannelAdminLogActivity extends BaseFragment implements Notificatio ImageReceiver imageReceiver = chatMessageCell.getAvatarImage(); if (imageReceiver != null) { if (chatMessageCell.getMessageObject().deleted) { -// if (child.getTranslationY() != 0) { -// canvas.restore(); -// } imageReceiver.setVisible(false, false); return result; } @@ -763,17 +760,13 @@ public class ChannelAdminLogActivity extends BaseFragment implements Notificatio 
if (p >= 0) { int nextPosition; - nextPosition = p - 1; + nextPosition = p + 1; holder = chatListView.findViewHolderForAdapterPosition(nextPosition); if (holder != null) { -// if (child.getTranslationY() != 0) { -// canvas.restore(); -// } imageReceiver.setVisible(false, false); return result; } - } } float tx = chatMessageCell.getSlidingOffsetX() + chatMessageCell.getCheckBoxTranslation(); @@ -785,9 +778,6 @@ public class ChannelAdminLogActivity extends BaseFragment implements Notificatio y = maxY; } -// if (child.getTranslationY() != 0) { -// canvas.restore(); -// } if (chatMessageCell.drawPinnedTop()) { int p; @@ -804,7 +794,7 @@ public class ChannelAdminLogActivity extends BaseFragment implements Notificatio int prevPosition; - prevPosition = p + 1; + prevPosition = p - 1; holder = chatListView.findViewHolderForAdapterPosition(prevPosition); diff --git a/TMessagesProj/src/main/java/org/telegram/ui/ChatActivity.java b/TMessagesProj/src/main/java/org/telegram/ui/ChatActivity.java index 647fce0e2..2b8534a33 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/ChatActivity.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/ChatActivity.java @@ -47,6 +47,7 @@ import android.net.Uri; import android.os.Build; import android.os.Bundle; import android.os.SystemClock; +import android.os.Vibrator; import android.provider.MediaStore; import android.text.Layout; import android.text.Spannable; @@ -310,7 +311,7 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not private TextView replyButton; private FrameLayout emptyViewContainer; private ChatGreetingsView greetingsViewContainer; - private SizeNotifierFrameLayout contentView; + public SizeNotifierFrameLayout contentView; private ChatBigEmptyView bigEmptyView; private ArrayList actionModeViews = new ArrayList<>(); private ChatAvatarContainer avatarContainer; @@ -449,7 +450,7 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not private SparseArray 
pendingSendMessagesDict = new SparseArray<>(); private ArrayList pendingSendMessages = new ArrayList<>(); - private ArrayList animatingMessageObjects = new ArrayList<>(); + public ArrayList animatingMessageObjects = new ArrayList<>(); private HashMap animatingDocuments = new HashMap<>(); private MessageObject needAnimateToMessage; @@ -598,6 +599,8 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not private int startLoadFromMessageId; private int startLoadFromMessageIdSaved; private int startLoadFromMessageOffset = Integer.MAX_VALUE; + private int startFromVideoTimestamp = -1; + private int startFromVideoMessageId; private boolean needSelectFromMessageId; private int returnToMessageId; private int returnToLoadIndex; @@ -728,6 +731,7 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not private int scrollCallbackAnimationIndex; private boolean showSearchAsIcon; + public MessageEnterTransitionContainer messageEnterTransitionContainer; private final static int[] allowedNotificationsDuringChatListAnimations = new int[]{ NotificationCenter.messagesRead, @@ -1155,6 +1159,9 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not } processRowSelect(view, outside, x, y); } + if (view instanceof ChatMessageCell) { + startMultiselect(position); + } return true; } @@ -1169,6 +1176,103 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not } }; + private void startMultiselect(int position) { + int indexOfMessage = position - chatAdapter.messagesStartRow; + if (indexOfMessage < 0 || indexOfMessage >= messages.size()) { + return; + } + MessageObject messageObject = messages.get(indexOfMessage); + final boolean unselect = selectedMessagesIds[0].get(messageObject.getId(), null) == null && selectedMessagesIds[1].get(messageObject.getId(), null) == null; + SparseArray alreadySelectedMessagesIds = new SparseArray<>(); + for (int i = 0; i < 
selectedMessagesIds[0].size(); i++) { + alreadySelectedMessagesIds.put(selectedMessagesIds[0].keyAt(i), selectedMessagesIds[0].valueAt(i)); + } + for (int i = 0; i < selectedMessagesIds[1].size(); i++) { + alreadySelectedMessagesIds.put(selectedMessagesIds[1].keyAt(i), selectedMessagesIds[1].valueAt(i)); + } + chatListView.startMultiselect(position, false, new RecyclerListView.onMultiSelectionChanged() { + boolean limitReached; + @Override + public void onSelectionChanged(int position, boolean selected, float x, float y) { + int i = position - chatAdapter.messagesStartRow; + if (unselect) { + selected = !selected; + } + if (i >= 0 && i < messages.size()) { + MessageObject messageObject = messages.get(i); + if (selected && (selectedMessagesIds[0].indexOfKey(messageObject.getId()) >= 0 || selectedMessagesIds[1].indexOfKey(messageObject.getId()) >= 0)) { + return; + } + if (!selected && selectedMessagesIds[0].indexOfKey(messageObject.getId()) < 0 && selectedMessagesIds[1].indexOfKey(messageObject.getId()) < 0) { + return; + } + if (messageObject.contentType == 0) { + if (selected && selectedMessagesIds[0].size() + selectedMessagesIds[1].size() >= 100) { + limitReached = true; + } else { + limitReached = false; + } + RecyclerView.ViewHolder holder = chatListView.findViewHolderForAdapterPosition(position); + if (holder != null && holder.itemView instanceof ChatMessageCell) { + processRowSelect(holder.itemView, false, x, y); + } else { + addToSelectedMessages(messageObject, false); + updateActionModeTitle(); + updateVisibleRows(); + } + } + } + } + + @Override + public boolean canSelect(int position) { + int i = position - chatAdapter.messagesStartRow; + if (i >= 0 && i < messages.size()) { + MessageObject messageObject = messages.get(i); + if (messageObject.contentType == 0) { + if (!unselect && alreadySelectedMessagesIds.get(messageObject.getId(), null) == null) { + return true; + } + if (unselect && alreadySelectedMessagesIds.get(messageObject.getId(), null) != null) 
{ + return true; + } + } + } + return false; + } + + @Override + public int checkPosition(int position, boolean selectionTop) { + int i = position - chatAdapter.messagesStartRow; + if (i >= 0 && i < messages.size()) { + MessageObject messageObject = messages.get(i); + if (messageObject.contentType == 0 && messageObject.hasValidGroupId()) { + MessageObject.GroupedMessages groupedMessages = groupedMessagesMap.get(messageObject.getGroupId()); + MessageObject messageObject1 = groupedMessages.messages.get(selectionTop ? 0 : groupedMessages.messages.size() - 1); + return chatAdapter.messagesStartRow + messages.indexOf(messageObject1); + } + } + return position; + } + + @Override + public boolean limitReached() { + return limitReached; + } + + @Override + public void getPaddings(int[] paddings) { + paddings[0] = (int) chatListViewPaddingTop; + paddings[1] = 0; + } + + @Override + public void scrollBy(int dy) { + chatListView.scrollBy(0, dy); + } + }); + } + RecyclerListView.OnItemClickListenerExtended onItemClickListener = new RecyclerListView.OnItemClickListenerExtended() { @Override public void onItemClick(View view, int position, float x, float y) { @@ -1224,6 +1328,10 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not inlineReturn = arguments.getLong("inline_return", 0); String inlineQuery = arguments.getString("inline_query"); startLoadFromMessageId = arguments.getInt("message_id", 0); + startFromVideoTimestamp = arguments.getInt("video_timestamp", -1); + if (startFromVideoTimestamp >= 0) { + startFromVideoMessageId = startLoadFromMessageId; + } reportType = arguments.getInt("report", -1); boolean historyPreloaded = arguments.getBoolean("historyPreloaded", false); if (highlightMessageId != 0 && highlightMessageId != Integer.MAX_VALUE) { @@ -1790,7 +1898,12 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not @Override public View createView(Context context) { - textSelectionHelper = new 
TextSelectionHelper.ChatListTextSelectionHelper(); + textSelectionHelper = new TextSelectionHelper.ChatListTextSelectionHelper() { + @Override + public int getParentTopPadding() { + return (int) chatListViewPaddingTop; + } + }; if (reportType >= 0) { actionBar.setBackgroundColor(Theme.getColor(Theme.key_actionBarActionModeDefault)); @@ -2324,7 +2437,7 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not if (searchIconItem != null && showSearchAsIcon) { searchIconItem.setVisibility(View.GONE); } - } else if (chatActivityEnterView.hasText() && TextUtils.isEmpty(chatActivityEnterView.getSlowModeTimer())) { + } else if (chatActivityEnterView.hasText() && TextUtils.isEmpty(chatActivityEnterView.getSlowModeTimer()) && (currentChat == null || ChatObject.canSendMessages(currentChat))) { if (headerItem != null) { headerItem.setVisibility(View.GONE); } @@ -3092,6 +3205,8 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not int widthSize = MeasureSpec.getSize(widthMeasureSpec); int heightSize = allHeight = MeasureSpec.getSize(heightMeasureSpec); + long time = System.currentTimeMillis(); + if (lastWidth != widthSize) { globalIgnoreLayout = true; lastWidth = widthMeasureSpec; @@ -3138,7 +3253,6 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not if (actionBar.getVisibility() == VISIBLE) { heightSize -= actionBarHeight; } - if (lastHeight != allHeight) { measureKeyboardHeight(); } @@ -3152,11 +3266,12 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not chatEmojiViewPadding = 0; } } + setEmojiKeyboardHeight(chatEmojiViewPadding); int childCount = getChildCount(); - measureChildWithMargins(chatActivityEnterView, widthMeasureSpec, 0, heightMeasureSpec, 0); + int listViewTopHeight; if (inPreviewMode) { inputFieldHeight = 0; @@ -3168,6 +3283,7 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not for (int i = 0; i < childCount; 
i++) { View child = getChildAt(i); + if (child == null || child.getVisibility() == GONE || child == chatActivityEnterView || child == actionBar) { continue; } @@ -3289,6 +3405,8 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not } updateBulletinLayout(); + + lastHeight = allHeight; } @Override @@ -3304,6 +3422,7 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not final int count = getChildCount(); int keyboardSize = getKeyboardHeight(); int paddingBottom; + long time = System.currentTimeMillis(); if (fixedKeyboardHeight > 0 && keyboardSize <= AndroidUtilities.dp(20)) { paddingBottom = fixedKeyboardHeight; } else { @@ -3311,6 +3430,8 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not } if (!SharedConfig.smoothKeyboard) { setBottomClip(paddingBottom); + } else if (!inPreviewMode) { + setBottomClip(AndroidUtilities.dp(48)); } for (int i = 0; i < count; i++) { @@ -3420,6 +3541,8 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not if (keyboardSize > AndroidUtilities.dp(20) && getLayoutParams().height < 0) { childTop -= keyboardSize; } + } else if (chatActivityEnterView != null && child == chatActivityEnterView.botCommandsMenuContainer) { + childTop -= inputFieldHeight; } child.layout(childLeft, childTop, childLeft + width, childTop + height); } @@ -3575,16 +3698,21 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not @Override public void setTranslationY(float translationY) { - super.setTranslationY(translationY); - if (emptyViewContainer != null) { - if (chatActivityEnterView != null && chatActivityEnterView.pannelAniamationInProgress()) { - emptyViewContainer.setTranslationY(translationY / 2f); - } else { - emptyViewContainer.setTranslationY(translationY / 1.7f); + if (translationY != getTranslationY()) { + super.setTranslationY(translationY); + if (emptyViewContainer != null) { + if (chatActivityEnterView != 
null && chatActivityEnterView.pannelAniamationInProgress()) { + emptyViewContainer.setTranslationY(translationY / 2f); + } else { + emptyViewContainer.setTranslationY(translationY / 1.7f); + } } + if (chatActivityEnterView != null && chatActivityEnterView.botCommandsMenuContainer != null) { + chatActivityEnterView.botCommandsMenuContainer.setTranslationY(translationY); + } + invalidateChatListViewTopPadding(); + invalidateMessagesVisiblePart(); } - invalidateChatListViewTopPadding(); - invalidateMessagesVisiblePart(); } @Override @@ -6392,11 +6520,11 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not if (changeBoundAnimator != null) { changeBoundAnimator.cancel(); } + chatActivityEnterViewAnimateFromTop = 0; shouldAnimateEditTextWithBounds = false; } else { int t = getBackgroundTop(); - boolean rez = true; if (chatActivityEnterViewAnimateFromTop != 0 && t != chatActivityEnterViewAnimateFromTop && lastContentViewHeight == contentView.getMeasuredHeight()) { int dy = animatedTop + chatActivityEnterViewAnimateFromTop - t; animatedTop = dy; @@ -6453,11 +6581,11 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not changeBoundAnimator = null; } }); - changeBoundAnimator.setDuration(200); - if (chatActivityEnterViewAnimateBeforeSending) { - changeBoundAnimator.setStartDelay(20); - } - changeBoundAnimator.setInterpolator(CubicBezierInterpolator.DEFAULT); + changeBoundAnimator.setDuration(ChatListItemAnimator.DEFAULT_DURATION); +// if (chatActivityEnterViewAnimateBeforeSending) { +// changeBoundAnimator.setStartDelay(20); +// } + changeBoundAnimator.setInterpolator(ChatListItemAnimator.DEFAULT_INTERPOLATOR); if (!waitingForSendingMessageLoad) { changeBoundAnimator.start(); } @@ -6476,9 +6604,9 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not messageEditTextAnimator.cancel(); } messageEditTextAnimator = a; - a.setDuration(200); - 
a.setStartDelay(chatActivityEnterViewAnimateBeforeSending ? 20 : 0); - a.setInterpolator(CubicBezierInterpolator.DEFAULT); + a.setDuration(ChatListItemAnimator.DEFAULT_DURATION); + // a.setStartDelay(chatActivityEnterViewAnimateBeforeSending ? 20 : 0); + a.setInterpolator(ChatListItemAnimator.DEFAULT_INTERPOLATOR); a.start(); shouldAnimateEditTextWithBounds = false; } @@ -6500,6 +6628,9 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not } }; chatActivityEnterView.setDelegate(new ChatActivityEnterView.ChatActivityEnterViewDelegate() { + + int lastSize; + @Override public void onMessageSend(CharSequence message, boolean notify, int scheduleDate) { if (chatListItemAnimator != null) { @@ -6711,8 +6842,12 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not allowContextBotPanel = !chatActivityEnterView.isPopupShowing(); checkContextBotPanel(); - chatActivityEnterViewAnimateFromTop = 0; - chatActivityEnterViewAnimateBeforeSending = false; + int size2 = size + (chatActivityEnterView.isPopupShowing() ? 1 << 16 : 0); + if (lastSize != size2) { + chatActivityEnterViewAnimateFromTop = 0; + chatActivityEnterViewAnimateBeforeSending = false; + } + lastSize = size2; } @Override @@ -6848,6 +6983,11 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not } } + @Override + public boolean hasForwardingMessages() { + return forwardingMessages != null && !forwardingMessages.isEmpty(); + } + @Override public int getDisableLinkPreviewStatus() { return disableLinkPreview ? 
2 : 1; @@ -7241,11 +7381,13 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not searchUpButton.setOnClickListener(view -> { getMediaDataController().searchMessagesInChat(null, dialog_id, mergeDialogId, classGuid, 1, threadMessageId, searchingUserMessages, searchingChatMessages); showMessagesSearchListView(false); - if (!SharedConfig.searchMessagesAsListUsed && SharedConfig.searchMessagesAsListHintShows < 3 && !searchAsListHintShown && Math.random() <= 0.25) { + //if (!SharedConfig.searchMessagesAsListUsed && SharedConfig.searchMessagesAsListHintShows < 3 && !searchAsListHintShown && Math.random() <= 0.25) { + if (!searchAsListHintShown) { showSearchAsListHint(); - searchAsListHintShown = true; - SharedConfig.increaseSearchAsListHintShows(); + // searchAsListHintShown = true; } +// SharedConfig.increaseSearchAsListHintShows(); +// } }); searchUpButton.setContentDescription(LocaleController.getString("AccDescrSearchNext", R.string.AccDescrSearchNext)); @@ -7518,7 +7660,7 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not bottomMessagesActionContainer.addView(forwardButton, LayoutHelper.createFrame(LayoutHelper.WRAP_CONTENT, LayoutHelper.MATCH_PARENT, Gravity.RIGHT | Gravity.TOP)); contentView.addView(searchContainer, LayoutHelper.createFrame(LayoutHelper.MATCH_PARENT, 51, Gravity.BOTTOM)); - + contentView.addView(messageEnterTransitionContainer = new MessageEnterTransitionContainer(context, currentAccount)); undoView = new UndoView(context, this); undoView.setAdditionalTranslationY(AndroidUtilities.dp(51)); contentView.addView(undoView, LayoutHelper.createFrame(LayoutHelper.MATCH_PARENT, LayoutHelper.WRAP_CONTENT, Gravity.BOTTOM | Gravity.LEFT, 8, 0, 8, 8)); @@ -8178,9 +8320,6 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not } else if (mentionListView != null) { mentionListView.setTranslationY(bottomPanelTranslationYReverse); } - if (chatActivityEnterView != null && 
chatActivityEnterView.botCommandsMenuContainer != null) { - chatActivityEnterView.botCommandsMenuContainer.setTranslationY(bottomPanelTranslationYReverse); - } if (alertView != null && alertView.getVisibility() == View.VISIBLE) { alertView.setTranslationY(contentPanTranslation + contentPaddingTop - AndroidUtilities.dp(50) * (1f - alertViewEnterProgress)); } @@ -8190,6 +8329,9 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not if (bottomMessagesActionContainer != null) { bottomMessagesActionContainer.setTranslationY(bottomPanelTranslationYReverse); } + if (undoView != null) { + undoView.setAdditionalTranslationY(chatActivityEnterView.getHeightWithTopView() - chatActivityEnterView.getAnimatedTop()); + } } private TextureView createTextureView(boolean add) { @@ -8362,6 +8504,7 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not ObjectAnimator.ofFloat(bottomOverlayProgress, View.SCALE_X, 1.0f), ObjectAnimator.ofFloat(bottomOverlayProgress, View.SCALE_Y, 1.0f), ObjectAnimator.ofFloat(bottomOverlayProgress, View.ALPHA, 1.0f)); + bottomOverlayAnimation.setStartDelay(200); } else { bottomOverlayChatText.setVisibility(View.VISIBLE); bottomOverlayAnimation.playTogether( @@ -10922,6 +11065,11 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not foundTextureViewMessage = true; } } + if (startFromVideoTimestamp >= 0 && fragmentOpened && !chatListView.isFastScrollAnimationRunning() && startFromVideoMessageId == messageObject.getId()) { + messageObject.forceSeekTo = startFromVideoTimestamp / (float) messageObject.getDuration(); + openPhotoViewerForMessage(messageCell, messageObject); + startFromVideoTimestamp = -1; + } } else if (view instanceof ChatActionCell) { ChatActionCell cell = (ChatActionCell) view; messageObject = cell.getMessageObject(); @@ -11999,6 +12147,11 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not } } else { if 
(selectedMessagesIds[0].size() + selectedMessagesIds[1].size() >= 100) { + AndroidUtilities.shakeView(selectedMessagesCountTextView, 2, 0); + Vibrator vibrator = (Vibrator) ApplicationLoader.applicationContext.getSystemService(Context.VIBRATOR_SERVICE); + if (vibrator != null) { + vibrator.vibrate(200); + } return; } selectedMessagesIds[index].put(messageObject.getId(), messageObject); @@ -15012,7 +15165,7 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not } } else if (id == NotificationCenter.botInfoDidLoad) { int guid = (Integer) args[1]; - if (classGuid == guid) { + if (classGuid == guid || guid == 0) { TLRPC.BotInfo info = (TLRPC.BotInfo) args[0]; if (currentEncryptedChat == null) { if (!info.commands.isEmpty() && !ChatObject.isChannel(currentChat) && !isThreadChat()) { @@ -16163,7 +16316,9 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not } if (obj.isOut() && waitingForSendingMessageLoad) { waitingForSendingMessageLoad = false; - chatActivityEnterView.hideTopView(true); + if (!animatingMessageObjects.contains(obj)) { + chatActivityEnterView.hideTopView(true); + } if (changeBoundAnimator != null) { changeBoundAnimator.start(); } @@ -20694,7 +20849,12 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not } builder.setPositiveButton(LocaleController.getString("PinMessage", R.string.PinMessage), (dialogInterface, i) -> { getMessagesController().pinMessage(currentChat, currentUser, mid, false, !checks[1], checks[0]); - BulletinFactory.createPinMessageBulletin(this).show(); + Bulletin bulletin = BulletinFactory.createPinMessageBulletin(this); + bulletin.show(); + View view = bulletin.getLayout(); + view.postDelayed(() -> { + view.performHapticFeedback(HapticFeedbackConstants.KEYBOARD_TAP, HapticFeedbackConstants.FLAG_IGNORE_GLOBAL_SETTING); + }, 550); }); builder.setNegativeButton(LocaleController.getString("Cancel", R.string.Cancel), null); showDialog(builder.create()); 
@@ -22184,7 +22344,7 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not } if (url instanceof URLSpanMono) { ((URLSpanMono) url).copyToClipboard(); - Toast.makeText(getParentActivity(), LocaleController.getString("TextCopied", R.string.TextCopied), Toast.LENGTH_SHORT).show(); + getUndoView().showWithAction(0, UndoView.ACTION_TEXT_COPIED, null); } else if (url instanceof URLSpanUserMention) { TLRPC.User user = getMessagesController().getUser(Utilities.parseInt(((URLSpanUserMention) url).getURL())); if (user != null) { @@ -22947,6 +23107,7 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not @Override public void didLongPress(ChatMessageCell cell, float x, float y) { createMenu(cell, false, false, x, y); + startMultiselect(chatListView.getChildAdapterPosition(cell)); } @Override @@ -23513,8 +23674,9 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not } int index; if ((index = animatingMessageObjects.indexOf(message)) != -1) { - animatingMessageObjects.remove(index); + boolean applyAnimation = false; if (message.type == MessageObject.TYPE_ROUND_VIDEO && instantCameraView.getTextureView() != null) { + applyAnimation = true; messageCell.getViewTreeObserver().addOnPreDrawListener(new ViewTreeObserver.OnPreDrawListener() { @Override public boolean onPreDraw() { @@ -23601,6 +23763,7 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not } }); } else if (message.isAnyKindOfSticker()) { + applyAnimation = true; messageCell.getViewTreeObserver().addOnPreDrawListener(new ViewTreeObserver.OnPreDrawListener() { @Override public boolean onPreDraw() { @@ -23667,12 +23830,12 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not ObjectAnimator.ofFloat(sendAnimationData, param1, scale, 1.0f), ObjectAnimator.ofFloat(sendAnimationData, param3, sendAnimationData.y, position[1] + imageReceiver.getCenterY()) ); - 
animatorSet.setInterpolator(CubicBezierInterpolator.DEFAULT); + animatorSet.setInterpolator(ChatListItemAnimator.DEFAULT_INTERPOLATOR); ObjectAnimator o = ObjectAnimator.ofFloat(sendAnimationData, param2, sendAnimationData.x, position[0] + imageReceiver.getCenterX()); o.setInterpolator(CubicBezierInterpolator.EASE_OUT_QUINT); allAnimators.playTogether(o, animatorSet); - allAnimators.setDuration(250); + allAnimators.setDuration(ChatListItemAnimator.DEFAULT_DURATION); allAnimators.addListener(new AnimatorListenerAdapter() { @Override @@ -23714,22 +23877,11 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not return true; } }); - } else { - if (chatActivityEnterView.canShowVoiceMessageTransition()) { - messageCell.getViewTreeObserver().addOnPreDrawListener(new ViewTreeObserver.OnPreDrawListener() { - @Override - public boolean onPreDraw() { - messageCell.getViewTreeObserver().removeOnPreDrawListener(this); - if (!chatListView.fastScrollAnimationRunning && Math.abs(messageCell.getTranslationY()) < messageCell.getMeasuredHeight() * 3f) { - VoiceMessageEnterTransition transition = new VoiceMessageEnterTransition(contentView, messageCell, chatActivityEnterView, chatListView); - transition.start(); - } else { - chatActivityEnterView.startMessageTransition(); - } - return true; - } - }); - } + } + if (applyAnimation || chatListItemAnimator == null) { + animatingMessageObjects.remove(index); + chatActivityEnterView.startMessageTransition(); + chatActivityEnterView.hideTopView(true); } } if (fromUserBlocked) { @@ -24287,6 +24439,14 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not return str.startsWith("https://") || str.startsWith("vmess://") || str.startsWith("vmess1://") || str.startsWith("ss://") || str.startsWith("ssr://") || str.startsWith("ws://") || str.startsWith("wss://") || str.startsWith("@") || str.startsWith("#") || str.startsWith("$"); } + public SimpleTextView getReplyNameTextView() { + return 
replyNameTextView; + } + + public SimpleTextView getReplyObjectTextView() { + return replyObjectTextView; + } + @Override public ArrayList getThemeDescriptions() { ThemeDescription.ThemeDescriptionDelegate selectedBackgroundDelegate = () -> { @@ -25011,4 +25171,8 @@ public class ChatActivity extends BaseFragment implements NotificationCenter.Not //currentChat是群组 return currentChat != null && currentChat.megagroup && chatInfo != null && chatInfo.linked_chat_id != 0; } + + public ChatAvatarContainer getAvatarContainer() { + return avatarContainer; + } } diff --git a/TMessagesProj/src/main/java/org/telegram/ui/ChatEditActivity.java b/TMessagesProj/src/main/java/org/telegram/ui/ChatEditActivity.java index bcfe4f2d5..5a20876c6 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/ChatEditActivity.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/ChatEditActivity.java @@ -66,6 +66,7 @@ import org.telegram.ui.Components.EditTextBoldCursor; import org.telegram.ui.Components.EditTextEmoji; import org.telegram.ui.Components.ImageUpdater; import org.telegram.ui.Components.LayoutHelper; +import org.telegram.ui.Components.RLottieDrawable; import org.telegram.ui.Components.RadialProgressView; import org.telegram.ui.Components.SizeNotifierFrameLayout; import org.telegram.ui.Components.UndoView; @@ -585,12 +586,22 @@ public class ChatEditActivity extends BaseFragment implements ImageUpdater.Image }; setAvatarCell.setBackgroundDrawable(Theme.getSelectorDrawable(false)); setAvatarCell.setColors(Theme.key_windowBackgroundWhiteBlueIcon, Theme.key_windowBackgroundWhiteBlueButton); - setAvatarCell.setOnClickListener(v -> imageUpdater.openMenu(avatar != null, () -> { - avatar = null; - MessagesController.getInstance(currentAccount).changeChatAvatar(chatId, null, null, null, 0, null, null, null, null); - showAvatarProgress(false, true); - avatarImage.setImage(null, null, avatarDrawable, currentChat); - }, null)); + setAvatarCell.setOnClickListener(v -> { + 
imageUpdater.openMenu(avatar != null, () -> { + avatar = null; + MessagesController.getInstance(currentAccount).changeChatAvatar(chatId, null, null, null, 0, null, null, null, null); + showAvatarProgress(false, true); + avatarImage.setImage(null, null, avatarDrawable, currentChat); + cameraDrawable.setCurrentFrame(0); + setAvatarCell.imageView.playAnimation(); + }, dialogInterface -> { + cameraDrawable.setCustomEndFrame(86); + setAvatarCell.imageView.playAnimation(); + }); + cameraDrawable.setCurrentFrame(0); + cameraDrawable.setCustomEndFrame(43); + setAvatarCell.imageView.playAnimation(); + }); settingsContainer.addView(setAvatarCell, LayoutHelper.createLinear(LayoutHelper.MATCH_PARENT, LayoutHelper.WRAP_CONTENT)); } @@ -928,6 +939,8 @@ public class ChatEditActivity extends BaseFragment implements ImageUpdater.Image return fragmentView; } + RLottieDrawable cameraDrawable; + private void setAvatar() { if (avatarImage == null) { return; @@ -953,6 +966,12 @@ public class ChatEditActivity extends BaseFragment implements ImageUpdater.Image } else { setAvatarCell.setTextAndIcon(LocaleController.getString("ChatSetPhotoOrVideo", R.string.ChatSetPhotoOrVideo), R.drawable.baseline_image_24, true); } + if (cameraDrawable == null) { + cameraDrawable = new RLottieDrawable(R.raw.camera_outline, "" + R.raw.camera_outline, AndroidUtilities.dp(50), AndroidUtilities.dp(50), false, null); + } + setAvatarCell.imageView.setTranslationY(-AndroidUtilities.dp(9)); + setAvatarCell.imageView.setTranslationX(-AndroidUtilities.dp(8)); + setAvatarCell.imageView.setAnimation(cameraDrawable); } if (PhotoViewer.hasInstance() && PhotoViewer.getInstance().isVisible()) { PhotoViewer.getInstance().checkCurrentImageVisibility(); diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Components/ChatActivityEnterView.java b/TMessagesProj/src/main/java/org/telegram/ui/Components/ChatActivityEnterView.java index 79b4f0032..f3e106631 100644 --- 
a/TMessagesProj/src/main/java/org/telegram/ui/Components/ChatActivityEnterView.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Components/ChatActivityEnterView.java @@ -39,6 +39,7 @@ import android.graphics.RectF; import android.graphics.drawable.Drawable; import android.graphics.drawable.TransitionDrawable; import android.media.AudioManager; +import android.net.Uri; import android.os.Build; import android.os.Bundle; import android.os.PowerManager; @@ -73,6 +74,7 @@ import android.view.accessibility.AccessibilityNodeInfo; import android.view.animation.DecelerateInterpolator; import android.view.inputmethod.EditorInfo; import android.view.inputmethod.InputConnection; +import android.webkit.MimeTypeMap; import android.widget.EditText; import android.widget.FrameLayout; import android.widget.ImageView; @@ -92,8 +94,11 @@ import androidx.core.view.inputmethod.EditorInfoCompat; import androidx.core.view.inputmethod.InputConnectionCompat; import androidx.core.view.inputmethod.InputContentInfoCompat; import androidx.customview.widget.ExploreByTouchHelper; +import androidx.recyclerview.widget.ChatListItemAnimator; import androidx.recyclerview.widget.LinearLayoutManager; +import com.google.android.exoplayer2.util.Log; + import org.jetbrains.annotations.NotNull; import org.openintents.openpgp.OpenPgpError; import org.openintents.openpgp.util.OpenPgpApi; @@ -115,6 +120,7 @@ import org.telegram.messenger.SendMessagesHelper; import org.telegram.messenger.SharedConfig; import org.telegram.messenger.UserConfig; import org.telegram.messenger.UserObject; +import org.telegram.messenger.Utilities; import org.telegram.messenger.VideoEditedInfo; import org.telegram.messenger.camera.CameraController; import org.telegram.tgnet.ConnectionsManager; @@ -131,11 +137,14 @@ import org.telegram.ui.ChatActivity; import org.telegram.ui.DialogsActivity; import org.telegram.ui.GroupStickersActivity; import org.telegram.ui.LaunchActivity; +import org.telegram.ui.PhotoViewer; import 
org.telegram.ui.StickersActivity; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; +import java.io.FileOutputStream; +import java.io.InputStream; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -160,6 +169,8 @@ import tw.nekomimi.nekogram.utils.UIUtil; public class ChatActivityEnterView extends FrameLayout implements NotificationCenter.NotificationCenterDelegate, SizeNotifierFrameLayout.SizeNotifierFrameLayoutDelegate, StickersAlert.StickersAlertDelegate { + boolean textTransitionIsRunning; + public interface ChatActivityEnterViewDelegate { default void beforeMessageSend(CharSequence message, boolean notify, int scheduleDate) { } @@ -236,6 +247,10 @@ public class ChatActivityEnterView extends FrameLayout implements NotificationCe default void toggleDisableLinkPreview() { } + + default boolean hasForwardingMessages() { + return false; + } } private final static int RECORD_STATE_ENTER = 0; @@ -377,7 +392,7 @@ public class ChatActivityEnterView extends FrameLayout implements NotificationCe private LinearLayout attachLayout; private ImageView attachButton; private ImageView botButton; - private LinearLayout textFieldContainer; + private FrameLayout textFieldContainer; private FrameLayout sendButtonContainer; private FrameLayout doneButtonContainer; private ImageView doneButtonImage; @@ -839,6 +854,7 @@ public class ChatActivityEnterView extends FrameLayout implements NotificationCe public boolean voiceEnterTransitionInProgress; public boolean skipDraw; + private int lastSize; public RecordCircle(Context context) { super(context); @@ -1001,18 +1017,22 @@ public class ChatActivityEnterView extends FrameLayout implements NotificationCe @SuppressLint("DrawAllocation") @Override protected void onMeasure(int widthMeasureSpec, int heightMeasureSpec) { + int currentSize = MeasureSpec.getSize(widthMeasureSpec); int h = AndroidUtilities.dp(194); - tooltipLayout = new StaticLayout(tooltipMessage, 
tooltipPaint, AndroidUtilities.dp(220), Layout.Alignment.ALIGN_NORMAL, 1.0f, 0.0f, true); - int n = tooltipLayout.getLineCount(); - tooltipWidth = 0; - for (int i = 0; i < n; i++) { - float w = tooltipLayout.getLineWidth(i); - if (w > tooltipWidth) { - tooltipWidth = w; + if (lastSize != currentSize) { + lastSize = currentSize; + tooltipLayout = new StaticLayout(tooltipMessage, tooltipPaint, AndroidUtilities.dp(220), Layout.Alignment.ALIGN_NORMAL, 1.0f, 0.0f, true); + int n = tooltipLayout.getLineCount(); + tooltipWidth = 0; + for (int i = 0; i < n; i++) { + float w = tooltipLayout.getLineWidth(i); + if (w > tooltipWidth) { + tooltipWidth = w; + } + } + if (tooltipLayout.getLineCount() > 1) { + h += tooltipLayout.getHeight() - tooltipLayout.getLineBottom(0); } - } - if (tooltipLayout.getLineCount() > 1) { - h += tooltipLayout.getHeight() - tooltipLayout.getLineBottom(0); } super.onMeasure(widthMeasureSpec, MeasureSpec.makeMeasureSpec(h, MeasureSpec.EXACTLY)); @@ -1681,8 +1701,7 @@ public class ChatActivityEnterView extends FrameLayout implements NotificationCe sendByEnter = preferences.getBoolean("send_by_enter", false); configAnimationsEnabled = preferences.getBoolean("view_animations", true); - textFieldContainer = new LinearLayout(context); - textFieldContainer.setOrientation(LinearLayout.HORIZONTAL); + textFieldContainer = new FrameLayout(context); textFieldContainer.setClipChildren(false); textFieldContainer.setClipToPadding(false); textFieldContainer.setPadding(0, AndroidUtilities.dp(1), 0, 0); @@ -1722,7 +1741,7 @@ public class ChatActivityEnterView extends FrameLayout implements NotificationCe } }; frameLayout.setClipChildren(false); - textFieldContainer.addView(frameLayout, LayoutHelper.createLinear(0, LayoutHelper.WRAP_CONTENT, 1.0f, Gravity.BOTTOM)); + textFieldContainer.addView(frameLayout, LayoutHelper.createFrame(LayoutHelper.MATCH_PARENT, LayoutHelper.WRAP_CONTENT, Gravity.BOTTOM, 0, 0, 48, 0)); for (int a = 0; a < 2; a++) { emojiButton[a] = new 
ImageView(context) { @@ -1915,35 +1934,67 @@ public class ChatActivityEnterView extends FrameLayout implements NotificationCe ClipboardManager clipboard = (ClipboardManager) getContext().getSystemService(Context.CLIPBOARD_SERVICE); ClipData clipData = clipboard.getPrimaryClip(); if (clipData != null) { - ClipDescription description = clipData.getDescription(); if (clipData.getItemCount() == 1 && clipData.getDescription().hasMimeType("image/*")) { -// final File cameraFile = AndroidUtilities.generatePicturePath(fragment.isSecretChat(), null); -// try { -// InputStream fis = context.getContentResolver().openInputStream(clipData.getItemAt(0).getUri()); -// FileOutputStream fos = new FileOutputStream(cameraFile); -// FileUtils.copy(fis, fos); -// fis.close(); -// fos.close(); -// MediaController.PhotoEntry photoEntry = new MediaController.PhotoEntry(0, -1, 0, cameraFile.getAbsolutePath(), 0, false, 0, 0, 0); -// ArrayList entries = new ArrayList<>(); -// entries.add(photoEntry); -// PhotoViewer.getInstance().setParentActivity(parentActivity); -// PhotoViewer.getInstance().openPhotoForSelect(entries, 0, 2, false, new PhotoViewer.EmptyPhotoViewerProvider() { -// @Override -// public void sendButtonPressed(int index, VideoEditedInfo videoEditedInfo, boolean notify, int scheduleDate, boolean forceDocument) { -// super.sendButtonPressed(index, videoEditedInfo, notify, scheduleDate, forceDocument); -// } -// }, parentFragment); -// } catch (FileNotFoundException e) { -// e.printStackTrace(); -// } catch (IOException e) { -// e.printStackTrace(); -// } - if (description.hasMimeType("image/gif")) { - SendMessagesHelper.prepareSendingDocument(accountInstance, null, null, clipData.getItemAt(0).getUri(), null, "image/gif", dialog_id, replyingMessageObject, getThreadMessage(), null, null, true, 0); - } else { - SendMessagesHelper.prepareSendingPhoto(accountInstance, null, clipData.getItemAt(0).getUri(), dialog_id, replyingMessageObject, getThreadMessage(), null, null, null, null, 
0, null, true, 0); - } + final File file = AndroidUtilities.generatePicturePath(fragment.isSecretChat(), MimeTypeMap.getSingleton().getExtensionFromMimeType(clipData.getDescription().getMimeType(0))); + Uri uri = clipData.getItemAt(0).getUri(); + Utilities.globalQueue.postRunnable(() -> { + try { + InputStream in = context.getContentResolver().openInputStream(uri); + FileOutputStream fos = new FileOutputStream(file); + byte[] buffer = new byte[1024]; + int lengthRead; + while ((lengthRead = in.read(buffer)) > 0) { + fos.write(buffer, 0, lengthRead); + fos.flush(); + } + in.close(); + fos.close(); + MediaController.PhotoEntry photoEntry = new MediaController.PhotoEntry(0, -1, 0, file.getAbsolutePath(), 0, false, 0, 0, 0); + ArrayList entries = new ArrayList<>(); + entries.add(photoEntry); + AndroidUtilities.runOnUIThread(() -> { + PhotoViewer.getInstance().setParentActivity(parentActivity); + PhotoViewer.getInstance().openPhotoForSelect(entries, 0, 2, false, new PhotoViewer.EmptyPhotoViewerProvider() { + boolean sending; + @Override + public void sendButtonPressed(int index, VideoEditedInfo videoEditedInfo, boolean notify, int scheduleDate, boolean forceDocument) { + ArrayList photos = new ArrayList<>(); + SendMessagesHelper.SendingMediaInfo info = new SendMessagesHelper.SendingMediaInfo(); + if (!photoEntry.isVideo && photoEntry.imagePath != null) { + info.path = photoEntry.imagePath; + } else if (photoEntry.path != null) { + info.path = photoEntry.path; + } + info.thumbPath = photoEntry.thumbPath; + info.isVideo = photoEntry.isVideo; + info.caption = photoEntry.caption != null ? 
photoEntry.caption.toString() : null; + info.entities = photoEntry.entities; + info.masks = photoEntry.stickers; + info.ttl = photoEntry.ttl; + info.videoEditedInfo = videoEditedInfo; + info.canDeleteAfter = true; + photos.add(info); + photoEntry.reset(); + sending = true; + SendMessagesHelper.prepareSendingMedia(accountInstance, photos, dialog_id, replyingMessageObject, getThreadMessage(), null, false, false, editingMessageObject, notify, scheduleDate); + } + + @Override + public void willHidePhotoViewer() { + if (!sending) { + try { + file.delete(); + } catch (Throwable ignore) { + + } + } + } + }, parentFragment); + }); + } catch (Throwable e) { + e.printStackTrace(); + } + }); } } return super.onTextContextMenuItem(id); @@ -1974,7 +2025,7 @@ public class ChatActivityEnterView extends FrameLayout implements NotificationCe } }); - + messageEditText.setIncludeFontPadding(false); messageEditText.setWindowView(parentActivity.getWindow().getDecorView()); TLRPC.EncryptedChat encryptedChat = parentFragment != null ? 
parentFragment.getCurrentEncryptedChat() : null; messageEditText.setAllowTextEntitiesIntersection(supportsSendingNewEntities()); @@ -2057,13 +2108,24 @@ public class ChatActivityEnterView extends FrameLayout implements NotificationCe private boolean processChange; private boolean nextChangeIsSend; + private CharSequence prevText; + private boolean ignorePrevTextChange; @Override public void beforeTextChanged(CharSequence charSequence, int i, int i2, int i3) { + if (ignorePrevTextChange) { + return; + } + if (recordingAudioVideo) { + prevText = charSequence.toString(); + } } @Override public void onTextChanged(CharSequence charSequence, int start, int before, int count) { + if (ignorePrevTextChange) { + return; + } if (lineCount != messageEditText.getLineCount()) { if (!isInitLineCount && messageEditText.getMeasuredWidth() > 0) { onLineCountChanged(lineCount, messageEditText.getLineCount()); @@ -2101,6 +2163,16 @@ public class ChatActivityEnterView extends FrameLayout implements NotificationCe @Override public void afterTextChanged(Editable editable) { + if (ignorePrevTextChange) { + return; + } + if (prevText != null) { + ignorePrevTextChange = true; + editable.replace(0, editable.length(), prevText); + prevText = null; + ignorePrevTextChange = false; + return; + } if (innerTextChange == 0) { if (nextChangeIsSend) { sendMessage(); @@ -2265,7 +2337,7 @@ public class ChatActivityEnterView extends FrameLayout implements NotificationCe } }); botCommandsMenuContainer.setClipToPadding(false); - sizeNotifierLayout.addView(botCommandsMenuContainer, 14, LayoutHelper.createFrame(LayoutHelper.MATCH_PARENT, LayoutHelper.MATCH_PARENT, Gravity.BOTTOM, 0, 0, 0, 47)); + sizeNotifierLayout.addView(botCommandsMenuContainer, 14, LayoutHelper.createFrame(LayoutHelper.MATCH_PARENT, LayoutHelper.MATCH_PARENT, Gravity.BOTTOM)); botCommandsMenuContainer.setVisibility(View.GONE); @@ -2497,10 +2569,18 @@ public class ChatActivityEnterView extends FrameLayout implements NotificationCe 
recordTimerView = new TimerView(context); recordTimeContainer.addView(recordTimerView, LayoutHelper.createLinear(LayoutHelper.MATCH_PARENT, LayoutHelper.MATCH_PARENT, Gravity.CENTER_VERTICAL, 6, 0, 0, 0)); - sendButtonContainer = new FrameLayout(context); + sendButtonContainer = new FrameLayout(context) { + @Override + protected boolean drawChild(Canvas canvas, View child, long drawingTime) { + if (child == sendButton && textTransitionIsRunning) { + return true; + } + return super.drawChild(canvas, child, drawingTime); + } + }; sendButtonContainer.setClipChildren(false); sendButtonContainer.setClipToPadding(false); - textFieldContainer.addView(sendButtonContainer, LayoutHelper.createLinear(48, 48, Gravity.BOTTOM)); + textFieldContainer.addView(sendButtonContainer, LayoutHelper.createFrame(48, 48, Gravity.BOTTOM | Gravity.RIGHT)); audioVideoButtonContainer = new FrameLayout(context); audioVideoButtonContainer.setSoundEffectsEnabled(false); @@ -2543,11 +2623,11 @@ public class ChatActivityEnterView extends FrameLayout implements NotificationCe delegate.needStartRecordAudio(0); } recordingAudioVideo = false; - AndroidUtilities.runOnUIThread(moveToSendStateRunnable = () -> { - moveToSendStateRunnable = null; messageTransitionIsRunning = false; - updateRecordIntefrace(RECORD_STATE_SENDING); - }, 200); + AndroidUtilities.runOnUIThread(moveToSendStateRunnable = () -> { + moveToSendStateRunnable = null; + updateRecordIntefrace(RECORD_STATE_SENDING); + }, 200); } return false; } @@ -2632,11 +2712,11 @@ public class ChatActivityEnterView extends FrameLayout implements NotificationCe } if (!NekoConfig.confirmAVMessage) { recordingAudioVideo = false; - AndroidUtilities.runOnUIThread(moveToSendStateRunnable = () -> { - moveToSendStateRunnable = null; - messageTransitionIsRunning = false; - updateRecordIntefrace(RECORD_STATE_SENDING); - }, 500); + messageTransitionIsRunning = false; + AndroidUtilities.runOnUIThread(moveToSendStateRunnable = () -> { + moveToSendStateRunnable = 
null; + updateRecordIntefrace(RECORD_STATE_SENDING); + }, 500); } } } @@ -2953,7 +3033,7 @@ public class ChatActivityEnterView extends FrameLayout implements NotificationCe doneButtonContainer = new FrameLayout(context); doneButtonContainer.setVisibility(GONE); - textFieldContainer.addView(doneButtonContainer, LayoutHelper.createLinear(48, 48, Gravity.BOTTOM)); + textFieldContainer.addView(doneButtonContainer, LayoutHelper.createFrame(48, 48, Gravity.BOTTOM | Gravity.RIGHT)); doneButtonContainer.setOnClickListener(view -> doneEditingMessage()); Drawable doneCircleDrawable = Theme.createCircleDrawable(AndroidUtilities.dp(16), Theme.getColor(Theme.key_chat_messagePanelSend)); @@ -3936,8 +4016,8 @@ public class ChatActivityEnterView extends FrameLayout implements NotificationCe NotificationCenter.getInstance(currentAccount).onAnimationFinish(notificationsIndex); } }); - currentTopViewAnimation.setDuration(250); - currentTopViewAnimation.setInterpolator(CubicBezierInterpolator.DEFAULT); + currentTopViewAnimation.setDuration(ChatListItemAnimator.DEFAULT_DURATION + 20); + currentTopViewAnimation.setInterpolator(ChatListItemAnimator.DEFAULT_INTERPOLATOR); currentTopViewAnimation.start(); notificationsIndex = NotificationCenter.getInstance(currentAccount).setAnimationInProgress(notificationsIndex, null); } else { @@ -4067,9 +4147,8 @@ public class ChatActivityEnterView extends FrameLayout implements NotificationCe } } }); - currentTopViewAnimation.setDuration(220); - currentTopViewAnimation.setStartDelay(50); - currentTopViewAnimation.setInterpolator(CubicBezierInterpolator.DEFAULT); + currentTopViewAnimation.setDuration(ChatListItemAnimator.DEFAULT_DURATION); + currentTopViewAnimation.setInterpolator(ChatListItemAnimator.DEFAULT_INTERPOLATOR); currentTopViewAnimation.start(); } else { topViewEnterProgress = 0f; @@ -4756,17 +4835,27 @@ public class ChatActivityEnterView extends FrameLayout implements NotificationCe } } } - if (StrUtil.isNotBlank(message)) { - if (delegate 
!= null) { - delegate.beforeMessageSend(message, notify, scheduleDate); - } - if (processSendingText(message, notify, scheduleDate)) { + if (processSendingText(message, notify, scheduleDate)) { + if (delegate.hasForwardingMessages()) { messageEditText.setText(""); - lastTypingTimeSend = 0; if (delegate != null) { delegate.onMessageSend(message, notify, scheduleDate); } + } else { + if (delegate != null) { + delegate.beforeMessageSend(message, notify, scheduleDate); + } + messageTransitionIsRunning = false; + AndroidUtilities.runOnUIThread(moveToSendStateRunnable = () -> { + moveToSendStateRunnable = null; + hideTopView(true); + messageEditText.setText(""); + if (delegate != null) { + delegate.onMessageSend(message, notify, scheduleDate); + } + }, 200); } + lastTypingTimeSend = 0; } else if (forceShowSendButton) { if (delegate != null) { delegate.beforeMessageSend(null, notify, scheduleDate); @@ -4845,16 +4934,16 @@ public class ChatActivityEnterView extends FrameLayout implements NotificationCe } CharSequence[] message = new CharSequence[]{AndroidUtilities.getTrimmedString(text.subSequence(start, end))}; ArrayList entities = MediaDataController.getInstance(currentAccount).getEntities(message, supportsNewEntities); - MessageObject.SendAnimationData sendAnimationData; - if (message[0].length() < 20) { + MessageObject.SendAnimationData sendAnimationData = null; + + if (!delegate.hasForwardingMessages()) { sendAnimationData = new MessageObject.SendAnimationData(); sendAnimationData.width = sendAnimationData.height = AndroidUtilities.dp(22); messageEditText.getLocationInWindow(location); sendAnimationData.x = location[0] + AndroidUtilities.dp(11); sendAnimationData.y = location[1] + AndroidUtilities.dp(8 + 11); - } else { - sendAnimationData = null; } + SendMessagesHelper.getInstance(currentAccount).sendMessage(message[0].toString(), dialog_id, replyingMessageObject, getThreadMessage(), messageWebPage, messageWebPageSearch, entities, null, null, notify, scheduleDate, 
sendAnimationData); start = end + 1; } while (end != text.length()); @@ -5643,13 +5732,13 @@ public class ChatActivityEnterView extends FrameLayout implements NotificationCe public void startMessageTransition() { if (moveToSendStateRunnable != null) { AndroidUtilities.cancelRunOnUIThread(moveToSendStateRunnable); + messageTransitionIsRunning = true; + moveToSendStateRunnable.run(); moveToSendStateRunnable = null; } - messageTransitionIsRunning = true; - updateRecordIntefrace(RECORD_STATE_SENDING); } - public boolean canShowVoiceMessageTransition() { + public boolean canShowMessageTransition() { return moveToSendStateRunnable != null; } @@ -8550,6 +8639,7 @@ public class ChatActivityEnterView extends FrameLayout implements NotificationCe private Rect cancelRect = new Rect(); Drawable selectableBackground; + private int lastSize; @Override public boolean onTouchEvent(MotionEvent event) { @@ -8653,24 +8743,28 @@ public class ChatActivityEnterView extends FrameLayout implements NotificationCe @Override protected void onMeasure(int widthMeasureSpec, int heightMeasureSpec) { super.onMeasure(widthMeasureSpec, heightMeasureSpec); - slideToCancelWidth = grayPaint.measureText(slideToCancelString); - cancelWidth = bluePaint.measureText(cancelString); - lastUpdateTime = System.currentTimeMillis(); + int currentSize = getMeasuredHeight() + (getMeasuredWidth() << 16); + if (lastSize != currentSize) { + lastSize = currentSize; + slideToCancelWidth = grayPaint.measureText(slideToCancelString); + cancelWidth = bluePaint.measureText(cancelString); + lastUpdateTime = System.currentTimeMillis(); - int heightHalf = getMeasuredHeight() >> 1; - arrowPath.reset(); - if (smallSize) { - arrowPath.setLastPoint(AndroidUtilities.dpf2(2.5f), heightHalf - AndroidUtilities.dpf2(3.12f)); - arrowPath.lineTo(0, heightHalf); - arrowPath.lineTo(AndroidUtilities.dpf2(2.5f), heightHalf + AndroidUtilities.dpf2(3.12f)); - } else { - arrowPath.setLastPoint(AndroidUtilities.dpf2(4f), heightHalf - 
AndroidUtilities.dpf2(5f)); - arrowPath.lineTo(0, heightHalf); - arrowPath.lineTo(AndroidUtilities.dpf2(4f), heightHalf + AndroidUtilities.dpf2(5f)); + int heightHalf = getMeasuredHeight() >> 1; + arrowPath.reset(); + if (smallSize) { + arrowPath.setLastPoint(AndroidUtilities.dpf2(2.5f), heightHalf - AndroidUtilities.dpf2(3.12f)); + arrowPath.lineTo(0, heightHalf); + arrowPath.lineTo(AndroidUtilities.dpf2(2.5f), heightHalf + AndroidUtilities.dpf2(3.12f)); + } else { + arrowPath.setLastPoint(AndroidUtilities.dpf2(4f), heightHalf - AndroidUtilities.dpf2(5f)); + arrowPath.lineTo(0, heightHalf); + arrowPath.lineTo(AndroidUtilities.dpf2(4f), heightHalf + AndroidUtilities.dpf2(5f)); + } + + slideToLayout = new StaticLayout(slideToCancelString, grayPaint, (int) slideToCancelWidth, Layout.Alignment.ALIGN_NORMAL, 1.0f, 0.0f, false); + cancelLayout = new StaticLayout(cancelString, bluePaint, (int) cancelWidth, Layout.Alignment.ALIGN_NORMAL, 1.0f, 0.0f, false); } - - slideToLayout = new StaticLayout(slideToCancelString, grayPaint, (int) slideToCancelWidth, Layout.Alignment.ALIGN_NORMAL, 1.0f, 0.0f, false); - cancelLayout = new StaticLayout(cancelString, bluePaint, (int) cancelWidth, Layout.Alignment.ALIGN_NORMAL, 1.0f, 0.0f, false); } @Override @@ -9017,9 +9111,9 @@ public class ChatActivityEnterView extends FrameLayout implements NotificationCe if (botCommandsMenuContainer != null) { int padding; if (botCommandsAdapter.getItemCount() > 4) { - padding = Math.max(0, sizeNotifierLayout.getMeasuredHeight() - AndroidUtilities.dp(58 + 36 * 4.3f)); + padding = Math.max(0, sizeNotifierLayout.getMeasuredHeight() - AndroidUtilities.dp(8 + 36 * 4.3f)); } else { - padding = Math.max(0, sizeNotifierLayout.getMeasuredHeight() - AndroidUtilities.dp(58 + 36 * Math.max(1, Math.min(4, botCommandsAdapter.getItemCount())))); + padding = Math.max(0, sizeNotifierLayout.getMeasuredHeight() - AndroidUtilities.dp(8 + 36 * Math.max(1, Math.min(4, botCommandsAdapter.getItemCount())))); } if 
(botCommandsMenuContainer.listView.getPaddingTop() != padding) { @@ -9073,4 +9167,16 @@ public class ChatActivityEnterView extends FrameLayout implements NotificationCe botCommandsMenuButton.setOpened(false); botCommandsMenuContainer.dismiss(); } + + public void setTextTransitionIsRunning(boolean b) { + textTransitionIsRunning = b; + sendButtonContainer.invalidate(); + } + + public float getTopViewHeight() { + if (topView != null && topView.getVisibility() == View.VISIBLE) { + return topView.getLayoutParams().height; + } + return 0; + } } diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Components/ChatAttachAlert.java b/TMessagesProj/src/main/java/org/telegram/ui/Components/ChatAttachAlert.java index 559732d0b..e8384a366 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/Components/ChatAttachAlert.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Components/ChatAttachAlert.java @@ -1799,6 +1799,12 @@ public class ChatAttachAlert extends BottomSheet implements NotificationCenter.N calcMandatoryInsets = chatActivity.isKeyboardVisible(); } openTransitionFinished = false; + if (Build.VERSION.SDK_INT >= 30) { + int color = Theme.getColor(Theme.key_windowBackgroundGray); + if (AndroidUtilities.computePerceivedBrightness(color) < 0.721) { + getWindow().setNavigationBarColor(color); + } + } } public void setEditingMessageObject(MessageObject messageObject) { diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Components/ChatAttachAlertPhotoLayout.java b/TMessagesProj/src/main/java/org/telegram/ui/Components/ChatAttachAlertPhotoLayout.java index 6b2e6d293..4aebc4a81 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/Components/ChatAttachAlertPhotoLayout.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Components/ChatAttachAlertPhotoLayout.java @@ -1629,6 +1629,7 @@ public class ChatAttachAlertPhotoLayout extends ChatAttachAlert.AttachAlertLayou animateCameraValues[2] = itemSize; additionCloseCameraY = 0; cameraExpanded = true; + 
cameraView.setFpsLimit(-1); if (animated) { setCameraOpenProgress(0); cameraAnimationInProgress = true; @@ -1720,15 +1721,16 @@ public class ChatAttachAlertPhotoLayout extends ChatAttachAlert.AttachAlertLayou } else { AndroidUtilities.rectTmp.set(0 , 0, getMeasuredWidth(), getMeasuredHeight()); } + canvas.save(); canvas.clipRect(AndroidUtilities.rectTmp); super.dispatchDraw(canvas); canvas.restore(); } - } }; cameraView.setRecordFile(AndroidUtilities.generateVideoPath(parentAlert.baseFragment instanceof ChatActivity && ((ChatActivity) parentAlert.baseFragment).isSecretChat())); cameraView.setFocusable(true); + cameraView.setFpsLimit(30); if (Build.VERSION.SDK_INT >= 21) { Path path = new Path(); float[] radii = new float[8]; @@ -2062,6 +2064,7 @@ public class ChatAttachAlertPhotoLayout extends ChatAttachAlert.AttachAlertLayou cameraView.invalidate(); } cameraOpened = false; + if (cameraPanel != null) { cameraPanel.setVisibility(View.GONE); } @@ -2072,8 +2075,11 @@ public class ChatAttachAlertPhotoLayout extends ChatAttachAlert.AttachAlertLayou if (cameraPhotoRecyclerView != null) { cameraPhotoRecyclerView.setVisibility(View.GONE); } - if (Build.VERSION.SDK_INT >= 21 && cameraView != null) { - cameraView.setSystemUiVisibility(View.SYSTEM_UI_FLAG_LAYOUT_FULLSCREEN); + if (cameraView != null) { + cameraView.setFpsLimit(30); + if (Build.VERSION.SDK_INT >= 21) { + cameraView.setSystemUiVisibility(View.SYSTEM_UI_FLAG_LAYOUT_FULLSCREEN); + } } } }); @@ -2098,6 +2104,7 @@ public class ChatAttachAlertPhotoLayout extends ChatAttachAlert.AttachAlertLayou } } cameraOpened = false; + cameraView.setFpsLimit(30); if (Build.VERSION.SDK_INT >= 21) { cameraView.setSystemUiVisibility(View.SYSTEM_UI_FLAG_LAYOUT_FULLSCREEN); } diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Components/ColorPicker.java b/TMessagesProj/src/main/java/org/telegram/ui/Components/ColorPicker.java index eb5b384eb..147b6d9ca 100644 --- 
a/TMessagesProj/src/main/java/org/telegram/ui/Components/ColorPicker.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Components/ColorPicker.java @@ -421,6 +421,9 @@ public class ColorPicker extends FrameLayout { if (radioButton[1].getColor() == 0) { radioButton[1].setColor(generateGradientColors(radioButton[0].getColor())); } + if (myMessagesColor) { + delegate.setColor(radioButton[0].getColor(), 0, true); + } delegate.setColor(radioButton[1].getColor(), 1, true); colorsCount = 2; clearButton.setVisibility(VISIBLE); diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Components/EditTextBoldCursor.java b/TMessagesProj/src/main/java/org/telegram/ui/Components/EditTextBoldCursor.java index 2632da403..064596f80 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/Components/EditTextBoldCursor.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Components/EditTextBoldCursor.java @@ -23,6 +23,7 @@ import android.graphics.drawable.ShapeDrawable; import android.graphics.drawable.shapes.RectShape; import android.os.Build; import android.os.SystemClock; + import androidx.annotation.Keep; import androidx.annotation.Nullable; import androidx.core.view.accessibility.AccessibilityNodeInfoCompat; @@ -42,6 +43,7 @@ import android.widget.EditText; import android.widget.TextView; import org.telegram.messenger.AndroidUtilities; +import org.telegram.messenger.BuildVars; import org.telegram.messenger.FileLog; import org.telegram.messenger.LocaleController; import org.telegram.messenger.R; @@ -120,6 +122,12 @@ public class EditTextBoldCursor extends EditText { private ViewTreeObserver.OnPreDrawListener floatingToolbarPreDrawListener; private View windowView; private View attachedToWindow; + private int lastSize; + int lastOffset = -1; + CharSequence lastText; + + boolean drawInMaim; + ShapeDrawable cursorDrawable; @TargetApi(23) private class ActionModeCallback2Wrapper extends ActionMode.Callback2 { @@ -168,6 +176,9 @@ public class EditTextBoldCursor extends EditText 
{ @Nullable @Override public Drawable getTextCursorDrawable() { + if (cursorDrawable != null) { + return super.getTextCursorDrawable(); + } ShapeDrawable shapeDrawable = new ShapeDrawable(new RectShape()) { @Override public void draw(Canvas canvas) { @@ -194,6 +205,35 @@ public class EditTextBoldCursor extends EditText { setImportantForAutofill(View.IMPORTANT_FOR_AUTOFILL_NO); } + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.Q) { + cursorDrawable = new ShapeDrawable() { + + @Override + public void draw(Canvas canvas) { + if (drawInMaim) { + cursorDrawn = true; + } else { + super.draw(canvas); + } + } + + @Override + public int getIntrinsicHeight() { + return AndroidUtilities.dp(cursorSize + 20); + } + + @Override + public int getIntrinsicWidth() { + return AndroidUtilities.dp(cursorWidth); + } + }; + cursorDrawable.setShape(new RectShape()); + gradientDrawable = new GradientDrawable(GradientDrawable.Orientation.TOP_BOTTOM, new int[]{0xff54a1db, 0xff54a1db}); + + setTextCursorDrawable(cursorDrawable); + } + + try { if (!mScrollYGet && mScrollYField == null) { mScrollYGet = true; @@ -220,25 +260,27 @@ public class EditTextBoldCursor extends EditText { } catch (Throwable e) { FileLog.e(e); } - try { - gradientDrawable = new GradientDrawable(GradientDrawable.Orientation.TOP_BOTTOM, new int[] {0xff54a1db, 0xff54a1db}); - if (Build.VERSION.SDK_INT >= 29) { - setTextCursorDrawable(gradientDrawable); - } - editor = mEditor.get(this); - } catch (Throwable ignore) { + if (cursorDrawable == null) { + try { + gradientDrawable = new GradientDrawable(GradientDrawable.Orientation.TOP_BOTTOM, new int[]{0xff54a1db, 0xff54a1db}); + if (Build.VERSION.SDK_INT >= 29) { + setTextCursorDrawable(gradientDrawable); + } + editor = mEditor.get(this); + } catch (Throwable ignore) { - } - try { - if (mCursorDrawableResField == null) { - mCursorDrawableResField = TextView.class.getDeclaredField("mCursorDrawableRes"); - mCursorDrawableResField.setAccessible(true); } - if 
(mCursorDrawableResField != null) { - mCursorDrawableResField.set(this, R.drawable.field_carret_empty); - } - } catch (Throwable ignore) { + try { + if (mCursorDrawableResField == null) { + mCursorDrawableResField = TextView.class.getDeclaredField("mCursorDrawableRes"); + mCursorDrawableResField.setAccessible(true); + } + if (mCursorDrawableResField != null) { + mCursorDrawableResField.set(this, R.drawable.field_carret_empty); + } + } catch (Throwable ignore) { + } } cursorSize = AndroidUtilities.dp(24); } @@ -289,7 +331,12 @@ public class EditTextBoldCursor extends EditText { } public void setCursorColor(int color) { - gradientDrawable.setColor(color); + if (cursorDrawable != null) { + cursorDrawable.getPaint().setColor(color); + } + if (gradientDrawable != null) { + gradientDrawable.setColor(color); + } invalidate(); } @@ -342,11 +389,6 @@ public class EditTextBoldCursor extends EditText { requestLayout(); } - @Override - public boolean requestFocus(int direction, Rect previouslyFocusedRect) { - return super.requestFocus(direction, previouslyFocusedRect); - } - public boolean hasErrorText() { return !TextUtils.isEmpty(errorText); } @@ -385,10 +427,14 @@ public class EditTextBoldCursor extends EditText { @Override protected void onMeasure(int widthMeasureSpec, int heightMeasureSpec) { super.onMeasure(widthMeasureSpec, heightMeasureSpec); + int currentSize = getMeasuredHeight() + (getMeasuredWidth() << 16); if (hintLayout != null) { - setHintText(hint); + if (lastSize != currentSize) { + setHintText(hint); + } lineY = (getMeasuredHeight() - hintLayout.getHeight()) / 2.0f + hintLayout.getHeight() + AndroidUtilities.dp(6); } + lastSize = currentSize; } public void setHintText(CharSequence text) { @@ -494,35 +540,6 @@ public class EditTextBoldCursor extends EditText { @Override protected void onDraw(Canvas canvas) { - int topPadding = getExtendedPaddingTop(); - scrollY = Integer.MAX_VALUE; - try { - if (mScrollYField != null) { - scrollY = mScrollYField.getInt(this); 
- mScrollYField.set(this, 0); - } else { - scrollY = getScrollX(); - } - } catch (Exception e) { - // - } - ignoreTopCount = 1; - ignoreBottomCount = 1; - canvas.save(); - canvas.translate(0, topPadding); - try { - super.onDraw(canvas); - } catch (Exception e) { - // - } - if (scrollY != Integer.MAX_VALUE) { - try { - mScrollYField.set(this, scrollY); - } catch (Exception e) { - // - } - } - canvas.restore(); if ((length() == 0 || transformHintToHeader) && hintLayout != null && (hintVisible || hintAlpha != 0)) { if (hintVisible && hintAlpha != 1.0f || !hintVisible && hintAlpha != 0.0f) { long newTime = System.currentTimeMillis(); @@ -594,48 +611,129 @@ public class EditTextBoldCursor extends EditText { getPaint().setColor(oldColor); canvas.restore(); } - try { - boolean showCursor; - if (mShowCursorField != null) { - long mShowCursor = mShowCursorField.getLong(editor); - showCursor = (SystemClock.uptimeMillis() - mShowCursor) % (2 * 500) < 500 && isFocused(); - } else { - showCursor = cursorDrawn; - cursorDrawn = false; - } - if (allowDrawCursor && showCursor) { - canvas.save(); - int voffsetCursor = 0; - if (getVerticalOffsetMethod != null) { - if ((getGravity() & Gravity.VERTICAL_GRAVITY_MASK) != Gravity.TOP) { - voffsetCursor = (int) getVerticalOffsetMethod.invoke(this, true); - } - } else { - if ((getGravity() & Gravity.VERTICAL_GRAVITY_MASK) != Gravity.TOP) { - voffsetCursor = getTotalPaddingTop() - getExtendedPaddingTop(); - } - } - canvas.translate(getPaddingLeft(), getExtendedPaddingTop() + voffsetCursor); - Layout layout = getLayout(); - int line = layout.getLineForOffset(getSelectionStart()); - int lineCount = layout.getLineCount(); - updateCursorPosition(); - Rect bounds = gradientDrawable.getBounds(); - rect.left = bounds.left; - rect.right = bounds.left + AndroidUtilities.dp(cursorWidth); - rect.bottom = bounds.bottom; - rect.top = bounds.top; - if (lineSpacingExtra != 0 && line < lineCount - 1) { - rect.bottom -= lineSpacingExtra; - } - rect.top = 
rect.centerY() - cursorSize / 2; - rect.bottom = rect.top + cursorSize; - gradientDrawable.setBounds(rect); - gradientDrawable.draw(canvas); - canvas.restore(); - } - } catch (Throwable ignore) { + int topPadding = getExtendedPaddingTop(); + scrollY = Integer.MAX_VALUE; + try { + if (mScrollYField != null) { + scrollY = mScrollYField.getInt(this); + mScrollYField.set(this, 0); + } else { + scrollY = getScrollX(); + } + } catch (Exception e) { + if (BuildVars.DEBUG_PRIVATE_VERSION) { + throw new RuntimeException(e); + } + } + ignoreTopCount = 1; + ignoreBottomCount = 1; + canvas.save(); + canvas.translate(0, topPadding); + try { + drawInMaim = true; + super.onDraw(canvas); + drawInMaim = false; + } catch (Exception e) { + if (BuildVars.DEBUG_PRIVATE_VERSION) { + throw new RuntimeException(e); + } + } + if (mScrollYField != null && scrollY != Integer.MAX_VALUE) { + try { + mScrollYField.set(this, scrollY); + } catch (Exception e) { + if (BuildVars.DEBUG_PRIVATE_VERSION) { + throw new RuntimeException(e); + } + } + } + canvas.restore(); + if (cursorDrawable == null) { + try { + boolean showCursor; + if (mShowCursorField != null && editor != null) { + long mShowCursor = mShowCursorField.getLong(editor); + showCursor = (SystemClock.uptimeMillis() - mShowCursor) % (2 * 500) < 500 && isFocused(); + } else { + showCursor = cursorDrawn; + cursorDrawn = false; + } + if (allowDrawCursor && showCursor) { + canvas.save(); + int voffsetCursor = 0; + if (getVerticalOffsetMethod != null) { + if ((getGravity() & Gravity.VERTICAL_GRAVITY_MASK) != Gravity.TOP) { + voffsetCursor = (int) getVerticalOffsetMethod.invoke(this, true); + } + } else { + if ((getGravity() & Gravity.VERTICAL_GRAVITY_MASK) != Gravity.TOP) { + voffsetCursor = getTotalPaddingTop() - getExtendedPaddingTop(); + } + } + canvas.translate(getPaddingLeft(), getExtendedPaddingTop() + voffsetCursor); + Layout layout = getLayout(); + int line = layout.getLineForOffset(getSelectionStart()); + int lineCount = 
layout.getLineCount(); + updateCursorPosition(); + Rect bounds = gradientDrawable.getBounds(); + rect.left = bounds.left; + rect.right = bounds.left + AndroidUtilities.dp(cursorWidth); + rect.bottom = bounds.bottom; + rect.top = bounds.top; + if (lineSpacingExtra != 0 && line < lineCount - 1) { + rect.bottom -= lineSpacingExtra; + } + rect.top = rect.centerY() - cursorSize / 2; + rect.bottom = rect.top + cursorSize; + gradientDrawable.setBounds(rect); + gradientDrawable.draw(canvas); + canvas.restore(); + } + } catch (Throwable exception) { + if (BuildVars.DEBUG_PRIVATE_VERSION) { + throw new RuntimeException(exception); + } + } + } else { + if (cursorDrawn) { + try { + canvas.save(); + int voffsetCursor = 0; + if (getVerticalOffsetMethod != null) { + if ((getGravity() & Gravity.VERTICAL_GRAVITY_MASK) != Gravity.TOP) { + voffsetCursor = (int) getVerticalOffsetMethod.invoke(this, true); + } + } else { + if ((getGravity() & Gravity.VERTICAL_GRAVITY_MASK) != Gravity.TOP) { + voffsetCursor = getTotalPaddingTop() - getExtendedPaddingTop(); + } + } + canvas.translate(getPaddingLeft(), getExtendedPaddingTop() + voffsetCursor); + Layout layout = getLayout(); + int line = layout.getLineForOffset(getSelectionStart()); + int lineCount = layout.getLineCount(); + updateCursorPosition(); + Rect bounds = gradientDrawable.getBounds(); + rect.left = bounds.left; + rect.right = bounds.left + AndroidUtilities.dp(cursorWidth); + rect.bottom = bounds.bottom; + rect.top = bounds.top; + if (lineSpacingExtra != 0 && line < lineCount - 1) { + rect.bottom -= lineSpacingExtra; + } + rect.top = rect.centerY() - cursorSize / 2; + rect.bottom = rect.top + cursorSize; + gradientDrawable.setBounds(rect); + gradientDrawable.draw(canvas); + canvas.restore(); + cursorDrawn = false; + } catch (Throwable exception) { + if (BuildVars.DEBUG_PRIVATE_VERSION) { + throw new RuntimeException(exception); + } + } + } } if (lineColor != 0 && hintLayout != null) { int h; @@ -666,14 +764,19 @@ public class 
EditTextBoldCursor extends EditText { private boolean updateCursorPosition() { final Layout layout = getLayout(); final int offset = getSelectionStart(); - final int line = layout.getLineForOffset(offset); - final int top = layout.getLineTop(line); - final int bottom = layout.getLineTop(line + 1); - updateCursorPosition(top, bottom, layout.getPrimaryHorizontal(offset)); + if (offset != lastOffset || lastText != layout.getText()) { + final int line = layout.getLineForOffset(offset); + final int top = layout.getLineTop(line); + final int bottom = layout.getLineTop(line + 1); + updateCursorPosition(top, bottom, layout.getPrimaryHorizontal(offset)); + } + lastText = layout.getText(); + lastOffset = offset; return true; } private Rect mTempRect; + private int clampHorizontalPosition(final Drawable drawable, float horizontal) { horizontal = Math.max(0.5f, horizontal - 0.5f); if (mTempRect == null) { diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Components/EmptyTextProgressView.java b/TMessagesProj/src/main/java/org/telegram/ui/Components/EmptyTextProgressView.java index 6e94c7de1..10ada601e 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/Components/EmptyTextProgressView.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Components/EmptyTextProgressView.java @@ -53,20 +53,24 @@ public class EmptyTextProgressView extends FrameLayout { textView.setText(LocaleController.getString("NoResult", R.string.NoResult)); addView(textView, LayoutHelper.createFrame(LayoutHelper.WRAP_CONTENT, LayoutHelper.WRAP_CONTENT)); - progressView.setAlpha(0f); - textView.setAlpha(0f); + AndroidUtilities.updateViewVisibilityAnimated(textView, false, 2f, false); + AndroidUtilities.updateViewVisibilityAnimated(progressView, false, 1f, false); setOnTouchListener((v, event) -> true); } public void showProgress() { - textView.animate().alpha(0f).setDuration(150).start(); - progressView.animate().alpha(1f).setDuration(150).start(); + showProgress(true); + } + + public void 
showProgress(boolean animated) { + AndroidUtilities.updateViewVisibilityAnimated(textView, false, 0.9f, animated); + AndroidUtilities.updateViewVisibilityAnimated(progressView, true, 1f, animated); } public void showTextView() { - textView.animate().alpha(1f).setDuration(150).start(); - progressView.animate().alpha(0f).setDuration(150).start(); + AndroidUtilities.updateViewVisibilityAnimated(textView, true, 0.9f, true); + AndroidUtilities.updateViewVisibilityAnimated(progressView, false, 1f, true); } public void setText(String text) { @@ -123,12 +127,16 @@ public class EmptyTextProgressView extends FrameLayout { int x = (width - child.getMeasuredWidth()) / 2; int y; - if (showAtPos == 2) { - y = (AndroidUtilities.dp(100) - child.getMeasuredHeight()) / 2 + getPaddingTop(); - } else if (showAtPos == 1) { - y = (height / 2 - child.getMeasuredHeight()) / 2 + getPaddingTop(); - } else { + if (child == progressView && progressView instanceof FlickerLoadingView) { y = (height - child.getMeasuredHeight()) / 2 + getPaddingTop(); + } else { + if (showAtPos == 2) { + y = (AndroidUtilities.dp(100) - child.getMeasuredHeight()) / 2 + getPaddingTop(); + } else if (showAtPos == 1) { + y = (height / 2 - child.getMeasuredHeight()) / 2 + getPaddingTop(); + } else { + y = (height - child.getMeasuredHeight()) / 2 + getPaddingTop(); + } } child.layout(x, y, x + child.getMeasuredWidth(), y + child.getMeasuredHeight()); } diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Components/FlickerLoadingView.java b/TMessagesProj/src/main/java/org/telegram/ui/Components/FlickerLoadingView.java index 4bfe6c082..a3e0bb8be 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/Components/FlickerLoadingView.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Components/FlickerLoadingView.java @@ -30,6 +30,7 @@ public class FlickerLoadingView extends View { public final static int INVITE_LINKS_TYPE = 9; public final static int USERS2_TYPE = 10; public final static int BOTS_MENU_TYPE = 11; + 
public final static int SHARE_ALERT_TYPE = 12; private int gradientWidth; private LinearGradient gradient; @@ -390,6 +391,26 @@ public class FlickerLoadingView extends View { break; } } + } else if (getViewType() == SHARE_ALERT_TYPE) { + int k = 0; + h += AndroidUtilities.dp(14); + while (h <= getMeasuredHeight()) { + int part = getMeasuredWidth() / 4; + for (int i = 0; i < 4; i++) { + float cx = part * i + part / 2f; + float cy = h + AndroidUtilities.dp(7) + AndroidUtilities.dp(56) / 2f; + canvas.drawCircle(cx, cy, AndroidUtilities.dp(56 / 2f), paint); + + float y = h + AndroidUtilities.dp(7) + AndroidUtilities.dp(56) + AndroidUtilities.dp(16); + AndroidUtilities.rectTmp.set(cx - AndroidUtilities.dp(24), y - AndroidUtilities.dp(4), cx + AndroidUtilities.dp(24), y + AndroidUtilities.dp(4)); + canvas.drawRoundRect(AndroidUtilities.rectTmp, AndroidUtilities.dp(4), AndroidUtilities.dp(4), paint); + } + h += getCellHeight(getMeasuredWidth()); + k++; + if (isSingleCell) { + break; + } + } } long newUpdateTime = SystemClock.elapsedRealtime(); @@ -453,6 +474,8 @@ public class FlickerLoadingView extends View { return AndroidUtilities.dp(61); } else if (getViewType() == BOTS_MENU_TYPE) { return AndroidUtilities.dp(36); + } else if (getViewType() == SHARE_ALERT_TYPE) { + return AndroidUtilities.dp(103); } return 0; } diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Components/FragmentContextView.java b/TMessagesProj/src/main/java/org/telegram/ui/Components/FragmentContextView.java index 95b92b414..d40dd8545 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/Components/FragmentContextView.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Components/FragmentContextView.java @@ -1472,7 +1472,7 @@ public class FragmentContextView extends FrameLayout implements NotificationCent } else { isMusic = true; if (playbackSpeedButton != null) { - if (messageObject.getDuration() >= 20 * 60) { + if (messageObject.getDuration() >= 10 * 60) { 
playbackSpeedButton.setAlpha(1.0f); playbackSpeedButton.setEnabled(true); titleTextView.setPadding(0, 0, AndroidUtilities.dp(44), 0); @@ -1779,6 +1779,7 @@ public class FragmentContextView extends FrameLayout implements NotificationCent updateScheduleTimeRunnable.run(); } } else { + timeLayout = null; joinButton.setVisibility(VISIBLE); titleTextView.setText(LocaleController.getString("VoipGroupVoiceChat", R.string.VoipGroupVoiceChat), false); if (call.call.participants_count == 0) { @@ -1786,6 +1787,7 @@ public class FragmentContextView extends FrameLayout implements NotificationCent } else { subtitleTextView.setText(LocaleController.formatPluralString("Participants", call.call.participants_count), false); } + frameLayout.invalidate(); } updateAvatars(avatars.wasDraw && updateAnimated); diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Components/HintView.java b/TMessagesProj/src/main/java/org/telegram/ui/Components/HintView.java index 84c8c6ef9..92452baaa 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/Components/HintView.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Components/HintView.java @@ -368,7 +368,7 @@ public class HintView extends FrameLayout { } else { setTranslationY(extraTranslationY + (translationY = top - getMeasuredHeight())); } - final int offset; + int offset; int leftMargin = 0; int rightMargin = 0; @@ -381,12 +381,18 @@ public class HintView extends FrameLayout { } else if (centerX > parentView.getMeasuredWidth() / 2) { if (currentType == TYPE_SEARCH_AS_LIST) { offset = (int) (parentWidth - getMeasuredWidth() * 1.5f); + if (offset < 0) { + offset = 0; + } } else { offset = parentWidth - getMeasuredWidth() - (leftMargin + rightMargin); } } else { if (currentType == TYPE_SEARCH_AS_LIST) { offset = centerX - getMeasuredWidth() / 2 - arrowImageView.getMeasuredWidth(); + if (offset < 0) { + offset = 0; + } } else { offset = 0; } diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Components/InstantCameraView.java 
b/TMessagesProj/src/main/java/org/telegram/ui/Components/InstantCameraView.java index 890657ec4..da932687c 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/Components/InstantCameraView.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Components/InstantCameraView.java @@ -13,6 +13,7 @@ import android.animation.AnimatorListenerAdapter; import android.animation.AnimatorSet; import android.animation.ObjectAnimator; import android.animation.ValueAnimator; +import android.annotation.SuppressLint; import android.annotation.TargetApi; import android.app.Activity; import android.content.Context; @@ -205,39 +206,18 @@ public class InstantCameraView extends FrameLayout implements NotificationCenter private BlurBehindDrawable blurBehindDrawable; + float pinchStartDistance; + + float pinchScale; + + boolean isInPinchToZoomTouchMode; + + private int pointerId1, pointerId2; + + @SuppressLint("ClickableViewAccessibility") public InstantCameraView(Context context, ChatActivity parentFragment) { super(context); parentView = parentFragment.getFragmentView(); - setOnTouchListener((v, event) -> { - if (event.getAction() == MotionEvent.ACTION_DOWN && baseFragment != null) { - if (videoPlayer != null) { - boolean mute = !videoPlayer.isMuted(); - videoPlayer.setMute(mute); - if (muteAnimation != null) { - muteAnimation.cancel(); - } - muteAnimation = new AnimatorSet(); - muteAnimation.playTogether( - ObjectAnimator.ofFloat(muteImageView, View.ALPHA, mute ? 1.0f : 0.0f), - ObjectAnimator.ofFloat(muteImageView, View.SCALE_X, mute ? 1.0f : 0.5f), - ObjectAnimator.ofFloat(muteImageView, View.SCALE_Y, mute ? 
1.0f : 0.5f)); - muteAnimation.addListener(new AnimatorListenerAdapter() { - @Override - public void onAnimationEnd(Animator animation) { - if (animation.equals(muteAnimation)) { - muteAnimation = null; - } - } - }); - muteAnimation.setDuration(180); - muteAnimation.setInterpolator(new DecelerateInterpolator()); - muteAnimation.start(); - } else { - baseFragment.checkRecordLocked(false); - } - } - return true; - }); setWillNotDraw(false); baseFragment = parentFragment; @@ -352,6 +332,19 @@ public class InstantCameraView extends FrameLayout implements NotificationCenter blurBehindDrawable = new BlurBehindDrawable(parentView, this); } + private boolean checkPointerIds(MotionEvent ev) { + if (ev.getPointerCount() < 2) { + return false; + } + if (pointerId1 == ev.getPointerId(0) && pointerId2 == ev.getPointerId(1)) { + return true; + } + if (pointerId1 == ev.getPointerId(1) && pointerId2 == ev.getPointerId(0)) { + return true; + } + return false; + } + @Override public boolean onInterceptTouchEvent(MotionEvent ev) { getParent().requestDisallowInterceptTouchEvent(true); @@ -894,7 +887,7 @@ public class InstantCameraView extends FrameLayout implements NotificationCenter } surfaceTexture.setDefaultBufferSize(previewSize.getWidth(), previewSize.getHeight()); - cameraSession = new CameraSession(selectedCamera, previewSize, pictureSize, ImageFormat.JPEG); + cameraSession = new CameraSession(selectedCamera, previewSize, pictureSize, ImageFormat.JPEG, true); cameraThread.setCurrentSession(cameraSession); CameraController.getInstance().openRound(cameraSession, surfaceTexture, () -> { if (cameraSession != null) { @@ -1912,7 +1905,7 @@ public class InstantCameraView extends FrameLayout implements NotificationCenter } private void createKeyframeThumb() { - if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.LOLLIPOP && frameCount % 33 == 0) { + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.LOLLIPOP && SharedConfig.getDevicePerformanceClass() != SharedConfig.PERFORMANCE_CLASS_LOW 
&& frameCount % 33 == 0) { GenerateKeyframeThumbTask task = new GenerateKeyframeThumbTask(); generateKeyframeThumbsQueue.postRunnable(task); } @@ -2475,4 +2468,106 @@ public class InstantCameraView extends FrameLayout implements NotificationCenter } } } + + + @Override + public boolean onTouchEvent(MotionEvent ev) { + if (ev.getAction() == MotionEvent.ACTION_DOWN && baseFragment != null) { + if (videoPlayer != null) { + boolean mute = !videoPlayer.isMuted(); + videoPlayer.setMute(mute); + if (muteAnimation != null) { + muteAnimation.cancel(); + } + muteAnimation = new AnimatorSet(); + muteAnimation.playTogether( + ObjectAnimator.ofFloat(muteImageView, View.ALPHA, mute ? 1.0f : 0.0f), + ObjectAnimator.ofFloat(muteImageView, View.SCALE_X, mute ? 1.0f : 0.5f), + ObjectAnimator.ofFloat(muteImageView, View.SCALE_Y, mute ? 1.0f : 0.5f)); + muteAnimation.addListener(new AnimatorListenerAdapter() { + @Override + public void onAnimationEnd(Animator animation) { + if (animation.equals(muteAnimation)) { + muteAnimation = null; + } + } + }); + muteAnimation.setDuration(180); + muteAnimation.setInterpolator(new DecelerateInterpolator()); + muteAnimation.start(); + } else { + //baseFragment.checkRecordLocked(false); + } + } + + if (ev.getActionMasked() == MotionEvent.ACTION_DOWN || ev.getActionMasked() == MotionEvent.ACTION_POINTER_DOWN) { + if (!isInPinchToZoomTouchMode && ev.getPointerCount() == 2 && finishZoomTransition == null && recording) { + pinchStartDistance = (float) Math.hypot(ev.getX(1) - ev.getX(0), ev.getY(1) - ev.getY(0)); + + pinchScale = 1f; + + pointerId1 = ev.getPointerId(0); + pointerId2 = ev.getPointerId(1); + isInPinchToZoomTouchMode = true; + } + if (ev.getActionMasked() == MotionEvent.ACTION_DOWN) { + AndroidUtilities.rectTmp.set(cameraContainer.getX(), cameraContainer.getY(), cameraContainer.getX() + cameraContainer.getMeasuredWidth(), cameraContainer.getY() + cameraContainer.getMeasuredHeight()); + return AndroidUtilities.rectTmp.contains(ev.getX(), 
ev.getY()); + } + return true; + } else if (ev.getActionMasked() == MotionEvent.ACTION_MOVE && isInPinchToZoomTouchMode) { + int index1 = -1; + int index2 = -1; + for (int i = 0; i < ev.getPointerCount(); i++) { + if (pointerId1 == ev.getPointerId(i)) { + index1 = i; + } + if (pointerId2 == ev.getPointerId(i)) { + index2 = i; + } + } + if (index1 == -1 || index2 == -1) { + isInPinchToZoomTouchMode = false; + + finishZoom(); + return false; + } + pinchScale = (float) Math.hypot(ev.getX(index2) - ev.getX(index1), ev.getY(index2) - ev.getY(index1)) / pinchStartDistance; + float zoom = Math.min(1f, Math.max(0, pinchScale - 1f)); + + cameraSession.setZoom(zoom); + } else if ((ev.getActionMasked() == MotionEvent.ACTION_UP || (ev.getActionMasked() == MotionEvent.ACTION_POINTER_UP && checkPointerIds(ev)) || ev.getActionMasked() == MotionEvent.ACTION_CANCEL) && isInPinchToZoomTouchMode) { + isInPinchToZoomTouchMode = false; + finishZoom(); + } + return isInPinchToZoomTouchMode; + } + + ValueAnimator finishZoomTransition; + public void finishZoom() { + if (finishZoomTransition != null) { + return; + } + + float zoom = Math.min(1f, Math.max(0, pinchScale - 1f)); + + if (zoom > 0f) { + finishZoomTransition = ValueAnimator.ofFloat(zoom, 0); + finishZoomTransition.addUpdateListener(valueAnimator -> { + cameraSession.setZoom((float) valueAnimator.getAnimatedValue()); + }); + finishZoomTransition.addListener(new AnimatorListenerAdapter() { + @Override + public void onAnimationEnd(Animator animation) { + if (finishZoomTransition != null) { + finishZoomTransition = null; + } + } + }); + + finishZoomTransition.setDuration(350); + finishZoomTransition.setInterpolator(CubicBezierInterpolator.DEFAULT); + finishZoomTransition.start(); + } + } } diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Components/JoinGroupAlert.java b/TMessagesProj/src/main/java/org/telegram/ui/Components/JoinGroupAlert.java index a271b8ab0..cb23c73a1 100644 --- 
a/TMessagesProj/src/main/java/org/telegram/ui/Components/JoinGroupAlert.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Components/JoinGroupAlert.java @@ -93,7 +93,11 @@ public class JoinGroupAlert extends BottomSheet { textView.setTextColor(Theme.getColor(Theme.key_dialogTextGray3)); textView.setSingleLine(true); textView.setEllipsize(TextUtils.TruncateAt.END); - textView.setText(LocaleController.formatPluralString("Members", participants_count)); + if (invite.channel || ChatObject.isChannel(invite.chat) && !invite.chat.megagroup) { + textView.setText(LocaleController.formatPluralString("Subscribers", participants_count)); + } else { + textView.setText(LocaleController.formatPluralString("Members", participants_count)); + } linearLayout.addView(textView, LayoutHelper.createLinear(LayoutHelper.WRAP_CONTENT, LayoutHelper.WRAP_CONTENT, Gravity.TOP | Gravity.CENTER_HORIZONTAL, 10, 3, 10, 20)); } diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Components/LinkActionView.java b/TMessagesProj/src/main/java/org/telegram/ui/Components/LinkActionView.java index 6a59f75c4..de9b4274b 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/Components/LinkActionView.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Components/LinkActionView.java @@ -496,8 +496,8 @@ public class LinkActionView extends LinearLayout { } else { avatarsContainer.avatarsImageView.setObject(i, UserConfig.selectedAccount, null); } - avatarsContainer.avatarsImageView.commitTransition(false); } + avatarsContainer.avatarsImageView.commitTransition(false); } } diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Components/MotionBackgroundDrawable.java b/TMessagesProj/src/main/java/org/telegram/ui/Components/MotionBackgroundDrawable.java index 77a6c2680..99f331925 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/Components/MotionBackgroundDrawable.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Components/MotionBackgroundDrawable.java @@ -27,6 +27,8 @@ import 
java.lang.ref.WeakReference; public class MotionBackgroundDrawable extends Drawable { + private final static int ANIMATION_CACHE_BITMAPS_COUNT = 3; + private int[] colors = new int[]{ 0xff426D57, 0xffF7E48B, @@ -37,7 +39,7 @@ public class MotionBackgroundDrawable extends Drawable { private long lastUpdateTime; private WeakReference parentView; - private CubicBezierInterpolator interpolator = new CubicBezierInterpolator(0.33, 0.0, 0.0, 1.0); + private final CubicBezierInterpolator interpolator = new CubicBezierInterpolator(0.33, 0.0, 0.0, 1.0); private int translationY; @@ -48,18 +50,27 @@ public class MotionBackgroundDrawable extends Drawable { private RectF rect = new RectF(); private Bitmap currentBitmap; + private Bitmap gradientFromBitmap; + private Bitmap[] gradientToBitmap = new Bitmap[ANIMATION_CACHE_BITMAPS_COUNT]; private Paint paint = new Paint(Paint.FILTER_BITMAP_FLAG); private Paint paint2 = new Paint(Paint.FILTER_BITMAP_FLAG); + private Paint paint3 = new Paint(); private int intensity = 100; + private Canvas gradientCanvas; + private Canvas gradientFromCanvas; private Bitmap patternBitmap; private BitmapShader bitmapShader; private BitmapShader gradientShader; private Matrix matrix; + private boolean fastAnimation; + private Canvas legacyCanvas; private Bitmap legacyBitmap; + private boolean rotationBack; + private boolean rotatingPreview; private android.graphics.Rect patternBounds = new android.graphics.Rect(); @@ -68,13 +79,10 @@ public class MotionBackgroundDrawable extends Drawable { public MotionBackgroundDrawable() { super(); - currentBitmap = Bitmap.createBitmap(60, 80, Bitmap.Config.ARGB_8888); - Utilities.generateGradient(currentBitmap, true, phase, interpolator.getInterpolation(posAnimationProgress), currentBitmap.getWidth(), currentBitmap.getHeight(), currentBitmap.getRowBytes(), colors); - if (Build.VERSION.SDK_INT >= 29) { - paint2.setBlendMode(BlendMode.SOFT_LIGHT); - } + init(); } + public MotionBackgroundDrawable(int c1, int c2, int 
c3, int c4, boolean preview) { super(); colors[0] = c1; @@ -82,11 +90,23 @@ public class MotionBackgroundDrawable extends Drawable { colors[2] = c3; colors[3] = c4; isPreview = preview; + init(); + } + + private void init() { + currentBitmap = Bitmap.createBitmap(60, 80, Bitmap.Config.ARGB_8888); + for (int i = 0; i < ANIMATION_CACHE_BITMAPS_COUNT; i++) { + gradientToBitmap[i] = Bitmap.createBitmap(60, 80, Bitmap.Config.ARGB_8888); + } + gradientCanvas = new Canvas(currentBitmap); + + gradientFromBitmap = Bitmap.createBitmap(60, 80, Bitmap.Config.ARGB_8888); + gradientFromCanvas = new Canvas(gradientFromBitmap); + + Utilities.generateGradient(currentBitmap, true, phase, interpolator.getInterpolation(posAnimationProgress), currentBitmap.getWidth(), currentBitmap.getHeight(), currentBitmap.getRowBytes(), colors); if (Build.VERSION.SDK_INT >= 29) { paint2.setBlendMode(BlendMode.SOFT_LIGHT); } - currentBitmap = Bitmap.createBitmap(60, 80, Bitmap.Config.ARGB_8888); - Utilities.generateGradient(currentBitmap, true, phase, interpolator.getInterpolation(posAnimationProgress), currentBitmap.getWidth(), currentBitmap.getHeight(), currentBitmap.getRowBytes(), colors); } public void setRoundRadius(int rad) { @@ -101,6 +121,10 @@ public class MotionBackgroundDrawable extends Drawable { return currentBitmap; } + public int getIntensity() { + return intensity; + } + public static boolean isDark(int color1, int color2, int color3, int color4) { int averageColor = AndroidUtilities.getAverageColor(color1, color2); if (color3 != 0) { @@ -147,12 +171,13 @@ public class MotionBackgroundDrawable extends Drawable { return phase; } - public void rotatePreview() { + public void rotatePreview(boolean back) { if (posAnimationProgress < 1.0f) { return; } rotatingPreview = true; posAnimationProgress = 0.0f; + rotationBack = back; invalidateParent(); } @@ -167,16 +192,44 @@ public class MotionBackgroundDrawable extends Drawable { } public void switchToNextPosition() { + 
switchToNextPosition(false); + } + + public void switchToNextPosition(boolean fast) { if (posAnimationProgress < 1.0f) { return; } rotatingPreview = false; + rotationBack = false; + fastAnimation = fast; posAnimationProgress = 0.0f; phase--; if (phase < 0) { phase = 7; } invalidateParent(); + gradientFromCanvas.drawBitmap(currentBitmap, 0, 0, null); + generateNextGradient(); + } + + private void generateNextGradient() { + for (int i = 0; i < ANIMATION_CACHE_BITMAPS_COUNT; i++) { + float p = (i + 1) / (float) ANIMATION_CACHE_BITMAPS_COUNT; + Utilities.generateGradient(gradientToBitmap[i], true, phase, p, currentBitmap.getWidth(), currentBitmap.getHeight(), currentBitmap.getRowBytes(), colors); + } + } + + public void switchToPrevPosition(boolean fast) { + if (posAnimationProgress < 1.0f) { + return; + } + rotatingPreview = false; + fastAnimation = fast; + rotationBack = true; + posAnimationProgress = 0.0f; + invalidateParent(); + Utilities.generateGradient(gradientFromBitmap, true, phase, 0, currentBitmap.getWidth(), currentBitmap.getHeight(), currentBitmap.getRowBytes(), colors); + generateNextGradient(); } public int[] getColors() { @@ -385,7 +438,7 @@ public class MotionBackgroundDrawable extends Drawable { } else { stageBefore = 3; } - posAnimationProgress += dt / 2000.0f; + posAnimationProgress += dt / (rotationBack ? 
1000.0f : 2000.0f); if (posAnimationProgress > 1.0f) { posAnimationProgress = 1.0f; } @@ -393,9 +446,16 @@ public class MotionBackgroundDrawable extends Drawable { if (stageBefore == 0 && progress > 0.25f || stageBefore == 1 && progress > 0.5f || stageBefore == 2 && progress > 0.75f) { - phase--; - if (phase < 0) { - phase = 7; + if (rotationBack) { + phase++; + if (phase > 7) { + phase = 0; + } + } else { + phase--; + if (phase < 0) { + phase = 7; + } } } if (progress <= 0.25f) { @@ -407,14 +467,54 @@ public class MotionBackgroundDrawable extends Drawable { } else { progress = (progress - 0.75f) / 0.25f; } + if (rotationBack) { + float prevProgress = progress; + progress = 1.0f - progress; + if (posAnimationProgress >= 1.0f) { + phase++; + if (phase > 7) { + phase = 0; + } + progress = 1.0f; + } + } } else { - posAnimationProgress += dt / 500.0f; + posAnimationProgress += dt / (fastAnimation ? 300.0f : 500.0f); if (posAnimationProgress > 1.0f) { posAnimationProgress = 1.0f; } progress = interpolator.getInterpolation(posAnimationProgress); + if (rotationBack) { + progress = 1.0f - progress; + if (posAnimationProgress >= 1.0f) { + phase++; + if (phase > 7) { + phase = 0; + } + progress = 1.0f; + } + } } - Utilities.generateGradient(currentBitmap, true, phase, progress, currentBitmap.getWidth(), currentBitmap.getHeight(), currentBitmap.getRowBytes(), colors); + + if (rotatingPreview) { + Utilities.generateGradient(currentBitmap, true, phase, progress, currentBitmap.getWidth(), currentBitmap.getHeight(), currentBitmap.getRowBytes(), colors); + } else { + if (progress != 1f) { + float part = 1f / ANIMATION_CACHE_BITMAPS_COUNT; + int i = (int) (progress / part); + if (i == 0) { + gradientCanvas.drawBitmap(gradientFromBitmap, 0, 0, null); + } else { + gradientCanvas.drawBitmap(gradientToBitmap[i - 1], 0, 0, null); + } + float alpha = (progress - i * part) / part; + paint3.setAlpha((int) (255 * alpha)); + gradientCanvas.drawBitmap(gradientToBitmap[i], 0, 0, paint3); + } 
else { + gradientCanvas.drawBitmap(gradientToBitmap[ANIMATION_CACHE_BITMAPS_COUNT - 1], 0, 0, paint3); + } + } + invalidateParent(); } } diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Components/Paint/Painting.java b/TMessagesProj/src/main/java/org/telegram/ui/Components/Paint/Painting.java index 2265da0fd..8cf2fead4 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/Components/Paint/Painting.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Components/Paint/Painting.java @@ -170,6 +170,7 @@ public class Painting { GLES20.glUniformMatrix4fv(shader.getUniform("mvpMatrix"), 1, false, FloatBuffer.wrap(projection)); GLES20.glUniform1i(shader.getUniform("texture"), 0); + renderState.viewportScale = renderView.getScaleX(); bounds = Render.RenderPath(path, renderState); } diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Components/Paint/Render.java b/TMessagesProj/src/main/java/org/telegram/ui/Components/Paint/Render.java index e0994401e..412ebad7a 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/Components/Paint/Render.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Components/Paint/Render.java @@ -45,7 +45,7 @@ public class Render { Point unitVector = new Point(1.0f, 1.0f, 0.0f); float vectorAngle = Math.abs(state.angle) > 0.0f ? state.angle : (float) Math.atan2(vector.y, vector.x); - float brushWeight = state.baseWeight * state.scale; + float brushWeight = state.baseWeight * state.scale * 1f / state.viewportScale; double step = Math.max(1.0f, state.spacing * brushWeight); if (distance > 0.0) { @@ -85,7 +85,7 @@ public class Render { } private static void PaintStamp(Point point, RenderState state) { - float brushWeight = state.baseWeight * state.scale; + float brushWeight = state.baseWeight * state.scale * 1f / state.viewportScale; PointF start = point.toPointF(); float angle = Math.abs(state.angle) > 0.0f ? 
state.angle : 0.0f; float alpha = state.alpha; diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Components/Paint/RenderState.java b/TMessagesProj/src/main/java/org/telegram/ui/Components/Paint/RenderState.java index dbe648b54..af3825513 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/Components/Paint/RenderState.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Components/Paint/RenderState.java @@ -13,6 +13,7 @@ public class RenderState { public float alpha; public float angle; public float scale; + public float viewportScale; public double remainder; diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Components/PasscodeView.java b/TMessagesProj/src/main/java/org/telegram/ui/Components/PasscodeView.java index 943ca171c..63d6b3718 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/Components/PasscodeView.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Components/PasscodeView.java @@ -12,10 +12,12 @@ import android.animation.Animator; import android.animation.AnimatorListenerAdapter; import android.animation.AnimatorSet; import android.animation.ObjectAnimator; +import android.animation.ValueAnimator; import android.app.Activity; import android.content.Context; import android.content.res.Configuration; import android.graphics.Canvas; +import android.graphics.Paint; import android.graphics.Rect; import android.graphics.Typeface; import android.graphics.drawable.ColorDrawable; @@ -26,9 +28,11 @@ import android.hardware.biometrics.BiometricPrompt; import android.os.Build; import android.os.SystemClock; import android.os.Vibrator; + import androidx.annotation.IdRes; import androidx.core.content.ContextCompat; import androidx.core.os.CancellationSignal; + import android.text.Editable; import android.text.InputFilter; import android.text.InputType; @@ -41,7 +45,10 @@ import android.view.HapticFeedbackConstants; import android.view.Menu; import android.view.MenuItem; import android.view.View; +import android.view.ViewAnimationUtils; 
+import android.view.ViewTreeObserver; import android.view.accessibility.AccessibilityNodeInfo; +import android.view.animation.DecelerateInterpolator; import android.view.inputmethod.EditorInfo; import android.widget.FrameLayout; import android.widget.ImageView; @@ -98,12 +105,7 @@ public class PasscodeView extends FrameLayout { textView.setAlpha(0); textView.setPivotX(AndroidUtilities.dp(25)); textView.setPivotY(AndroidUtilities.dp(25)); - addView(textView); - LayoutParams layoutParams = (LayoutParams) textView.getLayoutParams(); - layoutParams.width = AndroidUtilities.dp(50); - layoutParams.height = AndroidUtilities.dp(50); - layoutParams.gravity = Gravity.TOP | Gravity.LEFT; - textView.setLayoutParams(layoutParams); + addView(textView, LayoutHelper.createFrame(50, 50, Gravity.TOP | Gravity.LEFT)); characterTextViews.add(textView); textView = new TextView(context); @@ -114,12 +116,7 @@ public class PasscodeView extends FrameLayout { textView.setText(DOT); textView.setPivotX(AndroidUtilities.dp(25)); textView.setPivotY(AndroidUtilities.dp(25)); - addView(textView); - layoutParams = (LayoutParams) textView.getLayoutParams(); - layoutParams.width = AndroidUtilities.dp(50); - layoutParams.height = AndroidUtilities.dp(50); - layoutParams.gravity = Gravity.TOP | Gravity.LEFT; - textView.setLayoutParams(layoutParams); + addView(textView, LayoutHelper.createFrame(50, 50, Gravity.TOP | Gravity.LEFT)); dotTextViews.add(textView); } } @@ -251,9 +248,9 @@ public class PasscodeView extends FrameLayout { return stringBuilder.length(); } - public void eraseLastCharacter() { + public boolean eraseLastCharacter() { if (stringBuilder.length() == 0) { - return; + return false; } try { if (!NekoConfig.disableVibration) { @@ -320,6 +317,7 @@ public class PasscodeView extends FrameLayout { } }); currentAnimation.start(); + return true; } private void eraseAllCharacters(final boolean animated) { @@ -409,6 +407,7 @@ public class PasscodeView extends FrameLayout { } } + private 
FrameLayout container; private Drawable backgroundDrawable; private FrameLayout numbersFrameLayout; private ArrayList numberTextViews; @@ -416,12 +415,14 @@ public class PasscodeView extends FrameLayout { private ArrayList numberFrameLayouts; private FrameLayout passwordFrameLayout; private ImageView eraseView; + private ImageView fingerprintView; private EditTextBoldCursor passwordEditText; private AnimatingTextView passwordEditText2; private FrameLayout backgroundFrameLayout; private TextView passcodeTextView; private TextView retryTextView; private ImageView checkImage; + private ImageView fingerprintImage; private int keyboardHeight = 0; private CancellationSignal cancellationSignal; @@ -430,6 +431,10 @@ public class PasscodeView extends FrameLayout { private boolean selfCancelled; private AlertDialog fingerprintDialog; + private int imageY; + + private RLottieImageView imageView; + private Rect rect = new Rect(); private PasscodeViewDelegate delegate; @@ -437,6 +442,13 @@ public class PasscodeView extends FrameLayout { private final static int id_fingerprint_textview = 1000; private final static int id_fingerprint_imageview = 1001; + private static class InnerAnimator { + private AnimatorSet animatorSet; + private float startRadius; + } + + private ArrayList innerAnimators = new ArrayList<>(); + private static final @IdRes int[] ids = { R.id.passcode_btn_0, @@ -449,7 +461,8 @@ public class PasscodeView extends FrameLayout { R.id.passcode_btn_7, R.id.passcode_btn_8, R.id.passcode_btn_9, - R.id.passcode_btn_backspace + R.id.passcode_btn_backspace, + R.id.passcode_btn_fingerprint }; public PasscodeView(final Context context) { @@ -458,60 +471,64 @@ public class PasscodeView extends FrameLayout { setWillNotDraw(false); setVisibility(GONE); - backgroundFrameLayout = new FrameLayout(context); - addView(backgroundFrameLayout); - LayoutParams layoutParams = (LayoutParams) backgroundFrameLayout.getLayoutParams(); - layoutParams.width = LayoutHelper.MATCH_PARENT; - 
layoutParams.height = LayoutHelper.MATCH_PARENT; - backgroundFrameLayout.setLayoutParams(layoutParams); + backgroundFrameLayout = new FrameLayout(context) { + + private Paint paint = new Paint(); + + @Override + protected void onDraw(Canvas canvas) { + if (backgroundDrawable != null) { + if (backgroundDrawable instanceof MotionBackgroundDrawable || backgroundDrawable instanceof ColorDrawable || backgroundDrawable instanceof GradientDrawable) { + backgroundDrawable.setBounds(0, 0, getMeasuredWidth(), getMeasuredHeight()); + backgroundDrawable.draw(canvas); + } else { + float scaleX = (float) getMeasuredWidth() / (float) backgroundDrawable.getIntrinsicWidth(); + float scaleY = (float) (getMeasuredHeight() + keyboardHeight) / (float) backgroundDrawable.getIntrinsicHeight(); + float scale = Math.max(scaleX, scaleY); + int width = (int) Math.ceil(backgroundDrawable.getIntrinsicWidth() * scale); + int height = (int) Math.ceil(backgroundDrawable.getIntrinsicHeight() * scale); + int x = (getMeasuredWidth() - width) / 2; + int y = (getMeasuredHeight() - height + keyboardHeight) / 2; + backgroundDrawable.setBounds(x, y, x + width, y + height); + backgroundDrawable.draw(canvas); + } + } else { + super.onDraw(canvas); + } + canvas.drawRect(0, 0, getMeasuredWidth(), getMeasuredHeight(), paint); + } + + @Override + public void setBackgroundColor(int color) { + paint.setColor(color); + } + }; + backgroundFrameLayout.setWillNotDraw(false); + addView(backgroundFrameLayout, LayoutHelper.createFrame(LayoutHelper.MATCH_PARENT, LayoutHelper.MATCH_PARENT)); + + imageView = new RLottieImageView(context); + imageView.setAnimation(R.raw.passcode_lock_close, 58, 58); + imageView.setAutoRepeat(false); + addView(imageView, LayoutHelper.createFrame(58, 58, Gravity.LEFT | Gravity.TOP)); passwordFrameLayout = new FrameLayout(context); - addView(passwordFrameLayout); - layoutParams = (LayoutParams) passwordFrameLayout.getLayoutParams(); - layoutParams.width = LayoutHelper.MATCH_PARENT; - 
layoutParams.height = LayoutHelper.MATCH_PARENT; - layoutParams.gravity = Gravity.TOP | Gravity.LEFT; - passwordFrameLayout.setLayoutParams(layoutParams); - - ImageView imageView = new ImageView(context); - imageView.setScaleType(ImageView.ScaleType.FIT_XY); - imageView.setImageResource(R.drawable.passcode_logo); - passwordFrameLayout.addView(imageView); - layoutParams = (LayoutParams) imageView.getLayoutParams(); - if (AndroidUtilities.density < 1) { - layoutParams.width = AndroidUtilities.dp(30); - layoutParams.height = AndroidUtilities.dp(30); - } else { - layoutParams.width = AndroidUtilities.dp(40); - layoutParams.height = AndroidUtilities.dp(40); - } - layoutParams.gravity = Gravity.CENTER_HORIZONTAL | Gravity.BOTTOM; - layoutParams.bottomMargin = AndroidUtilities.dp(100); - imageView.setLayoutParams(layoutParams); + backgroundFrameLayout.addView(passwordFrameLayout, LayoutHelper.createFrame(LayoutHelper.MATCH_PARENT, LayoutHelper.MATCH_PARENT)); passcodeTextView = new TextView(context); passcodeTextView.setTextColor(0xffffffff); passcodeTextView.setTextSize(TypedValue.COMPLEX_UNIT_DIP, 14); passcodeTextView.setGravity(Gravity.CENTER_HORIZONTAL); - passwordFrameLayout.addView(passcodeTextView, LayoutHelper.createFrame(LayoutHelper.WRAP_CONTENT, LayoutHelper.WRAP_CONTENT, Gravity.BOTTOM | Gravity.CENTER_HORIZONTAL, 0, 0, 0, 62)); + passwordFrameLayout.addView(passcodeTextView, LayoutHelper.createFrame(LayoutHelper.WRAP_CONTENT, LayoutHelper.WRAP_CONTENT, Gravity.BOTTOM | Gravity.CENTER_HORIZONTAL, 0, 0, 0, 74)); retryTextView = new TextView(context); retryTextView.setTextColor(0xffffffff); retryTextView.setTextSize(TypedValue.COMPLEX_UNIT_DIP, 15); retryTextView.setGravity(Gravity.CENTER_HORIZONTAL); retryTextView.setVisibility(INVISIBLE); - addView(retryTextView, LayoutHelper.createFrame(LayoutHelper.WRAP_CONTENT, LayoutHelper.WRAP_CONTENT, Gravity.CENTER)); + backgroundFrameLayout.addView(retryTextView, LayoutHelper.createFrame(LayoutHelper.WRAP_CONTENT, 
LayoutHelper.WRAP_CONTENT, Gravity.CENTER)); passwordEditText2 = new AnimatingTextView(context); - passwordFrameLayout.addView(passwordEditText2); - layoutParams = (FrameLayout.LayoutParams) passwordEditText2.getLayoutParams(); - layoutParams.height = LayoutHelper.WRAP_CONTENT; - layoutParams.width = LayoutHelper.MATCH_PARENT; - layoutParams.leftMargin = AndroidUtilities.dp(70); - layoutParams.rightMargin = AndroidUtilities.dp(70); - layoutParams.bottomMargin = AndroidUtilities.dp(6); - layoutParams.gravity = Gravity.BOTTOM | Gravity.CENTER_HORIZONTAL; - passwordEditText2.setLayoutParams(layoutParams); + passwordFrameLayout.addView(passwordEditText2, LayoutHelper.createFrame(LayoutHelper.MATCH_PARENT, LayoutHelper.WRAP_CONTENT, Gravity.BOTTOM | Gravity.CENTER_HORIZONTAL, 70, 0, 70, 6)); passwordEditText = new EditTextBoldCursor(context); passwordEditText.setTextSize(TypedValue.COMPLEX_UNIT_DIP, 36); @@ -525,14 +542,7 @@ public class PasscodeView extends FrameLayout { passwordEditText.setBackgroundDrawable(null); passwordEditText.setCursorColor(0xffffffff); passwordEditText.setCursorSize(AndroidUtilities.dp(32)); - passwordFrameLayout.addView(passwordEditText); - layoutParams = (FrameLayout.LayoutParams) passwordEditText.getLayoutParams(); - layoutParams.height = LayoutHelper.WRAP_CONTENT; - layoutParams.width = LayoutHelper.MATCH_PARENT; - layoutParams.leftMargin = AndroidUtilities.dp(70); - layoutParams.rightMargin = AndroidUtilities.dp(70); - layoutParams.gravity = Gravity.BOTTOM | Gravity.CENTER_HORIZONTAL; - passwordEditText.setLayoutParams(layoutParams); + passwordFrameLayout.addView(passwordEditText, LayoutHelper.createFrame(LayoutHelper.MATCH_PARENT, LayoutHelper.WRAP_CONTENT, Gravity.BOTTOM | Gravity.CENTER_HORIZONTAL, 70, 0, 70, 0)); passwordEditText.setOnEditorActionListener((textView, i, keyEvent) -> { if (i == EditorInfo.IME_ACTION_DONE) { processDone(false); @@ -543,7 +553,13 @@ public class PasscodeView extends FrameLayout { 
passwordEditText.addTextChangedListener(new TextWatcher() { @Override public void beforeTextChanged(CharSequence s, int start, int count, int after) { - + if (backgroundDrawable instanceof MotionBackgroundDrawable) { + if (count == 0 && after == 1) { + ((MotionBackgroundDrawable) backgroundDrawable).switchToNextPosition(true); + } else if (count == 1 && after == 0) { + ((MotionBackgroundDrawable) backgroundDrawable).switchToPrevPosition(true); + } + } } @Override @@ -579,35 +595,24 @@ public class PasscodeView extends FrameLayout { checkImage.setImageResource(R.drawable.passcode_check); checkImage.setScaleType(ImageView.ScaleType.CENTER); checkImage.setBackgroundResource(R.drawable.bar_selector_lock); - passwordFrameLayout.addView(checkImage); - layoutParams = (LayoutParams) checkImage.getLayoutParams(); - layoutParams.width = AndroidUtilities.dp(60); - layoutParams.height = AndroidUtilities.dp(60); - layoutParams.bottomMargin = AndroidUtilities.dp(4); - layoutParams.rightMargin = AndroidUtilities.dp(10); - layoutParams.gravity = Gravity.BOTTOM | Gravity.RIGHT; - checkImage.setLayoutParams(layoutParams); + passwordFrameLayout.addView(checkImage, LayoutHelper.createFrame(60, 60, Gravity.BOTTOM | Gravity.RIGHT, 0, 0, 10, 4)); checkImage.setContentDescription(LocaleController.getString("Done", R.string.Done)); checkImage.setOnClickListener(v -> processDone(false)); + fingerprintImage = new ImageView(context); + fingerprintImage.setImageResource(R.drawable.fingerprint); + fingerprintImage.setScaleType(ImageView.ScaleType.CENTER); + fingerprintImage.setBackgroundResource(R.drawable.bar_selector_lock); + passwordFrameLayout.addView(fingerprintImage, LayoutHelper.createFrame(60, 60, Gravity.BOTTOM | Gravity.LEFT, 10, 0, 0, 4)); + fingerprintImage.setContentDescription(LocaleController.getString("AccDescrFingerprint", R.string.AccDescrFingerprint)); + fingerprintImage.setOnClickListener(v -> checkFingerprint()); + FrameLayout lineFrameLayout = new FrameLayout(context); 
lineFrameLayout.setBackgroundColor(0x26ffffff); - passwordFrameLayout.addView(lineFrameLayout); - layoutParams = (LayoutParams) lineFrameLayout.getLayoutParams(); - layoutParams.width = LayoutHelper.MATCH_PARENT; - layoutParams.height = AndroidUtilities.dp(1); - layoutParams.gravity = Gravity.BOTTOM | Gravity.LEFT; - layoutParams.leftMargin = AndroidUtilities.dp(20); - layoutParams.rightMargin = AndroidUtilities.dp(20); - lineFrameLayout.setLayoutParams(layoutParams); + passwordFrameLayout.addView(lineFrameLayout, LayoutHelper.createFrame(LayoutHelper.MATCH_PARENT, 1, Gravity.BOTTOM | Gravity.LEFT, 20, 0, 20, 0)); numbersFrameLayout = new FrameLayout(context); - addView(numbersFrameLayout); - layoutParams = (LayoutParams) numbersFrameLayout.getLayoutParams(); - layoutParams.width = LayoutHelper.MATCH_PARENT; - layoutParams.height = LayoutHelper.MATCH_PARENT; - layoutParams.gravity = Gravity.TOP | Gravity.LEFT; - numbersFrameLayout.setLayoutParams(layoutParams); + backgroundFrameLayout.addView(numbersFrameLayout, LayoutHelper.createFrame(LayoutHelper.MATCH_PARENT, LayoutHelper.MATCH_PARENT, Gravity.TOP | Gravity.LEFT)); lettersTextViews = new ArrayList<>(10); numberTextViews = new ArrayList<>(10); @@ -618,12 +623,7 @@ public class PasscodeView extends FrameLayout { textView.setTextSize(TypedValue.COMPLEX_UNIT_DIP, 36); textView.setGravity(Gravity.CENTER); textView.setText(String.format(Locale.US, "%d", a)); - numbersFrameLayout.addView(textView); - layoutParams = (LayoutParams) textView.getLayoutParams(); - layoutParams.width = AndroidUtilities.dp(50); - layoutParams.height = AndroidUtilities.dp(50); - layoutParams.gravity = Gravity.TOP | Gravity.LEFT; - textView.setLayoutParams(layoutParams); + numbersFrameLayout.addView(textView, LayoutHelper.createFrame(50, 50, Gravity.TOP | Gravity.LEFT)); textView.setImportantForAccessibility(IMPORTANT_FOR_ACCESSIBILITY_NO); numberTextViews.add(textView); @@ -631,12 +631,7 @@ public class PasscodeView extends FrameLayout { 
textView.setTextSize(TypedValue.COMPLEX_UNIT_DIP, 12); textView.setTextColor(0x7fffffff); textView.setGravity(Gravity.CENTER); - numbersFrameLayout.addView(textView); - layoutParams = (LayoutParams) textView.getLayoutParams(); - layoutParams.width = AndroidUtilities.dp(50); - layoutParams.height = AndroidUtilities.dp(20); - layoutParams.gravity = Gravity.TOP | Gravity.LEFT; - textView.setLayoutParams(layoutParams); + numbersFrameLayout.addView(textView, LayoutHelper.createFrame(50, 50, Gravity.TOP | Gravity.LEFT)); textView.setImportantForAccessibility(IMPORTANT_FOR_ACCESSIBILITY_NO); switch (a) { case 0: @@ -674,13 +669,16 @@ public class PasscodeView extends FrameLayout { eraseView = new ImageView(context); eraseView.setScaleType(ImageView.ScaleType.CENTER); eraseView.setImageResource(R.drawable.passcode_delete); - numbersFrameLayout.addView(eraseView); - layoutParams = (LayoutParams) eraseView.getLayoutParams(); - layoutParams.width = AndroidUtilities.dp(50); - layoutParams.height = AndroidUtilities.dp(50); - layoutParams.gravity = Gravity.TOP | Gravity.LEFT; - eraseView.setLayoutParams(layoutParams); - for (int a = 0; a < 11; a++) { + numbersFrameLayout.addView(eraseView, LayoutHelper.createFrame(50, 50, Gravity.TOP | Gravity.LEFT)); + + fingerprintView = new ImageView(context); + fingerprintView.setScaleType(ImageView.ScaleType.CENTER); + fingerprintView.setImageResource(R.drawable.fingerprint); + fingerprintView.setVisibility(GONE); + numbersFrameLayout.addView(fingerprintView, LayoutHelper.createFrame(50, 50, Gravity.TOP | Gravity.LEFT)); + checkFingerprintButton(); + + for (int a = 0; a < 12; a++) { FrameLayout frameLayout = new FrameLayout(context) { @Override public void onInitializeAccessibilityNodeInfo(AccessibilityNodeInfo info) { @@ -690,10 +688,16 @@ public class PasscodeView extends FrameLayout { }; frameLayout.setBackgroundResource(R.drawable.bar_selector_lock); frameLayout.setTag(a); - if (a == 10) { + if (a == 11) { + 
frameLayout.setContentDescription(LocaleController.getString("AccDescrFingerprint", R.string.AccDescrFingerprint)); + setNextFocus(frameLayout, R.id.passcode_btn_0); + } else if (a == 10) { frameLayout.setOnLongClickListener(v -> { passwordEditText.setText(""); passwordEditText2.eraseAllCharacters(true); + if (backgroundDrawable instanceof MotionBackgroundDrawable) { + ((MotionBackgroundDrawable) backgroundDrawable).switchToPrevPosition(true); + } return true; }); frameLayout.setContentDescription(LocaleController.getString("AccDescrBackspace", R.string.AccDescrBackspace)); @@ -703,7 +707,11 @@ public class PasscodeView extends FrameLayout { if (a == 0) { setNextFocus(frameLayout, R.id.passcode_btn_backspace); } else if (a == 9) { - setNextFocus(frameLayout, R.id.passcode_btn_0); + if (fingerprintView.getVisibility() == View.VISIBLE) { + setNextFocus(frameLayout, R.id.passcode_btn_fingerprint); + } else { + setNextFocus(frameLayout, R.id.passcode_btn_0); + } } else { setNextFocus(frameLayout, ids[a + 1]); } @@ -711,6 +719,7 @@ public class PasscodeView extends FrameLayout { frameLayout.setId(ids[a]); frameLayout.setOnClickListener(v -> { int tag = (Integer) v.getTag(); + boolean erased = false; switch (tag) { case 0: passwordEditText2.appendCharacter("0"); @@ -743,23 +752,32 @@ public class PasscodeView extends FrameLayout { passwordEditText2.appendCharacter("9"); break; case 10: - passwordEditText2.eraseLastCharacter(); + erased = passwordEditText2.eraseLastCharacter(); + break; + case 11: + checkFingerprint(); break; } if (passwordEditText2.length() == 4) { processDone(false); } + if (tag == 11) { + + } else if (tag == 10) { + if (erased && backgroundDrawable instanceof MotionBackgroundDrawable) { + ((MotionBackgroundDrawable) backgroundDrawable).switchToPrevPosition(true); + } + } else { + if (backgroundDrawable instanceof MotionBackgroundDrawable) { + ((MotionBackgroundDrawable) backgroundDrawable).switchToNextPosition(true); + } + } }); 
numberFrameLayouts.add(frameLayout); } - for (int a = 10; a >= 0; a--) { + for (int a = 11; a >= 0; a--) { FrameLayout frameLayout = numberFrameLayouts.get(a); - numbersFrameLayout.addView(frameLayout); - layoutParams = (LayoutParams) frameLayout.getLayoutParams(); - layoutParams.width = AndroidUtilities.dp(100); - layoutParams.height = AndroidUtilities.dp(100); - layoutParams.gravity = Gravity.TOP | Gravity.LEFT; - frameLayout.setLayoutParams(layoutParams); + numbersFrameLayout.addView(frameLayout, LayoutHelper.createFrame(100, 100, Gravity.TOP | Gravity.LEFT)); } } @@ -797,6 +815,9 @@ public class PasscodeView extends FrameLayout { passwordEditText.setText(""); passwordEditText2.eraseAllCharacters(true); onPasscodeError(); + if (backgroundDrawable instanceof MotionBackgroundDrawable) { + ((MotionBackgroundDrawable) backgroundDrawable).rotatePreview(true); + } return; } } @@ -804,19 +825,6 @@ public class PasscodeView extends FrameLayout { passwordEditText.clearFocus(); AndroidUtilities.hideKeyboard(passwordEditText); - AnimatorSet AnimatorSet = new AnimatorSet(); - AnimatorSet.setDuration(200); - AnimatorSet.playTogether( - ObjectAnimator.ofFloat(this, View.TRANSLATION_Y, AndroidUtilities.dp(20)), - ObjectAnimator.ofFloat(this, View.ALPHA, AndroidUtilities.dp(0.0f))); - AnimatorSet.addListener(new AnimatorListenerAdapter() { - @Override - public void onAnimationEnd(Animator animation) { - setVisibility(View.GONE); - } - }); - AnimatorSet.start(); - SharedConfig.appLocked = false; SharedConfig.saveConfig(); NotificationCenter.getGlobalInstance().postNotificationName(NotificationCenter.didSetPasscode); @@ -824,6 +832,21 @@ public class PasscodeView extends FrameLayout { if (delegate != null) { delegate.didAcceptedPassword(); } + + AndroidUtilities.runOnUIThread(() -> { + AnimatorSet AnimatorSet = new AnimatorSet(); + AnimatorSet.setDuration(200); + AnimatorSet.playTogether( + ObjectAnimator.ofFloat(this, View.TRANSLATION_Y, AndroidUtilities.dp(20)), + 
ObjectAnimator.ofFloat(this, View.ALPHA, AndroidUtilities.dp(0.0f))); + AnimatorSet.addListener(new AnimatorListenerAdapter() { + @Override + public void onAnimationEnd(Animator animation) { + setVisibility(View.GONE); + } + }); + AnimatorSet.start(); + }); } private void shakeTextView(final float x, final int num) { @@ -938,8 +961,11 @@ public class PasscodeView extends FrameLayout { } private void checkFingerprint() { + if (Build.VERSION.SDK_INT < 23) { + return; + } Activity parentActivity = (Activity) getContext(); - if (Build.VERSION.SDK_INT >= 23 && parentActivity != null && SharedConfig.useFingerprint && !ApplicationLoader.mainInterfacePaused) { + if (parentActivity != null && fingerprintView.getVisibility() == VISIBLE && !ApplicationLoader.mainInterfacePaused) { if (Build.VERSION.SDK_INT >= 28) { try { boolean useBiometric; @@ -1047,7 +1073,16 @@ public class PasscodeView extends FrameLayout { fingerprintManager.authenticate(null, 0, cancellationSignal, new FingerprintManagerCompat.AuthenticationCallback() { @Override public void onAuthenticationError(int errMsgId, CharSequence errString) { - if (!selfCancelled && errMsgId != 5) { + if (errMsgId == 10) { + try { + if (fingerprintDialog.isShowing()) { + fingerprintDialog.dismiss(); + } + } catch (Exception e) { + FileLog.e(e); + } + fingerprintDialog = null; + } else if (!selfCancelled && errMsgId != 5) { showFingerprintError(errString); } } @@ -1082,11 +1117,40 @@ public class PasscodeView extends FrameLayout { } } - public void onShow() { + public void onShow(boolean fingerprint, boolean animated) { + onShow(fingerprint, animated, -1, -1, null, null); + } + + private void checkFingerprintButton() { + Activity parentActivity = (Activity) getContext(); + if (Build.VERSION.SDK_INT >= 23 && parentActivity != null && SharedConfig.useFingerprint) { + try { + if (fingerprintDialog != null && fingerprintDialog.isShowing()) { + return; + } + } catch (Exception e) { + FileLog.e(e); + } + try { + 
FingerprintManagerCompat fingerprintManager = FingerprintManagerCompat.from(ApplicationLoader.applicationContext); + if (fingerprintManager.isHardwareDetected() && fingerprintManager.hasEnrolledFingerprints()) { + fingerprintView.setVisibility(VISIBLE); + } + } catch (Throwable e) { + FileLog.e(e); + } + } + if (SharedConfig.passcodeType == 1) { + fingerprintImage.setVisibility(fingerprintView.getVisibility()); + } + } + + public void onShow(boolean fingerprint, boolean animated, int x, int y, Runnable onShow, Runnable onStart) { + checkFingerprintButton(); checkRetryTextView(); Activity parentActivity = (Activity) getContext(); if (SharedConfig.passcodeType == 1) { - if (retryTextView.getVisibility() != VISIBLE && passwordEditText != null) { + if (!animated && retryTextView.getVisibility() != VISIBLE && passwordEditText != null) { passwordEditText.requestFocus(); AndroidUtilities.showKeyboard(passwordEditText); } @@ -1099,33 +1163,55 @@ public class PasscodeView extends FrameLayout { } } } - if (retryTextView.getVisibility() != VISIBLE) { + if (fingerprint && retryTextView.getVisibility() != VISIBLE) { checkFingerprint(); } if (getVisibility() == View.VISIBLE) { return; } - setAlpha(1.0f); setTranslationY(0); - if (Theme.isCustomTheme()) { + backgroundDrawable = null; + if (Theme.getCachedWallpaper() instanceof MotionBackgroundDrawable) { backgroundDrawable = Theme.getCachedWallpaper(); backgroundFrameLayout.setBackgroundColor(0xbf000000); + } else if (Theme.isCustomTheme() && !"CJz3BZ6YGEYBAAAABboWp6SAv04".equals(Theme.getSelectedBackgroundSlug()) && !"qeZWES8rGVIEAAAARfWlK1lnfiI".equals(Theme.getSelectedBackgroundSlug())) { + backgroundDrawable = Theme.getCurrentGradientWallpaper(); + if (backgroundDrawable == null) { + backgroundDrawable = Theme.getCachedWallpaper(); + } + if (backgroundDrawable instanceof BackgroundGradientDrawable) { + backgroundFrameLayout.setBackgroundColor(0x22000000); + } else { + backgroundFrameLayout.setBackgroundColor(0xbf000000); + } 
} else { String selectedBackgroundSlug = Theme.getSelectedBackgroundSlug(); - if (Theme.DEFAULT_BACKGROUND_SLUG.equals(selectedBackgroundSlug)) { + if (Theme.DEFAULT_BACKGROUND_SLUG.equals(selectedBackgroundSlug) || Theme.isPatternWallpaper()) { backgroundFrameLayout.setBackgroundColor(0xff517c9e); } else { backgroundDrawable = Theme.getCachedWallpaper(); - if (backgroundDrawable != null) { + if (backgroundDrawable instanceof BackgroundGradientDrawable) { + backgroundFrameLayout.setBackgroundColor(0x22000000); + } else if (backgroundDrawable != null) { backgroundFrameLayout.setBackgroundColor(0xbf000000); } else { backgroundFrameLayout.setBackgroundColor(0xff517c9e); } } } + if (backgroundDrawable instanceof MotionBackgroundDrawable) { + MotionBackgroundDrawable drawable = (MotionBackgroundDrawable) backgroundDrawable; + int[] colors = drawable.getColors(); + backgroundDrawable = new MotionBackgroundDrawable(colors[0], colors[1], colors[2], colors[3], false); + if (drawable.hasPattern() && drawable.getIntensity() < 0) { + backgroundFrameLayout.setBackgroundColor(0x7f000000); + } else { + backgroundFrameLayout.setBackgroundColor(0x22000000); + } + ((MotionBackgroundDrawable) backgroundDrawable).setParentView(backgroundFrameLayout); + } - passcodeTextView.setText(LocaleController.getString("EnterYourPasscode", R.string.EnterYourPasscode)); - + passcodeTextView.setText(LocaleController.getString("EnterYourTelegramPasscode", R.string.EnterYourTelegramPasscode)); if (SharedConfig.passcodeType == 0) { if (retryTextView.getVisibility() != VISIBLE) { @@ -1134,6 +1220,7 @@ public class PasscodeView extends FrameLayout { passwordEditText.setVisibility(GONE); passwordEditText2.setVisibility(VISIBLE); checkImage.setVisibility(GONE); + fingerprintImage.setVisibility(GONE); } else if (SharedConfig.passcodeType == 1) { passwordEditText.setFilters(new InputFilter[0]); passwordEditText.setInputType(InputType.TYPE_CLASS_TEXT | InputType.TYPE_TEXT_VARIATION_PASSWORD); @@ -1143,11 
+1230,150 @@ public class PasscodeView extends FrameLayout { passwordEditText.setVisibility(VISIBLE); passwordEditText2.setVisibility(GONE); checkImage.setVisibility(VISIBLE); + fingerprintImage.setVisibility(fingerprintView.getVisibility()); } setVisibility(VISIBLE); passwordEditText.setTransformationMethod(PasswordTransformationMethod.getInstance()); passwordEditText.setText(""); passwordEditText2.eraseAllCharacters(false); + if (animated) { + setAlpha(0.0f); + getViewTreeObserver().addOnGlobalLayoutListener(new ViewTreeObserver.OnGlobalLayoutListener() { + @Override + public void onGlobalLayout() { + setAlpha(1.0f); + getViewTreeObserver().removeOnGlobalLayoutListener(this); + imageView.setProgress(0); + imageView.playAnimation(); + AndroidUtilities.runOnUIThread(() -> imageView.performHapticFeedback(HapticFeedbackConstants.KEYBOARD_TAP, HapticFeedbackConstants.FLAG_IGNORE_GLOBAL_SETTING), 350); + AnimatorSet animatorSet = new AnimatorSet(); + ArrayList animators = new ArrayList<>(); + int w = AndroidUtilities.displaySize.x; + int h = AndroidUtilities.displaySize.y + (Build.VERSION.SDK_INT >= 21 ? 
AndroidUtilities.statusBarHeight : 0); + if (Build.VERSION.SDK_INT >= 21) { + double d1 = Math.sqrt((w - x) * (w - x) + (h - y) * (h - y)); + double d2 = Math.sqrt(x * x + (h - y) * (h - y)); + double d3 = Math.sqrt(x * x + y * y); + double d4 = Math.sqrt((w - x) * (w - x) + y * y); + double finalRadius = Math.max(Math.max(Math.max(d1, d2), d3), d4); + + innerAnimators.clear(); + + for (int a = -1, N = numbersFrameLayout.getChildCount(); a < N; a++) { + View child; + if (a == -1) { + child = passcodeTextView; + } else { + child = numbersFrameLayout.getChildAt(a); + } + if (!(child instanceof TextView || child instanceof ImageView)) { + continue; + } + child.setScaleX(0.7f); + child.setScaleY(0.7f); + child.setAlpha(0.0f); + InnerAnimator innerAnimator = new InnerAnimator(); + child.getLocationInWindow(pos); + int buttonX = pos[0] + child.getMeasuredWidth() / 2; + int buttonY = pos[1] + child.getMeasuredHeight() / 2; + innerAnimator.startRadius = (float) Math.sqrt((x - buttonX) * (x - buttonX) + (y - buttonY) * (y - buttonY)) - AndroidUtilities.dp(40); + + AnimatorSet animatorSetInner; + if (a != -1) { + animatorSetInner = new AnimatorSet(); + animatorSetInner.playTogether( + ObjectAnimator.ofFloat(child, View.SCALE_X, 1.0f), + ObjectAnimator.ofFloat(child, View.SCALE_Y, 1.0f)); + animatorSetInner.setDuration(140); + animatorSetInner.setInterpolator(new DecelerateInterpolator()); + } else { + animatorSetInner = null; + } + + innerAnimator.animatorSet = new AnimatorSet(); + innerAnimator.animatorSet.playTogether(ObjectAnimator.ofFloat(child, View.SCALE_X, a == -1 ? 0.9f : 0.6f, a == -1 ? 1.0f : 1.04f), + ObjectAnimator.ofFloat(child, View.SCALE_Y, a == -1 ? 0.9f : 0.6f, a == -1 ? 
1.0f : 1.04f), + ObjectAnimator.ofFloat(child, View.ALPHA, 0.0f, 1.0f)); + innerAnimator.animatorSet.addListener(new AnimatorListenerAdapter() { + @Override + public void onAnimationEnd(Animator animation) { + if (animatorSetInner != null) { + animatorSetInner.start(); + } + } + }); + innerAnimator.animatorSet.setDuration(a == -1 ? 232 : 200); + innerAnimator.animatorSet.setInterpolator(new DecelerateInterpolator()); + innerAnimators.add(innerAnimator); + } + + animators.add(ViewAnimationUtils.createCircularReveal(backgroundFrameLayout, x, y, 0, (float) finalRadius)); + ValueAnimator animator = ValueAnimator.ofFloat(0, 1f); + animators.add(animator); + animator.addUpdateListener(animation -> { + float fraction = animation.getAnimatedFraction(); + double rad = finalRadius * fraction; + + for (int a = 0; a < innerAnimators.size(); a++) { + InnerAnimator innerAnimator = innerAnimators.get(a); + if (innerAnimator.startRadius > rad) { + continue; + } + innerAnimator.animatorSet.start(); + innerAnimators.remove(a); + a--; + } + }); + animatorSet.setInterpolator(Easings.easeInOutQuad); + animatorSet.setDuration(498); + } else { + animators.add(ObjectAnimator.ofFloat(backgroundFrameLayout, View.ALPHA, 0.0f, 1.0f)); + animatorSet.setDuration(350); + } + animatorSet.playTogether(animators); + animatorSet.addListener(new AnimatorListenerAdapter() { + @Override + public void onAnimationEnd(Animator animation) { + if (onShow != null) { + onShow.run(); + } + if (SharedConfig.passcodeType == 1 && retryTextView.getVisibility() != VISIBLE && passwordEditText != null) { + passwordEditText.requestFocus(); + AndroidUtilities.showKeyboard(passwordEditText); + } + } + }); + animatorSet.start(); + + AnimatorSet animatorSet2 = new AnimatorSet(); + animatorSet2.setDuration(332); + + float ix; + if (!AndroidUtilities.isTablet() && getContext().getResources().getConfiguration().orientation == Configuration.ORIENTATION_LANDSCAPE) { + ix = (SharedConfig.passcodeType == 0 ? 
w / 2 : w) / 2 - AndroidUtilities.dp(30); + } else { + ix = w / 2 - AndroidUtilities.dp(29); + } + + animatorSet2.playTogether(ObjectAnimator.ofFloat(imageView, View.TRANSLATION_X, x - AndroidUtilities.dp(29), ix), + ObjectAnimator.ofFloat(imageView, View.TRANSLATION_Y, y - AndroidUtilities.dp(29), imageY), + ObjectAnimator.ofFloat(imageView, View.SCALE_X, 0.5f, 1.0f), + ObjectAnimator.ofFloat(imageView, View.SCALE_Y, 0.5f, 1.0f)); + animatorSet2.setInterpolator(CubicBezierInterpolator.EASE_OUT); + animatorSet2.start(); + } + }); + requestLayout(); + } else { + setAlpha(1.0f); + imageView.setScaleX(1.0f); + imageView.setScaleY(1.0f); + imageView.stopAnimation(); + imageView.getAnimatedDrawable().setCurrentFrame(38, false); + if (onShow != null) { + onShow.run(); + } + } setOnTouchListener((v, event) -> true); } @@ -1168,10 +1394,12 @@ public class PasscodeView extends FrameLayout { LayoutParams layoutParams; if (!AndroidUtilities.isTablet() && getContext().getResources().getConfiguration().orientation == Configuration.ORIENTATION_LANDSCAPE) { + imageView.setTranslationX((SharedConfig.passcodeType == 0 ? width / 2 : width) / 2 - AndroidUtilities.dp(29)); + layoutParams = (LayoutParams) passwordFrameLayout.getLayoutParams(); layoutParams.width = SharedConfig.passcodeType == 0 ? width / 2 : width; layoutParams.height = AndroidUtilities.dp(140); - layoutParams.topMargin = (height - AndroidUtilities.dp(140)) / 2; + layoutParams.topMargin = (height - AndroidUtilities.dp(140)) / 2 + (SharedConfig.passcodeType == 0 ? 
AndroidUtilities.dp(40) : 0); passwordFrameLayout.setLayoutParams(layoutParams); layoutParams = (LayoutParams) numbersFrameLayout.getLayoutParams(); @@ -1181,6 +1409,8 @@ public class PasscodeView extends FrameLayout { layoutParams.width = width / 2; numbersFrameLayout.setLayoutParams(layoutParams); } else { + imageView.setTranslationX(width / 2 - AndroidUtilities.dp(29)); + int top = 0; int left = 0; if (AndroidUtilities.isTablet()) { @@ -1194,7 +1424,7 @@ public class PasscodeView extends FrameLayout { } } layoutParams = (LayoutParams) passwordFrameLayout.getLayoutParams(); - layoutParams.height = height / 3; + layoutParams.height = height / 3 + (SharedConfig.passcodeType == 0 ? AndroidUtilities.dp(40) : 0); layoutParams.width = width; layoutParams.topMargin = top; layoutParams.leftMargin = left; @@ -1202,9 +1432,13 @@ public class PasscodeView extends FrameLayout { passwordFrameLayout.setLayoutParams(layoutParams); layoutParams = (LayoutParams) numbersFrameLayout.getLayoutParams(); - layoutParams.height = height / 3 * 2; + layoutParams.height = height / 3 * 2 + AndroidUtilities.dp(20); layoutParams.leftMargin = left; - layoutParams.topMargin = height - layoutParams.height + top; + if (AndroidUtilities.isTablet()) { + layoutParams.topMargin = height - layoutParams.height + top; + } else { + layoutParams.topMargin = height - layoutParams.height + top + (SharedConfig.passcodeType == 0 ? 
AndroidUtilities.dp(40) : 0); + } layoutParams.width = width; numbersFrameLayout.setLayoutParams(layoutParams); } @@ -1212,13 +1446,15 @@ public class PasscodeView extends FrameLayout { int sizeBetweenNumbersX = (layoutParams.width - AndroidUtilities.dp(50) * 3) / 4; int sizeBetweenNumbersY = (layoutParams.height - AndroidUtilities.dp(50) * 4) / 5; - for (int a = 0; a < 11; a++) { + for (int a = 0; a < 12; a++) { LayoutParams layoutParams1; int num; if (a == 0) { num = 10; } else if (a == 10) { num = 11; + } else if (a == 11) { + num = 9; } else { num = a - 1; } @@ -1235,12 +1471,18 @@ public class PasscodeView extends FrameLayout { layoutParams1.topMargin += AndroidUtilities.dp(40); textView.setLayoutParams(layoutParams); textView1.setLayoutParams(layoutParams1); - } else { + } else if (a == 10) { layoutParams = (LayoutParams) eraseView.getLayoutParams(); top = layoutParams.topMargin = sizeBetweenNumbersY + (sizeBetweenNumbersY + AndroidUtilities.dp(50)) * row + AndroidUtilities.dp(8); layoutParams.leftMargin = sizeBetweenNumbersX + (sizeBetweenNumbersX + AndroidUtilities.dp(50)) * col; top -= AndroidUtilities.dp(8); eraseView.setLayoutParams(layoutParams); + } else { + layoutParams = (LayoutParams) fingerprintView.getLayoutParams(); + top = layoutParams.topMargin = sizeBetweenNumbersY + (sizeBetweenNumbersY + AndroidUtilities.dp(50)) * row + AndroidUtilities.dp(8); + layoutParams.leftMargin = sizeBetweenNumbersX + (sizeBetweenNumbersX + AndroidUtilities.dp(50)) * col; + top -= AndroidUtilities.dp(8); + fingerprintView.setLayoutParams(layoutParams); } FrameLayout frameLayout = numberFrameLayouts.get(a); @@ -1253,6 +1495,8 @@ public class PasscodeView extends FrameLayout { super.onMeasure(widthMeasureSpec, heightMeasureSpec); } + private int[] pos = new int[2]; + @Override protected void onLayout(boolean changed, int left, int top, int right, int bottom) { View rootView = getRootView(); @@ -1271,30 +1515,12 @@ public class PasscodeView extends FrameLayout { } 
super.onLayout(changed, left, top, right, bottom); - } - @Override - protected void onDraw(Canvas canvas) { - if (getVisibility() != VISIBLE) { - return; - } - if (backgroundDrawable != null) { - if (backgroundDrawable instanceof MotionBackgroundDrawable || backgroundDrawable instanceof ColorDrawable || backgroundDrawable instanceof GradientDrawable) { - backgroundDrawable.setBounds(0, 0, getMeasuredWidth(), getMeasuredHeight()); - backgroundDrawable.draw(canvas); - } else { - float scaleX = (float) getMeasuredWidth() / (float) backgroundDrawable.getIntrinsicWidth(); - float scaleY = (float) (getMeasuredHeight() + keyboardHeight) / (float) backgroundDrawable.getIntrinsicHeight(); - float scale = scaleX < scaleY ? scaleY : scaleX; - int width = (int) Math.ceil(backgroundDrawable.getIntrinsicWidth() * scale); - int height = (int) Math.ceil(backgroundDrawable.getIntrinsicHeight() * scale); - int x = (getMeasuredWidth() - width) / 2; - int y = (getMeasuredHeight() - height + keyboardHeight) / 2; - backgroundDrawable.setBounds(x, y, x + width, y + height); - backgroundDrawable.draw(canvas); - } + passcodeTextView.getLocationInWindow(pos); + if (!AndroidUtilities.isTablet() && getContext().getResources().getConfiguration().orientation == Configuration.ORIENTATION_LANDSCAPE) { + imageView.setTranslationY(imageY = pos[1] - AndroidUtilities.dp(100)); } else { - super.onDraw(canvas); + imageView.setTranslationY(imageY = pos[1] - AndroidUtilities.dp(100)); } } } diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Components/PollVotesAlert.java b/TMessagesProj/src/main/java/org/telegram/ui/Components/PollVotesAlert.java index 9c6956bf7..074914030 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/Components/PollVotesAlert.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Components/PollVotesAlert.java @@ -40,7 +40,6 @@ import android.widget.FrameLayout; import android.widget.TextView; import org.telegram.messenger.AndroidUtilities; -import 
org.telegram.messenger.ChatObject; import org.telegram.messenger.Emoji; import org.telegram.messenger.LocaleController; import org.telegram.messenger.MessageObject; @@ -448,21 +447,7 @@ public class PollVotesAlert extends BottomSheet { TLRPC.TL_messageMediaPoll mediaPoll = (TLRPC.TL_messageMediaPoll) messageObject.messageOwner.media; poll = mediaPoll.poll; Context context = parentFragment.getParentActivity(); - - TLRPC.Chat chat = parentFragment.getCurrentChat(); - TLRPC.User user = parentFragment.getCurrentUser(); - if (ChatObject.isChannel(chat)) { - peer = new TLRPC.TL_inputPeerChannel(); - peer.channel_id = chat.id; - peer.access_hash = chat.access_hash; - } else if (chat != null) { - peer = new TLRPC.TL_inputPeerChat(); - peer.chat_id = chat.id; - } else { - peer = new TLRPC.TL_inputPeerUser(); - peer.user_id = user.id; - peer.access_hash = user.access_hash; - } + peer = parentFragment.getMessagesController().getInputPeer((int) message.getDialogId()); ArrayList loadedVoters = new ArrayList<>(); int count = mediaPoll.results.results.size(); diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Components/RecyclerListView.java b/TMessagesProj/src/main/java/org/telegram/ui/Components/RecyclerListView.java index 979191a98..0a5629369 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/Components/RecyclerListView.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Components/RecyclerListView.java @@ -25,6 +25,7 @@ import android.os.SystemClock; import android.text.Layout; import android.text.StaticLayout; import android.text.TextPaint; +import android.util.SparseBooleanArray; import android.util.SparseIntArray; import android.util.StateSet; import android.view.GestureDetector; @@ -47,6 +48,7 @@ import org.telegram.ui.ActionBar.Theme; import java.lang.reflect.Field; import java.lang.reflect.Method; import java.util.ArrayList; +import java.util.HashSet; import androidx.recyclerview.widget.LinearLayoutManager; import 
androidx.recyclerview.widget.RecyclerView; @@ -126,6 +128,20 @@ public class RecyclerListView extends RecyclerView { private int emptyViewAnimationType; private int selectorRadius; private int topBottomSelectorRadius; + private int touchSlop; + + boolean useRelativePositions; + boolean multiSelectionGesture; + boolean multiSelectionGestureStarted; + int startSelectionFrom; + int currentSelectedPosition; + onMultiSelectionChanged multiSelectionListener; + boolean multiselectScrollRunning; + boolean multiselectScrollToTop; + float lastX = Float.MAX_VALUE; + float lastY = Float.MAX_VALUE; + int[] listPaddings; + HashSet selectedPositions; public interface OnItemClickListener { void onItemClick(View view, int position); @@ -953,6 +969,7 @@ public class RecyclerListView extends RecyclerView { fastScroll.getLayoutParams().height = height; fastScroll.measure(MeasureSpec.makeMeasureSpec(AndroidUtilities.dp(132), MeasureSpec.EXACTLY), MeasureSpec.makeMeasureSpec(height, MeasureSpec.EXACTLY)); } + touchSlop = ViewConfiguration.get(getContext()).getScaledTouchSlop(); } @Override @@ -1913,4 +1930,169 @@ public class RecyclerListView extends RecyclerView { fastScroll.setTranslationY(translationY); } } + + public void startMultiselect(int positionFrom, boolean useRelativePositions, onMultiSelectionChanged multiSelectionListener) { + if (!multiSelectionGesture) { + listPaddings = new int[2]; + selectedPositions = new HashSet<>(); + + getParent().requestDisallowInterceptTouchEvent(true); + + this.multiSelectionListener = multiSelectionListener; + multiSelectionGesture = true; + startSelectionFrom = currentSelectedPosition = positionFrom; + } + this.useRelativePositions = useRelativePositions; + } + + + @Override + public boolean onTouchEvent(MotionEvent e) { + if (multiSelectionGesture && e.getAction() != MotionEvent.ACTION_UP && e.getAction() != MotionEvent.ACTION_CANCEL) { + if (lastX == Float.MAX_VALUE && lastY == Float.MAX_VALUE) { + lastX = e.getX(); + lastY = e.getY(); + } + 
if (!multiSelectionGestureStarted && Math.abs(e.getY() - lastY) > touchSlop) { + multiSelectionGestureStarted = true; + getParent().requestDisallowInterceptTouchEvent(true); + } + if (multiSelectionGestureStarted) { + chekMultiselect(e.getX(), e.getY()); + multiSelectionListener.getPaddings(listPaddings); + if (e.getY() > getMeasuredHeight() - AndroidUtilities.dp(56) - listPaddings[1] && !(currentSelectedPosition < startSelectionFrom && multiSelectionListener.limitReached())) { + startMultiselectScroll(false); + } else if (e.getY() < AndroidUtilities.dp(56) + listPaddings[0] && !(currentSelectedPosition > startSelectionFrom && multiSelectionListener.limitReached())) { + startMultiselectScroll(true); + } else { + cancelMultiselectScroll(); + } + } + return true; + } + lastX = Float.MAX_VALUE; + lastY = Float.MAX_VALUE; + multiSelectionGesture = false; + multiSelectionGestureStarted = false; + getParent().requestDisallowInterceptTouchEvent(false); + cancelMultiselectScroll(); + return super.onTouchEvent(e); + } + + private boolean chekMultiselect(float x, float y) { + y = Math.min(getMeasuredHeight() - listPaddings[1], Math.max(y, listPaddings[0])); + x = Math.min(getMeasuredWidth(), Math.max(x, 0)); + for (int i = 0; i < getChildCount(); i++) { + multiSelectionListener.getPaddings(listPaddings); + if (useRelativePositions) { + + } else { + View child = getChildAt(i); + AndroidUtilities.rectTmp.set(child.getLeft(), child.getTop(), child.getLeft() + child.getMeasuredWidth(), child.getTop() + child.getMeasuredHeight()); + + if (AndroidUtilities.rectTmp.contains(x, y)) { + int position = getChildLayoutPosition(child); + + if (currentSelectedPosition != position) { + boolean selectionFromTop = currentSelectedPosition > startSelectionFrom || position > startSelectionFrom; + position = multiSelectionListener.checkPosition(position, selectionFromTop); + + if (selectionFromTop) { + if (position > currentSelectedPosition) { + if (!multiSelectionListener.limitReached()) { + 
for (int k = currentSelectedPosition + 1; k <= position; k++) { + if (k == startSelectionFrom) { + continue; + } + if (multiSelectionListener.canSelect(k)) { + multiSelectionListener.onSelectionChanged(k, true, x, y); + } + } + } + } else { + for (int k = currentSelectedPosition; k > position; k--) { + if (k == startSelectionFrom) { + continue; + } + if (multiSelectionListener.canSelect(k)) { + multiSelectionListener.onSelectionChanged(k, false, x, y); + } + } + } + } else { + if (position > currentSelectedPosition) { + for (int k = currentSelectedPosition; k < position; k++) { + if (k == startSelectionFrom) { + continue; + } + if (multiSelectionListener.canSelect(k)) { + multiSelectionListener.onSelectionChanged(k, false, x, y); + } + } + } else { + if (!multiSelectionListener.limitReached()) { + for (int k = currentSelectedPosition - 1; k >= position; k--) { + if (k == startSelectionFrom) { + continue; + } + if (multiSelectionListener.canSelect(k)) { + multiSelectionListener.onSelectionChanged(k, true, x, y); + } + } + } + } + } + } + if (!multiSelectionListener.limitReached()) { + currentSelectedPosition = position; + } + break; + } + } + + } + return true; + } + + private void cancelMultiselectScroll() { + multiselectScrollRunning = false; + AndroidUtilities.cancelRunOnUIThread(scroller); + } + + Runnable scroller = new Runnable() { + @Override + public void run() { + int dy; + multiSelectionListener.getPaddings(listPaddings); + if (multiselectScrollToTop) { + dy = -AndroidUtilities.dp(12f); + chekMultiselect(0, listPaddings[0]); + } else { + dy = AndroidUtilities.dp(12f); + chekMultiselect(0, getMeasuredHeight() - listPaddings[1]); + } + multiSelectionListener.scrollBy(dy); + if (multiselectScrollRunning) { + AndroidUtilities.runOnUIThread(scroller); + } + } + }; + + private void startMultiselectScroll(boolean top) { + multiselectScrollToTop = top; + if (!multiselectScrollRunning) { + multiselectScrollRunning = true; + 
AndroidUtilities.cancelRunOnUIThread(scroller); + AndroidUtilities.runOnUIThread(scroller); + } + } + + public interface onMultiSelectionChanged { + void onSelectionChanged(int position, boolean selected, float x, float y); + boolean canSelect(int position); + int checkPosition(int position, boolean selectionFromTop); + boolean limitReached(); + void getPaddings(int paddings[]); + void scrollBy(int dy); + } } diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Components/ShareAlert.java b/TMessagesProj/src/main/java/org/telegram/ui/Components/ShareAlert.java index 9a2153e68..3c2fefa99 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/Components/ShareAlert.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Components/ShareAlert.java @@ -56,6 +56,8 @@ import androidx.core.view.ViewCompat; import androidx.recyclerview.widget.GridLayoutManager; import androidx.recyclerview.widget.RecyclerView; +import com.google.android.exoplayer2.util.Log; + import org.telegram.SQLite.SQLiteCursor; import org.telegram.messenger.AccountInstance; import org.telegram.messenger.AndroidUtilities; @@ -149,6 +151,8 @@ public class ShareAlert extends BottomSheet implements NotificationCenter.Notifi private int topOffset; + RecyclerItemsEnterAnimator recyclerItemsEnterAnimator; + public interface ShareAlertDelegate { default void didShare() { @@ -864,6 +868,19 @@ public class ShareAlert extends BottomSheet implements NotificationCenter.Notifi frameLayout.addView(searchView, LayoutHelper.createFrame(LayoutHelper.MATCH_PARENT, 58, Gravity.BOTTOM | Gravity.LEFT)); gridView = new RecyclerListView(context) { + + @Override + protected void dispatchDraw(Canvas canvas) { + recyclerItemsEnterAnimator.dispatchDraw(); + super.dispatchDraw(canvas); + } + + @Override + protected void onDetachedFromWindow() { + super.onDetachedFromWindow(); + recyclerItemsEnterAnimator.onDetached(); + } + @Override protected boolean allowSelectChildAtPosition(float x, float y) { return y >= 
AndroidUtilities.dp(darkTheme && linkToCopy[1] != null ? 111 : 58) + (Build.VERSION.SDK_INT >= 21 ? AndroidUtilities.statusBarHeight : 0); @@ -880,6 +897,7 @@ public class ShareAlert extends BottomSheet implements NotificationCenter.Notifi gridView.setPadding(0, 0, 0, AndroidUtilities.dp(48)); gridView.setClipToPadding(false); gridView.setLayoutManager(layoutManager = new GridLayoutManager(getContext(), 4)); + recyclerItemsEnterAnimator = new RecyclerItemsEnterAnimator(gridView, true); layoutManager.setSpanSizeLookup(new GridLayoutManager.SpanSizeLookup() { @Override public int getSpanSize(int position) { @@ -978,11 +996,14 @@ public class ShareAlert extends BottomSheet implements NotificationCenter.Notifi } }); - searchEmptyView = new EmptyTextProgressView(context); + FlickerLoadingView flickerLoadingView = new FlickerLoadingView(context); + flickerLoadingView.setViewType(FlickerLoadingView.SHARE_ALERT_TYPE); + searchEmptyView = new EmptyTextProgressView(context, flickerLoadingView); searchEmptyView.setShowAtCenter(true); searchEmptyView.showTextView(); searchEmptyView.setText(LocaleController.getString("NoChats", R.string.NoChats)); gridView.setEmptyView(searchEmptyView); + gridView.setAnimateEmptyView(true, 0); containerView.addView(searchEmptyView, LayoutHelper.createFrame(LayoutHelper.MATCH_PARENT, LayoutHelper.MATCH_PARENT, Gravity.TOP | Gravity.LEFT, 0, 52, 0, 0)); FrameLayout.LayoutParams frameLayoutParams = new FrameLayout.LayoutParams(LayoutHelper.MATCH_PARENT, AndroidUtilities.getShadowHeight(), Gravity.TOP | Gravity.LEFT); @@ -1664,7 +1685,6 @@ public class ShareAlert extends BottomSheet implements NotificationCenter.Notifi private int lastReqId; private int lastSearchId; - private int waitingResponseCount; private int lastGlobalSearchId; private int lastLocalSearchId; @@ -1674,11 +1694,16 @@ public class ShareAlert extends BottomSheet implements NotificationCenter.Notifi searchAdapterHelper.setDelegate(new 
SearchAdapterHelper.SearchAdapterHelperDelegate() { @Override public void onDataSetChanged(int searchId) { - waitingResponseCount--; lastGlobalSearchId = searchId; if (lastLocalSearchId != searchId) { searchResult.clear(); } + int oldItemsCount = lastItemCont; + if (getItemCount() == 0 && !searchAdapterHelper.isSearchInProgress() && !internalDialogsIsSearching) { + searchEmptyView.showTextView(); + } else { + recyclerItemsEnterAnimator.showItemsAnimated(oldItemsCount); + } notifyDataSetChanged(); } @@ -1689,6 +1714,7 @@ public class ShareAlert extends BottomSheet implements NotificationCenter.Notifi }); } + boolean internalDialogsIsSearching = false; private void searchDialogsInternal(final String query, final int searchId) { MessagesStorage.getInstance(currentAccount).getStorageQueue().postRunnable(() -> { try { @@ -1889,13 +1915,14 @@ public class ShareAlert extends BottomSheet implements NotificationCenter.Notifi if (searchId != lastSearchId) { return; } + int oldItemCount = getItemCount(); + internalDialogsIsSearching = false; lastLocalSearchId = searchId; if (lastGlobalSearchId != searchId) { searchAdapterHelper.clear(); } if (gridView.getAdapter() != searchAdapter) { topBeforeSwitch = getCurrentTop(); - gridView.setAdapter(searchAdapter); searchAdapter.notifyDataSetChanged(); } for (int a = 0; a < result.size(); a++) { @@ -1915,12 +1942,19 @@ public class ShareAlert extends BottomSheet implements NotificationCenter.Notifi } searchResult = result; searchAdapterHelper.mergeResults(searchResult); + int oldItemsCount = lastItemCont; + if (getItemCount() == 0 && !searchAdapterHelper.isSearchInProgress() && !internalDialogsIsSearching) { + searchEmptyView.showTextView(); + } else { + recyclerItemsEnterAnimator.showItemsAnimated(oldItemsCount); + } notifyDataSetChanged(); if (!isEmpty && !becomeEmpty && topBeforeSwitch > 0) { layoutManager.scrollToPositionWithOffset(0, -topBeforeSwitch); topBeforeSwitch = -1000; } - searchEmptyView.showTextView(); + + }); } @@ 
-1937,15 +1971,21 @@ public class ShareAlert extends BottomSheet implements NotificationCenter.Notifi AndroidUtilities.cancelRunOnUIThread(searchRunnable2); searchRunnable2 = null; } + searchResult.clear(); + searchAdapterHelper.mergeResults(null); + searchAdapterHelper.queryServerSearch(null, true, true, true, true, false, 0, false, 0, 0); + notifyDataSetChanged(); + if (TextUtils.isEmpty(query)) { - searchResult.clear(); - searchAdapterHelper.mergeResults(null); - searchAdapterHelper.queryServerSearch(null, true, true, true, true, false, 0, false, 0, 0); topBeforeSwitch = getCurrentTop(); lastSearchId = -1; - notifyDataSetChanged(); + internalDialogsIsSearching = false; + gridView.setAdapter(listAdapter); } else { + gridView.setAdapter(searchAdapter); + internalDialogsIsSearching = true; final int searchId = ++lastSearchId; + searchEmptyView.showProgress(false); Utilities.searchQueue.postRunnable(searchRunnable = () -> { searchRunnable = null; searchDialogsInternal(query, searchId); @@ -1960,6 +2000,8 @@ public class ShareAlert extends BottomSheet implements NotificationCenter.Notifi } } + int lastItemCont; + @Override public int getItemCount() { int count = searchResult.size(); @@ -1967,7 +2009,7 @@ public class ShareAlert extends BottomSheet implements NotificationCenter.Notifi if (count != 0) { count++; } - return count; + return lastItemCont = count; } public TLRPC.Dialog getItem(int position) { diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Components/SharedMediaLayout.java b/TMessagesProj/src/main/java/org/telegram/ui/Components/SharedMediaLayout.java index 894cde282..2b9500798 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/Components/SharedMediaLayout.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Components/SharedMediaLayout.java @@ -46,8 +46,6 @@ import androidx.recyclerview.widget.GridLayoutManager; import androidx.recyclerview.widget.LinearLayoutManager; import androidx.recyclerview.widget.RecyclerView; -import 
com.google.android.exoplayer2.util.Log; - import org.telegram.messenger.AndroidUtilities; import org.telegram.messenger.ApplicationLoader; import org.telegram.messenger.ChatObject; diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Components/UndoView.java b/TMessagesProj/src/main/java/org/telegram/ui/Components/UndoView.java index 9e5f32be9..5de30e1f4 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/Components/UndoView.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Components/UndoView.java @@ -12,6 +12,7 @@ import android.graphics.PorterDuff; import android.graphics.PorterDuffColorFilter; import android.graphics.RectF; import android.graphics.Typeface; +import android.graphics.drawable.Drawable; import android.os.SystemClock; import android.text.Layout; import android.text.Selection; @@ -37,6 +38,8 @@ import android.widget.TextView; import com.jakewharton.processphoenix.ProcessPhoenix; +import androidx.annotation.Keep; + import org.telegram.messenger.AndroidUtilities; import org.telegram.messenger.ChatObject; import org.telegram.messenger.ContactsController; @@ -169,12 +172,19 @@ public class UndoView extends FrameLayout { private CharSequence infoText; private int hideAnimationType = 1; + Drawable backgroundDrawable; public class LinkMovementMethodMy extends LinkMovementMethod { @Override public boolean onTouchEvent(TextView widget, Spannable buffer, MotionEvent event) { try { boolean result; + if (event.getAction() == MotionEvent.ACTION_DOWN) { + CharacterStyle[] links = buffer.getSpans(widget.getSelectionStart(), widget.getSelectionEnd(), CharacterStyle.class); + if (links == null || links.length == 0) { + return false; + } + } if (event.getAction() == MotionEvent.ACTION_UP) { CharacterStyle[] links = buffer.getSpans(widget.getSelectionStart(), widget.getSelectionEnd(), CharacterStyle.class); if (links != null && links.length > 0) { @@ -281,7 +291,8 @@ public class UndoView extends FrameLayout { 
textPaint.setTypeface(AndroidUtilities.getTypeface("fonts/rmedium.ttf")); textPaint.setColor(Theme.getColor(Theme.key_undo_infoColor)); - setBackgroundDrawable(Theme.createRoundRectDrawable(AndroidUtilities.dp(6), Theme.getColor(Theme.key_undo_background))); + setWillNotDraw(false); + backgroundDrawable = Theme.createRoundRectDrawable(AndroidUtilities.dp(6), Theme.getColor(Theme.key_undo_background)); setOnTouchListener((v, event) -> true); @@ -289,7 +300,7 @@ public class UndoView extends FrameLayout { } public void setColors(int background, int text) { - Theme.setDrawableColor(getBackground(), background); + Theme.setDrawableColor(backgroundDrawable, background); infoTextView.setTextColor(text); subinfoTextView.setTextColor(text); leftImageView.setLayerColor("info1.**", background | 0xff000000); @@ -319,7 +330,10 @@ public class UndoView extends FrameLayout { } public void setAdditionalTranslationY(float value) { - additionalTranslationY = value; + if (additionalTranslationY != value) { + additionalTranslationY = value; + updatePosition(); + } } public Object getCurrentInfoObject() { @@ -351,7 +365,7 @@ public class UndoView extends FrameLayout { if (animated != 0) { AnimatorSet animatorSet = new AnimatorSet(); if (animated == 1) { - animatorSet.playTogether(ObjectAnimator.ofFloat(this, View.TRANSLATION_Y, (fromTop ? -1.0f : 1.0f) * (AndroidUtilities.dp(8) + undoViewHeight))); + animatorSet.playTogether(ObjectAnimator.ofFloat(this, "enterOffset", (fromTop ? -1.0f : 1.0f) * (AndroidUtilities.dp(8) + undoViewHeight))); animatorSet.setDuration(250); } else { animatorSet.playTogether( @@ -372,7 +386,7 @@ public class UndoView extends FrameLayout { }); animatorSet.start(); } else { - setTranslationY((fromTop ? -1.0f : 1.0f) * (AndroidUtilities.dp(8) + undoViewHeight)); + setEnterOffset((fromTop ? 
-1.0f : 1.0f) * (AndroidUtilities.dp(8) + undoViewHeight)); setVisibility(INVISIBLE); } } @@ -443,9 +457,15 @@ public class UndoView extends FrameLayout { boolean infoOnly = false; boolean reversedPlay = false; int reversedPlayEndFrame = 0; - setOnClickListener(null); - setOnTouchListener((v, event) -> true); - infoTextView.setMovementMethod(new LinkMovementMethodMy()); + if (actionRunnable == null && cancelRunnable == null) { + setOnClickListener(view -> hide(false, 1)); + setOnTouchListener(null); + } else { + setOnClickListener(null); + setOnTouchListener((v, event) -> true); + } + + infoTextView.setMovementMethod(null); if (currentAction == ACTION_NEED_RESATRT) { infoTextView.setText(LocaleController.getString("RestartAppToTakeEffect", R.string.RestartAppToTakeEffect)); @@ -839,7 +859,7 @@ public class UndoView extends FrameLayout { leftImageView.setVisibility(VISIBLE); infoTextView.setTypeface(Typeface.DEFAULT); - + long hapticDelay = -1; if (currentAction == ACTION_GIGAGROUP_SUCCESS) { infoTextView.setText(LocaleController.getString("BroadcastGroupConvertSuccess", R.string.BroadcastGroupConvertSuccess)); leftImageView.setAnimation(R.raw.gigagroup_convert, 36, 36); @@ -984,6 +1004,7 @@ public class UndoView extends FrameLayout { } } leftImageView.setAnimation(R.raw.forward, 30, 30); + hapticDelay = 300; } } else { int amount = (Integer) infoObject2; @@ -993,6 +1014,7 @@ public class UndoView extends FrameLayout { infoTextView.setText(AndroidUtilities.replaceTags(LocaleController.formatString("FwdMessagesToChats", R.string.FwdMessagesToChats, LocaleController.formatPluralString("Chats", amount)))); } leftImageView.setAnimation(R.raw.forward, 30, 30); + hapticDelay = 300; } timeLeft = 3000; } else if (currentAction == ACTION_SHARE_BACKGROUND) { @@ -1028,9 +1050,11 @@ public class UndoView extends FrameLayout { leftImageView.setProgress(0); leftImageView.playAnimation(); - leftImageView.postDelayed(() -> { - 
leftImageView.performHapticFeedback(HapticFeedbackConstants.KEYBOARD_TAP, HapticFeedbackConstants.FLAG_IGNORE_GLOBAL_SETTING); - }, 300); + if (hapticDelay > 0) { + leftImageView.postDelayed(() -> { + leftImageView.performHapticFeedback(HapticFeedbackConstants.KEYBOARD_TAP, HapticFeedbackConstants.FLAG_IGNORE_GLOBAL_SETTING); + }, hapticDelay); + } } else if (currentAction == ACTION_PROXIMITY_SET || currentAction == ACTION_PROXIMITY_REMOVED) { int radius = (Integer) infoObject; TLRPC.User user = (TLRPC.User) infoObject2; @@ -1319,9 +1343,9 @@ public class UndoView extends FrameLayout { if (getVisibility() != VISIBLE) { setVisibility(VISIBLE); - setTranslationY((fromTop ? -1.0f : 1.0f) * (AndroidUtilities.dp(8) + undoViewHeight)); + setEnterOffset((fromTop ? -1.0f : 1.0f) * (AndroidUtilities.dp(8) + undoViewHeight)); AnimatorSet animatorSet = new AnimatorSet(); - animatorSet.playTogether(ObjectAnimator.ofFloat(this, View.TRANSLATION_Y, (fromTop ? -1.0f : 1.0f) * (AndroidUtilities.dp(8) + undoViewHeight), (fromTop ? 1.0f : -1.0f) * additionalTranslationY)); + animatorSet.playTogether(ObjectAnimator.ofFloat(this, "enterOffset", (fromTop ? -1.0f : 1.0f) * (AndroidUtilities.dp(8) + undoViewHeight), (fromTop ? 
1.0f : -1.0f))); animatorSet.setInterpolator(new DecelerateInterpolator()); animatorSet.setDuration(180); animatorSet.start(); @@ -1335,6 +1359,7 @@ public class UndoView extends FrameLayout { @Override protected void onMeasure(int widthMeasureSpec, int heightMeasureSpec) { super.onMeasure(MeasureSpec.makeMeasureSpec(MeasureSpec.getSize(widthMeasureSpec), MeasureSpec.EXACTLY), MeasureSpec.makeMeasureSpec(undoViewHeight, MeasureSpec.EXACTLY)); + backgroundDrawable.setBounds(0, 0, getMeasuredWidth(), getMeasuredHeight()); } StaticLayout timeLayout; @@ -1343,8 +1368,38 @@ public class UndoView extends FrameLayout { float timeReplaceProgress = 1f; + @Override + protected void dispatchDraw(Canvas canvas) { + if (additionalTranslationY != 0) { + canvas.save(); + + float bottom = getMeasuredHeight() - enterOffset + AndroidUtilities.dp(9); + if (bottom > 0) { + canvas.clipRect(0, 0, getMeasuredWidth(), bottom); + super.dispatchDraw(canvas); + } + canvas.restore(); + } else { + super.dispatchDraw(canvas); + } + } + @Override protected void onDraw(Canvas canvas) { + if (additionalTranslationY != 0) { + canvas.save(); + + float bottom = getMeasuredHeight() - enterOffset + AndroidUtilities.dp(9); + if (bottom > 0) { + canvas.clipRect(0, 0, getMeasuredWidth(), bottom); + super.dispatchDraw(canvas); + } + backgroundDrawable.draw(canvas); + canvas.restore(); + } else { + backgroundDrawable.draw(canvas); + } + if (currentAction == ACTION_DELETE || currentAction == ACTION_CLEAR) { int newSeconds = timeLeft > 0 ? 
(int) Math.ceil(timeLeft / 1000.0f) : 0; if (prevSeconds != newSeconds) { @@ -1404,7 +1459,6 @@ public class UndoView extends FrameLayout { if (timeLeft <= 0) { hide(true, hideAnimationType); } - invalidate(); } @@ -1422,4 +1476,30 @@ public class UndoView extends FrameLayout { public void setHideAnimationType(int hideAnimationType) { this.hideAnimationType = hideAnimationType; } + + float enterOffset; + + + @Keep + public float getEnterOffset() { + return enterOffset; + } + + @Keep + public void setEnterOffset(float enterOffset) { + if (this.enterOffset != enterOffset) { + this.enterOffset = enterOffset; + updatePosition(); + } + } + + private void updatePosition() { + setTranslationY(enterOffset - additionalTranslationY); + invalidate(); + } + + @Override + public Drawable getBackground() { + return backgroundDrawable; + } } diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Components/voip/CellFlickerDrawable.java b/TMessagesProj/src/main/java/org/telegram/ui/Components/voip/CellFlickerDrawable.java index 1c8341858..f4270d731 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/Components/voip/CellFlickerDrawable.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Components/voip/CellFlickerDrawable.java @@ -31,9 +31,12 @@ public class CellFlickerDrawable { public float repeatProgress = 1.2f; public CellFlickerDrawable() { + this(64, 204); + } + public CellFlickerDrawable(int a1, int a2) { size = AndroidUtilities.dp(160); - gradientShader = new LinearGradient(0, 0, size, 0, new int[]{Color.TRANSPARENT, ColorUtils.setAlphaComponent(Color.WHITE, 64), Color.TRANSPARENT}, null, Shader.TileMode.CLAMP); - gradientShader2 = new LinearGradient(0, 0, size, 0, new int[]{Color.TRANSPARENT, ColorUtils.setAlphaComponent(Color.WHITE, 204), Color.TRANSPARENT}, null, Shader.TileMode.CLAMP); + gradientShader = new LinearGradient(0, 0, size, 0, new int[]{Color.TRANSPARENT, ColorUtils.setAlphaComponent(Color.WHITE, a1), Color.TRANSPARENT}, null, 
Shader.TileMode.CLAMP); + gradientShader2 = new LinearGradient(0, 0, size, 0, new int[]{Color.TRANSPARENT, ColorUtils.setAlphaComponent(Color.WHITE, a2), Color.TRANSPARENT}, null, Shader.TileMode.CLAMP); paint.setShader(gradientShader); paintOutline.setShader(gradientShader2); paintOutline.setStyle(Paint.Style.STROKE); diff --git a/TMessagesProj/src/main/java/org/telegram/ui/Components/voip/GroupCallMiniTextureView.java b/TMessagesProj/src/main/java/org/telegram/ui/Components/voip/GroupCallMiniTextureView.java index b03cb17aa..ada1502fd 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/Components/voip/GroupCallMiniTextureView.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Components/voip/GroupCallMiniTextureView.java @@ -1139,6 +1139,8 @@ public class GroupCallMiniTextureView extends FrameLayout implements GroupCallSt VoIPService.getSharedInstance().addRemoteSink(participant.participant, participant.presentation, textureView.renderer, null); } } + + updateIconColor(true); } updateInfo(); @@ -1459,16 +1461,20 @@ public class GroupCallMiniTextureView extends FrameLayout implements GroupCallSt float v = (float) valueAnimator.getAnimatedValue(); lastIconColor = ColorUtils.blendARGB(colorFrom, newColor, v); lastSpeakingFrameColor = ColorUtils.blendARGB(colorFromSpeaking, newSpeakingFrameColor, v); - // micIconView.setColorFilter(new PorterDuffColorFilter(lastIconColor, PorterDuff.Mode.MULTIPLY)); speakingPaint.setColor(lastSpeakingFrameColor); + if (progressToSpeaking > 0) { + invalidate(); + } }); colorAnimator.addListener(new AnimatorListenerAdapter() { @Override public void onAnimationEnd(Animator animation) { animateToColor = lastIconColor = newColor; lastSpeakingFrameColor = newSpeakingFrameColor; - // micIconView.setColorFilter(new PorterDuffColorFilter(lastIconColor, PorterDuff.Mode.MULTIPLY)); speakingPaint.setColor(lastSpeakingFrameColor); + if (progressToSpeaking > 0) { + invalidate(); + } } }); colorAnimator.start(); diff --git 
a/TMessagesProj/src/main/java/org/telegram/ui/Components/voip/GroupCallRenderersContainer.java b/TMessagesProj/src/main/java/org/telegram/ui/Components/voip/GroupCallRenderersContainer.java index 8bd7a0088..587465767 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/Components/voip/GroupCallRenderersContainer.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/Components/voip/GroupCallRenderersContainer.java @@ -650,6 +650,7 @@ public class GroupCallRenderersContainer extends FrameLayout { } if (!inFullscreenMode) { inFullscreenMode = true; + clearCurrentFullscreenTextureView(); fullscreenTextureView = textureView; fullscreenTextureView.setShowingInFullscreen(true, true); invalidate(); @@ -747,6 +748,7 @@ public class GroupCallRenderersContainer extends FrameLayout { } }); + clearCurrentFullscreenTextureView(); fullscreenTextureView = newFullscreenTextureView; fullscreenTextureView.setShowingInFullscreen(true, false); update(); @@ -813,12 +815,14 @@ public class GroupCallRenderersContainer extends FrameLayout { }); replaceFullscreenViewAnimator.start(); + clearCurrentFullscreenTextureView(); fullscreenTextureView = newFullscreenTextureView; fullscreenTextureView.setShowingInFullscreen(true, false); fullscreenTextureView.updateAttachState(false); update(); } else { inFullscreenMode = true; + clearCurrentFullscreenTextureView(); fullscreenTextureView = new GroupCallMiniTextureView(this, attachedRenderers, call, groupCallActivity); fullscreenTextureView.participant = videoParticipant; fullscreenTextureView.setFullscreenMode(inFullscreenMode, false); @@ -881,6 +885,7 @@ public class GroupCallRenderersContainer extends FrameLayout { fullscreenAnimator = null; textureViewFinal.animateToFullscreen = false; if (!inFullscreenMode) { + clearCurrentFullscreenTextureView(); fullscreenTextureView = null; fullscreenPeerId = 0; } @@ -904,6 +909,13 @@ public class GroupCallRenderersContainer extends FrameLayout { animateSwipeToBack(fullscreenParticipant == null); } + 
private void clearCurrentFullscreenTextureView() { + if (fullscreenTextureView != null) { + fullscreenTextureView.setSwipeToBack(false, 0); + fullscreenTextureView.setZoom(false, 1f, 0, 0, 0, 0); + } + } + protected void update() { invalidate(); } diff --git a/TMessagesProj/src/main/java/org/telegram/ui/DialogsActivity.java b/TMessagesProj/src/main/java/org/telegram/ui/DialogsActivity.java index c8030712b..7f4ab558b 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/DialogsActivity.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/DialogsActivity.java @@ -193,7 +193,7 @@ public class DialogsActivity extends BaseFragment implements NotificationCenter. private final String ACTION_MODE_SEARCH_DIALOGS_TAG = "search_dialogs_action_mode"; - private class ViewPage extends FrameLayout { + private static class ViewPage extends FrameLayout { private DialogsRecyclerView listView; private LinearLayoutManager layoutManager; private DialogsAdapter dialogsAdapter; @@ -236,7 +236,6 @@ public class DialogsActivity extends BaseFragment implements NotificationCenter. private FilterTabsView filterTabsView; private boolean askingForPermissions; private RLottieDrawable passcodeDrawable; - private RLottieDrawable passcodeDrawable2; private SearchViewPager searchViewPager; @@ -1988,10 +1987,10 @@ public class DialogsActivity extends BaseFragment implements NotificationCenter. 
proxyItem = menu.addItem(2, proxyDrawable); proxyItem.setContentDescription(LocaleController.getString("ProxySettings", R.string.ProxySettings)); - passcodeDrawable = new RLottieDrawable(R.raw.passcode_lock_open, "passcode_lock_open", AndroidUtilities.dp(28), AndroidUtilities.dp(28), true, null); - passcodeDrawable2 = new RLottieDrawable(R.raw.passcode_lock_close, "passcode_lock_close", AndroidUtilities.dp(28), AndroidUtilities.dp(28), true, null); + passcodeDrawable = new RLottieDrawable(R.raw.passcode_lock_close, "passcode_lock_close", AndroidUtilities.dp(28), AndroidUtilities.dp(28), true, null); passcodeItem = menu.addItem(1, passcodeDrawable); - updatePasscodeButton(false); + passcodeItem.setContentDescription(LocaleController.getString("AccDescrPasscodeLock", R.string.AccDescrPasscodeLock)); + updatePasscodeButton(); updateProxyButton(false); } @@ -2024,7 +2023,7 @@ public class DialogsActivity extends BaseFragment implements NotificationCenter. } } setScrollY(0); - updatePasscodeButton(false); + updatePasscodeButton(); actionBar.setBackButtonContentDescription(LocaleController.getString("AccDescrGoBack", R.string.AccDescrGoBack)); } @@ -2061,7 +2060,7 @@ public class DialogsActivity extends BaseFragment implements NotificationCenter. } showSearch(false, true); } - updatePasscodeButton(false); + updatePasscodeButton(); if (menuDrawable != null) { if (actionBar.getBackButton().getDrawable() != menuDrawable) { actionBar.setBackButtonDrawable(menuDrawable); @@ -3817,9 +3816,15 @@ public class DialogsActivity extends BaseFragment implements NotificationCenter. 
parentLayout.getDrawerLayoutContainer().openDrawer(false); } } else if (id == 1) { - SharedConfig.appLocked = !SharedConfig.appLocked; + if (getParentActivity() == null) { + return; + } + SharedConfig.appLocked = true; SharedConfig.saveConfig(); - updatePasscodeButton(true); + int[] position = new int[2]; + passcodeItem.getLocationInWindow(position); + ((LaunchActivity) getParentActivity()).showPasscodeActivity(false, true, position[0] + passcodeItem.getMeasuredWidth() / 2, position[1] + passcodeItem.getMeasuredHeight() / 2, () -> passcodeItem.setAlpha(1.0f), () -> passcodeItem.setAlpha(0.0f)); + updatePasscodeButton(); } else if (id == 2) { presentFragment(new ProxyListActivity()); } else if (id == 3) { @@ -4243,6 +4248,7 @@ public class DialogsActivity extends BaseFragment implements NotificationCenter. if (searchIsShowed) { AndroidUtilities.requestAdjustResize(getParentActivity(), classGuid); } + updateVisibleRows(0); } @Override @@ -6579,7 +6585,7 @@ public class DialogsActivity extends BaseFragment implements NotificationCenter. } else if (id == NotificationCenter.messageReceivedByAck || id == NotificationCenter.messageReceivedByServer || id == NotificationCenter.messageSendError) { updateVisibleRows(MessagesController.UPDATE_MASK_SEND_STATE); } else if (id == NotificationCenter.didSetPasscode) { - updatePasscodeButton(true); + updatePasscodeButton(); } else if (id == NotificationCenter.needReloadRecentDialogsSearch) { if (searchViewPager != null && searchViewPager.dialogsSearchAdapter != null) { searchViewPager.dialogsSearchAdapter.loadRecentSearch(); @@ -6795,46 +6801,16 @@ public class DialogsActivity extends BaseFragment implements NotificationCenter. 
sideMenu.setGlowColor(Theme.getColor(Theme.key_chats_menuBackground)); } - Runnable hapticLockRunnable = new Runnable() { - @Override - public void run() { - passcodeItem.getIconView().performHapticFeedback(HapticFeedbackConstants.KEYBOARD_TAP, HapticFeedbackConstants.FLAG_IGNORE_GLOBAL_SETTING); - } - }; - - private void updatePasscodeButton(boolean animated) { + private void updatePasscodeButton() { if (passcodeItem == null) { return; } - if (isPaused) { - animated = false; - } - AndroidUtilities.cancelRunOnUIThread(hapticLockRunnable); if (SharedConfig.passcodeHash.length() != 0 && !searching) { if (doneItem == null || doneItem.getVisibility() != View.VISIBLE) { passcodeItem.setVisibility(View.VISIBLE); } + passcodeItem.setIcon(passcodeDrawable); passcodeItemVisible = true; - if (SharedConfig.appLocked) { - passcodeItem.setContentDescription(LocaleController.getString("AccDescrPasscodeUnlock", R.string.AccDescrPasscodeUnlock)); - passcodeItem.setIcon(passcodeDrawable2); - if (animated) { - passcodeDrawable2.setCurrentFrame(0, false); - passcodeItem.getIconView().playAnimation(); - AndroidUtilities.runOnUIThread(hapticLockRunnable, 350); - } else { - passcodeDrawable2.setCurrentFrame(38, false); - } - } else { - passcodeItem.setContentDescription(LocaleController.getString("AccDescrPasscodeLock", R.string.AccDescrPasscodeLock)); - passcodeItem.setIcon(passcodeDrawable); - if (animated) { - passcodeDrawable.setCurrentFrame(0, false); - passcodeItem.getIconView().playAnimation(); - } else { - passcodeDrawable.setCurrentFrame(31, false); - } - } } else { passcodeItem.setVisibility(View.GONE); passcodeItemVisible = false; @@ -6889,7 +6865,7 @@ public class DialogsActivity extends BaseFragment implements NotificationCenter. 
} private void updateVisibleRows(int mask) { - if (dialogsListFrozen && (mask & MessagesController.UPDATE_MASK_REORDER) == 0) { + if ((dialogsListFrozen && (mask & MessagesController.UPDATE_MASK_REORDER) == 0) || isPaused) { return; } for (int c = 0; c < 3; c++) { @@ -7763,7 +7739,7 @@ public class DialogsActivity extends BaseFragment implements NotificationCenter. @Override public void setProgressToDrawerOpened(float progress) { - if (SharedConfig.getDevicePerformanceClass() == SharedConfig.PERFORMANCE_CLASS_LOW) { + if (SharedConfig.getDevicePerformanceClass() == SharedConfig.PERFORMANCE_CLASS_LOW || isSlideBackTransition) { return; } boolean drawerTransition = progress > 0; diff --git a/TMessagesProj/src/main/java/org/telegram/ui/EditWidgetActivity.java b/TMessagesProj/src/main/java/org/telegram/ui/EditWidgetActivity.java index 77526d0ba..5ce4a9785 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/EditWidgetActivity.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/EditWidgetActivity.java @@ -75,6 +75,7 @@ import org.telegram.ui.Components.CombinedDrawable; import org.telegram.ui.Components.ForegroundColorSpanThemable; import org.telegram.ui.Components.InviteMembersBottomSheet; import org.telegram.ui.Components.LayoutHelper; +import org.telegram.ui.Components.MotionBackgroundDrawable; import org.telegram.ui.Components.RecyclerListView; import java.io.File; @@ -653,7 +654,7 @@ public class EditWidgetActivity extends BaseFragment { } else { drawable.setAlpha(255); } - if (drawable instanceof ColorDrawable || drawable instanceof GradientDrawable) { + if (drawable instanceof ColorDrawable || drawable instanceof GradientDrawable || drawable instanceof MotionBackgroundDrawable) { drawable.setBounds(0, 0, getMeasuredWidth(), getMeasuredHeight()); if (drawable instanceof BackgroundGradientDrawable) { final BackgroundGradientDrawable backgroundGradientDrawable = (BackgroundGradientDrawable) drawable; diff --git 
a/TMessagesProj/src/main/java/org/telegram/ui/ExternalActionActivity.java b/TMessagesProj/src/main/java/org/telegram/ui/ExternalActionActivity.java index 995ddc47f..d0acc23e4 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/ExternalActionActivity.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/ExternalActionActivity.java @@ -214,7 +214,7 @@ public class ExternalActionActivity extends Activity implements ActionBarLayout. } else if (ArticleViewer.hasInstance() && ArticleViewer.getInstance().isVisible()) { ArticleViewer.getInstance().close(false, true); } - passcodeView.onShow(); + passcodeView.onShow(true, false); SharedConfig.isWaitingForPasscodeEnter = true; drawerLayoutContainer.setAllowOpenDrawer(false, false); passcodeView.setDelegate(() -> { diff --git a/TMessagesProj/src/main/java/org/telegram/ui/GroupCallActivity.java b/TMessagesProj/src/main/java/org/telegram/ui/GroupCallActivity.java index 6c2393638..c3bf89981 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/GroupCallActivity.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/GroupCallActivity.java @@ -791,7 +791,7 @@ public class GroupCallActivity extends BottomSheet implements NotificationCenter currentParticipant.flags |= 128; double vol = ChatObject.getParticipantVolume(currentParticipant) / 100.0; textView.setText(String.format(Locale.US, "%d%%", (int) (vol > 0 ? 
Math.max(vol, 1) : 0))); - VoIPService.getSharedInstance().setParticipantVolume(currentParticipant.source, currentParticipant.volume); + VoIPService.getSharedInstance().setParticipantVolume(currentParticipant, currentParticipant.volume); if (finalMove) { int id = MessageObject.getPeerId(currentParticipant.peer); TLObject object; @@ -1434,7 +1434,6 @@ public class GroupCallActivity extends BottomSheet implements NotificationCenter autoPinEnabled = true; } - boolean hasVideoLocal = !call.visibleVideoParticipants.isEmpty(); if (hasVideoLocal != hasVideo) { hasVideo = hasVideoLocal; @@ -1482,14 +1481,12 @@ public class GroupCallActivity extends BottomSheet implements NotificationCenter if (changingPermissions) { return; } - boolean anyVisible = false; TLRPC.Chat newChat = accountInstance.getMessagesController().getChat(currentChat.id); if (newChat != null) { currentChat = newChat; } - if (ChatObject.canUserDoAdminAction(currentChat, ChatObject.ACTION_INVITE)) { + if (ChatObject.canUserDoAdminAction(currentChat, ChatObject.ACTION_INVITE) || (!ChatObject.isChannel(currentChat) || currentChat.megagroup) && ChatObject.canWriteToChat(currentChat) || ChatObject.isChannel(currentChat) && !currentChat.megagroup && !TextUtils.isEmpty(currentChat.username)) { inviteItem.setVisibility(View.VISIBLE); - anyVisible = true; } else { inviteItem.setVisibility(View.GONE); } @@ -1500,13 +1497,11 @@ public class GroupCallActivity extends BottomSheet implements NotificationCenter noiseItem.setVisibility(View.GONE); } else { noiseItem.setVisibility(View.VISIBLE); - anyVisible = true; } noiseItem.setIcon(SharedConfig.noiseSupression ? R.drawable.msg_noise_on : R.drawable.msg_noise_off); noiseItem.setSubtext(SharedConfig.noiseSupression ? 
LocaleController.getString("VoipNoiseCancellationEnabled", R.string.VoipNoiseCancellationEnabled) : LocaleController.getString("VoipNoiseCancellationDisabled", R.string.VoipNoiseCancellationDisabled)); if (ChatObject.canManageCalls(currentChat)) { - anyVisible = true; leaveItem.setVisibility(View.VISIBLE); editTitleItem.setVisibility(View.VISIBLE); if (call.isScheduled()) { @@ -1515,7 +1510,7 @@ public class GroupCallActivity extends BottomSheet implements NotificationCenter } else { recordItem.setVisibility(View.VISIBLE); } - if (!call.canStreamVideo || call.isScheduled() || Build.VERSION.SDK_INT < 21) { + if (!call.canRecordVideo() || call.isScheduled() || Build.VERSION.SDK_INT < 21) { screenItem.setVisibility(View.GONE); } else { screenItem.setVisibility(View.VISIBLE); @@ -1545,9 +1540,9 @@ public class GroupCallActivity extends BottomSheet implements NotificationCenter updateRecordCallText(); } else { boolean mutedByAdmin = participant != null && !participant.can_self_unmute && participant.muted && !ChatObject.canManageCalls(currentChat); - if (Build.VERSION.SDK_INT >= 21 && !mutedByAdmin && call.canStreamVideo) { - anyVisible = true; - if (VoIPService.getSharedInstance() != null && VoIPService.getSharedInstance().getVideoState(true) == Instance.VIDEO_STATE_ACTIVE) { + boolean sharingScreen = VoIPService.getSharedInstance() != null && VoIPService.getSharedInstance().getVideoState(true) == Instance.VIDEO_STATE_ACTIVE; + if (Build.VERSION.SDK_INT >= 21 && !mutedByAdmin && (call.canRecordVideo() || sharingScreen)) { + if (sharingScreen) { screenShareItem.setVisibility(View.GONE); screenItem.setVisibility(View.VISIBLE); screenItem.setTextAndIcon(LocaleController.getString("VoipChatStopScreenCapture", R.string.VoipChatStopScreenCapture), R.drawable.msg_screencast_off); @@ -1568,16 +1563,14 @@ public class GroupCallActivity extends BottomSheet implements NotificationCenter } if (ChatObject.canManageCalls(currentChat) && call.call.can_change_join_muted) { 
permissionItem.setVisibility(View.VISIBLE); - anyVisible = true; } else { permissionItem.setVisibility(View.GONE); } - if (soundButton.getVisibility() != View.VISIBLE) { - soundItem.setVisibility(View.VISIBLE); + soundItem.setVisibility(View.VISIBLE); + if (editTitleItem.getVisibility() == View.VISIBLE || permissionItem.getVisibility() == View.VISIBLE || inviteItem.getVisibility() == View.VISIBLE || + screenItem.getVisibility() == View.VISIBLE || recordItem.getVisibility() == View.VISIBLE || leaveItem.getVisibility() == View.VISIBLE) { soundItemDivider.setVisibility(View.VISIBLE); - anyVisible = true; } else { - soundItem.setVisibility(View.GONE); soundItemDivider.setVisibility(View.GONE); } @@ -1594,18 +1587,13 @@ public class GroupCallActivity extends BottomSheet implements NotificationCenter } accountSelectCell.setObject(object); margin += 48; - anyVisible = true; } else { margin += 48; accountSelectCell.setVisibility(View.GONE); accountGap.setVisibility(View.GONE); } - if (anyVisible) { - otherItem.setVisibility(View.VISIBLE); - } else { - otherItem.setVisibility(View.GONE); - } + otherItem.setVisibility(View.VISIBLE); FrameLayout.LayoutParams layoutParams = ((FrameLayout.LayoutParams) titleTextView.getLayoutParams()); if (layoutParams.rightMargin != AndroidUtilities.dp(margin)) { @@ -4201,8 +4189,7 @@ public class GroupCallActivity extends BottomSheet implements NotificationCenter otherItem.hideSubItem(eveyone_can_speak_item); otherItem.hideSubItem(admin_can_speak_item); - if (VoIPService.getSharedInstance() != null && soundButton.getVisibility() != View.VISIBLE && (VoIPService.getSharedInstance().hasEarpiece() || VoIPService.getSharedInstance().isBluetoothHeadsetConnected())) { - soundItem.setVisibility(View.VISIBLE); + if (VoIPService.getSharedInstance() != null && (VoIPService.getSharedInstance().hasEarpiece() || VoIPService.getSharedInstance().isBluetoothHeadsetConnected())) { int rout = VoIPService.getSharedInstance().getCurrentAudioRoute(); if (rout == 
VoIPService.AUDIO_ROUTE_BLUETOOTH) { soundItem.setIcon(R.drawable.msg_voice_bluetooth); @@ -4220,9 +4207,6 @@ public class GroupCallActivity extends BottomSheet implements NotificationCenter soundItem.setSubtext(LocaleController.getString("VoipAudioRoutingPhone", R.string.VoipAudioRoutingPhone)); } } - - } else { - soundItem.setVisibility(View.GONE); } updateItems(); otherItem.toggleSubMenu(); @@ -4349,8 +4333,8 @@ public class GroupCallActivity extends BottomSheet implements NotificationCenter noiseItem.setItemHeight(56); soundItemDivider = otherItem.addDivider(ColorUtils.blendARGB(Theme.getColor(Theme.key_voipgroup_actionBar), Color.BLACK, 0.3f)); - ((ViewGroup.MarginLayoutParams)soundItemDivider.getLayoutParams()).topMargin = 0; - ((ViewGroup.MarginLayoutParams)soundItemDivider.getLayoutParams()).bottomMargin = 0; + ((ViewGroup.MarginLayoutParams) soundItemDivider.getLayoutParams()).topMargin = 0; + ((ViewGroup.MarginLayoutParams) soundItemDivider.getLayoutParams()).bottomMargin = 0; editTitleItem = otherItem.addSubItem(edit_item, R.drawable.msg_edit, recordCallDrawable, LocaleController.getString("VoipGroupEditTitle", R.string.VoipGroupEditTitle), true, false); permissionItem = otherItem.addSubItem(permission_item, R.drawable.msg_permissions, recordCallDrawable, LocaleController.getString("VoipGroupEditPermissions", R.string.VoipGroupEditPermissions), false, false); inviteItem = otherItem.addSubItem(share_invite_link_item, R.drawable.msg_link, LocaleController.getString("VoipGroupShareInviteLink", R.string.VoipGroupShareInviteLink)); @@ -5965,7 +5949,7 @@ public class GroupCallActivity extends BottomSheet implements NotificationCenter boolean cameraButtonVisible; boolean flipButtonVisible; boolean soundButtonVisible; - if ((!mutedByAdmin && call.canStreamVideo) || outgoingVideoIsActive) { + if (!mutedByAdmin && call.canRecordVideo() || outgoingVideoIsActive) { cameraButtonVisible = true; soundButtonVisible = false; } else { @@ -6685,7 +6669,7 @@ public class 
GroupCallActivity extends BottomSheet implements NotificationCenter if (option == 5) { voIPService.editCallMember(object, true, null, null, null, null); getUndoView().showWithAction(0, UndoView.ACTION_VOIP_MUTED_FOR_YOU, object); - voIPService.setParticipantVolume(participant.source, 0); + voIPService.setParticipantVolume(participant, 0); } else { if ((participant.flags & 128) != 0 && participant.volume == 0) { participant.volume = 10000; @@ -6694,7 +6678,7 @@ public class GroupCallActivity extends BottomSheet implements NotificationCenter } else { voIPService.editCallMember(object, false, null, null, null, null); } - voIPService.setParticipantVolume(participant.source, ChatObject.getParticipantVolume(participant)); + voIPService.setParticipantVolume(participant, ChatObject.getParticipantVolume(participant)); getUndoView().showWithAction(0, option == 1 ? UndoView.ACTION_VOIP_UNMUTED : UndoView.ACTION_VOIP_UNMUTED_FOR_YOU, object, null, null, null); } } diff --git a/TMessagesProj/src/main/java/org/telegram/ui/IntroActivity.java b/TMessagesProj/src/main/java/org/telegram/ui/IntroActivity.java index 8ecffea20..371028ace 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/IntroActivity.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/IntroActivity.java @@ -14,6 +14,7 @@ import android.content.SharedPreferences; import android.content.pm.ActivityInfo; import android.database.DataSetObserver; import android.graphics.Bitmap; +import android.graphics.Canvas; import android.graphics.Shader; import android.graphics.SurfaceTexture; import android.graphics.drawable.BitmapDrawable; @@ -52,6 +53,7 @@ import org.telegram.ui.ActionBar.Theme; import org.telegram.ui.Components.BottomPagesView; import org.telegram.ui.Components.LayoutHelper; import org.telegram.ui.Components.SizeNotifierFrameLayout; +import org.telegram.ui.Components.voip.CellFlickerDrawable; import javax.microedition.khronos.egl.EGL10; import javax.microedition.khronos.egl.EGLConfig; @@ -218,15 +220,41 @@ 
public class IntroActivity extends Activity implements NotificationCenter.Notifi } }); - startMessagingButton = new TextView(this); + startMessagingButton = new TextView(this) { + CellFlickerDrawable cellFlickerDrawable; + + @Override + protected void onDraw(Canvas canvas) { + super.onDraw(canvas); + if (cellFlickerDrawable == null) { + cellFlickerDrawable = new CellFlickerDrawable(); + cellFlickerDrawable.drawFrame = false; + cellFlickerDrawable.repeatProgress = 2f; + } + cellFlickerDrawable.setParentWidth(getMeasuredWidth()); + AndroidUtilities.rectTmp.set(0, 0, getMeasuredWidth(), getMeasuredHeight()); + cellFlickerDrawable.draw(canvas, AndroidUtilities.rectTmp, AndroidUtilities.dp(4)); + invalidate(); + } + + @Override + protected void onMeasure(int widthMeasureSpec, int heightMeasureSpec) { + int size = MeasureSpec.getSize(widthMeasureSpec); + if (size > AndroidUtilities.dp(260)) { + super.onMeasure(MeasureSpec.makeMeasureSpec(AndroidUtilities.dp(320), MeasureSpec.EXACTLY), heightMeasureSpec); + } else { + super.onMeasure(widthMeasureSpec, heightMeasureSpec); + } + } + }; startMessagingButton.setText(LocaleController.getString("StartMessaging", R.string.StartMessaging)); startMessagingButton.setGravity(Gravity.CENTER); startMessagingButton.setTextColor(0xffffffff); startMessagingButton.setTypeface(AndroidUtilities.getTypeface("fonts/rmedium.ttf")); - startMessagingButton.setTextSize(TypedValue.COMPLEX_UNIT_DIP, 14); + startMessagingButton.setTextSize(TypedValue.COMPLEX_UNIT_DIP, 15); startMessagingButton.setBackgroundDrawable(Theme.createSimpleSelectorRoundRectDrawable(AndroidUtilities.dp(4), 0xff50a8eb, 0xff439bde)); startMessagingButton.setPadding(AndroidUtilities.dp(34), 0, AndroidUtilities.dp(34), 0); - frameLayout.addView(startMessagingButton, LayoutHelper.createFrame(LayoutHelper.WRAP_CONTENT, 42, Gravity.CENTER_HORIZONTAL | Gravity.BOTTOM, 10, 0, 10, 76)); + frameLayout.addView(startMessagingButton, LayoutHelper.createFrame(LayoutHelper.MATCH_PARENT, 
42, Gravity.CENTER_HORIZONTAL | Gravity.BOTTOM, 36, 0, 36, 76)); startMessagingButton.setOnClickListener(view -> { if (startPressed) { return; diff --git a/TMessagesProj/src/main/java/org/telegram/ui/LaunchActivity.java b/TMessagesProj/src/main/java/org/telegram/ui/LaunchActivity.java index 981ddab7c..33267193a 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/LaunchActivity.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/LaunchActivity.java @@ -152,7 +152,11 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.text.DateFormat; +import java.text.ParseException; +import java.text.SimpleDateFormat; import java.util.ArrayList; +import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Set; @@ -226,6 +230,7 @@ public class LaunchActivity extends Activity implements ActionBarLayout.ActionBa private RadialProgress2 updateLayoutIcon; private SimpleTextView updateTextView; private TextView updateSizeTextView; + private FrameLayout sideMenuContainer; private AlertDialog localeDialog; private boolean loadingLocaleDialog; @@ -325,11 +330,11 @@ public class LaunchActivity extends Activity implements ActionBarLayout.ActionBa if (Build.VERSION.SDK_INT >= 24) { AndroidUtilities.isInMultiwindow = isInMultiWindowMode(); } - Theme.createChatResources(this, false); //TODO optimize + Theme.createCommonChatResources(this); + Theme.createDialogsResources(this); if (SharedConfig.passcodeHash.length() != 0 && SharedConfig.appLocked) { SharedConfig.lastPauseTime = (int) (SystemClock.elapsedRealtime() / 1000); } - //FileLog.d("UI create5 time = " + (SystemClock.elapsedRealtime() - ApplicationLoader.startTime)); AndroidUtilities.fillStatusBarHeight(this); actionBarLayout = new ActionBarLayout(this) { @Override @@ -509,8 +514,7 @@ public class LaunchActivity extends Activity implements ActionBarLayout.ActionBa } else { drawerLayoutContainer.addView(actionBarLayout, new 
ViewGroup.LayoutParams(ViewGroup.LayoutParams.MATCH_PARENT, ViewGroup.LayoutParams.MATCH_PARENT)); } - //FileLog.d("UI create7 time = " + (SystemClock.elapsedRealtime() - ApplicationLoader.startTime)); - FrameLayout sideMenuContainer = new FrameLayout(this); + sideMenuContainer = new FrameLayout(this); sideMenu = new RecyclerListView(this) { @Override public boolean drawChild(Canvas canvas, View child, long drawingTime) { @@ -528,7 +532,6 @@ public class LaunchActivity extends Activity implements ActionBarLayout.ActionBa return result; } }; - //FileLog.d("UI create34 time = " + (SystemClock.elapsedRealtime() - ApplicationLoader.startTime)); itemAnimator = new SideMenultItemAnimator(sideMenu); sideMenu.setItemAnimator(itemAnimator); sideMenu.setBackgroundColor(Theme.getColor(Theme.key_chats_menuBackground)); @@ -692,7 +695,6 @@ public class LaunchActivity extends Activity implements ActionBarLayout.ActionBa } } }); - //FileLog.d("UI create33 time = " + (SystemClock.elapsedRealtime() - ApplicationLoader.startTime)); final ItemTouchHelper sideMenuTouchHelper = new ItemTouchHelper(new ItemTouchHelper.SimpleCallback(ItemTouchHelper.UP | ItemTouchHelper.DOWN, 0) { private RecyclerView.ViewHolder selectedViewHolder; @@ -769,7 +771,6 @@ public class LaunchActivity extends Activity implements ActionBarLayout.ActionBa view.setTranslationY(dY); } }); - //FileLog.d("UI create32 time = " + (SystemClock.elapsedRealtime() - ApplicationLoader.startTime)); sideMenuTouchHelper.attachToRecyclerView(sideMenu); sideMenu.setOnItemLongClickListener((view, position) -> { if (view instanceof DrawerUserCell) { @@ -802,96 +803,17 @@ public class LaunchActivity extends Activity implements ActionBarLayout.ActionBa } return false; }); - //FileLog.d("UI create31 time = " + (SystemClock.elapsedRealtime() - ApplicationLoader.startTime)); drawerLayoutContainer.setParentActionBarLayout(actionBarLayout); actionBarLayout.setDrawerLayoutContainer(drawerLayoutContainer); 
actionBarLayout.init(mainFragmentsStack); actionBarLayout.setDelegate(this); - //FileLog.d("UI create30 time = " + (SystemClock.elapsedRealtime() - ApplicationLoader.startTime)); Theme.loadWallpaper(); - //FileLog.d("UI create8 time = " + (SystemClock.elapsedRealtime() - ApplicationLoader.startTime)); - - updateLayout = new FrameLayout(this) { - - private Paint paint = new Paint(); - private Matrix matrix = new Matrix(); - private LinearGradient updateGradient; - private int lastGradientWidth; - - @Override - protected void onDraw(Canvas canvas) { - if (updateGradient == null) { - return; - } - paint.setColor(0xffffffff); - paint.setShader(updateGradient); - updateGradient.setLocalMatrix(matrix); - canvas.drawRect(0, 0, getMeasuredWidth(), getMeasuredHeight(), paint); - updateLayoutIcon.setBackgroundGradientDrawable(updateGradient); - updateLayoutIcon.draw(canvas); - } - - @Override - protected void onMeasure(int widthMeasureSpec, int heightMeasureSpec) { - super.onMeasure(widthMeasureSpec, heightMeasureSpec); - int width = MeasureSpec.getSize(widthMeasureSpec); - if (lastGradientWidth != width) { - updateGradient = new LinearGradient(0, 0, width, 0, new int[]{0xff69BF72, 0xff53B3AD}, new float[]{0.0f, 1.0f}, Shader.TileMode.CLAMP); - lastGradientWidth = width; - } - } - }; - updateLayout.setWillNotDraw(false); - updateLayout.setVisibility(View.INVISIBLE); - updateLayout.setTranslationY(AndroidUtilities.dp(44)); - if (Build.VERSION.SDK_INT >= 21) { - updateLayout.setBackground(Theme.getSelectorDrawable(Theme.getColor(Theme.key_listSelector), null)); - } - sideMenuContainer.addView(updateLayout, LayoutHelper.createFrame(LayoutHelper.MATCH_PARENT, 44, Gravity.LEFT | Gravity.BOTTOM)); - updateLayout.setOnClickListener(v -> { - if (!SharedConfig.isAppUpdateAvailable()) { - return; - } - if (updateLayoutIcon.getIcon() == MediaActionDrawable.ICON_DOWNLOAD) { - FileLoader.getInstance(currentAccount).loadFile(SharedConfig.pendingAppUpdate.document, "update", 1, 1); - } 
else if (updateLayoutIcon.getIcon() == MediaActionDrawable.ICON_CANCEL) { - FileLoader.getInstance(currentAccount).cancelLoadFile(SharedConfig.pendingAppUpdate.document); - } else { - AndroidUtilities.openForView(SharedConfig.pendingAppUpdate.document, true, this); - } - }); - - updateLayoutIcon = new RadialProgress2(updateLayout); - updateLayoutIcon.setColors(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff); - updateLayoutIcon.setProgressRect(AndroidUtilities.dp(22), AndroidUtilities.dp(11), AndroidUtilities.dp(22 + 22), AndroidUtilities.dp(11 + 22)); - updateLayoutIcon.setCircleRadius(AndroidUtilities.dp(11)); - updateLayoutIcon.setAsMini(); - - updateTextView = new SimpleTextView(this); - updateTextView.setTextSize(15); - updateTextView.setTypeface(AndroidUtilities.getTypeface("fonts/rmedium.ttf")); - updateTextView.setText(LocaleController.getString("AppUpdate", R.string.AppUpdate)); - updateTextView.setTextColor(0xffffffff); - updateTextView.setGravity(Gravity.LEFT); - updateLayout.addView(updateTextView, LayoutHelper.createFrame(LayoutHelper.WRAP_CONTENT, LayoutHelper.WRAP_CONTENT, Gravity.CENTER_VERTICAL, 74, 0, 0, 0)); - - updateSizeTextView = new TextView(this); - updateSizeTextView.setTextSize(TypedValue.COMPLEX_UNIT_DIP, 15); - updateSizeTextView.setTypeface(AndroidUtilities.getTypeface("fonts/rmedium.ttf")); - updateSizeTextView.setGravity(Gravity.RIGHT); - updateSizeTextView.setTextColor(0xffffffff); - updateLayout.addView(updateSizeTextView, LayoutHelper.createFrame(LayoutHelper.WRAP_CONTENT, LayoutHelper.WRAP_CONTENT, Gravity.CENTER_VERTICAL | Gravity.RIGHT, 0, 0, 17, 0)); - - passcodeView = new PasscodeView(this); - drawerLayoutContainer.addView(passcodeView, LayoutHelper.createFrame(LayoutHelper.MATCH_PARENT, LayoutHelper.MATCH_PARENT)); checkCurrentAccount(); updateCurrentConnectionState(currentAccount); - NotificationCenter.getGlobalInstance().postNotificationName(NotificationCenter.closeOtherAppActivities, this); currentConnectionState = 
ConnectionsManager.getInstance(currentAccount).getConnectionState(); - //FileLog.d("UI create10 time = " + (SystemClock.elapsedRealtime() - ApplicationLoader.startTime)); NotificationCenter.getGlobalInstance().addObserver(this, NotificationCenter.needShowAlert); NotificationCenter.getGlobalInstance().addObserver(this, NotificationCenter.reloadInterface); NotificationCenter.getGlobalInstance().addObserver(this, NotificationCenter.suggestedLangpack); @@ -994,12 +916,9 @@ public class LaunchActivity extends Activity implements ActionBarLayout.ActionBa } drawerLayoutContainer.setAllowOpenDrawer(allowOpen, false); } - //FileLog.d("UI create11 time = " + (SystemClock.elapsedRealtime() - ApplicationLoader.startTime)); checkLayout(); checkSystemBarColors(); - //FileLog.d("UI create12 time = " + (SystemClock.elapsedRealtime() - ApplicationLoader.startTime)); handleIntent(getIntent(), false, savedInstanceState != null, false); - //FileLog.d("UI create9 time = " + (SystemClock.elapsedRealtime() - ApplicationLoader.startTime)); try { String os1 = Build.DISPLAY; String os2 = Build.USER; @@ -1208,6 +1127,7 @@ public class LaunchActivity extends Activity implements ActionBarLayout.ActionBa NotificationCenter.getInstance(currentAccount).removeObserver(this, NotificationCenter.historyImportProgressChanged); NotificationCenter.getInstance(currentAccount).removeObserver(this, NotificationCenter.groupCallUpdated); NotificationCenter.getInstance(currentAccount).removeObserver(this, NotificationCenter.stickersImportComplete); + NotificationCenter.getInstance(currentAccount).removeObserver(this, NotificationCenter.newSuggestionsAvailable); } currentAccount = UserConfig.selectedAccount; NotificationCenter.getInstance(currentAccount).addObserver(this, NotificationCenter.appDidLogout); @@ -1224,6 +1144,7 @@ public class LaunchActivity extends Activity implements ActionBarLayout.ActionBa NotificationCenter.getInstance(currentAccount).addObserver(this, 
NotificationCenter.historyImportProgressChanged); NotificationCenter.getInstance(currentAccount).addObserver(this, NotificationCenter.groupCallUpdated); NotificationCenter.getInstance(currentAccount).addObserver(this, NotificationCenter.stickersImportComplete); + NotificationCenter.getInstance(currentAccount).addObserver(this, NotificationCenter.newSuggestionsAvailable); } private void checkLayout() { @@ -1244,7 +1165,7 @@ public class LaunchActivity extends Activity implements ActionBarLayout.ActionBa rightActionBarLayout.fragmentsStack.add(chatFragment); a--; } - if (passcodeView.getVisibility() != View.VISIBLE) { + if (passcodeView == null || passcodeView.getVisibility() != View.VISIBLE) { actionBarLayout.showLastFragment(); rightActionBarLayout.showLastFragment(); } @@ -1265,7 +1186,7 @@ public class LaunchActivity extends Activity implements ActionBarLayout.ActionBa actionBarLayout.fragmentsStack.add(chatFragment); a--; } - if (passcodeView.getVisibility() != View.VISIBLE) { + if (passcodeView == null || passcodeView.getVisibility() != View.VISIBLE) { actionBarLayout.showLastFragment(); } } @@ -1331,10 +1252,14 @@ public class LaunchActivity extends Activity implements ActionBarLayout.ActionBa termsOfServiceView.animate().alpha(1f).setDuration(150).setInterpolator(AndroidUtilities.decelerateInterpolator).setListener(null).start(); } - private void showPasscodeActivity() { - if (passcodeView == null) { + public void showPasscodeActivity(boolean fingerprint, boolean animated, int x, int y, Runnable onShow, Runnable onStart) { + if (drawerLayoutContainer == null) { return; } + if (passcodeView == null) { + passcodeView = new PasscodeView(this); + drawerLayoutContainer.addView(passcodeView, LayoutHelper.createFrame(LayoutHelper.MATCH_PARENT, LayoutHelper.MATCH_PARENT)); + } SharedConfig.appLocked = true; if (SecretMediaViewer.hasInstance() && SecretMediaViewer.getInstance().isVisible()) { SecretMediaViewer.getInstance().closePhoto(false, false); @@ -1347,7 
+1272,18 @@ public class LaunchActivity extends Activity implements ActionBarLayout.ActionBa if (messageObject != null && messageObject.isRoundVideo()) { MediaController.getInstance().cleanupPlayer(true, true); } - passcodeView.onShow(); + passcodeView.onShow(fingerprint, animated, x, y, () -> { + actionBarLayout.setVisibility(View.INVISIBLE); + if (AndroidUtilities.isTablet()) { + if (layersActionBarLayout.getVisibility() == View.VISIBLE) { + layersActionBarLayout.setVisibility(View.INVISIBLE); + } + rightActionBarLayout.setVisibility(View.INVISIBLE); + } + if (onShow != null) { + onShow.run(); + } + }, onStart); SharedConfig.isWaitingForPasscodeEnter = true; drawerLayoutContainer.setAllowOpenDrawer(false, false); passcodeView.setDelegate(() -> { @@ -1368,13 +1304,6 @@ public class LaunchActivity extends Activity implements ActionBarLayout.ActionBa rightActionBarLayout.setVisibility(View.VISIBLE); } }); - actionBarLayout.setVisibility(View.INVISIBLE); - if (AndroidUtilities.isTablet()) { - if (layersActionBarLayout.getVisibility() == View.VISIBLE) { - layersActionBarLayout.setVisibility(View.INVISIBLE); - } - rightActionBarLayout.setVisibility(View.INVISIBLE); - } } private boolean handleIntent(Intent intent, boolean isNew, boolean restore, boolean fromPassword) { @@ -1386,7 +1315,6 @@ public class LaunchActivity extends Activity implements ActionBarLayout.ActionBa } return true; } - //FileLog.d("UI create13 time = " + (SystemClock.elapsedRealtime() - ApplicationLoader.startTime)); if (isNew && PhotoViewer.hasInstance() && PhotoViewer.getInstance().isVisible()) { if (intent == null || !Intent.ACTION_MAIN.equals(intent.getAction())) { PhotoViewer.getInstance().closePhoto(false, true); @@ -1398,7 +1326,7 @@ public class LaunchActivity extends Activity implements ActionBarLayout.ActionBa switchToAccount(intentAccount[0], true); boolean isVoipIntent = action != null && action.equals("voip"); if (!fromPassword && (AndroidUtilities.needShowPasscode(true) || 
SharedConfig.isWaitingForPasscodeEnter)) { - showPasscodeActivity(); + showPasscodeActivity(true, false, -1, -1, null, null); UserConfig.getInstance(currentAccount).saveConfig(false); if (!isVoipIntent) { passcodeSaveIntent = intent; @@ -1408,7 +1336,6 @@ public class LaunchActivity extends Activity implements ActionBarLayout.ActionBa } } boolean pushOpened = false; - //FileLog.d("UI create14 time = " + (SystemClock.elapsedRealtime() - ApplicationLoader.startTime)); int push_user_id = 0; int push_chat_id = 0; int push_enc_id = 0; @@ -1756,6 +1683,7 @@ public class LaunchActivity extends Activity implements ActionBarLayout.ActionBa Integer channelId = null; Integer threadId = null; Integer commentId = null; + int videoTimestamp = -1; boolean hasUrl = false; final String scheme = data.getScheme(); boolean internal = intent.getExtras() != null && intent.getExtras().get("internal") != null && (boolean) intent.getExtras().get("internal"); @@ -1924,6 +1852,18 @@ public class LaunchActivity extends Activity implements ActionBarLayout.ActionBa } } } + if (messageId != null && segments.contains("video")) { + String str = data.getQuery(); + DateFormat dateFormat = new SimpleDateFormat("mm:ss"); + Date reference = null; + try { + reference = dateFormat.parse("00:00"); + Date date = dateFormat.parse(str); + videoTimestamp = (int) ((date.getTime() - reference.getTime()) / 1000L); + } catch (ParseException e) { + e.printStackTrace(); + } + } botUser = data.getQueryParameter("start"); botChat = data.getQueryParameter("startgroup"); game = data.getQueryParameter("game"); @@ -2274,7 +2214,7 @@ public class LaunchActivity extends Activity implements ActionBarLayout.ActionBa if (message != null && message.startsWith("@")) { message = " " + message; } - runLinkRequest(intentAccount[0], username, group, sticker, botUser, botChat, message, hasUrl, messageId, channelId, threadId, commentId, game, auth, lang, unsupportedUrl, code, login, wallPaper, theme, voicechat, internal ? 
3 : 0); + runLinkRequest(intentAccount[0], username, group, sticker, botUser, botChat, message, hasUrl, messageId, channelId, threadId, commentId, game, auth, lang, unsupportedUrl, code, login, wallPaper, theme, voicechat, internal ? 3 : 0, videoTimestamp); } else { try (Cursor cursor = getContentResolver().query(intent.getData(), null, null, null, null)) { if (cursor != null) { @@ -2343,7 +2283,6 @@ public class LaunchActivity extends Activity implements ActionBarLayout.ActionBa } } } - //FileLog.d("UI create15 time = " + (SystemClock.elapsedRealtime() - ApplicationLoader.startTime)); if (UserConfig.getInstance(currentAccount).isClientActivated()) { if (searchQuery != null) { final BaseFragment lastFragment = actionBarLayout.getLastFragment(); @@ -2619,7 +2558,6 @@ public class LaunchActivity extends Activity implements ActionBarLayout.ActionBa pushOpened = true; } } - //FileLog.d("UI create16 time = " + (SystemClock.elapsedRealtime() - ApplicationLoader.startTime)); if (!pushOpened && !isNew) { if (AndroidUtilities.isTablet()) { if (!UserConfig.getInstance(currentAccount).isClientActivated()) { @@ -2660,16 +2598,14 @@ public class LaunchActivity extends Activity implements ActionBarLayout.ActionBa rightActionBarLayout.showLastFragment(); } } - //FileLog.d("UI create17 time = " + (SystemClock.elapsedRealtime() - ApplicationLoader.startTime)); if (isVoipIntent) { VoIPFragment.show(this, intentAccount[0]); } - if (!showGroupVoip && GroupCallActivity.groupCallInstance != null && (intent == null || !Intent.ACTION_MAIN.equals(intent.getAction()))) { + if (!showGroupVoip && (intent == null || !Intent.ACTION_MAIN.equals(intent.getAction())) && GroupCallActivity.groupCallInstance != null) { GroupCallActivity.groupCallInstance.dismiss(); } intent.setAction(null); - //FileLog.d("UI create18 time = " + (SystemClock.elapsedRealtime() - ApplicationLoader.startTime)); return pushOpened; } @@ -2922,13 +2858,14 @@ public class LaunchActivity extends Activity implements 
ActionBarLayout.ActionBa final TLRPC.TL_wallPaper wallPaper, final String theme, final String voicechat, - final int state) { + final int state, + final int videoTimestamp) { if (state == 0 && UserConfig.getActivatedAccountsCount() >= 2) { AlertsCreator.createAccountSelectDialog(this, account -> { if (account != intentAccount) { switchToAccount(account, true); } - runLinkRequest(account, username, group, sticker, botUser, botChat, message, hasUrl, messageId, channelId, threadId, commentId, game, auth, lang, unsupportedUrl, code, loginToken, wallPaper, theme, voicechat, 1); + runLinkRequest(account, username, group, sticker, botUser, botChat, message, hasUrl, messageId, channelId, threadId, commentId, game, auth, lang, unsupportedUrl, code, loginToken, wallPaper, theme, voicechat, 1, videoTimestamp); }).show(); return; } else if (code != null) { @@ -3114,6 +3051,9 @@ public class LaunchActivity extends Activity implements ActionBarLayout.ActionBa if (voicechat != null) { args.putString("voicechat", voicechat); } + if (videoTimestamp >= 0) { + args.putInt("video_timestamp", videoTimestamp); + } BaseFragment lastFragment = !mainFragmentsStack.isEmpty() && voicechat == null ? 
mainFragmentsStack.get(mainFragmentsStack.size() - 1) : null; if (lastFragment == null || MessagesController.getInstance(intentAccount).checkCanOpenChat(args, lastFragment)) { if (isBot && lastFragment instanceof ChatActivity && ((ChatActivity) lastFragment).getDialogId() == dialog_id) { @@ -3991,6 +3931,7 @@ public class LaunchActivity extends Activity implements ActionBarLayout.ActionBa NotificationCenter.getInstance(currentAccount).removeObserver(this, NotificationCenter.historyImportProgressChanged); NotificationCenter.getInstance(currentAccount).removeObserver(this, NotificationCenter.groupCallUpdated); NotificationCenter.getInstance(currentAccount).removeObserver(this, NotificationCenter.stickersImportComplete); + NotificationCenter.getInstance(currentAccount).removeObserver(this, NotificationCenter.newSuggestionsAvailable); } NotificationCenter.getGlobalInstance().removeObserver(this, NotificationCenter.needShowAlert); @@ -4329,7 +4270,7 @@ public class LaunchActivity extends Activity implements ActionBarLayout.ActionBa checkFreeDiscSpace(); MediaController.checkGallery(); onPasscodeResume(); - if (passcodeView.getVisibility() != View.VISIBLE) { + if (passcodeView == null || passcodeView.getVisibility() != View.VISIBLE) { actionBarLayout.onResume(); if (AndroidUtilities.isTablet()) { rightActionBarLayout.onResume(); @@ -4641,6 +4582,8 @@ public class LaunchActivity extends Activity implements ActionBarLayout.ActionBa FileLog.e(e2); } } + } else { + DrawerProfileCell.switchingTheme = false; } Theme.ThemeInfo theme = (Theme.ThemeInfo) args[0]; boolean nigthTheme = (Boolean) args[1]; @@ -4758,6 +4701,8 @@ public class LaunchActivity extends Activity implements ActionBarLayout.ActionBa } } else if (id == NotificationCenter.stickersImportComplete) { MediaDataController.getInstance(account).toggleStickerSet(this, (TLObject) args[0], 2, !mainFragmentsStack.isEmpty() ? 
mainFragmentsStack.get(mainFragmentsStack.size() - 1) : null, false, true); + } else if (id == NotificationCenter.newSuggestionsAvailable) { + sideMenu.invalidateViews(); } else if (id == NotificationCenter.showBulletin) { if (!mainFragmentsStack.isEmpty()) { int type = (int) args[0]; @@ -5132,7 +5077,7 @@ public class LaunchActivity extends Activity implements ActionBarLayout.ActionBa if (BuildVars.LOGS_ENABLED) { FileLog.d("lock app"); } - showPasscodeActivity(); + showPasscodeActivity(true, false, -1, -1, null, null); } else { if (BuildVars.LOGS_ENABLED) { FileLog.d("didn't pass lock check"); @@ -5168,7 +5113,7 @@ public class LaunchActivity extends Activity implements ActionBarLayout.ActionBa lockRunnable = null; } if (AndroidUtilities.needShowPasscode(true)) { - showPasscodeActivity(); + showPasscodeActivity(true, false, -1, -1, null, null); } if (SharedConfig.lastPauseTime != 0) { SharedConfig.lastPauseTime = 0; diff --git a/TMessagesProj/src/main/java/org/telegram/ui/LoginActivity.java b/TMessagesProj/src/main/java/org/telegram/ui/LoginActivity.java index 4d8888660..a9b33896a 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/LoginActivity.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/LoginActivity.java @@ -35,6 +35,7 @@ import android.graphics.drawable.Drawable; import android.net.Uri; import android.os.Build; import android.os.Bundle; +import android.os.Vibrator; import android.telephony.PhoneNumberUtils; import android.telephony.TelephonyManager; import android.text.Editable; @@ -144,7 +145,7 @@ import tw.nekomimi.nekogram.utils.VibrateUtil; public class LoginActivity extends BaseFragment implements NotificationCenter.NotificationCenterDelegate { private int currentViewNum; - private SlideView[] views = new SlideView[9]; + private SlideView[] views = new SlideView[11]; private boolean restoringState; @@ -164,7 +165,6 @@ public class LoginActivity extends BaseFragment implements NotificationCenter.No private ActionBarMenuItem doneItem; 
private AnimatorSet doneItemAnimation; private ContextProgressView doneProgressView; - private AnimatorSet pagesAnimation; private ImageView floatingButtonIcon; private FrameLayout floatingButtonContainer; private RadialProgressView floatingProgressView; @@ -701,6 +701,8 @@ public class LoginActivity extends BaseFragment implements NotificationCenter.No views[6] = new LoginActivityPasswordView(context); views[7] = new LoginActivityRecoverView(context); views[8] = new LoginActivityResetWaitView(context); + views[9] = new LoginActivityNewPasswordView(context, 0); + views[10] = new LoginActivityNewPasswordView(context, 1); for (int a = 0; a < views.length; a++) { views[a].setVisibility(a == 0 ? View.VISIBLE : View.GONE); @@ -720,7 +722,14 @@ public class LoginActivity extends BaseFragment implements NotificationCenter.No } } else if (currentViewNum == 6) { LoginActivityPasswordView view = (LoginActivityPasswordView) views[6]; - if (view.passwordType == 0 || view.current_salt1 == null || view.current_salt2 == null) { + if (view.currentPassword == null) { + currentViewNum = 0; + savedInstanceState = null; + clearCurrentState(); + } + } else if (currentViewNum == 7) { + LoginActivityRecoverView view = (LoginActivityRecoverView) views[7]; + if (view.passwordString == null) { currentViewNum = 0; savedInstanceState = null; clearCurrentState(); @@ -1127,6 +1136,12 @@ public class LoginActivity extends BaseFragment implements NotificationCenter.No } } else if (currentViewNum == 5) { ((LoginActivityRegisterView) views[currentViewNum]).wrongNumber.callOnClick(); + } else if (currentViewNum == 9) { + views[currentViewNum].onBackPressed(true); + setPage(7, true, null, true); + } else if (currentViewNum == 10 || currentViewNum == 11) { + views[currentViewNum].onBackPressed(true); + setPage(9, true, null, true); } return false; } @@ -1464,7 +1479,7 @@ public class LoginActivity extends BaseFragment implements NotificationCenter.No } public void setPage(int page, boolean animated, 
Bundle params, boolean back) { - final boolean needFloatingButton = page == 0 || page == 5 || page == 6 || page == 7; + final boolean needFloatingButton = page == 0 || page == 5 || page == 6 || page == 7 || page == 9 || page == 10 || page == 11; if (needFloatingButton) { if (page == 0) { checkPermissions = true; @@ -1497,7 +1512,7 @@ public class LoginActivity extends BaseFragment implements NotificationCenter.No newView.setX(back ? -AndroidUtilities.displaySize.x : AndroidUtilities.displaySize.x); newView.setVisibility(View.VISIBLE); - pagesAnimation = new AnimatorSet(); + AnimatorSet pagesAnimation = new AnimatorSet(); pagesAnimation.addListener(new AnimatorListenerAdapter() { @Override public void onAnimationEnd(Animator animation) { @@ -3066,22 +3081,12 @@ public class LoginActivity extends BaseFragment implements NotificationCenter.No return; } Bundle bundle = new Bundle(); - if (password.current_algo instanceof TLRPC.TL_passwordKdfAlgoSHA256SHA256PBKDF2HMACSHA512iter100000SHA256ModPow) { - TLRPC.TL_passwordKdfAlgoSHA256SHA256PBKDF2HMACSHA512iter100000SHA256ModPow algo = (TLRPC.TL_passwordKdfAlgoSHA256SHA256PBKDF2HMACSHA512iter100000SHA256ModPow) password.current_algo; - bundle.putString("current_salt1", Utilities.bytesToHex(algo.salt1)); - bundle.putString("current_salt2", Utilities.bytesToHex(algo.salt2)); - bundle.putString("current_p", Utilities.bytesToHex(algo.p)); - bundle.putInt("current_g", algo.g); - bundle.putString("current_srp_B", Utilities.bytesToHex(password.srp_B)); - bundle.putLong("current_srp_id", password.srp_id); - bundle.putInt("passwordType", 1); - } - bundle.putString("hint", password.hint != null ? password.hint : ""); - bundle.putString("email_unconfirmed_pattern", password.email_unconfirmed_pattern != null ? 
password.email_unconfirmed_pattern : ""); + SerializedData data = new SerializedData(password.getObjectSize()); + password.serializeToStream(data); + bundle.putString("password", Utilities.bytesToHex(data.toByteArray())); bundle.putString("phoneFormated", requestPhone); bundle.putString("phoneHash", phoneHash); bundle.putString("code", req.phone_code); - bundle.putInt("has_recovery", password.has_recovery ? 1 : 0); setPage(6, true, bundle, false); } else { needShowAlert(LocaleController.getString("NekoX", R.string.NekoX), error1.text); @@ -3291,14 +3296,8 @@ public class LoginActivity extends BaseFragment implements NotificationCenter.No private Bundle currentParams; private boolean nextPressed; - private byte[] current_salt1; - private byte[] current_salt2; - private int current_g; - private long current_srp_id; - private byte[] current_srp_B; - private byte[] current_p; - private int passwordType; - private boolean has_recovery; + private TLRPC.TL_account_password currentPassword; + private String passwordString; private String requestPhone; private String phoneHash; private String phoneCode; @@ -3353,7 +3352,7 @@ public class LoginActivity extends BaseFragment implements NotificationCenter.No if (doneProgressView.getTag() != null) { return; } - if (has_recovery) { + if (currentPassword.has_recovery) { needShowProgress(0); TLRPC.TL_auth_requestPasswordRecovery req = new TLRPC.TL_auth_requestPasswordRecovery(); ConnectionsManager.getInstance(currentAccount).sendRequest(req, (response, error) -> AndroidUtilities.runOnUIThread(() -> { @@ -3366,6 +3365,7 @@ public class LoginActivity extends BaseFragment implements NotificationCenter.No builder.setPositiveButton(LocaleController.getString("OK", R.string.OK), (dialogInterface, i) -> { Bundle bundle = new Bundle(); bundle.putString("email_unconfirmed_pattern", res.email_pattern); + bundle.putString("password", passwordString); setPage(7, true, bundle, false); }); Dialog dialog = showDialog(builder.create()); @@ 
-3481,21 +3481,18 @@ public class LoginActivity extends BaseFragment implements NotificationCenter.No resetAccountText.setVisibility(GONE); codeField.setText(""); currentParams = params; - current_salt1 = Utilities.hexToBytes(currentParams.getString("current_salt1")); - current_salt2 = Utilities.hexToBytes(currentParams.getString("current_salt2")); - current_p = Utilities.hexToBytes(currentParams.getString("current_p")); - current_g = currentParams.getInt("current_g"); - current_srp_B = Utilities.hexToBytes(currentParams.getString("current_srp_B")); - current_srp_id = currentParams.getLong("current_srp_id"); - passwordType = currentParams.getInt("passwordType"); - String hint = currentParams.getString("hint"); - has_recovery = currentParams.getInt("has_recovery") == 1; + passwordString = currentParams.getString("password"); + if (passwordString != null) { + SerializedData data = new SerializedData(Utilities.hexToBytes(passwordString)); + currentPassword = TLRPC.TL_account_password.TLdeserialize(data, data.readInt32(false), false); + } + requestPhone = params.getString("phoneFormated"); phoneHash = params.getString("phoneHash"); phoneCode = params.getString("code"); - if (hint != null && hint.length() > 0) { - codeField.setHint(hint); + if (currentPassword != null && !TextUtils.isEmpty(currentPassword.hint)) { + codeField.setHint(currentPassword.hint); } else { codeField.setHint(LocaleController.getString("LoginPassword", R.string.LoginPassword)); } @@ -3528,18 +3525,7 @@ public class LoginActivity extends BaseFragment implements NotificationCenter.No Utilities.globalQueue.postRunnable(() -> { final byte[] x_bytes; - TLRPC.PasswordKdfAlgo current_algo; - if (passwordType == 1) { - TLRPC.TL_passwordKdfAlgoSHA256SHA256PBKDF2HMACSHA512iter100000SHA256ModPow algo = new TLRPC.TL_passwordKdfAlgoSHA256SHA256PBKDF2HMACSHA512iter100000SHA256ModPow(); - algo.salt1 = current_salt1; - algo.salt2 = current_salt2; - algo.g = current_g; - algo.p = current_p; - current_algo = algo; 
- } else { - current_algo = new TLRPC.TL_passwordKdfAlgoUnknown(); - } - + TLRPC.PasswordKdfAlgo current_algo = currentPassword.current_algo; if (current_algo instanceof TLRPC.TL_passwordKdfAlgoSHA256SHA256PBKDF2HMACSHA512iter100000SHA256ModPow) { byte[] passwordBytes = AndroidUtilities.getStringBytes(oldPassword); TLRPC.TL_passwordKdfAlgoSHA256SHA256PBKDF2HMACSHA512iter100000SHA256ModPow algo = (TLRPC.TL_passwordKdfAlgoSHA256SHA256PBKDF2HMACSHA512iter100000SHA256ModPow) current_algo; @@ -3557,9 +3543,7 @@ public class LoginActivity extends BaseFragment implements NotificationCenter.No TLRPC.TL_account_getPassword getPasswordReq = new TLRPC.TL_account_getPassword(); ConnectionsManager.getInstance(currentAccount).sendRequest(getPasswordReq, (response2, error2) -> AndroidUtilities.runOnUIThread(() -> { if (error2 == null) { - TLRPC.TL_account_password password = (TLRPC.TL_account_password) response2; - current_srp_B = password.srp_B; - current_srp_id = password.srp_id; + currentPassword = (TLRPC.TL_account_password) response2; onNextPressed(); } }), ConnectionsManager.RequestFlagWithoutLogin); @@ -3593,12 +3577,7 @@ public class LoginActivity extends BaseFragment implements NotificationCenter.No }); if (current_algo instanceof TLRPC.TL_passwordKdfAlgoSHA256SHA256PBKDF2HMACSHA512iter100000SHA256ModPow) { - TLRPC.TL_passwordKdfAlgoSHA256SHA256PBKDF2HMACSHA512iter100000SHA256ModPow algo = (TLRPC.TL_passwordKdfAlgoSHA256SHA256PBKDF2HMACSHA512iter100000SHA256ModPow) current_algo; - algo.salt1 = current_salt1; - algo.salt2 = current_salt2; - algo.g = current_g; - algo.p = current_p; - req.password = SRPHelper.startCheck(x_bytes, current_srp_id, current_srp_B, algo); + req.password = SRPHelper.startCheck(x_bytes, currentPassword.srp_id, currentPassword.srp_B, (TLRPC.TL_passwordKdfAlgoSHA256SHA256PBKDF2HMACSHA512iter100000SHA256ModPow) current_algo); if (req.password == null) { TLRPC.TL_error error = new TLRPC.TL_error(); error.text = "PASSWORD_HASH_INVALID"; @@ -3608,7 
+3587,6 @@ public class LoginActivity extends BaseFragment implements NotificationCenter.No ConnectionsManager.getInstance(currentAccount).sendRequest(req, requestDelegate, ConnectionsManager.RequestFlagFailOnServerErrors | ConnectionsManager.RequestFlagWithoutLogin); } }); - } @Override @@ -3834,6 +3812,7 @@ public class LoginActivity extends BaseFragment implements NotificationCenter.No private TextView cancelButton; private Bundle currentParams; + private String passwordString; private boolean nextPressed; public LoginActivityRecoverView(Context context) { @@ -3916,6 +3895,7 @@ public class LoginActivity extends BaseFragment implements NotificationCenter.No } codeField.setText(""); currentParams = params; + passwordString = currentParams.getString("password"); String email_unconfirmed_pattern = currentParams.getString("email_unconfirmed_pattern"); cancelButton.setText(LocaleController.formatString("RestoreEmailTrouble", R.string.RestoreEmailTrouble, email_unconfirmed_pattern)); @@ -3940,36 +3920,25 @@ public class LoginActivity extends BaseFragment implements NotificationCenter.No return; } - String oldPassword = codeField.getText().toString(); - if (oldPassword.length() == 0) { - onPasscodeError(false); - return; - } - nextPressed = true; - String code = codeField.getText().toString(); if (code.length() == 0) { onPasscodeError(false); return; } + nextPressed = true; needShowProgress(0); - TLRPC.TL_auth_recoverPassword req = new TLRPC.TL_auth_recoverPassword(); + TLRPC.TL_auth_checkRecoveryPassword req = new TLRPC.TL_auth_checkRecoveryPassword(); req.code = code; ConnectionsManager.getInstance(currentAccount).sendRequest(req, (response, error) -> AndroidUtilities.runOnUIThread(() -> { needHideProgress(false); nextPressed = false; - if (response instanceof TLRPC.TL_auth_authorization) { - AlertDialog.Builder builder = new AlertDialog.Builder(getParentActivity()); - builder.setPositiveButton(LocaleController.getString("OK", R.string.OK), (dialogInterface, i) -> 
onAuthSuccess((TLRPC.TL_auth_authorization) response)); - builder.setMessage(LocaleController.getString("PasswordReset", R.string.PasswordReset)); - builder.setTitle(LocaleController.getString("NekoX", R.string.NekoX)); - Dialog dialog = showDialog(builder.create()); - if (dialog != null) { - dialog.setCanceledOnTouchOutside(false); - dialog.setCancelable(false); - } + if (response instanceof TLRPC.TL_boolTrue) { + Bundle params = new Bundle(); + params.putString("emailCode", code); + params.putString("password", passwordString); + setPage(9, true, params, false); } else { - if (error.text.startsWith("CODE_INVALID")) { + if (error == null || error.text.startsWith("CODE_INVALID")) { onPasscodeError(true); } else if (error.text.startsWith("FLOOD_WAIT")) { int time = Utilities.parseInt(error.text); @@ -4030,6 +3999,292 @@ public class LoginActivity extends BaseFragment implements NotificationCenter.No } } + public class LoginActivityNewPasswordView extends SlideView { + + private EditTextBoldCursor[] codeField; + private TextView confirmTextView; + private TextView cancelButton; + + private String emailCode; + private String newPassword; + private String passwordString; + private TLRPC.TL_account_password currentPassword; + private Bundle currentParams; + private boolean nextPressed; + private int currentStage; + + public LoginActivityNewPasswordView(Context context, int stage) { + super(context); + currentStage = stage; + + setOrientation(VERTICAL); + + codeField = new EditTextBoldCursor[stage == 1 ? 1 : 2]; + + confirmTextView = new TextView(context); + confirmTextView.setTextColor(Theme.getColor(Theme.key_windowBackgroundWhiteGrayText6)); + confirmTextView.setTextSize(TypedValue.COMPLEX_UNIT_DIP, 14); + confirmTextView.setGravity((LocaleController.isRTL ? 
Gravity.RIGHT : Gravity.LEFT)); + confirmTextView.setLineSpacing(AndroidUtilities.dp(2), 1.0f); + addView(confirmTextView, LayoutHelper.createLinear(LayoutHelper.WRAP_CONTENT, LayoutHelper.WRAP_CONTENT, (LocaleController.isRTL ? Gravity.RIGHT : Gravity.LEFT))); + + for (int a = 0; a < codeField.length; a++) { + codeField[a] = new EditTextBoldCursor(context); + codeField[a].setTextColor(Theme.getColor(Theme.key_windowBackgroundWhiteBlackText)); + codeField[a].setCursorColor(Theme.getColor(Theme.key_windowBackgroundWhiteBlackText)); + codeField[a].setCursorSize(AndroidUtilities.dp(20)); + codeField[a].setCursorWidth(1.5f); + codeField[a].setHintTextColor(Theme.getColor(Theme.key_windowBackgroundWhiteHintText)); + codeField[a].setBackgroundDrawable(Theme.createEditTextDrawable(context, false)); + codeField[a].setImeOptions(EditorInfo.IME_ACTION_NEXT | EditorInfo.IME_FLAG_NO_EXTRACT_UI); + codeField[a].setTextSize(TypedValue.COMPLEX_UNIT_DIP, 18); + codeField[a].setMaxLines(1); + codeField[a].setPadding(0, 0, 0, 0); + if (stage == 0) { + codeField[a].setInputType(InputType.TYPE_CLASS_TEXT | InputType.TYPE_TEXT_VARIATION_PASSWORD); + } + codeField[a].setTransformationMethod(PasswordTransformationMethod.getInstance()); + codeField[a].setTypeface(Typeface.DEFAULT); + codeField[a].setGravity(LocaleController.isRTL ? Gravity.RIGHT : Gravity.LEFT); + addView(codeField[a], LayoutHelper.createLinear(LayoutHelper.MATCH_PARENT, 36, Gravity.CENTER_HORIZONTAL, 0, a == 0 ? 
20 : 30, 0, 0)); + int num = a; + codeField[a].setOnEditorActionListener((textView, i, keyEvent) -> { + if (num == 0 && codeField.length == 2) { + codeField[1].requestFocus(); + return true; + } else if (i == EditorInfo.IME_ACTION_NEXT) { + onNextPressed(); + return true; + } + return false; + }); + + if (stage == 0) { + if (a == 0) { + codeField[a].setHint(LocaleController.getString("PleaseEnterNewFirstPasswordHint", R.string.PleaseEnterNewFirstPasswordHint)); + } else { + codeField[a].setHint(LocaleController.getString("PleaseEnterNewSecondPasswordHint", R.string.PleaseEnterNewSecondPasswordHint)); + } + } else { + codeField[a].setHint(LocaleController.getString("PasswordHintPlaceholder", R.string.PasswordHintPlaceholder)); + } + } + + if (stage == 0) { + confirmTextView.setText(LocaleController.getString("PleaseEnterNewFirstPasswordLogin", R.string.PleaseEnterNewFirstPasswordLogin)); + } else { + confirmTextView.setText(LocaleController.getString("PasswordHintTextLogin", R.string.PasswordHintTextLogin)); + } + + cancelButton = new TextView(context); + cancelButton.setGravity((LocaleController.isRTL ? Gravity.RIGHT : Gravity.LEFT) | Gravity.BOTTOM); + cancelButton.setTextColor(Theme.getColor(Theme.key_windowBackgroundWhiteBlueText4)); + cancelButton.setTextSize(TypedValue.COMPLEX_UNIT_DIP, 14); + cancelButton.setLineSpacing(AndroidUtilities.dp(2), 1.0f); + cancelButton.setPadding(0, AndroidUtilities.dp(14), 0, 0); + cancelButton.setText(LocaleController.getString("YourEmailSkip", R.string.YourEmailSkip)); + addView(cancelButton, LayoutHelper.createLinear(LayoutHelper.WRAP_CONTENT, LayoutHelper.WRAP_CONTENT, Gravity.BOTTOM | (LocaleController.isRTL ? 
Gravity.RIGHT : Gravity.LEFT), 0, 6, 0, 14)); + cancelButton.setOnClickListener(view -> { + if (currentStage == 0) { + recoverPassword(null, null); + } else { + recoverPassword(newPassword, null); + } + }); + } + + @Override + public boolean needBackButton() { + return true; + } + + @Override + public void onCancelPressed() { + nextPressed = false; + } + + @Override + public String getHeaderName() { + return LocaleController.getString("NewPassword", R.string.NewPassword); + } + + @Override + public void setParams(Bundle params, boolean restore) { + if (params == null) { + return; + } + for (int a = 0; a < codeField.length; a++) { + codeField[a].setText(""); + } + currentParams = params; + emailCode = currentParams.getString("emailCode"); + passwordString = currentParams.getString("password"); + if (passwordString != null) { + SerializedData data = new SerializedData(Utilities.hexToBytes(passwordString)); + currentPassword = TLRPC.TL_account_password.TLdeserialize(data, data.readInt32(false), false); + TwoStepVerificationActivity.initPasswordNewAlgo(currentPassword); + } + newPassword = currentParams.getString("new_password"); + + AndroidUtilities.showKeyboard(codeField[0]); + codeField[0].requestFocus(); + } + + private void onPasscodeError(boolean clear, int num) { + if (getParentActivity() == null) { + return; + } + Vibrator v = (Vibrator) getParentActivity().getSystemService(Context.VIBRATOR_SERVICE); + if (v != null) { + v.vibrate(200); + } + AndroidUtilities.shakeView(codeField[num], 2, 0); + } + + @Override + public void onNextPressed() { + if (nextPressed) { + return; + } + + String code = codeField[0].getText().toString(); + if (code.length() == 0) { + onPasscodeError(false, 0); + return; + } + if (currentStage == 0) { + if (!code.equals(codeField[1].getText().toString())) { + onPasscodeError(false, 1); + return; + } + Bundle params = new Bundle(); + params.putString("emailCode", emailCode); + params.putString("new_password", code); + 
params.putString("password", passwordString); + setPage(10, true, params, false); + } else { + nextPressed = true; + needShowProgress(0); + recoverPassword(newPassword, code); + } + } + + private void recoverPassword(String password, String hint) { + TLRPC.TL_auth_recoverPassword req = new TLRPC.TL_auth_recoverPassword(); + req.code = emailCode; + if (!TextUtils.isEmpty(password)) { + req.flags |= 1; + req.new_settings = new TLRPC.TL_account_passwordInputSettings(); + req.new_settings.flags |= 1; + req.new_settings.hint = hint != null ? hint : ""; + req.new_settings.new_algo = currentPassword.new_algo; + } + Utilities.globalQueue.postRunnable(() -> { + byte[] newPasswordBytes; + if (password != null) { + newPasswordBytes = AndroidUtilities.getStringBytes(password); + } else { + newPasswordBytes = null; + } + + RequestDelegate requestDelegate = (response, error) -> AndroidUtilities.runOnUIThread(() -> { + if (error != null && ("SRP_ID_INVALID".equals(error.text) || "NEW_SALT_INVALID".equals(error.text))) { + TLRPC.TL_account_getPassword getPasswordReq = new TLRPC.TL_account_getPassword(); + ConnectionsManager.getInstance(currentAccount).sendRequest(getPasswordReq, (response2, error2) -> AndroidUtilities.runOnUIThread(() -> { + if (error2 == null) { + currentPassword = (TLRPC.TL_account_password) response2; + TwoStepVerificationActivity.initPasswordNewAlgo(currentPassword); + recoverPassword(password, hint); + } + }), ConnectionsManager.RequestFlagWithoutLogin); + return; + } + needHideProgress(false); + if (response instanceof TLRPC.auth_Authorization) { + AlertDialog.Builder builder = new AlertDialog.Builder(getParentActivity()); + builder.setPositiveButton(LocaleController.getString("OK", R.string.OK), (dialogInterface, i) -> onAuthSuccess((TLRPC.TL_auth_authorization) response)); + if (TextUtils.isEmpty(password)) { + builder.setMessage(LocaleController.getString("PasswordReset", R.string.PasswordReset)); + } else { + 
builder.setMessage(LocaleController.getString("YourPasswordChangedSuccessText", R.string.YourPasswordChangedSuccessText)); + } + builder.setTitle(LocaleController.getString("TwoStepVerificationTitle", R.string.TwoStepVerificationTitle)); + Dialog dialog = showDialog(builder.create()); + if (dialog != null) { + dialog.setCanceledOnTouchOutside(false); + dialog.setCancelable(false); + } + } else if (error != null) { + nextPressed = false; + if (error.text.startsWith("FLOOD_WAIT")) { + int time = Utilities.parseInt(error.text); + String timeString; + if (time < 60) { + timeString = LocaleController.formatPluralString("Seconds", time); + } else { + timeString = LocaleController.formatPluralString("Minutes", time / 60); + } + needShowAlert(LocaleController.getString("AppName", R.string.AppName), LocaleController.formatString("FloodWaitTime", R.string.FloodWaitTime, timeString)); + } else { + needShowAlert(LocaleController.getString("AppName", R.string.AppName), error.text); + } + } + }); + + if (currentPassword.new_algo instanceof TLRPC.TL_passwordKdfAlgoSHA256SHA256PBKDF2HMACSHA512iter100000SHA256ModPow) { + if (password != null) { + TLRPC.TL_passwordKdfAlgoSHA256SHA256PBKDF2HMACSHA512iter100000SHA256ModPow algo = (TLRPC.TL_passwordKdfAlgoSHA256SHA256PBKDF2HMACSHA512iter100000SHA256ModPow) currentPassword.new_algo; + req.new_settings.new_password_hash = SRPHelper.getVBytes(newPasswordBytes, algo); + if (req.new_settings.new_password_hash == null) { + TLRPC.TL_error error = new TLRPC.TL_error(); + error.text = "ALGO_INVALID"; + requestDelegate.run(null, error); + } + } + ConnectionsManager.getInstance(currentAccount).sendRequest(req, requestDelegate, ConnectionsManager.RequestFlagFailOnServerErrors | ConnectionsManager.RequestFlagWithoutLogin); + } else { + TLRPC.TL_error error = new TLRPC.TL_error(); + error.text = "PASSWORD_HASH_INVALID"; + requestDelegate.run(null, error); + } + }); + } + + @Override + public boolean onBackPressed(boolean force) { + 
needHideProgress(true); + currentParams = null; + nextPressed = false; + return true; + } + + @Override + public void onShow() { + super.onShow(); + AndroidUtilities.runOnUIThread(() -> { + if (codeField != null) { + codeField[0].requestFocus(); + codeField[0].setSelection(codeField[0].length()); + } + }, 100); + } + + @Override + public void saveStateParams(Bundle bundle) { + if (currentParams != null) { + bundle.putBundle("recoveryview_params" + currentStage, currentParams); + } + } + + @Override + public void restoreStateParams(Bundle bundle) { + currentParams = bundle.getBundle("recoveryview_params" + currentStage); + if (currentParams != null) { + setParams(currentParams, true); + } + } + } + public class LoginActivityRegisterView extends SlideView implements ImageUpdater.ImageUpdaterDelegate { private EditTextBoldCursor firstNameField; @@ -4705,6 +4960,21 @@ public class LoginActivity extends BaseFragment implements NotificationCenter.No arrayList.add(new ThemeDescription(smsView4.blueImageView, 0, null, null, null, descriptionDelegate, Theme.key_chats_actionBackground)); + for (int a = 0; a < 2; a++) { + if (views[9 + a] == null) { + continue; + } + LoginActivityNewPasswordView view = (LoginActivityNewPasswordView) views[9 + a]; + arrayList.add(new ThemeDescription(view.confirmTextView, ThemeDescription.FLAG_TEXTCOLOR, null, null, null, null, Theme.key_windowBackgroundWhiteGrayText6)); + for (int b = 0; b < view.codeField.length; b++) { + arrayList.add(new ThemeDescription(view.codeField[b], ThemeDescription.FLAG_TEXTCOLOR, null, null, null, null, Theme.key_windowBackgroundWhiteBlackText)); + arrayList.add(new ThemeDescription(view.codeField[b], ThemeDescription.FLAG_HINTTEXTCOLOR, null, null, null, null, Theme.key_windowBackgroundWhiteHintText)); + arrayList.add(new ThemeDescription(view.codeField[b], ThemeDescription.FLAG_BACKGROUNDFILTER, null, null, null, null, Theme.key_windowBackgroundWhiteInputField)); + arrayList.add(new 
ThemeDescription(view.codeField[b], ThemeDescription.FLAG_BACKGROUNDFILTER | ThemeDescription.FLAG_DRAWABLESELECTEDSTATE, null, null, null, null, Theme.key_windowBackgroundWhiteInputFieldActivated)); + } + arrayList.add(new ThemeDescription(view.cancelButton, ThemeDescription.FLAG_TEXTCOLOR, null, null, null, null, Theme.key_windowBackgroundWhiteBlueText4)); + } + return arrayList; } diff --git a/TMessagesProj/src/main/java/org/telegram/ui/MessageEnterTransitionContainer.java b/TMessagesProj/src/main/java/org/telegram/ui/MessageEnterTransitionContainer.java new file mode 100644 index 000000000..6358a8e65 --- /dev/null +++ b/TMessagesProj/src/main/java/org/telegram/ui/MessageEnterTransitionContainer.java @@ -0,0 +1,63 @@ +package org.telegram.ui; + +import android.annotation.SuppressLint; +import android.content.Context; +import android.graphics.Canvas; +import android.view.View; + +import com.google.android.exoplayer2.util.Log; + +import org.telegram.messenger.NotificationCenter; + +import java.util.ArrayList; + +@SuppressLint("ViewConstructor") +public class MessageEnterTransitionContainer extends View { + + private ArrayList transitions = new ArrayList<>(); + private final int currentAccount; + + Runnable hideRunnable = () -> setVisibility(View.GONE); + + public MessageEnterTransitionContainer(Context context, int currentAccount) { + super(context); + this.currentAccount = currentAccount; + } + + public interface Transition { + void onDraw(Canvas canvas); + } + + void addTransition(Transition transition) { + transitions.add(transition); + checkVisibility(); + } + + void removeTransition(Transition transition) { + transitions.remove(transition); + checkVisibility(); + } + + long time; + @Override + protected void onDraw(Canvas canvas) { + if (transitions.isEmpty()) { + return; + } + long currentTime = System.currentTimeMillis(); + time = currentTime; + for (int i = 0; i < transitions.size(); i++) { + transitions.get(i).onDraw(canvas); + } + } + + private void 
checkVisibility() { + if (transitions.isEmpty() && getVisibility() != View.GONE) { + NotificationCenter.getInstance(currentAccount).removeDelayed(hideRunnable); + NotificationCenter.getInstance(currentAccount).doOnIdle(hideRunnable); + } else if (!transitions.isEmpty() && getVisibility() != View.VISIBLE) { + NotificationCenter.getInstance(currentAccount).removeDelayed(hideRunnable); + setVisibility(View.VISIBLE); + } + } +} diff --git a/TMessagesProj/src/main/java/org/telegram/ui/PhotoViewer.java b/TMessagesProj/src/main/java/org/telegram/ui/PhotoViewer.java index b2c61484e..c9e06290e 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/PhotoViewer.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/PhotoViewer.java @@ -383,6 +383,7 @@ public class PhotoViewer implements NotificationCenter.NotificationCenterDelegat private boolean padImageForHorizontalInsets; private boolean doneButtonPressed; + boolean keyboardAnimationEnabled; private boolean pausedOnPause = false; @@ -1901,6 +1902,7 @@ public class PhotoViewer implements NotificationCenter.NotificationCenterDelegat private Paint paint = new Paint(); private boolean ignoreLayout; private boolean captionAbove; + AdjustPanLayoutHelper adjustPanLayoutHelper = new AdjustPanLayoutHelper(this) { @Override protected void onPanTranslationUpdate(float y, float progress, boolean keyboardVisible) { @@ -2016,7 +2018,7 @@ public class PhotoViewer implements NotificationCenter.NotificationCenterDelegat @Override protected boolean heightAnimationEnabled() { - return !captionEditText.isPopupShowing(); + return !captionEditText.isPopupShowing() && keyboardAnimationEnabled; } }; @@ -2897,14 +2899,31 @@ public class PhotoViewer implements NotificationCenter.NotificationCenterDelegat final float translationY = captionTextViewSwitcher.getTranslationY(); boolean buttonVisible = scrollY == 0 && translationY == 0; + boolean enalrgeIconVisible = scrollY == 0 && translationY == 0; if (!buttonVisible) { final int progressBottom = 
photoProgressViews[0].getY() + photoProgressViews[0].size; final int topMargin = (isStatusBarVisible() ? AndroidUtilities.statusBarHeight : 0) + ActionBar.getCurrentActionBarHeight(); final int captionTop = captionContainer.getTop() + (int) translationY - scrollY + topMargin - AndroidUtilities.dp(12); + final int enlargeIconTop = (int) fullscreenButton[0].getY(); + enalrgeIconVisible = captionTop > enlargeIconTop + AndroidUtilities.dp(32); buttonVisible = captionTop > progressBottom; } + if (allowShowFullscreenButton) { + if (fullscreenButton[0].getTag() != null && ((Integer) fullscreenButton[0].getTag()) == 3 && enalrgeIconVisible) { + fullscreenButton[0].setTag(2); + fullscreenButton[0].animate().alpha(1).setDuration(150).setListener(new AnimatorListenerAdapter() { + @Override + public void onAnimationEnd(Animator animation) { + fullscreenButton[0].setTag(null); + } + }).start(); + } else if (fullscreenButton[0].getTag() == null && !enalrgeIconVisible) { + fullscreenButton[0].setTag(3); + fullscreenButton[0].animate().alpha(0).setListener(null).setDuration(150).start(); + } + } photoProgressViews[0].setIndexedAlpha(2, buttonVisible ? 
1f : 0f, true); } } @@ -3430,6 +3449,7 @@ public class PhotoViewer implements NotificationCenter.NotificationCenterDelegat } public void setParentActivity(final Activity activity) { + Theme.createChatResources(activity, false); currentAccount = UserConfig.selectedAccount; centerImage.setCurrentAccount(currentAccount); leftImage.setCurrentAccount(currentAccount); @@ -5622,6 +5642,9 @@ public class PhotoViewer implements NotificationCenter.NotificationCenterDelegat @Override public boolean onTouchEvent(MotionEvent event) { + if (bottomTouchEnabled && event.getAction() == MotionEvent.ACTION_DOWN) { + keyboardAnimationEnabled = true; + } return !bottomTouchEnabled && super.onTouchEvent(event); } @@ -6465,6 +6488,7 @@ public class PhotoViewer implements NotificationCenter.NotificationCenterDelegat if (!windowView.isFocusable()) { makeFocusable(); } + keyboardAnimationEnabled = true; selectedPhotosListView.setEnabled(false); photosCounterView.setRotationX(0.0f); isPhotosListViewVisible = false; @@ -7219,7 +7243,7 @@ public class PhotoViewer implements NotificationCenter.NotificationCenterDelegat final int duration = currentMessageObject.getDuration(); final String name = currentMessageObject.getFileName(); if (!TextUtils.isEmpty(name)) { - if (duration >= 20 * 60) { + if (duration >= 10 * 60) { if (currentMessageObject.forceSeekTo < 0) { SharedPreferences preferences = ApplicationLoader.applicationContext.getSharedPreferences("media_saved_pos", Activity.MODE_PRIVATE); float pos = preferences.getFloat(name, -1); @@ -13422,7 +13446,7 @@ public class PhotoViewer implements NotificationCenter.NotificationCenterDelegat if (isActionBarVisible) { if (currentScale <= 1.0001f) { - if (!allowShowFullscreenButton) { + if (!allowShowFullscreenButton && fullscreenButton[0].getTag() == null) { fullscreenButton[0].animate().alpha(1.0f).setDuration(120).setListener(new AnimatorListenerAdapter() { @Override public void onAnimationEnd(Animator animation) { diff --git 
a/TMessagesProj/src/main/java/org/telegram/ui/PrivacyControlActivity.java b/TMessagesProj/src/main/java/org/telegram/ui/PrivacyControlActivity.java index b98baaa65..657683e43 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/PrivacyControlActivity.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/PrivacyControlActivity.java @@ -50,6 +50,7 @@ import org.telegram.ui.Components.BackgroundGradientDrawable; import org.telegram.ui.Components.CombinedDrawable; import org.telegram.ui.Components.HintView; import org.telegram.ui.Components.LayoutHelper; +import org.telegram.ui.Components.MotionBackgroundDrawable; import org.telegram.ui.Components.RecyclerListView; import java.util.ArrayList; @@ -185,7 +186,7 @@ public class PrivacyControlActivity extends BaseFragment implements Notification } backgroundDrawable = newDrawable; } - if (backgroundDrawable instanceof ColorDrawable || backgroundDrawable instanceof GradientDrawable) { + if (backgroundDrawable instanceof ColorDrawable || backgroundDrawable instanceof GradientDrawable || backgroundDrawable instanceof MotionBackgroundDrawable) { backgroundDrawable.setBounds(0, 0, getMeasuredWidth(), getMeasuredHeight()); if (backgroundDrawable instanceof BackgroundGradientDrawable) { backgroundGradientDisposable = ((BackgroundGradientDrawable) backgroundDrawable).drawExactBoundsSize(canvas, this); diff --git a/TMessagesProj/src/main/java/org/telegram/ui/PrivacySettingsActivity.java b/TMessagesProj/src/main/java/org/telegram/ui/PrivacySettingsActivity.java index 41d3786f2..8b73386c5 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/PrivacySettingsActivity.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/PrivacySettingsActivity.java @@ -41,7 +41,6 @@ import org.telegram.ui.Cells.TextCheckCell; import org.telegram.ui.Cells.TextInfoPrivacyCell; import org.telegram.ui.Cells.TextSettingsCell; import org.telegram.ui.Components.AlertsCreator; -import org.telegram.ui.Components.Bulletin; import 
org.telegram.ui.Components.BulletinFactory; import org.telegram.ui.Components.LayoutHelper; import org.telegram.ui.Components.RecyclerListView; @@ -506,7 +505,9 @@ public class PrivacySettingsActivity extends BaseFragment implements Notificatio listAdapter.notifyItemChanged(passwordRow); } } else { + currentPassword = null; loadPasswordSettings(); + updateRows(); } } } diff --git a/TMessagesProj/src/main/java/org/telegram/ui/ProfileActivity.java b/TMessagesProj/src/main/java/org/telegram/ui/ProfileActivity.java index ffe92e141..c65842a43 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/ProfileActivity.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/ProfileActivity.java @@ -126,6 +126,7 @@ import org.telegram.ui.Cells.GraySectionCell; import org.telegram.ui.Cells.HeaderCell; import org.telegram.ui.Cells.NotificationsCheckCell; import org.telegram.ui.Cells.SettingsSearchCell; +import org.telegram.ui.Cells.SettingsSuggestionCell; import org.telegram.ui.Cells.ShadowSectionCell; import org.telegram.ui.Cells.TextCell; import org.telegram.ui.Cells.TextDetailCell; @@ -138,6 +139,7 @@ import org.telegram.ui.Components.AudioPlayerAlert; import org.telegram.ui.Components.AvatarDrawable; import org.telegram.ui.Components.BackupImageView; import org.telegram.ui.Components.BulletinFactory; +import org.telegram.ui.Components.ChatAvatarContainer; import org.telegram.ui.Components.CombinedDrawable; import org.telegram.ui.Components.CrossfadeDrawable; import org.telegram.ui.Components.CubicBezierInterpolator; @@ -370,6 +372,10 @@ public class ProfileActivity extends BaseFragment implements NotificationCenter. 
private int numberRow; private int setUsernameRow; private int bioRow; + private int phoneSuggestionSectionRow; + private int phoneSuggestionRow; + private int passwordSuggestionSectionRow; + private int passwordSuggestionRow; private int settingsSectionRow; private int settingsSectionRow2; private int notificationRow; @@ -438,6 +444,8 @@ public class ProfileActivity extends BaseFragment implements NotificationCenter. PinchToZoomHelper pinchToZoomHelper; + private View transitionOnlineText; + private final Property HEADER_SHADOW = new AnimationProperties.FloatProperty("headerShadow") { @Override public void setValue(ProfileActivity object, float value) { @@ -1342,6 +1350,7 @@ public class ProfileActivity extends BaseFragment implements NotificationCenter. } getNotificationCenter().addObserver(this, NotificationCenter.contactsDidLoad); + getNotificationCenter().addObserver(this, NotificationCenter.newSuggestionsAvailable); getNotificationCenter().addObserver(this, NotificationCenter.encryptedChatCreated); getNotificationCenter().addObserver(this, NotificationCenter.encryptedChatUpdated); getNotificationCenter().addObserver(this, NotificationCenter.blockedUsersDidLoad); @@ -1465,6 +1474,7 @@ public class ProfileActivity extends BaseFragment implements NotificationCenter. avatarsViewPager.onDestroy(); } if (user_id != 0) { + getNotificationCenter().removeObserver(this, NotificationCenter.newSuggestionsAvailable); getNotificationCenter().removeObserver(this, NotificationCenter.contactsDidLoad); getNotificationCenter().removeObserver(this, NotificationCenter.encryptedChatCreated); getNotificationCenter().removeObserver(this, NotificationCenter.encryptedChatUpdated); @@ -3022,7 +3032,7 @@ public class ProfileActivity extends BaseFragment implements NotificationCenter. 
LocaleController.getString("DebugMenuClearMediaCache", R.string.DebugMenuClearMediaCache), LocaleController.getString("DebugMenuCallSettings", R.string.DebugMenuCallSettings), null, - BuildVars.DEBUG_PRIVATE_VERSION ? "Check for app updates" : null, + BuildVars.DEBUG_PRIVATE_VERSION || AndroidUtilities.isStandaloneApp() ? LocaleController.getString("DebugMenuCheckAppUpdate", R.string.DebugMenuCheckAppUpdate) : null, LocaleController.getString("DebugMenuReadAllDialogs", R.string.DebugMenuReadAllDialogs), SharedConfig.pauseMusicOnRecord ? LocaleController.getString("DebugMenuDisablePauseMusic", R.string.DebugMenuDisablePauseMusic) : LocaleController.getString("DebugMenuEnablePauseMusic", R.string.DebugMenuEnablePauseMusic), BuildVars.DEBUG_VERSION && !AndroidUtilities.isTablet() && Build.VERSION.SDK_INT >= 23 ? (SharedConfig.smoothKeyboard ? LocaleController.getString("DebugMenuDisableSmoothKeyboard", R.string.DebugMenuDisableSmoothKeyboard) : LocaleController.getString("DebugMenuEnableSmoothKeyboard", R.string.DebugMenuEnableSmoothKeyboard)) : null, @@ -3030,6 +3040,7 @@ public class ProfileActivity extends BaseFragment implements NotificationCenter. Build.VERSION.SDK_INT >= 21 ? (SharedConfig.noStatusBar ? "Show status bar background" : "Hide status bar background") : null, "Scan accounts", BuildVars.DEBUG_PRIVATE_VERSION ? "Clean app update" : null, + BuildVars.DEBUG_PRIVATE_VERSION ? "Reset suggestions" : null, }; builder.setItems(items, (dialog, which) -> { if (which == 0) { @@ -3114,6 +3125,11 @@ public class ProfileActivity extends BaseFragment implements NotificationCenter. 
SharedConfig.pendingAppUpdate = null; SharedConfig.saveConfig(); NotificationCenter.getGlobalInstance().postNotificationName(NotificationCenter.appUpdateAvailable); + } else if (which == 16) { + Set suggestions = getMessagesController().pendingSuggestions; + suggestions.add("VALIDATE_PHONE_NUMBER"); + suggestions.add("VALIDATE_PASSWORD"); + getNotificationCenter().postNotificationName(NotificationCenter.newSuggestionsAvailable); } }); builder.setNegativeButton(LocaleController.getString("Cancel", R.string.Cancel), null); @@ -3281,7 +3297,21 @@ public class ProfileActivity extends BaseFragment implements NotificationCenter. frameLayout.addView(topView); avatarContainer = new FrameLayout(context); - avatarContainer2 = new FrameLayout(context); + avatarContainer2 = new FrameLayout(context) { + @Override + protected void dispatchDraw(Canvas canvas) { + super.dispatchDraw(canvas); + if (transitionOnlineText != null) { + canvas.save(); + canvas.translate(onlineTextView[0].getX(), onlineTextView[0].getY()); + canvas.saveLayerAlpha(0 ,0, transitionOnlineText.getMeasuredWidth(), transitionOnlineText.getMeasuredHeight(), (int) (255 * (1f - animationProgress)), Canvas.ALL_SAVE_FLAG); + transitionOnlineText.draw(canvas); + canvas.restore(); + canvas.restore(); + invalidate(); + } + } + }; AndroidUtilities.updateViewVisibilityAnimated(avatarContainer2, true, 1f, false); frameLayout.addView(avatarContainer2, LayoutHelper.createFrame(LayoutHelper.MATCH_PARENT, LayoutHelper.MATCH_PARENT)); avatarContainer.setPivotX(0); @@ -3302,6 +3332,7 @@ public class ProfileActivity extends BaseFragment implements NotificationCenter. } } }; + avatarImage.getImageReceiver().setAllowDecodeSingleFrame(true); avatarImage.setRoundRadius(AndroidUtilities.dp(21)); avatarImage.setPivotX(0); avatarImage.setPivotY(0); @@ -4979,6 +5010,13 @@ public class ProfileActivity extends BaseFragment implements NotificationCenter. 
listAdapter.notifyItemInserted(emptyRow); } } + } else if (id == NotificationCenter.newSuggestionsAvailable) { + int prevRow1 = passwordSuggestionRow; + int prevRow2 = phoneSuggestionRow; + updateRowsIds(); + if (prevRow1 != passwordSuggestionRow || prevRow2 != phoneSuggestionRow) { + listAdapter.notifyDataSetChanged(); + } } } @@ -5315,9 +5353,7 @@ public class ProfileActivity extends BaseFragment implements NotificationCenter. animators.add(ObjectAnimator.ofFloat(idTextView, View.ALPHA, 0.0f, 1.0f)); } for (int a = 0; a < 2; a++) { - onlineTextView[a].setAlpha(a == 0 ? 1.0f : 0.0f); nameTextView[a].setAlpha(a == 0 ? 1.0f : 0.0f); - animators.add(ObjectAnimator.ofFloat(onlineTextView[a], View.ALPHA, a == 0 ? 0.0f : 1.0f)); animators.add(ObjectAnimator.ofFloat(nameTextView[a], View.ALPHA, a == 0 ? 0.0f : 1.0f)); } if (timeItem.getTag() != null) { @@ -5341,6 +5377,26 @@ public class ProfileActivity extends BaseFragment implements NotificationCenter. editItem.setAlpha(0.0f); animators.add(ObjectAnimator.ofFloat(editItem, View.ALPHA, 1.0f)); } + + boolean onlineTextCrosafade = false; + BaseFragment previousFragment = parentLayout.fragmentsStack.size() > 1 ? parentLayout.fragmentsStack.get(parentLayout.fragmentsStack.size() - 2) : null; + if (previousFragment instanceof ChatActivity) { + ChatAvatarContainer avatarContainer = ((ChatActivity) previousFragment).getAvatarContainer(); + if (avatarContainer.getSubtitleTextView().getLeftDrawable() != null) { + transitionOnlineText = avatarContainer.getSubtitleTextView(); + avatarContainer2.invalidate(); + onlineTextCrosafade = true; + onlineTextView[0].setAlpha(0f); + onlineTextView[1].setAlpha(0f); + animators.add(ObjectAnimator.ofFloat(onlineTextView[1], View.ALPHA, 1.0f)); + } + } + if (!onlineTextCrosafade) { + for (int a = 0; a < 2; a++) { + onlineTextView[a].setAlpha(a == 0 ? 1.0f : 0.0f); + animators.add(ObjectAnimator.ofFloat(onlineTextView[a], View.ALPHA, a == 0 ? 
0.0f : 1.0f)); + } + } animatorSet.playTogether(animators); } else { initialAnimationExtraHeight = extraHeight; @@ -5352,7 +5408,6 @@ public class ProfileActivity extends BaseFragment implements NotificationCenter. animators.add(ObjectAnimator.ofFloat(writeButton, View.ALPHA, 0.0f)); } for (int a = 0; a < 2; a++) { - animators.add(ObjectAnimator.ofFloat(onlineTextView[a], View.ALPHA, a == 0 ? 1.0f : 0.0f)); animators.add(ObjectAnimator.ofFloat(nameTextView[a], View.ALPHA, a == 0 ? 1.0f : 0.0f)); } if (timeItem.getTag() != null) { @@ -5376,6 +5431,24 @@ public class ProfileActivity extends BaseFragment implements NotificationCenter. editItem.setAlpha(1.0f); animators.add(ObjectAnimator.ofFloat(editItem, View.ALPHA, 0.0f)); } + + boolean crossfadeOnlineText = false; + BaseFragment previousFragment = parentLayout.fragmentsStack.size() > 1 ? parentLayout.fragmentsStack.get(parentLayout.fragmentsStack.size() - 2) : null; + if (previousFragment instanceof ChatActivity) { + ChatAvatarContainer avatarContainer = ((ChatActivity) previousFragment).getAvatarContainer(); + if (avatarContainer.getSubtitleTextView().getLeftDrawable() != null) { + transitionOnlineText = avatarContainer.getSubtitleTextView(); + avatarContainer2.invalidate(); + crossfadeOnlineText = true; + animators.add(ObjectAnimator.ofFloat(onlineTextView[0], View.ALPHA, 0.0f)); + animators.add(ObjectAnimator.ofFloat(onlineTextView[1], View.ALPHA, 0.0f)); + } + } + if (!crossfadeOnlineText) { + for (int a = 0; a < 2; a++) { + animators.add(ObjectAnimator.ofFloat(onlineTextView[a], View.ALPHA, a == 0 ? 1.0f : 0.0f)); + } + } animatorSet.playTogether(animators); } profileTransitionInProgress = true; @@ -5401,6 +5474,8 @@ public class ProfileActivity extends BaseFragment implements NotificationCenter. 
avatarsViewPager.setVisibility(View.VISIBLE); idTextView.setAlpha(1.0f); } + transitionOnlineText = null; + avatarContainer2.invalidate(); profileTransitionInProgress = false; fragmentView.invalidate(); } @@ -5559,6 +5634,10 @@ public class ProfileActivity extends BaseFragment implements NotificationCenter. numberRow = -1; setUsernameRow = -1; bioRow = -1; + phoneSuggestionSectionRow = -1; + phoneSuggestionRow = -1; + passwordSuggestionSectionRow = -1; + passwordSuggestionRow = -1; settingsSectionRow = -1; settingsSectionRow2 = -1; notificationRow = -1; @@ -5640,7 +5719,19 @@ public class ProfileActivity extends BaseFragment implements NotificationCenter. numberRow = rowCount++; } bioRow = rowCount++; + settingsSectionRow = rowCount++; + + Set suggestions = getMessagesController().pendingSuggestions; + if (suggestions.contains("VALIDATE_PHONE_NUMBER")) { + phoneSuggestionRow = rowCount++; + phoneSuggestionSectionRow = rowCount++; + } + if (suggestions.contains("VALIDATE_PASSWORD")) { + passwordSuggestionRow = rowCount++; + passwordSuggestionSectionRow = rowCount++; + } + settingsSectionRow2 = rowCount++; notificationRow = rowCount++; dataRow = rowCount++; @@ -6938,6 +7029,30 @@ public class ProfileActivity extends BaseFragment implements NotificationCenter. view.setBackgroundDrawable(combinedDrawable); break; } + case 15: { + view = new SettingsSuggestionCell(mContext) { + @Override + protected void onYesClick(int type) { + getNotificationCenter().removeObserver(ProfileActivity.this, NotificationCenter.newSuggestionsAvailable); + getMessagesController().removeSuggestion(0, type == SettingsSuggestionCell.TYPE_PHONE ? "VALIDATE_PHONE_NUMBER" : "VALIDATE_PASSWORD"); + getNotificationCenter().addObserver(ProfileActivity.this, NotificationCenter.newSuggestionsAvailable); + int oldRow = type == SettingsSuggestionCell.TYPE_PHONE ? 
phoneSuggestionRow : passwordSuggestionRow; + updateRowsIds(); + saveScrollPosition(); + listAdapter.notifyItemRangeRemoved(oldRow, 2); + } + + @Override + protected void onNoClick(int type) { + if (type == SettingsSuggestionCell.TYPE_PHONE) { + presentFragment(new ActionIntroActivity(ActionIntroActivity.ACTION_TYPE_CHANGE_PHONE_NUMBER)); + } else { + presentFragment(new TwoStepVerificationSetupActivity(TwoStepVerificationSetupActivity.TYPE_VERIFY, null)); + } + } + }; + break; + } } if (viewType != 13) { view.setLayoutParams(new RecyclerView.LayoutParams(RecyclerView.LayoutParams.MATCH_PARENT, RecyclerView.LayoutParams.WRAP_CONTENT)); @@ -7272,6 +7387,10 @@ public class ProfileActivity extends BaseFragment implements NotificationCenter. case 12: holder.itemView.requestLayout(); break; + case 15: + SettingsSuggestionCell suggestionCell = (SettingsSuggestionCell) holder.itemView; + suggestionCell.setType(position == passwordSuggestionRow ? SettingsSuggestionCell.TYPE_PASSWORD : SettingsSuggestionCell.TYPE_PHONE); + break; } } @@ -7312,7 +7431,8 @@ public class ProfileActivity extends BaseFragment implements NotificationCenter. return 6; } else if (position == infoSectionRow || position == lastSectionRow || position == membersSectionRow || position == secretSettingsSectionRow || position == settingsSectionRow || position == devicesSectionRow || - position == helpSectionCell || position == setAvatarSectionRow) { + position == helpSectionCell || position == setAvatarSectionRow || position == passwordSuggestionSectionRow || + position == phoneSuggestionSectionRow) { return 7; } else if (position >= membersStartRow && position < membersEndRow) { return 8; @@ -7324,6 +7444,8 @@ public class ProfileActivity extends BaseFragment implements NotificationCenter. 
return 13; } else if (position == versionRow) { return 14; + } else if (position == passwordSuggestionRow || position == phoneSuggestionRow) { + return 15; } return 4; } @@ -7990,6 +8112,16 @@ public class ProfileActivity extends BaseFragment implements NotificationCenter. arrayList.add(new ThemeDescription(listView, 0, new Class[]{HeaderCell.class}, new String[]{"textView"}, null, null, null, Theme.key_windowBackgroundWhiteBlueHeader)); + arrayList.add(new ThemeDescription(listView, 0, new Class[]{SettingsSuggestionCell.class}, new String[]{"textView"}, null, null, null, Theme.key_windowBackgroundWhiteBlueHeader)); + arrayList.add(new ThemeDescription(listView, 0, new Class[]{SettingsSuggestionCell.class}, new String[]{"detailTextView"}, null, null, null, Theme.key_windowBackgroundWhiteGrayText2)); + arrayList.add(new ThemeDescription(listView, ThemeDescription.FLAG_LINKCOLOR, new Class[]{SettingsSuggestionCell.class}, new String[]{"detailTextView"}, null, null, null, Theme.key_windowBackgroundWhiteLinkText)); + arrayList.add(new ThemeDescription(listView, 0, new Class[]{SettingsSuggestionCell.class}, new String[]{"yesButton"}, null, null, null, Theme.key_featuredStickers_buttonText)); + arrayList.add(new ThemeDescription(listView, ThemeDescription.FLAG_USEBACKGROUNDDRAWABLE, new Class[]{SettingsSuggestionCell.class}, new String[]{"yesButton"}, null, null, null, Theme.key_featuredStickers_addButton)); + arrayList.add(new ThemeDescription(listView, ThemeDescription.FLAG_USEBACKGROUNDDRAWABLE | ThemeDescription.FLAG_DRAWABLESELECTEDSTATE, new Class[]{SettingsSuggestionCell.class}, new String[]{"yesButton"}, null, null, null, Theme.key_featuredStickers_addButtonPressed)); + arrayList.add(new ThemeDescription(listView, 0, new Class[]{SettingsSuggestionCell.class}, new String[]{"noButton"}, null, null, null, Theme.key_featuredStickers_buttonText)); + arrayList.add(new ThemeDescription(listView, ThemeDescription.FLAG_USEBACKGROUNDDRAWABLE, new 
Class[]{SettingsSuggestionCell.class}, new String[]{"noButton"}, null, null, null, Theme.key_featuredStickers_addButton)); + arrayList.add(new ThemeDescription(listView, ThemeDescription.FLAG_USEBACKGROUNDDRAWABLE | ThemeDescription.FLAG_DRAWABLESELECTEDSTATE, new Class[]{SettingsSuggestionCell.class}, new String[]{"noButton"}, null, null, null, Theme.key_featuredStickers_addButtonPressed)); + arrayList.add(new ThemeDescription(listView, 0, new Class[]{NotificationsCheckCell.class}, new String[]{"textView"}, null, null, null, Theme.key_windowBackgroundWhiteBlackText)); arrayList.add(new ThemeDescription(listView, 0, new Class[]{NotificationsCheckCell.class}, new String[]{"valueTextView"}, null, null, null, Theme.key_windowBackgroundWhiteGrayText2)); arrayList.add(new ThemeDescription(listView, 0, new Class[]{NotificationsCheckCell.class}, new String[]{"checkBox"}, null, null, null, Theme.key_switchTrack)); @@ -8170,6 +8302,10 @@ public class ProfileActivity extends BaseFragment implements NotificationCenter. 
put(++pointer, numberRow, sparseIntArray); put(++pointer, setUsernameRow, sparseIntArray); put(++pointer, bioRow, sparseIntArray); + put(++pointer, phoneSuggestionRow, sparseIntArray); + put(++pointer, phoneSuggestionSectionRow, sparseIntArray); + put(++pointer, passwordSuggestionRow, sparseIntArray); + put(++pointer, passwordSuggestionSectionRow, sparseIntArray); put(++pointer, settingsSectionRow, sparseIntArray); put(++pointer, settingsSectionRow2, sparseIntArray); put(++pointer, notificationRow, sparseIntArray); diff --git a/TMessagesProj/src/main/java/org/telegram/ui/StroageUsageView.java b/TMessagesProj/src/main/java/org/telegram/ui/StroageUsageView.java index deb9377c2..601e1fd48 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/StroageUsageView.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/StroageUsageView.java @@ -3,7 +3,9 @@ package org.telegram.ui; import android.animation.ValueAnimator; import android.content.Context; import android.graphics.Canvas; +import android.graphics.Color; import android.graphics.Paint; +import android.text.SpannableString; import android.view.View; import android.view.ViewGroup; import android.widget.FrameLayout; @@ -17,11 +19,14 @@ import org.telegram.messenger.LocaleController; import org.telegram.messenger.R; import org.telegram.ui.ActionBar.Theme; import org.telegram.ui.Cells.TextSettingsCell; +import org.telegram.ui.Components.EllipsizeSpanAnimator; import org.telegram.ui.Components.LayoutHelper; +import org.telegram.ui.Components.voip.CellFlickerDrawable; class StroageUsageView extends FrameLayout { private Paint paintFill = new Paint(Paint.ANTI_ALIAS_FLAG); + private Paint paintCalculcating = new Paint(Paint.ANTI_ALIAS_FLAG); private Paint paintProgress = new Paint(Paint.ANTI_ALIAS_FLAG); private Paint paintProgress2 = new Paint(Paint.ANTI_ALIAS_FLAG); private Paint bgPaint = new Paint(); @@ -48,14 +53,24 @@ class StroageUsageView extends FrameLayout { ValueAnimator valueAnimator2; ViewGroup legendLayout; 
+ EllipsizeSpanAnimator ellipsizeSpanAnimator; + + float calculatingProgress; + boolean calculatingProgressIncrement; + + CellFlickerDrawable cellFlickerDrawable = new CellFlickerDrawable(220, 255); + public StroageUsageView(Context context) { super(context); setWillNotDraw(false); + cellFlickerDrawable.drawFrame = false; paintFill.setStrokeWidth(AndroidUtilities.dp(6)); + paintCalculcating.setStrokeWidth(AndroidUtilities.dp(6)); paintProgress.setStrokeWidth(AndroidUtilities.dp(6)); paintProgress2.setStrokeWidth(AndroidUtilities.dp(6)); paintFill.setStrokeCap(Paint.Cap.ROUND); + paintCalculcating.setStrokeCap(Paint.Cap.ROUND); paintProgress.setStrokeCap(Paint.Cap.ROUND); paintProgress2.setStrokeCap(Paint.Cap.ROUND); @@ -115,7 +130,18 @@ class StroageUsageView extends FrameLayout { calculatingTextView = new TextView(context); calculatingTextView.setTextColor(Theme.getColor(Theme.key_windowBackgroundWhiteGrayText)); - calculatingTextView.setText(LocaleController.getString("CalculatingSize",R.string.CalculatingSize)); + + String calculatingString = LocaleController.getString("CalculatingSize",R.string.CalculatingSize); + int indexOfDots = calculatingString.indexOf("..."); + if (indexOfDots >= 0) { + SpannableString spannableString = new SpannableString(calculatingString); + ellipsizeSpanAnimator = new EllipsizeSpanAnimator(calculatingTextView); + ellipsizeSpanAnimator.wrap(spannableString, indexOfDots); + calculatingTextView.setText(spannableString); + } else { + calculatingTextView.setText(calculatingString); + } + telegramCacheTextView = new TextView(context); telegramCacheTextView.setCompoundDrawablePadding(AndroidUtilities.dp(6)); @@ -182,7 +208,13 @@ class StroageUsageView extends FrameLayout { textSettingsCell.setVisibility(GONE); progress = 0f; progress2 = 0; + if (ellipsizeSpanAnimator != null) { + ellipsizeSpanAnimator.addView(calculatingTextView); + } } else { + if (ellipsizeSpanAnimator != null) { + ellipsizeSpanAnimator.removeView(calculatingTextView); + } 
calculatingTextView.setVisibility(View.GONE); if (totalSize > 0) { divider.setVisibility(VISIBLE); @@ -286,7 +318,35 @@ class StroageUsageView extends FrameLayout { bgPaint.setColor(Theme.getColor(Theme.key_windowBackgroundWhite)); canvas.drawLine(AndroidUtilities.dp(24), AndroidUtilities.dp(20), getMeasuredWidth() - AndroidUtilities.dp(24), AndroidUtilities.dp(20), paintFill); + if (calculating || calculatingProgress != 0) { + if (calculating) { + if (calculatingProgressIncrement) { + calculatingProgress += 16f / 650; + if (calculatingProgress > 1f) { + calculatingProgress = 1f; + calculatingProgressIncrement = false; + } + } else { + calculatingProgress -= 16f / 650; + if (calculatingProgress < 0) { + calculatingProgress = 0; + calculatingProgressIncrement = true; + } + } + } else { + calculatingProgress -= 16f / 150; + if (calculatingProgress < 0) { + calculatingProgress = 0; + } + } + invalidate(); +// paintCalculcating.setColor(ColorUtils.setAlphaComponent(Color.WHITE, (int) (150 * calculatingProgress))); +// canvas.drawLine(AndroidUtilities.dp(24), AndroidUtilities.dp(20), getMeasuredWidth() - AndroidUtilities.dp(24), AndroidUtilities.dp(20), paintCalculcating); + AndroidUtilities.rectTmp.set(AndroidUtilities.dp(24), AndroidUtilities.dp(17), getMeasuredWidth() - AndroidUtilities.dp(24), AndroidUtilities.dp(23)); + cellFlickerDrawable.setParentWidth(getMeasuredWidth()); + cellFlickerDrawable.draw(canvas, AndroidUtilities.rectTmp, AndroidUtilities.dp(3)); + } int currentP = AndroidUtilities.dp(24); if (!calculating) { int progressWidth = (int) ((getMeasuredWidth() - AndroidUtilities.dp(24) * 2) * progress2); @@ -306,4 +366,20 @@ class StroageUsageView extends FrameLayout { } } } + + @Override + protected void onAttachedToWindow() { + super.onAttachedToWindow(); + if (ellipsizeSpanAnimator != null) { + ellipsizeSpanAnimator.onAttachedToWindow(); + } + } + + @Override + protected void onDetachedFromWindow() { + super.onDetachedFromWindow(); + if 
(ellipsizeSpanAnimator != null) { + ellipsizeSpanAnimator.onDetachedFromWindow(); + } + } } diff --git a/TMessagesProj/src/main/java/org/telegram/ui/TextMessageEnterTransition.java b/TMessagesProj/src/main/java/org/telegram/ui/TextMessageEnterTransition.java new file mode 100644 index 000000000..d457d8ed7 --- /dev/null +++ b/TMessagesProj/src/main/java/org/telegram/ui/TextMessageEnterTransition.java @@ -0,0 +1,642 @@ +package org.telegram.ui; + +import android.animation.Animator; +import android.animation.AnimatorListenerAdapter; +import android.animation.ValueAnimator; +import android.annotation.SuppressLint; +import android.graphics.Bitmap; +import android.graphics.Canvas; +import android.graphics.Color; +import android.graphics.LinearGradient; +import android.graphics.Matrix; +import android.graphics.Paint; +import android.graphics.PorterDuff; +import android.graphics.PorterDuffXfermode; +import android.graphics.Shader; +import android.graphics.drawable.Drawable; +import android.os.Build; +import android.text.Layout; +import android.text.SpannableString; +import android.text.StaticLayout; +import android.text.TextPaint; +import android.text.TextUtils; +import android.view.View; +import android.view.animation.LinearInterpolator; + +import androidx.core.graphics.ColorUtils; +import androidx.recyclerview.widget.ChatListItemAnimator; + +import org.telegram.messenger.AndroidUtilities; +import org.telegram.messenger.Emoji; +import org.telegram.messenger.MessageObject; +import org.telegram.messenger.NotificationCenter; +import org.telegram.messenger.SharedConfig; +import org.telegram.messenger.UserConfig; +import org.telegram.tgnet.TLRPC; +import org.telegram.ui.ActionBar.SimpleTextView; +import org.telegram.ui.ActionBar.Theme; +import org.telegram.ui.Cells.ChatMessageCell; +import org.telegram.ui.Components.ChatActivityEnterView; +import org.telegram.ui.Components.CubicBezierInterpolator; +import org.telegram.ui.Components.EmptyStubSpan; +import 
org.telegram.ui.Components.RecyclerListView; + +public class TextMessageEnterTransition implements MessageEnterTransitionContainer.Transition { + + float fromRadius; + float progress; + + Paint bitmapPaint = new Paint(Paint.ANTI_ALIAS_FLAG); + Bitmap textLayoutBitmap; + Bitmap textLayoutBitmapRtl; + Bitmap crossfadeTextBitmap; + + boolean hasReply; + private ValueAnimator animator; + boolean initBitmaps = false; + + float replyFromStartX; + float replyFromStartY; + float replyFromObjectStartY; + int replayFromColor; + int replayObjectFromColor; + float crossfadeTextOffset; + + float drawableFromTop; + + MessageObject currentMessageObject; + + boolean drawBitmaps = false; + float toXOffset; + float toXOffsetRtl; + + boolean crossfade; + + StaticLayout layout; + StaticLayout rtlLayout; + + ChatMessageCell messageView; + RecyclerListView listView; + MessageEnterTransitionContainer container; + private Matrix gradientMatrix; + private Paint gradientPaint; + private int messageId; + private float drawableFromBottom; + private float scaleY; + private float fromStartX; + private float fromStartY; + private ChatActivity chatActivity; + private LinearGradient gradientShader; + private float scaleFrom; + + private final int currentAccount; + private int animationIndex = -1; + MessageObject.TextLayoutBlock textLayoutBlock; + Drawable fromMessageDrawable; + ChatActivityEnterView enterView; + + float textX; + float textY; + + float replyNameDx; + float replyMessageDx; + + @SuppressLint("WrongConstant") + public TextMessageEnterTransition(ChatMessageCell messageView, ChatActivity chatActivity, RecyclerListView listView, MessageEnterTransitionContainer container) { + currentAccount = UserConfig.selectedAccount; + if (messageView.getMessageObject().textLayoutBlocks.size() > 1 || messageView.getMessageObject().textLayoutBlocks.get(0).textLayout.getLineCount() > 10) { + return; + } + this.messageView = messageView; + this.listView = listView; + this.container = container; + 
this.chatActivity = chatActivity; + enterView = chatActivity.getChatActivityEnterView(); + + ChatActivityEnterView chatActivityEnterView = chatActivity.getChatActivityEnterView(); + + fromRadius = chatActivityEnterView.getRecordCicle().drawingCircleRadius; + bitmapPaint.setFilterBitmap(true); + currentMessageObject = messageView.getMessageObject(); + + if (!messageView.getTransitionParams().wasDraw) { + messageView.draw(new Canvas()); + } + + messageView.setEnterTransitionInProgress(true); + + CharSequence editText = chatActivityEnterView.getEditField().getLayout().getText(); + CharSequence text = messageView.getMessageObject().messageText; + + crossfade = false; + int linesOffset = 0; + int layoutH = chatActivityEnterView.getEditField().getLayout().getHeight(); + TextPaint textPaint = Theme.chat_msgTextPaint; + int emojiSize = AndroidUtilities.dp(20); + if (messageView.getMessageObject().getEmojiOnlyCount() != 0) { + if (messageView.getMessageObject().getEmojiOnlyCount() == 1) { + textPaint = Theme.chat_msgTextPaintOneEmoji; + emojiSize = AndroidUtilities.dp(32); + } else if (messageView.getMessageObject().getEmojiOnlyCount() == 2) { + textPaint = Theme.chat_msgTextPaintTwoEmoji; + emojiSize = AndroidUtilities.dp(28); + } else if (messageView.getMessageObject().getEmojiOnlyCount() == 3) { + textPaint = Theme.chat_msgTextPaintThreeEmoji; + emojiSize = AndroidUtilities.dp(24); + } + } + if (editText.length() != text.length()) { + crossfade = true; + String str = editText.toString(); + String trimmedStr = str.trim(); + int i = str.indexOf(trimmedStr); + if (i > 0) { + linesOffset = chatActivityEnterView.getEditField().getLayout().getLineTop(chatActivityEnterView.getEditField().getLayout().getLineForOffset(i)); + layoutH = chatActivityEnterView.getEditField().getLayout().getLineBottom(chatActivityEnterView.getEditField().getLayout().getLineForOffset(i + trimmedStr.length())) - linesOffset; + } + text = Emoji.replaceEmoji(trimmedStr, textPaint.getFontMetricsInt(), 
emojiSize, false); + } + + + scaleFrom = chatActivityEnterView.getEditField().getTextSize() / textPaint.getTextSize(); + + int n = chatActivityEnterView.getEditField().getLayout().getLineCount(); + int width = (int) (chatActivityEnterView.getEditField().getLayout().getWidth() / scaleFrom); + + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.N) { + layout = StaticLayout.Builder.obtain(text, 0, text.length(), textPaint, width) + .setBreakStrategy(StaticLayout.BREAK_STRATEGY_HIGH_QUALITY) + .setHyphenationFrequency(StaticLayout.HYPHENATION_FREQUENCY_NONE) + .setAlignment(Layout.Alignment.ALIGN_NORMAL) + .build(); + } else { + layout = new StaticLayout(text, textPaint, width, Layout.Alignment.ALIGN_NORMAL, 1.0f, 0.0f, false); + } + float textViewY = chatActivityEnterView.getY() + chatActivityEnterView.getEditField().getY() + ((View) chatActivityEnterView.getEditField().getParent()).getY() + ((View) chatActivityEnterView.getEditField().getParent().getParent()).getY(); + fromStartX = chatActivityEnterView.getX() + chatActivityEnterView.getEditField().getX() + ((View) chatActivityEnterView.getEditField().getParent()).getX() + ((View) chatActivityEnterView.getEditField().getParent().getParent()).getX(); + fromStartY = textViewY + AndroidUtilities.dp(10) - chatActivityEnterView.getEditField().getScrollY() + linesOffset; + toXOffset = 0; + float minX = Float.MAX_VALUE; + for (int i = 0; i < layout.getLineCount(); i++) { + float begin = layout.getLineLeft(i); + if (begin < minX) { + minX = begin; + } + } + if (minX != Float.MAX_VALUE) { + toXOffset = minX; + } + + scaleY = (layoutH) / (layout.getHeight() * scaleFrom); + + drawableFromTop = textViewY + AndroidUtilities.dp(4); + if (enterView.isTopViewVisible()) { + drawableFromTop -= AndroidUtilities.dp(12); + } + drawableFromBottom = textViewY + chatActivityEnterView.getEditField().getMeasuredHeight(); + textLayoutBlock = messageView.getMessageObject().textLayoutBlocks.get(0); + StaticLayout messageTextLayout = 
textLayoutBlock.textLayout; + int normalLinesCount = 0; + int rtlLinesCount = 0; + + if (messageTextLayout.getLineCount() == layout.getLineCount()) { + n = messageTextLayout.getLineCount(); + for (int i = 0; i < n; i++) { + if (isRtlLine(layout, i)) { + rtlLinesCount++; + } else { + normalLinesCount++; + } + if (messageTextLayout.getLineEnd(i) != layout.getLineEnd(i)) { + crossfade = true; + break; + } + } + } else { + crossfade = true; + } + + minX = Float.MAX_VALUE; + if (!crossfade && rtlLinesCount > 0 && normalLinesCount > 0) { + SpannableString normalText = new SpannableString(text); + SpannableString rtlText = new SpannableString(text); + for (int i = 0; i < n; i++) { + if (isRtlLine(layout, i)) { + normalText.setSpan(new EmptyStubSpan(), layout.getLineStart(i), layout.getLineEnd(i), 0); + float begin = layout.getLineLeft(i); + if (begin < minX) { + minX = begin; + } + } else { + rtlText.setSpan(new EmptyStubSpan(), layout.getLineStart(i), layout.getLineEnd(i), 0); + } + } + + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.N) { + layout = StaticLayout.Builder.obtain(normalText, 0, normalText.length(), textPaint, width) + .setBreakStrategy(StaticLayout.BREAK_STRATEGY_HIGH_QUALITY) + .setHyphenationFrequency(StaticLayout.HYPHENATION_FREQUENCY_NONE) + .setAlignment(Layout.Alignment.ALIGN_NORMAL) + .build(); + + rtlLayout = StaticLayout.Builder.obtain(rtlText, 0, rtlText.length(), textPaint, width) + .setBreakStrategy(StaticLayout.BREAK_STRATEGY_HIGH_QUALITY) + .setHyphenationFrequency(StaticLayout.HYPHENATION_FREQUENCY_NONE) + .setAlignment(Layout.Alignment.ALIGN_NORMAL) + .build(); + } else { + layout = new StaticLayout(normalText, textPaint, width, Layout.Alignment.ALIGN_NORMAL, 1.0f, 0, false); + rtlLayout = new StaticLayout(rtlText, textPaint, width, Layout.Alignment.ALIGN_NORMAL, 1.0f, 0, false); + } + + + } + + toXOffsetRtl = layout.getWidth() - messageView.getMessageObject().textLayoutBlocks.get(0).textLayout.getWidth(); + + try { + if (drawBitmaps) { + 
textLayoutBitmap = Bitmap.createBitmap(layout.getWidth(), layout.getHeight(), Bitmap.Config.ARGB_8888); + Canvas bitmapCanvas = new Canvas(textLayoutBitmap); + layout.draw(bitmapCanvas); + + if (rtlLayout != null) { + textLayoutBitmapRtl = Bitmap.createBitmap(rtlLayout.getWidth(), rtlLayout.getHeight(), Bitmap.Config.ARGB_8888); + bitmapCanvas = new Canvas(textLayoutBitmapRtl); + rtlLayout.draw(bitmapCanvas); + } + + if (crossfade) { + if (messageView.getMeasuredHeight() < listView.getMeasuredHeight()) { + crossfadeTextOffset = 0; + crossfadeTextBitmap = Bitmap.createBitmap(messageView.getMeasuredWidth(), messageView.getMeasuredHeight(), Bitmap.Config.ARGB_8888); + } else { + crossfadeTextOffset = messageView.getTop(); + crossfadeTextBitmap = Bitmap.createBitmap(messageView.getMeasuredWidth(), listView.getMeasuredHeight(), Bitmap.Config.ARGB_8888); + } + } + } + } catch (Exception e) { + drawBitmaps = false; + } + + hasReply = messageView.getMessageObject().getReplyMsgId() != 0 && messageView.replyNameLayout != null; + + if (hasReply) { + SimpleTextView replyNameTextView = chatActivity.getReplyNameTextView(); + replyFromStartX = replyNameTextView.getX() + ((View) replyNameTextView.getParent()).getX(); + replyFromStartY = replyNameTextView.getY() + ((View) replyNameTextView.getParent().getParent()).getY() + ((View) replyNameTextView.getParent().getParent().getParent()).getY(); + replyNameTextView = chatActivity.getReplyObjectTextView(); + replyFromObjectStartY = replyNameTextView.getY() + ((View) replyNameTextView.getParent().getParent()).getY() + ((View) replyNameTextView.getParent().getParent().getParent()).getY(); + + replayFromColor = chatActivity.getReplyNameTextView().getTextColor(); + replayObjectFromColor = chatActivity.getReplyObjectTextView().getTextColor(); + drawableFromTop -= AndroidUtilities.dp(46); + } + + + gradientMatrix = new Matrix(); + gradientPaint = new Paint(Paint.ANTI_ALIAS_FLAG); + gradientPaint.setXfermode(new 
PorterDuffXfermode(PorterDuff.Mode.DST_IN)); + + gradientShader = new LinearGradient(0, AndroidUtilities.dp(12), 0, 0, 0, 0xFF000000, Shader.TileMode.CLAMP); + gradientPaint.setShader(gradientShader); + + messageId = messageView.getMessageObject().stableId; + + chatActivityEnterView.getEditField().setAlpha(0f); + chatActivityEnterView.setTextTransitionIsRunning(true); + + if (messageView.replyNameLayout != null && messageView.replyNameLayout.getText().length() > 1) { + if (messageView.replyNameLayout.getPrimaryHorizontal(0) != 0) { + replyNameDx = messageView.replyNameLayout.getWidth() - messageView.replyNameLayout.getLineWidth(0); + } + } + if (messageView.replyTextLayout != null && messageView.replyTextLayout.getText().length() > 1) { + if (messageView.replyTextLayout.getPrimaryHorizontal(0) != 0) { + replyMessageDx = messageView.replyTextLayout.getWidth() - messageView.replyTextLayout.getLineWidth(0); + } + } + + animator = ValueAnimator.ofFloat(0f, 1f); + animator.addUpdateListener(valueAnimator -> { + progress = (float) valueAnimator.getAnimatedValue(); + chatActivityEnterView.getEditField().setAlpha(progress); + container.invalidate(); + }); + + + animator.setInterpolator(new LinearInterpolator()); + animator.setDuration(ChatListItemAnimator.DEFAULT_DURATION); + + container.addTransition(this); + animationIndex = NotificationCenter.getInstance(currentAccount).setAnimationInProgress(animationIndex, null); + + animator.addListener(new AnimatorListenerAdapter() { + @Override + public void onAnimationEnd(Animator animation) { + + NotificationCenter.getInstance(currentAccount).onAnimationFinish(animationIndex); + container.removeTransition(TextMessageEnterTransition.this); + messageView.setEnterTransitionInProgress(false); + chatActivityEnterView.setTextTransitionIsRunning(false); + chatActivityEnterView.getEditField().setAlpha(1f); + chatActivity.getReplyNameTextView().setAlpha(1f); + chatActivity.getReplyObjectTextView().setAlpha(1f); + } + }); + + if 
(SharedConfig.getDevicePerformanceClass() == SharedConfig.PERFORMANCE_CLASS_HIGH) { + Theme.MessageDrawable drawable = messageView.getCurrentBackgroundDrawable(false); + fromMessageDrawable = drawable.getTransitionDrawable(Theme.getColor(Theme.key_chat_messagePanelBackground)); + } + } + + public void start() { + if (animator != null) { + animator.start(); + } + } + + private boolean isRtlLine(Layout layout, int line) { + return layout.getLineRight(line) == layout.getWidth() && layout.getLineLeft(line) != 0; + } + + float lastMessageX; + float lastMessageY; + + public void onDraw(Canvas canvas) { + if (drawBitmaps && !initBitmaps && crossfadeTextBitmap != null && messageView.getTransitionParams().wasDraw) { + initBitmaps = true; + Canvas bitmapCanvas = new Canvas(crossfadeTextBitmap); + bitmapCanvas.translate(0, crossfadeTextOffset); + messageView.drawMessageText(bitmapCanvas, messageView.getMessageObject().textLayoutBlocks, true, 1f, true); + } + float listViewBottom = listView.getY() - container.getY() + listView.getMeasuredHeight(); + + float fromX = fromStartX - container.getX(); + float fromY = fromStartY - container.getY(); + + textX = messageView.getTextX(); + textY = messageView.getTextY(); + + float messageViewX; + float messageViewY; + + if (messageView.getMessageObject().stableId != messageId) { + return; + } else { + messageViewX = messageView.getX() + listView.getX() - container.getX(); + messageViewY = messageView.getTop() + listView.getTop() - container.getY(); + messageViewY += enterView.getTopViewHeight(); + + lastMessageX = messageViewX; + lastMessageY = messageViewY; + } + + float progress = ChatListItemAnimator.DEFAULT_INTERPOLATOR.getInterpolation(this.progress); + float alphaProgress = this.progress > 0.6f ? 
1f : this.progress / 0.6f; + + float p2 = CubicBezierInterpolator.EASE_OUT_QUINT.getInterpolation(this.progress); + float progressX = CubicBezierInterpolator.EASE_OUT.getInterpolation(p2); + + float toX = messageViewX + textX; + float toY = messageViewY + textY; + + int clipBottom = (int) (container.getMeasuredHeight() * (1f - progressX) + listViewBottom * progressX); + boolean messageViewOverscrolled = messageView.getBottom() - AndroidUtilities.dp(4) > listView.getMeasuredHeight(); + boolean clipBottomWithAlpha = messageViewOverscrolled && (messageViewY + messageView.getMeasuredHeight() - AndroidUtilities.dp(8) > clipBottom) && container.getMeasuredHeight() > 0; + + + if (clipBottomWithAlpha) { + canvas.saveLayerAlpha(0, Math.max(0, messageViewY), container.getMeasuredWidth(), container.getMeasuredHeight(), 255, Canvas.ALL_SAVE_FLAG); + } + canvas.save(); + canvas.clipRect(0, listView.getTop() + chatActivity.getChatListViewPadding() - container.getY() - AndroidUtilities.dp(3), container.getMeasuredWidth(), container.getMeasuredHeight()); + canvas.save(); + float drawableX = messageViewX + messageView.getBackgroundDrawableLeft() + (fromX - (toX - toXOffset)) * (1f - progressX); + float drawableToTop = messageViewY + messageView.getBackgroundDrawableTop(); + float drawableTop = (drawableFromTop - container.getY()) * (1f - progress) + (drawableToTop) * progress; + float drawableH = messageView.getBackgroundDrawableBottom() - messageView.getBackgroundDrawableTop(); + float drawableBottom = (drawableFromBottom - container.getY()) * (1f - progress) + (drawableToTop + drawableH) * progress; + int drawableRight = (int) (messageViewX + messageView.getBackgroundDrawableRight() + AndroidUtilities.dp(4) * (1f - progressX)); + Theme.MessageDrawable drawable = messageView.getCurrentBackgroundDrawable(true); + + if (drawable != null) { + canvas.save(); + canvas.translate(drawableX, drawableTop); + int heightLocal = (int) (drawableBottom - drawableTop); + int widthLocal = (int) 
(drawableRight - drawableX); + + messageView.setBackgroundTopY(false); + Drawable shadowDrawable = drawable.getShadowDrawable(); + + if (alphaProgress != 1f && fromMessageDrawable != null) { + fromMessageDrawable.setBounds(0, 0, widthLocal, heightLocal); + fromMessageDrawable.draw(canvas); + } + + if (shadowDrawable != null) { + shadowDrawable.setAlpha((int) (255 * progressX)); + shadowDrawable.setBounds(0, 0, widthLocal, heightLocal); + shadowDrawable.draw(canvas); + shadowDrawable.setAlpha(255); + } + + drawable.setAlpha((int) (255 * alphaProgress)); + drawable.setBounds(0, 0, widthLocal, heightLocal); + drawable.draw(canvas); + drawable.setAlpha(255); + canvas.restore(); + } + canvas.restore(); + + + canvas.save(); + if (currentMessageObject.isOutOwner()) { + canvas.clipRect( + drawableX + AndroidUtilities.dp(4), drawableTop + AndroidUtilities.dp(4), + drawableRight - AndroidUtilities.dp(10), drawableBottom - AndroidUtilities.dp(4) + ); + } else { + canvas.clipRect( + drawableX + AndroidUtilities.dp(4), drawableTop + AndroidUtilities.dp(4), + drawableRight - AndroidUtilities.dp(4), drawableBottom - AndroidUtilities.dp(4) + ); + } + canvas.translate(messageView.getLeft() + listView.getX() - container.getX(), messageViewY + (fromY - toY) * (1f - progress)); + messageView.drawTime(canvas, alphaProgress, false); + messageView.drawNamesLayout(canvas, alphaProgress); + messageView.drawCommentButton(canvas, alphaProgress); + messageView.drawCaptionLayout(canvas, false, alphaProgress); + messageView.drawLinkPreview(canvas, alphaProgress); + canvas.restore(); + + + if (hasReply) { + chatActivity.getReplyNameTextView().setAlpha(0f); + chatActivity.getReplyObjectTextView().setAlpha(0f); + + float fromReplayX = replyFromStartX - container.getX(); + float fromReplayY = replyFromStartY - container.getY(); + float toReplayX = messageViewX + messageView.replyStartX; + float toReplayY = messageViewY + messageView.replyStartY; + + int replyMessageColor; + int 
replyOwnerMessageColor; + int replyLineColor; + if (currentMessageObject.hasValidReplyMessageObject() && (currentMessageObject.replyMessageObject.type == 0 || !TextUtils.isEmpty(currentMessageObject.replyMessageObject.caption)) && !(currentMessageObject.replyMessageObject.messageOwner.media instanceof TLRPC.TL_messageMediaGame || currentMessageObject.replyMessageObject.messageOwner.media instanceof TLRPC.TL_messageMediaInvoice)) { + replyMessageColor = Theme.getColor(Theme.key_chat_outReplyMessageText); + } else { + replyMessageColor = Theme.getColor(Theme.key_chat_outReplyMediaMessageText); + } + + if (currentMessageObject.isOutOwner()) { + replyOwnerMessageColor = Theme.getColor(Theme.key_chat_outReplyNameText); + replyLineColor = Theme.getColor(Theme.key_chat_outReplyLine); + } else { + replyOwnerMessageColor = Theme.getColor(Theme.key_chat_inReplyNameText); + replyLineColor = Theme.getColor(Theme.key_chat_inReplyLine); + } + + Theme.chat_replyTextPaint.setColor(ColorUtils.blendARGB(replayObjectFromColor, replyMessageColor, progress)); + Theme.chat_replyNamePaint.setColor(ColorUtils.blendARGB(replayFromColor, replyOwnerMessageColor, progress)); + + if (messageView.needReplyImage) { + fromReplayX -= AndroidUtilities.dp(44); + } + float replyX = fromReplayX * (1f - progressX) + toReplayX * progressX; + float replyY = (fromReplayY + AndroidUtilities.dp(12) * progress) * (1f - progress) + toReplayY * progress; + + Theme.chat_replyLinePaint.setColor(ColorUtils.setAlphaComponent(replyLineColor, (int) (Color.alpha(replyLineColor) * progressX))); + canvas.drawRect(replyX, replyY, replyX + AndroidUtilities.dp(2), replyY + AndroidUtilities.dp(35), Theme.chat_replyLinePaint); + + canvas.save(); + canvas.translate(AndroidUtilities.dp(10) * progressX, 0); + + if (messageView.needReplyImage) { + canvas.save(); + messageView.replyImageReceiver.setImageCoords(replyX, replyY, AndroidUtilities.dp(35), AndroidUtilities.dp(35)); + messageView.replyImageReceiver.draw(canvas); + 
canvas.translate(replyX, replyY); + canvas.restore(); + canvas.translate(AndroidUtilities.dp(44), 0); + } + + float replyToMessageX = toReplayX - replyMessageDx; + float replyToNameX = toReplayX - replyNameDx; + + float replyMessageX = fromReplayX * (1f - progressX) + replyToMessageX * progressX; + float replyNameX = fromReplayX * (1f - progressX) + replyToNameX * progressX; + + canvas.save(); + canvas.translate(replyNameX, replyY); + messageView.replyNameLayout.draw(canvas); + canvas.restore(); + + canvas.save(); + canvas.translate(replyMessageX, replyY + AndroidUtilities.dp(19)); + messageView.replyTextLayout.draw(canvas); + canvas.restore(); + + canvas.restore(); + } + + canvas.save(); + { + canvas.clipRect(drawableX + AndroidUtilities.dp(4), drawableTop + AndroidUtilities.dp(4), drawableRight - AndroidUtilities.dp(4), drawableBottom - AndroidUtilities.dp(4)); + + float scale = progressX + scaleFrom * (1f - progressX); + float scale2; + if (drawBitmaps) { + scale2 = progressX + scaleY * (1f - progressX); + } else { + scale2 = 1f; + } + + canvas.save(); + canvas.translate(fromX * (1f - progressX) + (toX - toXOffset) * progressX, fromY * (1f - progress) + (toY + textLayoutBlock.textYOffset) * progress); + canvas.scale(scale, scale * scale2, 0, 0); + // canvas.translate(0, textLayoutBlock.textYOffset / 2); + if (drawBitmaps) { + if (crossfade) { + bitmapPaint.setAlpha((int) (255 * (1f - alphaProgress))); + } + canvas.drawBitmap(textLayoutBitmap, 0, 0, bitmapPaint); + } else { + if (crossfade) { + int oldAlpha = Theme.chat_msgTextPaint.getAlpha(); + Theme.chat_msgTextPaint.setAlpha((int) (oldAlpha * (1f - alphaProgress))); + layout.draw(canvas); + Theme.chat_msgTextPaint.setAlpha(oldAlpha); + } else { + layout.draw(canvas); + } + + } + canvas.restore(); + + if (rtlLayout != null) { + canvas.save(); + canvas.translate(fromX * (1f - progressX) + (toX - toXOffsetRtl) * progressX, fromY * (1f - progress) + (toY + textLayoutBlock.textYOffset) * progress); + 
canvas.scale(scale, scale * scale2, 0, 0); + if (drawBitmaps) { + if (crossfade) { + bitmapPaint.setAlpha((int) (255 * (1f - alphaProgress))); + } + canvas.drawBitmap(textLayoutBitmapRtl, 0, 0, bitmapPaint); + } else { + if (crossfade) { + int oldAlpha = Theme.chat_msgTextPaint.getAlpha(); + Theme.chat_msgTextPaint.setAlpha((int) (oldAlpha * (1f - alphaProgress))); + rtlLayout.draw(canvas); + Theme.chat_msgTextPaint.setAlpha(oldAlpha); + } else { + rtlLayout.draw(canvas); + } + + } + canvas.restore(); + } + + if (crossfade) { + canvas.save(); + canvas.translate(messageView.getLeft() + listView.getX() - container.getX() + (fromX - toX) * (1f - progressX), messageViewY + (fromY - toY) * (1f - progress)); + canvas.scale(scale, scale * scale2, messageView.getTextX(), messageView.getTextY()); + canvas.translate(0, -crossfadeTextOffset); + + if (drawBitmaps) { + bitmapPaint.setAlpha((int) (255 * alphaProgress)); + canvas.drawBitmap(crossfadeTextBitmap, 0, 0, bitmapPaint); + } else { + messageView.drawMessageText(canvas, messageView.getMessageObject().textLayoutBlocks, true, alphaProgress, true); + } + canvas.restore(); + } + + } + canvas.restore(); + + if (clipBottomWithAlpha) { + gradientMatrix.setTranslate(0, clipBottom); + gradientShader.setLocalMatrix(gradientMatrix); + canvas.drawRect(0, clipBottom, container.getMeasuredWidth(), container.getMeasuredHeight(), gradientPaint); + + canvas.restore(); + } + + float sendProgress = this.progress > 0.4f ? 
1f : this.progress / 0.4f; + if (sendProgress == 1f) { + enterView.setTextTransitionIsRunning(false); + } + if (enterView.getSendButton().getVisibility() == View.VISIBLE && sendProgress < 1f) { + canvas.save(); + canvas.translate(enterView.getX() + enterView.getSendButton().getX() + ((View) enterView.getSendButton().getParent()).getX() + ((View) enterView.getSendButton().getParent().getParent()).getX() - container.getX() + AndroidUtilities.dp(52) * sendProgress, enterView.getY() + enterView.getSendButton().getY() + ((View) enterView.getSendButton().getParent()).getY() + ((View) enterView.getSendButton().getParent().getParent()).getY()- container.getY()); + // canvas.saveLayerAlpha(0, 0, enterView.getSendButton().getWidth(), enterView.getSendButton().getHeight(), (int) (enterView.getSendButton().getAlpha() * 255), Canvas.ALL_SAVE_FLAG); + //canvas.scale(enterView.getSendButton().getScaleX(), enterView.getSendButton().getScaleY(), enterView.getSendButton().getWidth() / 2f, enterView.getSendButton().getHeight() / 2f); + enterView.getSendButton().draw(canvas); + canvas.restore(); + canvas.restore(); + } + } +} diff --git a/TMessagesProj/src/main/java/org/telegram/ui/ThemeActivity.java b/TMessagesProj/src/main/java/org/telegram/ui/ThemeActivity.java index 62cec18de..44323c643 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/ThemeActivity.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/ThemeActivity.java @@ -720,7 +720,7 @@ public class ThemeActivity extends BaseFragment implements NotificationCenter.No Theme.ThemeInfo themeInfo = (Theme.ThemeInfo) args[0]; Theme.ThemeAccent accent = (Theme.ThemeAccent) args[1]; if (themeInfo == sharingTheme && accent == sharingAccent) { - String link = "https://" + MessagesController.getInstance(currentAccount).linkPrefix + "/addtheme/" + (accent != null ? accent.info.slug : themeInfo.info.slug); + String link = "https://" + getMessagesController().linkPrefix + "/addtheme/" + (accent != null ? 
accent.info.slug : themeInfo.info.slug); showDialog(new ShareAlert(getParentActivity(), null, link, false, link, false)); if (sharingProgressDialog != null) { sharingProgressDialog.dismiss(); @@ -789,10 +789,10 @@ public class ThemeActivity extends BaseFragment implements NotificationCenter.No Theme.ThemeInfo currentTheme = Theme.getCurrentTheme(); Theme.ThemeAccent accent = currentTheme.getAccent(false); if (accent.info == null) { - MessagesController.getInstance(currentAccount).saveThemeToServer(accent.parentTheme, accent); + getMessagesController().saveThemeToServer(accent.parentTheme, accent); NotificationCenter.getGlobalInstance().postNotificationName(NotificationCenter.needShareTheme, accent.parentTheme, accent); } else { - String link = "https://" + MessagesController.getInstance(currentAccount).linkPrefix + "/addtheme/" + accent.info.slug; + String link = "https://" + getMessagesController().linkPrefix + "/addtheme/" + accent.info.slug; showDialog(new ShareAlert(getParentActivity(), null, link, false, link, false)); } } else if (id == edit_theme) { @@ -1545,10 +1545,10 @@ public class ThemeActivity extends BaseFragment implements NotificationCenter.No } if (which == 0) { if (themeInfo.info == null) { - MessagesController.getInstance(themeInfo.account).saveThemeToServer(themeInfo, null); + getMessagesController().saveThemeToServer(themeInfo, null); NotificationCenter.getGlobalInstance().postNotificationName(NotificationCenter.needShareTheme, themeInfo, null); } else { - String link = "https://" + MessagesController.getInstance(currentAccount).linkPrefix + "/addtheme/" + themeInfo.info.slug; + String link = "https://" + getMessagesController().linkPrefix + "/addtheme/" + themeInfo.info.slug; showDialog(new ShareAlert(getParentActivity(), null, link, false, link, false)); } } else if (which == 1) { @@ -1811,10 +1811,10 @@ public class ThemeActivity extends BaseFragment implements NotificationCenter.No AlertsCreator.createThemeCreateDialog(ThemeActivity.this, 
which == 1 ? 2 : 1, accent.parentTheme, accent); } else if (which == 1) { if (accent.info == null) { - MessagesController.getInstance(currentAccount).saveThemeToServer(accent.parentTheme, accent); + getMessagesController().saveThemeToServer(accent.parentTheme, accent); NotificationCenter.getGlobalInstance().postNotificationName(NotificationCenter.needShareTheme, accent.parentTheme, accent); } else { - String link = "https://" + MessagesController.getInstance(currentAccount).linkPrefix + "/addtheme/" + accent.info.slug; + String link = "https://" + getMessagesController().linkPrefix + "/addtheme/" + accent.info.slug; showDialog(new ShareAlert(getParentActivity(), null, link, false, link, false)); } } else if (which == 2) { diff --git a/TMessagesProj/src/main/java/org/telegram/ui/ThemePreviewActivity.java b/TMessagesProj/src/main/java/org/telegram/ui/ThemePreviewActivity.java index 4694cbf01..e9478acfb 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/ThemePreviewActivity.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/ThemePreviewActivity.java @@ -3187,7 +3187,7 @@ public class ThemePreviewActivity extends BaseFragment implements DownloadContro motionBackgroundDrawable = new MotionBackgroundDrawable(); motionBackgroundDrawable.setParentView(backgroundImage); if (rotatePreview) { - motionBackgroundDrawable.rotatePreview(); + motionBackgroundDrawable.rotatePreview(false); } } motionBackgroundDrawable.setColors(backgroundColor, backgroundGradientColor1, backgroundGradientColor2, backgroundGradientColor3); @@ -3348,7 +3348,7 @@ public class ThemePreviewActivity extends BaseFragment implements DownloadContro motionBackgroundDrawable = new MotionBackgroundDrawable(); motionBackgroundDrawable.setParentView(backgroundImage); if (rotatePreview) { - motionBackgroundDrawable.rotatePreview(); + motionBackgroundDrawable.rotatePreview(false); } } motionBackgroundDrawable.setColors(backgroundColor, color1, color2, color3); diff --git 
a/TMessagesProj/src/main/java/org/telegram/ui/TwoStepVerificationActivity.java b/TMessagesProj/src/main/java/org/telegram/ui/TwoStepVerificationActivity.java index ba0e00e54..5363c97b5 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/TwoStepVerificationActivity.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/TwoStepVerificationActivity.java @@ -47,6 +47,7 @@ import org.telegram.ui.ActionBar.ActionBarMenu; import org.telegram.ui.ActionBar.ActionBarMenuItem; import org.telegram.ui.ActionBar.AlertDialog; import org.telegram.ui.ActionBar.BaseFragment; +import org.telegram.ui.ActionBar.SimpleTextView; import org.telegram.ui.ActionBar.Theme; import org.telegram.ui.ActionBar.ThemeDescription; import org.telegram.ui.Cells.EditTextSettingsCell; @@ -59,6 +60,7 @@ import org.telegram.ui.Components.LayoutHelper; import org.telegram.ui.Components.RecyclerListView; import java.util.ArrayList; +import java.util.Locale; import tw.nekomimi.nekogram.EditTextAutoFill; import tw.nekomimi.nekogram.utils.VibrateUtil; @@ -69,7 +71,8 @@ public class TwoStepVerificationActivity extends BaseFragment implements Notific private RecyclerListView listView; private TextView titleTextView; private TextView bottomTextView; - private TextView bottomButton; + private SimpleTextView bottomButton; + private TextView cancelResetButton; private EditTextBoldCursor passwordEditText; private AlertDialog progressDialog; private EmptyTextProgressView emptyView; @@ -89,6 +92,8 @@ public class TwoStepVerificationActivity extends BaseFragment implements Notific private long currentSecretId; private byte[] currentSecret; + private boolean resetPasswordOnShow; + private int setPasswordRow; private int setPasswordDetailRow; private int changePasswordRow; @@ -98,6 +103,8 @@ public class TwoStepVerificationActivity extends BaseFragment implements Notific private int passwordEnabledDetailRow; private int rowCount; + private boolean forgotPasswordOnShow; + private TwoStepVerificationActivityDelegate 
delegate; public interface TwoStepVerificationActivityDelegate { @@ -118,6 +125,7 @@ public class TwoStepVerificationActivity extends BaseFragment implements Notific public void setPassword(TLRPC.TL_account_password password) { currentPassword = password; + passwordEntered = false; } public TwoStepVerificationActivity(int account, TLRPC.TL_account_password password) { @@ -139,7 +147,7 @@ public class TwoStepVerificationActivity extends BaseFragment implements Notific public boolean onFragmentCreate() { super.onFragmentCreate(); if (currentPassword == null || currentPassword.current_algo == null || currentPasswordHash == null || currentPasswordHash.length <= 0) { - loadPasswordInfo(false); + loadPasswordInfo(true, currentPassword != null); } updateRows(); NotificationCenter.getInstance(currentAccount).addObserver(this, NotificationCenter.twoStepPasswordChanged); @@ -149,6 +157,7 @@ public class TwoStepVerificationActivity extends BaseFragment implements Notific @Override public void onFragmentDestroy() { super.onFragmentDestroy(); + AndroidUtilities.cancelRunOnUIThread(updateTimeRunnable); NotificationCenter.getInstance(currentAccount).removeObserver(this, NotificationCenter.twoStepPasswordChanged); destroyed = true; if (progressDialog != null) { @@ -246,16 +255,16 @@ public class TwoStepVerificationActivity extends BaseFragment implements Notific linearLayout.addView(bottomTextView, LayoutHelper.createLinear(LayoutHelper.WRAP_CONTENT, LayoutHelper.WRAP_CONTENT, (LocaleController.isRTL ? 
Gravity.RIGHT : Gravity.LEFT) | Gravity.TOP, 40, 30, 40, 0)); LinearLayout linearLayout2 = new LinearLayout(context); + linearLayout2.setOrientation(LinearLayout.VERTICAL); linearLayout2.setGravity(Gravity.BOTTOM | Gravity.CENTER_VERTICAL); + linearLayout2.setClipChildren(false); linearLayout.addView(linearLayout2, LayoutHelper.createLinear(LayoutHelper.MATCH_PARENT, LayoutHelper.MATCH_PARENT)); - bottomButton = new TextView(context); - bottomButton.setTextColor(Theme.getColor(Theme.key_windowBackgroundWhiteBlueText4)); - bottomButton.setTextSize(TypedValue.COMPLEX_UNIT_DIP, 14); + bottomButton = new SimpleTextView(context); + bottomButton.setTextSize(14); bottomButton.setGravity((LocaleController.isRTL ? Gravity.RIGHT : Gravity.LEFT) | Gravity.BOTTOM); - bottomButton.setText(LocaleController.getString("YourEmailSkip", R.string.YourEmailSkip)); bottomButton.setPadding(0, AndroidUtilities.dp(10), 0, 0); - linearLayout2.addView(bottomButton, LayoutHelper.createLinear(LayoutHelper.MATCH_PARENT, LayoutHelper.WRAP_CONTENT, (LocaleController.isRTL ? Gravity.RIGHT : Gravity.LEFT) | Gravity.BOTTOM, 40, 0, 40, 14)); + linearLayout2.addView(bottomButton, LayoutHelper.createLinear(LayoutHelper.MATCH_PARENT, 40, (LocaleController.isRTL ? Gravity.RIGHT : Gravity.LEFT) | Gravity.BOTTOM, 40, 0, 40, 14)); bottomButton.setOnClickListener(v -> { if (currentPassword.has_recovery) { needShowProgress(); @@ -295,6 +304,14 @@ public class TwoStepVerificationActivity extends BaseFragment implements Notific showDialog(builder.create()); } }); + cancelResetButton = new TextView(context); + cancelResetButton.setTextSize(TypedValue.COMPLEX_UNIT_DIP, 14); + cancelResetButton.setGravity((LocaleController.isRTL ? 
Gravity.RIGHT : Gravity.LEFT) | Gravity.BOTTOM); + cancelResetButton.setPadding(0, AndroidUtilities.dp(10), 0, 0); + cancelResetButton.setText(LocaleController.getString("CancelReset", R.string.CancelReset)); + cancelResetButton.setTextColor(Theme.getColor(Theme.key_windowBackgroundWhiteBlueText4)); + linearLayout2.addView(cancelResetButton, LayoutHelper.createLinear(LayoutHelper.MATCH_PARENT, LayoutHelper.WRAP_CONTENT, (LocaleController.isRTL ? Gravity.RIGHT : Gravity.LEFT) | Gravity.BOTTOM, 40, 0, 40, 26)); + cancelResetButton.setOnClickListener(v -> cancelPasswordReset()); emptyView = new EmptyTextProgressView(context); emptyView.showProgress(); @@ -360,13 +377,183 @@ public class TwoStepVerificationActivity extends BaseFragment implements Notific return fragmentView; } + private Runnable updateTimeRunnable = this::updateBottomButton; + + private void cancelPasswordReset() { + if (getParentActivity() == null) { + return; + } + AlertDialog.Builder builder = new AlertDialog.Builder(getParentActivity()); + builder.setPositiveButton(LocaleController.getString("CancelPasswordResetYes", R.string.CancelPasswordResetYes), (dialog, which) -> { + TLRPC.TL_account_declinePasswordReset req = new TLRPC.TL_account_declinePasswordReset(); + getConnectionsManager().sendRequest(req, (response, error) -> AndroidUtilities.runOnUIThread(() -> { + if (response instanceof TLRPC.TL_boolTrue) { + currentPassword.pending_reset_date = 0; + updateBottomButton(); + } + })); + }); + builder.setNegativeButton(LocaleController.getString("CancelPasswordResetNo", R.string.CancelPasswordResetNo), null); + builder.setTitle(LocaleController.getString("CancelReset", R.string.CancelReset)); + builder.setMessage(LocaleController.getString("CancelPasswordReset", R.string.CancelPasswordReset)); + showDialog(builder.create()); + } + + public void setForgotPasswordOnShow() { + forgotPasswordOnShow = true; + } + + private void resetPassword() { + needShowProgress(true); + TLRPC.TL_account_resetPassword 
req = new TLRPC.TL_account_resetPassword(); + getConnectionsManager().sendRequest(req, (response, error) -> AndroidUtilities.runOnUIThread(() -> { + needHideProgress(); + if (response instanceof TLRPC.TL_account_resetPasswordOk) { + AlertDialog.Builder builder = new AlertDialog.Builder(getParentActivity()); + builder.setNegativeButton(LocaleController.getString("OK", R.string.OK), null); + builder.setTitle(LocaleController.getString("ResetPassword", R.string.ResetPassword)); + builder.setMessage(LocaleController.getString("RestorePasswordResetPasswordOk", R.string.RestorePasswordResetPasswordOk)); + showDialog(builder.create(), dialog -> { + getNotificationCenter().postNotificationName(NotificationCenter.didSetOrRemoveTwoStepPassword); + finishFragment(); + }); + } else if (response instanceof TLRPC.TL_account_resetPasswordRequestedWait) { + TLRPC.TL_account_resetPasswordRequestedWait res = (TLRPC.TL_account_resetPasswordRequestedWait) response; + currentPassword.pending_reset_date = res.until_date; + updateBottomButton(); + } else if (response instanceof TLRPC.TL_account_resetPasswordFailedWait) { + TLRPC.TL_account_resetPasswordFailedWait res = (TLRPC.TL_account_resetPasswordFailedWait) response; + int time = res.retry_date - getConnectionsManager().getCurrentTime(); + String timeString; + if (time > 24 * 60 * 60) { + timeString = LocaleController.formatPluralString("Days", time / (24 * 60 * 60)); + } else if (time > 60 * 60) { + timeString = LocaleController.formatPluralString("Hours", time / (60 * 60)); /* whole hours: divide by seconds per hour, not seconds per day */ + } else if (time > 60) { + timeString = LocaleController.formatPluralString("Minutes", time / 60); + } else { + timeString = LocaleController.formatPluralString("Seconds", Math.max(1, time)); + } + showAlertWithText(LocaleController.getString("ResetPassword", R.string.ResetPassword), LocaleController.formatString("ResetPasswordWait", R.string.ResetPasswordWait, timeString)); + } + })); + } + + private void updateBottomButton() { + if (currentPassword == 
null || bottomButton == null || bottomButton.getVisibility() != View.VISIBLE) { + AndroidUtilities.cancelRunOnUIThread(updateTimeRunnable); + if (cancelResetButton != null) { + cancelResetButton.setVisibility(View.GONE); + } + return; + } + LinearLayout.LayoutParams layoutParams = (LinearLayout.LayoutParams) bottomButton.getLayoutParams(); + if (currentPassword.pending_reset_date == 0 || getConnectionsManager().getCurrentTime() > currentPassword.pending_reset_date) { + if (currentPassword.pending_reset_date == 0) { + bottomButton.setText(LocaleController.getString("ForgotPassword", R.string.ForgotPassword)); + cancelResetButton.setVisibility(View.GONE); + layoutParams.bottomMargin = AndroidUtilities.dp(14); + layoutParams.height = AndroidUtilities.dp(40); + } else { + bottomButton.setText(LocaleController.getString("ResetPassword", R.string.ResetPassword)); + cancelResetButton.setVisibility(View.VISIBLE); + layoutParams.bottomMargin = 0; + layoutParams.height = AndroidUtilities.dp(22); + } + bottomButton.setTextColor(Theme.getColor(Theme.key_windowBackgroundWhiteBlueText4)); + AndroidUtilities.cancelRunOnUIThread(updateTimeRunnable); + } else { + int t = Math.max(1, currentPassword.pending_reset_date - getConnectionsManager().getCurrentTime()); + String time; + if (t > 24 * 60 * 60) { + time = LocaleController.formatPluralString("Days", t / (24 * 60 * 60)); + } else if (t >= 60 * 60) { + time = LocaleController.formatPluralString("Hours", t / (60 * 60)); + } else { + time = String.format(Locale.US, "%02d:%02d", t / 60, t % 60); + } + bottomButton.setText(LocaleController.formatString("RestorePasswordResetIn", R.string.RestorePasswordResetIn, time)); + bottomButton.setTextColor(Theme.getColor(Theme.key_windowBackgroundWhiteGrayText6)); + cancelResetButton.setVisibility(View.VISIBLE); + layoutParams.bottomMargin = 0; + layoutParams.height = AndroidUtilities.dp(22); + AndroidUtilities.cancelRunOnUIThread(updateTimeRunnable); + 
AndroidUtilities.runOnUIThread(updateTimeRunnable, 1000); + } + bottomButton.setLayoutParams(layoutParams); + } + + private void onPasswordForgot() { + if (currentPassword.pending_reset_date == 0 && currentPassword.has_recovery) { + needShowProgress(true); + TLRPC.TL_auth_requestPasswordRecovery req = new TLRPC.TL_auth_requestPasswordRecovery(); + ConnectionsManager.getInstance(currentAccount).sendRequest(req, (response, error) -> AndroidUtilities.runOnUIThread(() -> { + needHideProgress(); + if (error == null) { + final TLRPC.TL_auth_passwordRecovery res = (TLRPC.TL_auth_passwordRecovery) response; + currentPassword.email_unconfirmed_pattern = res.email_pattern; + TwoStepVerificationSetupActivity fragment = new TwoStepVerificationSetupActivity(currentAccount, TwoStepVerificationSetupActivity.TYPE_EMAIL_RECOVERY, currentPassword) { + @Override + protected void onReset() { + resetPasswordOnShow = true; + } + }; + fragment.addFragmentToClose(this); + fragment.setCurrentPasswordParams(currentPasswordHash, currentSecretId, currentSecret, false); + presentFragment(fragment); + } else { + if (error.text.startsWith("FLOOD_WAIT")) { + int time = Utilities.parseInt(error.text); + String timeString; + if (time < 60) { + timeString = LocaleController.formatPluralString("Seconds", time); + } else { + timeString = LocaleController.formatPluralString("Minutes", time / 60); + } + showAlertWithText(LocaleController.getString("AppName", R.string.AppName), LocaleController.formatString("FloodWaitTime", R.string.FloodWaitTime, timeString)); + } else { + showAlertWithText(LocaleController.getString("AppName", R.string.AppName), error.text); + } + } + }), ConnectionsManager.RequestFlagFailOnServerErrors | ConnectionsManager.RequestFlagWithoutLogin); + } else { + if (getParentActivity() == null) { + return; + } + if (currentPassword.pending_reset_date != 0) { + if (getConnectionsManager().getCurrentTime() > currentPassword.pending_reset_date) { + AlertDialog.Builder builder = new 
AlertDialog.Builder(getParentActivity()); + builder.setPositiveButton(LocaleController.getString("Reset", R.string.Reset), (dialog, which) -> resetPassword()); + builder.setNegativeButton(LocaleController.getString("Cancel", R.string.Cancel), null); + builder.setTitle(LocaleController.getString("ResetPassword", R.string.ResetPassword)); + builder.setMessage(LocaleController.getString("RestorePasswordResetPasswordText", R.string.RestorePasswordResetPasswordText)); + AlertDialog dialog = builder.create(); + showDialog(dialog); + TextView button = (TextView) dialog.getButton(DialogInterface.BUTTON_POSITIVE); + if (button != null) { + button.setTextColor(Theme.getColor(Theme.key_dialogTextRed2)); + } + } else { + cancelPasswordReset(); + } + } else { + AlertDialog.Builder builder = new AlertDialog.Builder(getParentActivity()); + builder.setPositiveButton(LocaleController.getString("Reset", R.string.Reset), (dialog, which) -> resetPassword()); + builder.setNegativeButton(LocaleController.getString("Cancel", R.string.Cancel), null); + builder.setTitle(LocaleController.getString("ResetPassword", R.string.ResetPassword)); + builder.setMessage(LocaleController.getString("RestorePasswordNoEmailText2", R.string.RestorePasswordNoEmailText2)); + showDialog(builder.create()); + } + } + } + @Override public void didReceivedNotification(int id, int account, Object... 
args) { if (id == NotificationCenter.twoStepPasswordChanged) { if (args != null && args.length > 0 && args[0] != null) { currentPasswordHash = (byte[]) args[0]; } - loadPasswordInfo(false); + loadPasswordInfo(false, false); updateRows(); } } @@ -428,7 +615,7 @@ public class TwoStepVerificationActivity extends BaseFragment implements Notific } } - private void loadPasswordInfo(final boolean silent) { + private void loadPasswordInfo(boolean first, final boolean silent) { if (!silent) { loading = true; if (listAdapter != null) { @@ -444,7 +631,7 @@ public class TwoStepVerificationActivity extends BaseFragment implements Notific AlertsCreator.showUpdateAppAlert(getParentActivity(), LocaleController.getString("UpdateAppAlert", R.string.UpdateAppAlert), true); return; } - if (!silent) { + if (!silent || first) { passwordEntered = currentPasswordHash != null && currentPasswordHash.length > 0 || !currentPassword.has_password; } initPasswordNewAlgo(currentPassword); @@ -454,6 +641,20 @@ public class TwoStepVerificationActivity extends BaseFragment implements Notific }), ConnectionsManager.RequestFlagFailOnServerErrors | ConnectionsManager.RequestFlagWithoutLogin); } + @Override + protected void onTransitionAnimationEnd(boolean isOpen, boolean backward) { + super.onTransitionAnimationEnd(isOpen, backward); + if (isOpen) { + if (forgotPasswordOnShow) { + onPasswordForgot(); + forgotPasswordOnShow = false; + } else if (resetPasswordOnShow) { + resetPassword(); + resetPasswordOnShow = false; + } + } + } + private void updateRows() { StringBuilder lastValue = new StringBuilder(); lastValue.append(setPasswordRow); @@ -473,7 +674,7 @@ public class TwoStepVerificationActivity extends BaseFragment implements Notific setRecoveryEmailRow = -1; changeRecoveryEmailRow = -1; passwordEnabledDetailRow = -1; - if (!loading && currentPassword != null) { + if (!loading && currentPassword != null && passwordEntered) { if (currentPassword.has_password) { changePasswordRow = rowCount++; 
turnPasswordOffRow = rowCount++; @@ -513,6 +714,7 @@ public class TwoStepVerificationActivity extends BaseFragment implements Notific titleTextView.setVisibility(View.INVISIBLE); bottomTextView.setVisibility(View.INVISIBLE); bottomButton.setVisibility(View.INVISIBLE); + updateBottomButton(); } fragmentView.setBackgroundColor(Theme.getColor(Theme.key_windowBackgroundGray)); fragmentView.setTag(Theme.key_windowBackgroundGray); @@ -530,8 +732,8 @@ public class TwoStepVerificationActivity extends BaseFragment implements Notific fragmentView.setTag(Theme.key_windowBackgroundWhite); titleTextView.setVisibility(View.VISIBLE); bottomButton.setVisibility(View.VISIBLE); + updateBottomButton(); bottomTextView.setVisibility(View.INVISIBLE); - bottomButton.setText(LocaleController.getString("ForgotPassword", R.string.ForgotPassword)); if (!TextUtils.isEmpty(currentPassword.hint)) { passwordEditText.setHint(currentPassword.hint); } else { @@ -549,12 +751,20 @@ public class TwoStepVerificationActivity extends BaseFragment implements Notific } private void needShowProgress() { + needShowProgress(false); + } + + private void needShowProgress(boolean delay) { if (getParentActivity() == null || getParentActivity().isFinishing() || progressDialog != null) { return; } progressDialog = new AlertDialog(getParentActivity(), 3); progressDialog.setCanCacnel(false); - progressDialog.show(); + if (delay) { + progressDialog.showDelayed(300); + } else { + progressDialog.show(); + } } public void needHideProgress() { diff --git a/TMessagesProj/src/main/java/org/telegram/ui/TwoStepVerificationSetupActivity.java b/TMessagesProj/src/main/java/org/telegram/ui/TwoStepVerificationSetupActivity.java index 71b4728be..3570936da 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/TwoStepVerificationSetupActivity.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/TwoStepVerificationSetupActivity.java @@ -53,6 +53,7 @@ import org.telegram.messenger.UserConfig; import 
org.telegram.messenger.Utilities; import org.telegram.tgnet.ConnectionsManager; import org.telegram.tgnet.RequestDelegate; +import org.telegram.tgnet.TLObject; import org.telegram.tgnet.TLRPC; import org.telegram.ui.ActionBar.ActionBar; import org.telegram.ui.ActionBar.ActionBarMenu; @@ -111,6 +112,7 @@ public class TwoStepVerificationSetupActivity extends BaseFragment { private byte[] currentSecret; private boolean closeAfterSet; private boolean emailOnly; + private String emailCode; private RLottieDrawable[] animationDrawables; private Runnable setAnimationRunnable; @@ -136,6 +138,8 @@ public class TwoStepVerificationSetupActivity extends BaseFragment { public static final int TYPE_EMAIL_CONFIRM = 5; public static final int TYPE_INTRO = 6; public static final int TYPE_PASSWORD_SET = 7; + public static final int TYPE_VERIFY = 8; + public static final int TYPE_VERIFY_OK = 9; private static final int item_abort = 1; private static final int item_resend = 2; @@ -144,7 +148,7 @@ public class TwoStepVerificationSetupActivity extends BaseFragment { super(); currentType = type; currentPassword = password; - if (currentPassword == null && currentType == TYPE_INTRO) { + if (currentPassword == null && (currentType == TYPE_INTRO || currentType == TYPE_VERIFY)) { loadPasswordInfo(); } else { waitingForEmail = !TextUtils.isEmpty(currentPassword.email_unconfirmed_pattern); @@ -157,7 +161,7 @@ public class TwoStepVerificationSetupActivity extends BaseFragment { currentType = type; currentPassword = password; waitingForEmail = !TextUtils.isEmpty(currentPassword.email_unconfirmed_pattern); - if (currentPassword == null && currentType == TYPE_INTRO) { + if (currentPassword == null && (currentType == TYPE_INTRO || currentType == TYPE_VERIFY)) { loadPasswordInfo(); } } @@ -169,6 +173,10 @@ public class TwoStepVerificationSetupActivity extends BaseFragment { emailOnly = email; } + public void setCurrentEmailCode(String code) { + emailCode = code; + } + public void 
addFragmentToClose(BaseFragment fragment) { fragmentsToClose.add(fragment); } @@ -264,7 +272,45 @@ public class TwoStepVerificationSetupActivity extends BaseFragment { topButton.setVisibility(View.GONE); actionBar.addView(topButton, LayoutHelper.createFrame(LayoutHelper.WRAP_CONTENT, LayoutHelper.MATCH_PARENT, Gravity.TOP | Gravity.RIGHT, 0, 0, 22, 0)); topButton.setOnClickListener(v -> { - if (currentType == TYPE_ENTER_EMAIL) { + if (currentType == TYPE_ENTER_FIRST) { + needShowProgress(); + TLRPC.TL_auth_recoverPassword req = new TLRPC.TL_auth_recoverPassword(); + req.code = emailCode; + getConnectionsManager().sendRequest(req, (response, error) -> AndroidUtilities.runOnUIThread(() -> { + needHideProgress(); + if (error == null) { + getMessagesController().removeSuggestion(0, "VALIDATE_PASSWORD"); + AlertDialog.Builder builder = new AlertDialog.Builder(getParentActivity()); + builder.setPositiveButton(LocaleController.getString("OK", R.string.OK), (dialogInterface, i) -> { + for (int a = 0, N = fragmentsToClose.size(); a < N; a++) { + fragmentsToClose.get(a).removeSelfFromStack(); + } + NotificationCenter.getInstance(currentAccount).postNotificationName(NotificationCenter.twoStepPasswordChanged); + finishFragment(); + }); + builder.setMessage(LocaleController.getString("PasswordReset", R.string.PasswordReset)); + builder.setTitle(LocaleController.getString("TwoStepVerificationTitle", R.string.TwoStepVerificationTitle)); + Dialog dialog = showDialog(builder.create()); + if (dialog != null) { + dialog.setCanceledOnTouchOutside(false); + dialog.setCancelable(false); + } + } else { + if (error.text.startsWith("FLOOD_WAIT")) { + int time = Utilities.parseInt(error.text); + String timeString; + if (time < 60) { + timeString = LocaleController.formatPluralString("Seconds", time); + } else { + timeString = LocaleController.formatPluralString("Minutes", time / 60); + } + showAlertWithText(LocaleController.getString("TwoStepVerificationTitle", 
R.string.TwoStepVerificationTitle), LocaleController.formatString("FloodWaitTime", R.string.FloodWaitTime, timeString)); + } else { + showAlertWithText(LocaleController.getString("TwoStepVerificationTitle", R.string.TwoStepVerificationTitle), error.text); + } + } + })); + } else if (currentType == TYPE_ENTER_EMAIL) { AlertDialog.Builder builder = new AlertDialog.Builder(getParentActivity()); builder.setMessage(LocaleController.getString("YourEmailSkipWarningText", R.string.YourEmailSkipWarningText)); builder.setTitle(LocaleController.getString("YourEmailSkipWarning", R.string.YourEmailSkipWarning)); @@ -308,6 +354,14 @@ public class TwoStepVerificationSetupActivity extends BaseFragment { descriptionText2.setLineSpacing(AndroidUtilities.dp(2), 1); descriptionText2.setPadding(AndroidUtilities.dp(32), 0, AndroidUtilities.dp(32), 0); descriptionText2.setVisibility(View.GONE); + descriptionText2.setOnClickListener(v -> { + if (currentType == TYPE_VERIFY) { + TwoStepVerificationActivity fragment = new TwoStepVerificationActivity(); + fragment.setForgotPasswordOnShow(); + fragment.setPassword(currentPassword); + presentFragment(fragment, true); + } + }); buttonTextView = new TextView(context); buttonTextView.setMinWidth(AndroidUtilities.dp(220)); @@ -343,6 +397,96 @@ public class TwoStepVerificationSetupActivity extends BaseFragment { } break; } + case TYPE_VERIFY_OK: { + finishFragment(); + break; + } + case TYPE_VERIFY: { + if (currentPassword == null) { + needShowProgress(); + doneAfterPasswordLoad = true; + return; + } + String oldPassword = passwordEditText.getText().toString(); + if (oldPassword.length() == 0) { + onFieldError(passwordEditText, false); + return; + } + final byte[] oldPasswordBytes = AndroidUtilities.getStringBytes(oldPassword); + + needShowProgress(); + Utilities.globalQueue.postRunnable(() -> { + final TLRPC.TL_account_getPasswordSettings req = new TLRPC.TL_account_getPasswordSettings(); + final byte[] x_bytes; + if (currentPassword.current_algo 
instanceof TLRPC.TL_passwordKdfAlgoSHA256SHA256PBKDF2HMACSHA512iter100000SHA256ModPow) { + TLRPC.TL_passwordKdfAlgoSHA256SHA256PBKDF2HMACSHA512iter100000SHA256ModPow algo = (TLRPC.TL_passwordKdfAlgoSHA256SHA256PBKDF2HMACSHA512iter100000SHA256ModPow) currentPassword.current_algo; + x_bytes = SRPHelper.getX(oldPasswordBytes, algo); + } else { + x_bytes = null; + } + + RequestDelegate requestDelegate = (response, error) -> { + if (error == null) { + AndroidUtilities.runOnUIThread(() -> { + needHideProgress(); + currentPasswordHash = x_bytes; + getMessagesController().removeSuggestion(0, "VALIDATE_PASSWORD"); + presentFragment(new TwoStepVerificationSetupActivity(TYPE_VERIFY_OK, currentPassword), true); + }); + } else { + AndroidUtilities.runOnUIThread(() -> { + if ("SRP_ID_INVALID".equals(error.text)) { + TLRPC.TL_account_getPassword getPasswordReq = new TLRPC.TL_account_getPassword(); + ConnectionsManager.getInstance(currentAccount).sendRequest(getPasswordReq, (response2, error2) -> AndroidUtilities.runOnUIThread(() -> { + if (error2 == null) { + currentPassword = (TLRPC.TL_account_password) response2; + TwoStepVerificationActivity.initPasswordNewAlgo(currentPassword); + NotificationCenter.getInstance(currentAccount).postNotificationName(NotificationCenter.didSetOrRemoveTwoStepPassword, currentPassword); + buttonTextView.callOnClick(); + } + }), ConnectionsManager.RequestFlagWithoutLogin); + return; + } + needHideProgress(); + if ("PASSWORD_HASH_INVALID".equals(error.text)) { + descriptionText.setText(LocaleController.getString("CheckPasswordWrong", R.string.CheckPasswordWrong)); + descriptionText.setTextColor(Theme.getColor(Theme.key_windowBackgroundWhiteRedText4)); + onFieldError(passwordEditText, true); + showDoneButton(false); + } else if (error.text.startsWith("FLOOD_WAIT")) { + int time = Utilities.parseInt(error.text); + String timeString; + if (time < 60) { + timeString = LocaleController.formatPluralString("Seconds", time); + } else { + timeString = 
LocaleController.formatPluralString("Minutes", time / 60); + } + showAlertWithText(LocaleController.getString("AppName", R.string.AppName), LocaleController.formatString("FloodWaitTime", R.string.FloodWaitTime, timeString)); + } else { + showAlertWithText(LocaleController.getString("AppName", R.string.AppName), error.text); + } + }); + } + }; + + if (currentPassword.current_algo instanceof TLRPC.TL_passwordKdfAlgoSHA256SHA256PBKDF2HMACSHA512iter100000SHA256ModPow) { + TLRPC.TL_passwordKdfAlgoSHA256SHA256PBKDF2HMACSHA512iter100000SHA256ModPow algo = (TLRPC.TL_passwordKdfAlgoSHA256SHA256PBKDF2HMACSHA512iter100000SHA256ModPow) currentPassword.current_algo; + req.password = SRPHelper.startCheck(x_bytes, currentPassword.srp_id, currentPassword.srp_B, algo); + if (req.password == null) { + TLRPC.TL_error error = new TLRPC.TL_error(); + error.text = "ALGO_INVALID"; + requestDelegate.run(null, error); + return; + } + ConnectionsManager.getInstance(currentAccount).sendRequest(req, requestDelegate, ConnectionsManager.RequestFlagFailOnServerErrors | ConnectionsManager.RequestFlagWithoutLogin); + } else { + TLRPC.TL_error error = new TLRPC.TL_error(); + error.text = "PASSWORD_HASH_INVALID"; + requestDelegate.run(null, error); + } + }); + break; + } case TYPE_ENTER_FIRST: { if (passwordEditText.length() == 0) { onFieldError(passwordEditText, false); @@ -350,6 +494,7 @@ public class TwoStepVerificationSetupActivity extends BaseFragment { } TwoStepVerificationSetupActivity fragment = new TwoStepVerificationSetupActivity(currentAccount, TYPE_ENTER_SECOND, currentPassword); fragment.setCurrentPasswordParams(currentPasswordHash, currentSecretId, currentSecret, emailOnly); + fragment.setCurrentEmailCode(emailCode); fragment.firstPassword = passwordEditText.getText().toString(); fragment.fragmentsToClose.addAll(fragmentsToClose); fragment.fragmentsToClose.add(this); @@ -369,6 +514,7 @@ public class TwoStepVerificationSetupActivity extends BaseFragment { } 
TwoStepVerificationSetupActivity fragment = new TwoStepVerificationSetupActivity(currentAccount, TYPE_ENTER_HINT, currentPassword); fragment.setCurrentPasswordParams(currentPasswordHash, currentSecretId, currentSecret, emailOnly); + fragment.setCurrentEmailCode(emailCode); fragment.firstPassword = firstPassword; fragment.fragmentsToClose.addAll(fragmentsToClose); fragment.fragmentsToClose.add(this); @@ -405,27 +551,17 @@ public class TwoStepVerificationSetupActivity extends BaseFragment { onFieldError(passwordEditText, false); return; } - TLRPC.TL_auth_recoverPassword req = new TLRPC.TL_auth_recoverPassword(); + TLRPC.TL_auth_checkRecoveryPassword req = new TLRPC.TL_auth_checkRecoveryPassword(); req.code = code; ConnectionsManager.getInstance(currentAccount).sendRequest(req, (response, error) -> AndroidUtilities.runOnUIThread(() -> { - if (error == null) { - AlertDialog.Builder builder = new AlertDialog.Builder(getParentActivity()); - builder.setPositiveButton(LocaleController.getString("OK", R.string.OK), (dialogInterface, i) -> { - for (int a = 0, N = fragmentsToClose.size(); a < N; a++) { - fragmentsToClose.get(a).removeSelfFromStack(); - } - NotificationCenter.getInstance(currentAccount).postNotificationName(NotificationCenter.twoStepPasswordChanged); - finishFragment(); - }); - builder.setMessage(LocaleController.getString("PasswordReset", R.string.PasswordReset)); - builder.setTitle(LocaleController.getString("TwoStepVerificationTitle", R.string.TwoStepVerificationTitle)); - Dialog dialog = showDialog(builder.create()); - if (dialog != null) { - dialog.setCanceledOnTouchOutside(false); - dialog.setCancelable(false); - } + if (response instanceof TLRPC.TL_boolTrue) { + TwoStepVerificationSetupActivity fragment = new TwoStepVerificationSetupActivity(currentAccount, TYPE_ENTER_FIRST, currentPassword); + fragment.fragmentsToClose.addAll(fragmentsToClose); + fragment.addFragmentToClose(TwoStepVerificationSetupActivity.this); + fragment.setCurrentEmailCode(code); + 
presentFragment(fragment, true); } else { - if (error.text.startsWith("CODE_INVALID")) { + if (error == null || error.text.startsWith("CODE_INVALID")) { onFieldError(passwordEditText, true); } else if (error.text.startsWith("FLOOD_WAIT")) { int time = Utilities.parseInt(error.text); @@ -491,6 +627,7 @@ public class TwoStepVerificationSetupActivity extends BaseFragment { currentPassword.email_unconfirmed_pattern = ""; TwoStepVerificationSetupActivity fragment = new TwoStepVerificationSetupActivity(TYPE_PASSWORD_SET, currentPassword); fragment.setCurrentPasswordParams(currentPasswordHash, currentSecretId, currentSecret, emailOnly); + fragment.fragmentsToClose.addAll(fragmentsToClose); fragment.closeAfterSet = closeAfterSet; presentFragment(fragment, true); NotificationCenter.getInstance(currentAccount).postNotificationName(NotificationCenter.twoStepPasswordChanged, currentPasswordHash, currentPassword.new_algo, currentPassword.new_secure_algo, currentPassword.secure_random, email, hint, null, firstPassword); @@ -520,7 +657,8 @@ public class TwoStepVerificationSetupActivity extends BaseFragment { switch (currentType) { case TYPE_INTRO: - case TYPE_PASSWORD_SET: { + case TYPE_PASSWORD_SET: + case TYPE_VERIFY_OK: { ViewGroup container = new ViewGroup(context) { @Override @@ -588,6 +726,7 @@ public class TwoStepVerificationSetupActivity extends BaseFragment { fragmentView = container; break; } + case TYPE_VERIFY: case TYPE_ENTER_FIRST: case TYPE_ENTER_SECOND: case TYPE_EMAIL_CONFIRM: @@ -835,7 +974,17 @@ public class TwoStepVerificationSetupActivity extends BaseFragment { descriptionText3.setPadding(AndroidUtilities.dp(32), 0, AndroidUtilities.dp(32), 0); descriptionText3.setText(LocaleController.getString("RestoreEmailTroubleNoEmail", R.string.RestoreEmailTroubleNoEmail)); scrollViewLinearLayout.addView(descriptionText3, LayoutHelper.createLinear(LayoutHelper.WRAP_CONTENT, LayoutHelper.WRAP_CONTENT, Gravity.CENTER_HORIZONTAL | Gravity.TOP, 0, 0, 0, 25)); - 
descriptionText3.setOnClickListener(v -> showAlertWithText(LocaleController.getString("RestorePasswordNoEmailTitle", R.string.RestorePasswordNoEmailTitle), LocaleController.getString("RestoreEmailTroubleText", R.string.RestoreEmailTroubleText))); + descriptionText3.setOnClickListener(v -> { + AlertDialog.Builder builder = new AlertDialog.Builder(getParentActivity()); + builder.setNegativeButton(LocaleController.getString("Cancel", R.string.Cancel), null); + builder.setPositiveButton(LocaleController.getString("Reset", R.string.Reset), (dialog, which) -> { + onReset(); + finishFragment(); + }); + builder.setTitle(LocaleController.getString("ResetPassword", R.string.ResetPassword)); + builder.setMessage(LocaleController.getString("RestoreEmailTroubleText2", R.string.RestoreEmailTroubleText2)); + showDialog(builder.create()); + }); } fragmentView = container; @@ -886,6 +1035,37 @@ public class TwoStepVerificationSetupActivity extends BaseFragment { imageView.playAnimation(); break; } + case TYPE_VERIFY_OK: { + titleTextView.setText(LocaleController.getString("CheckPasswordPerfect", R.string.CheckPasswordPerfect)); + descriptionText.setText(LocaleController.getString("CheckPasswordPerfectInfo", R.string.CheckPasswordPerfectInfo)); + buttonTextView.setText(LocaleController.getString("CheckPasswordBackToSettings", R.string.CheckPasswordBackToSettings)); + descriptionText.setVisibility(View.VISIBLE); + + imageView.setAnimation(R.raw.wallet_perfect, 120, 120); + imageView.playAnimation(); + break; + } + case TYPE_VERIFY: { + actionBar.setTitle(LocaleController.getString("PleaseEnterCurrentPassword", R.string.PleaseEnterCurrentPassword)); + titleTextView.setText(LocaleController.getString("PleaseEnterCurrentPassword", R.string.PleaseEnterCurrentPassword)); + descriptionText.setText(LocaleController.getString("CheckPasswordInfo", R.string.CheckPasswordInfo)); + + descriptionText.setVisibility(View.VISIBLE); + actionBar.getTitleTextView().setAlpha(0.0f); + 
buttonTextView.setText(LocaleController.getString("CheckPassword", R.string.CheckPassword)); + descriptionText2.setText(LocaleController.getString("ForgotPassword", R.string.ForgotPassword)); + descriptionText2.setTextColor(Theme.getColor(Theme.key_windowBackgroundWhiteBlueText2)); + passwordEditText.setHint(LocaleController.getString("LoginPassword", R.string.LoginPassword)); + passwordEditText.setImeOptions(EditorInfo.IME_ACTION_DONE | EditorInfo.IME_FLAG_NO_EXTRACT_UI); + passwordEditText.setInputType(InputType.TYPE_CLASS_TEXT | InputType.TYPE_TEXT_VARIATION_PASSWORD); + passwordEditText.setTransformationMethod(PasswordTransformationMethod.getInstance()); + passwordEditText.setTypeface(Typeface.DEFAULT); + passwordEditText.setPadding(0, AndroidUtilities.dp(2), AndroidUtilities.dp(36), 0); + + imageView.setAnimation(R.raw.wallet_science, 120, 120); + imageView.playAnimation(); + break; + } case TYPE_ENTER_FIRST: { if (currentPassword.has_password) { actionBar.setTitle(LocaleController.getString("PleaseEnterNewFirstPassword", R.string.PleaseEnterNewFirstPassword)); @@ -894,6 +1074,10 @@ public class TwoStepVerificationSetupActivity extends BaseFragment { actionBar.setTitle(LocaleController.getString("PleaseEnterFirstPassword", R.string.PleaseEnterFirstPassword)); titleTextView.setText(LocaleController.getString("PleaseEnterFirstPassword", R.string.PleaseEnterFirstPassword)); } + if (!TextUtils.isEmpty(emailCode)) { + topButton.setVisibility(View.VISIBLE); + topButton.setText(LocaleController.getString("YourEmailSkip", R.string.YourEmailSkip)); + } actionBar.getTitleTextView().setAlpha(0.0f); buttonTextView.setText(LocaleController.getString("Continue", R.string.Continue)); passwordEditText.setHint(LocaleController.getString("LoginPassword", R.string.LoginPassword)); @@ -1074,6 +1258,10 @@ public class TwoStepVerificationSetupActivity extends BaseFragment { buttonTextView.callOnClick(); } showDoneButton(s.length() > 0); + } else if (currentType == TYPE_VERIFY) { + 
if (s.length() > 0) { + showDoneButton(true); + } } } }); @@ -1105,7 +1293,7 @@ public class TwoStepVerificationSetupActivity extends BaseFragment { @Override protected boolean hideKeyboardOnShow() { - return currentType == TYPE_PASSWORD_SET; + return currentType == TYPE_PASSWORD_SET || currentType == TYPE_VERIFY_OK; } private void onHintDone() { @@ -1307,24 +1495,20 @@ public class TwoStepVerificationSetupActivity extends BaseFragment { return; } final String password = firstPassword; - final TLRPC.TL_account_updatePasswordSettings req = new TLRPC.TL_account_updatePasswordSettings(); - if (currentPasswordHash == null || currentPasswordHash.length == 0) { - req.password = new TLRPC.TL_inputCheckPasswordEmpty(); - } - req.new_settings = new TLRPC.TL_account_passwordInputSettings(); + + TLRPC.TL_account_passwordInputSettings new_settings = new TLRPC.TL_account_passwordInputSettings(); if (clear) { UserConfig.getInstance(currentAccount).resetSavedPassword(); currentSecret = null; if (waitingForEmail) { - req.new_settings.flags = 2; - req.new_settings.email = ""; - req.password = new TLRPC.TL_inputCheckPasswordEmpty(); + new_settings.flags = 2; + new_settings.email = ""; } else { - req.new_settings.flags = 3; - req.new_settings.hint = ""; - req.new_settings.new_password_hash = new byte[0]; - req.new_settings.new_algo = new TLRPC.TL_passwordKdfAlgoUnknown(); - req.new_settings.email = ""; + new_settings.flags = 3; + new_settings.hint = ""; + new_settings.new_password_hash = new byte[0]; + new_settings.new_algo = new TLRPC.TL_passwordKdfAlgoUnknown(); + new_settings.email = ""; } } else { if (hint == null && currentPassword != null) { @@ -1334,19 +1518,39 @@ public class TwoStepVerificationSetupActivity extends BaseFragment { hint = ""; } if (password != null) { - req.new_settings.flags |= 1; - req.new_settings.hint = hint; - req.new_settings.new_algo = currentPassword.new_algo; + new_settings.flags |= 1; + new_settings.hint = hint; + new_settings.new_algo = 
currentPassword.new_algo; } if (email.length() > 0) { - req.new_settings.flags |= 2; - req.new_settings.email = email.trim(); + new_settings.flags |= 2; + new_settings.email = email.trim(); } } + + TLObject request; + if (emailCode != null) { + TLRPC.TL_auth_recoverPassword req = new TLRPC.TL_auth_recoverPassword(); + req.code = emailCode; + req.new_settings = new_settings; + req.flags |= 1; + request = req; + } else { + TLRPC.TL_account_updatePasswordSettings req = new TLRPC.TL_account_updatePasswordSettings(); + if (currentPasswordHash == null || currentPasswordHash.length == 0 || clear && waitingForEmail) { + req.password = new TLRPC.TL_inputCheckPasswordEmpty(); + } + req.new_settings = new_settings; + request = req; + } + needShowProgress(); Utilities.globalQueue.postRunnable(() -> { - if (req.password == null) { - req.password = getNewSrpPassword(); + if (request instanceof TLRPC.TL_account_updatePasswordSettings) { + TLRPC.TL_account_updatePasswordSettings req = (TLRPC.TL_account_updatePasswordSettings) request; + if (req.password == null) { + req.password = getNewSrpPassword(); + } } byte[] newPasswordBytes; @@ -1378,7 +1582,8 @@ public class TwoStepVerificationSetupActivity extends BaseFragment { return; } needHideProgress(); - if (error == null && response instanceof TLRPC.TL_boolTrue) { + if (error == null && (response instanceof TLRPC.TL_boolTrue || response instanceof TLRPC.auth_Authorization)) { + getMessagesController().removeSuggestion(0, "VALIDATE_PASSWORD"); if (clear) { for (int a = 0, N = fragmentsToClose.size(); a < N; a++) { fragmentsToClose.get(a).removeSelfFromStack(); @@ -1440,7 +1645,7 @@ public class TwoStepVerificationSetupActivity extends BaseFragment { for (int a = 0, N = fragmentsToClose.size(); a < N; a++) { fragmentsToClose.get(a).removeSelfFromStack(); } - NotificationCenter.getInstance(currentAccount).postNotificationName(NotificationCenter.twoStepPasswordChanged, newPasswordHash, req.new_settings.new_algo, 
currentPassword.new_secure_algo, currentPassword.secure_random, email, hint, email, firstPassword); + NotificationCenter.getInstance(currentAccount).postNotificationName(NotificationCenter.twoStepPasswordChanged, newPasswordHash, new_settings.new_algo, currentPassword.new_secure_algo, currentPassword.secure_random, email, hint, email, firstPassword); currentPassword.email_unconfirmed_pattern = email; TwoStepVerificationSetupActivity fragment = new TwoStepVerificationSetupActivity(TwoStepVerificationSetupActivity.TYPE_EMAIL_CONFIRM, currentPassword); fragment.setCurrentPasswordParams(newPasswordHash != null ? newPasswordHash : currentPasswordHash, currentSecretId, currentSecret, emailOnly); @@ -1480,32 +1685,32 @@ public class TwoStepVerificationSetupActivity extends BaseFragment { System.arraycopy(currentSecret, 0, encryptedSecret, 0, 32); Utilities.aesCbcEncryptionByteArraySafe(encryptedSecret, key, iv, 0, encryptedSecret.length, 0, 1); - req.new_settings.new_secure_settings = new TLRPC.TL_secureSecretSettings(); - req.new_settings.new_secure_settings.secure_algo = newAlgo; - req.new_settings.new_secure_settings.secure_secret = encryptedSecret; - req.new_settings.new_secure_settings.secure_secret_id = currentSecretId; - req.new_settings.flags |= 4; + new_settings.new_secure_settings = new TLRPC.TL_secureSecretSettings(); + new_settings.new_secure_settings.secure_algo = newAlgo; + new_settings.new_secure_settings.secure_secret = encryptedSecret; + new_settings.new_secure_settings.secure_secret_id = currentSecretId; + new_settings.flags |= 4; } } if (currentPassword.new_algo instanceof TLRPC.TL_passwordKdfAlgoSHA256SHA256PBKDF2HMACSHA512iter100000SHA256ModPow) { if (password != null) { TLRPC.TL_passwordKdfAlgoSHA256SHA256PBKDF2HMACSHA512iter100000SHA256ModPow algo = (TLRPC.TL_passwordKdfAlgoSHA256SHA256PBKDF2HMACSHA512iter100000SHA256ModPow) currentPassword.new_algo; - req.new_settings.new_password_hash = SRPHelper.getVBytes(newPasswordBytes, algo); - if 
(req.new_settings.new_password_hash == null) { + new_settings.new_password_hash = SRPHelper.getVBytes(newPasswordBytes, algo); + if (new_settings.new_password_hash == null) { TLRPC.TL_error error = new TLRPC.TL_error(); error.text = "ALGO_INVALID"; requestDelegate.run(null, error); } } - ConnectionsManager.getInstance(currentAccount).sendRequest(req, requestDelegate, ConnectionsManager.RequestFlagFailOnServerErrors | ConnectionsManager.RequestFlagWithoutLogin); + ConnectionsManager.getInstance(currentAccount).sendRequest(request, requestDelegate, ConnectionsManager.RequestFlagFailOnServerErrors | ConnectionsManager.RequestFlagWithoutLogin); } else { TLRPC.TL_error error = new TLRPC.TL_error(); error.text = "PASSWORD_HASH_INVALID"; requestDelegate.run(null, error); } } else { - ConnectionsManager.getInstance(currentAccount).sendRequest(req, requestDelegate, ConnectionsManager.RequestFlagFailOnServerErrors | ConnectionsManager.RequestFlagWithoutLogin); + ConnectionsManager.getInstance(currentAccount).sendRequest(request, requestDelegate, ConnectionsManager.RequestFlagFailOnServerErrors | ConnectionsManager.RequestFlagWithoutLogin); } }); } @@ -1518,6 +1723,10 @@ public class TwoStepVerificationSetupActivity extends BaseFragment { return null; } + protected void onReset() { + + } + private void onFieldError(TextView field, boolean clear) { if (getParentActivity() == null) { return; diff --git a/TMessagesProj/src/main/java/org/telegram/ui/VoIPFragment.java b/TMessagesProj/src/main/java/org/telegram/ui/VoIPFragment.java index cdd70046d..220f3ed58 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/VoIPFragment.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/VoIPFragment.java @@ -226,7 +226,7 @@ public class VoIPFragment implements VoIPService.StateListener, NotificationCent return; } boolean transitionFromPip = VoIPPiPView.getInstance() != null; - if (VoIPService.getSharedInstance() == null) { + if (VoIPService.getSharedInstance() == null || 
VoIPService.getSharedInstance().getUser() == null) { return; } VoIPFragment fragment = new VoIPFragment(account); diff --git a/TMessagesProj/src/main/java/org/telegram/ui/VoiceMessageEnterTransition.java b/TMessagesProj/src/main/java/org/telegram/ui/VoiceMessageEnterTransition.java index 721a29b15..42db77a02 100644 --- a/TMessagesProj/src/main/java/org/telegram/ui/VoiceMessageEnterTransition.java +++ b/TMessagesProj/src/main/java/org/telegram/ui/VoiceMessageEnterTransition.java @@ -10,6 +10,7 @@ import android.graphics.Paint; import android.graphics.PorterDuff; import android.graphics.PorterDuffXfermode; import android.graphics.Shader; +import android.transition.Transition; import android.view.View; import android.view.animation.LinearInterpolator; import android.widget.FrameLayout; @@ -23,129 +24,50 @@ import org.telegram.ui.Components.ChatActivityEnterView; import org.telegram.ui.Components.CubicBezierInterpolator; import org.telegram.ui.Components.RecyclerListView; -public class VoiceMessageEnterTransition { +public class VoiceMessageEnterTransition implements MessageEnterTransitionContainer.Transition { + private final ChatMessageCell messageView; + private final RecyclerListView listView; float fromRadius; - float progress; final Paint circlePaint = new Paint(Paint.ANTI_ALIAS_FLAG); private final ValueAnimator animator; + private final ChatActivityEnterView.RecordCircle recordCircle; + private final Matrix gradientMatrix; + private final Paint gradientPaint; + private final LinearGradient gradientShader; + private final int messageId; + MessageEnterTransitionContainer container; - public VoiceMessageEnterTransition(FrameLayout containerView, ChatMessageCell messageView, ChatActivityEnterView chatActivityEnterView, RecyclerListView listView) { - + public VoiceMessageEnterTransition(ChatMessageCell messageView, ChatActivityEnterView chatActivityEnterView, RecyclerListView listView, MessageEnterTransitionContainer container) { + this.messageView = messageView; + 
this.container = container; + this.listView = listView; fromRadius = chatActivityEnterView.getRecordCicle().drawingCircleRadius; - messageView.setVoiceTransitionInProgress(true); + messageView.setEnterTransitionInProgress(true); - ChatActivityEnterView.RecordCircle recordCircle = chatActivityEnterView.getRecordCicle(); - chatActivityEnterView.startMessageTransition(); + recordCircle = chatActivityEnterView.getRecordCicle(); recordCircle.voiceEnterTransitionInProgress = true; recordCircle.skipDraw = true; - Matrix gradientMatrix = new Matrix(); - Paint gradientPaint = new Paint(Paint.ANTI_ALIAS_FLAG); + gradientMatrix = new Matrix(); + gradientPaint = new Paint(Paint.ANTI_ALIAS_FLAG); gradientPaint.setXfermode(new PorterDuffXfermode(PorterDuff.Mode.DST_IN)); - LinearGradient gradientShader = new LinearGradient(0, AndroidUtilities.dp(12), 0, 0, 0, 0xFF000000, Shader.TileMode.CLAMP); + gradientShader = new LinearGradient(0, AndroidUtilities.dp(12), 0, 0, 0, 0xFF000000, Shader.TileMode.CLAMP); gradientPaint.setShader(gradientShader); - int messageId = messageView.getMessageObject().stableId; + messageId = messageView.getMessageObject().stableId; - View view = new View(containerView.getContext()) { - - float lastToCx; - float lastToCy; - - @Override - protected void onDraw(Canvas canvas) { - super.onDraw(canvas); - - float step1Time = 0.6f; - float moveProgress = progress; - float hideWavesProgress = progress > step1Time ? 
1f : progress / step1Time; - - float fromCx = recordCircle.drawingCx + recordCircle.getX() - getX(); - float fromCy = recordCircle.drawingCy + recordCircle.getY() - getY(); - - float toCy; - float toCx; - - if (messageView.getMessageObject().stableId != messageId) { - toCx = lastToCx; - toCy = lastToCy; - } else { - toCy = messageView.getRadialProgress().getProgressRect().centerY() + messageView.getY() + listView.getY() - getY(); - toCx = messageView.getRadialProgress().getProgressRect().centerX() + messageView.getX() + listView.getX() - getX(); - } - - lastToCx = toCx; - lastToCy = toCy; - - float progress = CubicBezierInterpolator.DEFAULT.getInterpolation(moveProgress); - float xProgress = CubicBezierInterpolator.EASE_OUT_QUINT.getInterpolation(moveProgress); - - float cx = fromCx * (1f - xProgress) + toCx * xProgress; - float cy = fromCy * (1f - progress) + toCy * progress; - - float toRadius = messageView.getRadialProgress().getProgressRect().height() / 2; - float radius = fromRadius * (1f - progress) + toRadius * progress; - - float listViewBottom = listView.getY() - getY() + listView.getMeasuredHeight(); - int clipBottom = 0; - if (getMeasuredHeight() > 0) { - clipBottom = (int) (getMeasuredHeight() * (1f - progress) + listViewBottom * progress); - canvas.saveLayerAlpha(0, getMeasuredHeight() - AndroidUtilities.dp(400), getMeasuredWidth(), getMeasuredHeight(), 255, Canvas.ALL_SAVE_FLAG); - } else { - canvas.save(); - } - - circlePaint.setColor(ColorUtils.blendARGB(Theme.getColor(Theme.key_chat_messagePanelVoiceBackground), Theme.getColor(messageView.getRadialProgress().getCircleColorKey()), progress)); - - recordCircle.drawWaves(canvas, cx, cy, 1f - hideWavesProgress); - - canvas.drawCircle(cx, cy, radius, circlePaint); - - canvas.save(); - - float scale = radius / toRadius; - canvas.scale(scale, scale, cx, cy); - canvas.translate(cx - messageView.getRadialProgress().getProgressRect().centerX(), cy - 
messageView.getRadialProgress().getProgressRect().centerY()); - - messageView.getRadialProgress().setOverrideAlpha(progress); - messageView.getRadialProgress().setDrawBackground(false); - messageView.getRadialProgress().draw(canvas); - messageView.getRadialProgress().setDrawBackground(true); - messageView.getRadialProgress().setOverrideAlpha(1f); - canvas.restore(); - - if (getMeasuredHeight() > 0) { - gradientMatrix.setTranslate(0, clipBottom); - gradientShader.setLocalMatrix(gradientMatrix); - canvas.drawRect(0, clipBottom, getMeasuredWidth(), getMeasuredHeight(), gradientPaint); - } - - //restore clipRect - canvas.restore(); - - recordCircle.drawIcon(canvas, (int) fromCx, (int) fromCy, 1f - moveProgress); - - recordCircle.skipDraw = false; - canvas.save(); - canvas.translate(recordCircle.getX() - getX(), recordCircle.getY() - getY()); - recordCircle.draw(canvas); - canvas.restore(); - recordCircle.skipDraw = true; - } - }; - - containerView.addView(view); + container.addTransition(this); animator = ValueAnimator.ofFloat(0f, 1f); animator.addUpdateListener(valueAnimator -> { progress = (float) valueAnimator.getAnimatedValue(); - view.invalidate(); + container.invalidate(); }); animator.setInterpolator(new LinearInterpolator()); @@ -153,11 +75,9 @@ public class VoiceMessageEnterTransition { animator.addListener(new AnimatorListenerAdapter() { @Override public void onAnimationEnd(Animator animation) { - if (view.getParent() != null) { - messageView.setVoiceTransitionInProgress(false); - containerView.removeView(view); - recordCircle.skipDraw = false; - } + messageView.setEnterTransitionInProgress(false); + container.removeTransition(VoiceMessageEnterTransition.this); + recordCircle.skipDraw = false; } }); } @@ -165,4 +85,79 @@ public class VoiceMessageEnterTransition { public void start() { animator.start(); } + + float lastToCx; + float lastToCy; + + @Override + public void onDraw(Canvas canvas) { + float step1Time = 0.6f; + float moveProgress = progress; + float 
hideWavesProgress = progress > step1Time ? 1f : progress / step1Time; + + float fromCx = recordCircle.drawingCx + recordCircle.getX() - container.getX(); + float fromCy = recordCircle.drawingCy + recordCircle.getY() - container.getY(); + + float toCy; + float toCx; + + if (messageView.getMessageObject().stableId != messageId) { + toCx = lastToCx; + toCy = lastToCy; + } else { + toCy = messageView.getRadialProgress().getProgressRect().centerY() + messageView.getY() + listView.getY() - container.getY(); + toCx = messageView.getRadialProgress().getProgressRect().centerX() + messageView.getX() + listView.getX() - container.getX(); + } + + lastToCx = toCx; + lastToCy = toCy; + + float progress = CubicBezierInterpolator.DEFAULT.getInterpolation(moveProgress); + float xProgress = CubicBezierInterpolator.EASE_OUT_QUINT.getInterpolation(moveProgress); + + float cx = fromCx * (1f - xProgress) + toCx * xProgress; + float cy = fromCy * (1f - progress) + toCy * progress; + + float toRadius = messageView.getRadialProgress().getProgressRect().height() / 2; + float radius = fromRadius * (1f - progress) + toRadius * progress; + + float listViewBottom = listView.getY() - container.getY() + listView.getMeasuredHeight(); + int clipBottom = 0; + if (container.getMeasuredHeight() > 0) { + clipBottom = (int) (container.getMeasuredHeight() * (1f - progress) + listViewBottom * progress); + canvas.saveLayerAlpha(0, container.getMeasuredHeight() - AndroidUtilities.dp(400), container.getMeasuredWidth(), container.getMeasuredHeight(), 255, Canvas.ALL_SAVE_FLAG); + } else { + canvas.save(); + } + + circlePaint.setColor(ColorUtils.blendARGB(Theme.getColor(Theme.key_chat_messagePanelVoiceBackground), Theme.getColor(messageView.getRadialProgress().getCircleColorKey()), progress)); + + recordCircle.drawWaves(canvas, cx, cy, 1f - hideWavesProgress); + + canvas.drawCircle(cx, cy, radius, circlePaint); + + canvas.save(); + + float scale = radius / toRadius; + canvas.scale(scale, scale, cx, cy); + 
canvas.translate(cx - messageView.getRadialProgress().getProgressRect().centerX(), cy - messageView.getRadialProgress().getProgressRect().centerY()); + + messageView.getRadialProgress().setOverrideAlpha(progress); + messageView.getRadialProgress().setDrawBackground(false); + messageView.getRadialProgress().draw(canvas); + messageView.getRadialProgress().setDrawBackground(true); + messageView.getRadialProgress().setOverrideAlpha(1f); + canvas.restore(); + + if (container.getMeasuredHeight() > 0) { + gradientMatrix.setTranslate(0, clipBottom); + gradientShader.setLocalMatrix(gradientMatrix); + canvas.drawRect(0, clipBottom, container.getMeasuredWidth(), container.getMeasuredHeight(), gradientPaint); + } + + //restore clipRect + canvas.restore(); + + recordCircle.drawIcon(canvas, (int) fromCx, (int) fromCy, 1f - moveProgress); + } } diff --git a/TMessagesProj/src/main/java/org/webrtc/HardwareVideoDecoderFactory.java b/TMessagesProj/src/main/java/org/webrtc/HardwareVideoDecoderFactory.java index fc226d24b..62a83c733 100644 --- a/TMessagesProj/src/main/java/org/webrtc/HardwareVideoDecoderFactory.java +++ b/TMessagesProj/src/main/java/org/webrtc/HardwareVideoDecoderFactory.java @@ -23,9 +23,6 @@ public class HardwareVideoDecoderFactory extends MediaCodecVideoDecoderFactory { new Predicate() { @Override public boolean test(MediaCodecInfo arg) { - if (VoIPService.getSharedInstance() != null && VoIPService.getSharedInstance().groupCall != null) { - return false; - } if (!MediaCodecUtils.isHardwareAccelerated(arg)) { return false; } @@ -37,6 +34,9 @@ public class HardwareVideoDecoderFactory extends MediaCodecVideoDecoderFactory { for (int a = 0; a < types.length; a++) { switch (types[a]) { case "video/x-vnd.on2.vp8": + if (VoIPService.getSharedInstance() != null && VoIPService.getSharedInstance().groupCall != null) { + return false; + } return config.enable_vp8_decoder; case "video/x-vnd.on2.vp9": return config.enable_vp9_decoder; diff --git 
a/TMessagesProj/src/main/java/org/webrtc/HardwareVideoEncoderFactory.java b/TMessagesProj/src/main/java/org/webrtc/HardwareVideoEncoderFactory.java index 8057dc6c4..8e9bac2ec 100644 --- a/TMessagesProj/src/main/java/org/webrtc/HardwareVideoEncoderFactory.java +++ b/TMessagesProj/src/main/java/org/webrtc/HardwareVideoEncoderFactory.java @@ -189,11 +189,11 @@ public class HardwareVideoEncoderFactory implements VideoEncoderFactory { // Returns true if the given MediaCodecInfo indicates a hardware module that is supported on the // current SDK. private boolean isHardwareSupportedInCurrentSdk(MediaCodecInfo info, VideoCodecMimeType type) { - Instance.ServerConfig config = Instance.getGlobalServerConfig(); - if (!config.enable_h264_encoder && !config.enable_h265_encoder && !config.enable_vp8_encoder && !config.enable_vp9_encoder) { + if (VoIPService.getSharedInstance() != null && VoIPService.getSharedInstance().groupCall != null) { return false; } - if (VoIPService.getSharedInstance() != null && VoIPService.getSharedInstance().groupCall != null) { + Instance.ServerConfig config = Instance.getGlobalServerConfig(); + if (!config.enable_h264_encoder && !config.enable_h265_encoder && !config.enable_vp8_encoder && !config.enable_vp9_encoder) { return false; } switch (type) { diff --git a/TMessagesProj/src/main/java/org/webrtc/OpenH264Decoder.java b/TMessagesProj/src/main/java/org/webrtc/OpenH264Decoder.java new file mode 100644 index 000000000..c912b96dd --- /dev/null +++ b/TMessagesProj/src/main/java/org/webrtc/OpenH264Decoder.java @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +package org.webrtc; + +public class OpenH264Decoder extends WrappedNativeVideoDecoder { + @Override + public long createNativeVideoDecoder() { + return nativeCreateDecoder(); + } + + static native long nativeCreateDecoder(); + + static native boolean nativeIsSupported(); +} diff --git a/TMessagesProj/src/main/java/org/webrtc/OpenH264Encoder.java b/TMessagesProj/src/main/java/org/webrtc/OpenH264Encoder.java new file mode 100644 index 000000000..483e5bb19 --- /dev/null +++ b/TMessagesProj/src/main/java/org/webrtc/OpenH264Encoder.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +package org.webrtc; + +public class OpenH264Encoder extends WrappedNativeVideoEncoder { + @Override + public long createNativeVideoEncoder() { + return nativeCreateEncoder(); + } + + static native long nativeCreateEncoder(); + + @Override + public boolean isHardwareEncoder() { + return false; + } +} diff --git a/TMessagesProj/src/main/java/org/webrtc/SoftwareVideoDecoderFactory.java b/TMessagesProj/src/main/java/org/webrtc/SoftwareVideoDecoderFactory.java index 6253889fc..610cbb44e 100644 --- a/TMessagesProj/src/main/java/org/webrtc/SoftwareVideoDecoderFactory.java +++ b/TMessagesProj/src/main/java/org/webrtc/SoftwareVideoDecoderFactory.java @@ -32,6 +32,9 @@ public class SoftwareVideoDecoderFactory implements VideoDecoderFactory { if (codecType.getName().equalsIgnoreCase("VP9") && LibvpxVp9Decoder.nativeIsSupported()) { return new LibvpxVp9Decoder(); } + if (codecType.getName().equalsIgnoreCase("H264")) { + return new OpenH264Decoder(); + } return null; } @@ -48,6 +51,7 @@ public class 
SoftwareVideoDecoderFactory implements VideoDecoderFactory { if (LibvpxVp9Decoder.nativeIsSupported()) { codecs.add(new VideoCodecInfo("VP9", new HashMap<>())); } + codecs.add(new VideoCodecInfo("H264", new HashMap<>())); return codecs.toArray(new VideoCodecInfo[codecs.size()]); } diff --git a/TMessagesProj/src/main/java/org/webrtc/SoftwareVideoEncoderFactory.java b/TMessagesProj/src/main/java/org/webrtc/SoftwareVideoEncoderFactory.java index b020f2efb..d75e171b1 100644 --- a/TMessagesProj/src/main/java/org/webrtc/SoftwareVideoEncoderFactory.java +++ b/TMessagesProj/src/main/java/org/webrtc/SoftwareVideoEncoderFactory.java @@ -25,6 +25,9 @@ public class SoftwareVideoEncoderFactory implements VideoEncoderFactory { if (info.name.equalsIgnoreCase("VP9") && LibvpxVp9Encoder.nativeIsSupported()) { return new LibvpxVp9Encoder(); } + if (info.name.equalsIgnoreCase("H264")) { + return new OpenH264Encoder(); + } return null; } @@ -38,6 +41,7 @@ public class SoftwareVideoEncoderFactory implements VideoEncoderFactory { List codecs = new ArrayList(); codecs.add(new VideoCodecInfo("VP8", new HashMap<>())); + codecs.add(new VideoCodecInfo("H264", new HashMap<>())); if (LibvpxVp9Encoder.nativeIsSupported()) { codecs.add(new VideoCodecInfo("VP9", new HashMap<>())); } diff --git a/TMessagesProj/src/main/java/org/webrtc/voiceengine/WebRtcAudioRecord.java b/TMessagesProj/src/main/java/org/webrtc/voiceengine/WebRtcAudioRecord.java index 317ab60b8..773b5224b 100644 --- a/TMessagesProj/src/main/java/org/webrtc/voiceengine/WebRtcAudioRecord.java +++ b/TMessagesProj/src/main/java/org/webrtc/voiceengine/WebRtcAudioRecord.java @@ -10,21 +10,25 @@ package org.webrtc.voiceengine; +import android.media.AudioAttributes; import android.media.AudioFormat; +import android.media.AudioPlaybackCaptureConfiguration; import android.media.AudioRecord; import android.media.MediaRecorder.AudioSource; +import android.media.projection.MediaProjection; import android.os.Build; import android.os.Process; 
import androidx.annotation.Nullable; import java.lang.System; import java.nio.ByteBuffer; import java.util.Arrays; -import java.util.concurrent.TimeUnit; + +import org.telegram.messenger.FileLog; +import org.telegram.messenger.voip.VideoCapturerDevice; import org.webrtc.Logging; import org.webrtc.ThreadUtils; public class WebRtcAudioRecord { - private static final boolean DEBUG = false; private static final String TAG = "WebRtcAudioRecord"; @@ -62,13 +66,15 @@ public class WebRtcAudioRecord { private static volatile boolean microphoneMute; private byte[] emptyBytes; + private boolean isScreenCapture; + // Audio recording error handler functions. public enum AudioRecordStartErrorCode { AUDIO_RECORD_START_EXCEPTION, AUDIO_RECORD_START_STATE_MISMATCH, } - public static interface WebRtcAudioRecordErrorCallback { + public interface WebRtcAudioRecordErrorCallback { void onWebRtcAudioRecordInitError(String errorMessage); void onWebRtcAudioRecordStartError(AudioRecordStartErrorCode errorCode, String errorMessage); void onWebRtcAudioRecordError(String errorMessage); @@ -149,7 +155,6 @@ public class WebRtcAudioRecord { public void run() { Process.setThreadPriority(Process.THREAD_PRIORITY_URGENT_AUDIO); Logging.d(TAG, "AudioRecordThread" + WebRtcAudioUtils.getThreadInfo()); - assertTrue(audioRecord.getRecordingState() == AudioRecord.RECORDSTATE_RECORDING); long lastTime = System.nanoTime(); while (keepAlive) { @@ -180,12 +185,6 @@ public class WebRtcAudioRecord { reportWebRtcAudioRecordError(errorMessage); } } - if (DEBUG) { - long nowTime = System.nanoTime(); - long durationInMs = TimeUnit.NANOSECONDS.toMillis((nowTime - lastTime)); - lastTime = nowTime; - Logging.d(TAG, "bytesRead[" + durationInMs + "] " + bytesRead); - } } try { @@ -205,13 +204,11 @@ public class WebRtcAudioRecord { } } - WebRtcAudioRecord(long nativeAudioRecord) { + WebRtcAudioRecord(long nativeAudioRecord, boolean screenCapture) { Logging.d(TAG, "ctor" + WebRtcAudioUtils.getThreadInfo()); 
this.nativeAudioRecord = nativeAudioRecord; - if (DEBUG) { - WebRtcAudioUtils.logDeviceInfo(TAG); - } effects = WebRtcAudioEffects.create(); + isScreenCapture = screenCapture; } private boolean enableBuiltInAEC(boolean enable) { @@ -233,6 +230,9 @@ public class WebRtcAudioRecord { } private int initRecording(int sampleRate, int channels) { + if (isScreenCapture && Build.VERSION.SDK_INT < 29) { + return -1; + } Logging.d(TAG, "initRecording(sampleRate=" + sampleRate + ", channels=" + channels + ")"); if (audioRecord != null) { reportWebRtcAudioRecordInitError("InitRecording called twice without StopRecording."); @@ -265,20 +265,46 @@ public class WebRtcAudioRecord { // verified that it does not increase the actual recording latency. int bufferSizeInBytes = Math.max(BUFFER_SIZE_FACTOR * minBufferSize, byteBuffer.capacity()); Logging.d(TAG, "bufferSizeInBytes: " + bufferSizeInBytes); - try { - audioRecord = new AudioRecord(audioSource, sampleRate, channelConfig, - AudioFormat.ENCODING_PCM_16BIT, bufferSizeInBytes); - } catch (IllegalArgumentException e) { - reportWebRtcAudioRecordInitError("AudioRecord ctor error: " + e.getMessage()); - releaseAudioResources(); - return -1; + + if (isScreenCapture) { + if (Build.VERSION.SDK_INT >= 29) { + try { + MediaProjection projection = VideoCapturerDevice.getMediaProjection(); + if (projection == null) { + return -1; + } + AudioPlaybackCaptureConfiguration.Builder builder = new AudioPlaybackCaptureConfiguration.Builder(projection); + builder.addMatchingUsage(AudioAttributes.USAGE_MEDIA); + builder.addMatchingUsage(AudioAttributes.USAGE_GAME); + builder.addMatchingUsage(AudioAttributes.USAGE_UNKNOWN); + + AudioRecord.Builder audioRecordBuilder = new AudioRecord.Builder(); + audioRecordBuilder.setAudioPlaybackCaptureConfig(builder.build()); + audioRecordBuilder.setAudioFormat(new AudioFormat.Builder().setChannelMask(channelConfig).setSampleRate(sampleRate).setEncoding(AudioFormat.ENCODING_PCM_16BIT).build()); + 
audioRecordBuilder.setBufferSizeInBytes(bufferSizeInBytes); + audioRecord = audioRecordBuilder.build(); + } catch (Throwable e) { + reportWebRtcAudioRecordInitError("AudioRecord ctor error: " + e.getMessage()); + releaseAudioResources(); + return -1; + } + } + } else { + try { + audioRecord = new AudioRecord(audioSource, sampleRate, channelConfig, + AudioFormat.ENCODING_PCM_16BIT, bufferSizeInBytes); + } catch (IllegalArgumentException e) { + reportWebRtcAudioRecordInitError("AudioRecord ctor error: " + e.getMessage()); + releaseAudioResources(); + return -1; + } } if (audioRecord == null || audioRecord.getState() != AudioRecord.STATE_INITIALIZED) { reportWebRtcAudioRecordInitError("Failed to create a new AudioRecord instance"); releaseAudioResources(); return -1; } - if (effects != null) { + if (!isScreenCapture && effects != null) { effects.enable(audioRecord.getAudioSessionId()); } logMainParameters(); @@ -293,15 +319,11 @@ public class WebRtcAudioRecord { try { audioRecord.startRecording(); } catch (IllegalStateException e) { - reportWebRtcAudioRecordStartError(AudioRecordStartErrorCode.AUDIO_RECORD_START_EXCEPTION, - "AudioRecord.startRecording failed: " + e.getMessage()); + reportWebRtcAudioRecordStartError(AudioRecordStartErrorCode.AUDIO_RECORD_START_EXCEPTION, "AudioRecord.startRecording failed: " + e.getMessage()); return false; } if (audioRecord.getRecordingState() != AudioRecord.RECORDSTATE_RECORDING) { - reportWebRtcAudioRecordStartError( - AudioRecordStartErrorCode.AUDIO_RECORD_START_STATE_MISMATCH, - "AudioRecord.startRecording failed - incorrect state :" - + audioRecord.getRecordingState()); + reportWebRtcAudioRecordStartError(AudioRecordStartErrorCode.AUDIO_RECORD_START_STATE_MISMATCH, "AudioRecord.startRecording failed - incorrect state :" + audioRecord.getRecordingState()); return false; } audioThread = new AudioRecordThread("AudioRecordJavaThread"); @@ -321,6 +343,11 @@ public class WebRtcAudioRecord { if (effects != null) { effects.release(); } 
+ try { + audioRecord.stop(); + } catch (Throwable e) { + FileLog.e(e); + } releaseAudioResources(); return true; } diff --git a/TMessagesProj/src/main/res/drawable-hdpi/fingerprint.png b/TMessagesProj/src/main/res/drawable-hdpi/fingerprint.png new file mode 100644 index 000000000..a4e333471 Binary files /dev/null and b/TMessagesProj/src/main/res/drawable-hdpi/fingerprint.png differ diff --git a/TMessagesProj/src/main/res/drawable-hdpi/passcode_logo.png b/TMessagesProj/src/main/res/drawable-hdpi/passcode_logo.png deleted file mode 100644 index a3d6d895b..000000000 Binary files a/TMessagesProj/src/main/res/drawable-hdpi/passcode_logo.png and /dev/null differ diff --git a/TMessagesProj/src/main/res/drawable-mdpi/fingerprint.png b/TMessagesProj/src/main/res/drawable-mdpi/fingerprint.png new file mode 100644 index 000000000..c811128f5 Binary files /dev/null and b/TMessagesProj/src/main/res/drawable-mdpi/fingerprint.png differ diff --git a/TMessagesProj/src/main/res/drawable-mdpi/passcode_logo.png b/TMessagesProj/src/main/res/drawable-mdpi/passcode_logo.png deleted file mode 100644 index 3dde6781e..000000000 Binary files a/TMessagesProj/src/main/res/drawable-mdpi/passcode_logo.png and /dev/null differ diff --git a/TMessagesProj/src/main/res/drawable-xhdpi/fingerprint.png b/TMessagesProj/src/main/res/drawable-xhdpi/fingerprint.png new file mode 100644 index 000000000..efc0033ca Binary files /dev/null and b/TMessagesProj/src/main/res/drawable-xhdpi/fingerprint.png differ diff --git a/TMessagesProj/src/main/res/drawable-xhdpi/passcode_logo.png b/TMessagesProj/src/main/res/drawable-xhdpi/passcode_logo.png deleted file mode 100644 index 9a00a549f..000000000 Binary files a/TMessagesProj/src/main/res/drawable-xhdpi/passcode_logo.png and /dev/null differ diff --git a/TMessagesProj/src/main/res/drawable-xxhdpi/fingerprint.png b/TMessagesProj/src/main/res/drawable-xxhdpi/fingerprint.png new file mode 100644 index 000000000..fd2fc7ec9 Binary files /dev/null and 
b/TMessagesProj/src/main/res/drawable-xxhdpi/fingerprint.png differ diff --git a/TMessagesProj/src/main/res/drawable-xxhdpi/passcode_logo.png b/TMessagesProj/src/main/res/drawable-xxhdpi/passcode_logo.png deleted file mode 100644 index 672255d30..000000000 Binary files a/TMessagesProj/src/main/res/drawable-xxhdpi/passcode_logo.png and /dev/null differ diff --git a/TMessagesProj/src/main/res/raw/passcode_lock_open.json b/TMessagesProj/src/main/res/raw/passcode_lock_open.json deleted file mode 100644 index 87b3374d9..000000000 --- a/TMessagesProj/src/main/res/raw/passcode_lock_open.json +++ /dev/null @@ -1 +0,0 @@ -{"v":"5.5.7","meta":{"g":"LottieFiles AE 0.1.20","a":"","k":"","d":"","tc":""},"fr":60,"ip":31,"op":63,"w":512,"h":512,"nm":"Lock Open 2","ddd":0,"assets":[],"layers":[{"ddd":0,"ind":2,"ty":4,"nm":"Top","parent":4,"sr":1,"ks":{"o":{"a":0,"k":100,"ix":11},"r":{"a":0,"k":0,"ix":10},"p":{"a":1,"k":[{"i":{"x":0.3,"y":1},"o":{"x":0.32,"y":0},"t":30,"s":[61.368,-47.417,0],"to":[0,0,0],"ti":[0,0,0]},{"i":{"x":0.4,"y":1},"o":{"x":0.32,"y":0},"t":36,"s":[60.851,-37.43,0],"to":[0,0,0],"ti":[0,0,0]},{"i":{"x":0.4,"y":1},"o":{"x":0.167,"y":0},"t":40,"s":[62.128,-79.404,0],"to":[0,0,0],"ti":[0,0,0]},{"t":45,"s":[61.647,-67.414,0]}],"ix":2},"a":{"a":0,"k":[61.368,-47.417,0],"ix":1},"s":{"a":0,"k":[100,100,100],"ix":6}},"ao":0,"shapes":[{"ty":"gr","it":[{"ind":0,"ty":"sh","ix":1,"ks":{"a":1,"k":[{"i":{"x":0.3,"y":1},"o":{"x":0.32,"y":0},"t":30,"s":[{"i":[[0,0],[0,0],[-35.659,4.562],[-2.913,0],[-2.893,-0.375],[0.25,-35.208],[0,0]],"o":[[0,0],[0.738,-35.942],[2.929,-0.375],[2.913,0],[34.917,4.523],[0,0],[0,0]],"v":[[-79.499,-57.917],[-79.46,-103.926],[-16.482,-175.094],[-7.708,-175.662],[1.011,-175.094],[61.366,-104.825],[60.805,-34.417]],"c":false}]},{"i":{"x":0.378,"y":1},"o":{"x":0.6,"y":0},"t":36,"s":[{"i":[[0,0],[0,0],[-40.979,3.366],[-3.348,0],[-3.325,-0.276],[0.287,-25.975],[0,0]],"o":[[0,0],[0.848,-26.517],[3.366,-0.276],[3.348,0],[40.126,3.337],[0,0],[0,0]],"v":[[-
90.145,-42.983],[-89.974,-80.743],[-17.599,-133.248],[-7.515,-133.667],[2.504,-133.248],[71.864,-81.406],[72.02,-7.337]],"c":false}]},{"i":{"x":0.68,"y":1},"o":{"x":0.3,"y":0},"t":45,"s":[{"i":[[0,0],[0,0],[31.62,4.464],[2.583,0],[2.565,-0.367],[-0.221,-34.453],[0,0]],"o":[[0,0],[-0.654,-35.171],[-2.597,-0.367],[-2.583,0],[-30.962,4.426],[0,0],[0,0]],"v":[[182.722,-62.905],[182.928,-109.081],[127.084,-178.723],[119.303,-179.279],[111.573,-178.723],[58.054,-109.961],[57.052,-23.06]],"c":false}]},{"i":{"x":0.68,"y":1},"o":{"x":0.167,"y":0},"t":53,"s":[{"i":[[0,0],[0,0],[35.335,4.101],[2.887,0],[2.867,-0.337],[-0.247,-31.654],[0,0]],"o":[[0,0],[-0.731,-32.314],[-2.903,-0.337],[-2.887,0],[-34.599,4.066],[0,0],[0,0]],"v":[[200.896,-55.232],[200.909,-99.354],[138.503,-163.338],[129.809,-163.848],[121.17,-163.338],[61.363,-100.162],[61.463,-27.417]],"c":false}]},{"t":59,"s":[{"i":[[0,0],[0,0],[35.335,4.464],[2.887,0],[2.867,-0.367],[-0.247,-34.453],[0,0]],"o":[[0,0],[-0.731,-35.171],[-2.903,-0.367],[-2.887,0],[-34.599,4.426],[0,0],[0,0]],"v":[[200.896,-61.192],[200.909,-109.215],[138.503,-178.857],[129.809,-179.412],[121.17,-178.857],[61.363,-110.094],[61.463,-30.917]],"c":false}]}],"ix":2},"nm":"Path 1","mn":"ADBE Vector Shape - Group","hd":false},{"ty":"st","c":{"a":0,"k":[1,1,1,1],"ix":3},"o":{"a":0,"k":100,"ix":4},"w":{"a":0,"k":36,"ix":5},"lc":1,"lj":2,"bm":0,"nm":"Stroke 1","mn":"ADBE Vector Graphic - Stroke","hd":false},{"ty":"tr","p":{"a":0,"k":[0,0],"ix":2},"a":{"a":0,"k":[0,0],"ix":1},"s":{"a":0,"k":[100,100],"ix":3},"r":{"a":0,"k":0,"ix":6},"o":{"a":0,"k":100,"ix":7},"sk":{"a":0,"k":0,"ix":4},"sa":{"a":0,"k":0,"ix":5},"nm":"Transform"}],"nm":"Group 1","np":2,"cix":2,"bm":0,"ix":1,"mn":"ADBE Vector Group","hd":false}],"ip":0,"op":210,"st":30,"bm":0},{"ddd":0,"ind":3,"ty":4,"nm":"Shape Layer 
1","parent":2,"sr":1,"ks":{"o":{"a":0,"k":100,"ix":11},"r":{"a":0,"k":0,"ix":10},"p":{"a":0,"k":[200.763,-57.53,0],"ix":2},"a":{"a":0,"k":[209.042,-77.027,0],"ix":1},"s":{"a":0,"k":[100,100,100],"ix":6}},"ao":0,"shapes":[{"ty":"gr","it":[{"ind":0,"ty":"sh","ix":1,"ks":{"a":1,"k":[{"i":{"x":0.3,"y":1},"o":{"x":0.32,"y":0},"t":30,"s":[{"i":[[0.119,6.275],[0.053,-1.249],[-9.251,0.092]],"o":[[-0.025,-1.318],[-0.348,8.251],[9.375,-0.093]],"v":[[-53.373,-78.093],[-89.044,-77.808],[-70.505,-65.931]],"c":true}]},{"i":{"x":0.378,"y":1},"o":{"x":0.6,"y":0},"t":36,"s":[{"i":[[-0.098,6.275],[0.096,-1.246],[-9.249,-0.228]],"o":[[0.021,-1.318],[-0.633,8.234],[9.372,0.231]],"v":[[-64.132,-63.268],[-99.791,-64.216],[-81.673,-51.705]],"c":true}]},{"i":{"x":0.68,"y":1},"o":{"x":0.3,"y":0},"t":45,"s":[{"i":[[0.548,6.454],[0.072,-1.3],[-9.252,-0.056]],"o":[[-0.116,-1.368],[-0.478,8.589],[9.375,0.057]],"v":[[208.891,-84.917],[173.081,-85.319],[191.43,-72.643]],"c":true}]},{"i":{"x":0.68,"y":1},"o":{"x":0.167,"y":0},"t":53,"s":[{"i":[[0.625,6.188],[0.057,-1.249],[-9.252,0.062]],"o":[[-0.133,-1.312],[-0.375,8.25],[9.375,-0.063]],"v":[[227.06,-75.697],[191.248,-75.634],[209.748,-63.697]],"c":true}]},{"t":59,"s":[{"i":[[0.625,6.188],[0.057,-1.249],[-9.252,0.062]],"o":[[-0.133,-1.312],[-0.375,8.25],[9.375,-0.063]],"v":[[226.938,-82.563],[191.251,-82.5],[209.625,-70.563]],"c":true}]}],"ix":2},"nm":"Path 1","mn":"ADBE Vector Shape - Group","hd":false},{"ty":"fl","c":{"a":0,"k":[1,1,1,1],"ix":4},"o":{"a":0,"k":100,"ix":5},"r":1,"bm":0,"nm":"Fill 1","mn":"ADBE Vector Graphic - Fill","hd":false},{"ty":"tr","p":{"a":0,"k":[0,0],"ix":2},"a":{"a":0,"k":[0,0],"ix":1},"s":{"a":0,"k":[100,100],"ix":3},"r":{"a":0,"k":0,"ix":6},"o":{"a":0,"k":100,"ix":7},"sk":{"a":0,"k":0,"ix":4},"sa":{"a":0,"k":0,"ix":5},"nm":"Transform"}],"nm":"Shape 1","np":2,"cix":2,"bm":0,"ix":1,"mn":"ADBE Vector 
Group","hd":false}],"ip":0,"op":212,"st":32,"bm":0},{"ddd":0,"ind":4,"ty":4,"nm":"Body","sr":1,"ks":{"o":{"a":0,"k":100,"ix":11},"r":{"a":0,"k":0,"ix":10},"p":{"a":1,"k":[{"i":{"x":0.3,"y":1},"o":{"x":0.32,"y":0},"t":30,"s":[255.667,384.667,0],"to":[0,0,0],"ti":[0,0,0]},{"i":{"x":0.378,"y":1},"o":{"x":0.6,"y":0},"t":35,"s":[255.667,440.667,0],"to":[0,0,0],"ti":[0,0,0]},{"i":{"x":0.3,"y":1},"o":{"x":0.3,"y":0},"t":45,"s":[255.667,351.667,0],"to":[0,0,0],"ti":[0,0,0]},{"i":{"x":0.68,"y":1},"o":{"x":0.32,"y":0},"t":52,"s":[255.667,417.667,0],"to":[0,0,0],"ti":[0,0,0]},{"t":57,"s":[255.667,384.667,0]}],"ix":2},"a":{"a":0,"k":[-8.333,125.667,0],"ix":1},"s":{"a":1,"k":[{"i":{"x":[0.3,0.3,0.3],"y":[1,1,1]},"o":{"x":[0.32,0.32,0.32],"y":[0,0,0]},"t":30,"s":[100,100,100]},{"i":{"x":[0.378,0.378,0.378],"y":[1,1,1]},"o":{"x":[0.6,0.6,0.6],"y":[0,0,0]},"t":35,"s":[103,97,100]},{"i":{"x":[0.3,0.3,0.3],"y":[1,1,1]},"o":{"x":[0.3,0.3,0.3],"y":[0,0,0]},"t":45,"s":[98,102,100]},{"i":{"x":[0.7,0.7,0.7],"y":[1,1,1]},"o":{"x":[0.32,0.32,0.32],"y":[0,0,0]},"t":52,"s":[102,98,100]},{"i":{"x":[0.7,0.7,0.7],"y":[1,1,1]},"o":{"x":[0.3,0.3,0.3],"y":[0,0,0]},"t":57,"s":[99,101,100]},{"t":62,"s":[100,100,100]}],"ix":6}},"ao":0,"shapes":[{"ty":"gr","it":[{"ind":0,"ty":"sh","ix":1,"ks":{"a":1,"k":[{"i":{"x":0.3,"y":1},"o":{"x":0.32,"y":0},"t":30,"s":[{"i":[[19.514,0],[0,19.514],[-19.514,0],[0,-19.514]],"o":[[-19.514,0],[0,-19.514],[19.514,0],[0,19.514]],"v":[[-9.333,97],[-44.667,61.667],[-9.333,26.333],[26,61.667]],"c":true}]},{"i":{"x":0.378,"y":1},"o":{"x":0.6,"y":0},"t":36,"s":[{"i":[[20.251,0],[0,18.263],[-20.251,0],[0,-18.263]],"o":[[-20.251,0],[0,-18.263],[20.251,0],[0,18.263]],"v":[[-9.333,102.512],[-46,69.443],[-9.333,36.375],[27.334,69.443]],"c":true}]},{"i":{"x":0.3,"y":1},"o":{"x":0.3,"y":0},"t":45,"s":[{"i":[[18.249,0],[0,20.817],[-18.249,0],[0,-20.817]],"o":[[-18.249,0],[0,-20.817],[18.249,0],[0,20.817]],"v":[[-9.333,94.171],[-42.376,56.478],[-9.333,18.785],[23.71,56.478]],"c":true}
]},{"i":{"x":0.68,"y":1},"o":{"x":0.32,"y":0},"t":53,"s":[{"i":[[20.036,0],[0,18.836],[-20.036,0],[0,-18.836]],"o":[[-20.036,0],[0,-18.836],[20.036,0],[0,18.836]],"v":[[-9.333,99.987],[-45.611,65.88],[-9.333,31.774],[26.945,65.88]],"c":true}]},{"t":59,"s":[{"i":[[19.514,0],[0,19.514],[-19.514,0],[0,-19.514]],"o":[[-19.514,0],[0,-19.514],[19.514,0],[0,19.514]],"v":[[-9.333,97],[-44.667,61.667],[-9.333,26.333],[26,61.667]],"c":true}]}],"ix":2},"nm":"Path 1","mn":"ADBE Vector Shape - Group","hd":false},{"ind":1,"ty":"sh","ix":2,"ks":{"a":1,"k":[{"i":{"x":0.3,"y":1},"o":{"x":0.32,"y":0},"t":30,"s":[{"i":[[0,-14.231],[0,0],[-14.543,0],[0,0],[0,14.231],[0,0],[13.543,0],[0,0]],"o":[[0,0],[0,14.231],[0,0],[14.543,0],[0,0],[0,-14.231],[0,0],[-15.543,0]],"v":[[-148.333,-33.9],[-148.333,157.233],[-122,183],[103.333,183],[129.667,157.233],[129.667,-33.9],[104.333,-59.667],[-121,-59.667]],"c":true}]},{"i":{"x":0.378,"y":1},"o":{"x":0.6,"y":0},"t":36,"s":[{"i":[[2.562,-13.213],[-4.644,-40.358],[-15.092,0],[0,0],[-2.776,15.994],[4.861,33.014],[14.055,0],[0,0]],"o":[[-6.102,31.47],[1.682,14.62],[0,0],[15.092,0],[9.236,-53.225],[-1.944,-13.2],[0,0],[-16.13,0]],"v":[[-153.58,-19.998],[-153.58,158.884],[-126.252,183],[107.586,183],[134.913,158.884],[134.913,-19.998],[108.624,-44.113],[-125.215,-44.113]],"c":true}]},{"i":{"x":0.3,"y":1},"o":{"x":0.3,"y":0},"t":45,"s":[{"i":[[0,-15.181],[0,0],[-13.601,0],[0,0],[0,15.181],[0,0],[12.666,0],[0,0]],"o":[[0,0],[0,15.181],[0,0],[13.601,0],[0,0],[0,-15.181],[0,0],[-14.536,0]],"v":[[-139.323,-45.471],[-139.323,158.426],[-114.697,185.914],[96.03,185.914],[120.657,158.426],[120.657,-45.471],[96.966,-72.959],[-113.762,-72.959]],"c":true}]},{"i":{"x":0.68,"y":1},"o":{"x":0.32,"y":0},"t":53,"s":[{"i":[[0,-13.737],[0,0],[-14.932,0],[0,0],[0,13.737],[0,0],[13.905,0],[0,0]],"o":[[0,0],[0,13.737],[0,0],[14.932,0],[0,0],[0,-13.737],[0,0],[-15.959,0]],"v":[[-152.049,-26.367],[-152.049,158.128],[-125.012,183],[106.345,183],[133.382,158.128],[133.382,-26.36
7],[107.372,-51.239],[-123.985,-51.239]],"c":true}]},{"t":59,"s":[{"i":[[0,-14.231],[0,0],[-14.543,0],[0,0],[0,14.231],[0,0],[13.543,0],[0,0]],"o":[[0,0],[0,14.231],[0,0],[14.543,0],[0,0],[0,-14.231],[0,0],[-15.543,0]],"v":[[-148.333,-33.9],[-148.333,157.233],[-122,183],[103.333,183],[129.667,157.233],[129.667,-33.9],[104.333,-59.667],[-121,-59.667]],"c":true}]}],"ix":2},"nm":"Path 2","mn":"ADBE Vector Shape - Group","hd":false},{"ty":"fl","c":{"a":0,"k":[1,1,1,1],"ix":4},"o":{"a":0,"k":100,"ix":5},"r":1,"bm":0,"nm":"Fill 1","mn":"ADBE Vector Graphic - Fill","hd":false},{"ty":"tr","p":{"a":0,"k":[0,0],"ix":2},"a":{"a":0,"k":[0,0],"ix":1},"s":{"a":0,"k":[100,100],"ix":3},"r":{"a":0,"k":0,"ix":6},"o":{"a":0,"k":100,"ix":7},"sk":{"a":0,"k":0,"ix":4},"sa":{"a":0,"k":0,"ix":5},"nm":"Transform"}],"nm":"Group 1","np":3,"cix":2,"bm":0,"ix":1,"mn":"ADBE Vector Group","hd":false}],"ip":0,"op":210,"st":30,"bm":0}],"markers":[]} \ No newline at end of file diff --git a/TMessagesProj/src/main/res/raw/wallet_perfect.tgs b/TMessagesProj/src/main/res/raw/wallet_perfect.tgs new file mode 100644 index 000000000..b8b091757 --- /dev/null +++ b/TMessagesProj/src/main/res/raw/wallet_perfect.tgs @@ -0,0 +1 @@ +{"tgs":1,"v":"5.5.2","fr":60,"ip":0,"op":120,"w":480,"h":608,"nm":"thumbsup 
👍480x608","ddd":0,"assets":[],"layers":[{"ddd":0,"ind":1,"ty":4,"parent":5,"sr":1,"ks":{"p":{"a":0,"k":[110.62,-0.262,0]},"a":{"a":0,"k":[110.62,-0.262,0]}},"ao":0,"shapes":[{"ty":"gr","it":[{"ind":0,"ty":"sh","ks":{"a":1,"k":[{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":0,"s":[{"i":[[0,0],[-4.8,-30.236],[22.645,-7.162],[30.124,6.478],[-17.411,11.949],[-8.876,-0.066],[-19.033,0.597]],"o":[[32.969,1.056],[5.235,32.973],[-24.922,7.882],[-31.75,-6.828],[17.411,-11.949],[11.522,0.085],[0,0]],"v":[[107.555,-50.359],[187.305,-8.441],[157.291,41.858],[58.969,46.324],[44.288,-4.629],[98.315,-14.473],[143.72,-23.691]],"c":false}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.333,"y":0},"t":10,"s":[{"i":[[0,0],[-4.8,-30.236],[22.645,-7.162],[30.762,1.767],[-17.411,11.949],[-8.535,2.438],[-7.196,4.859]],"o":[[32.969,1.056],[5.235,32.973],[-24.922,7.882],[-32.422,-1.862],[17.411,-11.949],[11.079,-3.165],[0,0]],"v":[[107.555,-50.359],[193.555,-8.441],[168.853,43.108],[86.798,55.885],[72.101,5.058],[120.587,-11.917],[143.252,-21.659]],"c":false}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":20,"s":[{"i":[[0,0],[-4.8,-30.236],[22.645,-7.162],[39.542,14.612],[-19.466,8.186],[-8.68,-1.857],[-15.165,1.89]],"o":[[32.969,1.056],[5.235,32.973],[-24.922,7.882],[-30.463,-11.257],[19.466,-8.186],[11.267,2.411],[0,0]],"v":[[123.492,-39.109],[180.743,-9.066],[150.728,41.233],[43.548,39.232],[39.598,-14.003],[94.812,-18.043],[136.064,-24.941]],"c":false}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.167,"y":0.167},"t":30,"s":[{"i":[[0,0],[-4.8,-30.236],[22.645,-7.162],[39.542,14.612],[-19.466,8.186],[-8.68,-1.857],[-15.165,1.89]],"o":[[32.969,1.056],[5.235,32.973],[-24.922,7.882],[-30.463,-11.257],[19.466,-8.186],[11.267,2.411],[0,0]],"v":[[123.492,-39.109],[180.743,-9.066],[150.728,41.233],[43.548,39.232],[39.598,-14.003],[94.812,-18.043],[136.064,-24.941]],"c":false}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.167,"y":0.192},"t":35.398,"s":[{"i":[[0,0],[-4.8,-30.236],[22.645,-7.162],[39.542
,14.612],[-19.466,8.186],[-8.68,-1.857],[-15.165,1.89]],"o":[[32.969,1.056],[5.235,32.973],[-24.922,7.882],[-30.463,-11.257],[19.466,-8.186],[11.267,2.411],[0,0]],"v":[[123.492,-39.109],[180.743,-9.066],[150.728,41.233],[43.548,39.232],[39.598,-14.003],[94.812,-18.043],[136.064,-24.941]],"c":false}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.167,"y":0.192},"t":39.736,"s":[{"i":[[0,0],[-4.8,-30.236],[22.645,-7.162],[39.542,14.612],[-19.466,8.186],[-8.68,-1.857],[-15.165,1.89]],"o":[[32.969,1.056],[5.235,32.973],[-24.922,7.882],[-30.463,-11.257],[19.466,-8.186],[11.267,2.411],[0,0]],"v":[[110.256,-39.831],[180.743,-9.066],[150.728,41.233],[43.548,39.232],[39.598,-14.003],[94.812,-18.043],[136.064,-24.941]],"c":false}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.167,"y":0.192},"t":43.205,"s":[{"i":[[0,0],[-4.8,-30.236],[22.645,-7.162],[39.542,14.612],[-19.466,8.186],[-8.68,-1.857],[-15.165,1.89]],"o":[[32.969,1.056],[5.235,32.973],[-24.922,7.882],[-30.463,-11.257],[19.466,-8.186],[11.267,2.411],[0,0]],"v":[[107.948,-44.294],[180.743,-9.066],[150.728,41.233],[43.548,39.232],[39.598,-14.003],[94.812,-18.043],[136.064,-24.941]],"c":false}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.167,"y":0.192},"t":44.072,"s":[{"i":[[0,0],[-4.8,-30.236],[22.645,-7.162],[39.542,14.612],[-19.466,8.186],[-8.68,-1.857],[-15.165,1.89]],"o":[[32.969,1.056],[5.235,32.973],[-24.922,7.882],[-30.463,-11.257],[19.466,-8.186],[11.267,2.411],[0,0]],"v":[[107.716,-45.923],[180.743,-9.066],[150.728,41.233],[43.548,39.232],[39.598,-14.003],[94.812,-18.043],[136.064,-24.941]],"c":false}]},{"i":{"x":0.34,"y":1},"o":{"x":0.167,"y":0.192},"t":44.941,"s":[{"i":[[0,0],[-4.8,-30.236],[22.645,-7.162],[39.542,14.612],[-19.466,8.186],[-8.68,-1.857],[-15.165,1.89]],"o":[[32.969,1.056],[5.235,32.973],[-24.922,7.882],[-30.463,-11.257],[19.466,-8.186],[11.267,2.411],[0,0]],"v":[[107.744,-47.41],[180.743,-9.066],[150.728,41.233],[43.548,39.232],[39.598,-14.003],[94.812,-18.043],[136.064,-24.941]],"c":false}]},{"i":{"x":0.833,"y":
0.808},"o":{"x":0.33,"y":0},"t":45.809,"s":[{"i":[[0,0],[-4.8,-30.236],[22.645,-7.162],[39.542,14.612],[-19.466,8.186],[-8.68,-1.857],[-15.165,1.89]],"o":[[32.969,1.056],[5.235,32.973],[-24.922,7.882],[-30.463,-11.257],[19.466,-8.186],[11.267,2.411],[0,0]],"v":[[107.398,-48.953],[180.743,-9.066],[150.728,41.233],[43.548,39.232],[39.598,-14.003],[94.812,-18.043],[136.064,-24.941]],"c":false}]},{"i":{"x":0.34,"y":1},"o":{"x":0.167,"y":0.192},"t":46.676,"s":[{"i":[[0,0],[-4.8,-30.236],[22.645,-7.162],[39.412,14.422],[-19.436,8.241],[-8.678,-1.794],[-15.047,1.934]],"o":[[32.969,1.056],[5.235,32.973],[-24.922,7.882],[-30.492,-11.118],[19.436,-8.241],[11.265,2.328],[0,0]],"v":[[107.869,-50.224],[180.933,-9.057],[150.997,41.26],[44.188,39.478],[40.08,-13.721],[95.193,-17.952],[136.171,-24.892]],"c":false}]},{"i":{"x":0.38,"y":1},"o":{"x":0.33,"y":0},"t":54,"s":[{"i":[[0,0],[-4.8,-30.236],[22.645,-7.162],[30.762,1.767],[-17.411,11.949],[-8.535,2.438],[-7.196,4.859]],"o":[[32.969,1.056],[5.235,32.973],[-24.922,7.882],[-32.422,-1.862],[17.411,-11.949],[11.079,-3.165],[0,0]],"v":[[107.555,-50.359],[193.555,-8.441],[168.853,43.108],[86.798,55.885],[72.101,5.058],[120.587,-11.917],[143.252,-21.659]],"c":false}]},{"t":62.3828125,"s":[{"i":[[0,0],[-4.8,-30.236],[22.645,-7.162],[30.124,6.478],[-17.411,11.949],[-8.876,-0.066],[-19.033,0.597]],"o":[[32.969,1.056],[5.235,32.973],[-24.922,7.882],[-31.75,-6.828],[17.411,-11.949],[11.522,0.085],[0,0]],"v":[[107.555,-50.359],[187.305,-8.441],[157.291,41.858],[58.969,46.324],[44.288,-4.629],[98.315,-14.473],[143.72,-23.691]],"c":false}]}]},"hd":false},{"ty":"st","c":{"a":0,"k":[0.96862745285,0.494117647409,0.254901975393,1]},"o":{"a":0,"k":100},"w":{"a":0,"k":8},"lc":2,"lj":2,"bm":0,"hd":false},{"ty":"tr","p":{"a":0,"k":[0,0]},"a":{"a":0,"k":[0,0]},"s":{"a":0,"k":[100,100]},"r":{"a":0,"k":0},"o":{"a":0,"k":100},"sk":{"a":0,"k":0},"sa":{"a":0,"k":0}}],"bm":0,"hd":false},{"ty":"gr","it":[{"ind":0,"ty":"sh","ks":{"a":1,"k":[{"i":{"x":0.667,"y
":1},"o":{"x":0.333,"y":0},"t":0,"s":[{"i":[[-4.855,0.482],[19.901,4.272],[-17.412,11.952],[-8.88,-0.067],[-2.333,-23.454]],"o":[[-22.301,2.691],[-31.752,-6.828],[17.412,-11.952],[0,0],[2.484,24.97]],"v":[[126.828,48.021],[58.973,46.328],[44.286,-4.63],[101.986,-14.712],[89.178,22.458]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.333,"y":0},"t":10,"s":[{"i":[[-4.855,0.482],[20.321,1.167],[-17.412,11.952],[-8.539,2.438],[-2.333,-23.454]],"o":[[-20.616,5.648],[-32.425,-1.862],[17.412,-11.952],[0,0],[2.484,24.97]],"v":[[153.078,46.771],[86.804,55.887],[72.098,5.057],[124.042,-13.182],[111.366,24.645]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":20,"s":[{"i":[[-4.855,0.482],[19.257,6.594],[-19.467,8.188],[-8.88,-0.067],[2.453,-23.442]],"o":[[-22.301,2.691],[-30.727,-10.521],[19.467,-8.188],[0,0],[-2.611,24.957]],"v":[[120.265,47.396],[43.552,39.236],[39.596,-14.004],[95.893,-17.681],[82.154,16.906]],"c":true}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.167,"y":0.167},"t":30,"s":[{"i":[[-4.855,0.482],[19.257,6.594],[-19.467,8.188],[-8.88,-0.067],[2.453,-23.442]],"o":[[-22.301,2.691],[-30.727,-10.521],[19.467,-8.188],[0,0],[-2.611,24.957]],"v":[[120.265,47.396],[43.552,39.236],[39.596,-14.004],[95.893,-17.681],[82.154,16.906]],"c":true}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.167,"y":0.192},"t":35.398,"s":[{"i":[[-4.855,0.482],[19.257,6.594],[-19.467,8.188],[-8.88,-0.067],[2.453,-23.442]],"o":[[-22.301,2.691],[-30.727,-10.521],[19.467,-8.188],[0,0],[-2.611,24.957]],"v":[[120.265,47.396],[43.552,39.236],[39.596,-14.004],[95.893,-17.681],[82.154,16.906]],"c":true}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.167,"y":0.192},"t":39.736,"s":[{"i":[[-4.855,0.482],[19.257,6.594],[-19.467,8.188],[-8.88,-0.067],[2.453,-23.442]],"o":[[-22.301,2.691],[-30.727,-10.521],[19.467,-8.188],[0,0],[-2.611,24.957]],"v":[[120.265,47.396],[43.552,39.236],[39.596,-14.004],[95.893,-17.681],[82.154,16.906]],"c":true}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.167,"y":0.192},"t"
:43.205,"s":[{"i":[[-4.855,0.482],[19.257,6.594],[-19.467,8.188],[-8.88,-0.067],[2.453,-23.442]],"o":[[-22.301,2.691],[-30.727,-10.521],[19.467,-8.188],[0,0],[-2.611,24.957]],"v":[[120.265,47.396],[43.552,39.236],[39.596,-14.004],[95.893,-17.681],[82.154,16.906]],"c":true}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.167,"y":0.192},"t":44.072,"s":[{"i":[[-4.855,0.482],[19.257,6.594],[-19.467,8.188],[-8.88,-0.067],[2.453,-23.442]],"o":[[-22.301,2.691],[-30.727,-10.521],[19.467,-8.188],[0,0],[-2.611,24.957]],"v":[[120.265,47.396],[43.552,39.236],[39.596,-14.004],[95.893,-17.681],[82.154,16.906]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.167,"y":0.192},"t":44.941,"s":[{"i":[[-4.855,0.482],[19.257,6.594],[-19.467,8.188],[-8.88,-0.067],[2.453,-23.442]],"o":[[-22.301,2.691],[-30.727,-10.521],[19.467,-8.188],[0,0],[-2.611,24.957]],"v":[[120.265,47.396],[43.552,39.236],[39.596,-14.004],[95.893,-17.681],[82.154,16.906]],"c":true}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.33,"y":0},"t":45.809,"s":[{"i":[[-4.855,0.482],[19.257,6.594],[-19.467,8.188],[-8.88,-0.067],[2.453,-23.442]],"o":[[-22.301,2.691],[-30.727,-10.521],[19.467,-8.188],[0,0],[-2.611,24.957]],"v":[[120.265,47.396],[43.552,39.236],[39.596,-14.004],[95.893,-17.681],[82.154,16.906]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.167,"y":0.192},"t":46.676,"s":[{"i":[[-4.855,0.482],[19.273,6.513],[-19.437,8.244],[-8.875,-0.03],[2.382,-23.442]],"o":[[-22.276,2.735],[-30.752,-10.393],[19.437,-8.244],[0,0],[-2.536,24.957]],"v":[[120.751,47.387],[44.193,39.482],[40.077,-13.722],[96.309,-17.615],[82.586,17.021]],"c":true}]},{"i":{"x":0.38,"y":1},"o":{"x":0.33,"y":0},"t":54,"s":[{"i":[[-4.855,0.482],[20.321,1.167],[-17.412,11.952],[-8.539,2.438],[-2.333,-23.454]],"o":[[-20.616,5.648],[-32.425,-1.862],[17.412,-11.952],[0,0],[2.484,24.97]],"v":[[153.078,46.771],[86.804,55.887],[72.098,5.057],[124.042,-13.182],[111.366,24.645]],"c":true}]},{"t":62.3828125,"s":[{"i":[[-4.855,0.482],[19.901,4.272],[-17.412,11.952],[-8.88,-0.067],[-2.
333,-23.454]],"o":[[-22.301,2.691],[-31.752,-6.828],[17.412,-11.952],[0,0],[2.484,24.97]],"v":[[126.828,48.021],[58.973,46.328],[44.286,-4.63],[101.986,-14.712],[89.178,22.458]],"c":true}]}]},"hd":false},{"ty":"fl","c":{"a":0,"k":[1,0.694117665291,0.223529413342,1]},"o":{"a":0,"k":100},"r":1,"bm":0,"hd":false},{"ty":"tr","p":{"a":0,"k":[0,0]},"a":{"a":0,"k":[0,0]},"s":{"a":0,"k":[100,100]},"r":{"a":0,"k":0},"o":{"a":0,"k":100},"sk":{"a":0,"k":0},"sa":{"a":0,"k":0}}],"bm":0,"hd":false},{"ty":"gr","it":[{"ind":0,"ty":"sh","ks":{"a":1,"k":[{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":0,"s":[{"i":[[13.903,-7.366],[0,0],[9.514,12.685],[14.273,2.254],[0,0],[-4.776,-30.082]],"o":[[0,0],[23.22,-7.461],[-8.386,-11.182],[-19.823,-3.128],[32.896,1.155],[4.081,25.698]],"v":[[168.725,37.472],[155.346,40.517],[168.169,-12.176],[128.207,-32.895],[108.059,-50.341],[187.305,-8.442]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.333,"y":0},"t":10,"s":[{"i":[[13.903,-7.366],[0,0],[9.514,12.685],[14.273,2.254],[0,0],[-5.335,-29.988]],"o":[[0,0],[25.553,-8.411],[-8.386,-11.182],[-19.823,-3.128],[32.896,1.155],[5.157,28.986]],"v":[[176.85,40.597],[167.534,43.955],[174.419,-12.176],[128.207,-32.895],[108.059,-50.341],[193.555,-8.442]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":20,"s":[{"i":[[13.903,-7.366],[0,0],[9.514,12.686],[-10.511,9.915],[0,0],[-4.776,-30.082]],"o":[[0,0],[23.22,-7.461],[-8.386,-11.182],[8.942,-8.436],[32.896,1.155],[4.081,25.698]],"v":[[162.162,36.847],[151.128,41.611],[161.607,-12.801],[136.645,-24.458],[123.997,-39.091],[180.742,-9.067]],"c":true}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.167,"y":0.167},"t":30,"s":[{"i":[[13.903,-7.366],[0,0],[9.514,12.686],[-10.511,9.915],[0,0],[-4.776,-30.082]],"o":[[0,0],[23.22,-7.461],[-8.386,-11.182],[8.942,-8.436],[32.896,1.155],[4.081,25.698]],"v":[[162.162,36.847],[151.128,41.611],[161.607,-12.801],[136.645,-24.458],[123.997,-39.091],[180.742,-9.067]],"c":true}]},{"i":{"x":0.833,"y":0.808},"
o":{"x":0.167,"y":0.192},"t":35.398,"s":[{"i":[[13.903,-7.366],[0,0],[9.514,12.686],[-10.511,9.915],[0,0],[-4.776,-30.082]],"o":[[0,0],[23.22,-7.461],[-8.386,-11.182],[8.942,-8.436],[32.896,1.155],[4.081,25.698]],"v":[[162.162,36.847],[151.128,41.611],[161.607,-12.801],[136.645,-24.458],[123.997,-39.091],[180.742,-9.067]],"c":true}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.167,"y":0.192},"t":39.736,"s":[{"i":[[13.903,-7.366],[0,0],[9.514,12.685],[6.913,4.529],[0,0],[-4.776,-30.082]],"o":[[0,0],[23.22,-7.461],[-8.386,-11.182],[-11.281,-4.704],[32.896,1.155],[4.081,25.698]],"v":[[162.162,36.847],[151.128,41.611],[161.607,-12.801],[130.713,-30.39],[110.761,-39.813],[180.742,-9.067]],"c":true}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.167,"y":0.192},"t":43.205,"s":[{"i":[[13.903,-7.366],[0,0],[9.514,12.686],[13.2,2.585],[0,0],[-4.776,-30.082]],"o":[[0,0],[23.22,-7.461],[-8.386,-11.182],[-18.578,-3.358],[32.896,1.155],[4.081,25.698]],"v":[[162.162,36.847],[151.128,41.611],[161.607,-12.801],[128.572,-32.53],[108.453,-44.276],[180.742,-9.067]],"c":true}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.167,"y":0.192},"t":44.072,"s":[{"i":[[13.903,-7.366],[0,0],[9.514,12.686],[13.834,2.389],[0,0],[-4.776,-30.082]],"o":[[0,0],[23.22,-7.461],[-8.386,-11.182],[-19.313,-3.222],[32.896,1.155],[4.081,25.698]],"v":[[162.162,36.847],[151.128,41.611],[161.607,-12.801],[128.356,-32.746],[108.22,-45.905],[180.742,-9.067]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.167,"y":0.192},"t":44.941,"s":[{"i":[[13.903,-7.366],[0,0],[9.514,12.686],[14.182,2.282],[0,0],[-4.776,-30.082]],"o":[[0,0],[23.22,-7.461],[-8.386,-11.182],[-19.717,-3.148],[32.896,1.155],[4.081,25.698]],"v":[[162.162,36.847],[151.128,41.611],[161.607,-12.801],[128.238,-32.864],[108.249,-47.391],[180.742,-9.067]],"c":true}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.33,"y":0},"t":45.809,"s":[{"i":[[13.903,-7.366],[0,0],[9.514,12.686],[14.273,2.254],[0,0],[-4.776,-30.082]],"o":[[0,0],[23.22,-7.461],[-8.386,-11.182],[-19.823,-3.128],[32.896,1
.155],[4.081,25.698]],"v":[[162.162,36.847],[151.128,41.611],[161.607,-12.801],[128.207,-32.895],[107.903,-48.935],[180.742,-9.067]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.167,"y":0.192},"t":46.676,"s":[{"i":[[13.903,-7.366],[0,0],[9.514,12.686],[14.273,2.254],[0,0],[-4.784,-30.08]],"o":[[0,0],[23.254,-7.475],[-8.386,-11.182],[-19.823,-3.128],[32.896,1.155],[4.097,25.746]],"v":[[162.38,36.903],[151.37,41.646],[161.797,-12.792],[128.207,-32.895],[108.374,-50.206],[180.932,-9.058]],"c":true}]},{"i":{"x":0.38,"y":1},"o":{"x":0.33,"y":0},"t":54,"s":[{"i":[[13.903,-7.366],[0,0],[9.514,12.685],[14.273,2.254],[0,0],[-5.335,-29.988]],"o":[[0,0],[25.553,-8.411],[-8.386,-11.182],[-19.823,-3.128],[32.896,1.155],[5.157,28.986]],"v":[[176.85,40.597],[167.534,43.955],[174.419,-12.176],[128.207,-32.895],[108.059,-50.341],[193.555,-8.442]],"c":true}]},{"t":62.3828125,"s":[{"i":[[13.903,-7.366],[0,0],[9.514,12.685],[14.273,2.254],[0,0],[-4.776,-30.082]],"o":[[0,0],[23.22,-7.461],[-8.386,-11.182],[-19.823,-3.128],[32.896,1.155],[4.081,25.698]],"v":[[168.725,37.472],[155.346,40.517],[168.169,-12.176],[128.207,-32.895],[108.059,-50.341],[187.305,-8.442]],"c":true}]}]},"hd":false},{"ty":"fl","c":{"a":0,"k":[1,0.874509811401,0.474509805441,1]},"o":{"a":0,"k":100},"r":1,"bm":0,"hd":false},{"ty":"tr","p":{"a":0,"k":[0,0]},"a":{"a":0,"k":[0,0]},"s":{"a":0,"k":[100,100]},"r":{"a":0,"k":0},"o":{"a":0,"k":100},"sk":{"a":0,"k":0},"sa":{"a":0,"k":0}}],"bm":0,"hd":false},{"ty":"gr","it":[{"ind":0,"ty":"sh","ks":{"a":1,"k":[{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":0,"s":[{"i":[[0,0],[-4.8,-30.236],[22.645,-7.162],[30.124,6.478],[-17.411,11.949],[-8.876,-0.066],[-19.033,0.597]],"o":[[32.969,1.056],[5.235,32.973],[-24.922,7.882],[-31.75,-6.828],[17.411,-11.949],[11.522,0.085],[0,0]],"v":[[107.555,-50.359],[187.305,-8.441],[157.291,41.858],[58.969,46.324],[44.288,-4.629],[98.315,-14.473],[143.72,-23.691]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.333,"y":0},"t":10,"s":[{"i":[[0,
0],[-4.8,-30.236],[22.645,-7.162],[30.762,1.767],[-17.411,11.949],[-8.535,2.438],[-9.071,4.234]],"o":[[32.969,1.056],[5.235,32.973],[-24.922,7.882],[-32.422,-1.862],[17.411,-11.949],[11.079,-3.165],[0,0]],"v":[[107.555,-50.359],[193.555,-8.441],[168.853,43.108],[86.798,55.885],[72.101,5.058],[120.587,-11.917],[143.252,-21.659]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":20,"s":[{"i":[[0,0],[-4.8,-30.236],[22.645,-7.162],[37.025,14.513],[-19.466,8.186],[-8.68,-1.857],[-7.977,1.422]],"o":[[32.969,1.056],[5.235,32.973],[-24.922,7.882],[-30.236,-11.852],[19.466,-8.186],[11.267,2.411],[10.929,-5.766]],"v":[[123.492,-39.109],[180.743,-9.066],[150.728,41.233],[43.548,39.232],[39.598,-14.003],[94.812,-18.043],[137.158,-24.941]],"c":true}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.167,"y":0.167},"t":30,"s":[{"i":[[0,0],[-4.8,-30.236],[22.645,-7.162],[37.025,14.513],[-19.466,8.186],[-8.68,-1.857],[-7.977,1.422]],"o":[[32.969,1.056],[5.235,32.973],[-24.922,7.882],[-30.236,-11.852],[19.466,-8.186],[11.267,2.411],[10.929,-5.766]],"v":[[123.492,-39.109],[180.743,-9.066],[150.728,41.233],[43.548,39.232],[39.598,-14.003],[94.812,-18.043],[137.158,-24.941]],"c":true}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.167,"y":0.192},"t":35.398,"s":[{"i":[[0,0],[-4.8,-30.236],[22.645,-7.162],[37.025,14.513],[-19.466,8.186],[-8.68,-1.857],[-7.977,1.422]],"o":[[32.969,1.056],[5.235,32.973],[-24.922,7.882],[-30.236,-11.852],[19.466,-8.186],[11.267,2.411],[10.929,-5.766]],"v":[[123.492,-39.109],[180.743,-9.066],[150.728,41.233],[43.548,39.232],[39.598,-14.003],[94.812,-18.043],[137.158,-24.941]],"c":true}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.167,"y":0.192},"t":39.736,"s":[{"i":[[0,0],[-4.8,-30.236],[22.645,-7.162],[37.025,14.513],[-19.466,8.186],[-8.68,-1.857],[-15.75,0.842]],"o":[[32.969,1.056],[5.235,32.973],[-24.922,7.882],[-30.236,-11.852],[19.466,-8.186],[11.267,2.411],[3.245,-1.712]],"v":[[110.256,-39.831],[180.743,-9.066],[150.728,41.233],[43.548,39.232],[39.598,-14.
003],[94.812,-18.043],[137.158,-24.941]],"c":true}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.167,"y":0.192},"t":43.205,"s":[{"i":[[0,0],[-4.8,-30.236],[22.645,-7.162],[37.025,14.513],[-19.466,8.186],[-8.68,-1.857],[-18.555,0.633]],"o":[[32.969,1.056],[5.235,32.973],[-24.922,7.882],[-30.236,-11.852],[19.466,-8.186],[11.267,2.411],[0.473,-0.249]],"v":[[107.948,-44.294],[180.743,-9.066],[150.728,41.233],[43.548,39.232],[39.598,-14.003],[94.812,-18.043],[137.158,-24.941]],"c":true}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.167,"y":0.192},"t":44.072,"s":[{"i":[[0,0],[-4.8,-30.236],[22.645,-7.162],[37.025,14.513],[-19.466,8.186],[-8.68,-1.857],[-18.837,0.612]],"o":[[32.969,1.056],[5.235,32.973],[-24.922,7.882],[-30.236,-11.852],[19.466,-8.186],[11.267,2.411],[0.194,-0.102]],"v":[[107.716,-45.923],[180.743,-9.066],[150.728,41.233],[43.548,39.232],[39.598,-14.003],[94.812,-18.043],[137.158,-24.941]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.167,"y":0.192},"t":44.941,"s":[{"i":[[0,0],[-4.8,-30.236],[22.645,-7.162],[37.025,14.513],[-19.466,8.186],[-8.68,-1.857],[-18.992,0.6]],"o":[[32.969,1.056],[5.235,32.973],[-24.922,7.882],[-30.236,-11.852],[19.466,-8.186],[11.267,2.411],[0.04,-0.021]],"v":[[107.744,-47.41],[180.743,-9.066],[150.728,41.233],[43.548,39.232],[39.598,-14.003],[94.812,-18.043],[137.158,-24.941]],"c":true}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.33,"y":0},"t":45.809,"s":[{"i":[[0,0],[-4.8,-30.236],[22.645,-7.162],[37.025,14.513],[-19.466,8.186],[-8.68,-1.857],[-19.033,0.597]],"o":[[32.969,1.056],[5.235,32.973],[-24.922,7.882],[-30.236,-11.852],[19.466,-8.186],[11.267,2.411],[0,0]],"v":[[107.398,-48.953],[180.743,-9.066],[150.728,41.233],[43.548,39.232],[39.598,-14.003],[94.812,-18.043],[137.158,-24.941]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.167,"y":0.192},"t":46.676,"s":[{"i":[[0,0],[-4.8,-30.236],[22.645,-7.162],[36.932,14.325],[-19.436,8.241],[-8.678,-1.794],[-18.885,0.651]],"o":[[32.969,1.056],[5.235,32.973],[-24.922,7.882],[-30.268,-11.704],[19.436,-
8.241],[11.265,2.328],[0,0]],"v":[[107.869,-50.224],[180.933,-9.057],[150.997,41.26],[44.188,39.478],[40.08,-13.721],[95.193,-17.952],[137.248,-24.892]],"c":true}]},{"i":{"x":0.38,"y":1},"o":{"x":0.33,"y":0},"t":54,"s":[{"i":[[0,0],[-4.8,-30.236],[22.645,-7.162],[30.762,1.767],[-17.411,11.949],[-8.535,2.438],[-9.071,4.234]],"o":[[32.969,1.056],[5.235,32.973],[-24.922,7.882],[-32.422,-1.862],[17.411,-11.949],[11.079,-3.165],[0,0]],"v":[[107.555,-50.359],[193.555,-8.441],[168.853,43.108],[86.798,55.885],[72.101,5.058],[120.587,-11.917],[143.252,-21.659]],"c":true}]},{"t":62.3828125,"s":[{"i":[[0,0],[-4.8,-30.236],[22.645,-7.162],[30.124,6.478],[-17.411,11.949],[-8.876,-0.066],[-19.033,0.597]],"o":[[32.969,1.056],[5.235,32.973],[-24.922,7.882],[-31.75,-6.828],[17.411,-11.949],[11.522,0.085],[0,0]],"v":[[107.555,-50.359],[187.305,-8.441],[157.291,41.858],[58.969,46.324],[44.288,-4.629],[98.315,-14.473],[143.72,-23.691]],"c":true}]}]},"hd":false},{"ty":"fl","c":{"a":0,"k":[1,0.819607853889,0.250980407,1]},"o":{"a":0,"k":100},"r":1,"bm":0,"hd":false},{"ty":"tr","p":{"a":0,"k":[0,0]},"a":{"a":0,"k":[0,0]},"s":{"a":0,"k":[100,100]},"r":{"a":0,"k":0},"o":{"a":0,"k":100},"sk":{"a":0,"k":0},"sa":{"a":0,"k":0}}],"bm":0,"hd":false}],"ip":0,"op":120,"st":0,"bm":0},{"ddd":0,"ind":2,"ty":4,"parent":5,"sr":1,"ks":{"p":{"a":0,"k":[109.483,63.81,0]},"a":{"a":0,"k":[109.483,63.81,0]}},"ao":0,"shapes":[{"ty":"gr","it":[{"ind":0,"ty":"sh","ks":{"a":1,"k":[{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":2,"s":[{"i":[[52.575,3.073],[2.174,-24.269],[-15.021,-4.438],[-15.021,2.048],[-10.242,13.314],[32.774,10.583]],"o":[[-15.031,-0.878],[-2.177,24.298],[15.021,4.438],[15.021,-2.048],[10.242,-13.314],[-32.774,-10.583]],"v":[[67.845,33.323],[25.512,61.686],[67.845,102.285],[130.662,104.675],[186.31,82.171],[167.533,24.475]],"c":true}]},{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":12,"s":[{"i":[[52.575,3.073],[2.174,-24.269],[-15.021,-4.438],[-15.021,2.048],[-10.242,13.314],[32.774,10.583]
],"o":[[-15.031,-0.878],[-2.177,24.298],[15.021,4.438],[15.021,-2.048],[10.242,-13.314],[-32.774,-10.583]],"v":[[88.157,41.135],[45.824,69.498],[88.157,110.097],[144.412,106.862],[195.685,81.234],[176.908,23.537]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.333,"y":0},"t":22,"s":[{"i":[[52.575,3.073],[2.174,-24.269],[-15.021,-4.438],[-15.021,2.048],[-10.242,13.314],[32.774,10.583]],"o":[[-15.031,-0.878],[-2.177,24.298],[15.021,4.438],[15.021,-2.048],[10.242,-13.314],[-32.774,-10.583]],"v":[[62.845,28.948],[20.512,57.311],[62.845,97.91],[126.599,104.987],[182.247,82.484],[163.47,24.787]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.33,"y":0},"t":46,"s":[{"i":[[52.575,3.073],[2.174,-24.269],[-15.021,-4.438],[-15.021,2.048],[-10.242,13.314],[32.774,10.583]],"o":[[-15.031,-0.878],[-2.177,24.298],[15.021,4.438],[15.021,-2.048],[10.242,-13.314],[-32.774,-10.583]],"v":[[62.845,28.948],[20.512,57.311],[62.845,97.91],[126.599,104.987],[182.247,82.484],[163.47,24.787]],"c":true}]},{"i":{"x":0.38,"y":1},"o":{"x":0.33,"y":0},"t":54.146,"s":[{"i":[[52.575,3.073],[2.174,-24.269],[-15.021,-4.438],[-15.021,2.048],[-10.242,13.314],[32.774,10.583]],"o":[[-15.031,-0.878],[-2.177,24.298],[15.021,4.438],[15.021,-2.048],[10.242,-13.314],[-32.774,-10.583]],"v":[[88.157,41.135],[45.824,69.498],[88.157,110.097],[144.412,106.862],[195.685,81.234],[176.908,23.537]],"c":true}]},{"t":61.875,"s":[{"i":[[52.575,3.073],[2.174,-24.269],[-15.021,-4.438],[-15.021,2.048],[-10.242,13.314],[32.774,10.583]],"o":[[-15.031,-0.878],[-2.177,24.298],[15.021,4.438],[15.021,-2.048],[10.242,-13.314],[-32.774,-10.583]],"v":[[67.845,33.323],[25.512,61.686],[67.845,102.285],[130.662,104.675],[186.31,82.171],[167.533,24.475]],"c":true}]}]},"hd":false},{"ty":"st","c":{"a":0,"k":[0.96862745285,0.494117647409,0.254901975393,1]},"o":{"a":0,"k":100},"w":{"a":0,"k":8},"lc":2,"lj":2,"bm":0,"hd":false},{"ty":"tr","p":{"a":0,"k":[0,0]},"a":{"a":0,"k":[0,0]},"s":{"a":0,"k":[100,100]},"r":{"a":0,"k":0},"o":{"a":0,"k":100},"sk
":{"a":0,"k":0},"sa":{"a":0,"k":0}}],"bm":0,"hd":false},{"ty":"gr","it":[{"ind":0,"ty":"sh","ks":{"a":1,"k":[{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":2,"s":[{"i":[[-0.011,-0.493],[36.528,-5.651],[6.532,-13.936],[-25.831,0.044],[9.923,2.938],[-2.186,24.296],[-15.024,-0.875],[-32.784,-10.584],[-3.072,-2.041]],"o":[[0,0],[-36.517,5.651],[-4.341,9.262],[-15.181,0.258],[-15.024,-4.44],[2.164,-24.274],[52.573,3.072],[4.227,1.368],[0.067,0.471]],"v":[[178.571,31.08],[138.982,59.827],[88.248,77.744],[110.616,105.953],[67.842,102.287],[25.517,61.688],[67.842,33.322],[167.538,24.476],[178.448,29.633]],"c":true}]},{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":12,"s":[{"i":[[-0.011,-0.493],[36.528,-5.651],[6.532,-13.936],[-21.279,2.247],[9.923,2.938],[-2.186,24.296],[-15.024,-0.875],[-32.784,-10.584],[-3.072,-2.041]],"o":[[0,0],[-36.517,5.651],[-4.341,9.262],[-15.498,3.341],[-15.024,-4.44],[2.164,-24.274],[52.573,3.072],[4.227,1.368],[0.067,0.471]],"v":[[187.946,30.142],[152.732,62.015],[108.56,85.556],[132.647,108.297],[88.155,110.099],[45.83,69.501],[88.155,41.135],[176.913,23.538],[187.823,28.696]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.333,"y":0},"t":22,"s":[{"i":[[-0.011,-0.493],[36.528,-5.651],[6.532,-13.936],[-25.831,0.044],[9.923,2.938],[-2.186,24.296],[-15.024,-0.875],[-32.784,-10.584],[-3.072,-2.041]],"o":[[0,0],[-36.517,5.651],[-4.341,9.262],[-15.181,0.258],[-15.024,-4.44],[2.164,-24.274],[52.573,3.072],[4.227,1.368],[0.067,0.471]],"v":[[174.508,31.392],[134.919,60.14],[83.248,73.369],[104.053,104.078],[62.842,97.912],[20.517,57.313],[62.842,28.947],[163.476,24.788],[174.385,29.946]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.33,"y":0},"t":46,"s":[{"i":[[-0.011,-0.493],[36.528,-5.651],[6.532,-13.936],[-25.831,0.044],[9.923,2.938],[-2.186,24.296],[-15.024,-0.875],[-32.784,-10.584],[-3.072,-2.041]],"o":[[0,0],[-36.517,5.651],[-4.341,9.262],[-15.181,0.258],[-15.024,-4.44],[2.164,-24.274],[52.573,3.072],[4.227,1.368],[0.067,0.471]],"v":[[174.508,31.392
],[134.919,60.14],[83.248,73.369],[104.053,104.078],[62.842,97.912],[20.517,57.313],[62.842,28.947],[163.476,24.788],[174.385,29.946]],"c":true}]},{"i":{"x":0.38,"y":1},"o":{"x":0.33,"y":0},"t":54.146,"s":[{"i":[[-0.011,-0.493],[36.528,-5.651],[6.532,-13.936],[-21.279,2.247],[9.923,2.938],[-2.186,24.296],[-15.024,-0.875],[-32.784,-10.584],[-3.072,-2.041]],"o":[[0,0],[-36.517,5.651],[-4.341,9.262],[-15.498,3.341],[-15.024,-4.44],[2.164,-24.274],[52.573,3.072],[4.227,1.368],[0.067,0.471]],"v":[[187.946,30.142],[152.732,62.015],[108.56,85.556],[132.647,108.297],[88.155,110.099],[45.83,69.501],[88.155,41.135],[176.913,23.538],[187.823,28.696]],"c":true}]},{"t":61.875,"s":[{"i":[[-0.011,-0.493],[36.528,-5.651],[6.532,-13.936],[-25.831,0.044],[9.923,2.938],[-2.186,24.296],[-15.024,-0.875],[-32.784,-10.584],[-3.072,-2.041]],"o":[[0,0],[-36.517,5.651],[-4.341,9.262],[-15.181,0.258],[-15.024,-4.44],[2.164,-24.274],[52.573,3.072],[4.227,1.368],[0.067,0.471]],"v":[[178.571,31.08],[138.982,59.827],[88.248,77.744],[110.616,105.953],[67.842,102.287],[25.517,61.688],[67.842,33.322],[167.538,24.476],[178.448,29.633]],"c":true}]}]},"hd":false},{"ty":"fl","c":{"a":0,"k":[1,0.694117665291,0.223529413342,1]},"o":{"a":0,"k":100},"r":1,"bm":0,"hd":false},{"ty":"tr","p":{"a":0,"k":[0,0]},"a":{"a":0,"k":[0,0]},"s":{"a":0,"k":[100,100]},"r":{"a":0,"k":0},"o":{"a":0,"k":100},"sk":{"a":0,"k":0},"sa":{"a":0,"k":0}}],"bm":0,"hd":false},{"ty":"gr","it":[{"ind":0,"ty":"sh","ks":{"a":1,"k":[{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":2,"s":[{"i":[[10.248,-13.32],[6.357,-3.061],[0,0],[0,0],[-2.433,-0.785]],"o":[[-3.319,4.317],[26.476,-24.62],[0,0],[2.254,0.437],[32.772,10.584]],"v":[[186.307,82.172],[165.238,95.81],[167.527,40.52],[160.509,22.648],[167.538,24.476]],"c":true}]},{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":12,"s":[{"i":[[10.248,-13.32],[6.357,-3.061],[0,0],[0,0],[-2.433,-0.785]],"o":[[-3.319,4.317],[26.476,-24.62],[0,0],[2.254,0.437],[32.772,10.584]],"v":[[195.682,81.235],[1
75.394,96.748],[176.902,39.583],[169.884,21.711],[176.913,23.538]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.333,"y":0},"t":22,"s":[{"i":[[10.248,-13.32],[6.357,-3.061],[0,0],[0,0],[-2.433,-0.785]],"o":[[-3.319,4.317],[26.476,-24.62],[0,0],[2.254,0.437],[32.772,10.584]],"v":[[182.245,82.485],[161.176,96.123],[163.465,40.833],[156.446,22.961],[163.476,24.788]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.33,"y":0},"t":46,"s":[{"i":[[10.248,-13.32],[6.357,-3.061],[0,0],[0,0],[-2.433,-0.785]],"o":[[-3.319,4.317],[26.476,-24.62],[0,0],[2.254,0.437],[32.772,10.584]],"v":[[182.245,82.485],[161.176,96.123],[163.465,40.833],[156.446,22.961],[163.476,24.788]],"c":true}]},{"i":{"x":0.38,"y":1},"o":{"x":0.33,"y":0},"t":54.146,"s":[{"i":[[10.248,-13.32],[6.357,-3.061],[0,0],[0,0],[-2.433,-0.785]],"o":[[-3.319,4.317],[26.476,-24.62],[0,0],[2.254,0.437],[32.772,10.584]],"v":[[195.682,81.235],[175.394,96.748],[176.902,39.583],[169.884,21.711],[176.913,23.538]],"c":true}]},{"t":61.875,"s":[{"i":[[10.248,-13.32],[6.357,-3.061],[0,0],[0,0],[-2.433,-0.785]],"o":[[-3.319,4.317],[26.476,-24.62],[0,0],[2.254,0.437],[32.772,10.584]],"v":[[186.307,82.172],[165.238,95.81],[167.527,40.52],[160.509,22.648],[167.538,24.476]],"c":true}]}]},"hd":false},{"ty":"fl","c":{"a":0,"k":[1,0.874509811401,0.474509805441,1]},"o":{"a":0,"k":100},"r":1,"bm":0,"hd":false},{"ty":"tr","p":{"a":0,"k":[0,0]},"a":{"a":0,"k":[0,0]},"s":{"a":0,"k":[100,100]},"r":{"a":0,"k":0},"o":{"a":0,"k":100},"sk":{"a":0,"k":0},"sa":{"a":0,"k":0}}],"bm":0,"hd":false},{"ty":"gr","it":[{"ind":0,"ty":"sh","ks":{"a":1,"k":[{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":2,"s":[{"i":[[52.575,3.073],[2.174,-24.269],[-15.021,-4.438],[-15.021,2.048],[-10.242,13.314],[32.774,10.583]],"o":[[-15.031,-0.878],[-2.177,24.298],[15.021,4.438],[15.021,-2.048],[10.242,-13.314],[-32.774,-10.583]],"v":[[67.845,33.323],[25.512,61.686],[67.845,102.285],[130.662,104.675],[186.31,82.171],[167.533,24.475]],"c":true}]},{"i":{"x":0.667,"y":1},"o":{"x":
0.333,"y":0},"t":12,"s":[{"i":[[52.575,3.073],[2.174,-24.269],[-15.021,-4.438],[-15.021,2.048],[-10.242,13.314],[32.774,10.583]],"o":[[-15.031,-0.878],[-2.177,24.298],[15.021,4.438],[15.021,-2.048],[10.242,-13.314],[-32.774,-10.583]],"v":[[88.157,41.135],[45.824,69.498],[88.157,110.097],[144.412,106.862],[195.685,81.234],[176.908,23.537]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.333,"y":0},"t":22,"s":[{"i":[[52.575,3.073],[2.174,-24.269],[-15.021,-4.438],[-15.021,2.048],[-10.242,13.314],[32.774,10.583]],"o":[[-15.031,-0.878],[-2.177,24.298],[15.021,4.438],[15.021,-2.048],[10.242,-13.314],[-32.774,-10.583]],"v":[[62.845,28.948],[20.512,57.311],[62.845,97.91],[126.599,104.987],[182.247,82.484],[163.47,24.787]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.33,"y":0},"t":46,"s":[{"i":[[52.575,3.073],[2.174,-24.269],[-15.021,-4.438],[-15.021,2.048],[-10.242,13.314],[32.774,10.583]],"o":[[-15.031,-0.878],[-2.177,24.298],[15.021,4.438],[15.021,-2.048],[10.242,-13.314],[-32.774,-10.583]],"v":[[62.845,28.948],[20.512,57.311],[62.845,97.91],[126.599,104.987],[182.247,82.484],[163.47,24.787]],"c":true}]},{"i":{"x":0.38,"y":1},"o":{"x":0.33,"y":0},"t":54.146,"s":[{"i":[[52.575,3.073],[2.174,-24.269],[-15.021,-4.438],[-15.021,2.048],[-10.242,13.314],[32.774,10.583]],"o":[[-15.031,-0.878],[-2.177,24.298],[15.021,4.438],[15.021,-2.048],[10.242,-13.314],[-32.774,-10.583]],"v":[[88.157,41.135],[45.824,69.498],[88.157,110.097],[144.412,106.862],[195.685,81.234],[176.908,23.537]],"c":true}]},{"t":61.875,"s":[{"i":[[52.575,3.073],[2.174,-24.269],[-15.021,-4.438],[-15.021,2.048],[-10.242,13.314],[32.774,10.583]],"o":[[-15.031,-0.878],[-2.177,24.298],[15.021,4.438],[15.021,-2.048],[10.242,-13.314],[-32.774,-10.583]],"v":[[67.845,33.323],[25.512,61.686],[67.845,102.285],[130.662,104.675],[186.31,82.171],[167.533,24.475]],"c":true}]}]},"hd":false},{"ty":"fl","c":{"a":0,"k":[1,0.819607853889,0.250980407,1]},"o":{"a":0,"k":100},"r":1,"bm":0,"hd":false},{"ty":"tr","p":{"a":0,"k":[0,0]},"a"
:{"a":0,"k":[0,0]},"s":{"a":0,"k":[100,100]},"r":{"a":0,"k":0},"o":{"a":0,"k":100},"sk":{"a":0,"k":0},"sa":{"a":0,"k":0}}],"bm":0,"hd":false}],"ip":0,"op":120,"st":0,"bm":0},{"ddd":0,"ind":3,"ty":4,"parent":5,"sr":1,"ks":{"p":{"a":0,"k":[100.972,119.826,0]},"a":{"a":0,"k":[100.972,119.826,0]}},"ao":0,"shapes":[{"ty":"gr","it":[{"ind":0,"ty":"sh","ks":{"a":1,"k":[{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":4,"s":[{"i":[[67.938,13.314],[2.641,-16.864],[-13.656,-10.754],[-10.56,-0.707],[-20.479,6.503],[-0.541,8.689],[19.118,1.366]],"o":[[-8.436,-1.653],[-2.219,14.168],[16.175,12.738],[14.722,0.986],[19.172,-6.088],[1.053,-16.899],[-27.992,-1.999]],"v":[[49.068,84.874],[23.805,103.992],[41.827,138.865],[86.252,154.889],[152.511,147.378],[178.429,117.164],[155.584,84.874]],"c":true}]},{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":14,"s":[{"i":[[68.551,9.673],[1.738,-16.981],[-14.21,-10.01],[-10.582,-0.143],[-20.103,7.586],[-0.077,8.705],[19.164,0.344]],"o":[[-8.512,-1.201],[-1.46,14.266],[16.832,11.857],[14.754,0.2],[18.82,-7.102],[0.15,-16.931],[-28.059,-0.504]],"v":[[63.148,96.615],[38.94,117.053],[58.796,150.915],[104.012,164.548],[161.863,148.251],[186.132,116.699],[161.598,85.672]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.333,"y":0},"t":24,"s":[{"i":[[67.167,16.775],[3.501,-16.707],[-13.087,-11.439],[-10.51,-1.247],[-20.785,5.446],[-0.985,8.65],[19.023,2.343]],"o":[[-8.34,-2.083],[-2.941,14.036],[15.502,13.549],[14.652,1.739],[19.459,-5.099],[1.916,-16.823],[-27.853,-3.43]],"v":[[45.405,74.329],[19.196,92.129],[35.409,127.878],[78.955,146.156],[145.945,145.824],[173.375,116.977],[152.213,83.559]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.33,"y":0},"t":46,"s":[{"i":[[67.167,16.775],[3.501,-16.707],[-13.087,-11.439],[-10.51,-1.247],[-20.785,5.446],[-0.985,8.65],[19.023,2.343]],"o":[[-8.34,-2.083],[-2.941,14.036],[15.502,13.549],[14.652,1.739],[19.459,-5.099],[1.916,-16.823],[-27.853,-3.43]],"v":[[45.405,74.329],[19.196,92.129],[35.409,127.878],[78.955,1
46.156],[145.945,145.824],[173.375,116.977],[152.213,83.559]],"c":true}]},{"i":{"x":0.38,"y":1},"o":{"x":0.33,"y":0},"t":54,"s":[{"i":[[68.551,9.673],[1.738,-16.981],[-14.21,-10.01],[-10.582,-0.143],[-20.103,7.586],[-0.077,8.705],[19.164,0.344]],"o":[[-8.512,-1.201],[-1.46,14.266],[16.832,11.857],[14.754,0.2],[18.82,-7.102],[0.15,-16.931],[-28.059,-0.504]],"v":[[63.148,96.615],[38.94,117.053],[58.796,150.915],[104.012,164.548],[161.863,148.251],[186.132,116.699],[161.598,85.672]],"c":true}]},{"t":61.134765625,"s":[{"i":[[67.938,13.314],[2.641,-16.864],[-13.656,-10.754],[-10.56,-0.707],[-20.479,6.503],[-0.541,8.689],[19.118,1.366]],"o":[[-8.436,-1.653],[-2.219,14.168],[16.175,12.738],[14.722,0.986],[19.172,-6.088],[1.053,-16.899],[-27.992,-1.999]],"v":[[49.068,84.874],[23.805,103.992],[41.827,138.865],[86.252,154.889],[152.511,147.378],[178.429,117.164],[155.584,84.874]],"c":true}]}]},"hd":false},{"ty":"st","c":{"a":0,"k":[0.96862745285,0.494117647409,0.254901975393,1]},"o":{"a":0,"k":100},"w":{"a":0,"k":8},"lc":2,"lj":2,"bm":0,"hd":false},{"ty":"tr","p":{"a":0,"k":[0,0]},"a":{"a":0,"k":[0,0]},"s":{"a":0,"k":[100,100]},"r":{"a":0,"k":0},"o":{"a":0,"k":100},"sk":{"a":0,"k":0},"sa":{"a":0,"k":0}}],"bm":0,"hd":false},{"ty":"gr","it":[{"ind":0,"ty":"sh","ks":{"a":1,"k":[{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":4,"s":[{"i":[[-3.565,-3.397],[14.529,-5.28],[11.912,-12.427],[-14.191,-1.329],[1.312,0.09],[16.168,12.748],[-2.22,14.172],[-8.443,-1.659],[-27.996,-1.996]],"o":[[-1.267,5.853],[-29.788,10.826],[-10.361,10.809],[-1.547,-0.045],[-10.562,-0.706],[-13.656,-10.752],[2.646,-16.863],[67.933,13.309],[6.974,0.505]],"v":[[171.182,90.951],[150.9,112.759],[79.792,124.191],[90.535,155.084],[86.252,154.893],[41.83,138.86],[23.802,103.991],[49.073,84.874],[155.586,84.874]],"c":true}]},{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":14,"s":[{"i":[[-3.742,-3.202],[14.226,-6.047],[11.233,-13.044],[-14.241,-0.571],[1.315,0.018],[16.833,11.856],[-1.465,14.27],[-8.52,-1.202],[
-28.062,-0.53]],"o":[[-0.953,5.912],[-29.169,12.399],[-9.77,11.346],[-1.547,0.038],[-10.584,-0.142],[-14.21,-10.009],[1.743,-16.98],[68.546,9.667],[6.991,0.132]],"v":[[177.498,90.909],[158.407,113.767],[95.924,134.238],[108.299,164.514],[104.012,164.552],[58.799,150.91],[38.937,117.052],[63.153,96.616],[161.6,85.673]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.333,"y":0},"t":24,"s":[{"i":[[-3.387,-3.575],[14.78,-4.529],[12.533,-11.801],[-14.104,-2.054],[1.306,0.155],[15.503,13.548],[-2.946,14.039],[-8.348,-2.084],[-27.854,-3.456]],"o":[[-1.565,5.78],[-30.304,9.287],[-10.901,10.264],[-1.543,-0.124],[-10.512,-1.246],[-13.088,-11.437],[3.506,-16.705],[67.163,16.769],[6.939,0.861]],"v":[[167.48,90.427],[146.108,111.167],[74.075,115.167],[83.223,146.569],[78.955,146.16],[35.413,127.874],[19.193,92.128],[45.41,74.33],[152.215,83.56]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.33,"y":0},"t":46,"s":[{"i":[[-3.387,-3.575],[14.78,-4.529],[12.533,-11.801],[-14.104,-2.054],[1.306,0.155],[15.503,13.548],[-2.946,14.039],[-8.348,-2.084],[-27.854,-3.456]],"o":[[-1.565,5.78],[-30.304,9.287],[-10.901,10.264],[-1.543,-0.124],[-10.512,-1.246],[-13.088,-11.437],[3.506,-16.705],[67.163,16.769],[6.939,0.861]],"v":[[167.48,90.427],[146.108,111.167],[74.075,115.167],[83.223,146.569],[78.955,146.16],[35.413,127.874],[19.193,92.128],[45.41,74.33],[152.215,83.56]],"c":true}]},{"i":{"x":0.38,"y":1},"o":{"x":0.33,"y":0},"t":54,"s":[{"i":[[-3.742,-3.202],[14.226,-6.047],[11.233,-13.044],[-14.241,-0.571],[1.315,0.018],[16.833,11.856],[-1.465,14.27],[-8.52,-1.202],[-28.062,-0.53]],"o":[[-0.953,5.912],[-29.169,12.399],[-9.77,11.346],[-1.547,0.038],[-10.584,-0.142],[-14.21,-10.009],[1.743,-16.98],[68.546,9.667],[6.991,0.132]],"v":[[177.498,90.909],[158.407,113.767],[95.924,134.238],[108.299,164.514],[104.012,164.552],[58.799,150.91],[38.937,117.052],[63.153,96.616],[161.6,85.673]],"c":true}]},{"t":61.134765625,"s":[{"i":[[-3.565,-3.397],[14.529,-5.28],[11.912,-12.427],[-14.191,-1.329],[1.312,0.09
],[16.168,12.748],[-2.22,14.172],[-8.443,-1.659],[-27.996,-1.996]],"o":[[-1.267,5.853],[-29.788,10.826],[-10.361,10.809],[-1.547,-0.045],[-10.562,-0.706],[-13.656,-10.752],[2.646,-16.863],[67.933,13.309],[6.974,0.505]],"v":[[171.182,90.951],[150.9,112.759],[79.792,124.191],[90.535,155.084],[86.252,154.893],[41.83,138.86],[23.802,103.991],[49.073,84.874],[155.586,84.874]],"c":true}]}]},"hd":false},{"ty":"fl","c":{"a":0,"k":[1,0.694117665291,0.223529413342,1]},"o":{"a":0,"k":100},"r":1,"bm":0,"hd":false},{"ty":"tr","p":{"a":0,"k":[0,0]},"a":{"a":0,"k":[0,0]},"s":{"a":0,"k":[100,100]},"r":{"a":0,"k":0},"o":{"a":0,"k":100},"sk":{"a":0,"k":0},"sa":{"a":0,"k":0}}],"bm":0,"hd":false},{"ty":"gr","it":[{"ind":0,"ty":"sh","ks":{"a":1,"k":[{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":4,"s":[{"i":[[1.031,-16.594],[14.665,-7.052],[0,0],[0,0]],"o":[[-0.482,7.747],[15.573,-26.942],[0,0],[18.264,1.783]],"v":[[178.425,117.165],[158.3,145.083],[158.3,93.216],[156.607,84.964]],"c":true}]},{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":14,"s":[{"i":[[0.146,-16.625],[14.268,-7.824],[0,0],[0,0]],"o":[[-0.068,7.762],[14.114,-27.734],[0,0],[18.333,0.806]],"v":[[186.128,116.699],[167.52,145.651],[164.754,93.858],[162.624,85.708]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.333,"y":0},"t":24,"s":[{"i":[[1.881,-16.519],[15.007,-6.292],[0,0],[0,0]],"o":[[-0.878,7.713],[16.932,-26.11],[0,0],[18.149,2.715]],"v":[[173.371,116.977],[151.843,143.828],[154.498,92.029],[153.23,83.702]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.33,"y":0},"t":46,"s":[{"i":[[1.881,-16.519],[15.007,-6.292],[0,0],[0,0]],"o":[[-0.878,7.713],[16.932,-26.11],[0,0],[18.149,2.715]],"v":[[173.371,116.977],[151.843,143.828],[154.498,92.029],[153.23,83.702]],"c":true}]},{"i":{"x":0.38,"y":1},"o":{"x":0.33,"y":0},"t":54,"s":[{"i":[[0.146,-16.625],[14.268,-7.824],[0,0],[0,0]],"o":[[-0.068,7.762],[14.114,-27.734],[0,0],[18.333,0.806]],"v":[[186.128,116.699],[167.52,145.651],[164.754,93.858],[162.624,85.708]],"c":true}]},{"
t":61.134765625,"s":[{"i":[[1.031,-16.594],[14.665,-7.052],[0,0],[0,0]],"o":[[-0.482,7.747],[15.573,-26.942],[0,0],[18.264,1.783]],"v":[[178.425,117.165],[158.3,145.083],[158.3,93.216],[156.607,84.964]],"c":true}]}]},"hd":false},{"ty":"fl","c":{"a":0,"k":[1,0.874509811401,0.474509805441,1]},"o":{"a":0,"k":100},"r":1,"bm":0,"hd":false},{"ty":"tr","p":{"a":0,"k":[0,0]},"a":{"a":0,"k":[0,0]},"s":{"a":0,"k":[100,100]},"r":{"a":0,"k":0},"o":{"a":0,"k":100},"sk":{"a":0,"k":0},"sa":{"a":0,"k":0}}],"bm":0,"hd":false},{"ty":"gr","it":[{"ind":0,"ty":"sh","ks":{"a":1,"k":[{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":4,"s":[{"i":[[67.938,13.314],[2.641,-16.864],[-13.656,-10.754],[-10.56,-0.707],[-20.479,6.503],[-0.541,8.689],[19.118,1.366]],"o":[[-8.436,-1.653],[-2.219,14.168],[16.175,12.738],[14.722,0.986],[19.172,-6.088],[1.053,-16.899],[-27.992,-1.999]],"v":[[49.068,84.874],[23.805,103.992],[41.827,138.865],[86.252,154.889],[152.511,147.378],[178.429,117.164],[155.584,84.874]],"c":true}]},{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":14,"s":[{"i":[[68.551,9.673],[1.738,-16.981],[-14.21,-10.01],[-10.582,-0.143],[-20.103,7.586],[-0.077,8.705],[19.164,0.344]],"o":[[-8.512,-1.201],[-1.46,14.266],[16.832,11.857],[14.754,0.2],[18.82,-7.102],[0.15,-16.931],[-28.059,-0.504]],"v":[[63.148,96.615],[38.94,117.053],[58.796,150.915],[104.012,164.548],[161.863,148.251],[186.132,116.699],[161.598,85.672]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.333,"y":0},"t":24,"s":[{"i":[[67.167,16.775],[3.501,-16.707],[-13.087,-11.439],[-10.51,-1.247],[-20.785,5.446],[-0.985,8.65],[19.023,2.343]],"o":[[-8.34,-2.083],[-2.941,14.036],[15.502,13.549],[14.652,1.739],[19.459,-5.099],[1.916,-16.823],[-27.853,-3.43]],"v":[[45.405,74.329],[19.196,92.129],[35.409,127.878],[78.955,146.156],[145.945,145.824],[173.375,116.977],[152.213,83.559]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.33,"y":0},"t":46,"s":[{"i":[[67.167,16.775],[3.501,-16.707],[-13.087,-11.439],[-10.51,-1.247],[-20.785,5.446],[-0.98
5,8.65],[19.023,2.343]],"o":[[-8.34,-2.083],[-2.941,14.036],[15.502,13.549],[14.652,1.739],[19.459,-5.099],[1.916,-16.823],[-27.853,-3.43]],"v":[[45.405,74.329],[19.196,92.129],[35.409,127.878],[78.955,146.156],[145.945,145.824],[173.375,116.977],[152.213,83.559]],"c":true}]},{"i":{"x":0.38,"y":1},"o":{"x":0.33,"y":0},"t":54,"s":[{"i":[[68.551,9.673],[1.738,-16.981],[-14.21,-10.01],[-10.582,-0.143],[-20.103,7.586],[-0.077,8.705],[19.164,0.344]],"o":[[-8.512,-1.201],[-1.46,14.266],[16.832,11.857],[14.754,0.2],[18.82,-7.102],[0.15,-16.931],[-28.059,-0.504]],"v":[[63.148,96.615],[38.94,117.053],[58.796,150.915],[104.012,164.548],[161.863,148.251],[186.132,116.699],[161.598,85.672]],"c":true}]},{"t":61.134765625,"s":[{"i":[[67.938,13.314],[2.641,-16.864],[-13.656,-10.754],[-10.56,-0.707],[-20.479,6.503],[-0.541,8.689],[19.118,1.366]],"o":[[-8.436,-1.653],[-2.219,14.168],[16.175,12.738],[14.722,0.986],[19.172,-6.088],[1.053,-16.899],[-27.992,-1.999]],"v":[[49.068,84.874],[23.805,103.992],[41.827,138.865],[86.252,154.889],[152.511,147.378],[178.429,117.164],[155.584,84.874]],"c":true}]}]},"hd":false},{"ty":"fl","c":{"a":0,"k":[1,0.819607853889,0.250980407,1]},"o":{"a":0,"k":100},"r":1,"bm":0,"hd":false},{"ty":"tr","p":{"a":0,"k":[0,0]},"a":{"a":0,"k":[0,0]},"s":{"a":0,"k":[100,100]},"r":{"a":0,"k":0},"o":{"a":0,"k":100},"sk":{"a":0,"k":0},"sa":{"a":0,"k":0}}],"bm":0,"hd":false}],"ip":0,"op":120,"st":0,"bm":0},{"ddd":0,"ind":4,"ty":4,"parent":5,"sr":1,"ks":{"p":{"a":0,"k":[90.532,164.194,0]},"a":{"a":0,"k":[90.532,164.194,0]}},"ao":0,"shapes":[{"ty":"gr","it":[{"ind":0,"ty":"sh","ks":{"a":1,"k":[{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":6,"s":[{"i":[[74.083,22.874],[1.028,-12.802],[-4.704,-4.438],[-13.314,-2.731],[-23.215,9.901],[7.169,10.925]],"o":[[-8.137,-2.512],[-0.728,9.07],[4.704,4.438],[13.314,2.731],[23.215,-9.901],[-7.169,-10.925]],"v":[[42.923,134.319],[25.337,151.446],[35.581,174.615],[66.999,191.998],[132.71,187.663],[152.511,139.839]],"c":true}]},{"i":{
"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":16,"s":[{"i":[[74.083,22.874],[1.028,-12.802],[-4.704,-4.438],[-13.314,-2.731],[-23.215,9.901],[7.169,10.925]],"o":[[-8.137,-2.512],[-0.728,9.07],[4.704,4.438],[13.314,2.731],[23.215,-9.901],[-7.169,-10.925]],"v":[[58.86,140.257],[41.275,157.384],[51.518,180.552],[82.936,197.935],[140.523,188.288],[160.324,140.464]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.333,"y":0},"t":26,"s":[{"i":[[71.426,30.165],[2.302,-12.636],[-4.237,-4.886],[-12.975,-4.048],[-24.088,7.53],[6.041,11.587]],"o":[[-7.845,-3.313],[-1.631,8.952],[4.237,4.886],[12.975,4.048],[24.088,-7.53],[-6.041,-11.587]],"v":[[39.599,121.849],[20.389,137.132],[28.265,161.209],[57.789,181.645],[123.604,183.901],[148.087,138.296]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.33,"y":0},"t":44,"s":[{"i":[[71.426,30.165],[2.302,-12.636],[-4.237,-4.886],[-12.975,-4.048],[-24.088,7.53],[6.041,11.587]],"o":[[-7.845,-3.313],[-1.631,8.952],[4.237,4.886],[12.975,4.048],[24.088,-7.53],[-6.041,-11.587]],"v":[[39.599,121.849],[20.389,137.132],[28.265,161.209],[57.789,181.645],[123.604,183.901],[148.087,138.296]],"c":true}]},{"i":{"x":0.38,"y":1},"o":{"x":0.33,"y":0},"t":54,"s":[{"i":[[74.083,22.874],[1.028,-12.802],[-4.704,-4.438],[-13.314,-2.731],[-23.215,9.901],[7.169,10.925]],"o":[[-8.137,-2.512],[-0.728,9.07],[4.704,4.438],[13.314,2.731],[23.215,-9.901],[-7.169,-10.925]],"v":[[58.86,140.257],[41.275,157.384],[51.518,180.552],[82.936,197.935],[140.523,188.288],[160.324,140.464]],"c":true}]},{"t":61,"s":[{"i":[[74.083,22.874],[1.028,-12.802],[-4.704,-4.438],[-13.314,-2.731],[-23.215,9.901],[7.169,10.925]],"o":[[-8.137,-2.512],[-0.728,9.07],[4.704,4.438],[13.314,2.731],[23.215,-9.901],[-7.169,-10.925]],"v":[[42.923,134.319],[25.337,151.446],[35.581,174.615],[66.999,191.998],[132.71,187.663],[152.511,139.839]],"c":true}]}]},"hd":false},{"ty":"st","c":{"a":0,"k":[0.96862745285,0.494117647409,0.254901975393,1]},"o":{"a":0,"k":100},"w":{"a":0,"k":8},"lc":2,"lj":2,"bm":0,"hd":false},
{"ty":"tr","p":{"a":0,"k":[0,0]},"a":{"a":0,"k":[0,0]},"s":{"a":0,"k":[100,100]},"r":{"a":0,"k":0},"o":{"a":0,"k":100},"sk":{"a":0,"k":0},"sa":{"a":0,"k":0}}],"bm":0,"hd":false},{"ty":"gr","it":[{"ind":0,"ty":"sh","ks":{"a":1,"k":[{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":6,"s":[{"i":[[-0.011,-0.045],[22.392,-4.874],[4.521,-13.076],[-9.923,-2.971],[5.785,1.188],[4.698,4.44],[-0.729,9.07],[-8.129,-2.512],[-7.176,-10.92],[-0.549,-2.489]],"o":[[0,0],[-25.599,5.572],[-3.456,9.996],[-9.99,-0.168],[-13.32,-2.725],[-4.709,-4.44],[1.031,-12.804],[74.088,22.872],[1.166,1.783],[0.011,0.045]],"v":[[155.138,146.45],[128.992,165.073],[75.556,176.151],[91.555,194.561],[67.001,191.993],[35.585,174.615],[25.338,151.451],[42.918,134.319],[152.514,139.835],[155.104,146.305]],"c":true}]},{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":16,"s":[{"i":[[-0.011,-0.045],[22.392,-4.874],[4.521,-13.076],[-9.923,-2.971],[5.785,1.188],[4.698,4.44],[-0.729,9.07],[-8.129,-2.512],[-7.176,-10.92],[-0.549,-2.489]],"o":[[0,0],[-25.599,5.572],[-3.456,9.996],[-9.719,1.608],[-13.32,-2.725],[-4.709,-4.44],[1.031,-12.804],[74.088,22.872],[1.166,1.783],[0.011,0.045]],"v":[[162.951,147.075],[136.804,165.698],[91.493,182.088],[106.243,197.686],[82.939,197.931],[51.523,180.552],[41.275,157.388],[58.855,140.257],[160.327,140.46],[162.917,146.93]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.333,"y":0},"t":26,"s":[{"i":[[-0.007,-0.046],[22.767,-2.611],[5.806,-12.558],[-9.576,-3.948],[5.639,1.756],[4.236,4.882],[-1.634,8.952],[-7.838,-3.31],[-6.024,-11.596],[-0.298,-2.532]],"o":[[0,0],[-26.028,2.985],[-4.438,9.601],[-9.923,-1.166],[-12.981,-4.042],[-4.242,-4.888],[2.306,-12.637],[71.431,30.164],[0.982,1.89],[0.007,0.046]],"v":[[150.039,145.137],[122.163,161.053],[67.887,166.733],[81.966,186.65],[57.791,181.641],[28.27,161.209],[20.389,137.137],[39.594,121.848],[148.09,138.293],[150.02,144.988]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.33,"y":0},"t":44,"s":[{"i":[[-0.007,-0.046],[22.767,-2.611],[5.806
,-12.558],[-9.576,-3.948],[5.639,1.756],[4.236,4.882],[-1.634,8.952],[-7.838,-3.31],[-6.024,-11.596],[-0.298,-2.532]],"o":[[0,0],[-26.028,2.985],[-4.438,9.601],[-9.923,-1.166],[-12.981,-4.042],[-4.242,-4.888],[2.306,-12.637],[71.431,30.164],[0.982,1.89],[0.007,0.046]],"v":[[150.039,145.137],[122.163,161.053],[67.887,166.733],[81.966,186.65],[57.791,181.641],[28.27,161.209],[20.389,137.137],[39.594,121.848],[148.09,138.293],[150.02,144.988]],"c":true}]},{"i":{"x":0.38,"y":1},"o":{"x":0.33,"y":0},"t":54,"s":[{"i":[[-0.011,-0.045],[22.392,-4.874],[4.521,-13.076],[-9.923,-2.971],[5.785,1.188],[4.698,4.44],[-0.729,9.07],[-8.129,-2.512],[-7.176,-10.92],[-0.549,-2.489]],"o":[[0,0],[-25.599,5.572],[-3.456,9.996],[-9.719,1.608],[-13.32,-2.725],[-4.709,-4.44],[1.031,-12.804],[74.088,22.872],[1.166,1.783],[0.011,0.045]],"v":[[162.951,147.075],[136.804,165.698],[91.493,182.088],[106.243,197.686],[82.939,197.931],[51.523,180.552],[41.275,157.388],[58.855,140.257],[160.327,140.46],[162.917,146.93]],"c":true}]},{"t":61,"s":[{"i":[[-0.011,-0.045],[22.392,-4.874],[4.521,-13.076],[-9.923,-2.971],[5.785,1.188],[4.698,4.44],[-0.729,9.07],[-8.129,-2.512],[-7.176,-10.92],[-0.549,-2.489]],"o":[[0,0],[-25.599,5.572],[-3.456,9.996],[-9.99,-0.168],[-13.32,-2.725],[-4.709,-4.44],[1.031,-12.804],[74.088,22.872],[1.166,1.783],[0.011,0.045]],"v":[[155.138,146.45],[128.992,165.073],[75.556,176.151],[91.555,194.561],[67.001,191.993],[35.585,174.615],[25.338,151.451],[42.918,134.319],[152.514,139.835],[155.104,146.305]],"c":true}]}]},"hd":false},{"ty":"fl","c":{"a":0,"k":[1,0.694117665291,0.223529413342,1]},"o":{"a":0,"k":100},"r":1,"bm":0,"hd":false},{"ty":"tr","p":{"a":0,"k":[0,0]},"a":{"a":0,"k":[0,0]},"s":{"a":0,"k":[100,100]},"r":{"a":0,"k":0},"o":{"a":0,"k":100},"sk":{"a":0,"k":0},"sa":{"a":0,"k":0}}],"bm":0,"hd":false},{"ty":"gr","it":[{"ind":0,"ty":"sh","ks":{"a":1,"k":[{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":6,"s":[{"i":[[23.209,-9.9],[5.101,-1.065],[0,0],[-0.28,1.973],[-1.57,-2.3
88]],"o":[[-4.776,2.041],[28.377,-16.952],[0.123,-2.31],[3.857,-0.426],[7.164,10.932]],"v":[[132.714,187.665],[117.802,192.262],[143.892,143.771],[144.498,137.357],[152.514,139.835]],"c":true}]},{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":16,"s":[{"i":[[23.209,-9.9],[5.101,-1.065],[0,0],[-0.28,1.973],[-1.57,-2.388]],"o":[[-4.776,2.041],[28.377,-16.952],[0.123,-2.31],[3.857,-0.426],[7.164,10.932]],"v":[[140.527,188.29],[125.615,192.887],[151.705,144.396],[152.31,137.982],[160.327,140.46]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.333,"y":0},"t":26,"s":[{"i":[[24.078,-7.544],[5.182,-0.55],[0,0],[-0.476,1.935],[-1.32,-2.535]],"o":[[-4.956,1.553],[29.93,-14.031],[0.354,-2.286],[3.88,-0.038],[6.036,11.593]],"v":[[123.608,183.904],[108.311,186.987],[139.118,141.346],[140.361,135.026],[148.09,138.293]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.33,"y":0},"t":44,"s":[{"i":[[24.078,-7.544],[5.182,-0.55],[0,0],[-0.476,1.935],[-1.32,-2.535]],"o":[[-4.956,1.553],[29.93,-14.031],[0.354,-2.286],[3.88,-0.038],[6.036,11.593]],"v":[[123.608,183.904],[108.311,186.987],[139.118,141.346],[140.361,135.026],[148.09,138.293]],"c":true}]},{"i":{"x":0.38,"y":1},"o":{"x":0.33,"y":0},"t":54,"s":[{"i":[[23.209,-9.9],[5.101,-1.065],[0,0],[-0.28,1.973],[-1.57,-2.388]],"o":[[-4.776,2.041],[28.377,-16.952],[0.123,-2.31],[3.857,-0.426],[7.164,10.932]],"v":[[140.527,188.29],[125.615,192.887],[151.705,144.396],[152.31,137.982],[160.327,140.46]],"c":true}]},{"t":61,"s":[{"i":[[23.209,-9.9],[5.101,-1.065],[0,0],[-0.28,1.973],[-1.57,-2.388]],"o":[[-4.776,2.041],[28.377,-16.952],[0.123,-2.31],[3.857,-0.426],[7.164,10.932]],"v":[[132.714,187.665],[117.802,192.262],[143.892,143.771],[144.498,137.357],[152.514,139.835]],"c":true}]}]},"hd":false},{"ty":"fl","c":{"a":0,"k":[1,0.874509811401,0.474509805441,1]},"o":{"a":0,"k":100},"r":1,"bm":0,"hd":false},{"ty":"tr","p":{"a":0,"k":[0,0]},"a":{"a":0,"k":[0,0]},"s":{"a":0,"k":[100,100]},"r":{"a":0,"k":0},"o":{"a":0,"k":100},"sk":{"a":0,"k":0},"sa":{"a":
0,"k":0}}],"bm":0,"hd":false},{"ty":"gr","it":[{"ind":0,"ty":"sh","ks":{"a":1,"k":[{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":6,"s":[{"i":[[74.083,22.874],[1.028,-12.802],[-4.704,-4.438],[-13.314,-2.731],[-23.215,9.901],[7.169,10.925]],"o":[[-8.137,-2.512],[-0.728,9.07],[4.704,4.438],[13.314,2.731],[23.215,-9.901],[-7.169,-10.925]],"v":[[42.923,134.319],[25.337,151.446],[35.581,174.615],[66.999,191.998],[132.71,187.663],[152.511,139.839]],"c":true}]},{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":16,"s":[{"i":[[74.083,22.874],[1.028,-12.802],[-4.704,-4.438],[-13.314,-2.731],[-23.215,9.901],[7.169,10.925]],"o":[[-8.137,-2.512],[-0.728,9.07],[4.704,4.438],[13.314,2.731],[23.215,-9.901],[-7.169,-10.925]],"v":[[58.86,140.257],[41.275,157.384],[51.518,180.552],[82.936,197.935],[140.523,188.288],[160.324,140.464]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.333,"y":0},"t":26,"s":[{"i":[[71.426,30.165],[2.302,-12.636],[-4.237,-4.886],[-12.975,-4.048],[-24.088,7.53],[6.041,11.587]],"o":[[-7.845,-3.313],[-1.631,8.952],[4.237,4.886],[12.975,4.048],[24.088,-7.53],[-6.041,-11.587]],"v":[[39.599,121.849],[20.389,137.132],[28.265,161.209],[57.789,181.645],[123.604,183.901],[148.087,138.296]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.33,"y":0},"t":44,"s":[{"i":[[71.426,30.165],[2.302,-12.636],[-4.237,-4.886],[-12.975,-4.048],[-24.088,7.53],[6.041,11.587]],"o":[[-7.845,-3.313],[-1.631,8.952],[4.237,4.886],[12.975,4.048],[24.088,-7.53],[-6.041,-11.587]],"v":[[39.599,121.849],[20.389,137.132],[28.265,161.209],[57.789,181.645],[123.604,183.901],[148.087,138.296]],"c":true}]},{"i":{"x":0.38,"y":1},"o":{"x":0.33,"y":0},"t":54,"s":[{"i":[[74.083,22.874],[1.028,-12.802],[-4.704,-4.438],[-13.314,-2.731],[-23.215,9.901],[7.169,10.925]],"o":[[-8.137,-2.512],[-0.728,9.07],[4.704,4.438],[13.314,2.731],[23.215,-9.901],[-7.169,-10.925]],"v":[[58.86,140.257],[41.275,157.384],[51.518,180.552],[82.936,197.935],[140.523,188.288],[160.324,140.464]],"c":true}]},{"t":61,"s":[{"i":[[74.083,2
2.874],[1.028,-12.802],[-4.704,-4.438],[-13.314,-2.731],[-23.215,9.901],[7.169,10.925]],"o":[[-8.137,-2.512],[-0.728,9.07],[4.704,4.438],[13.314,2.731],[23.215,-9.901],[-7.169,-10.925]],"v":[[42.923,134.319],[25.337,151.446],[35.581,174.615],[66.999,191.998],[132.71,187.663],[152.511,139.839]],"c":true}]}]},"hd":false},{"ty":"fl","c":{"a":0,"k":[1,0.819607853889,0.250980407,1]},"o":{"a":0,"k":100},"r":1,"bm":0,"hd":false},{"ty":"tr","p":{"a":0,"k":[0,0]},"a":{"a":0,"k":[0,0]},"s":{"a":0,"k":[100,100]},"r":{"a":0,"k":0},"o":{"a":0,"k":100},"sk":{"a":0,"k":0},"sa":{"a":0,"k":0}}],"bm":0,"hd":false}],"ip":0,"op":120,"st":0,"bm":0},{"ddd":0,"ind":5,"ty":4,"parent":9,"sr":1,"ks":{"r":{"a":1,"k":[{"i":{"x":[0.667],"y":[1]},"o":{"x":[0.333],"y":[0]},"t":0,"s":[0]},{"i":{"x":[0.667],"y":[1]},"o":{"x":[0.333],"y":[0]},"t":12,"s":[-20.177]},{"i":{"x":[0.34],"y":[1]},"o":{"x":[0.333],"y":[0]},"t":27,"s":[14.417]},{"i":{"x":[0.667],"y":[1]},"o":{"x":[0.67],"y":[0]},"t":47.266,"s":[-21.806]},{"i":{"x":[0],"y":[1]},"o":{"x":[0.333],"y":[0]},"t":58,"s":[3]},{"t":86,"s":[0]}]},"p":{"a":0,"k":[-72.859,72.455,0]},"a":{"a":0,"k":[-72.859,72.455,0]}},"ao":0,"shapes":[{"ty":"gr","it":[{"ty":"gr","it":[{"ind":0,"ty":"sh","ks":{"a":1,"k":[{"i":{"x":0.833,"y":0.833},"o":{"x":0.333,"y":0},"t":0,"s":[{"i":[[0,0],[-2.582,-2.853]],"o":[[0,0],[0,0]],"v":[[23.016,-197.92],[27.525,-193.816]],"c":false}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":10,"s":[{"i":[[0,0],[-1.335,-3.243]],"o":[[0,0],[0,0]],"v":[[66.204,-170.767],[68.852,-165.849]],"c":false}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":14,"s":[{"i":[[0,0],[-0.608,-3.471]],"o":[[0,0],[0,0]],"v":[[91.403,-154.924],[92.965,-149.531]],"c":false}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":20,"s":[{"i":[[0,0],[0.714,-3.344]],"o":[[0,0],[0,0]],"v":[[114.19,-117.544],[113.613,-112.153]],"c":false}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":25,"s":[{"i":[[0,0],[1.61,-3.259]],"o":[[0,0]
,[0,0]],"v":[[129.635,-92.207],[127.609,-86.818]],"c":false}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":26,"s":[{"i":[[0,0],[1.74,-3.247]],"o":[[0,0],[0,0]],"v":[[131.869,-88.543],[129.633,-83.153]],"c":false}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":27,"s":[{"i":[[0,0],[1.862,-3.235]],"o":[[0,0],[0,0]],"v":[[133.977,-85.086],[131.542,-79.697]],"c":false}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":28,"s":[{"i":[[0,0],[1.973,-3.224]],"o":[[0,0],[0,0]],"v":[[135.898,-81.935],[133.283,-76.545]],"c":false}]},{"t":29,"s":[{"i":[[0,0],[2.067,-3.215]],"o":[[0,0],[0,0]],"v":[[137.514,-79.284],[134.748,-73.894]],"c":false}],"h":1},{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":30,"s":[{"i":[[0,0],[2.12,-3.21]],"o":[[0,0],[0,0]],"v":[[138.435,-77.773],[135.582,-72.384]],"c":false}]},{"i":{"x":0.833,"y":1},"o":{"x":0.333,"y":0},"t":35.398,"s":[{"i":[[0,0],[2.12,-3.21]],"o":[[0,0],[0,0]],"v":[[138.435,-77.773],[135.582,-72.384]],"c":false}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.167,"y":0.192},"t":36.266,"s":[{"i":[[0,0],[2.067,-3.215]],"o":[[0,0],[0,0]],"v":[[137.514,-79.284],[134.748,-73.894]],"c":false}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.167,"y":0.192},"t":37.133,"s":[{"i":[[0,0],[1.973,-3.224]],"o":[[0,0],[0,0]],"v":[[135.898,-81.935],[133.283,-76.545]],"c":false}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.192},"t":38,"s":[{"i":[[0,0],[1.862,-3.235]],"o":[[0,0],[0,0]],"v":[[133.977,-85.086],[131.542,-79.697]],"c":false}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":38.869,"s":[{"i":[[0,0],[1.74,-3.247]],"o":[[0,0],[0,0]],"v":[[131.869,-88.543],[129.633,-83.153]],"c":false}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":39.736,"s":[{"i":[[0,0],[1.61,-3.259]],"o":[[0,0],[0,0]],"v":[[129.635,-92.207],[127.609,-86.818]],"c":false}]},{"i":{"x":0.667,"y":1},"o":{"x":0.167,"y":0.167},"t":46.676,"s":[{"i":[[0,0],[-2.582,-2.853]],"o":[[0,0],[0,0]],"v":[[23.016,-197.92],[27.525,-193.816]],"c":fal
se}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.333,"y":0},"t":51.881,"s":[{"i":[[0,0],[-3.212,-2.118]],"o":[[0,0],[0,0]],"v":[[-19.625,-195.276],[-14.233,-192.427]],"c":false}]},{"i":{"x":0.667,"y":1},"o":{"x":0.167,"y":0.167},"t":55.775,"s":[{"i":[[0,0],[-2.582,-2.853]],"o":[[0,0],[0,0]],"v":[[23.016,-197.92],[27.525,-193.816]],"c":false}]},{"i":{"x":0.34,"y":1},"o":{"x":0.333,"y":0},"t":60.393,"s":[{"i":[[0,0],[-1.897,-3.347]],"o":[[0,0],[0,0]],"v":[[47.981,-188.203],[51.486,-183.214]],"c":false}]},{"t":77,"s":[{"i":[[0,0],[-2.582,-2.853]],"o":[[0,0],[0,0]],"v":[[23.016,-197.92],[27.525,-193.816]],"c":false}]}]},"hd":false},{"ty":"st","c":{"a":0,"k":[1,1,1,1]},"o":{"a":0,"k":100},"w":{"a":0,"k":6},"lc":2,"lj":2,"bm":0,"hd":false},{"ty":"tr","p":{"a":0,"k":[0,0]},"a":{"a":0,"k":[0,0]},"s":{"a":0,"k":[100,100]},"r":{"a":0,"k":0},"o":{"a":0,"k":100},"sk":{"a":0,"k":0},"sa":{"a":0,"k":0}}],"bm":0,"hd":false},{"ty":"gr","it":[{"ind":0,"ty":"sh","ks":{"a":1,"k":[{"i":{"x":0.833,"y":0.833},"o":{"x":0.333,"y":0},"t":0,"s":[{"i":[[0,0],[-0.975,-7.313]],"o":[[2.2,4.707],[0,0]],"v":[[34.895,-182.535],[39.982,-164.594]],"c":false}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":10,"s":[{"i":[[0,0],[1.737,-6.524]],"o":[[0.32,4.698],[0,0]],"v":[[71.535,-153.925],[69.737,-137.064]],"c":false}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":14,"s":[{"i":[[0,0],[3.319,-6.064]],"o":[[-0.777,4.693],[0,0]],"v":[[92.913,-137.231],[87.099,-121.001]],"c":false}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":20,"s":[{"i":[[0,0],[5.025,-4.465]],"o":[[-2.344,4.019],[0,0]],"v":[[109.181,-100.994],[98.378,-88.077]],"c":false}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":25,"s":[{"i":[[0,0],[6.181,-3.381]],"o":[[-3.407,3.563],[0,0]],"v":[[120.208,-76.432],[106.023,-65.761]],"c":false}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":26,"s":[{"i":[[0,0],[6.349,-3.224]],"o":[[-3.56,3.497],[0,0]],"v":[[121.802,-72.879],[107.129,-62.534]],"c"
:false}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":27,"s":[{"i":[[0,0],[6.506,-3.077]],"o":[[-3.705,3.434],[0,0]],"v":[[123.307,-69.528],[108.172,-59.489]],"c":false}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":28,"s":[{"i":[[0,0],[6.65,-2.942]],"o":[[-3.837,3.377],[0,0]],"v":[[124.678,-66.473],[109.123,-56.713]],"c":false}]},{"t":29,"s":[{"i":[[0,0],[6.771,-2.828]],"o":[[-3.948,3.33],[0,0]],"v":[[125.832,-63.903],[109.923,-54.378]],"c":false}],"h":1},{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":30,"s":[{"i":[[0,0],[6.84,-2.764]],"o":[[-4.012,3.302],[0,0]],"v":[[126.489,-62.439],[110.379,-53.048]],"c":false}]},{"i":{"x":0.833,"y":1},"o":{"x":0.333,"y":0},"t":35.398,"s":[{"i":[[0,0],[6.84,-2.764]],"o":[[-4.012,3.302],[0,0]],"v":[[126.489,-62.439],[110.379,-53.048]],"c":false}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.167,"y":0.192},"t":36.266,"s":[{"i":[[0,0],[6.771,-2.828]],"o":[[-3.948,3.33],[0,0]],"v":[[125.832,-63.903],[109.923,-54.378]],"c":false}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.167,"y":0.192},"t":37.133,"s":[{"i":[[0,0],[6.65,-2.942]],"o":[[-3.837,3.377],[0,0]],"v":[[124.678,-66.473],[109.123,-56.713]],"c":false}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.192},"t":38,"s":[{"i":[[0,0],[6.506,-3.077]],"o":[[-3.705,3.434],[0,0]],"v":[[123.307,-69.528],[108.172,-59.489]],"c":false}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":38.869,"s":[{"i":[[0,0],[6.349,-3.224]],"o":[[-3.56,3.497],[0,0]],"v":[[121.802,-72.879],[107.129,-62.534]],"c":false}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":39.736,"s":[{"i":[[0,0],[6.181,-3.381]],"o":[[-3.407,3.563],[0,0]],"v":[[120.208,-76.432],[106.023,-65.761]],"c":false}]},{"i":{"x":0.667,"y":1},"o":{"x":0.167,"y":0.167},"t":46.676,"s":[{"i":[[0,0],[-0.975,-7.313]],"o":[[2.2,4.707],[0,0]],"v":[[34.895,-182.535],[39.982,-164.594]],"c":false}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.333,"y":0},"t":51.881,"s":[{"i":[[0,0],[-2.769,-6.838]],"o":[[3.305,4.009],[0,
0]],"v":[[-4.282,-183.342],[5.122,-167.238]],"c":false}]},{"i":{"x":0.667,"y":1},"o":{"x":0.167,"y":0.167},"t":55.775,"s":[{"i":[[0,0],[-0.975,-7.313]],"o":[[2.2,4.707],[0,0]],"v":[[34.895,-182.535],[39.982,-164.594]],"c":false}]},{"i":{"x":0.34,"y":1},"o":{"x":0.333,"y":0},"t":60.393,"s":[{"i":[[0,0],[0.645,-7.349]],"o":[[1.12,5.074],[0,0]],"v":[[56.215,-170.596],[57.262,-151.977]],"c":false}]},{"t":77,"s":[{"i":[[0,0],[-0.975,-7.313]],"o":[[2.2,4.707],[0,0]],"v":[[34.895,-182.535],[39.982,-164.594]],"c":false}]}]},"hd":false},{"ty":"st","c":{"a":0,"k":[1,1,1,1]},"o":{"a":0,"k":100},"w":{"a":0,"k":6},"lc":2,"lj":2,"bm":0,"hd":false},{"ty":"tr","p":{"a":0,"k":[0,0]},"a":{"a":0,"k":[0,0]},"s":{"a":0,"k":[100,100]},"r":{"a":0,"k":0},"o":{"a":0,"k":100},"sk":{"a":0,"k":0},"sa":{"a":0,"k":0}}],"bm":0,"hd":false},{"ty":"tr","p":{"a":0,"k":[0,0]},"a":{"a":0,"k":[0,0]},"s":{"a":0,"k":[100,100]},"r":{"a":0,"k":0},"o":{"a":0,"k":100},"sk":{"a":0,"k":0},"sa":{"a":0,"k":0}}],"bm":0,"hd":false},{"ty":"gr","it":[{"ind":0,"ty":"sh","ks":{"a":1,"k":[{"i":{"x":0.833,"y":0.833},"o":{"x":0.333,"y":0},"t":0,"s":[{"i":[[-23.929,48.852],[2.114,1.922],[24.913,-24.689],[0.384,-3.939]],"o":[[6.888,-14.061],[-3.077,-2.797],[-21.335,21.142],[-0.384,3.939]],"v":[[3.194,15.822],[7.903,-12.977],[-29.769,36.228],[-60.524,64.578]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":10,"s":[{"i":[[-23.929,48.852],[2.114,1.922],[24.913,-24.689],[0.384,-3.939]],"o":[[6.888,-14.061],[-3.077,-2.797],[-21.335,21.142],[-0.384,3.939]],"v":[[3.194,15.822],[7.903,-12.977],[-29.769,36.228],[-60.524,64.578]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":14,"s":[{"i":[[-23.929,48.852],[2.114,1.922],[24.913,-24.689],[0.384,-3.939]],"o":[[6.888,-14.061],[-3.077,-2.797],[-21.335,21.142],[-0.384,3.939]],"v":[[3.194,15.822],[7.903,-12.977],[-29.769,36.228],[-60.524,64.578]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":20,"s":[{"i":[[-23.929,48.852],[2
.114,1.922],[24.913,-24.689],[0.384,-3.939]],"o":[[6.888,-14.061],[-3.077,-2.797],[-21.335,21.142],[-0.384,3.939]],"v":[[3.194,15.822],[7.903,-12.977],[-29.769,36.228],[-60.524,64.578]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":25,"s":[{"i":[[-23.929,48.852],[2.114,1.922],[24.913,-24.689],[0.384,-3.939]],"o":[[6.888,-14.061],[-3.077,-2.797],[-21.335,21.142],[-0.384,3.939]],"v":[[3.194,15.822],[7.903,-12.977],[-29.769,36.228],[-60.524,64.578]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":26,"s":[{"i":[[-23.929,48.852],[2.114,1.922],[24.913,-24.689],[0.384,-3.939]],"o":[[6.888,-14.061],[-3.077,-2.797],[-21.335,21.142],[-0.384,3.939]],"v":[[3.194,15.822],[7.903,-12.977],[-29.769,36.228],[-60.524,64.578]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":27,"s":[{"i":[[-23.929,48.852],[2.114,1.922],[24.913,-24.689],[0.384,-3.939]],"o":[[6.888,-14.061],[-3.077,-2.797],[-21.335,21.142],[-0.384,3.939]],"v":[[3.194,15.822],[7.903,-12.977],[-29.769,36.228],[-60.524,64.578]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":28,"s":[{"i":[[-23.929,48.852],[2.114,1.922],[24.913,-24.689],[0.384,-3.939]],"o":[[6.888,-14.061],[-3.077,-2.797],[-21.335,21.142],[-0.384,3.939]],"v":[[3.194,15.822],[7.903,-12.977],[-29.769,36.228],[-60.524,64.578]],"c":true}]},{"t":29,"s":[{"i":[[-23.929,48.852],[2.114,1.922],[24.913,-24.689],[0.384,-3.939]],"o":[[6.888,-14.061],[-3.077,-2.797],[-21.335,21.142],[-0.384,3.939]],"v":[[3.194,15.822],[7.903,-12.977],[-29.769,36.228],[-60.524,64.578]],"c":true}],"h":1},{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":30,"s":[{"i":[[-23.929,48.852],[2.114,1.922],[24.913,-24.689],[0.384,-3.939]],"o":[[6.888,-14.061],[-3.077,-2.797],[-21.335,21.142],[-0.384,3.939]],"v":[[3.194,15.822],[7.903,-12.977],[-29.769,36.228],[-60.524,64.578]],"c":true}]},{"i":{"x":0.833,"y":1},"o":{"x":0.333,"y":0},"t":35.398,"s":[{"i":[[-23.929,48.852],[2.114,1.922],[24.913,-24.689],[0.384,-3.939
]],"o":[[6.888,-14.061],[-3.077,-2.797],[-21.335,21.142],[-0.384,3.939]],"v":[[3.194,15.822],[7.903,-12.977],[-29.769,36.228],[-60.524,64.578]],"c":true}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.167,"y":0.192},"t":36.266,"s":[{"i":[[-23.929,48.852],[2.114,1.922],[24.913,-24.689],[0.384,-3.939]],"o":[[6.888,-14.061],[-3.077,-2.797],[-21.335,21.142],[-0.384,3.939]],"v":[[3.194,15.822],[7.903,-12.977],[-29.769,36.228],[-60.524,64.578]],"c":true}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.167,"y":0.192},"t":37.133,"s":[{"i":[[-23.929,48.852],[2.114,1.922],[24.913,-24.689],[0.384,-3.939]],"o":[[6.888,-14.061],[-3.077,-2.797],[-21.335,21.142],[-0.384,3.939]],"v":[[3.194,15.822],[7.903,-12.977],[-29.769,36.228],[-60.524,64.578]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.192},"t":38,"s":[{"i":[[-23.929,48.852],[2.114,1.922],[24.913,-24.689],[0.384,-3.939]],"o":[[6.888,-14.061],[-3.077,-2.797],[-21.335,21.142],[-0.384,3.939]],"v":[[3.194,15.822],[7.903,-12.977],[-29.769,36.228],[-60.524,64.578]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":38.869,"s":[{"i":[[-23.929,48.852],[2.114,1.922],[24.913,-24.689],[0.384,-3.939]],"o":[[6.888,-14.061],[-3.077,-2.797],[-21.335,21.142],[-0.384,3.939]],"v":[[3.194,15.822],[7.903,-12.977],[-29.769,36.228],[-60.524,64.578]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":39.736,"s":[{"i":[[-23.929,48.852],[2.114,1.922],[24.913,-24.689],[0.384,-3.939]],"o":[[6.888,-14.061],[-3.077,-2.797],[-21.335,21.142],[-0.384,3.939]],"v":[[3.194,15.822],[7.903,-12.977],[-29.769,36.228],[-60.524,64.578]],"c":true}]},{"i":{"x":0.667,"y":1},"o":{"x":0.167,"y":0.167},"t":46.676,"s":[{"i":[[-23.929,48.852],[2.114,1.922],[24.913,-24.689],[0.384,-3.939]],"o":[[6.888,-14.061],[-3.077,-2.797],[-21.335,21.142],[-0.384,3.939]],"v":[[3.194,15.822],[7.903,-12.977],[-29.769,36.228],[-60.524,64.578]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.333,"y":0},"t":51.881,"s":[{"i":[[-23.929,48.852],[2.114,1.922
],[24.913,-24.689],[0.384,-3.939]],"o":[[6.888,-14.061],[-3.077,-2.797],[-21.335,21.142],[-0.384,3.939]],"v":[[3.194,15.822],[7.903,-12.977],[-29.769,36.228],[-60.524,64.578]],"c":true}]},{"i":{"x":0.667,"y":1},"o":{"x":0.167,"y":0.167},"t":55.775,"s":[{"i":[[-23.929,48.852],[2.114,1.922],[24.913,-24.689],[0.384,-3.939]],"o":[[6.888,-14.061],[-3.077,-2.797],[-21.335,21.142],[-0.384,3.939]],"v":[[3.194,15.822],[7.903,-12.977],[-29.769,36.228],[-60.524,64.578]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.333,"y":0},"t":60.393,"s":[{"i":[[-23.929,48.852],[2.114,1.922],[24.913,-24.689],[0.384,-3.939]],"o":[[6.888,-14.061],[-3.077,-2.797],[-21.335,21.142],[-0.384,3.939]],"v":[[3.194,15.822],[7.903,-12.977],[-29.769,36.228],[-60.524,64.578]],"c":true}]},{"t":77,"s":[{"i":[[-23.929,48.852],[2.114,1.922],[24.913,-24.689],[0.384,-3.939]],"o":[[6.888,-14.061],[-3.077,-2.797],[-21.335,21.142],[-0.384,3.939]],"v":[[3.194,15.822],[7.903,-12.977],[-29.769,36.228],[-60.524,64.578]],"c":true}]}]},"hd":false},{"ty":"fl","c":{"a":0,"k":[1,0.874509811401,0.474509805441,1]},"o":{"a":0,"k":100},"r":1,"bm":0,"hd":false},{"ty":"tr","p":{"a":0,"k":[0,0]},"a":{"a":0,"k":[0,0]},"s":{"a":0,"k":[100,100]},"r":{"a":0,"k":0},"o":{"a":0,"k":100},"sk":{"a":0,"k":0},"sa":{"a":0,"k":0}}],"bm":0,"hd":false},{"ty":"gr","it":[{"ind":0,"ty":"sh","ks":{"a":1,"k":[{"i":{"x":0.833,"y":0.833},"o":{"x":0.333,"y":0},"t":0,"s":[{"i":[[0,0],[-35.335,53.002],[-5.567,22.479],[-21.793,12.034],[-3.48,-42.21],[-3.346,-23.044],[-32.858,-3.356],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[8.95,-36.141],[24.555,-13.559],[3.528,42.788],[3.346,23.044],[40.114,4.097],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[-13.252,-133.194],[-1.418,-214.617],[57.831,-158.315],[38.895,-65.927],[112.397,-49.978],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":false}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":10,"s":[{"i":[[0,0],[-35.335,53.00
2],[-13.167,17.34],[-24.248,3.318],[11.988,-36.978],[4.187,-15.792],[-32.894,-2.889],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[21.17,-27.879],[27.32,-3.739],[-12.152,37.484],[-4.187,15.792],[40.158,3.527],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[9.789,-127.255],[49.868,-192.704],[83.798,-126.069],[42.366,-55.178],[112.216,-49.497],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":false}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":14,"s":[{"i":[[0,0],[-35.335,53.002],[-17.602,14.342],[-25.679,-1.767],[21.013,-33.924],[8.581,-11.56],[-32.915,-2.617],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[28.3,-23.058],[28.933,1.991],[-21.301,34.389],[-8.581,11.56],[40.184,3.194],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[23.233,-123.789],[79.793,-179.918],[98.949,-107.254],[44.391,-48.906],[112.11,-47.457],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":false}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":20,"s":[{"i":[[0,0],[-35.335,53.002],[-20.294,7.491],[-21.512,-9.687],[30.22,-24.21],[15.673,-6.821],[-59.212,-6.062],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[32.627,-12.044],[24.238,10.914],[-30.634,24.542],[-15.673,6.821],[40.103,4.105],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[44.302,-110.709],[113.095,-143.907],[103.692,-71.858],[47.694,-34.844],[115.111,-39.332],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":false}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":25,"s":[{"i":[[0,0],[-35.335,53.002],[-22.118,2.847],[-18.688,-15.055],[36.461,-17.625],[20.48,-3.608],[-22.589,6.762],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[35.56,-4.578],[21.056,16.963],[-36.961,17.867],[-20.48,3.608],[40.311,0.76],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314]
,[-86.808,-47.162],[58.584,-101.844],[135.667,-119.498],[106.907,-47.866],[49.932,-25.312],[105.988,-39.812],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":false}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":26,"s":[{"i":[[0,0],[-35.335,53.002],[-22.382,2.176],[-18.279,-15.832],[37.364,-16.673],[21.358,-1.449],[-31.587,12.499],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[35.985,-3.498],[20.595,17.838],[-37.876,16.901],[-8.42,0.571],[35.601,-2.189],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[60.649,-100.562],[138.932,-115.968],[107.372,-44.396],[50.256,-23.934],[108.58,-39.299],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":false}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":27,"s":[{"i":[[0,0],[-35.335,53.002],[-22.631,1.542],[-17.894,-16.564],[38.215,-15.775],[21.955,-1.554],[-29.58,10.587],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[36.385,-2.48],[20.161,18.663],[-38.739,15.991],[-13.169,0.959],[37.099,-0.815],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[62.598,-99.352],[142.011,-112.638],[107.811,-41.123],[50.562,-22.633],[113.135,-39.262],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":false}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":28,"s":[{"i":[[0,0],[-35.335,53.002],[-22.858,0.965],[-17.543,-17.232],[38.991,-14.956],[22.499,-1.651],[-34.396,15.776],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[36.75,-1.551],[19.765,19.415],[-39.526,15.161],[-17.5,1.312],[38.465,0.438],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[64.374,-98.249],[144.819,-109.602],[108.21,-38.139],[50.84,-21.448],[114.827,-39.608],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":false}]},{"t":29,"s":[{"i":[[0,0],[-35.335,53.002],[-23.049,0.479],[-17.247,-17.793],[39.644,-14.267],[22.957,-1.732],[-31.785,18.8
65],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[37.057,-0.77],[19.432,20.048],[-40.188,14.462],[-21.143,1.609],[39.614,1.491],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[65.868,-97.322],[147.181,-107.048],[108.547,-35.628],[51.074,-20.45],[118.778,-38.79],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":false}],"h":1},{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":30,"s":[{"i":[[0,0],[-35.335,53.002],[-23.157,0.202],[-17.079,-18.113],[40.016,-13.874],[23.218,-1.778],[-32.985,-1.713],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[37.231,-0.325],[19.243,20.408],[-40.565,14.064],[-23.218,1.778],[40.269,2.092],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[66.72,-96.793],[148.526,-105.593],[108.739,-34.198],[51.208,-19.882],[113.79,-17.766],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":false}]},{"i":{"x":0.833,"y":1},"o":{"x":0.333,"y":0},"t":35.398,"s":[{"i":[[0,0],[-35.335,53.002],[-23.157,0.202],[-17.079,-18.113],[40.016,-13.874],[23.218,-1.778],[-32.985,-1.713],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[37.231,-0.325],[19.243,20.408],[-40.565,14.064],[-23.218,1.778],[40.269,2.092],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[66.72,-96.793],[148.526,-105.593],[108.739,-34.198],[51.208,-19.882],[113.79,-17.766],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":false}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.167,"y":0.192},"t":36.266,"s":[{"i":[[0,0],[-35.335,53.002],[-23.049,0.479],[-17.247,-17.793],[39.644,-14.267],[22.957,-1.732],[-31.785,18.865],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[37.057,-0.77],[19.432,20.048],[-40.188,14.462],[-21.143,1.609],[39.614,1.491],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[65.868,-97.322],[147.181,-107.048],[108.547,-35.628],[51.0
74,-20.45],[118.778,-38.79],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":false}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.167,"y":0.192},"t":37.133,"s":[{"i":[[0,0],[-35.335,53.002],[-22.858,0.965],[-17.543,-17.232],[38.991,-14.956],[22.499,-1.651],[-34.396,15.776],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[36.75,-1.551],[19.765,19.415],[-39.526,15.161],[-17.5,1.312],[38.465,0.438],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[64.374,-98.249],[144.819,-109.602],[108.21,-38.139],[50.84,-21.448],[114.827,-39.608],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":false}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.192},"t":38,"s":[{"i":[[0,0],[-35.335,53.002],[-22.631,1.542],[-17.894,-16.564],[38.215,-15.775],[21.955,-1.554],[-29.58,10.587],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[36.385,-2.48],[20.161,18.663],[-38.739,15.991],[-13.169,0.959],[37.099,-0.815],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[62.598,-99.352],[142.011,-112.638],[107.811,-41.123],[50.562,-22.633],[113.135,-39.262],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":false}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":38.869,"s":[{"i":[[0,0],[-35.335,53.002],[-22.382,2.176],[-18.279,-15.832],[37.364,-16.673],[21.358,-1.449],[-31.587,12.499],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[35.985,-3.498],[20.595,17.838],[-37.876,16.901],[-8.42,0.571],[35.601,-2.189],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[60.649,-100.562],[138.932,-115.968],[107.372,-44.396],[50.256,-23.934],[108.58,-39.299],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":false}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":39.736,"s":[{"i":[[0,0],[-35.335,53.002],[-22.118,2.847],[-18.688,-15.055],[36.461,-17.625],[20.48,-3.608],[-22.589,6.762],[6.657,-76.814]
,[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[35.56,-4.578],[21.056,16.963],[-36.961,17.867],[-20.48,3.608],[40.311,0.76],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[58.584,-101.844],[135.667,-119.498],[106.907,-47.866],[49.932,-25.312],[105.988,-39.812],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":false}]},{"i":{"x":0.667,"y":1},"o":{"x":0.167,"y":0.167},"t":46.676,"s":[{"i":[[0,0],[-35.335,53.002],[-5.567,22.479],[-21.793,12.034],[-3.48,-42.21],[-3.346,-23.044],[-32.858,-3.356],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[8.95,-36.141],[24.555,-13.559],[3.528,42.788],[3.346,23.044],[40.114,4.097],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[-13.252,-133.194],[-1.418,-214.617],[57.831,-158.315],[38.895,-65.927],[112.397,-49.978],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":false}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.333,"y":0},"t":51.881,"s":[{"i":[[0,0],[-35.335,53.002],[2.736,22.996],[-16.135,18.959],[-18.171,-38.257],[-3.346,-23.044],[-32.858,-3.356],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[-4.398,-36.972],[18.179,-21.361],[18.42,38.781],[3.346,23.044],[40.114,4.097],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[-36.76,-126.359],[-54.462,-206.711],[20.859,-174.977],[31.082,-66.239],[112.397,-49.978],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":false}]},{"i":{"x":0.667,"y":1},"o":{"x":0.167,"y":0.167},"t":55.775,"s":[{"i":[[0,0],[-35.335,53.002],[-5.567,22.479],[-21.793,12.034],[-3.48,-42.21],[-3.346,-23.044],[-32.858,-3.356],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[8.95,-36.141],[24.555,-13.559],[3.528,42.788],[3.346,23.044],[40.114,4.097],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[-13.252,-133.194],[-1.418,-214.617],[57.831,-158.315],[38.895,-65.927],[112.39
7,-49.978],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":false}]},{"i":{"x":0.34,"y":1},"o":{"x":0.333,"y":0},"t":60.393,"s":[{"i":[[0,0],[-35.335,53.002],[-9.606,21.072],[-23.635,7.821],[4.343,-42.13],[0.95,-23.267],[-32.858,-3.356],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[15.444,-33.878],[26.63,-8.811],[-4.402,42.707],[-0.95,23.267],[40.114,4.097],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[2.674,-129.298],[29.282,-207.156],[77.165,-140.917],[46.09,-65.463],[112.397,-49.978],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":false}]},{"t":77,"s":[{"i":[[0,0],[-35.335,53.002],[-5.567,22.479],[-21.793,12.034],[-3.48,-42.21],[-3.346,-23.044],[-32.858,-3.356],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[8.95,-36.141],[24.555,-13.559],[3.528,42.788],[3.346,23.044],[40.114,4.097],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[-13.252,-133.194],[-1.418,-214.617],[57.831,-158.315],[38.895,-65.927],[112.397,-49.978],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":false}]}]},"hd":false},{"ty":"st","c":{"a":0,"k":[0.96862745285,0.494117647409,0.254901975393,1]},"o":{"a":0,"k":100},"w":{"a":0,"k":8},"lc":2,"lj":2,"bm":0,"hd":false},{"ty":"tr","p":{"a":0,"k":[0,0]},"a":{"a":0,"k":[0,0]},"s":{"a":0,"k":[100,100]},"r":{"a":0,"k":0},"o":{"a":0,"k":100},"sk":{"a":0,"k":0},"sa":{"a":0,"k":0}}],"bm":0,"hd":false},{"ty":"gr","it":[{"ind":0,"ty":"sh","ks":{"a":1,"k":[{"i":{"x":0.833,"y":0.833},"o":{"x":0.333,"y":0},"t":0,"s":[{"i":[[31.58,26.707],[6.919,-11.821],[-10.667,-27.389],[-6.524,18.112]],"o":[[0,0],[-6.919,11.821],[10.667,27.389],[6.524,-18.112]],"v":[[32.756,-210.226],[11.843,-208.256],[16.168,-139.063],[54.043,-121.815]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":10,"s":[{"i":[[19.272,32.711],[10.573,-7.852],[0.091,-26.66],[-12.473,13.325]],"o":[[0,0],[-10.573,7.853],[-0.091,26.66],[1
2.473,-13.325]],"v":[[79.531,-178.137],[59.705,-183.094],[38.793,-122.915],[67.218,-96.249]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":14,"s":[{"i":[[12.09,36.214],[12.705,-5.537],[6.368,-26.234],[-15.944,10.532]],"o":[[0,0],[-12.705,5.537],[-6.368,26.234],[15.944,-10.532]],"v":[[106.823,-159.414],[87.632,-168.412],[51.994,-113.493],[74.906,-81.331]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":20,"s":[{"i":[[-2.493,36.707],[12.931,-1.033],[14.85,-21.832],[-17.505,4.551]],"o":[[0,0],[-12.931,1.033],[-14.85,21.832],[17.505,-4.551]],"v":[[129.088,-116.771],[115.749,-130.986],[65.43,-92.304],[73.713,-55.872]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":25,"s":[{"i":[[-12.378,37.042],[13.084,2.021],[20.598,-18.847],[-18.563,0.497]],"o":[[0,0],[-13.084,-2.021],[-20.598,18.847],[18.563,-0.497]],"v":[[144.18,-87.867],[134.808,-105.618],[74.536,-77.942],[72.905,-38.615]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":26,"s":[{"i":[[-13.807,37.09],[13.106,2.462],[21.43,-18.416],[-18.716,-0.089]],"o":[[0,0],[-13.106,-2.462],[-21.43,18.416],[18.716,0.089]],"v":[[146.363,-83.687],[137.564,-101.949],[75.854,-75.864],[72.788,-36.119]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":27,"s":[{"i":[[-15.156,37.136],[13.126,2.879],[22.214,-18.009],[-18.861,-0.642]],"o":[[0,0],[-13.126,-2.879],[-22.214,18.009],[18.861,0.642]],"v":[[148.422,-79.743],[140.165,-98.488],[77.096,-73.905],[72.678,-33.765]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":28,"s":[{"i":[[-16.385,37.178],[13.145,3.259],[22.929,-17.637],[-18.992,-1.146]],"o":[[0,0],[-13.145,-3.259],[-22.929,17.637],[18.992,1.146]],"v":[[150.299,-76.148],[142.535,-95.333],[78.229,-72.118],[72.577,-31.619]],"c":true}]},{"t":29,"s":[{"i":[[-17.42,37.213],[13.161,3.578],[23.53,-17.325],[-19.103,-1.571]],"o":[[0,0],[-13.161,-3.578],[-23.53,17.325],[19.103,1.571]],"v":[[151.878,-73.124],[144.529,-
92.678],[79.182,-70.616],[72.493,-29.813]],"c":true}],"h":1},{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":30,"s":[{"i":[[-18.009,37.233],[13.171,3.76],[23.873,-17.147],[-19.166,-1.812]],"o":[[0,0],[-13.171,-3.76],[-23.873,17.147],[19.166,1.812]],"v":[[152.778,-71.401],[145.666,-91.166],[79.725,-69.759],[72.445,-28.784]],"c":true}]},{"i":{"x":0.833,"y":1},"o":{"x":0.333,"y":0},"t":35.398,"s":[{"i":[[-18.009,37.233],[13.171,3.76],[23.873,-17.147],[-19.166,-1.812]],"o":[[0,0],[-13.171,-3.76],[-23.873,17.147],[19.166,1.812]],"v":[[152.778,-71.401],[145.666,-91.166],[79.725,-69.759],[72.445,-28.784]],"c":true}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.167,"y":0.192},"t":36.266,"s":[{"i":[[-17.42,37.213],[13.161,3.578],[23.53,-17.325],[-19.103,-1.571]],"o":[[0,0],[-13.161,-3.578],[-23.53,17.325],[19.103,1.571]],"v":[[151.878,-73.124],[144.529,-92.678],[79.182,-70.616],[72.493,-29.813]],"c":true}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.167,"y":0.192},"t":37.133,"s":[{"i":[[-16.385,37.178],[13.145,3.259],[22.929,-17.637],[-18.992,-1.146]],"o":[[0,0],[-13.145,-3.259],[-22.929,17.637],[18.992,1.146]],"v":[[150.299,-76.148],[142.535,-95.333],[78.229,-72.118],[72.577,-31.619]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.192},"t":38,"s":[{"i":[[-15.156,37.136],[13.126,2.879],[22.214,-18.009],[-18.861,-0.642]],"o":[[0,0],[-13.126,-2.879],[-22.214,18.009],[18.861,0.642]],"v":[[148.422,-79.743],[140.165,-98.488],[77.096,-73.905],[72.678,-33.765]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":38.869,"s":[{"i":[[-13.807,37.09],[13.106,2.462],[21.43,-18.416],[-18.716,-0.089]],"o":[[0,0],[-13.106,-2.462],[-21.43,18.416],[18.716,0.089]],"v":[[146.363,-83.687],[137.564,-101.949],[75.854,-75.864],[72.788,-36.119]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":39.736,"s":[{"i":[[-12.378,37.042],[13.084,2.021],[20.598,-18.847],[-18.563,0.497]],"o":[[0,0],[-13.084,-2.021],[-20.598,18.847],[18.563,-0.497]],"v":[[144.18,-87.867],[
134.808,-105.618],[74.536,-77.942],[72.905,-38.615]],"c":true}]},{"i":{"x":0.667,"y":1},"o":{"x":0.167,"y":0.167},"t":46.676,"s":[{"i":[[31.58,26.707],[6.919,-11.821],[-10.667,-27.389],[-6.524,18.112]],"o":[[0,0],[-6.919,11.821],[10.667,27.389],[6.524,-18.112]],"v":[[32.756,-210.226],[11.843,-208.256],[16.168,-139.063],[54.043,-121.815]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.333,"y":0},"t":51.881,"s":[{"i":[[38.98,13.825],[2.296,-13.503],[-19.657,-21.853],[0.297,19.249]],"o":[[0,0],[-2.296,13.503],[19.657,21.853],[-0.297,-19.249]],"v":[[-20.94,-214.679],[-39.808,-205.447],[-11.312,-142.245],[30.213,-139.494]],"c":true}]},{"i":{"x":0.667,"y":1},"o":{"x":0.167,"y":0.167},"t":55.775,"s":[{"i":[[31.58,26.707],[6.919,-11.821],[-10.667,-27.389],[-6.524,18.112]],"o":[[0,0],[-6.919,11.821],[10.667,27.389],[6.524,-18.112]],"v":[[32.756,-210.226],[11.843,-208.256],[16.168,-139.063],[54.043,-121.815]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.333,"y":0},"t":60.393,"s":[{"i":[[26.129,32.06],[8.975,-10.346],[-5.448,-28.884],[-9.744,16.603]],"o":[[0,0],[-8.975,10.346],[5.448,28.884],[9.744,-16.603]],"v":[[62.066,-196.554],[41.147,-198.464],[32.672,-129.656],[66.728,-105.737]],"c":true}]},{"t":77,"s":[{"i":[[31.58,26.707],[6.919,-11.821],[-10.667,-27.389],[-6.524,18.112]],"o":[[0,0],[-6.919,11.821],[10.667,27.389],[6.524,-18.112]],"v":[[32.756,-210.226],[11.843,-208.256],[16.168,-139.063],[54.043,-121.815]],"c":true}]}]},"hd":false},{"ty":"fl","c":{"a":0,"k":[1,0.874509811401,0.474509805441,1]},"o":{"a":0,"k":100},"r":1,"bm":0,"hd":false},{"ty":"tr","p":{"a":0,"k":[0,0]},"a":{"a":0,"k":[0,0]},"s":{"a":0,"k":[100,100]},"r":{"a":0,"k":0},"o":{"a":0,"k":100},"sk":{"a":0,"k":0},"sa":{"a":0,"k":0}}],"bm":0,"hd":false},{"ty":"gr","it":[{"ind":0,"ty":"sh","ks":{"a":1,"k":[{"i":{"x":0.833,"y":0.833},"o":{"x":0.333,"y":0},"t":0,"s":[{"i":[[1.256,-14.508],[71.173,-6.66],[31.965,24.924],[0,0],[-35.34,52.999],[-5.561,22.48],[-21.796,12.03],[-5.566,0.135],[-3.46,-39.788],[19.369
,-26.14],[16.912,-48.623],[-9.18,-14.639],[-23.837,-21.729],[-2.949,6.862],[-4.901,27.091],[17.681,4.227],[-28.801,37.93],[-20.776,34.618],[-22.051,20.742],[-0.673,-4.675],[-9.283,-1.816],[0,0],[-12.4,-8.936],[-22.869,-2.02],[-11.526,-16.235]],"o":[[-6.66,76.813],[-71.184,6.66],[0,0],[0,0],[35.329,-53.01],[8.947,-36.136],[2.825,-1.558],[0,0],[2.572,29.578],[-15.654,21.126],[-12.906,37.104],[7.395,11.792],[23.837,21.729],[2.938,-6.85],[2.972,-16.425],[-17.681,-4.227],[10.21,-13.447],[17.298,-28.823],[-1.233,4.989],[1.301,8.936],[0,0.011],[0,0],[12.389,8.947],[8.339,0.736],[0.886,9.967]],"v":[[170.61,37.762],[67.337,182.418],[-85.178,158.537],[-105.472,55.32],[-86.804,-47.157],[-13.254,-133.197],[-1.414,-214.618],[14.726,-218.092],[-1.098,-178.304],[-12.088,-106.356],[-88.934,-10.259],[-85.503,74.313],[-71.668,119.822],[4.192,133.803],[0.311,92.736],[-51.677,88.922],[6.918,46.599],[27.892,-4.912],[42.169,-90.511],[38.893,-67.047],[55.879,-52.225],[55.879,-52.214],[57.976,-33.579],[138.791,-26.718],[171.115,1.278]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":10,"s":[{"i":[[1.256,-14.508],[71.173,-6.66],[31.965,24.924],[0,0],[-35.34,52.999],[-13.163,17.34],[-24.246,3.31],[-5.136,-1.65],[11.136,-34.912],[27.973,-19.523],[16.912,-48.623],[-9.18,-14.639],[-23.837,-21.729],[-2.949,6.862],[-4.901,27.091],[17.681,4.227],[-28.801,37.93],[-20.157,34.964],[-17.512,17.753],[0.854,-3.204],[-9.283,-1.816],[0,0],[-9.377,-6.445],[-22.869,-2.02],[-9.466,-18.224]],"o":[[-6.66,76.813],[-71.184,6.66],[0,0],[0,0],[35.329,-53.01],[21.165,-27.875],[3.143,-0.429],[0,0],[-8.278,25.953],[-29.257,15.467],[-12.906,37.104],[7.395,11.792],[23.837,21.729],[2.938,-6.85],[2.972,-16.425],[-17.681,-4.227],[10.21,-13.447],[13.236,-22.277],[0.408,1.592],[-1.621,6.124],[0,0.011],[0,0],[13.19,6.83],[8.339,0.736],[0.886,9.967]],"v":[[170.61,37.762],[67.337,182.418],[-85.178,158.537],[-105.472,55.32],[-86.804,-47.157],[9.789,-127.258],[49.872,-192.704],[65.876,-190.539],[37.111,-161.
741],[2.02,-102.439],[-88.934,-10.259],[-85.503,74.313],[-71.668,119.822],[4.192,133.803],[0.311,92.736],[-51.677,88.922],[6.918,46.599],[27.004,-3.668],[40.162,-70.024],[42.673,-55.901],[54.258,-47.489],[55.279,-47.389],[57.976,-38.923],[138.518,-27.147],[171.115,1.278]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":14,"s":[{"i":[[1.256,-14.508],[71.173,-6.66],[31.965,24.924],[0,0],[-35.34,52.999],[-17.599,14.34],[-25.676,-1.777],[-4.886,-2.691],[19.652,-32.068],[32.993,-15.662],[16.912,-48.623],[-9.18,-14.639],[-23.837,-21.729],[-2.949,6.862],[-4.901,27.091],[17.681,4.227],[-28.801,37.93],[-19.795,35.166],[-14.864,16.009],[1.744,-2.346],[-9.284,-1.816],[0,0],[-7.613,-4.992],[-22.869,-2.02],[-8.264,-19.385]],"o":[[-6.66,76.813],[-71.184,6.66],[0,0],[0,0],[35.329,-53.01],[28.294,-23.055],[3.328,0.23],[0,0],[-14.609,23.839],[-37.194,12.165],[-12.906,37.104],[7.395,11.792],[23.837,21.729],[2.938,-6.85],[2.972,-16.425],[-17.681,-4.227],[10.21,-13.447],[10.866,-18.458],[1.366,-0.39],[-3.326,4.484],[0,0.011],[0,0],[13.658,5.595],[8.339,0.736],[0.886,9.967]],"v":[[170.61,37.762],[67.337,182.418],[-85.178,158.537],[-105.472,55.32],[-86.804,-47.157],[23.233,-123.793],[79.797,-179.917],[95.72,-174.462],[59.405,-152.076],[10.252,-100.154],[-88.934,-10.259],[-85.503,74.313],[-71.668,119.822],[4.192,133.803],[0.311,92.736],[-51.677,88.922],[6.918,46.599],[26.486,-2.943],[38.991,-58.07],[44.879,-49.397],[53.312,-44.254],[54.93,-44.102],[57.976,-40.282],[137.526,-26.421],[171.115,1.278]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":20,"s":[{"i":[[1.256,-14.508],[71.173,-6.66],[31.965,24.924],[0,0],[-35.34,52.999],[-20.292,7.491],[-21.507,-9.696],[-3.253,-3.983],[28.384,-22.951],[32.748,-7.238],[16.912,-48.623],[-9.18,-14.639],[-23.837,-21.729],[-2.949,6.862],[-4.901,27.091],[17.681,4.227],[-28.801,37.93],[-19.214,35.491],[-10.61,13.207],[3.181,-1.385],[-9.303,1.11],[0,0],[-5.197,0.638],[-29.774,-3.569],[-6.333,-21.25]],"o":[[-6.66,76
.813],[-71.184,6.66],[0,0],[0,0],[35.329,-53.01],[32.622,-12.043],[2.788,1.257],[0,0],[-21.101,17.062],[-40.578,5.135],[-12.906,37.104],[7.395,11.792],[23.837,21.729],[2.938,-6.85],[2.972,-16.425],[-17.681,-4.227],[10.21,-13.447],[7.058,-12.323],[2.905,-3.574],[-6.077,2.647],[0.001,0.001],[0,0],[14.328,-1.63],[8.311,0.996],[0.886,9.967]],"v":[[170.61,37.762],[67.337,182.418],[-85.178,158.537],[-105.472,55.32],[-86.804,-47.157],[44.304,-110.713],[113.098,-143.905],[124.882,-133.938],[85.583,-125.027],[26.217,-92.321],[-88.934,-10.259],[-85.503,74.313],[-71.668,119.822],[4.192,133.803],[0.311,92.736],[-51.677,88.922],[6.918,46.599],[25.653,-1.777],[37.109,-38.865],[48.487,-35.061],[51.96,-33.452],[54.512,-33.794],[59.935,-34.376],[137.236,-27.841],[171.115,1.278]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":25,"s":[{"i":[[1.256,-14.508],[71.173,-6.66],[31.965,24.924],[0,0],[-35.34,52.999],[-22.117,2.848],[-18.681,-15.064],[-2.146,-4.858],[34.303,-16.772],[32.583,-1.529],[16.912,-48.623],[-9.18,-14.639],[-23.837,-21.729],[-2.949,6.862],[-4.901,27.091],[17.681,4.227],[-28.801,37.93],[-18.821,35.711],[-7.726,11.308],[4.155,-0.734],[-9.403,0.34],[0,0],[-3.011,0.327],[-25.375,-2.582],[-5.024,-22.515]],"o":[[-6.66,76.813],[-71.184,6.66],[0,0],[0,0],[35.329,-53.01],[35.555,-4.579],[2.421,1.952],[0,0],[-25.501,12.468],[-42.872,0.371],[-12.906,37.104],[7.395,11.792],[23.837,21.729],[2.938,-6.85],[2.972,-16.425],[-17.681,-4.227],[10.21,-13.447],[4.477,-8.164],[3.948,-5.732],[-7.941,1.402],[0.002,0.007],[0,0],[37.323,-15.998],[8.329,0.831],[0.886,9.967]],"v":[[170.61,37.762],[67.337,182.418],[-85.178,158.537],[-105.472,55.32],[-86.804,-47.157],[58.586,-101.847],[135.669,-119.495],[144.649,-106.47],[103.327,-106.693],[37.038,-87.012],[-88.934,-10.259],[-85.503,74.313],[-71.668,119.822],[4.192,133.803],[0.311,92.736],[-51.677,88.922],[6.918,46.599],[25.089,-0.987],[35.834,-25.848],[50.932,-25.344],[50.91,-24.635],[54.124,-24.973],[58.732,-25.646],[141.81,-
34.889],[171.115,1.278]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":26,"s":[{"i":[[1.256,-14.508],[71.173,-6.66],[31.965,24.924],[0,0],[-35.34,52.999],[-22.381,2.176],[-18.272,-15.84],[-1.986,-4.985],[35.159,-15.878],[32.559,-0.703],[16.912,-48.623],[-9.18,-14.639],[-23.837,-21.729],[-2.949,6.862],[-4.901,27.091],[17.681,4.227],[-28.801,37.93],[-18.764,35.742],[-7.309,11.033],[4.296,-0.639],[-9.417,0.229],[0,0],[-2.695,0.282],[-31.729,-11.516],[-4.835,-22.697]],"o":[[-6.66,76.813],[-71.184,6.66],[0,0],[0,0],[35.329,-53.01],[35.979,-3.499],[2.368,2.053],[0,0],[-26.137,11.804],[-43.204,-0.319],[-12.906,37.104],[7.395,11.792],[23.837,21.729],[2.938,-6.85],[2.972,-16.425],[-17.681,-4.227],[10.21,-13.447],[4.104,-7.562],[4.098,-6.044],[-8.211,1.222],[0.002,0.008],[0,0],[14.984,-1.382],[7.868,2.856],[0.886,9.967]],"v":[[170.61,37.762],[67.337,182.418],[-85.178,158.537],[-105.472,55.32],[-86.804,-47.157],[60.652,-100.564],[138.934,-115.965],[147.508,-102.497],[105.893,-104.041],[38.603,-86.244],[-88.934,-10.259],[-85.503,74.313],[-71.668,119.822],[4.192,133.803],[0.311,92.736],[-51.677,88.922],[6.918,46.599],[25.007,-0.873],[35.65,-23.965],[51.286,-23.938],[50.758,-23.36],[54.068,-23.697],[58.558,-24.383],[142.472,-35.909],[171.115,1.278]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":27,"s":[{"i":[[1.256,-14.508],[71.173,-6.66],[31.965,24.924],[0,0],[-35.34,52.999],[-22.63,1.543],[-17.886,-16.572],[-1.835,-5.104],[35.967,-15.035],[32.536,0.076],[16.912,-48.623],[-9.18,-14.639],[-23.837,-21.729],[-2.949,6.862],[-4.901,27.091],[17.681,4.227],[-28.801,37.93],[-18.71,35.772],[-6.916,10.774],[4.429,-0.55],[-9.43,0.124],[0,0],[-2.397,0.239],[-24.139,-2.305],[-4.656,-22.87]],"o":[[-6.66,76.813],[-71.184,6.66],[0,0],[0,0],[35.329,-53.01],[36.379,-2.481],[2.318,2.148],[0,0],[-26.738,11.177],[-43.517,-0.969],[-12.906,37.104],[7.395,11.792],[23.837,21.729],[2.938,-6.85],[2.972,-16.425],[-17.681,-4.227],[10.21,-13.447],[3.752,-6.995],[
4.241,-6.339],[-8.465,1.052],[0.002,0.009],[0,0],[15.062,-1.353],[8.334,0.784],[0.886,9.967]],"v":[[170.61,37.762],[67.337,182.418],[-85.178,158.537],[-105.472,55.32],[-86.804,-47.157],[62.6,-99.355],[142.014,-112.635],[150.204,-98.749],[108.314,-101.539],[40.08,-85.52],[-88.934,-10.259],[-85.503,74.313],[-71.668,119.822],[4.192,133.803],[0.311,92.736],[-51.677,88.922],[6.918,46.599],[24.93,-0.765],[35.476,-22.189],[51.62,-22.613],[50.614,-22.157],[54.016,-22.493],[58.394,-23.192],[139.033,-37.652],[171.115,1.278]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":28,"s":[{"i":[[1.256,-14.508],[71.173,-6.66],[31.965,24.924],[0,0],[-35.34,52.999],[-22.857,0.966],[-17.535,-17.24],[-1.698,-5.213],[36.703,-14.267],[32.515,0.786],[16.912,-48.623],[-9.18,-14.639],[-23.837,-21.729],[-2.949,6.862],[-4.901,27.091],[17.681,4.227],[-28.801,37.93],[-18.661,35.8],[-6.557,10.538],[4.55,-0.469],[-9.443,0.028],[0,0],[-2.125,0.201],[-23.592,-2.182],[-4.493,-23.027]],"o":[[-6.66,76.813],[-71.184,6.66],[0,0],[0,0],[35.329,-53.01],[36.744,-1.552],[2.273,2.234],[0,0],[-27.285,10.606],[-43.802,-1.561],[-12.906,37.104],[7.395,11.792],[23.837,21.729],[2.938,-6.85],[2.972,-16.425],[-17.681,-4.227],[10.21,-13.447],[3.431,-6.478],[4.37,-6.607],[-8.697,0.897],[0.002,0.01],[0,0],[15.133,-1.326],[8.336,0.764],[0.886,9.967]],"v":[[170.61,37.762],[67.337,182.418],[-85.178,158.537],[-105.472,55.32],[-86.804,-47.157],[64.377,-98.252],[144.821,-109.599],[152.663,-95.333],[110.521,-99.259],[41.426,-84.859],[-88.934,-10.259],[-85.503,74.313],[-71.668,119.822],[4.192,133.803],[0.311,92.736],[-51.677,88.922],[6.918,46.599],[24.86,-0.667],[35.317,-20.57],[51.924,-21.404],[50.484,-21.061],[53.967,-21.396],[58.244,-22.106],[142.237,-39.477],[171.115,1.278]],"c":true}]},{"t":29,"s":[{"i":[[1.256,-14.508],[71.173,-6.66],[31.965,24.924],[0,0],[-35.34,52.999],[-23.048,0.48],[-17.239,-17.801],[-1.582,-5.305],[37.323,-13.62],[32.498,1.383],[16.912,-48.623],[-9.18,-14.639],[-23.837,-21.729],[-2.
949,6.862],[-4.901,27.091],[17.681,4.227],[-28.801,37.93],[-18.62,35.823],[-6.255,10.339],[4.652,-0.401],[-9.453,-0.052],[0,0],[-1.896,0.168],[-23.131,-2.079],[-4.356,-23.159]],"o":[[-6.66,76.813],[-71.184,6.66],[0,0],[0,0],[35.329,-53.01],[37.051,-0.771],[2.234,2.307],[0,0],[-27.745,10.125],[-44.042,-2.06],[-12.906,37.104],[7.395,11.792],[23.837,21.729],[2.938,-6.85],[2.972,-16.425],[-17.681,-4.227],[10.21,-13.447],[3.161,-6.042],[4.48,-6.833],[-8.892,0.767],[0.002,0.011],[0,0],[15.193,-1.303],[8.337,0.746],[0.886,9.967]],"v":[[170.61,37.762],[67.337,182.418],[-85.178,158.537],[-105.472,55.32],[-86.804,-47.157],[65.871,-97.324],[147.183,-107.044],[154.731,-92.459],[112.378,-97.34],[42.558,-84.304],[-88.934,-10.259],[-85.503,74.313],[-71.668,119.822],[4.192,133.803],[0.311,92.736],[-51.677,88.922],[6.918,46.599],[24.801,-0.584],[35.184,-19.208],[52.18,-20.387],[50.374,-20.138],[53.927,-20.473],[58.119,-21.193],[144.703,-35.937],[171.115,1.278]],"c":true}],"h":1},{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":30,"s":[{"i":[[1.256,-14.508],[71.173,-6.66],[31.965,24.924],[0,0],[-35.34,52.999],[-23.157,0.203],[-17.071,-18.121],[-1.516,-5.357],[37.675,-13.252],[32.488,1.724],[16.912,-48.623],[-9.18,-14.639],[-23.837,-21.729],[-2.949,6.862],[-4.901,27.091],[17.681,4.227],[-28.801,37.93],[-18.596,35.836],[-6.083,10.226],[4.71,-0.362],[-9.459,-0.098],[0,0],[-1.766,0.15],[-22.869,-2.02],[-4.278,-23.235]],"o":[[-6.66,76.813],[-71.184,6.66],[0,0],[0,0],[35.329,-53.01],[37.226,-0.326],[2.213,2.349],[0,0],[-28.007,9.851],[-44.179,-2.344],[-12.906,37.104],[7.395,11.792],[23.837,21.729],[2.938,-6.85],[2.972,-16.425],[-17.681,-4.227],[10.21,-13.447],[3.007,-5.794],[4.542,-6.962],[-9.003,0.693],[0.002,0.011],[0,0],[15.228,-1.29],[8.339,0.736],[0.886,9.967]],"v":[[170.61,37.762],[67.337,182.418],[-85.178,158.537],[-105.472,55.32],[-86.804,-47.157],[66.722,-96.796],[148.528,-105.589],[155.91,-90.821],[113.435,-96.247],[43.203,-83.987],[-88.934,-10.259],[-85.503,74.313],[-71.668,119.
822],[4.192,133.803],[0.311,92.736],[-51.677,88.922],[6.918,46.599],[24.767,-0.537],[35.108,-18.432],[52.325,-19.808],[50.311,-19.612],[53.904,-19.947],[58.047,-20.672],[144.416,-38.905],[171.115,1.278]],"c":true}]},{"i":{"x":0.833,"y":1},"o":{"x":0.333,"y":0},"t":35.398,"s":[{"i":[[1.256,-14.508],[71.173,-6.66],[31.965,24.924],[0,0],[-35.34,52.999],[-23.157,0.203],[-17.071,-18.121],[-1.516,-5.357],[37.675,-13.252],[32.488,1.724],[16.912,-48.623],[-9.18,-14.639],[-23.837,-21.729],[-2.949,6.862],[-4.901,27.091],[17.681,4.227],[-28.801,37.93],[-18.596,35.836],[-6.083,10.226],[4.71,-0.362],[-9.459,-0.098],[0,0],[-1.766,0.15],[-22.869,-2.02],[-4.278,-23.235]],"o":[[-6.66,76.813],[-71.184,6.66],[0,0],[0,0],[35.329,-53.01],[37.226,-0.326],[2.213,2.349],[0,0],[-28.007,9.851],[-44.179,-2.344],[-12.906,37.104],[7.395,11.792],[23.837,21.729],[2.938,-6.85],[2.972,-16.425],[-17.681,-4.227],[10.21,-13.447],[3.007,-5.794],[4.542,-6.962],[-9.003,0.693],[0.002,0.011],[0,0],[15.228,-1.29],[8.339,0.736],[0.886,9.967]],"v":[[170.61,37.762],[67.337,182.418],[-85.178,158.537],[-105.472,55.32],[-86.804,-47.157],[66.722,-96.796],[148.528,-105.589],[155.91,-90.821],[113.435,-96.247],[43.203,-83.987],[-88.934,-10.259],[-85.503,74.313],[-71.668,119.822],[4.192,133.803],[0.311,92.736],[-51.677,88.922],[6.918,46.599],[24.767,-0.537],[35.108,-18.432],[52.325,-19.808],[50.311,-19.612],[53.904,-19.947],[58.047,-20.672],[144.416,-38.905],[171.115,1.278]],"c":true}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.167,"y":0.192},"t":36.266,"s":[{"i":[[1.256,-14.508],[71.173,-6.66],[31.965,24.924],[0,0],[-35.34,52.999],[-23.048,0.48],[-17.239,-17.801],[-1.582,-5.305],[37.323,-13.62],[32.498,1.383],[16.912,-48.623],[-9.18,-14.639],[-23.837,-21.729],[-2.949,6.862],[-4.901,27.091],[17.681,4.227],[-28.801,37.93],[-18.62,35.823],[-6.255,10.339],[4.652,-0.401],[-9.453,-0.052],[0,0],[-1.896,0.168],[-23.131,-2.079],[-4.356,-23.159]],"o":[[-6.66,76.813],[-71.184,6.66],[0,0],[0,0],[35.329,-53.01],[37.051,-0.771],[2.234
,2.307],[0,0],[-27.745,10.125],[-44.042,-2.06],[-12.906,37.104],[7.395,11.792],[23.837,21.729],[2.938,-6.85],[2.972,-16.425],[-17.681,-4.227],[10.21,-13.447],[3.161,-6.042],[4.48,-6.833],[-8.892,0.767],[0.002,0.011],[0,0],[15.193,-1.303],[8.337,0.746],[0.886,9.967]],"v":[[170.61,37.762],[67.337,182.418],[-85.178,158.537],[-105.472,55.32],[-86.804,-47.157],[65.871,-97.324],[147.183,-107.044],[154.731,-92.459],[112.378,-97.34],[42.558,-84.304],[-88.934,-10.259],[-85.503,74.313],[-71.668,119.822],[4.192,133.803],[0.311,92.736],[-51.677,88.922],[6.918,46.599],[24.801,-0.584],[35.184,-19.208],[52.18,-20.387],[50.374,-20.138],[53.927,-20.473],[58.119,-21.193],[144.703,-35.937],[171.115,1.278]],"c":true}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.167,"y":0.192},"t":37.133,"s":[{"i":[[1.256,-14.508],[71.173,-6.66],[31.965,24.924],[0,0],[-35.34,52.999],[-22.857,0.966],[-17.535,-17.24],[-1.698,-5.213],[36.703,-14.267],[32.515,0.786],[16.912,-48.623],[-9.18,-14.639],[-23.837,-21.729],[-2.949,6.862],[-4.901,27.091],[17.681,4.227],[-28.801,37.93],[-18.661,35.8],[-6.557,10.538],[4.55,-0.469],[-9.443,0.028],[0,0],[-2.125,0.201],[-23.592,-2.182],[-4.493,-23.027]],"o":[[-6.66,76.813],[-71.184,6.66],[0,0],[0,0],[35.329,-53.01],[36.744,-1.552],[2.273,2.234],[0,0],[-27.285,10.606],[-43.802,-1.561],[-12.906,37.104],[7.395,11.792],[23.837,21.729],[2.938,-6.85],[2.972,-16.425],[-17.681,-4.227],[10.21,-13.447],[3.431,-6.478],[4.37,-6.607],[-8.697,0.897],[0.002,0.01],[0,0],[15.133,-1.326],[8.336,0.764],[0.886,9.967]],"v":[[170.61,37.762],[67.337,182.418],[-85.178,158.537],[-105.472,55.32],[-86.804,-47.157],[64.377,-98.252],[144.821,-109.599],[152.663,-95.333],[110.521,-99.259],[41.426,-84.859],[-88.934,-10.259],[-85.503,74.313],[-71.668,119.822],[4.192,133.803],[0.311,92.736],[-51.677,88.922],[6.918,46.599],[24.86,-0.667],[35.317,-20.57],[51.924,-21.404],[50.484,-21.061],[53.967,-21.396],[58.244,-22.106],[142.237,-39.477],[171.115,1.278]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"
y":0.192},"t":38,"s":[{"i":[[1.256,-14.508],[71.173,-6.66],[31.965,24.924],[0,0],[-35.34,52.999],[-22.63,1.543],[-17.886,-16.572],[-1.835,-5.104],[35.967,-15.035],[32.536,0.076],[16.912,-48.623],[-9.18,-14.639],[-23.837,-21.729],[-2.949,6.862],[-4.901,27.091],[17.681,4.227],[-28.801,37.93],[-18.71,35.772],[-6.916,10.774],[4.429,-0.55],[-9.43,0.124],[0,0],[-2.397,0.239],[-24.139,-2.305],[-4.656,-22.87]],"o":[[-6.66,76.813],[-71.184,6.66],[0,0],[0,0],[35.329,-53.01],[36.379,-2.481],[2.318,2.148],[0,0],[-26.738,11.177],[-43.517,-0.969],[-12.906,37.104],[7.395,11.792],[23.837,21.729],[2.938,-6.85],[2.972,-16.425],[-17.681,-4.227],[10.21,-13.447],[3.752,-6.995],[4.241,-6.339],[-8.465,1.052],[0.002,0.009],[0,0],[15.062,-1.353],[8.334,0.784],[0.886,9.967]],"v":[[170.61,37.762],[67.337,182.418],[-85.178,158.537],[-105.472,55.32],[-86.804,-47.157],[62.6,-99.355],[142.014,-112.635],[150.204,-98.749],[108.314,-101.539],[40.08,-85.52],[-88.934,-10.259],[-85.503,74.313],[-71.668,119.822],[4.192,133.803],[0.311,92.736],[-51.677,88.922],[6.918,46.599],[24.93,-0.765],[35.476,-22.189],[51.62,-22.613],[50.614,-22.157],[54.016,-22.493],[58.394,-23.192],[139.033,-37.652],[171.115,1.278]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":38.869,"s":[{"i":[[1.256,-14.508],[71.173,-6.66],[31.965,24.924],[0,0],[-35.34,52.999],[-22.381,2.176],[-18.272,-15.84],[-1.986,-4.985],[35.159,-15.878],[32.559,-0.703],[16.912,-48.623],[-9.18,-14.639],[-23.837,-21.729],[-2.949,6.862],[-4.901,27.091],[17.681,4.227],[-28.801,37.93],[-18.764,35.742],[-7.309,11.033],[4.296,-0.639],[-9.417,0.229],[0,0],[-2.695,0.282],[-31.729,-11.516],[-4.835,-22.697]],"o":[[-6.66,76.813],[-71.184,6.66],[0,0],[0,0],[35.329,-53.01],[35.979,-3.499],[2.368,2.053],[0,0],[-26.137,11.804],[-43.204,-0.319],[-12.906,37.104],[7.395,11.792],[23.837,21.729],[2.938,-6.85],[2.972,-16.425],[-17.681,-4.227],[10.21,-13.447],[4.104,-7.562],[4.098,-6.044],[-8.211,1.222],[0.002,0.008],[0,0],[14.984,-1.382],[7.868,2.856],[0.
886,9.967]],"v":[[170.61,37.762],[67.337,182.418],[-85.178,158.537],[-105.472,55.32],[-86.804,-47.157],[60.652,-100.564],[138.934,-115.965],[147.508,-102.497],[105.893,-104.041],[38.603,-86.244],[-88.934,-10.259],[-85.503,74.313],[-71.668,119.822],[4.192,133.803],[0.311,92.736],[-51.677,88.922],[6.918,46.599],[25.007,-0.873],[35.65,-23.965],[51.286,-23.938],[50.758,-23.36],[54.068,-23.697],[58.558,-24.383],[142.472,-35.909],[171.115,1.278]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":39.736,"s":[{"i":[[1.256,-14.508],[71.173,-6.66],[31.965,24.924],[0,0],[-35.34,52.999],[-22.117,2.848],[-18.681,-15.064],[-2.146,-4.858],[34.303,-16.772],[32.583,-1.529],[16.912,-48.623],[-9.18,-14.639],[-23.837,-21.729],[-2.949,6.862],[-4.901,27.091],[17.681,4.227],[-28.801,37.93],[-18.821,35.711],[-7.726,11.308],[4.155,-0.734],[-9.403,0.34],[0,0],[-3.011,0.327],[-25.375,-2.582],[-5.024,-22.515]],"o":[[-6.66,76.813],[-71.184,6.66],[0,0],[0,0],[35.329,-53.01],[35.555,-4.579],[2.421,1.952],[0,0],[-25.501,12.468],[-42.872,0.371],[-12.906,37.104],[7.395,11.792],[23.837,21.729],[2.938,-6.85],[2.972,-16.425],[-17.681,-4.227],[10.21,-13.447],[4.477,-8.164],[3.948,-5.732],[-7.941,1.402],[0.002,0.007],[0,0],[37.323,-15.998],[8.329,0.831],[0.886,9.967]],"v":[[170.61,37.762],[67.337,182.418],[-85.178,158.537],[-105.472,55.32],[-86.804,-47.157],[58.586,-101.847],[135.669,-119.495],[144.649,-106.47],[103.327,-106.693],[37.038,-87.012],[-88.934,-10.259],[-85.503,74.313],[-71.668,119.822],[4.192,133.803],[0.311,92.736],[-51.677,88.922],[6.918,46.599],[25.089,-0.987],[35.834,-25.848],[50.932,-25.344],[50.91,-24.635],[54.124,-24.973],[58.732,-25.646],[141.81,-34.889],[171.115,1.278]],"c":true}]},{"i":{"x":0.667,"y":1},"o":{"x":0.167,"y":0.167},"t":46.676,"s":[{"i":[[1.256,-14.508],[71.173,-6.66],[31.965,24.924],[0,0],[-35.34,52.999],[-5.561,22.48],[-21.796,12.03],[-5.566,0.135],[-3.46,-39.788],[19.369,-26.14],[16.912,-48.623],[-9.18,-14.639],[-23.837,-21.729],[-2.949,6.862],[-4
.901,27.091],[17.681,4.227],[-28.801,37.93],[-20.776,34.618],[-22.051,20.742],[-0.673,-4.675],[-9.283,-1.816],[0,0],[-12.4,-8.936],[-22.869,-2.02],[-11.526,-16.235]],"o":[[-6.66,76.813],[-71.184,6.66],[0,0],[0,0],[35.329,-53.01],[8.947,-36.136],[2.825,-1.558],[0,0],[2.572,29.578],[-15.654,21.126],[-12.906,37.104],[7.395,11.792],[23.837,21.729],[2.938,-6.85],[2.972,-16.425],[-17.681,-4.227],[10.21,-13.447],[17.298,-28.823],[-1.233,4.989],[1.301,8.936],[0,0.011],[0,0],[12.389,8.947],[8.339,0.736],[0.886,9.967]],"v":[[170.61,37.762],[67.337,182.418],[-85.178,158.537],[-105.472,55.32],[-86.804,-47.157],[-13.254,-133.197],[-1.414,-214.618],[14.726,-218.092],[-1.098,-178.304],[-12.088,-106.356],[-88.934,-10.259],[-85.503,74.313],[-71.668,119.822],[4.192,133.803],[0.311,92.736],[-51.677,88.922],[6.918,46.599],[27.892,-4.912],[42.169,-90.511],[38.893,-67.047],[55.879,-52.225],[55.879,-52.214],[57.976,-33.579],[138.791,-26.718],[171.115,1.278]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.333,"y":0},"t":51.881,"s":[{"i":[[1.256,-14.508],[71.173,-6.66],[31.965,24.924],[0,0],[-35.34,52.999],[2.737,22.995],[-16.144,18.952],[-5.159,2.093],[-17.296,-35.998],[8.883,-31.298],[16.912,-48.623],[-9.18,-14.639],[-23.837,-21.729],[-2.949,6.862],[-4.901,27.091],[17.681,4.227],[-28.801,37.93],[-20.776,34.618],[-13.299,27.196],[-0.673,-4.675],[-11.542,0.893],[0,0],[-12.4,-8.936],[-22.869,-2.02],[-11.526,-16.235]],"o":[[-6.66,76.813],[-71.184,6.66],[0,0],[0,0],[35.329,-53.01],[-4.399,-36.966],[2.092,-2.456],[0,0],[12.858,26.761],[-7.179,25.295],[-12.906,37.104],[7.395,11.792],[23.837,21.729],[2.938,-6.85],[2.972,-16.425],[-17.681,-4.227],[10.21,-13.447],[17.298,-28.823],[0.609,5.103],[1.301,8.936],[0,0.011],[0,0],[12.389,8.947],[8.339,0.736],[0.886,9.967]],"v":[[170.61,37.762],[67.337,182.418],[-85.178,158.537],[-105.472,55.32],[-86.804,-47.157],[-36.763,-126.362],[-54.458,-206.714],[-40.587,-215.667],[-41.331,-172.854],[-26.188,-101.664],[-88.934,-10.259],[-85.503,74.313],[-71.668,119
.822],[4.192,133.803],[0.311,92.736],[-51.677,88.922],[6.918,46.599],[27.892,-4.912],[27.668,-106.638],[31.08,-67.359],[55.879,-52.225],[55.879,-52.214],[57.976,-33.579],[138.791,-26.718],[171.115,1.278]],"c":true}]},{"i":{"x":0.667,"y":1},"o":{"x":0.167,"y":0.167},"t":55.775,"s":[{"i":[[1.256,-14.508],[71.173,-6.66],[31.965,24.924],[0,0],[-35.34,52.999],[-5.561,22.48],[-21.796,12.03],[-5.566,0.135],[-3.46,-39.788],[19.369,-26.14],[16.912,-48.623],[-9.18,-14.639],[-23.837,-21.729],[-2.949,6.862],[-4.901,27.091],[17.681,4.227],[-28.801,37.93],[-20.776,34.618],[-22.051,20.742],[-0.673,-4.675],[-9.283,-1.816],[0,0],[-12.4,-8.936],[-22.869,-2.02],[-11.526,-16.235]],"o":[[-6.66,76.813],[-71.184,6.66],[0,0],[0,0],[35.329,-53.01],[8.947,-36.136],[2.825,-1.558],[0,0],[2.572,29.578],[-15.654,21.126],[-12.906,37.104],[7.395,11.792],[23.837,21.729],[2.938,-6.85],[2.972,-16.425],[-17.681,-4.227],[10.21,-13.447],[17.298,-28.823],[-1.233,4.989],[1.301,8.936],[0,0.011],[0,0],[12.389,8.947],[8.339,0.736],[0.886,9.967]],"v":[[170.61,37.762],[67.337,182.418],[-85.178,158.537],[-105.472,55.32],[-86.804,-47.157],[-13.254,-133.197],[-1.414,-214.618],[14.726,-218.092],[-1.098,-178.304],[-12.088,-106.356],[-88.934,-10.259],[-85.503,74.313],[-71.668,119.822],[4.192,133.803],[0.311,92.736],[-51.677,88.922],[6.918,46.599],[27.892,-4.912],[42.169,-90.511],[38.893,-67.047],[55.879,-52.225],[55.879,-52.214],[57.976,-33.579],[138.791,-26.718],[171.115,1.278]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.333,"y":0},"t":60.393,"s":[{"i":[[1.256,-14.508],[71.173,-6.66],[31.965,24.924],[0,0],[-35.34,52.999],[-9.605,21.072],[-23.639,7.81],[-5.495,-0.891],[3.917,-39.745],[23.847,-22.131],[16.912,-48.623],[-9.18,-14.639],[-23.837,-21.729],[-2.949,6.862],[-4.901,27.091],[17.681,4.227],[-28.801,37.93],[-20.776,34.618],[-25.49,16.332],[0.191,-4.72],[-9.283,-1.816],[0,0],[-12.4,-8.936],[-22.869,-2.02],[-11.526,-16.235]],"o":[[-6.66,76.813],[-71.184,6.66],[0,0],[0,0],[35.329,-53.01],[15.441,-33.874],[3.064,
-1.012],[0,0],[-2.912,29.546],[-19.273,17.886],[-12.906,37.104],[7.395,11.792],[23.837,21.729],[2.938,-6.85],[2.972,-16.425],[-17.681,-4.227],[10.21,-13.447],[17.298,-28.823],[-2.13,4.677],[-0.365,9.023],[0,0.011],[0,0],[12.389,8.947],[8.339,0.736],[0.886,9.967]],"v":[[170.61,37.762],[67.337,182.418],[-85.178,158.537],[-105.472,55.32],[-86.804,-47.157],[2.673,-129.302],[29.286,-207.156],[45.79,-207.602],[22.917,-171.404],[-1.117,-102.704],[-88.934,-10.259],[-85.503,74.313],[-71.668,119.822],[4.192,133.803],[0.311,92.736],[-51.677,88.922],[6.918,46.599],[27.892,-4.912],[49.299,-77.15],[46.295,-66.564],[55.879,-52.225],[55.879,-52.214],[57.976,-33.579],[138.791,-26.718],[171.115,1.278]],"c":true}]},{"t":77,"s":[{"i":[[1.256,-14.508],[71.173,-6.66],[31.965,24.924],[0,0],[-35.34,52.999],[-5.561,22.48],[-21.796,12.03],[-5.566,0.135],[-3.46,-39.788],[19.369,-26.14],[16.912,-48.623],[-9.18,-14.639],[-23.837,-21.729],[-2.949,6.862],[-4.901,27.091],[17.681,4.227],[-28.801,37.93],[-20.776,34.618],[-22.051,20.742],[-0.673,-4.675],[-9.283,-1.816],[0,0],[-12.4,-8.936],[-22.869,-2.02],[-11.526,-16.235]],"o":[[-6.66,76.813],[-71.184,6.66],[0,0],[0,0],[35.329,-53.01],[8.947,-36.136],[2.825,-1.558],[0,0],[2.572,29.578],[-15.654,21.126],[-12.906,37.104],[7.395,11.792],[23.837,21.729],[2.938,-6.85],[2.972,-16.425],[-17.681,-4.227],[10.21,-13.447],[17.298,-28.823],[-1.233,4.989],[1.301,8.936],[0,0.011],[0,0],[12.389,8.947],[8.339,0.736],[0.886,9.967]],"v":[[170.61,37.762],[67.337,182.418],[-85.178,158.537],[-105.472,55.32],[-86.804,-47.157],[-13.254,-133.197],[-1.414,-214.618],[14.726,-218.092],[-1.098,-178.304],[-12.088,-106.356],[-88.934,-10.259],[-85.503,74.313],[-71.668,119.822],[4.192,133.803],[0.311,92.736],[-51.677,88.922],[6.918,46.599],[27.892,-4.912],[42.169,-90.511],[38.893,-67.047],[55.879,-52.225],[55.879,-52.214],[57.976,-33.579],[138.791,-26.718],[171.115,1.278]],"c":true}]}]},"hd":false},{"ty":"fl","c":{"a":0,"k":[1,0.694117665291,0.223529413342,1]},"o":{"a":0,"k":100},
"r":1,"bm":0,"hd":false},{"ty":"tr","p":{"a":0,"k":[0,0]},"a":{"a":0,"k":[0,0]},"s":{"a":0,"k":[100,100]},"r":{"a":0,"k":0},"o":{"a":0,"k":100},"sk":{"a":0,"k":0},"sa":{"a":0,"k":0}}],"bm":0,"hd":false},{"ty":"gr","it":[{"ind":0,"ty":"sh","ks":{"a":1,"k":[{"i":{"x":0.833,"y":0.833},"o":{"x":0.333,"y":0},"t":0,"s":[{"i":[[-5.897,-35.026],[2.399,3.24],[14.161,2.893],[16.784,4.507],[0,0],[0,0],[-0.011,0],[-1.177,-0.034],[-6.637,0.034],[-4.731,-0.157],[-1.424,-0.09],[-0.706,-0.045],[-0.011,0],[-1.435,-0.146]],"o":[[-1.603,-2.982],[-8.33,-11.223],[-18.724,-3.823],[-19.318,-5.18],[0,0],[0.011,0],[1.132,0.078],[5.886,0.235],[4.63,-0.022],[1.424,0.056],[0.706,0.034],[0.011,0],[1.446,0.09],[30.25,3.094]],"v":[[170.05,-7.22],[164.074,-16.548],[128.207,-32.895],[73.538,-36.719],[65.14,-51.092],[65.151,-51.092],[65.174,-51.092],[68.638,-50.924],[87.586,-50.823],[101.68,-50.678],[105.94,-50.476],[108.059,-50.341],[108.082,-50.341],[112.398,-49.982]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":10,"s":[{"i":[[-5.897,-35.026],[2.399,3.24],[14.233,2.363],[16.807,3.963],[0,0],[0,0],[-0.011,0],[-1.176,-0.058],[-6.634,-0.085],[-4.732,-0.098],[-1.425,-0.07],[-0.707,-0.035],[-0.011,0],[-1.437,-0.126]],"o":[[-1.603,-2.982],[-8.33,-11.223],[-17.744,-3.047],[-14.266,-3.775],[0,0],[0.011,0],[1.13,0.099],[5.879,0.34],[4.628,0.06],[1.424,0.036],[0.707,0.024],[0.011,0],[1.447,0.069],[30.283,2.665]],"v":[[170.316,-4.467],[164.074,-16.548],[127.09,-31.548],[74.115,-40.73],[64.885,-48.356],[64.896,-48.356],[64.918,-48.356],[68.378,-48.126],[87.314,-49.445],[101.492,-50.044],[105.754,-49.903],[107.874,-49.798],[107.896,-49.798],[112.216,-49.501]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":14,"s":[{"i":[[-5.897,-35.026],[2.399,3.24],[14.276,2.055],[16.821,3.646],[0,0],[0,0],[-0.011,-0.001],[-1.175,-0.073],[-6.633,-0.155],[-4.732,-0.064],[-1.425,-0.058],[-0.707,-0.029],[-0.011,0],[-1.438,-0.114]],"o":[[-1.603,-2.982],[-8.33,-11.223],[-17.172,-2.594],
[-11.319,-2.955],[0,0],[0.011,0],[1.129,0.11],[5.874,0.402],[4.627,0.108],[1.424,0.024],[0.707,0.018],[0.011,0],[1.448,0.057],[30.302,2.414]],"v":[[170.472,-2.86],[164.074,-16.548],[125.605,-29.785],[74.452,-41.312],[64.54,-44.676],[64.552,-44.676],[64.574,-44.675],[68.031,-44.409],[87.155,-46.117],[101.382,-47.916],[105.645,-47.809],[107.766,-47.723],[107.788,-47.723],[112.11,-47.461]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":20,"s":[{"i":[[-5.897,-35.026],[2.399,3.24],[14.344,1.559],[16.743,-1.852],[0,0],[0,0],[-0.011,0.001],[-1.165,0.141],[-6.577,0.83],[-4.733,-0.009],[-1.426,-0.039],[-0.707,-0.02],[-0.011,0],[-1.439,-0.095]],"o":[[-1.603,-2.982],[-8.33,-11.223],[-16.254,-1.866],[-6.55,0.681],[0,0],[0.011,-0.001],[1.12,-0.132],[5.825,-0.707],[4.589,-0.579],[1.425,0.005],[0.707,0.008],[0.011,0],[1.448,0.038],[30.333,2.011]],"v":[[170.722,-0.279],[164.074,-16.548],[126.009,-29.462],[74.924,-36.269],[63.407,-34.958],[63.418,-34.959],[63.44,-34.962],[66.867,-35.375],[85.804,-37.662],[104.378,-39.648],[108.642,-39.599],[110.764,-39.54],[110.786,-39.541],[115.112,-39.337]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":25,"s":[{"i":[[-5.897,-35.026],[2.399,3.24],[14.39,1.222],[15.408,-4.982],[0,0],[0,0],[-0.011,0],[-1.169,-0.026],[-6.609,0.056],[-4.733,0.029],[-1.426,-0.026],[-0.708,-0.013],[-0.011,0],[-1.44,-0.082]],"o":[[-1.603,-2.982],[-8.33,-11.223],[-50.39,-2.149],[-3.361,0.096],[0,0],[0.011,0],[1.123,0.047],[5.847,0.128],[4.61,-0.039],[1.425,-0.008],[0.707,0.002],[0.011,-0.001],[1.449,0.025],[30.354,1.738]],"v":[[170.891,1.471],[164.074,-16.548],[123.789,-37.151],[75.335,-32.287],[63.939,-30.349],[63.95,-30.349],[63.972,-30.349],[67.412,-30.253],[87.734,-34.949],[95.251,-40.031],[99.516,-40.02],[101.638,-39.98],[101.661,-39.981],[105.989,-39.816]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":26,"s":[{"i":[[-5.897,-35.026],[2.399,3.24],[14.397,1.174],[16.832,1.39],[0,0],[0,0],[-0.011,-0.001]
,[-1.17,-0.05],[-6.613,-0.056],[-4.209,2.001],[-1.426,-0.024],[-0.708,-0.012],[-0.011,0],[-1.44,-0.08]],"o":[[-1.603,-2.982],[-8.33,-11.223],[-25.525,-6.755],[-2.9,0.011],[0,0],[0.011,0],[1.124,0.073],[5.85,0.249],[4.613,0.039],[1.287,-0.612],[0.707,0.001],[0.011,-0.001],[1.449,0.023],[30.357,1.699]],"v":[[170.915,1.724],[164.074,-16.548],[129.549,-36.295],[75.394,-28.849],[64.016,-29.682],[64.027,-29.682],[64.049,-29.682],[67.491,-29.512],[86.404,-29.191],[101.436,-36.926],[104.295,-37.467],[104.23,-37.587],[104.252,-37.588],[108.581,-39.303]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":27,"s":[{"i":[[-5.897,-35.026],[2.399,3.24],[14.403,1.128],[16.843,1.777],[0,0],[0,0],[-0.011,-0.001],[-1.17,-0.073],[-6.617,-0.161],[-4.377,1.375],[-1.426,-0.022],[-0.708,-0.011],[-0.011,0],[-1.44,-0.079]],"o":[[-1.603,-2.982],[-8.33,-11.223],[-15.456,-1.235],[-2.465,-0.069],[0,0],[0.011,0],[1.124,0.098],[5.853,0.363],[4.616,0.112],[1.331,-0.42],[0.707,0],[0.011,-0.001],[1.449,0.021],[30.36,1.661]],"v":[[170.939,1.962],[164.074,-16.548],[129.971,-32.422],[75.45,-27.965],[64.088,-29.053],[64.099,-29.053],[64.122,-29.052],[67.565,-28.814],[86.476,-28.182],[107.259,-30.829],[108.147,-38.075],[108.785,-38.146],[108.807,-38.148],[113.136,-39.266]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":28,"s":[{"i":[[-5.897,-35.026],[2.399,3.24],[14.409,1.086],[16.852,2.129],[0,0],[0,0],[-0.011,-0.001],[-1.171,-0.093],[-6.621,-0.257],[-4.531,0.804],[-1.427,-0.021],[-0.708,-0.011],[-0.011,0],[-1.44,-0.077]],"o":[[-1.603,-2.982],[-8.33,-11.223],[-15.379,-1.173],[-2.068,-0.142],[0,0],[0.011,0],[1.125,0.12],[5.856,0.467],[4.619,0.18],[1.372,-0.246],[0.707,-0.001],[0.011,-0.001],[1.449,0.02],[30.363,1.627]],"v":[[170.96,2.18],[164.074,-16.548],[132.074,-30.654],[75.501,-27.158],[64.154,-28.48],[64.166,-28.479],[64.188,-28.478],[67.633,-28.177],[86.541,-27.261],[105.445,-25.27],[112.324,-29.398],[113.601,-29.427],[113.624,-29.428],[114.828,-39.612]],"c":t
rue}]},{"t":29,"s":[{"i":[[-5.897,-35.026],[2.399,3.24],[14.414,1.051],[16.86,2.425],[0,0],[0,0],[-0.011,-0.001],[-1.171,-0.111],[-6.625,-0.338],[-4.66,0.323],[-1.427,-0.019],[-0.708,-0.01],[-0.011,0],[-1.44,-0.076]],"o":[[-1.603,-2.982],[-8.33,-11.223],[-15.314,-1.122],[-1.735,-0.203],[0,0],[0.011,0.001],[1.125,0.139],[5.859,0.555],[4.621,0.236],[1.406,-0.099],[0.707,-0.001],[0.011,-0.001],[1.449,0.018],[30.365,1.599]],"v":[[170.977,2.363],[164.074,-16.548],[128.021,-39.146],[75.544,-26.48],[64.21,-27.998],[64.221,-27.997],[64.244,-27.996],[67.69,-27.641],[86.596,-26.487],[100.17,-32.938],[107.571,-32.724],[109.387,-32.716],[109.409,-32.717],[118.779,-38.794]],"c":true}],"h":1},{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":30,"s":[{"i":[[-5.897,-35.026],[2.399,3.24],[14.416,1.031],[16.865,2.594],[0,0],[0,0],[-0.011,-0.001],[-1.171,-0.121],[-6.626,-0.384],[-4.734,0.05],[-1.427,-0.019],[-0.708,-0.01],[-0.011,0],[-1.441,-0.075]],"o":[[-1.603,-2.982],[-8.33,-11.223],[-15.277,-1.092],[-1.545,-0.238],[0,0],[0.011,0.001],[1.125,0.149],[5.86,0.604],[4.623,0.268],[1.425,-0.015],[0.707,-0.002],[0.011,-0.001],[1.449,0.018],[30.366,1.582]],"v":[[170.987,2.467],[164.074,-16.548],[130.863,-38.833],[75.569,-26.094],[64.242,-27.723],[64.253,-27.722],[64.275,-27.721],[67.723,-27.336],[86.627,-26.046],[103.05,-17.93],[107.316,-17.941],[109.439,-17.912],[109.461,-17.913],[113.79,-17.77]],"c":true}]},{"i":{"x":0.833,"y":1},"o":{"x":0.333,"y":0},"t":35.398,"s":[{"i":[[-5.897,-35.026],[2.399,3.24],[14.416,1.031],[16.865,2.594],[0,0],[0,0],[-0.011,-0.001],[-1.171,-0.121],[-6.626,-0.384],[-4.734,0.05],[-1.427,-0.019],[-0.708,-0.01],[-0.011,0],[-1.441,-0.075]],"o":[[-1.603,-2.982],[-8.33,-11.223],[-15.277,-1.092],[-1.545,-0.238],[0,0],[0.011,0.001],[1.125,0.149],[5.86,0.604],[4.623,0.268],[1.425,-0.015],[0.707,-0.002],[0.011,-0.001],[1.449,0.018],[30.366,1.582]],"v":[[170.987,2.467],[164.074,-16.548],[130.863,-38.833],[75.569,-26.094],[64.242,-27.723],[64.253,-27.722],[64.275,-27.721],[
67.723,-27.336],[86.627,-26.046],[103.05,-17.93],[107.316,-17.941],[109.439,-17.912],[109.461,-17.913],[113.79,-17.77]],"c":true}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.167,"y":0.192},"t":36.266,"s":[{"i":[[-5.897,-35.026],[2.399,3.24],[14.414,1.051],[16.86,2.425],[0,0],[0,0],[-0.011,-0.001],[-1.171,-0.111],[-6.625,-0.338],[-4.66,0.323],[-1.427,-0.019],[-0.708,-0.01],[-0.011,0],[-1.44,-0.076]],"o":[[-1.603,-2.982],[-8.33,-11.223],[-15.314,-1.122],[-1.735,-0.203],[0,0],[0.011,0.001],[1.125,0.139],[5.859,0.555],[4.621,0.236],[1.406,-0.099],[0.707,-0.001],[0.011,-0.001],[1.449,0.018],[30.365,1.599]],"v":[[170.977,2.363],[164.074,-16.548],[128.021,-39.146],[75.544,-26.48],[64.21,-27.998],[64.221,-27.997],[64.244,-27.996],[67.69,-27.641],[86.596,-26.487],[100.17,-32.938],[107.571,-32.724],[109.387,-32.716],[109.409,-32.717],[118.779,-38.794]],"c":true}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.167,"y":0.192},"t":37.133,"s":[{"i":[[-5.897,-35.026],[2.399,3.24],[14.409,1.086],[16.852,2.129],[0,0],[0,0],[-0.011,-0.001],[-1.171,-0.093],[-6.621,-0.257],[-4.531,0.804],[-1.427,-0.021],[-0.708,-0.011],[-0.011,0],[-1.44,-0.077]],"o":[[-1.603,-2.982],[-8.33,-11.223],[-15.379,-1.173],[-2.068,-0.142],[0,0],[0.011,0],[1.125,0.12],[5.856,0.467],[4.619,0.18],[1.372,-0.246],[0.707,-0.001],[0.011,-0.001],[1.449,0.02],[30.363,1.627]],"v":[[170.96,2.18],[164.074,-16.548],[132.074,-30.654],[75.501,-27.158],[64.154,-28.48],[64.166,-28.479],[64.188,-28.478],[67.633,-28.177],[86.541,-27.261],[105.445,-25.27],[112.324,-29.398],[113.601,-29.427],[113.624,-29.428],[114.828,-39.612]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.192},"t":38,"s":[{"i":[[-5.897,-35.026],[2.399,3.24],[14.403,1.128],[16.843,1.777],[0,0],[0,0],[-0.011,-0.001],[-1.17,-0.073],[-6.617,-0.161],[-4.377,1.375],[-1.426,-0.022],[-0.708,-0.011],[-0.011,0],[-1.44,-0.079]],"o":[[-1.603,-2.982],[-8.33,-11.223],[-15.456,-1.235],[-2.465,-0.069],[0,0],[0.011,0],[1.124,0.098],[5.853,0.363],[4.616,0.112],[1.331,-0.42],[0.7
07,0],[0.011,-0.001],[1.449,0.021],[30.36,1.661]],"v":[[170.939,1.962],[164.074,-16.548],[129.971,-32.422],[75.45,-27.965],[64.088,-29.053],[64.099,-29.053],[64.122,-29.052],[67.565,-28.814],[86.476,-28.182],[107.259,-30.829],[108.147,-38.075],[108.785,-38.146],[108.807,-38.148],[113.136,-39.266]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":38.869,"s":[{"i":[[-5.897,-35.026],[2.399,3.24],[14.397,1.174],[16.832,1.39],[0,0],[0,0],[-0.011,-0.001],[-1.17,-0.05],[-6.613,-0.056],[-4.209,2.001],[-1.426,-0.024],[-0.708,-0.012],[-0.011,0],[-1.44,-0.08]],"o":[[-1.603,-2.982],[-8.33,-11.223],[-25.525,-6.755],[-2.9,0.011],[0,0],[0.011,0],[1.124,0.073],[5.85,0.249],[4.613,0.039],[1.287,-0.612],[0.707,0.001],[0.011,-0.001],[1.449,0.023],[30.357,1.699]],"v":[[170.915,1.724],[164.074,-16.548],[129.549,-36.295],[75.394,-28.849],[64.016,-29.682],[64.027,-29.682],[64.049,-29.682],[67.491,-29.512],[86.404,-29.191],[101.436,-36.926],[104.295,-37.467],[104.23,-37.587],[104.252,-37.588],[108.581,-39.303]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":39.736,"s":[{"i":[[-5.897,-35.026],[2.399,3.24],[14.39,1.222],[15.408,-4.982],[0,0],[0,0],[-0.011,0],[-1.169,-0.026],[-6.609,0.056],[-4.733,0.029],[-1.426,-0.026],[-0.708,-0.013],[-0.011,0],[-1.44,-0.082]],"o":[[-1.603,-2.982],[-8.33,-11.223],[-50.39,-2.149],[-3.361,0.096],[0,0],[0.011,0],[1.123,0.047],[5.847,0.128],[4.61,-0.039],[1.425,-0.008],[0.707,0.002],[0.011,-0.001],[1.449,0.025],[30.354,1.738]],"v":[[170.891,1.471],[164.074,-16.548],[123.789,-37.151],[75.335,-32.287],[63.939,-30.349],[63.95,-30.349],[63.972,-30.349],[67.412,-30.253],[87.734,-34.949],[95.251,-40.031],[99.516,-40.02],[101.638,-39.98],[101.661,-39.981],[105.989,-39.816]],"c":true}]},{"i":{"x":0.667,"y":1},"o":{"x":0.167,"y":0.167},"t":46.676,"s":[{"i":[[-5.897,-35.026],[2.399,3.24],[14.161,2.893],[16.784,4.507],[0,0],[0,0],[-0.011,0],[-1.177,-0.034],[-6.637,0.034],[-4.731,-0.157],[-1.424,-0.09],[-0.706,-0.045],[-0.011,0],[-
1.435,-0.146]],"o":[[-1.603,-2.982],[-8.33,-11.223],[-18.724,-3.823],[-19.318,-5.18],[0,0],[0.011,0],[1.132,0.078],[5.886,0.235],[4.63,-0.022],[1.424,0.056],[0.706,0.034],[0.011,0],[1.446,0.09],[30.25,3.094]],"v":[[170.05,-7.22],[164.074,-16.548],[128.207,-32.895],[73.538,-36.719],[65.14,-51.092],[65.151,-51.092],[65.174,-51.092],[68.638,-50.924],[87.586,-50.823],[101.68,-50.678],[105.94,-50.476],[108.059,-50.341],[108.082,-50.341],[112.398,-49.982]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.333,"y":0},"t":51.881,"s":[{"i":[[-5.897,-35.026],[2.399,3.24],[14.161,2.893],[16.784,4.507],[0,0],[0,0],[-0.011,0],[-1.177,-0.034],[-6.637,0.034],[-4.731,-0.157],[-1.424,-0.09],[-0.706,-0.045],[-0.011,0],[-1.435,-0.146]],"o":[[-1.603,-2.982],[-8.33,-11.223],[-18.724,-3.823],[-19.318,-5.18],[0,0],[0.011,0],[1.132,0.078],[5.886,0.235],[4.63,-0.022],[1.424,0.056],[0.706,0.034],[0.011,0],[1.446,0.09],[30.25,3.094]],"v":[[170.05,-7.22],[164.074,-16.548],[128.207,-32.895],[73.538,-36.719],[65.14,-51.092],[65.151,-51.092],[65.174,-51.092],[68.638,-50.924],[87.586,-50.823],[101.68,-50.678],[105.94,-50.476],[108.059,-50.341],[108.082,-50.341],[112.398,-49.982]],"c":true}]},{"i":{"x":0.667,"y":1},"o":{"x":0.167,"y":0.167},"t":55.775,"s":[{"i":[[-5.897,-35.026],[2.399,3.24],[14.161,2.893],[16.784,4.507],[0,0],[0,0],[-0.011,0],[-1.177,-0.034],[-6.637,0.034],[-4.731,-0.157],[-1.424,-0.09],[-0.706,-0.045],[-0.011,0],[-1.435,-0.146]],"o":[[-1.603,-2.982],[-8.33,-11.223],[-18.724,-3.823],[-19.318,-5.18],[0,0],[0.011,0],[1.132,0.078],[5.886,0.235],[4.63,-0.022],[1.424,0.056],[0.706,0.034],[0.011,0],[1.446,0.09],[30.25,3.094]],"v":[[170.05,-7.22],[164.074,-16.548],[128.207,-32.895],[73.538,-36.719],[65.14,-51.092],[65.151,-51.092],[65.174,-51.092],[68.638,-50.924],[87.586,-50.823],[101.68,-50.678],[105.94,-50.476],[108.059,-50.341],[108.082,-50.341],[112.398,-49.982]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.333,"y":0},"t":60.393,"s":[{"i":[[-5.897,-35.026],[2.399,3.24],[14.161,2.893
],[16.784,4.507],[0,0],[0,0],[-0.011,0],[-1.177,-0.034],[-6.637,0.034],[-4.731,-0.157],[-1.424,-0.09],[-0.706,-0.045],[-0.011,0],[-1.435,-0.146]],"o":[[-1.603,-2.982],[-8.33,-11.223],[-18.724,-3.823],[-19.318,-5.18],[0,0],[0.011,0],[1.132,0.078],[5.886,0.235],[4.63,-0.022],[1.424,0.056],[0.706,0.034],[0.011,0],[1.446,0.09],[30.25,3.094]],"v":[[170.05,-7.22],[164.074,-16.548],[128.207,-32.895],[73.538,-36.719],[65.14,-51.092],[65.151,-51.092],[65.174,-51.092],[68.638,-50.924],[87.586,-50.823],[101.68,-50.678],[105.94,-50.476],[108.059,-50.341],[108.082,-50.341],[112.398,-49.982]],"c":true}]},{"t":77,"s":[{"i":[[-5.897,-35.026],[2.399,3.24],[14.161,2.893],[16.784,4.507],[0,0],[0,0],[-0.011,0],[-1.177,-0.034],[-6.637,0.034],[-4.731,-0.157],[-1.424,-0.09],[-0.706,-0.045],[-0.011,0],[-1.435,-0.146]],"o":[[-1.603,-2.982],[-8.33,-11.223],[-18.724,-3.823],[-19.318,-5.18],[0,0],[0.011,0],[1.132,0.078],[5.886,0.235],[4.63,-0.022],[1.424,0.056],[0.706,0.034],[0.011,0],[1.446,0.09],[30.25,3.094]],"v":[[170.05,-7.22],[164.074,-16.548],[128.207,-32.895],[73.538,-36.719],[65.14,-51.092],[65.151,-51.092],[65.174,-51.092],[68.638,-50.924],[87.586,-50.823],[101.68,-50.678],[105.94,-50.476],[108.059,-50.341],[108.082,-50.341],[112.398,-49.982]],"c":true}]}]},"hd":false},{"ty":"fl","c":{"a":0,"k":[1,0.874509811401,0.474509805441,1]},"o":{"a":0,"k":100},"r":1,"bm":0,"hd":false},{"ty":"tr","p":{"a":0,"k":[0,0]},"a":{"a":0,"k":[0,0]},"s":{"a":0,"k":[100,100]},"r":{"a":0,"k":0},"o":{"a":0,"k":100},"sk":{"a":0,"k":0},"sa":{"a":0,"k":0}}],"bm":0,"hd":false},{"ty":"gr","it":[{"ind":0,"ty":"sh","ks":{"a":1,"k":[{"i":{"x":0.833,"y":0.833},"o":{"x":0.333,"y":0},"t":0,"s":[{"i":[[0,0],[-35.335,53.002],[-5.567,22.479],[-21.793,12.034],[-3.48,-42.21],[-3.346,-23.044],[-32.858,-3.356],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[8.95,-36.141],[24.555,-13.559],[3.528,42.788],[3.346,23.044],[40.114,4.097],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-
86.808,-47.162],[-13.252,-133.194],[-1.418,-214.617],[57.831,-158.315],[38.895,-67.048],[112.397,-49.978],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":10,"s":[{"i":[[0,0],[-35.335,53.002],[-13.167,17.34],[-24.248,3.318],[11.988,-36.978],[4.187,-15.792],[-32.894,-2.889],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[21.17,-27.879],[27.32,-3.739],[-12.152,37.484],[-4.187,15.792],[40.158,3.527],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[9.789,-127.255],[49.868,-192.704],[83.798,-126.069],[42.674,-55.901],[112.216,-49.497],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":14,"s":[{"i":[[0,0],[-35.335,53.002],[-17.602,14.342],[-25.679,-1.767],[21.013,-33.924],[8.581,-11.56],[-32.915,-2.617],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[28.3,-23.058],[28.933,1.991],[-21.301,34.389],[-8.581,11.56],[40.184,3.194],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[23.233,-123.789],[79.793,-179.918],[98.949,-107.254],[44.88,-49.397],[112.11,-47.457],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":20,"s":[{"i":[[0,0],[-35.335,53.002],[-20.294,7.491],[-21.512,-9.687],[30.22,-24.21],[15.673,-6.821],[-32.949,-2.179],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[32.627,-12.044],[24.238,10.914],[-30.634,24.542],[-15.673,6.821],[40.225,2.66],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[44.302,-110.709],[113.095,-143.907],[103.692,-71.858],[48.488,-35.06],[115.111,-39.332],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":25,"s":[{"i":[[0,0],[-35.335,53.002],[-22.118,2.847],[-18.688,-15.055],[36.461
,-17.625],[20.479,-3.608],[-22.042,7.074],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[35.56,-4.578],[21.056,16.963],[-36.961,17.867],[-20.48,3.608],[40.312,0.73],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[58.584,-101.844],[135.667,-119.498],[106.907,-47.866],[50.933,-25.342],[105.988,-39.812],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":26,"s":[{"i":[[0,0],[-35.335,53.002],[-22.382,2.176],[-18.279,-15.832],[37.364,-16.673],[21.376,-1.156],[-31.431,12.03],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[35.985,-3.498],[20.595,17.838],[-37.876,16.901],[-7.732,0.418],[40.301,1.076],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[60.649,-100.562],[138.932,-115.968],[107.372,-44.396],[51.287,-23.937],[108.58,-39.299],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":27,"s":[{"i":[[0,0],[-35.335,53.002],[-22.631,1.542],[-17.894,-16.564],[38.215,-15.775],[21.967,-1.356],[-31.455,13.087],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[36.385,-2.48],[20.161,18.663],[-38.739,15.991],[-12.702,0.855],[40.29,1.402],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[62.598,-99.352],[142.011,-112.638],[107.811,-41.123],[51.621,-22.611],[113.135,-39.262],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":28,"s":[{"i":[[0,0],[-35.335,53.002],[-22.858,0.965],[-17.543,-17.232],[38.991,-14.956],[22.506,-1.538],[-34.396,16.245],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[36.75,-1.551],[19.765,19.415],[-39.526,15.161],[-17.234,1.253],[40.281,1.699],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[64.374,-98.249],[144
.819,-109.602],[108.21,-38.139],[51.925,-21.402],[114.827,-39.608],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":true}]},{"t":29,"s":[{"i":[[0,0],[-35.335,53.002],[-23.049,0.479],[-17.247,-17.793],[39.644,-14.267],[22.96,-1.691],[-31.16,19.021],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[37.057,-0.77],[19.432,20.048],[-40.188,14.462],[-21.046,1.587],[40.273,1.949],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[65.868,-97.322],[147.181,-107.048],[108.547,-35.628],[52.181,-20.386],[118.778,-38.79],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":true}],"h":1},{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":30,"s":[{"i":[[0,0],[-35.335,53.002],[-23.157,0.202],[-17.079,-18.113],[40.016,-13.874],[23.218,-1.778],[-32.985,-1.713],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[37.231,-0.325],[19.243,20.408],[-40.565,14.064],[-23.218,1.778],[40.269,2.092],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[66.72,-96.793],[148.526,-105.593],[108.739,-34.198],[52.327,-19.806],[113.79,-17.766],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":true}]},{"i":{"x":0.833,"y":1},"o":{"x":0.333,"y":0},"t":35.398,"s":[{"i":[[0,0],[-35.335,53.002],[-23.157,0.202],[-17.079,-18.113],[40.016,-13.874],[23.218,-1.778],[-32.985,-1.713],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[37.231,-0.325],[19.243,20.408],[-40.565,14.064],[-23.218,1.778],[40.269,2.092],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[66.72,-96.793],[148.526,-105.593],[108.739,-34.198],[52.327,-19.806],[113.79,-17.766],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":true}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.167,"y":0.192},"t":36.266,"s":[{"i":[[0,0],[-35.335,53.002],[-23.049,0.479],[-17.247,-17.793],[39.644,-14.267],[22.96,-1.691],[-31.16,19.021],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]
],"o":[[0,0],[35.335,-53.002],[37.057,-0.77],[19.432,20.048],[-40.188,14.462],[-21.046,1.587],[40.273,1.949],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[65.868,-97.322],[147.181,-107.048],[108.547,-35.628],[52.181,-20.386],[118.778,-38.79],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":true}]},{"i":{"x":0.833,"y":0.808},"o":{"x":0.167,"y":0.192},"t":37.133,"s":[{"i":[[0,0],[-35.335,53.002],[-22.858,0.965],[-17.543,-17.232],[38.991,-14.956],[22.506,-1.538],[-34.396,16.245],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[36.75,-1.551],[19.765,19.415],[-39.526,15.161],[-17.234,1.253],[40.281,1.699],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[64.374,-98.249],[144.819,-109.602],[108.21,-38.139],[51.925,-21.402],[114.827,-39.608],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.192},"t":38,"s":[{"i":[[0,0],[-35.335,53.002],[-22.631,1.542],[-17.894,-16.564],[38.215,-15.775],[21.967,-1.356],[-31.455,13.087],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[36.385,-2.48],[20.161,18.663],[-38.739,15.991],[-12.702,0.855],[40.29,1.402],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[62.598,-99.352],[142.011,-112.638],[107.811,-41.123],[51.621,-22.611],[113.135,-39.262],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":38.869,"s":[{"i":[[0,0],[-35.335,53.002],[-22.382,2.176],[-18.279,-15.832],[37.364,-16.673],[21.376,-1.156],[-31.431,12.03],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[35.985,-3.498],[20.595,17.838],[-37.876,16.901],[-7.732,0.418],[40.301,1.076],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[60.649,-100.562],[138.932,-115.968],[107.372,-44.396],[51.287,-23.937],[108.58,-39.299],[170.605,37.76
1],[67.333,182.416],[-85.175,158.542]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.167,"y":0.167},"t":39.736,"s":[{"i":[[0,0],[-35.335,53.002],[-22.118,2.847],[-18.688,-15.055],[36.461,-17.625],[20.479,-3.608],[-22.042,7.074],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[35.56,-4.578],[21.056,16.963],[-36.961,17.867],[-20.48,3.608],[40.312,0.73],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[58.584,-101.844],[135.667,-119.498],[106.907,-47.866],[50.933,-25.342],[105.988,-39.812],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":true}]},{"i":{"x":0.667,"y":1},"o":{"x":0.167,"y":0.167},"t":46.676,"s":[{"i":[[0,0],[-35.335,53.002],[-5.567,22.479],[-21.793,12.034],[-3.48,-42.21],[-3.346,-23.044],[-32.858,-3.356],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[8.95,-36.141],[24.555,-13.559],[3.528,42.788],[3.346,23.044],[40.114,4.097],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[-13.252,-133.194],[-1.418,-214.617],[57.831,-158.315],[38.895,-67.048],[112.397,-49.978],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":true}]},{"i":{"x":0.833,"y":0.833},"o":{"x":0.333,"y":0},"t":51.881,"s":[{"i":[[0,0],[-35.335,53.002],[2.736,22.996],[-16.135,18.959],[-18.171,-38.257],[-3.346,-23.044],[-32.858,-3.356],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[-4.398,-36.972],[18.179,-21.361],[18.42,38.781],[3.346,23.044],[40.114,4.097],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[-36.76,-126.359],[-54.462,-206.711],[20.859,-174.977],[31.082,-67.361],[112.397,-49.978],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":true}]},{"i":{"x":0.667,"y":1},"o":{"x":0.167,"y":0.167},"t":55.775,"s":[{"i":[[0,0],[-35.335,53.002],[-5.567,22.479],[-21.793,12.034],[-3.48,-42.21],[-3.346,-23.044],[-32.858,-3.356],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-
53.002],[8.95,-36.141],[24.555,-13.559],[3.528,42.788],[3.346,23.044],[40.114,4.097],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[-13.252,-133.194],[-1.418,-214.617],[57.831,-158.315],[38.895,-67.048],[112.397,-49.978],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.333,"y":0},"t":60.393,"s":[{"i":[[0,0],[-35.335,53.002],[-9.606,21.072],[-23.635,7.821],[4.343,-42.13],[0.95,-23.267],[-32.858,-3.356],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[15.444,-33.878],[26.63,-8.811],[-4.402,42.707],[-0.95,23.267],[40.114,4.097],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[2.674,-129.298],[29.282,-207.156],[77.165,-140.917],[46.297,-66.565],[112.397,-49.978],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":true}]},{"t":77,"s":[{"i":[[0,0],[-35.335,53.002],[-5.567,22.479],[-21.793,12.034],[-3.48,-42.21],[-3.346,-23.044],[-32.858,-3.356],[6.657,-76.814],[71.181,-6.657],[31.957,24.914]],"o":[[0,0],[35.335,-53.002],[8.95,-36.141],[24.555,-13.559],[3.528,42.788],[3.346,23.044],[40.114,4.097],[-6.657,76.814],[-71.181,6.657],[0,0]],"v":[[-105.471,55.314],[-86.808,-47.162],[-13.252,-133.194],[-1.418,-214.617],[57.831,-158.315],[38.895,-67.048],[112.397,-49.978],[170.605,37.761],[67.333,182.416],[-85.175,158.542]],"c":true}]}]},"hd":false},{"ty":"fl","c":{"a":0,"k":[1,0.819607853889,0.250980407,1]},"o":{"a":0,"k":100},"r":1,"bm":0,"hd":false},{"ty":"tr","p":{"a":0,"k":[0,0]},"a":{"a":0,"k":[0,0]},"s":{"a":0,"k":[100,100]},"r":{"a":0,"k":0},"o":{"a":0,"k":100},"sk":{"a":0,"k":0},"sa":{"a":0,"k":0}}],"bm":0,"hd":false}],"ip":0,"op":120,"st":0,"bm":0},{"ddd":0,"ind":6,"ty":3,"parent":7,"sr":1,"ks":{"o":{"a":0,"k":0},"r":{"a":1,"k":[{"i":{"x":[0.34],"y":[1]},"o":{"x":[0.333],"y":[0]},"t":0,"s":[0]},{"i":{"x":[0.34],"y":[1]},"o":{"x":[0.33],"y":[0]},"t":21,"s":[17.203]},{"i":{"x":[0.667],"y":[1]},"o":{"x":[0.67],"y":[0]},"t":45,"s
":[-20]},{"i":{"x":[0],"y":[1]},"o":{"x":[0.333],"y":[0]},"t":58,"s":[5]},{"t":81,"s":[0]}]},"p":{"a":1,"k":[{"i":{"x":0.34,"y":1},"o":{"x":0.333,"y":0},"t":0,"s":[-287.188,148.438,0],"to":[0,0,0],"ti":[0,0,0]},{"i":{"x":0.34,"y":1},"o":{"x":0.33,"y":0},"t":21,"s":[-287.188,179.688,0],"to":[0,0,0],"ti":[0,0,0]},{"i":{"x":0,"y":1},"o":{"x":0.67,"y":0},"t":45,"s":[-309.688,117.188,0],"to":[0,0,0],"ti":[0,0,0]},{"t":69,"s":[-287.188,148.438,0]}]},"s":{"a":0,"k":[125,125,100]}},"ao":0,"ip":0,"op":120,"st":0,"bm":0},{"ddd":0,"ind":7,"ty":3,"parent":8,"sr":1,"ks":{"o":{"a":0,"k":0},"p":{"a":0,"k":[100,-91,0]},"s":{"a":0,"k":[80,80,100]}},"ao":0,"ip":0,"op":120,"st":0,"bm":0},{"ddd":0,"ind":8,"ty":3,"sr":1,"ks":{"o":{"a":0,"k":0},"p":{"a":0,"k":[198,332,0]}},"ao":0,"ip":0,"op":120,"st":0,"bm":0},{"ddd":0,"ind":9,"ty":4,"parent":6,"sr":1,"ks":{"p":{"a":0,"k":[26.816,0.469,0]},"a":{"a":0,"k":[-162.347,76.575,0]}},"ao":0,"shapes":[{"ty":"gr","it":[{"ty":"gr","it":[{"ind":0,"ty":"sh","ks":{"a":1,"k":[{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":0,"s":[{"i":[[6.631,3.364],[7.496,-10.379],[3.171,-19.028],[-7.348,-7.961],[-6.631,7.208],[2.165,18.654]],"o":[[-3.331,-1.69],[-7.496,10.379],[-3.171,19.028],[6.919,7.496],[6.631,-7.208],[-3.748,-32.294]],"v":[[-122.732,-5.303],[-150.761,0.478],[-168.06,42.859],[-164.889,92.447],[-117.606,92.447],[-121.643,44.592]],"c":true}]},{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":12,"s":[{"i":[[7.227,1.747],[7.496,-10.379],[3.171,-19.028],[-7.348,-7.961],[-6.631,7.208],[6.4,17.655]],"o":[[-3.63,-0.878],[-7.496,10.379],[-3.171,19.028],[6.919,7.496],[6.631,-7.208],[-11.081,-30.564]],"v":[[-143.994,-3.752],[-150.761,0.478],[-168.06,42.859],[-164.889,92.447],[-117.606,92.447],[-131.449,44.553]],"c":true}]},{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":27,"s":[{"i":[[6.069,4.296],[7.496,-10.379],[3.171,-19.028],[-7.348,-7.961],[-6.631,7.208],[-0.582,18.77]],"o":[[-3.049,-2.158],[-7.496,10.379],[-3.171,19.028],[6.919,7.496],[6.631,-7.208],[
1.007,-32.495]],"v":[[-111.926,-4.259],[-150.761,0.478],[-168.06,42.859],[-164.889,92.447],[-117.606,92.447],[-118.134,45.26]],"c":true}]},{"i":{"x":0.667,"y":1},"o":{"x":0.67,"y":0},"t":47.266,"s":[{"i":[[7.227,1.747],[7.496,-10.379],[3.171,-19.028],[-7.348,-7.961],[-6.631,7.208],[6.4,17.655]],"o":[[-3.63,-0.878],[-7.496,10.379],[-3.171,19.028],[6.919,7.496],[6.631,-7.208],[-11.081,-30.564]],"v":[[-143.994,-3.752],[-150.761,0.478],[-168.06,42.859],[-164.889,92.447],[-117.606,92.447],[-131.449,44.553]],"c":true}]},{"i":{"x":0,"y":1},"o":{"x":0.333,"y":0},"t":58,"s":[{"i":[[6.631,3.364],[7.496,-10.379],[3.171,-19.028],[-7.348,-7.961],[-6.631,7.208],[2.165,18.654]],"o":[[-3.331,-1.69],[-7.496,10.379],[-3.171,19.028],[6.919,7.496],[6.631,-7.208],[-3.748,-32.294]],"v":[[-122.732,-5.303],[-150.761,0.478],[-168.06,42.859],[-164.889,92.447],[-117.606,92.447],[-121.643,44.592]],"c":true}]},{"i":{"x":0.667,"y":1},"o":{"x":0.67,"y":0},"t":86,"s":[{"i":[[6.631,3.364],[7.496,-10.379],[3.171,-19.028],[-7.348,-7.961],[-6.631,7.208],[2.165,18.654]],"o":[[-3.331,-1.69],[-7.496,10.379],[-3.171,19.028],[6.919,7.496],[6.631,-7.208],[-3.748,-32.294]],"v":[[-122.732,-5.303],[-150.761,0.478],[-168.06,42.859],[-164.889,92.447],[-117.606,92.447],[-121.643,44.592]],"c":true}]},{"t":180,"s":[{"i":[[6.631,3.364],[7.496,-10.379],[3.171,-19.028],[-7.348,-7.961],[-6.631,7.208],[2.165,18.654]],"o":[[-3.331,-1.69],[-7.496,10.379],[-3.171,19.028],[6.919,7.496],[6.631,-7.208],[-3.748,-32.294]],"v":[[-122.732,-5.303],[-150.761,0.478],[-168.06,42.859],[-164.889,92.447],[-117.606,92.447],[-121.643,44.592]],"c":true}]}]},"hd":false},{"ty":"fl","c":{"a":0,"k":[1,0.819607853889,0.250980407,1]},"o":{"a":0,"k":100},"r":1,"bm":0,"hd":false},{"ty":"tr","p":{"a":0,"k":[0,0]},"a":{"a":0,"k":[0,0]},"s":{"a":0,"k":[100,100]},"r":{"a":0,"k":0},"o":{"a":0,"k":100},"sk":{"a":0,"k":0},"sa":{"a":0,"k":0}}],"bm":0,"hd":false},{"ty":"gr","it":[{"ind":0,"ty":"sh","ks":{"a":1,"k":[{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y"
:0},"t":0,"s":[{"i":[[95.277,-43.87],[39.46,3.414],[-16.728,-58.72],[-21.773,5.469],[-8.406,-1.418]],"o":[[0,0],[-32.154,-2.782],[12.203,42.836],[12.25,-3.077],[65.95,11.123]],"v":[[-92.414,-38.03],[-145.147,-21.773],[-182.985,111.844],[-123.485,163.981],[-85.175,158.542]],"c":true}]},{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":12,"s":[{"i":[[95.277,-43.87],[39.46,3.414],[-16.728,-58.72],[-21.773,5.469],[-8.406,-1.418]],"o":[[0,0],[-32.154,-2.782],[12.203,42.836],[12.25,-3.077],[65.95,11.123]],"v":[[-92.414,-38.03],[-145.147,-21.773],[-182.985,111.844],[-123.485,163.981],[-57.506,156.96]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.333,"y":0},"t":27,"s":[{"i":[[95.277,-43.87],[39.46,3.414],[-16.728,-58.72],[-21.773,5.469],[-8.406,-1.418]],"o":[[0,0],[-32.154,-2.782],[12.203,42.836],[12.25,-3.077],[65.95,11.123]],"v":[[-63.434,-30.637],[-145.147,-21.773],[-182.985,111.844],[-123.485,163.981],[-85.175,158.542]],"c":true}]},{"i":{"x":0.667,"y":1},"o":{"x":0.67,"y":0},"t":47.266,"s":[{"i":[[95.277,-43.87],[39.46,3.414],[-16.728,-58.72],[-21.773,5.469],[-8.406,-1.418]],"o":[[0,0],[-32.154,-2.782],[12.203,42.836],[12.25,-3.077],[65.95,11.123]],"v":[[-92.414,-38.03],[-145.147,-21.773],[-182.985,111.844],[-123.485,163.981],[-57.506,156.96]],"c":true}]},{"i":{"x":0,"y":1},"o":{"x":0.333,"y":0},"t":58,"s":[{"i":[[95.277,-43.87],[39.46,3.414],[-16.728,-58.72],[-21.773,5.469],[-8.406,-1.418]],"o":[[0,0],[-32.154,-2.782],[12.203,42.836],[12.25,-3.077],[65.95,11.123]],"v":[[-69.3,-31.227],[-145.147,-21.773],[-182.985,111.844],[-123.485,163.981],[-85.175,158.542]],"c":true}]},{"i":{"x":0.667,"y":1},"o":{"x":0.67,"y":0},"t":86,"s":[{"i":[[95.277,-43.87],[39.46,3.414],[-16.728,-58.72],[-21.773,5.469],[-8.406,-1.418]],"o":[[0,0],[-32.154,-2.782],[12.203,42.836],[12.25,-3.077],[65.95,11.123]],"v":[[-84.838,-37.381],[-145.147,-21.773],[-182.985,111.844],[-123.485,163.981],[-85.175,158.542]],"c":true}]},{"t":180,"s":[{"i":[[95.277,-43.87],[39.46,3.414],[-16.728,-58.72],[-21.773,5.
469],[-8.406,-1.418]],"o":[[0,0],[-32.154,-2.782],[12.203,42.836],[12.25,-3.077],[65.95,11.123]],"v":[[-92.414,-38.03],[-145.147,-21.773],[-182.985,111.844],[-123.485,163.981],[-85.175,158.542]],"c":true}]}]},"hd":false},{"ty":"st","c":{"a":0,"k":[0.96862745285,0.494117647409,0.254901975393,1]},"o":{"a":0,"k":100},"w":{"a":0,"k":8},"lc":2,"lj":2,"bm":0,"hd":false},{"ty":"tr","p":{"a":0,"k":[0,0]},"a":{"a":0,"k":[0,0]},"s":{"a":0,"k":[100,100]},"r":{"a":0,"k":0},"o":{"a":0,"k":100},"sk":{"a":0,"k":0},"sa":{"a":0,"k":0}}],"bm":0,"hd":false},{"ty":"gr","it":[{"ind":0,"ty":"sh","ks":{"a":1,"k":[{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":0,"s":[{"i":[[95.277,-43.87],[39.46,3.414],[-16.728,-58.72],[-21.773,5.469],[-8.406,-1.418]],"o":[[0,0],[-32.154,-2.782],[12.203,42.836],[12.25,-3.077],[65.95,11.123]],"v":[[-92.414,-38.03],[-145.147,-21.773],[-182.985,111.844],[-123.485,163.981],[-85.175,158.542]],"c":true}]},{"i":{"x":0.667,"y":1},"o":{"x":0.333,"y":0},"t":12,"s":[{"i":[[95.277,-43.87],[39.46,3.414],[-16.728,-58.72],[-21.773,5.469],[-8.406,-1.418]],"o":[[0,0],[-32.154,-2.782],[12.203,42.836],[12.25,-3.077],[65.95,11.123]],"v":[[-92.414,-38.03],[-145.147,-21.773],[-182.985,111.844],[-123.485,163.981],[-57.506,156.96]],"c":true}]},{"i":{"x":0.34,"y":1},"o":{"x":0.333,"y":0},"t":27,"s":[{"i":[[95.277,-43.87],[39.46,3.414],[-16.728,-58.72],[-21.773,5.469],[-8.406,-1.418]],"o":[[0,0],[-32.154,-2.782],[12.203,42.836],[12.25,-3.077],[65.95,11.123]],"v":[[-63.434,-30.637],[-145.147,-21.773],[-182.985,111.844],[-123.485,163.981],[-85.175,158.542]],"c":true}]},{"i":{"x":0.667,"y":1},"o":{"x":0.67,"y":0},"t":47.266,"s":[{"i":[[95.277,-43.87],[39.46,3.414],[-16.728,-58.72],[-21.773,5.469],[-8.406,-1.418]],"o":[[0,0],[-32.154,-2.782],[12.203,42.836],[12.25,-3.077],[65.95,11.123]],"v":[[-92.414,-38.03],[-145.147,-21.773],[-182.985,111.844],[-123.485,163.981],[-57.506,156.96]],"c":true}]},{"i":{"x":0,"y":1},"o":{"x":0.333,"y":0},"t":58,"s":[{"i":[[95.277,-43.87],[39.46,3.414],[
-16.728,-58.72],[-21.773,5.469],[-8.406,-1.418]],"o":[[0,0],[-32.154,-2.782],[12.203,42.836],[12.25,-3.077],[65.95,11.123]],"v":[[-69.3,-31.227],[-145.147,-21.773],[-182.985,111.844],[-123.485,163.981],[-85.175,158.542]],"c":true}]},{"i":{"x":0.667,"y":1},"o":{"x":0.67,"y":0},"t":86,"s":[{"i":[[95.277,-43.87],[39.46,3.414],[-16.728,-58.72],[-21.773,5.469],[-8.406,-1.418]],"o":[[0,0],[-32.154,-2.782],[12.203,42.836],[12.25,-3.077],[65.95,11.123]],"v":[[-84.838,-37.381],[-145.147,-21.773],[-182.985,111.844],[-123.485,163.981],[-85.175,158.542]],"c":true}]},{"t":180,"s":[{"i":[[95.277,-43.87],[39.46,3.414],[-16.728,-58.72],[-21.773,5.469],[-8.406,-1.418]],"o":[[0,0],[-32.154,-2.782],[12.203,42.836],[12.25,-3.077],[65.95,11.123]],"v":[[-92.414,-38.03],[-145.147,-21.773],[-182.985,111.844],[-123.485,163.981],[-85.175,158.542]],"c":true}]}]},"hd":false},{"ty":"fl","c":{"a":0,"k":[1,0.694117665291,0.223529413342,1]},"o":{"a":0,"k":100},"r":1,"bm":0,"hd":false},{"ty":"tr","p":{"a":0,"k":[0,0]},"a":{"a":0,"k":[0,0]},"s":{"a":0,"k":[100,100]},"r":{"a":0,"k":0},"o":{"a":0,"k":100},"sk":{"a":0,"k":0},"sa":{"a":0,"k":0}}],"bm":0,"hd":false},{"ty":"tr","p":{"a":0,"k":[0,0]},"a":{"a":0,"k":[0,0]},"s":{"a":0,"k":[100,100]},"r":{"a":0,"k":0},"o":{"a":0,"k":100},"sk":{"a":0,"k":0},"sa":{"a":0,"k":0}}],"bm":0,"hd":false}],"ip":0,"op":120,"st":0,"bm":0}]} \ No newline at end of file diff --git a/TMessagesProj/src/main/res/values/ids.xml b/TMessagesProj/src/main/res/values/ids.xml index ebd0060d7..d57b82fba 100644 --- a/TMessagesProj/src/main/res/values/ids.xml +++ b/TMessagesProj/src/main/res/values/ids.xml @@ -30,6 +30,7 @@ + \ No newline at end of file diff --git a/TMessagesProj/src/main/res/values/strings.xml b/TMessagesProj/src/main/res/values/strings.xml index 7cbb05efe..c51207ddc 100644 --- a/TMessagesProj/src/main/res/values/strings.xml +++ b/TMessagesProj/src/main/res/values/strings.xml @@ -2038,6 +2038,7 @@ Reload Contacts Reset Dialogs Read all Chats + Check app update 
Enable pause music on record Disable pause music on record Enable smooth keyboard @@ -2172,6 +2173,11 @@ Name updated. Channel title updated. Channel description updated. + Is %1$s still your number? + Keep your number up to date to ensure you can always log into Telegram. **Learn more** + https://telegram.org/faq#q-i-have-a-new-phone-number-what-do-i-do + Yes + No Local Database Clear local database @@ -2505,7 +2511,7 @@ Enter your current passcode Enter a passcode Enter your new passcode - Enter your passcode + Enter your Telegram passcode Re-enter your new passcode Passcodes do not match Auto-lock @@ -2801,14 +2807,28 @@ Set Additional Password You can set a password that will be required when you log in on a new device in addition to the code you get in the SMS. Your Password + Your password + Your account is protected by 2-Step Verification.\nDo you still remember your password? + Yes, definitely + Not sure On Off Enter your password + Check Password + Do you still remember your password? + Perfect! + You still remember your password. + You entered the wrong password. + Back to Settings Please enter your password to complete the transfer. Enter a password Enter a new password Enter your new password Re-enter your password + New Password + You can now set a new password that will be used to log into your account. + Enter new password + Re-enter new password Recovery email Recovery Email Your email @@ -2833,11 +2853,16 @@ Are you sure you want to disable your password? Are you sure you want to abort two-step verification setup? Are you sure you want to abort recovery email setup? + Cancel the password reset process? If you request a new reset later, it will take another 7 days. + YES + NO + Cancel reset Abort setup Warning! All data saved in your Telegram Passport will be lost! Password Hint Hint Create a hint for your password + Create a hint for your password. 
Passwords do not match Abort two-step verification setup Abort setup @@ -2850,13 +2875,21 @@ Invalid email address. Please check that you\'ve entered it correctly and try again. Sorry Since you didn\'t provide a recovery email when setting up your password, your remaining options are either to remember your password or to reset your account. + Since you didn’t provide a recovery email when setting up your password, your remaining options are either to remember your password or wait 7 days until your password is reset. + Are you sure you want to reset your 2-Step Verification password? + Your 2-Step Verification password was reset. RESET ACCOUNT + RESET PASSWORD + You can reset your password in %1$s + Reset password Recovery code We have sent a recovery code to the email address you provided:\n\n%1$s Please check your email and enter the 6-digit code we sent you. Having trouble accessing your email %1$s? Having trouble accessing your email? If you can\'t restore access to your email, your remaining options are either to remember your password or to reset your account. + If you don’t have access to your recovery email, your remaining options are either to remember your password or wait 7 days until your password is reset. + You recently requested a password reset that was canceled. Please wait %1$s before making a new request. RESET MY ACCOUNT You will lose all your chats and messages, along with any media and files you shared, if you proceed with resetting your account. Warning @@ -3100,6 +3133,8 @@ Voice chat started Voice chat scheduled on %1$s Voice chat finished (%s) + un1 ended the voice chat (%s) + You ended the voice chat (%s) un1 invited un2 to the voice chat You invited un2 to the voice chat un1 invited you to the voice chat @@ -4358,6 +4393,7 @@ Lock application Unlock application Backspace + Fingerprint Previous Repeat, off Repeat, playlist