Android FFmpeg 視頻解碼過程與實戰分析

概述#

本文首先以 FFmpeg 視頻解碼為主題，主要介紹了 FFmpeg 進行解碼視頻時的主要流程、基本原理；其次，文章還講述了與 FFmpeg 視頻解碼有關的簡單應用，包括如何在原有的 FFmpeg 視頻解碼的基礎上按照一定時間軸順序播放視頻、如何在播放視頻時加入 seek 的邏輯；除此之外，文章重點介紹了解碼視頻時可能容易遺漏的細節，最後是簡單地闡述了下如何封裝一個具有基本的視頻解碼功能的 VideoDecoder。

前言#

FFmpeg#

FFmpeg 是一套可以用來錄製、轉換數字音頻、視頻，並能將其轉化為流的開源計算機程序，它可生成用於處理和操作多媒體數據的庫，其中包含了先進的音視頻解碼庫 libavcodec 和音視頻格式轉換庫 libavformat。

FFmpeg 六大常用功能模塊#

libavformat：多媒體文件或協議的封裝和解封裝庫，如 mp4、flv 等文件封裝格式，rtmp、rtsp 等網絡協議封裝格式；
libavcodec：音視頻編解碼核心庫；
libavfilter：音視頻、字幕濾鏡庫；
libswscale：圖像格式轉換庫；
libswresample：音頻重採樣庫；
libavutil：工具庫

視頻解碼基礎入門#

解復用（Demux）：解復用也可叫解封裝。這裡有一個概念叫封裝格式，封裝格式指的是音視頻的組合格式，常見的有 mp4、flv、mkv 等。通俗來講，封裝是將音頻流、視頻流、字幕流以及其他附件按一定規則組合成一個封裝的產物。而解封裝起着與封裝相反的作用，將一個流媒體文件拆解成音頻數據和視頻數據等。此時拆分後數據是經過壓縮編碼的，常見的視頻壓縮數據格式有 h264。

解碼（Decode）：簡單來說，就是對壓縮的編碼數據解壓成原始的視頻像素數據，常用的原始視頻像素數據格式有 yuv。

色彩空間轉換（Color Space Convert）：通常對於圖像顯示器來說，它是通過 RGB 模型來顯示圖像的，但在傳輸圖像數據時使用 YUV 模型可以節省帶寬。因此在顯示圖像時就需要將 yuv 像素格式的數據轉換成 rgb 的像素格式後再進行渲染。
渲染（Render）：將前面已經解碼和進行色彩空間轉換的每一個視頻幀的數據發送給顯卡以繪製在屏幕畫面上。

一、引入 FFmpeg 前的準備工作#

1.1 FFmpeg so 庫編譯#

在 FFmpeg 官網下載源碼庫並解壓；
下載 NDK 庫並解壓；
配置解壓後的 FFmpeg 源碼庫目錄中的 configure，修改高亮部分幾個參數為以下的內容，主要目的是生成 Android 可使用的名稱 - 版本.so 文件的格式；

# ······
# build settings
SHFLAGS='-shared -Wl,-soname,$$(@F)'
LIBPREF="lib"
LIBSUF=".a"
FULLNAME='$(NAME)$(BUILDSUF)'
LIBNAME='$(LIBPREF)$(FULLNAME)$(LIBSUF)'
SLIBPREF="lib"
SLIBSUF=".so"
SLIBNAME='$(SLIBPREF)$(FULLNAME)$(SLIBSUF)'
SLIBNAME_WITH_VERSION='$(SLIBNAME).$(LIBVERSION)'

# Modified settings: name the versioned shared library "lib<name>-<major>.so".
# The name must be built from SLIBPREF (not SLIBNAME); otherwise the result
# would be "lib<name>.so<name>-<major>.so".
SLIBNAME_WITH_MAJOR='$(SLIBPREF)$(FULLNAME)-$(LIBMAJOR)$(SLIBSUF)'
LIB_INSTALL_EXTRA_CMD='$$(RANLIB)"$(LIBDIR)/$(LIBNAME)"'
SLIB_INSTALL_NAME='$(SLIBNAME_WITH_MAJOR)'
SLIB_INSTALL_LINKS='$(SLIBNAME)'
# ······

在 FFmpeg 源碼庫目錄下新建腳本文件 build_android_arm_v8a.sh，在文件中配置 NDK 的路徑，並輸入下面其他的內容；

#!/bin/bash
# Cross-compiles the FFmpeg shared libraries for Android arm64-v8a using the
# NDK clang toolchain. Run it from the root of the FFmpeg source tree.

# Remove the artifacts of a previous build (harmless on a fresh tree).
make clean
# Point this at your local NDK installation first.
export NDK=/Users/bytedance/Library/Android/sdk/ndk/21.4.7075529
TOOLCHAIN=$NDK/toolchains/llvm/prebuilt/darwin-x86_64


# Configures, builds and installs FFmpeg using the variables assigned below.
# Returns non-zero as soon as one step fails, so the success banner is only
# printed for a build that really succeeded.
function build_android
{

# Option names are case-sensitive: the program switch is --enable-ffmpeg.
./configure \
--prefix=$PREFIX \
--disable-postproc \
--disable-debug \
--disable-doc \
--enable-ffmpeg \
--disable-symver \
--disable-static \
--enable-shared \
--cross-prefix=$CROSS_PREFIX \
--target-os=android \
--arch=$ARCH \
--cpu=$CPU \
--cc=$CC \
--cxx=$CXX \
--enable-cross-compile \
--sysroot=$SYSROOT \
--extra-cflags="-Os -fpic $OPTIMIZE_CFLAGS" \
--extra-ldflags="$ADDI_LDFLAGS" || return 1

make clean
make -j16 || return 1
make install || return 1

echo "============================ build android arm64-v8a success =========================="

}

# arm64-v8a
ARCH=arm64
CPU=armv8-a
API=21
CC=$TOOLCHAIN/bin/aarch64-linux-android$API-clang
CXX=$TOOLCHAIN/bin/aarch64-linux-android$API-clang++
SYSROOT=$NDK/toolchains/llvm/prebuilt/darwin-x86_64/sysroot
CROSS_PREFIX=$TOOLCHAIN/bin/aarch64-linux-android-
# Install prefix: <ffmpeg source dir>/android/<cpu>
PREFIX=$(pwd)/android/$CPU
OPTIMIZE_CFLAGS="-march=$CPU"

# Print the selected compiler so a wrong NDK path is easy to spot.
echo $CC

build_android

設置 NDK 文件夾中所有文件的權限 chmod 777 -R NDK；
終端執行腳本 ./build_android_arm_v8a.sh，開始編譯 FFmpeg。編譯成功後的文件會在 FFmpeg 下的 android 目錄中，會出現多個 .so 文件；

若要編譯 arm-v7a，只需要拷貝修改以上的腳本為以下 build_android_arm_v7a.sh 的內容。

# armv7-a: variable section for the 32-bit ARM build. It takes the place of the
# "# arm64-v8a" variable section in the arm64 build script.
ARCH=arm
CPU=armv7-a
API=21
# Compilers are the API-level-suffixed clang wrappers from the NDK toolchain.
CC=$TOOLCHAIN/bin/armv7a-linux-androideabi$API-clang
CXX=$TOOLCHAIN/bin/armv7a-linux-androideabi$API-clang++
SYSROOT=$NDK/toolchains/llvm/prebuilt/darwin-x86_64/sysroot
# Note: the cross prefix is "arm-", while the compiler wrappers use "armv7a-".
CROSS_PREFIX=$TOOLCHAIN/bin/arm-linux-androideabi-
# Install prefix: <ffmpeg source dir>/android/<cpu>
PREFIX=$(pwd)/android/$CPU
OPTIMIZE_CFLAGS="-mfloat-abi=softfp -mfpu=vfp -marm -march=$CPU "

1.2 在 Android 中引入 FFmpeg 的 so 庫#

NDK 環境、CMake 構建工具、LLDB（C/C++ 代碼調試工具）；
新建 C++ module，一般會生成以下幾個重要的文件：CMakeLists.txt、native-lib.cpp、MainActivity；
在 app/src/main/ 目錄下，新建目錄，並命名 jniLibs，這是 Android Studio 默認放置 so 動態庫的目錄；接著在 jniLibs 目錄下，新建 arm64-v8a 目錄，然後將編譯好的 .so 文件粘貼至此目錄下；然後再將編譯時生成的 .h 頭文件（FFmpeg 對外暴露的接口）粘貼至 cpp 目錄下的 include 中。以上的 .so 動態庫目錄和 .h 頭文件目錄都會在 CMakeLists.txt 中顯式聲明和鏈接進來；
最上層的 MainActivity，在這裡面加載 C/C++ 代碼編譯的庫：native-lib。native-lib 在 CMakeLists.txt 中被添加到名為 "ffmpeg" 的 library 中，所以在 System.loadLibrary()中輸入的是 "ffmpeg"；

// Entry activity of the sample: loads the native library and shows the string
// returned by the native stringFromJNI() implementation.
class MainActivity : AppCompatActivity() {

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        setContentView(R.layout.activity_main)

        // Example of a call to a native method
        sample_text.text = stringFromJNI()
    }

    // Declares an external (native) method; its implementation lives in the C/C++ layer.
    external fun stringFromJNI(): String

    companion object {

        // Load the library compiled from the C/C++ sources: "ffmpeg".
        // The library name is defined and added in CMakeLists.txt.
        init {
            System.loadLibrary("ffmpeg")
        }
    }
}

native-lib.cpp 是一個 C++ 接口文件，Java 層中聲明的 external 方法在這裡得到實現；

#include <jni.h>
#include <string>
extern "C" JNIEXPORT jstring JNICALL
Java_com_bytedance_example_MainActivity_stringFromJNI(
        JNIEnv *env,
        jobject /* this */) {
    std::string hello = "Hello from C++";
    return env->NewStringUTF(hello.c_str());
}

CMakeLists.txt 是一個構建腳本，目的是配置可以編譯出 native-lib 此 so 庫的構建信息；

# For more information about using CMake with Android Studio, read the
# documentation: https://d.android.com/studio/projects/add-native-code.html

# Sets the minimum version of CMake required to build the native library.

cmake_minimum_required(VERSION 3.10.2)

# Declares and names the project.

project("ffmpeg")

# Creates and names a library, sets it as either STATIC
# or SHARED, and provides the relative paths to its source code.
# You can define multiple libraries, and CMake builds them for you.
# Gradle automatically packages shared libraries with your APK.

# Directories of the prebuilt .so files and of the FFmpeg headers, for later use.
set(FFmpeg_lib_dir ${CMAKE_SOURCE_DIR}/../jniLibs/${ANDROID_ABI})
set(FFmpeg_head_dir ${CMAKE_SOURCE_DIR}/FFmpeg)

# Add the header search path.
include_directories(
        FFmpeg/include
)

# The target name must be identical in add_library(), target_link_libraries()
# and in System.loadLibrary("ffmpeg") on the Kotlin/Java side.
add_library( # Sets the name of the library.
        ffmpeg

        # Sets the library as a shared library.
        SHARED

        # Provides a relative path to your source file(s).
        native-lib.cpp
        )

# Searches for a specified prebuilt library and stores the path as a
# variable. Because CMake includes system libraries in the search path by
# default, you only need to specify the name of the public NDK library
# you want to add. CMake verifies that the library exists before
# completing its build.

# Import the prebuilt FFmpeg shared libraries.
add_library( avutil
        SHARED
        IMPORTED )
set_target_properties( avutil
        PROPERTIES IMPORTED_LOCATION
        ${FFmpeg_lib_dir}/libavutil.so )
add_library( swresample
        SHARED
        IMPORTED )
set_target_properties( swresample
        PROPERTIES IMPORTED_LOCATION
        ${FFmpeg_lib_dir}/libswresample.so )

add_library( avcodec
        SHARED
        IMPORTED )
set_target_properties( avcodec
        PROPERTIES IMPORTED_LOCATION
        ${FFmpeg_lib_dir}/libavcodec.so )

# libavformat (demuxing) and libswscale (pixel-format conversion) are required
# by the video decoder code, which calls avformat_* and sws_* functions.
add_library( avformat
        SHARED
        IMPORTED )
set_target_properties( avformat
        PROPERTIES IMPORTED_LOCATION
        ${FFmpeg_lib_dir}/libavformat.so )

add_library( swscale
        SHARED
        IMPORTED )
set_target_properties( swscale
        PROPERTIES IMPORTED_LOCATION
        ${FFmpeg_lib_dir}/libswscale.so )


find_library( # Sets the name of the path variable.
        log-lib

        # Specifies the name of the NDK library that
        # you want CMake to locate.
        log)

# Specifies libraries CMake should link to your target library. You
# can link multiple libraries, such as libraries you define in this
# build script, prebuilt third-party libraries, or system libraries.

target_link_libraries( # Specifies the target library.
        ffmpeg

        # Link every imported FFmpeg .so into the target library.
        avformat
        avcodec
        swscale
        swresample
        avutil

        -landroid

        # Links the target library to the log library
        # included in the NDK.
        ${log-lib})

以上的操作就將 FFmpeg 引入 Android 項目。

二、FFmpeg 解碼視頻的原理和細節#

2.1 主要流程#

2.2 基本原理#

2.2.1 常用的 ffmpeg 接口#

// Overview of the FFmpeg calls used for decoding (signatures shown without return types).
// 1. Allocate an AVFormatContext
avformat_alloc_context();
// 2. Open the input file/stream
avformat_open_input(AVFormatContext **ps, const char *url,
                        const AVInputFormat *fmt, AVDictionary **options);
// 3. Read stream information from the input
avformat_find_stream_info(AVFormatContext *ic, AVDictionary **options);
// 4. Allocate a codec context
avcodec_alloc_context3(const AVCodec *codec);
// 5. Fill the codec context from the codec parameters of the stream
avcodec_parameters_to_context(AVCodecContext *codec,
                                  const AVCodecParameters *par);
// 6. Look up the registered decoder for a codec id
avcodec_find_decoder(enum AVCodecID id);
// 7. Open the codec
avcodec_open2(AVCodecContext *avctx, const AVCodec *codec, AVDictionary **options);
// 8. Repeatedly read compressed data from the stream; for video, one packet holds one compressed frame
av_read_frame(AVFormatContext *s, AVPacket *pkt);
// 9. Send the compressed data to the decoder as input
avcodec_send_packet(AVCodecContext *avctx, const AVPacket *avpkt);
// 10. Receive the decoded output from the decoder
avcodec_receive_frame(AVCodecContext *avctx, AVFrame *frame);

2.2.2 視頻解碼的整體思路#

首先要註冊 libavformat 並且註冊所有的編解碼器、復用 / 解復用器、協議等。在舊版本的 FFmpeg 中，它是所有基於 FFmpeg 的應用程序中第一個被調用的函數，只有調用了該函數，才能正常使用 FFmpeg 的各項功能。另外，av_register_all() 自 FFmpeg 4.0 起已被標記為廢棄，並在 FFmpeg 5.0 中被移除，因此在新版本的 FFmpeg 中不需要（也無法）再加入這行代碼；

av_register_all();

打開視頻文件，提取文件中的數據流信息；

// Allocate the demuxer context, open the media file at path_, then probe its streams.
// NOTE(review): the return values of both calls are ignored here; production
// code should check them before using av_format_context.
auto av_format_context = avformat_alloc_context();
avformat_open_input(&av_format_context, path_.c_str(), nullptr, nullptr);
avformat_find_stream_info(av_format_context, nullptr);

然後獲取視頻媒體流的下標，才能找到文件中的視頻媒體流；

int video_stream_index = -1;
for (int i = 0; i < av_format_context->nb_streams; i++) {
    // 匹配找到視頻媒體流的下標，
    if (av_format_context->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
        video_stream_index = i;
        LOGD(TAG, "find video stream index = %d", video_stream_index);
        break;
    }
}

獲取視頻媒體流、查找已註冊的解碼器、獲取解碼器上下文、配置解碼器上下文的參數值、打開解碼器；

// Get the video stream
auto stream = av_format_context->streams[video_stream_index];
// Look up the registered decoder for the stream's codec id
auto codec = avcodec_find_decoder(stream->codecpar->codec_id);
// Allocate the decoder context
AVCodecContext* codec_ctx = avcodec_alloc_context3(codec);
// Copy the stream's codec parameters into the decoder context
auto ret = avcodec_parameters_to_context(codec_ctx, stream->codecpar);

if (ret >= 0) {
    // Open the decoder
    // NOTE(review): the result of avcodec_open2 is not checked here.
    avcodec_open2(codec_ctx, codec, nullptr);
    // ······
}

通過指定像素格式、圖像寬、圖像高來計算所需緩衝區需要的內存大小，分配設置緩衝區；並且由於是上屏繪製，因此我們需要用到 ANativeWindow，使用 ANativeWindow_setBuffersGeometry 設置此繪製窗口的屬性；

// Remember the video dimensions reported by the decoder context.
video_width_ = codec_ctx->width;
video_height_ = codec_ctx->height;

// Size in bytes of one RGBA image of that size (alignment argument 1).
int buffer_size = av_image_get_buffer_size(AV_PIX_FMT_RGBA,
                                           video_width_, video_height_, 1);
// Output buffer
out_buffer_ = (uint8_t*) av_malloc(buffer_size * sizeof(uint8_t));
// Setting width/height constrains the number of pixels in the window buffer, not the on-screen size.
// If the buffer does not match the displayed size, the shown image may be stretched or squeezed.
int result = ANativeWindow_setBuffersGeometry(native_window_, video_width_,
                                              video_height_, WINDOW_FORMAT_RGBA_8888);

分配內存空間給像素格式為 RGBA 的 AVFrame，用於存放轉換成 RGBA 後的幀數據；設置 rgba_frame 緩衝區，使其與 out_buffer_ 相關聯；

// Allocate the AVFrame that receives the RGBA-converted picture and point its
// data/linesize arrays at out_buffer_ (RGBA, video size, alignment argument 1).
auto rgba_frame = av_frame_alloc();
av_image_fill_arrays(rgba_frame->data, rgba_frame->linesize,
                     out_buffer_,
                     AV_PIX_FMT_RGBA,
                     video_width_, video_height_, 1);

獲取 SwsContext，它在調用 sws_scale() 進行圖像格式轉換和圖像縮放時會使用到。YUV420P 轉換為 RGBA 時可能會在調用 sws_scale 時格式轉換失敗而無法返回正確的高度值，原因跟調用 sws_getContext 時 flags 有關，需要將 SWS_BICUBIC 換成 SWS_FULL_CHR_H_INT | SWS_ACCURATE_RND；

// Conversion context for sws_scale(): source is the decoder's pixel format,
// destination is RGBA, both at the video's own width and height.
// NOTE(review): the accompanying text recommends SWS_FULL_CHR_H_INT | SWS_ACCURATE_RND
// instead of SWS_BICUBIC if sws_scale fails to return the expected height.
struct SwsContext* data_convert_context = sws_getContext(
                    video_width_, video_height_, codec_ctx->pix_fmt,
                    video_width_, video_height_, AV_PIX_FMT_RGBA,
                    SWS_BICUBIC, nullptr, nullptr, nullptr);

分配內存空間給用於存儲原始數據的 AVFrame，指向原始幀數據；並且分配內存空間給用於存放視頻解碼前數據的 AVPacket；

// frame receives decoded pictures; packet holds the compressed data read from the file.
auto frame = av_frame_alloc();
auto packet = av_packet_alloc();

從視頻碼流中循環讀取壓縮幀數據，然後開始解碼；

// Read the next packet of compressed data and decode it when it carries data.
ret = av_read_frame(av_format_context, packet);
if (packet->size) {
    Decode(codec_ctx, packet, frame, stream, lock, data_convert_context, rgba_frame);
}

在 Decode() 函數中將裝有原生壓縮數據的 packet 作為輸入發送給解碼器；

/* Send the packet holding the compressed data to the decoder; ret receives the status code. */
ret = avcodec_send_packet(codec_ctx, pkt);

解碼器返回解碼後的幀數據到指定的 frame 上，後續可對已解碼 frame 的 pts 換算為時間戳，按時間軸的顯示順序逐幀繪製到播放的畫面上；

while (ret >= 0 && !is_stop_) {
    // 返回解碼後的數據到 frame
    ret = avcodec_receive_frame(codec_ctx, frame);
    if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
        return;
    } else if (ret < 0) {
        return;
    }
    // 拿到當前解碼後的 frame，對其 pts 換算成時間戳，以便於跟傳入的指定時間戳進行比
    auto decode_time_ms = frame->pts * 1000 / stream->time_base.den;
    if (decode_time_ms >= time_ms_) {
        last_decode_time_ms_ = decode_time_ms;
        is_seeking_ = false;
        // ······
        // 圖片數據格式轉換
        // ······
        // 把轉換後的數據繪製到屏幕上
    }
    av_packet_unref(pkt);
}

繪製畫面之前，要進行圖片數據格式的轉換，這裡就要用到前面獲取到的 SwsContext；

// Convert the decoded picture into rgba_frame (rows 0..video_height_).
int result = sws_scale(
        sws_context,
        (const uint8_t* const*) frame->data, frame->linesize,
        0, video_height_,
        rgba_frame->data, rgba_frame->linesize);

// A non-positive result is treated as a failed conversion.
if (result <= 0) {
    LOGE(TAG, "Player Error : data convert fail");
    return;
}

因為是上屏繪製，所以用到了 ANativeWindow 和 ANativeWindow_Buffer。在繪製畫面之前，需要先鎖定窗口的下一個繪圖 surface，然後將要顯示的幀數據寫入到緩衝區中，最後解鎖窗口的繪圖 surface，將緩衝區的數據發布到屏幕顯示上；

// Render: lock the window's next drawing buffer, copy the picture in, then post it.
result = ANativeWindow_lock(native_window_, &window_buffer_, nullptr);
if (result < 0) {
    LOGE(TAG, "Player Error : Can not lock native window");
} else {
    // Draw the image into the window buffer.
    // Note: a row of rgba_frame and a row of window_buffer may differ in length,
    // so copy row by row with each side's own stride; otherwise the picture may be garbled.
    auto bits = (uint8_t*) window_buffer_.bits;
    for (int h = 0; h < video_height_; h++) {
        // Destination stride is multiplied by 4 (bytes per RGBA pixel).
        memcpy(bits + h * window_buffer_.stride * 4,
               out_buffer_ + h * rgba_frame->linesize[0],
               rgba_frame->linesize[0]);
    }
    ANativeWindow_unlockAndPost(native_window_);
}

以上就是主要的解碼過程。除此之外，因為 C++ 使用資源和內存空間時需要自行釋放，所以解碼結束後還需要調用釋放的接口釋放資源，以免造成內存泄漏。

// Release everything acquired during decoding.
sws_freeContext(data_convert_context);
av_free(out_buffer_);
av_frame_free(&rgba_frame);
av_frame_free(&frame);
av_packet_free(&packet);

// avcodec_free_context() also closes the codec, so a separate
// (deprecated) avcodec_close() call is not needed.
avcodec_free_context(&codec_ctx);

// avformat_close_input() closes the input, frees the context and sets the
// pointer to null, so no additional avformat_free_context() call is needed.
avformat_close_input(&av_format_context);
ANativeWindow_release(native_window_);

2.3 簡單應用#

為了更好地理解視頻解碼的過程，這裡封裝一個視頻解碼器 VideoDecoder，解碼器初步會有以下幾個函數：

// Creates a decoder for the media file at path; on_decode_frame receives a timestamp.
VideoDecoder(const char* path, std::function<void(long timestamp)> on_decode_frame);

// Prepares decoding for rendering into the given native window.
void Prepare(ANativeWindow* window);

// Requests the frame at time_ms (milliseconds); the bool result reports whether the request was accepted.
bool DecodeFrame(long time_ms);

// Releases the decoder.
void Release();

在這個視頻解碼器中，輸入指定時間戳後會返回解碼的這一幀數據。其中較為重要的是 DecodeFrame(long time_ms) 函數，它可以由使用者自行調用，傳入指定幀的時間戳，進而解碼對應的幀數據。此外，可以增加同步鎖以實現解碼線程和使用線程分離。

2.3.1 加入同步鎖實現視頻播放#

若只要對視頻進行解碼，是不需要使用同步等待的；

但若是要實現視頻的播放，那麼每解碼繪製完一幀就需使用鎖進行同步等待，這是因為播放視頻時需要讓解碼和繪製分離、且按照一定的時間軸順序和速度進行解碼和繪製。

// Block the decode loop until the condition variable is notified.
condition_.wait(lock);

在上層調用 DecodeFrame 函數傳入解碼的時間戳時喚醒同步鎖，讓解碼繪製的循環繼續執行。

// Stores the requested timestamp and wakes the waiting decode loop.
bool VideoDecoder::DecodeFrame(long time_ms) {
    // ······
    time_ms_ = time_ms;
    // Wake every thread waiting on condition_.
    condition_.notify_all();
    return true;
}

2.3.2 播放時加入 seek_frame#

在正常播放情況下，視頻是一幀一幀逐幀解碼播放；但在拖動進度條到達指定的 seek 點的情況下，如果還是從頭到尾逐幀解碼到 seek 點的話，效率可能不太高。這時候就需要在一定規則內對 seek 點的時間戳做檢查，符合條件的直接 seek 到指定的時間戳。

FFmpeg 中的 av_seek_frame#

av_seek_frame 可以定位到關鍵幀和非關鍵幀，這取決於選擇的 flag 值。因為視頻的解碼需要依賴關鍵幀，所以一般我們需要定位到關鍵幀；

// Prototype of av_seek_frame: seeks stream stream_index of s to timestamp,
// with flags (AVSEEK_FLAG_*) selecting the seek mode.
int av_seek_frame(AVFormatContext *s, int stream_index, int64_t timestamp,
                  int flags);

av_seek_frame 中的 flag 是用來指定尋找的 I 幀和傳入的時間戳之間的位置關係。當要 seek 已過去的時間戳時，時間戳不一定會剛好處在 I 幀的位置，但因為解碼需要依賴 I 幀，所以需要先找到此時間戳附近一個的 I 幀，此時 flag 就表明要 seek 到當前時間戳的前一個 I 幀還是後一個 I 幀；
flag 有四個選項：

flag 選項	描述
AVSEEK_FLAG_BACKWARD	第一個 Flag 是 seek 到請求的時間戳之前最近的關鍵幀。通常情況下，seek 以 ms 為單位，若指定的 ms 時間戳剛好不是關鍵幀（大幾率），會自動往回 seek 到最近的關鍵幀。雖然這種 flag 定位並不是非常精確，但能夠較好地處理掉馬賽克的問題，因為 BACKWARD 的方式會向回查找關鍵幀處，定位到關鍵幀處。
AVSEEK_FLAG_BYTE	第二個 Flag 表示傳入的 timestamp 被解釋為文件中的字節位置，按字節位置進行 seek；並非所有解封裝器都支持這種方式。
AVSEEK_FLAG_ANY	第三個 Flag 是可以 seek 到任意幀，不一定是關鍵幀，因此使用時可能出現花屏（馬賽克），但進度和手滑完全一致。
AVSEEK_FLAG_FRAME	第四個 Flag 表示傳入的 timestamp 被解釋為幀序號（frame number），按幀序號進行 seek；它本身並不決定向前還是向後查找關鍵幀，且並非所有解封裝器都支持這種方式。

flag 可能同時包含以上的多個值。比如 AVSEEK_FLAG_BACKWARD | AVSEEK_FLAG_BYTE；
FRAME 和 BACKWARD 是按幀之間的間隔推算出 seek 的目標位置，適合快進快退；BYTE 則適合大幅度滑動。

seek 的場景#

解碼時傳入的時間戳若是往前進的方向，並且超過上一幀時間戳有一定距離就需要 seek，這裡的 “一定距離” 是通過多次實驗估算所得，並非都是以下代碼中使用的 1000ms；
如果是往後退的方向且小於上一次解碼時間戳，但與上一次解碼時間戳的距離比較大（比如已超過 50ms），就要 seek 到上個關鍵幀；
使用 bool 變量 is_seeking_ 是為了防止其他干擾當前 seeking 的操作，目的是控制當前只有一個 seek 操作在進行。

if (!is_seeking_ && (time_ms_ > last_decode_time_ms_ + 1000 ||
                     time_ms_ < last_decode_time_ms_ - 50)) {
    is_seeking_ = true;
    // seek 時傳入的是指定幀帶有 time_base 的時間戳，因此要用 times_ms 進行推算
    LOGD(TAG, "seek frame time_ms_ = %ld， last_decode_time_ms_ = %ld", time_ms_,
         last_decode_time_ms_);
    av_seek_frame(av_format_context,
                  video_stream_index,
                  time_ms_ * stream->time_base.den / 1000,
                  AVSEEK_FLAG_BACKWARD);
}

插入 seek 的邏輯#

因為在解碼前要檢查是否 seek，所以要在 av_read_frame 函數（返回視頻媒體流下一幀）之前插入 seek 的邏輯，符合 seek 條件時使用 av_seek_frame 到達指定 I 幀，接著 av_read_frame 後再繼續解碼到目的時間戳的位置。

// The seek check goes here, before the read.
// Then read the next packet of the stream.
int ret = av_read_frame(av_format_context, packet);

2.4 解碼過程中的細節#

2.4.1 DecodeFrame 時 seek 的條件#

使用 av_seek_frame 函數時需要指定正確的 flag，並且還要約定進行 seek 操作時的條件，否則視頻可能會出現花屏（馬賽克）。

// Abbreviated form of the seek condition: seek (backward, to a key frame) only
// when the request is more than 1000 ms ahead of or more than 50 ms behind the
// last decoded frame, and no seek is in progress.
if (!is_seeking_ && (time_ms_ > last_decode_time_ms_ + 1000 ||
                     time_ms_ < last_decode_time_ms_ - 50)) {
    is_seeking_ = true;
    av_seek_frame(···,···,···,AVSEEK_FLAG_BACKWARD);
}

2.4.2 減少解碼的次數#

在視頻解碼時，在有些條件下是可以不用對傳入時間戳的幀數據進行解碼的。比如：

當前解碼時間戳若是前進方向並且與上一次的解碼時間戳相同或者與當前正在解碼的時間戳相同，則不需要進行解碼；
當前解碼時間戳若不大於上一次的解碼時間戳並且與上一次的解碼時間戳之間的距離相差較小（比如未超過 50ms），則不需要進行解碼。

bool VideoDecoder::DecodeFrame(long time_ms) {
    LOGD(TAG, "DecodeFrame time_ms = %ld", time_ms);
    if (last_decode_time_ms_ == time_ms || time_ms_ == time_ms) {
        LOGD(TAG, "DecodeFrame last_decode_time_ms_ == time_ms");
        return false;
    }
    if (time_ms <= last_decode_time_ms_ &&
        time_ms + 50 >= last_decode_time_ms_) {
        return false;
    }
    time_ms_ = time_ms;
    condition_.notify_all();
    return true;
}

有了以上這些條件的約束後，會減少一些不必要的解碼操作。

2.4.3 使用 AVFrame 的 pts#

AVPacket 存儲解碼前的數據（編碼數據/AAC 等），保存的是解封裝之後、解碼前的數據，仍然是壓縮數據；AVFrame 存儲解碼後的數據（像素數據/RGB/PCM 等）；
AVPacket 的 pts 和 AVFrame 的 pts 意義存在差異。前者表示這個解壓包何時顯示，後者表示幀數據何時顯示；

// pts field of AVPacket
   /**
    * Presentation timestamp in AVStream->time_base units; the time at which
    * the decompressed packet will be presented to the user.
    * Can be AV_NOPTS_VALUE if it is not stored in the file.
    * pts MUST be larger or equal to dts as presentation cannot happen before
    * decompression, unless one wants to view hex dumps. Some formats misuse
    * the terms dts and pts/cts to mean something different. Such timestamps
    * must be converted to true pts/dts before they are stored in AVPacket.
    */
   int64_t pts;

   // pts field of AVFrame
   /**
    * Presentation timestamp in time_base units (time when frame should be shown to user).
    */
   int64_t pts;

是否將當前解碼的幀數據繪製到畫面上，取決於傳入的解碼時間戳與當前解碼器返回的已解碼幀的時間戳的比較結果。這裡不可使用 AVPacket 的 pts，它很可能不是一個遞增的時間戳；
需要進行畫面繪製的前提是：當傳入指定的解碼時間戳不大於當前已解碼 frame 的 pts 換算後的時間戳時進行畫面繪製。

auto decode_time_ms = frame->pts * 1000 / stream->time_base.den;
LOGD(TAG, "decode_time_ms = %ld", decode_time_ms);
if (decode_time_ms >= time_ms_) {
    last_decode_time_ms_ = decode_time_ms;
    is_seeking = false;
    // 畫面繪製
    // ····
}

2.4.4 解碼最後一幀時視頻已經沒有數據#

使用 av_read_frame(av_format_context, packet)返回視頻媒體流下一幀到 AVPacket 中。如果函數返回的 int 值是 0 則是 Success，如果小於 0 則是 Error 或者 EOF。

因此如果在播放視頻時返回的是小於 0 的值，調用 avcodec_flush_buffers 函數重置解碼器的狀態，flush 緩衝區中的內容，然後再 seek 到當前傳入的時間戳處，完成解碼後的回調，再讓同步鎖進行等待。

// 讀取碼流中的音頻若干幀或者視頻一幀，
// 這裡是讀取視頻一幀（完整的一幀），獲取的是一幀視頻的壓縮數據，接下來才能對其進行解碼
ret = av_read_frame(av_format_context, packet);
if (ret < 0) {
    avcodec_flush_buffers(codec_ctx);
    av_seek_frame(av_format_context, video_stream_index,
                  time_ms_ * stream->time_base.den / 1000, AVSEEK_FLAG_BACKWARD);
    LOGD(TAG, "ret < 0, condition_.wait(lock)");
    // 防止解碼最後一幀時視頻已經沒有數據
    on_decode_frame_(last_decode_time_ms_);
    condition_.wait(lock);
}

2.5 上層封裝解碼器 VideoDecoder#

如果要在上層封裝一個 VideoDecoder，只需要將 C++ 層 VideoDecoder 的接口暴露在 native-lib.cpp 中，然後上層通過 JNI 的方式調用 C++ 的接口。

比如上層要傳入指定的解碼時間戳進行解碼時，寫一個 decodeFrame 方法，然後把時間戳傳到 C++ 層的 nativeDecodeFrame 進行解碼，而 nativeDecodeFrame 這個方法的實現就寫在 native-lib.cpp 中。

// FFmpegVideoDecoder.kt
/**
 * Kotlin wrapper around the native C++ VideoDecoder.
 *
 * @param path path of the media file to decode
 * @param onDecodeFrame callback receiving a timestamp, a SurfaceTexture and a render flag
 */
class FFmpegVideoDecoder(
    path: String,
    val onDecodeFrame: (timestamp: Long, texture: SurfaceTexture, needRender: Boolean) -> Unit
){
    /**
     * Requests the frame at [timeMS] (milliseconds); [sync] selects whether to
     * wait synchronously for the result.
     */
    fun decodeFrame(timeMS: Long, sync: Boolean = false) {
        // Do not wait when the native decoder reports that no frame needs to be extracted.
        if (nativeDecodeFrame(decoderPtr, timeMS) && sync) {
            // ······
        } else {
            // ······
        }
    }

    // Implemented in native-lib.cpp; `decoder` is the native VideoDecoder handle.
    private external fun nativeDecodeFrame(decoder: Long, timeMS: Long): Boolean

    companion object {
        const val TAG = "FFmpegVideoDecoder"

        init {
            // Must match the library name used by MainActivity and declared in CMakeLists.txt.
            System.loadLibrary("ffmpeg")
        }
    }
}

然後在 native-lib.cpp 中調用 C++ 層 VideoDecoder 的接口 DecodeFrame，這樣就通過 JNI 的方式建立起了上層和 C++ 底層之間的聯繫

// native-lib.cpp
extern "C"
JNIEXPORT jboolean JNICALL
Java_com_example_decoder_video_FFmpegVideoDecoder_nativeDecodeFrame(JNIEnv* env,
                                                               jobject thiz,
                                                               jlong decoder,
                                                               jlong time_ms) {
    auto videoDecoder = (codec::VideoDecoder*)decoder;
    return videoDecoder->DecodeFrame(time_ms);
}

三、心得#

技術經驗

FFmpeg 編譯後與 Android 結合起來實現視頻的解碼播放，便捷性很高。
由於是用 C++ 層實現具體的解碼流程，會有學習難度，最好有一定的 C++ 基礎。

四、附錄#

C++ 封裝的 VideoDecoder

VideoDecoder.h

#include <jni.h>
#include <mutex>
#include <android/native_window.h>
#include <android/native_window_jni.h>
#include <time.h>

extern "C" {
#include <libavformat/avformat.h>
#include <libavcodec/avcodec.h>
#include <libswresample/swresample.h>
#include <libswscale/swscale.h>
}
#include <string>
/*
 * VideoDecoder 可用於解碼某個音視頻文件（比如.mp4）中視頻媒體流的數據。
 * Java 層傳入指定文件的路徑後，可以按一定 fps 循環傳入指定的時間戳進行解碼（抽幀），這一實現由 C++ 提供的 DecodeFrame 來完成。
 * 在每次解碼結束時，將解碼某一幀的時間戳回調給上層的解碼器，以供其他操作使用。
 */
namespace codec {
class VideoDecoder {

private:
    std::string path_;
    long time_ms_ = -1;
    long last_decode_time_ms_ = -1;
    bool is_seeking_ = false;
    ANativeWindow* native_window_ = nullptr;
    ANativeWindow_Buffer window_buffer_{};、
    // 視頻寬高屬性
    int video_width_ = 0;
    int video_height_ = 0;
    uint8_t* out_buffer_ = nullptr;
    // on_decode_frame 用於將抽取指定幀的時間戳回調給上層解碼器，以供上層解碼器進行其他操作。
    std::function<void(long timestamp)> on_decode_frame_ = nullptr;
    bool is_stop_ = false;

    // 會與在循環同步時用的鎖 “std::unique_lock<std::mutex>” 配合使用
    std::mutex work_queue_mtx;
    // 真正在進行同步等待和喚醒的屬性
    std::condition_variable condition_;
    // 解碼器真正進行解碼的函數
    void Decode(AVCodecContext* codec_ctx, AVPacket* pkt, AVFrame* frame, AVStream* stream,
                std::unique_lock<std::mutex>& lock, SwsContext* sws_context, AVFrame* pFrame);

public:
    // 新建解碼器時要傳入媒體文件路徑和一個解碼後的回調 on_decode_frame。
    VideoDecoder(const char* path, std::function<void(long timestamp)> on_decode_frame);
    // 在 JNI 層將上層傳入的 Surface 包裝後新建一個 ANativeWindow 傳入，在後面解碼後繪製幀數據時需要用到
    void Prepare(ANativeWindow* window);
    // 抽取指定時間戳的視頻幀，可由上層調用
    bool DecodeFrame(long time_ms);
    // 釋放解碼器資源
    void Release();
    // 獲取當前系統毫秒時間
    static int64_t GetCurrentMilliTime(void);
};

}

VideoDecoder.cpp

#include "VideoDecoder.h"
#include "../log/Logger.h"

#include <sys/time.h>

#include <algorithm>
#include <cstring>
#include <thread>
#include <utility>

extern "C" {
#include <libavutil/imgutils.h>
}

#define TAG "VideoDecoder"
namespace codec {

// Stores the media path and takes ownership of the decode callback.
VideoDecoder::VideoDecoder(const char* path, std::function<void(long timestamp)> on_decode_frame)
        : path_(path), on_decode_frame_(std::move(on_decode_frame)) {}

void VideoDecoder::Decode(AVCodecContext* codec_ctx, AVPacket* pkt, AVFrame* frame, AVStream* stream,
                     std::unique_lock<std::mutex>& lock, SwsContext* sws_context,
                     AVFrame* rgba_frame) {

    int ret;
    /* send the packet with the compressed data to the decoder */
    ret = avcodec_send_packet(codec_ctx, pkt);
    if (ret == AVERROR(EAGAIN)) {
        LOGE(TAG,
             "Decode: Receive_frame and send_packet both returned EAGAIN, which is an API violation.");
    } else if (ret < 0) {
        return;
    }

    // read all the output frames (infile general there may be any number of them
    while (ret >= 0 && !is_stop_) {
        // 對於frame, avcodec_receive_frame內部每次都先調用
        ret = avcodec_receive_frame(codec_ctx, frame);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
            return;
        } else if (ret < 0) {
            return;
        }
        int64_t startTime = GetCurrentMilliTime();
        LOGD(TAG, "decodeStartTime: %ld", startTime);
        // 換算當前解碼的frame時間戳
        auto decode_time_ms = frame->pts * 1000 / stream->time_base.den;
        LOGD(TAG, "decode_time_ms = %ld", decode_time_ms);
        if (decode_time_ms >= time_ms_) {
            LOGD(TAG, "decode decode_time_ms = %ld, time_ms_ = %ld", decode_time_ms, time_ms_);
            last_decode_time_ms_ = decode_time_ms;
            is_seeking_ = false;

            // 數據格式轉換
            int result = sws_scale(
                    sws_context,
                    (const uint8_t* const*) frame->data, frame->linesize,
                    0, video_height_,
                    rgba_frame->data, rgba_frame->linesize);

            if (result <= 0) {
                LOGE(TAG, "Player Error : data convert fail");
                return;
            }

            // 播放
            result = ANativeWindow_lock(native_window_, &window_buffer_, nullptr);
            if (result < 0) {
                LOGE(TAG, "Player Error : Can not lock native window");
            } else {
                // 將圖像繪製到界面上
                auto bits = (uint8_t*) window_buffer_.bits;
                for (int h = 0; h < video_height_; h++) {
                    memcpy(bits + h * window_buffer_.stride * 4,
                           out_buffer_ + h * rgba_frame->linesize[0],
                           rgba_frame->linesize[0]);
                }
                ANativeWindow_unlockAndPost(native_window_);
            }
            on_decode_frame_(decode_time_ms);
            int64_t endTime = GetCurrentMilliTime();
            LOGD(TAG, "decodeEndTime - decodeStartTime: %ld", endTime - startTime);
            LOGD(TAG, "finish decode frame");
            condition_.wait(lock);
        }
        // 主要作用是清理AVPacket中的所有空間數據，清理完畢後進行初始化操作，並且將 data 與 size 置為0，方便下次調用。
        // 釋放 packet 引用
        av_packet_unref(pkt);
    }
}

void VideoDecoder::Prepare(ANativeWindow* window) {
    native_window_ = window;
    av_register_all();
    auto av_format_context = avformat_alloc_context();
    avformat_open_input(&av_format_context, path_.c_str(), nullptr, nullptr);
    avformat_find_stream_info(av_format_context, nullptr);
    int video_stream_index = -1;
    for (int i = 0; i < av_format_context->nb_streams; i++) {
        // 找到視頻媒體流的下標
        if (av_format_context->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
            video_stream_index = i;
            LOGD(TAG, "find video stream index = %d", video_stream_index);
            break;
        }
    }

    // run once
    do {
        if (video_stream_index == -1) {
            codec::LOGE(TAG, "Player Error : Can not find video stream");
            break;
        }
        std::unique_lock<std::mutex> lock(work_queue_mtx);

        // 獲取視頻媒體流
        auto stream = av_format_context->streams[video_stream_index];
        // 找到已註冊的解碼器
        auto codec = avcodec_find_decoder(stream->codecpar->codec_id);
        // 獲取解碼器上下文
        AVCodecContext* codec_ctx = avcodec_alloc_context3(codec);
        auto ret = avcodec_parameters_to_context(codec_ctx, stream->codecpar);

        if (ret >= 0) {
            // 打開
            avcodec_open2(codec_ctx, codec, nullptr);
            // 解碼器打開後才有寬高的值
            video_width_ = codec_ctx->width;
            video_height_ = codec_ctx->height;

            AVFrame* rgba_frame = av_frame_alloc();
            int buffer_size = av_image_get_buffer_size(AV_PIX_FMT_RGBA, video_width_, video_height_,
                                                       1);
            // 分配內存空間給輸出 buffer
            out_buffer_ = (uint8_t*) av_malloc(buffer_size * sizeof(uint8_t));
            av_image_fill_arrays(rgba_frame->data, rgba_frame->linesize, out_buffer_,
                                 AV_PIX_FMT_RGBA,
                                 video_width_, video_height_, 1);

            // 通過設置寬高限制緩衝區中的像素數量，而非屏幕的物理顯示尺寸。
            // 如果緩衝區與物理屏幕的顯示尺寸不相符，則實際顯示可能會是拉伸，或者被壓縮的圖像
            int result = ANativeWindow_setBuffersGeometry(native_window_, video_width_,
                                                          video_height_, WINDOW_FORMAT_RGBA_8888);
            if (result < 0) {
                LOGE(TAG, "Player Error : Can not set native window buffer");
                avcodec_close(codec_ctx);
                avcodec_free_context(&codec_ctx);
                av_free(out_buffer_);
                break;
            }

            auto frame = av_frame_alloc();
            auto packet = av_packet_alloc();

            struct SwsContext* data_convert_context = sws_getContext(
                    video_width_, video_height_, codec_ctx->pix_fmt,
                    video_width_, video_height_, AV_PIX_FMT_RGBA,
                    SWS_BICUBIC, nullptr, nullptr, nullptr);
            while (!is_stop_) {
                LOGD(TAG, "front seek time_ms_ = %ld, last_decode_time_ms_ = %ld", time_ms_,
                     last_decode_time_ms_);
                if (!is_seeking_ && (time_ms_ > last_decode_time_ms_ + 1000 ||
                                     time_ms_ < last_decode_time_ms_ - 50)) {
                    is_seeking_ = true;
                    LOGD(TAG, "seek frame time_ms_ = %ld， last_decode_time_ms_ = %ld", time_ms_,
                         last_decode_time_ms_);
                    // 傳進去的是指定幀帶有 time_base 的時間戳，所以是要將原來的 times_ms 按照上面獲取時的計算方式反推算出時間戳
                    av_seek_frame(av_format_context, video_stream_index,
                                  time_ms_ * stream->time_base.den / 1000, AVSEEK_FLAG_BACKWARD);
                }
                // 讀取視頻一幀（完整的一幀），獲取的是一幀視頻的壓縮數據，接下來才能對其進行解碼
                ret = av_read_frame(av_format_context, packet);
                if (ret < 0) {
                    avcodec_flush_buffers(codec_ctx);
                    av_seek_frame(av_format_context, video_stream_index,
                                  time_ms_ * stream->time_base.den / 1000, AVSEEK_FLAG_BACKWARD);
                    LOGD(TAG, "ret < 0, condition_.wait(lock)");
                    // 防止解碼最後一幀時視頻已經沒有數據
                    on_decode_frame_(last_decode_time_ms_);
                    condition_.wait(lock);
                }
                if (packet->size) {
                    Decode(codec_ctx, packet, frame, stream, lock, data_convert_context,
                           rgba_frame);
                }
            }
            // 釋放資源
            sws_freeContext(data_convert_context);
            av_free(out_buffer_);
            av_frame_free(&rgba_frame);
            av_frame_free(&frame);
            av_packet_free(&packet);

        }
        avcodec_close(codec_ctx);
        avcodec_free_context(&codec_ctx);

    } while (false);
    avformat_close_input(&av_format_context);
    avformat_free_context(av_format_context);
    ANativeWindow_release(native_window_);
    delete this;
}

bool VideoDecoder::DecodeFrame(long time_ms) {
    LOGD(TAG, "DecodeFrame time_ms = %ld", time_ms);
    if (last_decode_time_ms_ == time_ms || time_ms_ == time_ms) {
        LOGD(TAG, "DecodeFrame last_decode_time_ms_ == time_ms");
        return false;
    }
    if (last_decode_time_ms_ >= time_ms && last_decode_time_ms_ <= time_ms + 50) {
        return false;
    }
    time_ms_ = time_ms;
    condition_.notify_all();
}

// Asks the decode loop to stop: sets the stop flag first, then wakes any thread
// waiting on condition_ so it can observe the flag.
void VideoDecoder::Release() {
    is_stop_ = true;
    condition_.notify_all();
}

/**
 * 獲取當前的毫秒級時間
 */
int64_t VideoDecoder::GetCurrentMilliTime(void) {
    struct timeval tv{};
    gettimeofday(&tv, nullptr);
    return tv.tv_sec * 1000.0 + tv.tv_usec / 1000.0;
}

}