最近项目里有一个奇怪的需求:为编码完成后的AAC文件添加ID3v2 header。这个ID3v2是个什么东西呢?简单的说就是一系列元数据,里面存储了一些跟歌曲相关的信息(比如:演唱者、歌曲名、备注等等),比如说我在网络上下载了一首歌曲,在windows的文件浏览器下有时就能看到这首歌曲的一些元数据信息(如果有的话),而这些信息就是存储在ID3v2 header中的。
一般网络上大部分的MP3文件都带有这个东西,利用FFmpeg就可以提取到这些信息。我遇到的场景比较特殊,因为公司的解码器目前只支持播放AAC格式的,所以需要把任意格式转成AAC,但问题就出在这儿,我用FFmpeg把源文件转成AAC格式后,源文件里的元数据被FFmpeg剔除了,也就是说我转码后的AAC文件是不包含ID3v2 header的,而公司又想往AAC文件里面加这个东西,没办法,只能硬着头皮去做了。
最开始我想找一个开源的库去做这件事情,后来看了下ID3v2的标准格式以及公司要求写入的内容,觉得并不是那么麻烦,就自己动手撸一个了。
关于ID3v2 的标准格式可参考:ID3 tag version 2.4.0 - Main Structure, 或者是ID3的官网,里面介绍得都比较详细,就不再赘述,我们公司要求写入的内容主要包含以下两部分:
1. ID3 Header
2. ID3 Frame
其中的Frame ID 一共要写5组:分别是“TONF”、“TIT2”、“TALB”、“TPE1”、“TXXX”,用以分别标识:原始文件名、歌曲名、专辑名、演唱者、AAC的格式类型。大概要求就是这样,下面是具体的Code:
ID3v2.h(注意:下面的 .cpp 文件是以 "BBProID3v2Header.h" 这个文件名来 #include 本头文件的,实际保存时请使用该文件名,或者相应修改 #include)
#ifndef AUDIOCONVERT_ID3V2HEADER_H
#define AUDIOCONVERT_ID3V2HEADER_H

/* Make the header self-contained: it uses size_t and the fixed-width
 * integer types, so it must not rely on the includer pulling them in first. */
#include <stddef.h> /* size_t */
#include <stdint.h> /* uint8_t, uint16_t, uint32_t */

/**
 * AAC stream format of the transcoded file.
 * The numeric value is what gets stored (as a single byte) in the TXXX frame.
 */
typedef enum {
    AAC_LATM_NORMAL,
    AAC_ADTS,
    AAC_RAW_DATA,
    AAC_LATM_SIMPLE,
} AAC_TYPE;

/**
 * On-disk layout of the 10-byte ID3v2 tag header.
 * Packed so that the struct can be copied to the file byte-for-byte.
 */
typedef struct __attribute__((__packed__)) ID3_HEADER {
    uint8_t id3_Identifier[3]; /* The ASCII characters "ID3" */
    uint8_t id3_Version;       /* Major version */
    uint8_t id3_Revision;      /* Revision number */
    uint8_t id3_Flags;         /* Tag flags */
    uint32_t id3_Size;         /* Size of the tag body that follows this header.
                                * Stored most-significant byte first; the ID3v2
                                * specification defines it as a synchsafe integer
                                * (only 7 significant bits per byte). */
} ID3_HEADER_t;

/**
 * On-disk layout of an ID3v2 frame: a 10-byte frame header immediately
 * followed by the frame content.
 */
typedef struct __attribute__((__packed__)) ID3_FRAME {
    uint8_t frame_ID[4];      /* Four-character frame identifier, e.g. "TIT2" */
    uint32_t frame_Size;      /* Content size (frame header excluded), big endian */
    uint16_t frame_Flags;     /* Frame flags */
    uint8_t frame_content[0]; /* Variable-length content (zero-length array,
                               * a GNU extension; contributes nothing to sizeof) */
} ID3_FRAME_t;

/**
 * Complete tag as it is written in front of the AAC data:
 * the ID3v2 header followed by the concatenated frames.
 */
typedef struct __attribute__((__packed__)) BBPRO_AAC_AUDIO_HEADER {
    ID3_HEADER_t id_header;
    uint8_t id3_frames[0]; /* Variable-length sequence of ID3_FRAME_t records */
} BBPRO_AAC_AUDIO_HEADER_t;

/**
 * Generate ID3v2 tag data from the given metadata.
 *
 * @param out_audio_header      [out] Receives a pointer to the generated tag;
 *                              released by the caller
 * @param origin_file_name      Original file name (file name before transcoding)
 * @param origin_file_name_len  Length of the original file name in bytes
 * @param song_name             Song name
 * @param song_name_len         Length of the song name in bytes
 * @param album_name            Album name
 * @param album_name_len        Length of the album name in bytes
 * @param singer_name           Singer name
 * @param singer_name_len       Length of the singer name in bytes
 * @param aac_type              AAC type after transcoding
 * @return The length in bytes of the generated tag
 */
extern size_t generate_bbpro_audio_header(uint8_t **out_audio_header,
                                          const char *origin_file_name,
                                          size_t origin_file_name_len,
                                          const char *song_name,
                                          size_t song_name_len,
                                          const char *album_name,
                                          size_t album_name_len,
                                          const char *singer_name,
                                          size_t singer_name_len,
                                          AAC_TYPE aac_type);

#endif //AUDIOCONVERT_ID3V2HEADER_H
ID3v2.cpp
// // Created by nisha_chen on 2020/11/24. // #include <stdint.h> #include <malloc.h> #include <string.h> #include <arpa/inet.h> #include "BBProID3v2Header.h" #include "SLLog.h" char m_id3_header[] = "ID3"; #define DEFAULT_FRAME_FLAGS 0 #define EXTEND_FRAME_PADDING 0x080 /** * Generate id3v2 tag header information according to the input parameters. * * @param out_audio_header [out]param Output tag header pointer, Released by the caller * @param origin_file_name Original file name. File name before transcoding * @param origin_file_name_len The length of the original file name * @param song_name Song name * @param song_name_len The length of the song name * @param album_name Album name * @param album_name_len Length of album name * @param singer_name Author name * @param singer_name_len Length of author name * @param aac_type AAC type after transcoding * @return The length of the generated id3v2 label */ size_t generate_bbpro_audio_header(uint8_t **out_audio_header, const char *origin_file_name, size_t origin_file_name_len, const char *song_name, size_t song_name_len, const char *album_name, size_t album_name_len, const char *singer_name, size_t singer_name_len, AAC_TYPE aac_type ) { /* ID3 Header */ ID3_HEADER_t id3_header; // Identifier id3_header.id3_Identifier[0] = 'I'; id3_header.id3_Identifier[1] = 'D'; id3_header.id3_Identifier[2] = '3'; // Version id3_header.id3_Version = 0x03; // Revision id3_header.id3_Revision = 0x00; // Flags id3_header.id3_Flags = 0; // Size, wait fill value /* TONF Frame (Origin File Name, File name before transcoding) */ uint32_t origin_file_name_frame_size; if (origin_file_name) { origin_file_name_frame_size = sizeof(ID3_FRAME_t) + origin_file_name_len; } else { origin_file_name_frame_size = sizeof(ID3_FRAME_t); } ID3_FRAME_t *pOriginFileNameFrame = (ID3_FRAME_t *) malloc(origin_file_name_frame_size); memset(pOriginFileNameFrame, 0, origin_file_name_frame_size); // Frame ID pOriginFileNameFrame->frame_ID[0] = 'T'; 
pOriginFileNameFrame->frame_ID[1] = 'O'; pOriginFileNameFrame->frame_ID[2] = 'N'; pOriginFileNameFrame->frame_ID[3] = 'F'; // Frame Size pOriginFileNameFrame->frame_Size = htonl(origin_file_name_len); // Frame Flag pOriginFileNameFrame->frame_Flags = DEFAULT_FRAME_FLAGS; // Frame content if (origin_file_name) { memcpy(pOriginFileNameFrame->frame_content, origin_file_name, origin_file_name_len); } /* TIT2 Frame (Name of this song) */ uint32_t song_name_frame_size; if (song_name) { song_name_frame_size = sizeof(ID3_FRAME_t) + song_name_len; } else { song_name_frame_size = sizeof(ID3_FRAME_t); } ID3_FRAME_t *pSongNameFrame = (ID3_FRAME_t *) malloc(song_name_frame_size); memset(pSongNameFrame, 0, song_name_frame_size); // Frame ID pSongNameFrame->frame_ID[0] = 'T'; pSongNameFrame->frame_ID[1] = 'I'; pSongNameFrame->frame_ID[2] = 'T'; pSongNameFrame->frame_ID[3] = '2'; // Frame Size pSongNameFrame->frame_Size = htonl(song_name_len); // Frame Flag pSongNameFrame->frame_Flags = DEFAULT_FRAME_FLAGS; // Frame Content if (song_name) { memcpy(pSongNameFrame->frame_content, song_name, song_name_len); } /* TALB Frame (Album name) */ uint32_t album_name_frame_size; if (album_name) { album_name_frame_size = sizeof(ID3_FRAME_t) + album_name_len; } else { album_name_frame_size = sizeof(ID3_FRAME_t); } ID3_FRAME_t *pAlbumNameFrame = (ID3_FRAME_t *) malloc(album_name_frame_size); memset(pAlbumNameFrame, 0, album_name_frame_size); // Frame ID pAlbumNameFrame->frame_ID[0] = 'T'; pAlbumNameFrame->frame_ID[1] = 'A'; pAlbumNameFrame->frame_ID[2] = 'L'; pAlbumNameFrame->frame_ID[3] = 'B'; // Frame Size pAlbumNameFrame->frame_Size = htonl(album_name_len); // Frame Flag pAlbumNameFrame->frame_Flags = DEFAULT_FRAME_FLAGS; // Frame Content if (album_name) { memcpy(pAlbumNameFrame->frame_content, album_name, album_name_len); } /* TPE1 Frame (Author Name) */ uint32_t author_name_frame_size; if (singer_name) { author_name_frame_size = sizeof(ID3_FRAME_t) + singer_name_len; } else { 
author_name_frame_size = sizeof(ID3_FRAME_t); } ID3_FRAME_t *pAuthorNameFrame = (ID3_FRAME_t *) malloc(author_name_frame_size); memset(pAuthorNameFrame, 0, author_name_frame_size); // Frame ID pAuthorNameFrame->frame_ID[0] = 'T'; pAuthorNameFrame->frame_ID[1] = 'P'; pAuthorNameFrame->frame_ID[2] = 'E'; pAuthorNameFrame->frame_ID[3] = '1'; // Frame Size pAuthorNameFrame->frame_Size = htonl(singer_name_len); // Frame Flag pAuthorNameFrame->frame_Flags = DEFAULT_FRAME_FLAGS; // Frame Content if (singer_name) { memcpy(pAuthorNameFrame->frame_content, singer_name, singer_name_len); } /* TXXX Frame (AAC Type, See: enum AAC_TYPE)*/ uint32_t aac_type_frame_size = sizeof(ID3_FRAME_t) + sizeof(char); ID3_FRAME_t *pAacTypeFrame = (ID3_FRAME_t *) malloc(aac_type_frame_size); memset(pAacTypeFrame, 0, aac_type_frame_size); // Frame ID pAacTypeFrame->frame_ID[0] = 'T'; pAacTypeFrame->frame_ID[1] = 'X'; pAacTypeFrame->frame_ID[2] = 'X'; pAacTypeFrame->frame_ID[3] = 'X'; // Frame Size pAacTypeFrame->frame_Size = htonl(sizeof(char)); // Frame Flag pAacTypeFrame->frame_Flags = DEFAULT_FRAME_FLAGS; // Frame Content *(pAacTypeFrame->frame_content) = aac_type; // Calculate the total size of all frames uint32_t total_frame_size = origin_file_name_frame_size + song_name_frame_size + album_name_frame_size + author_name_frame_size + aac_type_frame_size; uint8_t bitMask = (total_frame_size & 0x080) >> 7; XLOGW("ID3v2: total frame size: %u", total_frame_size); if (bitMask == 1) { total_frame_size += EXTEND_FRAME_PADDING; XLOGW("ID3v2: The highest bit of frame size's is 1, which expands to: %u", total_frame_size); } // Update the total size of all frames in the id3 label header // id3_header.id3_Size = total_frame_size; // Note!!! The byte order defined here is big endian id3_header.id3_Size = htonl(total_frame_size); // Create audio header // The main focus is on the Size field. 
Size has a total of 4 bytes, and the highest bit // of each byte is set to 0, so a total of 7*4 bits are used to indicate the size. // Here I fixed the length to 512 bytes uint32_t audio_header_total_size; audio_header_total_size = sizeof(BBPRO_AAC_AUDIO_HEADER_t) + total_frame_size; BBPRO_AAC_AUDIO_HEADER_t *pAudioHeader = (BBPRO_AAC_AUDIO_HEADER_t *) malloc(audio_header_total_size); memset(pAudioHeader, 0, audio_header_total_size); // Copy ID3v2 Header Info pAudioHeader->id_header = id3_header; // Copy Frame Info uint8_t *pOffset = pAudioHeader->id3_frames; memcpy(pOffset, pOriginFileNameFrame, origin_file_name_frame_size); pOffset += origin_file_name_frame_size; memcpy(pOffset, pSongNameFrame, song_name_frame_size); pOffset += song_name_frame_size; memcpy(pOffset, pAlbumNameFrame, album_name_frame_size); pOffset += album_name_frame_size; memcpy(pOffset, pAuthorNameFrame, author_name_frame_size); pOffset += author_name_frame_size; memcpy(pOffset, pAacTypeFrame, aac_type_frame_size); pOffset += aac_type_frame_size; *out_audio_header = (uint8_t *) pAudioHeader; free(pOriginFileNameFrame); free(pSongNameFrame); free(pAlbumNameFrame); free(pAuthorNameFrame); free(pAacTypeFrame); return audio_header_total_size; }
主要code大概就这么多,主要是 generate_bbpro_audio_header 这个函数,我这里把“原始文件名”、“歌曲名”呀等一些信息作为参数传递进来了,如果你用的也是FFmpeg,这些元数据信息可以在解码前通过FFmpeg提取出来,详情可参考:
https://github.com/FFmpeg/FFmpeg/blob/master/doc/examples/metadata.c
我写的这个函数通过输出参数 out_audio_header 返回一个指向ID3v2 header数据的指针(这块内存需要由调用者负责释放),函数的返回值则是这个指针所指向的数据的长度。另外,还需要注意一点的就是 ID3v2 中的各个size都必须是大端格式,这个是标准规定的:
The bitorder in ID3v2 is most significant bit first (MSB). The byteorder in multibyte numbers is most significant byte first (e.g. $12345678 would
be encoded $12 34 56 78), also known as big endian and network byte order.
如果是小端的话将来可能无法播放。
利用这个函数生成ID3v2 header后,可以把这个指针指向的数据手动写入到AAC文件的开头:
最后再用VLC 验证一下能不能正常播放就可以了,一般而言,如果把最终的AAC文件拖入到VLC里面也能够正常播放,就说明写入没有问题,我试了几款播放器,虽然是给AAC手动加的ID3v2头,但是并不影响播放。