Merge pull request #12 from spicyjpeg/loop-points

Add support for audio loop point parsing
This commit is contained in:
Adrian "asie" Siekierka
2025-12-05 09:03:01 +01:00
committed by GitHub
9 changed files with 259 additions and 73 deletions

View File

@@ -71,23 +71,43 @@ Notes:
authoring tool capable of rebuilding the EDC/ECC data (as it is dependent on authoring tool capable of rebuilding the EDC/ECC data (as it is dependent on
the file's absolute location on the disc) and generating a Mode 2 CD-ROM image the file's absolute location on the disc) and generating a Mode 2 CD-ROM image
with "native" 2352-byte sectors. with "native" 2352-byte sectors.
- Similarly, files generated with `-t xa` or `-t xacd` **must be interleaved** - Similarly, files generated with `-t xa` or `-t xacd` **must be interleaved**
**with other XA-ADPCM tracks or empty padding using an external tool** before **with other XA-ADPCM tracks or empty padding using an external tool** before
they can be played. they can be played.
- `vag` and `vagi` are similar to `spu` and `spui` respectively, but add a .vag
header at the beginning of the file. The header is always 48 bytes long for - `vag` and `vagi` are similar to `spu` and `spui` respectively, but add a
`vag` files, while in the case of `vagi` files it is padded to the size [.vag header](https://psx-spx.consoledev.net/cdromfileformats/#cdrom-file-audio-single-samples-vag-sony)
specified using the `-a` option (2048 bytes by default). Note that `vagi` at the beginning of the file. The header is always 48 bytes long for `vag`
files with more than 2 channels and/or alignment other than 2048 bytes are not files, while in the case of `vagi` files it is padded to the size specified
standardized. using the `-a` option (2048 bytes by default). The `vagi` format extends the
header with the following fields:
- the file's interleave size at offset `0x08-0x0B` (little endian);
- the loop start offset in bytes-per-channel, if any, at offset `0x14-0x17`
(big endian). *Note that this field is specific to psxavenc and not part of*
*the standard interleaved .vag header*;
- the file's channel count at offset `0x1E`. *This field is not part of the*
*interleaved .vag header either, but can be found in other variants of the*
*format.*
- The `spu` and `vag` formats support encoding a loop point as part of the ADPCM
data, while `vagi` supports storing one in the header for use by the stream
driver. Unless overridden, the loop point is detected from the input file by
checking, in order: the sampler metadata (`smpl` chunk) of a .wav file, a
`loop_start` metadata tag, and the first cue/chapter marker FFmpeg was able to
parse from the file. The `-l` option sets the loop point manually, while `-n`
ignores any loop point present in the input file; both disable detection.
- ~~The `strspu` format encodes the input file's audio track as a series of~~ - ~~The `strspu` format encodes the input file's audio track as a series of~~
~~custom .str chunks (type ID `0x0001` by default) holding interleaved~~ ~~custom .str chunks (type ID `0x0001` by default) holding interleaved~~
~~SPU-ADPCM data in the same format as `spui`, rather than XA-ADPCM. As .str~~ ~~SPU-ADPCM data in the same format as `spui`, rather than XA-ADPCM. As .str~~
~~chunks do not require custom XA subheaders, a file with standard 2048-byte~~ ~~chunks do not require custom XA subheaders, a file with standard 2048-byte~~
~~sectors that does not need any special handling will be generated.~~ *This* ~~sectors that does not need any special handling will be generated.~~ *This*
*format has not yet been implemented.* *format has not yet been implemented.*
- The `strv` format disables audio altogether and is equivalent to `strspu` on - The `strv` format disables audio altogether and is equivalent to `strspu` on
an input file with no audio track. an input file with no audio track.
- The `sbs` format (used in some System 573 games) consists of a series of - The `sbs` format (used in some System 573 games) consists of a series of
concatenated BS frames, each padded to the size specified by the `-a` option concatenated BS frames, each padded to the size specified by the `-a` option
(the default setting is 8192 bytes), with no additional headers besides the BS (the default setting is 8192 bytes), with no additional headers besides the BS

View File

@@ -384,11 +384,9 @@ int psx_audio_spu_encode_simple(const int16_t *samples, int sample_count, uint8_
uint8_t *last_block = output + length - PSX_AUDIO_SPU_BLOCK_SIZE; uint8_t *last_block = output + length - PSX_AUDIO_SPU_BLOCK_SIZE;
if (loop_start < 0) { if (loop_start < 0) {
last_block[1] |= PSX_AUDIO_SPU_LOOP_END;
// Insert trailing looping block // Insert trailing looping block
memset(output + length, 0, PSX_AUDIO_SPU_BLOCK_SIZE); memset(output + length, 0, PSX_AUDIO_SPU_BLOCK_SIZE);
output[length + 1] = PSX_AUDIO_SPU_LOOP_START | PSX_AUDIO_SPU_LOOP_END; output[length + 1] = PSX_AUDIO_SPU_LOOP_TRAP;
length += PSX_AUDIO_SPU_BLOCK_SIZE; length += PSX_AUDIO_SPU_BLOCK_SIZE;
} else { } else {

View File

@@ -62,9 +62,12 @@ typedef struct {
} psx_audio_encoder_state_t; } psx_audio_encoder_state_t;
enum { enum {
PSX_AUDIO_SPU_LOOP_END = 1 << 0, PSX_AUDIO_SPU_LOOP_END = (1 << 0),
PSX_AUDIO_SPU_LOOP_REPEAT = 3 << 0, PSX_AUDIO_SPU_LOOP_REPEAT = (1 << 0) | (1 << 1),
PSX_AUDIO_SPU_LOOP_START = 1 << 2 // Some old tools will not recognize loop start points if bit 1 is not set
// in addition to bit 2. Real hardware does not care.
PSX_AUDIO_SPU_LOOP_START = (1 << 1) | (1 << 2),
PSX_AUDIO_SPU_LOOP_TRAP = (1 << 0) | (1 << 2)
}; };
uint32_t psx_audio_xa_get_buffer_size(psx_audio_xa_settings_t settings, int sample_count); uint32_t psx_audio_xa_get_buffer_size(psx_audio_xa_settings_t settings, int sample_count);

View File

@@ -270,13 +270,14 @@ static int parse_xa_option(args_t *args, char option, const char *param) {
static const char *const spu_options_help = static const char *const spu_options_help =
"Mono SPU-ADPCM options:\n" "Mono SPU-ADPCM options:\n"
" [-f freq] [-a size] [-l ms | -L] [-D]\n" " [-f freq] [-a size] [-l ms | -n | -L] [-D]\n"
"\n" "\n"
" -f freq Use specified sample rate (default 44100)\n" " -f freq Use specified sample rate (default 44100)\n"
" -a size Pad audio data excluding header to multiple of given size (default 64)\n" " -a size Pad audio data excluding header to multiple of given size (default 64)\n"
" -l ms Add loop point at specified offset (in milliseconds)\n" " -l ms Add loop point at specified timestamp (in milliseconds, overrides any loop point present in input file)\n"
" -L Set loop end flag at the end of data but do not add a loop point\n" " -n Do not set loop end flag nor add a loop point (even if input file has one)\n"
" -D Do not prepend encoded data with a dummy silent block\n" " -L Set ADPCM loop end flag at end of data but do not add a loop point (even if input file has one)\n"
" -D Do not prepend encoded data with a dummy silent block to reset decoder state\n"
"\n"; "\n";
static int parse_spu_option(args_t *args, char option, const char *param) { static int parse_spu_option(args_t *args, char option, const char *param) {
@@ -288,11 +289,17 @@ static int parse_spu_option(args_t *args, char option, const char *param) {
return parse_int(&(args->alignment), "alignment", param, 1, -1); return parse_int(&(args->alignment), "alignment", param, 1, -1);
case 'l': case 'l':
args->flags |= FLAG_SPU_LOOP_END; args->flags |= FLAG_OVERRIDE_LOOP_POINT | FLAG_SPU_ENABLE_LOOP;
return parse_int(&(args->audio_loop_point), "loop offset", param, 0, -1); return parse_int(&(args->audio_loop_point), "loop offset", param, 0, -1);
case 'n':
args->flags |= FLAG_OVERRIDE_LOOP_POINT;
args->audio_loop_point = -1;
return 1;
case 'L': case 'L':
args->flags |= FLAG_SPU_LOOP_END; args->flags |= FLAG_OVERRIDE_LOOP_POINT | FLAG_SPU_ENABLE_LOOP;
args->audio_loop_point = -1;
return 1; return 1;
case 'D': case 'D':
@@ -306,15 +313,16 @@ static int parse_spu_option(args_t *args, char option, const char *param) {
static const char *const spui_options_help = static const char *const spui_options_help =
"Interleaved SPU-ADPCM options:\n" "Interleaved SPU-ADPCM options:\n"
" [-f freq] [-c channels] [-i size] [-a size] [-L] [-D]\n" " [-f freq] [-c channels] [-i size] [-a size] [-l ms | -n] [-L] [-D]\n"
"\n" "\n"
" -f freq Use specified sample rate (default 44100)\n" " -f freq Use specified sample rate (default 44100)\n"
" -c channels Use specified channel count (default 2)\n" " -c channels Use specified channel count (default 2)\n"
" -i size Use specified channel interleave size (default 2048)\n" " -i size Use specified channel interleave size (default 2048)\n"
" -a size Pad .vag header and each audio chunk to multiples of given size\n" " -a size Pad .vag header and each audio chunk to multiples of given size (default 2048)\n"
" (default 2048)\n" " -l ms Store specified timestamp in file header as loop point (in milliseconds, overrides any loop point present in input file)\n"
" -L Set loop end flag at the end of each audio chunk\n" " -n Do not store any loop point in file header (even if input file has one)\n"
" -D Do not prepend first chunk's data with a dummy silent block\n" " -L Set ADPCM loop end flag at the end of each audio chunk (separately from loop point in file header)\n"
" -D Do not prepend first chunk's data with a dummy silent block to reset decoder state\n"
"\n"; "\n";
static int parse_spui_option(args_t *args, char option, const char *param) { static int parse_spui_option(args_t *args, char option, const char *param) {
@@ -337,8 +345,17 @@ static int parse_spui_option(args_t *args, char option, const char *param) {
case 'a': case 'a':
return parse_int(&(args->alignment), "alignment", param, 1, -1); return parse_int(&(args->alignment), "alignment", param, 1, -1);
case 'l':
args->flags |= FLAG_OVERRIDE_LOOP_POINT;
return parse_int(&(args->audio_loop_point), "loop offset", param, 0, -1);
case 'n':
args->flags |= FLAG_OVERRIDE_LOOP_POINT;
args->audio_loop_point = -1;
return 1;
case 'L': case 'L':
args->flags |= FLAG_SPU_LOOP_END; args->flags |= FLAG_SPU_ENABLE_LOOP;
return 1; return 1;
case 'D': case 'D':
@@ -358,8 +375,7 @@ static const char *const bs_options_help =
" v2: MDEC BS v2 (default)\n" " v2: MDEC BS v2 (default)\n"
" v3: MDEC BS v3\n" " v3: MDEC BS v3\n"
" v3dc: MDEC BS v3, expect decoder to wrap DC coefficients\n" " v3dc: MDEC BS v3, expect decoder to wrap DC coefficients\n"
" -s WxH Rescale input file to fit within specified size\n" " -s WxH Rescale input file to fit within specified size (16x16-640x512 in 16-pixel increments, default 320x240)\n"
" (16x16-640x512 in 16-pixel increments, default 320x240)\n"
" -I Force stretching to given size without preserving aspect ratio\n" " -I Force stretching to given size without preserving aspect ratio\n"
"\n"; "\n";
@@ -422,8 +438,7 @@ static const char *const str_options_help =
" -x 1|2 Set CD-ROM speed the file is meant to played at (default 2)\n" " -x 1|2 Set CD-ROM speed the file is meant to played at (default 2)\n"
" -T id Tag video sectors with specified .str type ID (default 0x8001)\n" " -T id Tag video sectors with specified .str type ID (default 0x8001)\n"
" -A id Tag SPU-ADPCM sectors with specified .str type ID (default 0x0001)\n" " -A id Tag SPU-ADPCM sectors with specified .str type ID (default 0x0001)\n"
" -X Place audio sectors after corresponding video sectors\n" " -X Place audio sectors after corresponding video sectors rather than ahead of them\n"
" (rather than ahead of them)\n"
"\n"; "\n";
static int parse_str_option(args_t *args, char option, const char *param) { static int parse_str_option(args_t *args, char option, const char *param) {

View File

@@ -35,10 +35,11 @@ enum {
FLAG_HIDE_PROGRESS = 1 << 2, FLAG_HIDE_PROGRESS = 1 << 2,
FLAG_PRINT_HELP = 1 << 3, FLAG_PRINT_HELP = 1 << 3,
FLAG_PRINT_VERSION = 1 << 4, FLAG_PRINT_VERSION = 1 << 4,
FLAG_SPU_LOOP_END = 1 << 5, FLAG_OVERRIDE_LOOP_POINT = 1 << 5,
FLAG_SPU_NO_LEADING_DUMMY = 1 << 6, FLAG_SPU_ENABLE_LOOP = 1 << 6,
FLAG_BS_IGNORE_ASPECT = 1 << 7, FLAG_SPU_NO_LEADING_DUMMY = 1 << 7,
FLAG_STR_TRAILING_AUDIO = 1 << 8 FLAG_BS_IGNORE_ASPECT = 1 << 8,
FLAG_STR_TRAILING_AUDIO = 1 << 9
}; };
typedef enum { typedef enum {

View File

@@ -3,7 +3,7 @@ psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend
Copyright (c) 2019, 2020 Adrian "asie" Siekierka Copyright (c) 2019, 2020 Adrian "asie" Siekierka
Copyright (c) 2019 Ben "GreaseMonkey" Russell Copyright (c) 2019 Ben "GreaseMonkey" Russell
Copyright (c) 2023 spicyjpeg Copyright (c) 2023, 2025 spicyjpeg
This software is provided 'as-is', without any express or implied This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages warranty. In no event will the authors be held liable for any damages
@@ -23,7 +23,10 @@ freely, subject to the following restrictions:
*/ */
#include <assert.h> #include <assert.h>
#include <math.h>
#include <stdbool.h> #include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
@@ -36,6 +39,77 @@ freely, subject to the following restrictions:
#include "args.h" #include "args.h"
#include "decoding.h" #include "decoding.h"
// Loop type values, compared against the loop type field read from the first
// loop entry of a .wav file's "smpl" chunk.
enum {
	LOOP_TYPE_FORWARD   = 0,
	LOOP_TYPE_PING_PONG = 1,
	LOOP_TYPE_BACKWARD  = 2
};
// HACK: FFmpeg does not parse "smpl" chunks out of .wav files on its own, so a
// minimal RIFF chunk parser needs to be implemented here. (It does however
// parse "cue" chunk entries as chapters; if no "smpl" chunk is found, the
// file's first chapter if any is used as a loop point by default.)
//
// Walks the RIFF chunks of the file behind pb from the start, looking for the
// first "smpl" chunk large enough to hold the fixed sampler header. Returns the
// raw start offset field of that chunk's first loop entry, or -1 if the file is
// not seekable, no usable "smpl" chunk or loop entry is found, or the offset
// does not fit in a non-negative int. Once the scan has started, the read
// position of pb is restored to where it was on entry before returning.
static int parse_wav_loop_point(AVIOContext *pb, const args_t *args) {
	// Sizes of the fixed part of the "smpl" chunk (9 fields) and of each loop
	// entry that follows it (6 fields), all 32-bit little endian.
	enum {
		SMPL_HEADER_SIZE     = 9 * sizeof(uint32_t),
		SMPL_LOOP_ENTRY_SIZE = 6 * sizeof(uint32_t)
	};

	if (!pb->seekable) {
		if (!(args->flags & FLAG_QUIET))
			fprintf(stderr, "Warning: input file is not seekable, cannot parse loop points\n");

		return -1;
	}

	int64_t saved_file_pos = avio_tell(pb);
	int start_offset = -1;

	if (avio_seek(pb, 0, SEEK_SET) != 0)
		return -1;

	avio_rl32(pb); // "RIFF" magic
	avio_rl32(pb); // File size
	avio_rl32(pb); // "WAVE" magic

	while (!avio_feof(pb)) {
		uint32_t chunk_type = avio_rl32(pb);
		uint32_t chunk_size = avio_rl32(pb);

		if (chunk_type != MKTAG('s', 'm', 'p', 'l') || chunk_size < SMPL_HEADER_SIZE) {
			// RIFF chunks are word-aligned: a chunk with an odd size is
			// followed by a pad byte that is not counted in its size field.
			// Skipping it keeps the parser in sync with the next chunk header.
			avio_skip(pb, (int64_t)chunk_size + (int64_t)(chunk_size & 1));
			continue;
		}

		avio_rl32(pb); // Manufacturer ID
		avio_rl32(pb); // Product ID
		avio_rl32(pb); // Sample period (ns)
		avio_rl32(pb); // MIDI unity note number
		avio_rl32(pb); // MIDI pitch fraction
		avio_rl32(pb); // SMPTE format
		avio_rl32(pb); // SMPTE offset
		uint32_t loop_count = avio_rl32(pb);
		avio_rl32(pb); // Additional data size

		if (loop_count == 0)
			break;

		// Do not read a loop entry the chunk claims to have but has no room
		// for, as that would interpret whatever follows the chunk as loop data.
		if (chunk_size < (SMPL_HEADER_SIZE + SMPL_LOOP_ENTRY_SIZE)) {
			if (!(args->flags & FLAG_QUIET))
				fprintf(stderr, "Warning: input file has a truncated smpl chunk, ignoring its loop points\n");

			break;
		}

		if (loop_count > 1 && !(args->flags & FLAG_QUIET))
			fprintf(stderr, "Warning: input file has %d loop points, using first one\n", (int)loop_count);

		avio_rl32(pb); // Loop ID
		uint32_t loop_type = avio_rl32(pb);
		uint32_t raw_start_offset = avio_rl32(pb);
		avio_rl32(pb); // End offset
		avio_rl32(pb); // Sample fraction
		uint32_t play_count = avio_rl32(pb);

		// Offsets that cannot be represented as a non-negative int are
		// reported as "no loop point" rather than being wrapped by a cast.
		if (raw_start_offset > (uint32_t)INT32_MAX)
			break;

		start_offset = (int)raw_start_offset;

		if (!(args->flags & FLAG_QUIET)) {
			if (loop_type != LOOP_TYPE_FORWARD)
				fprintf(stderr, "Warning: treating %s loop as forward loop\n", (loop_type == LOOP_TYPE_PING_PONG) ? "ping-pong" : "backward");
			if (play_count != 0)
				fprintf(stderr, "Warning: treating loop repeating %d times as endless loop\n", (int)play_count);
		}

		break;
	}

	// Put the stream back where the caller left it.
	avio_seek(pb, saved_file_pos, SEEK_SET);
	return start_offset;
}
static bool decode_frame(AVCodecContext *codec, AVFrame *frame, int *frame_size, AVPacket *packet) { static bool decode_frame(AVCodecContext *codec, AVFrame *frame, int *frame_size, AVPacket *packet) {
if (packet != NULL) { if (packet != NULL) {
if (avcodec_send_packet(codec, packet) != 0) if (avcodec_send_packet(codec, packet) != 0)
@@ -152,10 +226,11 @@ bool open_av_data(decoder_t *decoder, const args_t *args, int flags) {
layout.order = AV_CHANNEL_ORDER_UNSPEC; layout.order = AV_CHANNEL_ORDER_UNSPEC;
} }
if (!(args->flags & FLAG_QUIET)) { if (
if (args->audio_channels > av->audio_codec_context->ch_layout.nb_channels) args->audio_channels > av->audio_codec_context->ch_layout.nb_channels &&
fprintf(stderr, "Warning: input file has less than %d channels\n", args->audio_channels); !(args->flags & FLAG_QUIET)
} )
fprintf(stderr, "Warning: input file has less than %d channels\n", args->audio_channels);
av->sample_count_mul = args->audio_channels; av->sample_count_mul = args->audio_channels;
@@ -191,13 +266,11 @@ bool open_av_data(decoder_t *decoder, const args_t *args, int flags) {
if (avcodec_open2(av->video_codec_context, codec, NULL) < 0) if (avcodec_open2(av->video_codec_context, codec, NULL) < 0)
return false; return false;
if (!(args->flags & FLAG_QUIET)) { if (
if ( (decoder->video_width > av->video_codec_context->width || decoder->video_height > av->video_codec_context->height) &&
decoder->video_width > av->video_codec_context->width || !(args->flags & FLAG_QUIET)
decoder->video_height > av->video_codec_context->height )
) fprintf(stderr, "Warning: input file has resolution lower than %dx%d\n", decoder->video_width, decoder->video_height);
fprintf(stderr, "Warning: input file has resolution lower than %dx%d\n", decoder->video_width, decoder->video_height);
}
if (!(args->flags & FLAG_BS_IGNORE_ASPECT)) { if (!(args->flags & FLAG_BS_IGNORE_ASPECT)) {
// Reduce the provided size so that it matches the input file's // Reduce the provided size so that it matches the input file's
@@ -205,11 +278,10 @@ bool open_av_data(decoder_t *decoder, const args_t *args, int flags) {
double src_ratio = (double)av->video_codec_context->width / (double)av->video_codec_context->height; double src_ratio = (double)av->video_codec_context->width / (double)av->video_codec_context->height;
double dst_ratio = (double)decoder->video_width / (double)decoder->video_height; double dst_ratio = (double)decoder->video_width / (double)decoder->video_height;
if (src_ratio < dst_ratio) { if (src_ratio < dst_ratio)
decoder->video_width = (int)((double)decoder->video_height * src_ratio + 15.0) & ~15; decoder->video_width = ((int)round((double)decoder->video_height * src_ratio) + 15) & ~15;
} else { else
decoder->video_height = (int)((double)decoder->video_width / src_ratio + 15.0) & ~15; decoder->video_height = ((int)round((double)decoder->video_width / src_ratio) + 15) & ~15;
}
} }
av->scaler = sws_getContext( av->scaler = sws_getContext(
@@ -253,6 +325,48 @@ bool open_av_data(decoder_t *decoder, const args_t *args, int flags) {
return true; return true;
} }
// Determines the default loop point of the opened input file, in milliseconds.
// The following sources are tried in order, and the first usable one wins:
//   1. the first loop entry of the "smpl" chunk, if the input was opened with
//      the "wav" demuxer and has an audio stream;
//   2. the container's "loop_start" metadata tag, interpreted as a timestamp
//      in AV_TIME_BASE units;
//   3. the start of the file's first chapter.
// Returns -1 if none of them yields a valid (non-negative) loop point. Unless
// FLAG_QUIET is set, the detected loop point and its source are logged to
// stderr.
int get_av_loop_point(decoder_t *decoder, const args_t *args) {
	decoder_state_t *av = &(decoder->state);
	bool verbose = !(args->flags & FLAG_QUIET);

	if (strcmp(av->format->iformat->name, "wav") == 0 && av->audio_stream != NULL) {
		int start_offset = parse_wav_loop_point(av->format->pb, args);

		// The offset is converted to a timestamp using the input sample rate;
		// a zero or negative rate would turn the result into inf/NaN.
		if (start_offset >= 0 && av->audio_codec_context->sample_rate > 0) {
			double pts = (double)start_offset / (double)av->audio_codec_context->sample_rate;
			int loop_point = (int)round(pts * 1000.0);

			if (verbose)
				fprintf(stderr, "Detected loop point (from smpl data): %d ms\n", loop_point);

			return loop_point;
		}
	}

	AVDictionaryEntry *loop_start_tag = av_dict_get(av->format->metadata, "loop_start", 0, 0);

	if (loop_start_tag != NULL) {
		char *parse_end = NULL;
		long long start_ticks = strtoll(loop_start_tag->value, &parse_end, 10);

		// Dividing by the number of ticks per millisecond gives the same result
		// as multiplying by 1000 and dividing by AV_TIME_BASE, but cannot
		// overflow.
		long long start_ms = start_ticks / (AV_TIME_BASE / 1000);

		// Ignore tags that hold no number at all (strtoll() would otherwise
		// silently report a loop point at 0 ms), negative timestamps and
		// values too large to be returned as an int.
		if (parse_end != loop_start_tag->value && start_ticks >= 0 && start_ms <= (long long)INT32_MAX) {
			int loop_point = (int)start_ms;

			if (verbose)
				fprintf(stderr, "Detected loop point (from metadata): %d ms\n", loop_point);

			return loop_point;
		}
	}

	if (av->format->nb_chapters > 0) {
		if (av->format->nb_chapters > 1 && verbose)
			fprintf(stderr, "Warning: input file has %d chapters, using first one as loop point\n", (int)av->format->nb_chapters);

		AVChapter *chapter = av->format->chapters[0];

		// Skip chapters whose timestamp cannot be converted to a valid loop
		// point (zero time base denominator or negative start time).
		if (chapter->time_base.den != 0 && chapter->start >= 0) {
			double pts = (double)chapter->start * (double)chapter->time_base.num / (double)chapter->time_base.den;
			int loop_point = (int)round(pts * 1000.0);

			if (verbose)
				fprintf(stderr, "Detected loop point (from first chapter): %d ms\n", loop_point);

			return loop_point;
		}
	}

	return -1;
}
static void poll_av_packet_audio(decoder_t *decoder, AVPacket *packet) { static void poll_av_packet_audio(decoder_t *decoder, AVPacket *packet) {
decoder_state_t *av = &(decoder->state); decoder_state_t *av = &(decoder->state);
@@ -309,9 +423,8 @@ static void poll_av_packet_video(decoder_t *decoder, AVPacket *packet) {
// Some files seem to have timestamps starting from a negative value // Some files seem to have timestamps starting from a negative value
// (but otherwise valid) for whatever reason. // (but otherwise valid) for whatever reason.
double pts = double pts = (double)av->frame->pts * (double)av->video_stream->time_base.num / (double)av->video_stream->time_base.den;
((double)av->frame->pts * (double)av->video_stream->time_base.num)
/ av->video_stream->time_base.den;
#if 0 #if 0
if (pts < 0.0) if (pts < 0.0)
return; return;
@@ -325,10 +438,13 @@ static void poll_av_packet_video(decoder_t *decoder, AVPacket *packet) {
//fprintf(stderr, "%d %f %f %f\n", decoder->video_frame_count, pts, av->video_next_pts, pts_step); //fprintf(stderr, "%d %f %f %f\n", decoder->video_frame_count, pts, av->video_next_pts, pts_step);
// Insert duplicate frames if the frame rate of the input stream is // Insert duplicate frames if the frame rate of the input stream is lower
// lower than the target frame rate. // than the target frame rate.
int dupe_frames = (int) ceil((pts - av->video_next_pts) / pts_step); int dupe_frames = (int)ceil((pts - av->video_next_pts) / pts_step);
if (dupe_frames < 0) dupe_frames = 0;
if (dupe_frames < 0)
dupe_frames = 0;
decoder->video_frames = realloc( decoder->video_frames = realloc(
decoder->video_frames, decoder->video_frames,
(decoder->video_frame_count + dupe_frames + 1) * av->video_frame_dst_size (decoder->video_frame_count + dupe_frames + 1) * av->video_frame_dst_size

View File

@@ -25,6 +25,7 @@ freely, subject to the following restrictions:
#pragma once #pragma once
#include <stdbool.h> #include <stdbool.h>
#include <stdint.h>
#include <libavutil/opt.h> #include <libavutil/opt.h>
#include <libavcodec/avcodec.h> #include <libavcodec/avcodec.h>
#include <libavcodec/avdct.h> #include <libavcodec/avdct.h>
@@ -74,6 +75,7 @@ enum {
}; };
bool open_av_data(decoder_t *decoder, const args_t *args, int flags); bool open_av_data(decoder_t *decoder, const args_t *args, int flags);
int get_av_loop_point(decoder_t *decoder, const args_t *args);
bool poll_av_data(decoder_t *decoder); bool poll_av_data(decoder_t *decoder);
bool ensure_av_data(decoder_t *decoder, int needed_audio_samples, int needed_video_frames); bool ensure_av_data(decoder_t *decoder, int needed_audio_samples, int needed_video_frames);
void retire_av_data(decoder_t *decoder, int retired_audio_samples, int retired_video_frames); void retire_av_data(decoder_t *decoder, int retired_audio_samples, int retired_video_frames);

View File

@@ -23,6 +23,7 @@ freely, subject to the following restrictions:
*/ */
#include <assert.h> #include <assert.h>
#include <math.h>
#include <stdint.h> #include <stdint.h>
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
@@ -104,13 +105,13 @@ static void write_vag_header(const args_t *args, int size_per_channel, uint8_t *
else else
header[0x03] = 'p'; header[0x03] = 'p';
// Version (big-endian) // Version (big endian)
header[0x04] = 0x00; header[0x04] = 0x00;
header[0x05] = 0x00; header[0x05] = 0x00;
header[0x06] = 0x00; header[0x06] = 0x00;
header[0x07] = 0x20; header[0x07] = 0x20;
// Interleave (little-endian) // Interleave (little endian)
if (args->format == FORMAT_VAGI) { if (args->format == FORMAT_VAGI) {
header[0x08] = (uint8_t)args->audio_interleave; header[0x08] = (uint8_t)args->audio_interleave;
header[0x09] = (uint8_t)(args->audio_interleave >> 8); header[0x09] = (uint8_t)(args->audio_interleave >> 8);
@@ -118,24 +119,38 @@ static void write_vag_header(const args_t *args, int size_per_channel, uint8_t *
header[0x0B] = (uint8_t)(args->audio_interleave >> 24); header[0x0B] = (uint8_t)(args->audio_interleave >> 24);
} }
// Length of data for each channel (big-endian) // Length of data for each channel (big endian)
header[0x0C] = (uint8_t)(size_per_channel >> 24); header[0x0C] = (uint8_t)(size_per_channel >> 24);
header[0x0D] = (uint8_t)(size_per_channel >> 16); header[0x0D] = (uint8_t)(size_per_channel >> 16);
header[0x0E] = (uint8_t)(size_per_channel >> 8); header[0x0E] = (uint8_t)(size_per_channel >> 8);
header[0x0F] = (uint8_t)size_per_channel; header[0x0F] = (uint8_t)size_per_channel;
// Sample rate (big-endian) // Sample rate (big endian)
header[0x10] = (uint8_t)(args->audio_frequency >> 24); header[0x10] = (uint8_t)(args->audio_frequency >> 24);
header[0x11] = (uint8_t)(args->audio_frequency >> 16); header[0x11] = (uint8_t)(args->audio_frequency >> 16);
header[0x12] = (uint8_t)(args->audio_frequency >> 8); header[0x12] = (uint8_t)(args->audio_frequency >> 8);
header[0x13] = (uint8_t)args->audio_frequency; header[0x13] = (uint8_t)args->audio_frequency;
// Number of channels (little-endian) // Loop point in bytes (big endian, non-standard)
if (args->format == FORMAT_VAGI && args->audio_loop_point >= 0) {
int loop_start_block = (args->audio_loop_point * args->audio_frequency) / (PSX_AUDIO_SPU_SAMPLES_PER_BLOCK * 1000);
if (!(args->flags & FLAG_SPU_NO_LEADING_DUMMY))
loop_start_block++;
int loop_point = loop_start_block * PSX_AUDIO_SPU_BLOCK_SIZE;
header[0x14] = (uint8_t)(loop_point >> 24);
header[0x15] = (uint8_t)(loop_point >> 16);
header[0x16] = (uint8_t)(loop_point >> 8);
header[0x17] = (uint8_t)loop_point;
}
// Number of channels (non-standard)
header[0x1E] = (uint8_t)args->audio_channels; header[0x1E] = (uint8_t)args->audio_channels;
header[0x1F] = 0x00;
// Filename // Filename
int name_offset = strlen(args->output_file); int name_offset = strlen(args->output_file);
while ( while (
name_offset > 0 && name_offset > 0 &&
args->output_file[name_offset - 1] != '/' && args->output_file[name_offset - 1] != '/' &&
@@ -235,7 +250,7 @@ void encode_file_spu(const args_t *args, decoder_t *decoder, FILE *output) {
if (block_count == loop_start_block) if (block_count == loop_start_block)
block[1] |= PSX_AUDIO_SPU_LOOP_START; block[1] |= PSX_AUDIO_SPU_LOOP_START;
if ((args->flags & FLAG_SPU_LOOP_END) && decoder->end_of_input) if ((args->flags & FLAG_SPU_ENABLE_LOOP) && decoder->end_of_input)
block[1] |= PSX_AUDIO_SPU_LOOP_REPEAT; block[1] |= PSX_AUDIO_SPU_LOOP_REPEAT;
retire_av_data(decoder, samples_length, 0); retire_av_data(decoder, samples_length, 0);
@@ -253,10 +268,10 @@ void encode_file_spu(const args_t *args, decoder_t *decoder, FILE *output) {
} }
} }
if (!(args->flags & FLAG_SPU_LOOP_END)) { if (!(args->flags & FLAG_SPU_ENABLE_LOOP)) {
// Insert trailing looping block // Insert trailing looping block
memset(block, 0, PSX_AUDIO_SPU_BLOCK_SIZE); memset(block, 0, PSX_AUDIO_SPU_BLOCK_SIZE);
block[1] = PSX_AUDIO_SPU_LOOP_START | PSX_AUDIO_SPU_LOOP_END; block[1] = PSX_AUDIO_SPU_LOOP_TRAP;
fwrite(block, PSX_AUDIO_SPU_BLOCK_SIZE, 1, output); fwrite(block, PSX_AUDIO_SPU_BLOCK_SIZE, 1, output);
block_count++; block_count++;
@@ -291,6 +306,8 @@ void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output) {
if (args->format == FORMAT_VAGI) if (args->format == FORMAT_VAGI)
fseek(output, header_size, SEEK_SET); fseek(output, header_size, SEEK_SET);
else if (args->audio_loop_point >= 0 && !(args->flags & FLAG_QUIET))
fprintf(stderr, "Warning: ignoring loop point as there is no header to store it in\n");
int audio_state_size = sizeof(psx_audio_encoder_channel_state_t) * args->audio_channels; int audio_state_size = sizeof(psx_audio_encoder_channel_state_t) * args->audio_channels;
psx_audio_encoder_channel_state_t *audio_state = malloc(audio_state_size); psx_audio_encoder_channel_state_t *audio_state = malloc(audio_state_size);
@@ -326,14 +343,17 @@ void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output) {
if (length > 0) { if (length > 0) {
uint8_t *last_block = chunk_ptr + length - PSX_AUDIO_SPU_BLOCK_SIZE; uint8_t *last_block = chunk_ptr + length - PSX_AUDIO_SPU_BLOCK_SIZE;
if (args->flags & FLAG_SPU_LOOP_END) { if (
(args->flags & FLAG_SPU_ENABLE_LOOP) ||
(decoder->end_of_input && args->audio_loop_point >= 0)
) {
last_block[1] = PSX_AUDIO_SPU_LOOP_REPEAT; last_block[1] = PSX_AUDIO_SPU_LOOP_REPEAT;
} else if (decoder->end_of_input) { } else if (decoder->end_of_input) {
// HACK: the trailing block should in theory be appended to // HACK: the trailing block should in theory be appended to
// the existing data, but it's easier to just zerofill and // the existing data, but it's easier to just zerofill and
// repurpose the last encoded block // repurpose the last encoded block.
memset(last_block, 0, PSX_AUDIO_SPU_BLOCK_SIZE); memset(last_block, 0, PSX_AUDIO_SPU_BLOCK_SIZE);
last_block[1] = PSX_AUDIO_SPU_LOOP_START | PSX_AUDIO_SPU_LOOP_END; last_block[1] = PSX_AUDIO_SPU_LOOP_TRAP;
} }
} }
} }
@@ -420,7 +440,7 @@ void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output) {
encoder.state.quant_scale_sum = 0; encoder.state.quant_scale_sum = 0;
// FIXME: this needs an extra frame to prevent A/V desync // FIXME: this needs an extra frame to prevent A/V desync
int frames_needed = (int) ceil((double)video_sectors_per_block / frame_size); int frames_needed = (int)ceil((double)video_sectors_per_block / frame_size);
if (frames_needed < 2) if (frames_needed < 2)
frames_needed = 2; frames_needed = 2;
@@ -542,7 +562,7 @@ void encode_file_strspu(const args_t *args, decoder_t *decoder, FILE *output) {
encoder.state.quant_scale_sum = 0; encoder.state.quant_scale_sum = 0;
// FIXME: this needs an extra frame to prevent A/V desync // FIXME: this needs an extra frame to prevent A/V desync
int frames_needed = (int) ceil((double)video_sectors_per_block / frame_size); int frames_needed = (int)ceil((double)video_sectors_per_block / frame_size);
if (frames_needed < 2) if (frames_needed < 2)
frames_needed = 2; frames_needed = 2;

View File

@@ -3,7 +3,7 @@ psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend
Copyright (c) 2019, 2020 Adrian "asie" Siekierka Copyright (c) 2019, 2020 Adrian "asie" Siekierka
Copyright (c) 2019 Ben "GreaseMonkey" Russell Copyright (c) 2019 Ben "GreaseMonkey" Russell
Copyright (c) 2023 spicyjpeg Copyright (c) 2023, 2025 spicyjpeg
This software is provided 'as-is', without any express or implied This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages warranty. In no event will the authors be held liable for any damages
@@ -72,6 +72,7 @@ int main(int argc, const char **argv) {
if (output == NULL) { if (output == NULL) {
fprintf(stderr, "Failed to open output file: %s\n", args.output_file); fprintf(stderr, "Failed to open output file: %s\n", args.output_file);
close_av_data(&decoder);
return 1; return 1;
} }
@@ -94,6 +95,13 @@ int main(int argc, const char **argv) {
case FORMAT_SPU: case FORMAT_SPU:
case FORMAT_VAG: case FORMAT_VAG:
if (!(args.flags & FLAG_OVERRIDE_LOOP_POINT)) {
args.audio_loop_point = get_av_loop_point(&decoder, &args);
if (args.audio_loop_point >= 0)
args.flags |= FLAG_SPU_ENABLE_LOOP;
}
if (!(args.flags & FLAG_QUIET)) if (!(args.flags & FLAG_QUIET))
fprintf( fprintf(
stderr, stderr,
@@ -106,6 +114,9 @@ int main(int argc, const char **argv) {
case FORMAT_SPUI: case FORMAT_SPUI:
case FORMAT_VAGI: case FORMAT_VAGI:
if (!(args.flags & FLAG_OVERRIDE_LOOP_POINT))
args.audio_loop_point = get_av_loop_point(&decoder, &args);
if (!(args.flags & FLAG_QUIET)) if (!(args.flags & FLAG_QUIET))
fprintf( fprintf(
stderr, stderr,
@@ -121,7 +132,7 @@ int main(int argc, const char **argv) {
case FORMAT_STR: case FORMAT_STR:
case FORMAT_STRCD: case FORMAT_STRCD:
if (!(args.flags & FLAG_QUIET)) { if (!(args.flags & FLAG_QUIET)) {
if (decoder.state.audio_stream) if (decoder.state.audio_stream != NULL)
fprintf( fprintf(
stderr, stderr,
"Audio format: XA-ADPCM, %d Hz %d-bit %s, F=%d C=%d\n", "Audio format: XA-ADPCM, %d Hz %d-bit %s, F=%d C=%d\n",
@@ -152,7 +163,7 @@ int main(int argc, const char **argv) {
case FORMAT_STRV: case FORMAT_STRV:
if (!(args.flags & FLAG_QUIET)) { if (!(args.flags & FLAG_QUIET)) {
if (decoder.state.audio_stream) if (decoder.state.audio_stream != NULL)
fprintf( fprintf(
stderr, stderr,
"Audio format: SPU-ADPCM, %d Hz %d channels, interleave=%d\n", "Audio format: SPU-ADPCM, %d Hz %d channels, interleave=%d\n",