Merge pull request #12 from spicyjpeg/loop-points

Add support for audio loop point parsing
Flip loop point endianness, add notes to README
2025-12-17 11:30:26 +00:00 · 2025-12-05 09:03:01 +01:00 · 2025-12-05 08:57:13 +01:00 · 2025-12-05 07:45:57 +01:00 · 2025-12-05 02:26:01 +01:00 · 2025-12-05 02:02:17 +01:00
11 changed files with 265 additions and 78 deletions
--- a/.github/scripts/build.sh
+++ b/.github/scripts/build.sh
@@ -1,7 +1,7 @@
 #!/bin/bash

 ROOT_DIR="$(pwd)"
-FFMPEG_VERSION="7.1"
+FFMPEG_VERSION="8.0.1"
 NUM_JOBS="4"

 if [ $# -eq 1 ]; then
@@ -44,7 +44,6 @@ cd ffmpeg-build
 	--disable-programs \
 	--disable-doc \
 	--disable-avdevice \
-	--disable-postproc \
 	--disable-avfilter \
 	--disable-network \
 	--disable-encoders \
--- a/README.md
+++ b/README.md
@@ -71,23 +71,43 @@ Notes:
  authoring tool capable of rebuilding the EDC/ECC data (as it is dependent on
  the file's absolute location on the disc) and generating a Mode 2 CD-ROM image
  with "native" 2352-byte sectors.
+
 - Similarly, files generated with `-t xa` or `-t xacd` **must be interleaved**
  **with other XA-ADPCM tracks or empty padding using an external tool** before
  they can be played.
- `vag` and `vagi` are similar to `spu` and `spui` respectively, but add a .vag
-  header at the beginning of the file. The header is always 48 bytes long for
-  `vag` files, while in the case of `vagi` files it is padded to the size
-  specified using the `-a` option (2048 bytes by default). Note that `vagi`
-  files with more than 2 channels and/or alignment other than 2048 bytes are not
-  standardized.
+
+- `vag` and `vagi` are similar to `spu` and `spui` respectively, but add a
+  [.vag header](https://psx-spx.consoledev.net/cdromfileformats/#cdrom-file-audio-single-samples-vag-sony)
+  at the beginning of the file. The header is always 48 bytes long for `vag`
+  files, while in the case of `vagi` files it is padded to the size specified
+  using the `-a` option (2048 bytes by default). The `vagi` format extends the
+  header with the following fields:
+  - the file's interleave size at offset `0x08-0x0B` (little endian);
+  - the loop start offset in bytes-per-channel, if any, at offset `0x14-0x17`
+    (big endian). *Note that this field is specific to psxavenc and not part of*
+    *the standard interleaved .vag header*;
+  - the file's channel count at offset `0x1E`. *This field is not part of the*
+    *interleaved .vag header either, but can be found in other variants of the*
+    *format.*
+
+- The `spu` and `vag` formats support encoding a loop point as part of the ADPCM
+  data, while `vagi` supports storing one in the header for use by the stream
+  driver. If the input file is a .wav file with sampler metadata (`smpl` chunk)
+  or is in a format from which FFmpeg can parse cue/chapter markers, the first
+  marker will be used as the loop point by default. The `-l` and `-n`
+  options can be used to manually set a loop point or ignore the one present in
+  the input file respectively.
+
 - ~~The `strspu` format encodes the input file's audio track as a series of~~
  ~~custom .str chunks (type ID `0x0001` by default) holding interleaved~~
  ~~SPU-ADPCM data in the same format as `spui`, rather than XA-ADPCM. As .str~~
  ~~chunks do not require custom XA subheaders, a file with standard 2048-byte~~
  ~~sectors that does not need any special handling will be generated.~~ *This*
  *format has not yet been implemented.*
+
 - The `strv` format disables audio altogether and is equivalent to `strspu` on
  an input file with no audio track.
+
 - The `sbs` format (used in some System 573 games) consists of a series of
  concatenated BS frames, each padded to the size specified by the `-a` option
  (the default setting is 8192 bytes), with no additional headers besides the BS
--- a/libpsxav/adpcm.c
+++ b/libpsxav/adpcm.c
@@ -384,11 +384,9 @@ int psx_audio_spu_encode_simple(const int16_t *samples, int sample_count, uint8_
 		uint8_t *last_block = output + length - PSX_AUDIO_SPU_BLOCK_SIZE;

 		if (loop_start < 0) {
-			last_block[1] |= PSX_AUDIO_SPU_LOOP_END;
-
 			// Insert trailing looping block
 			memset(output + length, 0, PSX_AUDIO_SPU_BLOCK_SIZE);
-			output[length + 1] = PSX_AUDIO_SPU_LOOP_START | PSX_AUDIO_SPU_LOOP_END;
+			output[length + 1] = PSX_AUDIO_SPU_LOOP_TRAP;

 			length += PSX_AUDIO_SPU_BLOCK_SIZE;
 		} else {
--- a/libpsxav/libpsxav.h
+++ b/libpsxav/libpsxav.h
@@ -62,9 +62,12 @@ typedef struct {
 } psx_audio_encoder_state_t;

 enum {
-	PSX_AUDIO_SPU_LOOP_END    = 1 << 0,
-	PSX_AUDIO_SPU_LOOP_REPEAT = 3 << 0,
-	PSX_AUDIO_SPU_LOOP_START  = 1 << 2
+	PSX_AUDIO_SPU_LOOP_END    = (1 << 0),
+	PSX_AUDIO_SPU_LOOP_REPEAT = (1 << 0) | (1 << 1),
+	// Some old tools will not recognize loop start points if bit 1 is not set
+	// in addition to bit 2. Real hardware does not care.
+	PSX_AUDIO_SPU_LOOP_START  = (1 << 1) | (1 << 2),
+	PSX_AUDIO_SPU_LOOP_TRAP   = (1 << 0) | (1 << 2)
 };

 uint32_t psx_audio_xa_get_buffer_size(psx_audio_xa_settings_t settings, int sample_count);
--- a/psxavenc/args.c
+++ b/psxavenc/args.c
@@ -270,13 +270,14 @@ static int parse_xa_option(args_t *args, char option, const char *param) {

 static const char *const spu_options_help =
 	"Mono SPU-ADPCM options:\n"
-	"    [-f freq] [-a size] [-l ms | -L] [-D]\n"
+	"    [-f freq] [-a size] [-l ms | -n | -L] [-D]\n"
 	"\n"
 	"    -f freq           Use specified sample rate (default 44100)\n"
 	"    -a size           Pad audio data excluding header to multiple of given size (default 64)\n"
-	"    -l ms             Add loop point at specified offset (in milliseconds)\n"
-	"    -L                Set loop end flag at the end of data but do not add a loop point\n"
-	"    -D                Do not prepend encoded data with a dummy silent block\n"
+	"    -l ms             Add loop point at specified timestamp (in milliseconds, overrides any loop point present in input file)\n"
+	"    -n                Do not set loop end flag nor add a loop point (even if input file has one)\n"
+	"    -L                Set ADPCM loop end flag at end of data but do not add a loop point (even if input file has one)\n"
+	"    -D                Do not prepend encoded data with a dummy silent block to reset decoder state\n"
 	"\n";

 static int parse_spu_option(args_t *args, char option, const char *param) {
@@ -288,11 +289,17 @@ static int parse_spu_option(args_t *args, char option, const char *param) {
 			return parse_int(&(args->alignment), "alignment", param, 1, -1);

 		case 'l':
-			args->flags |= FLAG_SPU_LOOP_END;
+			args->flags |= FLAG_OVERRIDE_LOOP_POINT | FLAG_SPU_ENABLE_LOOP;
 			return parse_int(&(args->audio_loop_point), "loop offset", param, 0, -1);

+		case 'n':
+			args->flags |= FLAG_OVERRIDE_LOOP_POINT;
+			args->audio_loop_point = -1;
+			return 1;
+
 		case 'L':
-			args->flags |= FLAG_SPU_LOOP_END;
+			args->flags |= FLAG_OVERRIDE_LOOP_POINT | FLAG_SPU_ENABLE_LOOP;
+			args->audio_loop_point = -1;
 			return 1;

 		case 'D':
@@ -306,15 +313,16 @@ static int parse_spu_option(args_t *args, char option, const char *param) {

 static const char *const spui_options_help =
 	"Interleaved SPU-ADPCM options:\n"
-	"    [-f freq] [-c channels] [-i size] [-a size] [-L] [-D]\n"
+	"    [-f freq] [-c channels] [-i size] [-a size] [-l ms | -n] [-L] [-D]\n"
 	"\n"
 	"    -f freq           Use specified sample rate (default 44100)\n"
 	"    -c channels       Use specified channel count (default 2)\n"
 	"    -i size           Use specified channel interleave size (default 2048)\n"
-	"    -a size           Pad .vag header and each audio chunk to multiples of given size\n"
-	"                      (default 2048)\n"
-	"    -L                Set loop end flag at the end of each audio chunk\n"
-	"    -D                Do not prepend first chunk's data with a dummy silent block\n"
+	"    -a size           Pad .vag header and each audio chunk to multiples of given size (default 2048)\n"
+	"    -l ms             Store specified timestamp in file header as loop point (in milliseconds, overrides any loop point present in input file)\n"
+	"    -n                Do not store any loop point in file header (even if input file has one)\n"
+	"    -L                Set ADPCM loop end flag at the end of each audio chunk (separately from loop point in file header)\n"
+	"    -D                Do not prepend first chunk's data with a dummy silent block to reset decoder state\n"
 	"\n";

 static int parse_spui_option(args_t *args, char option, const char *param) {
@@ -337,8 +345,17 @@ static int parse_spui_option(args_t *args, char option, const char *param) {
 		case 'a':
 			return parse_int(&(args->alignment), "alignment", param, 1, -1);

+		case 'l':
+			args->flags |= FLAG_OVERRIDE_LOOP_POINT;
+			return parse_int(&(args->audio_loop_point), "loop offset", param, 0, -1);
+
+		case 'n':
+			args->flags |= FLAG_OVERRIDE_LOOP_POINT;
+			args->audio_loop_point = -1;
+			return 1;
+
 		case 'L':
-			args->flags |= FLAG_SPU_LOOP_END;
+			args->flags |= FLAG_SPU_ENABLE_LOOP;
 			return 1;

 		case 'D':
@@ -358,8 +375,7 @@ static const char *const bs_options_help =
 	"                        v2:   MDEC BS v2 (default)\n"
 	"                        v3:   MDEC BS v3\n"
 	"                        v3dc: MDEC BS v3, expect decoder to wrap DC coefficients\n"
-	"    -s WxH            Rescale input file to fit within specified size\n"
-	"                      (16x16-640x512 in 16-pixel increments, default 320x240)\n"
+	"    -s WxH            Rescale input file to fit within specified size (16x16-640x512 in 16-pixel increments, default 320x240)\n"
 	"    -I                Force stretching to given size without preserving aspect ratio\n"
 	"\n";

@@ -422,8 +438,7 @@ static const char *const str_options_help =
 	"    -x 1|2            Set CD-ROM speed the file is meant to played at (default 2)\n"
 	"    -T id             Tag video sectors with specified .str type ID (default 0x8001)\n"
 	"    -A id             Tag SPU-ADPCM sectors with specified .str type ID (default 0x0001)\n"
-	"    -X                Place audio sectors after corresponding video sectors\n"
-	"                      (rather than ahead of them)\n"
+	"    -X                Place audio sectors after corresponding video sectors rather than ahead of them\n"
 	"\n";

 static int parse_str_option(args_t *args, char option, const char *param) {
--- a/psxavenc/args.h
+++ b/psxavenc/args.h
@@ -35,10 +35,11 @@ enum {
 	FLAG_HIDE_PROGRESS        = 1 << 2,
 	FLAG_PRINT_HELP           = 1 << 3,
 	FLAG_PRINT_VERSION        = 1 << 4,
-	FLAG_SPU_LOOP_END         = 1 << 5,
-	FLAG_SPU_NO_LEADING_DUMMY = 1 << 6,
-	FLAG_BS_IGNORE_ASPECT     = 1 << 7,
-	FLAG_STR_TRAILING_AUDIO   = 1 << 8
+	FLAG_OVERRIDE_LOOP_POINT  = 1 << 5,
+	FLAG_SPU_ENABLE_LOOP      = 1 << 6,
+	FLAG_SPU_NO_LEADING_DUMMY = 1 << 7,
+	FLAG_BS_IGNORE_ASPECT     = 1 << 8,
+	FLAG_STR_TRAILING_AUDIO   = 1 << 9
 };

 typedef enum {
--- a/psxavenc/decoding.c
+++ b/psxavenc/decoding.c
@@ -3,7 +3,7 @@ psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend

 Copyright (c) 2019, 2020 Adrian "asie" Siekierka
 Copyright (c) 2019 Ben "GreaseMonkey" Russell
-Copyright (c) 2023 spicyjpeg
+Copyright (c) 2023, 2025 spicyjpeg

 This software is provided 'as-is', without any express or implied
 warranty. In no event will the authors be held liable for any damages
@@ -23,7 +23,10 @@ freely, subject to the following restrictions:
 */

 #include <assert.h>
+#include <math.h>
 #include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -36,6 +39,77 @@ freely, subject to the following restrictions:
 #include "args.h"
 #include "decoding.h"

+enum {
+	LOOP_TYPE_FORWARD,
+	LOOP_TYPE_PING_PONG,
+	LOOP_TYPE_BACKWARD
+};
+
+// HACK: FFmpeg does not parse "smpl" chunks out of .wav files on its own, so a
+// minimal RIFF chunk parser needs to be implemented here. (It does however
+// parse "cue" chunk entries as chapters; if no "smpl" chunk is found, the
+// file's first chapter if any is used as a loop point by default.) Returns the
+// start offset of the first loop listed in the "smpl" chunk, or -1 if none.
+static int parse_wav_loop_point(AVIOContext *pb, const args_t *args) {
+	if (!pb->seekable) {
+		if (!(args->flags & FLAG_QUIET))
+			fprintf(stderr, "Warning: input file is not seekable, cannot parse loop points\n");
+		return -1;
+	}
+
+	int64_t saved_file_pos = avio_tell(pb);
+	int start_offset = -1;
+
+	if (avio_seek(pb, 0, SEEK_SET) != 0)
+		return -1;
+
+	avio_skip(pb, sizeof(uint32_t) * 3); // "RIFF" magic, file size, "WAVE" magic
+
+	while (!avio_feof(pb)) {
+		uint32_t chunk_type = avio_rl32(pb);
+		uint32_t chunk_size = avio_rl32(pb);
+
+		if (chunk_type != MKTAG('s', 'm', 'p', 'l') || chunk_size < (sizeof(uint32_t) * 9)) {
+			// RIFF chunks are word aligned: odd sizes are followed by a pad byte.
+			avio_skip(pb, (int64_t)chunk_size + (chunk_size & 1));
+			continue;
+		}
+
+		avio_skip(pb, sizeof(uint32_t) * 7); // Manufacturer ID through SMPTE offset
+		uint32_t loop_count = avio_rl32(pb);
+		avio_rl32(pb); // Additional data size
+
+		// Each loop entry is 6 words long; do not read past a truncated chunk.
+		if (loop_count == 0 || chunk_size < (sizeof(uint32_t) * (9 + 6)))
+			break;
+		if (loop_count > 1 && !(args->flags & FLAG_QUIET))
+			fprintf(stderr, "Warning: input file has %d loop points, using first one\n", (int)loop_count);
+
+		avio_rl32(pb); // Loop ID
+		uint32_t loop_type = avio_rl32(pb);
+		uint32_t loop_start = avio_rl32(pb);
+		avio_rl32(pb); // End offset
+		avio_rl32(pb); // Sample fraction
+		uint32_t play_count = avio_rl32(pb);
+
+		// Offsets that do not fit in an int are reported as "no loop point".
+		if (loop_start <= INT32_MAX)
+			start_offset = (int)loop_start;
+
+		if (!(args->flags & FLAG_QUIET)) {
+			if (loop_type != LOOP_TYPE_FORWARD)
+				fprintf(stderr, "Warning: treating %s loop as forward loop\n", (loop_type == LOOP_TYPE_PING_PONG) ? "ping-pong" : (loop_type == LOOP_TYPE_BACKWARD) ? "backward" : "unknown");
+			if (play_count != 0)
+				fprintf(stderr, "Warning: treating loop repeating %d times as endless loop\n", (int)play_count);
+		}
+		break;
+	}
+
+	// Restore the read position saved on entry.
+	avio_seek(pb, saved_file_pos, SEEK_SET);
+	return start_offset;
+}
+
 static bool decode_frame(AVCodecContext *codec, AVFrame *frame, int *frame_size, AVPacket *packet) {
 	if (packet != NULL) {
 		if (avcodec_send_packet(codec, packet) != 0)
@@ -152,10 +226,11 @@ bool open_av_data(decoder_t *decoder, const args_t *args, int flags) {
 			layout.order = AV_CHANNEL_ORDER_UNSPEC;
 		}

-		if (!(args->flags & FLAG_QUIET)) {
-			if (args->audio_channels > av->audio_codec_context->ch_layout.nb_channels)
-				fprintf(stderr, "Warning: input file has less than %d channels\n", args->audio_channels);
-		}
+		if (
+			args->audio_channels > av->audio_codec_context->ch_layout.nb_channels &&
+			!(args->flags & FLAG_QUIET)
+		)
+			fprintf(stderr, "Warning: input file has less than %d channels\n", args->audio_channels);

 		av->sample_count_mul = args->audio_channels;

@@ -191,13 +266,11 @@ bool open_av_data(decoder_t *decoder, const args_t *args, int flags) {
 		if (avcodec_open2(av->video_codec_context, codec, NULL) < 0)
 			return false;

-		if (!(args->flags & FLAG_QUIET)) {
-			if (
-				decoder->video_width > av->video_codec_context->width ||
-				decoder->video_height > av->video_codec_context->height
-			)
-				fprintf(stderr, "Warning: input file has resolution lower than %dx%d\n", decoder->video_width, decoder->video_height);
-		}
+		if (
+			(decoder->video_width > av->video_codec_context->width || decoder->video_height > av->video_codec_context->height) &&
+			!(args->flags & FLAG_QUIET)
+		)
+			fprintf(stderr, "Warning: input file has resolution lower than %dx%d\n", decoder->video_width, decoder->video_height);

 		if (!(args->flags & FLAG_BS_IGNORE_ASPECT)) {
 			// Reduce the provided size so that it matches the input file's
@@ -205,11 +278,10 @@ bool open_av_data(decoder_t *decoder, const args_t *args, int flags) {
 			double src_ratio = (double)av->video_codec_context->width / (double)av->video_codec_context->height;
 			double dst_ratio = (double)decoder->video_width / (double)decoder->video_height;

-			if (src_ratio < dst_ratio) {
-				decoder->video_width = (int)((double)decoder->video_height * src_ratio + 15.0) & ~15;
-			} else {
-				decoder->video_height = (int)((double)decoder->video_width / src_ratio + 15.0) & ~15;
-			}
+			if (src_ratio < dst_ratio)
+				decoder->video_width = ((int)round((double)decoder->video_height * src_ratio) + 15) & ~15;
+			else
+				decoder->video_height = ((int)round((double)decoder->video_width / src_ratio) + 15) & ~15;
 		}

 		av->scaler = sws_getContext(
@@ -253,6 +325,48 @@ bool open_av_data(decoder_t *decoder, const args_t *args, int flags) {
 	return true;
 }

+// Returns the input file's loop point in milliseconds or -1 if none is found,
+// trying the .wav "smpl" chunk, then the "loop_start" metadata tag (read as
+// AV_TIME_BASE units), then the first chapter. Bad smpl/tag values are skipped.
+int get_av_loop_point(decoder_t *decoder, const args_t *args) {
+	decoder_state_t *av = &(decoder->state);
+	AVDictionaryEntry *loop_start_tag = av_dict_get(av->format->metadata, "loop_start", 0, 0);
+	const char *source = NULL;
+	int loop_point = -1;
+
+	if (strcmp(av->format->iformat->name, "wav") == 0 && av->audio_stream != NULL) {
+		int start_offset = parse_wav_loop_point(av->format->pb, args);
+
+		if (start_offset >= 0 && av->audio_codec_context->sample_rate > 0) { // Avoid division by zero
+			double pts = (double)start_offset / (double)av->audio_codec_context->sample_rate;
+			loop_point = (int)round(pts * 1000.0);
+			source = "smpl data";
+		}
+	}
+	if (source == NULL && loop_start_tag != NULL) {
+		// Dividing instead of scaling by 1000 first cannot overflow.
+		long long ms = strtoll(loop_start_tag->value, NULL, 10) / (AV_TIME_BASE / 1000);
+
+		if (ms >= 0 && ms <= INT32_MAX) {
+			loop_point = (int)ms;
+			source = "metadata";
+		}
+	}
+	if (source == NULL && av->format->nb_chapters > 0) {
+		// nb_chapters is unsigned, hence %u rather than %d.
+		if (av->format->nb_chapters > 1 && !(args->flags & FLAG_QUIET))
+			fprintf(stderr, "Warning: input file has %u chapters, using first one as loop point\n", av->format->nb_chapters);
+
+		AVChapter *chapter = av->format->chapters[0];
+		double pts = (double)chapter->start * (double)chapter->time_base.num / (double)chapter->time_base.den;
+		loop_point = (int)round(pts * 1000.0);
+		source = "first chapter";
+	}
+	if (source != NULL && !(args->flags & FLAG_QUIET))
+		fprintf(stderr, "Detected loop point (from %s): %d ms\n", source, loop_point);
+	return loop_point;
+}
+
 static void poll_av_packet_audio(decoder_t *decoder, AVPacket *packet) {
 	decoder_state_t *av = &(decoder->state);

@@ -309,9 +423,8 @@ static void poll_av_packet_video(decoder_t *decoder, AVPacket *packet) {

 	// Some files seem to have timestamps starting from a negative value
 	// (but otherwise valid) for whatever reason.
-	double pts =
-		((double)av->frame->pts * (double)av->video_stream->time_base.num)
-		/ av->video_stream->time_base.den;
+	double pts = (double)av->frame->pts * (double)av->video_stream->time_base.num / (double)av->video_stream->time_base.den;
+
 #if 0
 	if (pts < 0.0)
 		return;
@@ -325,10 +438,13 @@ static void poll_av_packet_video(decoder_t *decoder, AVPacket *packet) {

 	//fprintf(stderr, "%d %f %f %f\n", decoder->video_frame_count, pts, av->video_next_pts, pts_step);

-	// Insert duplicate frames if the frame rate of the input stream is
-	// lower than the target frame rate.
-	int dupe_frames = (int) ceil((pts - av->video_next_pts) / pts_step);
-	if (dupe_frames < 0) dupe_frames = 0;
+	// Insert duplicate frames if the frame rate of the input stream is lower
+	// than the target frame rate.
+	int dupe_frames = (int)ceil((pts - av->video_next_pts) / pts_step);
+
+	if (dupe_frames < 0)
+		dupe_frames = 0;
+
 	decoder->video_frames = realloc(
 		decoder->video_frames,
 		(decoder->video_frame_count + dupe_frames + 1) * av->video_frame_dst_size
@@ -447,8 +563,10 @@ void close_av_data(decoder_t *decoder) {

 	av_frame_free(&(av->frame));
 	swr_free(&(av->resampler));
+#if LIBAVCODEC_VERSION_MAJOR < 61
 	// Deprecated, kept for compatibility with older FFmpeg versions.
 	avcodec_close(av->audio_codec_context);
+#endif
 	avcodec_free_context(&(av->audio_codec_context));
 	avformat_free_context(av->format);

--- a/psxavenc/decoding.h
+++ b/psxavenc/decoding.h
@@ -25,6 +25,7 @@ freely, subject to the following restrictions:
 #pragma once

 #include <stdbool.h>
+#include <stdint.h>
 #include <libavutil/opt.h>
 #include <libavcodec/avcodec.h>
 #include <libavcodec/avdct.h>
@@ -74,6 +75,7 @@ enum {
 };

 bool open_av_data(decoder_t *decoder, const args_t *args, int flags);
+int get_av_loop_point(decoder_t *decoder, const args_t *args);
 bool poll_av_data(decoder_t *decoder);
 bool ensure_av_data(decoder_t *decoder, int needed_audio_samples, int needed_video_frames);
 void retire_av_data(decoder_t *decoder, int retired_audio_samples, int retired_video_frames);
--- a/psxavenc/filefmt.c
+++ b/psxavenc/filefmt.c
@@ -23,6 +23,7 @@ freely, subject to the following restrictions:
 */

 #include <assert.h>
+#include <math.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <string.h>
@@ -104,13 +105,13 @@ static void write_vag_header(const args_t *args, int size_per_channel, uint8_t *
 	else
 	 	header[0x03] = 'p';

-	// Version (big-endian)
+	// Version (big endian)
 	header[0x04] = 0x00;
 	header[0x05] = 0x00;
 	header[0x06] = 0x00;
 	header[0x07] = 0x20;

-	// Interleave (little-endian)
+	// Interleave (little endian)
 	if (args->format == FORMAT_VAGI) {
 		header[0x08] = (uint8_t)args->audio_interleave;
 		header[0x09] = (uint8_t)(args->audio_interleave >> 8);
@@ -118,24 +119,38 @@ static void write_vag_header(const args_t *args, int size_per_channel, uint8_t *
 		header[0x0B] = (uint8_t)(args->audio_interleave >> 24);
 	}

-	// Length of data for each channel (big-endian)
+	// Length of data for each channel (big endian)
 	header[0x0C] = (uint8_t)(size_per_channel >> 24);
 	header[0x0D] = (uint8_t)(size_per_channel >> 16);
 	header[0x0E] = (uint8_t)(size_per_channel >> 8);
 	header[0x0F] = (uint8_t)size_per_channel;

-	// Sample rate (big-endian)
+	// Sample rate (big endian)
 	header[0x10] = (uint8_t)(args->audio_frequency >> 24);
 	header[0x11] = (uint8_t)(args->audio_frequency >> 16);
 	header[0x12] = (uint8_t)(args->audio_frequency >> 8);
 	header[0x13] = (uint8_t)args->audio_frequency;

-	// Number of channels (little-endian)
+	// Loop point in bytes (big endian, non-standard)
+	if (args->format == FORMAT_VAGI && args->audio_loop_point >= 0) {
+		int loop_start_block = (args->audio_loop_point * args->audio_frequency) / (PSX_AUDIO_SPU_SAMPLES_PER_BLOCK * 1000);
+
+		if (!(args->flags & FLAG_SPU_NO_LEADING_DUMMY))
+			loop_start_block++;
+
+		int loop_point = loop_start_block * PSX_AUDIO_SPU_BLOCK_SIZE;
+		header[0x14] = (uint8_t)(loop_point >> 24);
+		header[0x15] = (uint8_t)(loop_point >> 16);
+		header[0x16] = (uint8_t)(loop_point >> 8);
+		header[0x17] = (uint8_t)loop_point;
+	}
+
+	// Number of channels (non-standard)
 	header[0x1E] = (uint8_t)args->audio_channels;
-	header[0x1F] = 0x00;

 	// Filename
 	int name_offset = strlen(args->output_file);
+
 	while (
 		name_offset > 0 &&
 		args->output_file[name_offset - 1] != '/' &&
@@ -235,7 +250,7 @@ void encode_file_spu(const args_t *args, decoder_t *decoder, FILE *output) {

 		if (block_count == loop_start_block)
 			block[1] |= PSX_AUDIO_SPU_LOOP_START;
-		if ((args->flags & FLAG_SPU_LOOP_END) && decoder->end_of_input)
+		if ((args->flags & FLAG_SPU_ENABLE_LOOP) && decoder->end_of_input)
 			block[1] |= PSX_AUDIO_SPU_LOOP_REPEAT;

 		retire_av_data(decoder, samples_length, 0);
@@ -253,10 +268,10 @@ void encode_file_spu(const args_t *args, decoder_t *decoder, FILE *output) {
 		}
 	}

-	if (!(args->flags & FLAG_SPU_LOOP_END)) {
+	if (!(args->flags & FLAG_SPU_ENABLE_LOOP)) {
 		// Insert trailing looping block
 		memset(block, 0, PSX_AUDIO_SPU_BLOCK_SIZE);
-		block[1] = PSX_AUDIO_SPU_LOOP_START | PSX_AUDIO_SPU_LOOP_END;
+		block[1] = PSX_AUDIO_SPU_LOOP_TRAP;

 		fwrite(block, PSX_AUDIO_SPU_BLOCK_SIZE, 1, output);
 		block_count++;
@@ -291,6 +306,8 @@ void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output) {

 	if (args->format == FORMAT_VAGI)
 		fseek(output, header_size, SEEK_SET);
+	else if (args->audio_loop_point >= 0 && !(args->flags & FLAG_QUIET))
+		fprintf(stderr, "Warning: ignoring loop point as there is no header to store it in\n");

 	int audio_state_size = sizeof(psx_audio_encoder_channel_state_t) * args->audio_channels;
 	psx_audio_encoder_channel_state_t *audio_state = malloc(audio_state_size);
@@ -326,14 +343,17 @@ void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output) {
 			if (length > 0) {
 				uint8_t *last_block = chunk_ptr + length - PSX_AUDIO_SPU_BLOCK_SIZE;

-				if (args->flags & FLAG_SPU_LOOP_END) {
+				if (
+					(args->flags & FLAG_SPU_ENABLE_LOOP) ||
+					(decoder->end_of_input && args->audio_loop_point >= 0)
+				) {
 					last_block[1] = PSX_AUDIO_SPU_LOOP_REPEAT;
 				} else if (decoder->end_of_input) {
 					// HACK: the trailing block should in theory be appended to
 					// the existing data, but it's easier to just zerofill and
-					// repurpose the last encoded block
+					// repurpose the last encoded block.
 					memset(last_block, 0, PSX_AUDIO_SPU_BLOCK_SIZE);
-					last_block[1] = PSX_AUDIO_SPU_LOOP_START | PSX_AUDIO_SPU_LOOP_END;
+					last_block[1] = PSX_AUDIO_SPU_LOOP_TRAP;
 				}
 			}
 		}
@@ -420,7 +440,7 @@ void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output) {
 	encoder.state.quant_scale_sum = 0;

 	// FIXME: this needs an extra frame to prevent A/V desync
-	int frames_needed = (int) ceil((double)video_sectors_per_block / frame_size);
+	int frames_needed = (int)ceil((double)video_sectors_per_block / frame_size);

 	if (frames_needed < 2)
 		frames_needed = 2;
@@ -542,7 +562,7 @@ void encode_file_strspu(const args_t *args, decoder_t *decoder, FILE *output) {
 	encoder.state.quant_scale_sum = 0;

 	// FIXME: this needs an extra frame to prevent A/V desync
-	int frames_needed = (int) ceil((double)video_sectors_per_block / frame_size);
+	int frames_needed = (int)ceil((double)video_sectors_per_block / frame_size);

 	if (frames_needed < 2)
 		frames_needed = 2;
--- a/psxavenc/main.c
+++ b/psxavenc/main.c
@@ -3,7 +3,7 @@ psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend

 Copyright (c) 2019, 2020 Adrian "asie" Siekierka
 Copyright (c) 2019 Ben "GreaseMonkey" Russell
-Copyright (c) 2023 spicyjpeg
+Copyright (c) 2023, 2025 spicyjpeg

 This software is provided 'as-is', without any express or implied
 warranty. In no event will the authors be held liable for any damages
@@ -72,6 +72,7 @@ int main(int argc, const char **argv) {

 	if (output == NULL) {
 		fprintf(stderr, "Failed to open output file: %s\n", args.output_file);
+		close_av_data(&decoder);
 		return 1;
 	}

@@ -94,6 +95,13 @@ int main(int argc, const char **argv) {

 		case FORMAT_SPU:
 		case FORMAT_VAG:
+			if (!(args.flags & FLAG_OVERRIDE_LOOP_POINT)) {
+				args.audio_loop_point = get_av_loop_point(&decoder, &args);
+
+				if (args.audio_loop_point >= 0)
+					args.flags |= FLAG_SPU_ENABLE_LOOP;
+			}
+
 			if (!(args.flags & FLAG_QUIET))
 				fprintf(
 					stderr,
@@ -106,6 +114,9 @@ int main(int argc, const char **argv) {

 		case FORMAT_SPUI:
 		case FORMAT_VAGI:
+			if (!(args.flags & FLAG_OVERRIDE_LOOP_POINT))
+				args.audio_loop_point = get_av_loop_point(&decoder, &args);
+
 			if (!(args.flags & FLAG_QUIET))
 				fprintf(
 					stderr,
@@ -121,7 +132,7 @@ int main(int argc, const char **argv) {
 		case FORMAT_STR:
 		case FORMAT_STRCD:
 			if (!(args.flags & FLAG_QUIET)) {
-				if (decoder.state.audio_stream)
+				if (decoder.state.audio_stream != NULL)
 					fprintf(
 						stderr,
 						"Audio format: XA-ADPCM, %d Hz %d-bit %s, F=%d C=%d\n",
@@ -152,7 +163,7 @@ int main(int argc, const char **argv) {

 		case FORMAT_STRV:
 			if (!(args.flags & FLAG_QUIET)) {
-				if (decoder.state.audio_stream)
+				if (decoder.state.audio_stream != NULL)
 					fprintf(
 						stderr,
 						"Audio format: SPU-ADPCM, %d Hz %d channels, interleave=%d\n",
--- a/psxavenc/mdec.c
+++ b/psxavenc/mdec.c
@@ -588,10 +588,10 @@ void encode_frame_bs(mdec_encoder_t *encoder, const uint8_t *video_frame) {
 	if (real_index > (video_frame_count - 1))
 		real_index = video_frame_count - 1;

-	uint8_t *y_plane = video_frames + encoder->video_width * encoder->video_height * 3/2 * real_index;
+	const uint8_t *y_plane = video_frames + encoder->video_width * encoder->video_height * 3/2 * real_index;
 #else
-	uint8_t *y_plane = video_frame;
-	uint8_t *c_plane = y_plane + (encoder->video_width * encoder->video_height);
+	const uint8_t *y_plane = video_frame;
+	const uint8_t *c_plane = y_plane + (encoder->video_width * encoder->video_height);
 #endif

 	int dct_block_count_x = (encoder->video_width + 15) / 16;
Author	SHA1	Message	Date
Adrian "asie" Siekierka	bdc24e897e	Merge pull request #12 from spicyjpeg/loop-points Add support for audio loop point parsing	2025-12-05 09:03:01 +01:00
spicyjpeg	9a22336cec	Flip loop point endianness, add notes to README	2025-12-05 08:57:13 +01:00
Adrian Siekierka	186c0fad10	update CI to FFmpeg 8.0.1, preserve avcodec_close() for older FFmpeg versions	2025-12-05 07:45:57 +01:00
spicyjpeg	d1bc6e2f5f	Minor fixes for interleaved .vag loop points	2025-12-05 02:26:01 +01:00
spicyjpeg	ae864bf940	Add support for FFmpeg loop_start metadata tag	2025-12-05 02:02:17 +01:00
spicyjpeg	b7dc599771	Add loop point parsing support	2025-12-05 01:50:23 +01:00
malucart	ac4dea75ea	remove avcodec_close to fix build (#11 )	2025-11-30 15:03:58 +01:00
Adrian Siekierka	7aaae789aa	fix discarded qualifier warning	2025-06-01 18:12:18 +02:00
Adrian Siekierka	ed4821fcac	chore: update CI ffmpeg to 7.1.1	2025-03-08 07:38:56 +01:00