diff --git a/README.md b/README.md
index 007bd35..5fa245f 100644
--- a/README.md
+++ b/README.md
@@ -71,23 +71,41 @@ Notes:
   authoring tool capable of rebuilding the EDC/ECC data (as it is dependent on
   the file's absolute location on the disc) and generating a Mode 2 CD-ROM image
   with "native" 2352-byte sectors.
+
 - Similarly, files generated with `-t xa` or `-t xacd` **must be interleaved**
   **with other XA-ADPCM tracks or empty padding using an external tool** before
   they can be played.
-- `vag` and `vagi` are similar to `spu` and `spui` respectively, but add a .vag
-  header at the beginning of the file. The header is always 48 bytes long for
-  `vag` files, while in the case of `vagi` files it is padded to the size
-  specified using the `-a` option (2048 bytes by default). Note that `vagi`
-  files with more than 2 channels and/or alignment other than 2048 bytes are not
-  standardized.
+
+- `vag` and `vagi` are similar to `spu` and `spui` respectively, but add a
+  [.vag header](https://psx-spx.consoledev.net/cdromfileformats/#cdrom-file-audio-single-samples-vag-sony)
+  at the beginning of the file. The header is always 48 bytes long for `vag`
+  files, while in the case of `vagi` files it is padded to the size specified
+  using the `-a` option (2048 bytes by default).
+
+- The `vagi` format extends the standard .vag header by changing the signature
+  to `VAGi` and adding the following fields:
+  - the file's interleave size at offset `0x08-0x0B` (little endian);
+  - the loop start offset in bytes-per-channel, if any, at offset `0x14-0x17`
+    (little endian);
+  - the file's channel count at offset `0x1E`.
+
+- The `spu`, `vag`, `spui` and `vagi` formats support setting a loop start
+  point. If the input file is either a .wav file with sampler metadata (`smpl`
+  chunk) or in a format FFmpeg supports parsing cue/chapter markers from, the
+  first marker will be used as the loop point by default. The `-l` and `-n`
+  options can be used to manually set a loop point or ignore the one present in
+  the input file respectively.
+
 - ~~The `strspu` format encodes the input file's audio track as a series of~~
   ~~custom .str chunks (type ID `0x0001` by default) holding interleaved~~
   ~~SPU-ADPCM data in the same format as `spui`, rather than XA-ADPCM. As .str~~
   ~~chunks do not require custom XA subheaders, a file with standard 2048-byte~~
   ~~sectors that does not need any special handling will be generated.~~ *This*
   *format has not yet been implemented.*
+
 - The `strv` format disables audio altogether and is equivalent to `strspu` on
   an input file with no audio track.
+
 - The `sbs` format (used in some System 573 games) consists of a series of
   concatenated BS frames, each padded to the size specified by the `-a` option
   (the default setting is 8192 bytes), with no additional headers besides the BS
diff --git a/libpsxav/adpcm.c b/libpsxav/adpcm.c
index 80e3413..d807c57 100644
--- a/libpsxav/adpcm.c
+++ b/libpsxav/adpcm.c
@@ -384,11 +384,9 @@ int psx_audio_spu_encode_simple(const int16_t *samples, int sample_count, uint8_
 		uint8_t *last_block = output + length - PSX_AUDIO_SPU_BLOCK_SIZE;
 
 		if (loop_start < 0) {
-			last_block[1] |= PSX_AUDIO_SPU_LOOP_END;
-
 			// Insert trailing looping block
 			memset(output + length, 0, PSX_AUDIO_SPU_BLOCK_SIZE);
-			output[length + 1] = PSX_AUDIO_SPU_LOOP_START | PSX_AUDIO_SPU_LOOP_END;
+			output[length + 1] = PSX_AUDIO_SPU_LOOP_TRAP;
 
 			length += PSX_AUDIO_SPU_BLOCK_SIZE;
 		} else {
diff --git a/libpsxav/libpsxav.h b/libpsxav/libpsxav.h
index 67733dd..6724bd8 100644
--- a/libpsxav/libpsxav.h
+++ b/libpsxav/libpsxav.h
@@ -62,9 +62,12 @@ typedef struct {
 } psx_audio_encoder_state_t;
 
 enum {
-	PSX_AUDIO_SPU_LOOP_END    = 1 << 0,
-	PSX_AUDIO_SPU_LOOP_REPEAT = 3 << 0,
-	PSX_AUDIO_SPU_LOOP_START  = 1 << 2
+	PSX_AUDIO_SPU_LOOP_END    = (1 << 0), // Bit 0 of the block flags byte (block[1])
+	PSX_AUDIO_SPU_LOOP_REPEAT = (1 << 0) | (1 << 1), // Bit 1 plus LOOP_END; OR'd into the final block when looping is enabled
+	// Some old tools will not recognize loop start points if bit 1 is not set
+	// in addition to bit 2. Real hardware does not care.
+	PSX_AUDIO_SPU_LOOP_START  = (1 << 1) | (1 << 2),
+	PSX_AUDIO_SPU_LOOP_TRAP   = (1 << 0) | (1 << 2) // Bits 0 and 2 only; flags of the zero-filled trailing block of non-looping data
 };
 
 uint32_t psx_audio_xa_get_buffer_size(psx_audio_xa_settings_t settings, int sample_count);
diff --git a/psxavenc/args.c b/psxavenc/args.c
index 93c3ef0..921dc6e 100644
--- a/psxavenc/args.c
+++ b/psxavenc/args.c
@@ -270,13 +270,14 @@ static int parse_xa_option(args_t *args, char option, const char *param) {
 
 static const char *const spu_options_help =
 	"Mono SPU-ADPCM options:\n"
-	"    [-f freq] [-a size] [-l ms | -L] [-D]\n"
+	"    [-f freq] [-a size] [-l ms | -n | -L] [-D]\n"
 	"\n"
 	"    -f freq           Use specified sample rate (default 44100)\n"
 	"    -a size           Pad audio data excluding header to multiple of given size (default 64)\n"
-	"    -l ms             Add loop point at specified offset (in milliseconds)\n"
-	"    -L                Set loop end flag at the end of data but do not add a loop point\n"
-	"    -D                Do not prepend encoded data with a dummy silent block\n"
+	"    -l ms             Add loop point at specified timestamp (in milliseconds, overrides any loop point present in input file)\n"
+	"    -n                Do not set loop end flag nor add a loop point (even if input file has one)\n"
+	"    -L                Set ADPCM loop end flag at end of data but do not add a loop point (even if input file has one)\n"
+	"    -D                Do not prepend encoded data with a dummy silent block to reset decoder state\n"
 	"\n";
 
 static int parse_spu_option(args_t *args, char option, const char *param) {
@@ -288,11 +289,17 @@ static int parse_spu_option(args_t *args, char option, const char *param) {
 			return parse_int(&(args->alignment), "alignment", param, 1, -1);
 
 		case 'l':
-			args->flags |= FLAG_SPU_LOOP_END;
+			args->flags |= FLAG_OVERRIDE_LOOP_POINT | FLAG_SPU_ENABLE_LOOP;
 			return parse_int(&(args->audio_loop_point), "loop offset", param, 0, -1);
 
+		case 'n':
+			args->flags |= FLAG_OVERRIDE_LOOP_POINT;
+			args->audio_loop_point = -1;
+			return 1;
+
 		case 'L':
-			args->flags |= FLAG_SPU_LOOP_END;
+			args->flags |= FLAG_OVERRIDE_LOOP_POINT | FLAG_SPU_ENABLE_LOOP;
+			args->audio_loop_point = -1;
 			return 1;
 
 		case 'D':
@@ -306,15 +313,16 @@ static int parse_spu_option(args_t *args, char option, const char *param) {
 
 static const char *const spui_options_help =
 	"Interleaved SPU-ADPCM options:\n"
-	"    [-f freq] [-c channels] [-i size] [-a size] [-L] [-D]\n"
+	"    [-f freq] [-c channels] [-i size] [-a size] [-l ms | -n] [-L] [-D]\n"
 	"\n"
 	"    -f freq           Use specified sample rate (default 44100)\n"
 	"    -c channels       Use specified channel count (default 2)\n"
 	"    -i size           Use specified channel interleave size (default 2048)\n"
-	"    -a size           Pad .vag header and each audio chunk to multiples of given size\n"
-	"                      (default 2048)\n"
-	"    -L                Set loop end flag at the end of each audio chunk\n"
-	"    -D                Do not prepend first chunk's data with a dummy silent block\n"
+	"    -a size           Pad .vag header and each audio chunk to multiples of given size (default 2048)\n"
+	"    -l ms             Store specified timestamp in file header as loop point (in milliseconds, overrides any loop point present in input file)\n"
+	"    -n                Do not store any loop point in file header (even if input file has one)\n"
+	"    -L                Set ADPCM loop end flag at the end of each audio chunk (separately from loop point in file header)\n"
+	"    -D                Do not prepend first chunk's data with a dummy silent block to reset decoder state\n"
 	"\n";
 
 static int parse_spui_option(args_t *args, char option, const char *param) {
@@ -337,8 +345,17 @@ static int parse_spui_option(args_t *args, char option, const char *param) {
 		case 'a':
 			return parse_int(&(args->alignment), "alignment", param, 1, -1);
 
+		case 'l':
+			args->flags |= FLAG_OVERRIDE_LOOP_POINT;
+			return parse_int(&(args->audio_loop_point), "loop offset", param, 0, -1);
+
+		case 'n':
+			args->flags |= FLAG_OVERRIDE_LOOP_POINT;
+			args->audio_loop_point = -1;
+			return 1;
+
 		case 'L':
-			args->flags |= FLAG_SPU_LOOP_END;
+			args->flags |= FLAG_SPU_ENABLE_LOOP;
 			return 1;
 
 		case 'D':
@@ -358,8 +375,7 @@ static const char *const bs_options_help =
 	"                        v2:   MDEC BS v2 (default)\n"
 	"                        v3:   MDEC BS v3\n"
 	"                        v3dc: MDEC BS v3, expect decoder to wrap DC coefficients\n"
-	"    -s WxH            Rescale input file to fit within specified size\n"
-	"                      (16x16-640x512 in 16-pixel increments, default 320x240)\n"
+	"    -s WxH            Rescale input file to fit within specified size (16x16-640x512 in 16-pixel increments, default 320x240)\n"
 	"    -I                Force stretching to given size without preserving aspect ratio\n"
 	"\n";
 
@@ -422,8 +438,7 @@ static const char *const str_options_help =
 	"    -x 1|2            Set CD-ROM speed the file is meant to played at (default 2)\n"
 	"    -T id             Tag video sectors with specified .str type ID (default 0x8001)\n"
 	"    -A id             Tag SPU-ADPCM sectors with specified .str type ID (default 0x0001)\n"
-	"    -X                Place audio sectors after corresponding video sectors\n"
-	"                      (rather than ahead of them)\n"
+	"    -X                Place audio sectors after corresponding video sectors rather than ahead of them\n"
 	"\n";
 
 static int parse_str_option(args_t *args, char option, const char *param) {
diff --git a/psxavenc/args.h b/psxavenc/args.h
index d313646..1db5889 100644
--- a/psxavenc/args.h
+++ b/psxavenc/args.h
@@ -35,10 +35,11 @@ enum {
 	FLAG_HIDE_PROGRESS        = 1 << 2,
 	FLAG_PRINT_HELP           = 1 << 3,
 	FLAG_PRINT_VERSION        = 1 << 4,
-	FLAG_SPU_LOOP_END         = 1 << 5,
-	FLAG_SPU_NO_LEADING_DUMMY = 1 << 6,
-	FLAG_BS_IGNORE_ASPECT     = 1 << 7,
-	FLAG_STR_TRAILING_AUDIO   = 1 << 8
+	FLAG_OVERRIDE_LOOP_POINT  = 1 << 5,
+	FLAG_SPU_ENABLE_LOOP      = 1 << 6,
+	FLAG_SPU_NO_LEADING_DUMMY = 1 << 7,
+	FLAG_BS_IGNORE_ASPECT     = 1 << 8,
+	FLAG_STR_TRAILING_AUDIO   = 1 << 9
 };
 
 typedef enum {
diff --git a/psxavenc/decoding.c b/psxavenc/decoding.c
index 6540874..251d7bf 100644
--- a/psxavenc/decoding.c
+++ b/psxavenc/decoding.c
@@ -3,7 +3,7 @@ psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend
 
 Copyright (c) 2019, 2020 Adrian "asie" Siekierka
 Copyright (c) 2019 Ben "GreaseMonkey" Russell
-Copyright (c) 2023 spicyjpeg
+Copyright (c) 2023, 2025 spicyjpeg
 
 This software is provided 'as-is', without any express or implied
 warranty. In no event will the authors be held liable for any damages
@@ -23,7 +23,10 @@ freely, subject to the following restrictions:
 */
 
 #include <assert.h>
+#include <math.h>
 #include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -36,6 +39,77 @@ freely, subject to the following restrictions:
 #include "args.h"
 #include "decoding.h"
 
+enum { // Values compared against the loop type field read from a .wav "smpl" loop entry
+	LOOP_TYPE_FORWARD,
+	LOOP_TYPE_PING_PONG,
+	LOOP_TYPE_BACKWARD
+};
+
+// HACK: FFmpeg does not parse "smpl" chunks out of .wav files on its own (it
+// only exposes "cue" entries as chapters, which callers may fall back to), so
+// a minimal RIFF chunk parser is implemented here. Returns the start offset of
+// the first "smpl" loop, or -1 if pb is not seekable or no loop is present.
+static int parse_wav_loop_point(AVIOContext *pb, const args_t *args) {
+	if (!pb->seekable) {
+		if (!(args->flags & FLAG_QUIET))
+			fprintf(stderr, "Warning: input file is not seekable, cannot parse loop points\n");
+		return -1;
+	}
+
+	int64_t saved_file_pos = avio_tell(pb);
+	int start_offset = -1;
+
+	if (avio_seek(pb, 0, SEEK_SET) != 0)
+		return -1;
+
+	avio_rl32(pb); // "RIFF" magic
+	avio_rl32(pb); // File size
+	avio_rl32(pb); // "WAVE" magic
+
+	while (!avio_feof(pb)) {
+		uint32_t chunk_type = avio_rl32(pb);
+		uint32_t chunk_size = avio_rl32(pb);
+
+		if (chunk_type != MKTAG('s', 'm', 'p', 'l') || chunk_size < (sizeof(uint32_t) * 9)) {
+			avio_skip(pb, (int64_t)chunk_size + (chunk_size & 1)); // RIFF pads odd-sized chunks with one byte
+			continue;
+		}
+
+		avio_rl32(pb); // Manufacturer ID
+		avio_rl32(pb); // Product ID
+		avio_rl32(pb); // Sample period (ns)
+		avio_rl32(pb); // MIDI unity note number
+		avio_rl32(pb); // MIDI pitch fraction
+		avio_rl32(pb); // SMPTE format
+		avio_rl32(pb); // SMPTE offset
+		uint32_t loop_count = avio_rl32(pb);
+		avio_rl32(pb); // Additional data size
+
+		if (loop_count == 0)
+			break;
+		if (loop_count > 1 && !(args->flags & FLAG_QUIET))
+			fprintf(stderr, "Warning: input file has %d loop points, using first one\n", (int)loop_count);
+
+		avio_rl32(pb); // Loop ID
+		uint32_t loop_type = avio_rl32(pb);
+		start_offset = (int)avio_rl32(pb);
+		avio_rl32(pb); // End offset
+		avio_rl32(pb); // Sample fraction
+		uint32_t play_count = avio_rl32(pb);
+
+		if (!(args->flags & FLAG_QUIET)) {
+			if (loop_type != LOOP_TYPE_FORWARD)
+				fprintf(stderr, "Warning: treating %s loop as forward loop\n", (loop_type == LOOP_TYPE_PING_PONG) ? "ping-pong" : "backward");
+			if (play_count != 0)
+				fprintf(stderr, "Warning: treating loop repeating %d times as endless loop\n", (int)play_count);
+		}
+		break;
+	}
+
+	avio_seek(pb, saved_file_pos, SEEK_SET);
+	return start_offset;
+}
+
 static bool decode_frame(AVCodecContext *codec, AVFrame *frame, int *frame_size, AVPacket *packet) {
 	if (packet != NULL) {
 		if (avcodec_send_packet(codec, packet) != 0)
@@ -152,10 +226,11 @@ bool open_av_data(decoder_t *decoder, const args_t *args, int flags) {
 			layout.order = AV_CHANNEL_ORDER_UNSPEC;
 		}
 
-		if (!(args->flags & FLAG_QUIET)) {
-			if (args->audio_channels > av->audio_codec_context->ch_layout.nb_channels)
-				fprintf(stderr, "Warning: input file has less than %d channels\n", args->audio_channels);
-		}
+		if (
+			args->audio_channels > av->audio_codec_context->ch_layout.nb_channels &&
+			!(args->flags & FLAG_QUIET)
+		)
+			fprintf(stderr, "Warning: input file has less than %d channels\n", args->audio_channels);
 
 		av->sample_count_mul = args->audio_channels;
 
@@ -191,13 +266,11 @@ bool open_av_data(decoder_t *decoder, const args_t *args, int flags) {
 		if (avcodec_open2(av->video_codec_context, codec, NULL) < 0)
 			return false;
 
-		if (!(args->flags & FLAG_QUIET)) {
-			if (
-				decoder->video_width > av->video_codec_context->width ||
-				decoder->video_height > av->video_codec_context->height
-			)
-				fprintf(stderr, "Warning: input file has resolution lower than %dx%d\n", decoder->video_width, decoder->video_height);
-		}
+		if (
+			(decoder->video_width > av->video_codec_context->width || decoder->video_height > av->video_codec_context->height) &&
+			!(args->flags & FLAG_QUIET)
+		)
+			fprintf(stderr, "Warning: input file has resolution lower than %dx%d\n", decoder->video_width, decoder->video_height);
 
 		if (!(args->flags & FLAG_BS_IGNORE_ASPECT)) {
 			// Reduce the provided size so that it matches the input file's
@@ -205,11 +278,10 @@ bool open_av_data(decoder_t *decoder, const args_t *args, int flags) {
 			double src_ratio = (double)av->video_codec_context->width / (double)av->video_codec_context->height;
 			double dst_ratio = (double)decoder->video_width / (double)decoder->video_height;
 
-			if (src_ratio < dst_ratio) {
-				decoder->video_width = (int)((double)decoder->video_height * src_ratio + 15.0) & ~15;
-			} else {
-				decoder->video_height = (int)((double)decoder->video_width / src_ratio + 15.0) & ~15;
-			}
+			if (src_ratio < dst_ratio)
+				decoder->video_width = ((int)round((double)decoder->video_height * src_ratio) + 15) & ~15;
+			else
+				decoder->video_height = ((int)round((double)decoder->video_width / src_ratio) + 15) & ~15;
 		}
 
 		av->scaler = sws_getContext(
@@ -253,6 +325,38 @@ bool open_av_data(decoder_t *decoder, const args_t *args, int flags) {
 	return true;
 }
 
+int get_av_loop_point(decoder_t *decoder, const args_t *args) {
+	decoder_state_t *av = &(decoder->state);
+	// Returns the detected loop point in ms, or -1 if none is found. .wav "smpl" data is tried before chapters.
+	if (strcmp(av->format->iformat->name, "wav") == 0 && av->audio_stream != NULL) {
+		int start_offset = parse_wav_loop_point(av->format->pb, args);
+
+		if (start_offset >= 0) {
+			double pts = (double)start_offset / (double)av->audio_codec_context->sample_rate;
+			int loop_point = (int)round(pts * 1000.0);
+
+			if (!(args->flags & FLAG_QUIET))
+				fprintf(stderr, "Detected loop point (from smpl data): %d ms\n", loop_point);
+			return loop_point;
+		}
+	}
+	// Otherwise fall back to the start timestamp of the first chapter, if the container has any
+	if (av->format->nb_chapters > 0) {
+		if (av->format->nb_chapters > 1 && !(args->flags & FLAG_QUIET))
+			fprintf(stderr, "Warning: input file has %u chapters, using first one as loop point\n", av->format->nb_chapters);
+
+		AVChapter *chapter = av->format->chapters[0];
+		double pts = (double)chapter->start * (double)chapter->time_base.num / (double)chapter->time_base.den;
+		int loop_point = (int)round(pts * 1000.0);
+
+		if (!(args->flags & FLAG_QUIET))
+			fprintf(stderr, "Detected loop point (from first chapter): %d ms\n", loop_point);
+		return loop_point;
+	}
+
+	return -1;
+}
+
 static void poll_av_packet_audio(decoder_t *decoder, AVPacket *packet) {
 	decoder_state_t *av = &(decoder->state);
 
@@ -309,9 +413,8 @@ static void poll_av_packet_video(decoder_t *decoder, AVPacket *packet) {
 
 	// Some files seem to have timestamps starting from a negative value
 	// (but otherwise valid) for whatever reason.
-	double pts =
-		((double)av->frame->pts * (double)av->video_stream->time_base.num)
-		/ av->video_stream->time_base.den;
+	double pts = (double)av->frame->pts * (double)av->video_stream->time_base.num / (double)av->video_stream->time_base.den;
+
 #if 0
 	if (pts < 0.0)
 		return;
@@ -325,10 +428,13 @@ static void poll_av_packet_video(decoder_t *decoder, AVPacket *packet) {
 
 	//fprintf(stderr, "%d %f %f %f\n", decoder->video_frame_count, pts, av->video_next_pts, pts_step);
 
-	// Insert duplicate frames if the frame rate of the input stream is
-	// lower than the target frame rate.
-	int dupe_frames = (int) ceil((pts - av->video_next_pts) / pts_step);
-	if (dupe_frames < 0) dupe_frames = 0;
+	// Insert duplicate frames if the frame rate of the input stream is lower
+	// than the target frame rate.
+	int dupe_frames = (int)ceil((pts - av->video_next_pts) / pts_step);
+
+	if (dupe_frames < 0)
+		dupe_frames = 0;
+
 	decoder->video_frames = realloc(
 		decoder->video_frames,
 		(decoder->video_frame_count + dupe_frames + 1) * av->video_frame_dst_size
diff --git a/psxavenc/decoding.h b/psxavenc/decoding.h
index 311cb69..0e83a31 100644
--- a/psxavenc/decoding.h
+++ b/psxavenc/decoding.h
@@ -25,6 +25,7 @@ freely, subject to the following restrictions:
 #pragma once
 
 #include <stdbool.h>
+#include <stdint.h>
 #include <libavutil/opt.h>
 #include <libavcodec/avcodec.h>
 #include <libavcodec/avdct.h>
@@ -74,6 +75,7 @@ enum {
 };
 
 bool open_av_data(decoder_t *decoder, const args_t *args, int flags);
+int get_av_loop_point(decoder_t *decoder, const args_t *args);
 bool poll_av_data(decoder_t *decoder);
 bool ensure_av_data(decoder_t *decoder, int needed_audio_samples, int needed_video_frames);
 void retire_av_data(decoder_t *decoder, int retired_audio_samples, int retired_video_frames);
diff --git a/psxavenc/filefmt.c b/psxavenc/filefmt.c
index cb446df..a4d96e1 100644
--- a/psxavenc/filefmt.c
+++ b/psxavenc/filefmt.c
@@ -23,6 +23,7 @@ freely, subject to the following restrictions:
 */
 
 #include <assert.h>
+#include <math.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <string.h>
@@ -104,13 +105,13 @@ static void write_vag_header(const args_t *args, int size_per_channel, uint8_t *
 	else
 	 	header[0x03] = 'p';
 
-	// Version (big-endian)
+	// Version (big endian)
 	header[0x04] = 0x00;
 	header[0x05] = 0x00;
 	header[0x06] = 0x00;
 	header[0x07] = 0x20;
 
-	// Interleave (little-endian)
+	// Interleave (little endian)
 	if (args->format == FORMAT_VAGI) {
 		header[0x08] = (uint8_t)args->audio_interleave;
 		header[0x09] = (uint8_t)(args->audio_interleave >> 8);
@@ -118,24 +119,39 @@ static void write_vag_header(const args_t *args, int size_per_channel, uint8_t *
 		header[0x0B] = (uint8_t)(args->audio_interleave >> 24);
 	}
 
-	// Length of data for each channel (big-endian)
+	// Length of data for each channel (big endian)
 	header[0x0C] = (uint8_t)(size_per_channel >> 24);
 	header[0x0D] = (uint8_t)(size_per_channel >> 16);
 	header[0x0E] = (uint8_t)(size_per_channel >> 8);
 	header[0x0F] = (uint8_t)size_per_channel;
 
-	// Sample rate (big-endian)
+	// Sample rate (big endian)
 	header[0x10] = (uint8_t)(args->audio_frequency >> 24);
 	header[0x11] = (uint8_t)(args->audio_frequency >> 16);
 	header[0x12] = (uint8_t)(args->audio_frequency >> 8);
 	header[0x13] = (uint8_t)args->audio_frequency;
 
-	// Number of channels (little-endian)
+	// Loop point in bytes (little endian, non-standard)
+	if (args->format == FORMAT_VAGI && args->audio_loop_point >= 0) {
+		// Use a 64-bit intermediate: ms * Hz overflows a 32-bit int for loop points past ~48.7 s at 44100 Hz
+		int loop_start_block = (int)(((int64_t)args->audio_loop_point * args->audio_frequency) / (PSX_AUDIO_SPU_SAMPLES_PER_BLOCK * 1000));
+		if (!(args->flags & FLAG_SPU_NO_LEADING_DUMMY))
+			loop_start_block++; // Account for the dummy block prepended to the data
+
+		int loop_point = loop_start_block * PSX_AUDIO_SPU_BLOCK_SIZE;
+		header[0x14] = (uint8_t)loop_point;
+		header[0x15] = (uint8_t)(loop_point >> 8);
+		header[0x16] = (uint8_t)(loop_point >> 16);
+		header[0x17] = (uint8_t)(loop_point >> 24);
+	}
+
+	// Number of channels (little endian, non-standard)
 	header[0x1E] = (uint8_t)args->audio_channels;
 	header[0x1F] = 0x00;
 
 	// Filename
 	int name_offset = strlen(args->output_file);
+
 	while (
 		name_offset > 0 &&
 		args->output_file[name_offset - 1] != '/' &&
@@ -235,7 +251,7 @@ void encode_file_spu(const args_t *args, decoder_t *decoder, FILE *output) {
 
 		if (block_count == loop_start_block)
 			block[1] |= PSX_AUDIO_SPU_LOOP_START;
-		if ((args->flags & FLAG_SPU_LOOP_END) && decoder->end_of_input)
+		if ((args->flags & FLAG_SPU_ENABLE_LOOP) && decoder->end_of_input)
 			block[1] |= PSX_AUDIO_SPU_LOOP_REPEAT;
 
 		retire_av_data(decoder, samples_length, 0);
@@ -253,10 +269,10 @@ void encode_file_spu(const args_t *args, decoder_t *decoder, FILE *output) {
 		}
 	}
 
-	if (!(args->flags & FLAG_SPU_LOOP_END)) {
+	if (!(args->flags & FLAG_SPU_ENABLE_LOOP)) {
 		// Insert trailing looping block
 		memset(block, 0, PSX_AUDIO_SPU_BLOCK_SIZE);
-		block[1] = PSX_AUDIO_SPU_LOOP_START | PSX_AUDIO_SPU_LOOP_END;
+		block[1] = PSX_AUDIO_SPU_LOOP_TRAP;
 
 		fwrite(block, PSX_AUDIO_SPU_BLOCK_SIZE, 1, output);
 		block_count++;
@@ -326,14 +342,14 @@ void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output) {
 			if (length > 0) {
 				uint8_t *last_block = chunk_ptr + length - PSX_AUDIO_SPU_BLOCK_SIZE;
 
-				if (args->flags & FLAG_SPU_LOOP_END) {
+				if (args->flags & FLAG_SPU_ENABLE_LOOP) {
 					last_block[1] = PSX_AUDIO_SPU_LOOP_REPEAT;
 				} else if (decoder->end_of_input) {
 					// HACK: the trailing block should in theory be appended to
 					// the existing data, but it's easier to just zerofill and
 					// repurpose the last encoded block
 					memset(last_block, 0, PSX_AUDIO_SPU_BLOCK_SIZE);
-					last_block[1] = PSX_AUDIO_SPU_LOOP_START | PSX_AUDIO_SPU_LOOP_END;
+					last_block[1] = PSX_AUDIO_SPU_LOOP_TRAP;
 				}
 			}
 		}
@@ -420,7 +436,7 @@ void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output) {
 	encoder.state.quant_scale_sum = 0;
 
 	// FIXME: this needs an extra frame to prevent A/V desync
-	int frames_needed = (int) ceil((double)video_sectors_per_block / frame_size);
+	int frames_needed = (int)ceil((double)video_sectors_per_block / frame_size);
 
 	if (frames_needed < 2)
 		frames_needed = 2;
@@ -542,7 +558,7 @@ void encode_file_strspu(const args_t *args, decoder_t *decoder, FILE *output) {
 	encoder.state.quant_scale_sum = 0;
 
 	// FIXME: this needs an extra frame to prevent A/V desync
-	int frames_needed = (int) ceil((double)video_sectors_per_block / frame_size);
+	int frames_needed = (int)ceil((double)video_sectors_per_block / frame_size);
 
 	if (frames_needed < 2)
 		frames_needed = 2;
diff --git a/psxavenc/main.c b/psxavenc/main.c
index 9e584c2..0dd4a49 100644
--- a/psxavenc/main.c
+++ b/psxavenc/main.c
@@ -3,7 +3,7 @@ psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend
 
 Copyright (c) 2019, 2020 Adrian "asie" Siekierka
 Copyright (c) 2019 Ben "GreaseMonkey" Russell
-Copyright (c) 2023 spicyjpeg
+Copyright (c) 2023, 2025 spicyjpeg
 
 This software is provided 'as-is', without any express or implied
 warranty. In no event will the authors be held liable for any damages
@@ -72,6 +72,7 @@ int main(int argc, const char **argv) {
 
 	if (output == NULL) {
 		fprintf(stderr, "Failed to open output file: %s\n", args.output_file);
+		close_av_data(&decoder);
 		return 1;
 	}
 
@@ -94,6 +95,13 @@ int main(int argc, const char **argv) {
 
 		case FORMAT_SPU:
 		case FORMAT_VAG:
+			if (!(args.flags & FLAG_OVERRIDE_LOOP_POINT)) {
+				args.audio_loop_point = get_av_loop_point(&decoder, &args);
+
+				if (args.audio_loop_point >= 0)
+					args.flags |= FLAG_SPU_ENABLE_LOOP;
+			}
+
 			if (!(args.flags & FLAG_QUIET))
 				fprintf(
 					stderr,
@@ -106,6 +114,9 @@ int main(int argc, const char **argv) {
 
 		case FORMAT_SPUI:
 		case FORMAT_VAGI:
+			if (!(args.flags & FLAG_OVERRIDE_LOOP_POINT))
+				args.audio_loop_point = get_av_loop_point(&decoder, &args);
+
 			if (!(args.flags & FLAG_QUIET))
 				fprintf(
 					stderr,
@@ -121,7 +132,7 @@ int main(int argc, const char **argv) {
 		case FORMAT_STR:
 		case FORMAT_STRCD:
 			if (!(args.flags & FLAG_QUIET)) {
-				if (decoder.state.audio_stream)
+				if (decoder.state.audio_stream != NULL)
 					fprintf(
 						stderr,
 						"Audio format: XA-ADPCM, %d Hz %d-bit %s, F=%d C=%d\n",
@@ -152,7 +163,7 @@ int main(int argc, const char **argv) {
 
 		case FORMAT_STRV:
 			if (!(args.flags & FLAG_QUIET)) {
-				if (decoder.state.audio_stream)
+				if (decoder.state.audio_stream != NULL)
 					fprintf(
 						stderr,
 						"Audio format: SPU-ADPCM, %d Hz %d channels, interleave=%d\n",