first commit

2026-02-04 03:58:31 +00:00 · 2023-05-15 16:17:16 +02:00
commit aad1340bf4
11 changed files with 2021 additions and 0 deletions
--- a/psxavenc/cdrom.c
+++ b/psxavenc/cdrom.c
@@ -0,0 +1,53 @@
+/*
+psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend
+
+Copyright (c) 2019, 2020 Adrian "asie" Siekierka
+Copyright (c) 2019 Ben "GreaseMonkey" Russell
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not
+   claim that you wrote the original software. If you use this software
+   in a product, an acknowledgment in the product documentation would be
+   appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be
+   misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "common.h"
+
+// Prepare one raw 2352-byte sector for video payload: clear it, then write
+// the fixed header bytes. Bytes 0x00C..0x00E are left at zero here;
+// encode_file_str() fills them in with the sector time afterwards.
+// NOTE(review): the layout looks like a CD-ROM XA Mode 2 sector (sync
+// pattern, mode byte, 4-byte subheader stored twice) -- confirm against
+// the disc format specification.
+void init_sector_buffer_video(uint8_t *buffer, settings_t *settings) {
+	uint8_t *subheader = buffer + 0x010;
+
+	memset(buffer, 0, 2352);
+
+	// Bytes 0x001..0x00A become 0xFF; 0x000 and 0x00B stay 0x00.
+	for (int offset = 0x001; offset <= 0x00A; offset++) {
+		buffer[offset] = 0xFF;
+	}
+	buffer[0x00F] = 0x02;
+
+	subheader[0] = settings->file_number;
+	subheader[1] = settings->channel_number & 0x1F;
+	subheader[2] = 0x48; // 0x08 | 0x40
+	subheader[3] = 0x00;
+
+	// The four subheader bytes are repeated at 0x014..0x017.
+	for (int i = 0; i < 4; i++) {
+		subheader[4 + i] = subheader[i];
+	}
+}
+
+void calculate_edc_data(uint8_t *buffer)
+{
+	uint32_t edc = 0;
+	for (int i = 0x010; i < 0x818; i++) {
+		edc ^= 0xFF&(uint32_t)buffer[i];
+		for (int ibit = 0; ibit < 8; ibit++) {
+			edc = (edc>>1)^(0xD8018001*(edc&0x1));
+		}
+	}
+	buffer[0x818] = (uint8_t)(edc);
+	buffer[0x819] = (uint8_t)(edc >> 8);
+	buffer[0x81A] = (uint8_t)(edc >> 16);
+	buffer[0x81B] = (uint8_t)(edc >> 24);
+
+	// TODO: ECC
+}
--- a/psxavenc/common.h
+++ b/psxavenc/common.h
@@ -0,0 +1,124 @@
+/*
+psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend
+
+Copyright (c) 2019, 2020 Adrian "asie" Siekierka
+Copyright (c) 2019 Ben "GreaseMonkey" Russell
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not
+   claim that you wrote the original software. If you use this software
+   in a product, an acknowledgment in the product documentation would be
+   appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be
+   misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include <assert.h>
+#include <getopt.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <libavutil/opt.h>
+#include <libavcodec/avcodec.h>
+#include <libavformat/avformat.h>
+#include <libswscale/swscale.h>
+#include <libswresample/swresample.h>
+#include <libpsxav.h>
+
+#define FORMAT_XA 0
+#define FORMAT_XACD 1
+#define FORMAT_SPU 2
+#define FORMAT_STR2 3
+
+#define MAX_UNMUXED_BLOCKS 9
+// Working state of the video encoder and its bitstream writer.
+// Field notes only describe what the code visible alongside this header
+// establishes; anything else is marked for review.
+typedef struct {
+	int frame_index; // reset to 0 by encode_file_str()
+	int frame_block_index; // reset to 0 by encode_file_str()
+	int frame_block_count; // flush_bits() asserts bytes_used < 2016*frame_block_count
+	int frame_block_base_overflow; // set to 150*7*video_fps_den by encode_file_str()
+	int frame_block_overflow_num; // reset to 0 by encode_file_str()
+	int frame_block_overflow_den; // set to 8*video_fps_num by encode_file_str()
+	uint16_t bits_value; // 16-bit word currently being assembled by encode_bits()
+	int bits_left; // bits still free in bits_value; 16 means the word is empty
+	uint8_t unmuxed[2016*MAX_UNMUXED_BLOCKS]; // encoded bytes appended by flush_bits()
+	int bytes_used; // number of bytes written to unmuxed so far
+	int blocks_used; // NOTE(review): usage not visible here -- confirm in mdec.c
+	int uncomp_hwords_used; // NOTE(review): usage not visible here -- confirm in mdec.c
+	int quant_scale; // NOTE(review): presumably the quantization scale -- confirm in mdec.c
+	int32_t *dct_block_lists[6]; // NOTE(review): six per-block coefficient lists -- confirm in mdec.c
+} vid_encoder_state_t;
+
+// libav decoding state created by open_av_data() and released by
+// close_av_data().
+typedef struct {
+	int video_frame_src_size; // 4 * source width * source height (0 without video)
+	int video_frame_dst_size; // 4 * settings video_width * video_height: bytes per stored RGBA frame
+	int audio_stream_index; // index into format->streams, -1 until found
+	int video_stream_index; // index into format->streams, -1 when the input has no video
+	AVFormatContext* format;
+	AVStream* audio_stream;
+	AVStream* video_stream; // NULL when the input has no video stream
+	AVCodecContext* audio_codec_context;
+	AVCodecContext* video_codec_context;
+	AVCodec* audio_codec;
+	AVCodec* video_codec;
+	struct SwrContext* resampler; // converts decoded audio to S16 at settings->frequency
+	struct SwsContext* scaler; // converts decoded video to RGBA at the settings resolution
+	AVFrame* frame; // scratch frame shared by the audio and video decode paths
+
+	int sample_count_mul; // 2 for stereo output, 1 for mono
+
+	double video_next_pts; // decoded frames with a PTS (in seconds) below this are dropped
+} av_decoder_state_t;
+
+// Encoder configuration plus the decoded audio/video buffers that the
+// decoding.c functions fill and the filefmt.c encoders consume.
+typedef struct {
+	int format; // FORMAT_*
+	bool stereo; // false or true
+	int frequency; // 18900 or 37800 Hz
+	int bits_per_sample; // 4 or 8
+	int file_number; // 00-FF
+	int channel_number; // 00-1F
+
+	int video_width; // output frame width in pixels
+	int video_height; // output frame height in pixels
+	int video_fps_num; // FPS numerator
+	int video_fps_den; // FPS denominator
+
+	int16_t *audio_samples; // decoded S16 samples, grown with realloc(); NULL until audio arrives
+	int audio_sample_count; // number of int16_t values stored (all channels counted)
+	uint8_t *video_frames; // decoded RGBA frames stored back to back; NULL until video arrives
+	int video_frame_count; // number of frames stored in video_frames
+
+	av_decoder_state_t decoder_state_av;
+
+	vid_encoder_state_t state_vid;
+} settings_t;
+
+// cdrom.c
+// Clear a 2352-byte sector and write its fixed header bytes.
+void init_sector_buffer_video(uint8_t *buffer, settings_t *settings);
+// Store the checksum of bytes 0x010..0x817 at 0x818..0x81B.
+void calculate_edc_data(uint8_t *buffer);
+
+// decoding.c
+// Open the input file and set up decoders, resampler and scaler; false on failure.
+bool open_av_data(const char *filename, settings_t *settings);
+// Read and decode one packet; false once the input is exhausted.
+bool poll_av_data(settings_t *settings);
+// Keep polling until the requested amounts are buffered; false if the input ends first.
+bool ensure_av_data(settings_t *settings, int needed_audio_samples, int needed_video_frames);
+// Decode the whole input into memory and report the totals on stderr.
+void pull_all_av_data(settings_t *settings);
+// Discard data from the front of the audio and video buffers.
+void retire_av_data(settings_t *settings, int retired_audio_samples, int retired_video_frames);
+// Release decoder resources and the decoded buffers.
+void close_av_data(settings_t *settings);
+
+// filefmt.c
+// Encode samples block by block and write them to output.
+void encode_file_spu(int16_t *audio_samples, int audio_sample_count, settings_t *settings, FILE *output);
+// Encode samples sector by sector and write them to output.
+void encode_file_xa(int16_t *audio_samples, int audio_sample_count, settings_t *settings, FILE *output);
+// Write groups of eight 2352-byte sectors: seven video sectors plus one audio sector.
+void encode_file_str(settings_t *settings, FILE *output);
+
+// mdec.c
+// NOTE(review): definition not visible here; called by encode_file_str() to fill the video sectors.
+void encode_block_str(uint8_t *video_frames, int video_frame_count, uint8_t *output, settings_t *settings);
--- a/psxavenc/decoding.c
+++ b/psxavenc/decoding.c
@@ -0,0 +1,350 @@
+/*
+psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend
+
+Copyright (c) 2019, 2020 Adrian "asie" Siekierka
+Copyright (c) 2019 Ben "GreaseMonkey" Russell
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not
+   claim that you wrote the original software. If you use this software
+   in a product, an acknowledgment in the product documentation would be
+   appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be
+   misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "common.h"
+
+static void poll_av_packet(settings_t *settings, AVPacket *packet);
+
+// Feed `packet` (when not NULL) to the audio decoder and try to pull one
+// decoded frame into `frame`.
+// Returns 1 when a frame was received, and also when the decoder reports
+// AVERROR(EAGAIN); returns 0 when sending the packet fails or on any other
+// receive error. *frame_size is written only when a frame was received.
+// NOTE(review): in the EAGAIN case callers get 1 although `frame` holds no
+// new data -- confirm that this is intended.
+int decode_audio_frame(AVCodecContext *codec, AVFrame *frame, int *frame_size, AVPacket *packet) {
+	if (packet != NULL && avcodec_send_packet(codec, packet) != 0) {
+		return 0;
+	}
+
+	int status = avcodec_receive_frame(codec, frame);
+	if (status < 0) {
+		return (status == AVERROR(EAGAIN)) ? 1 : 0;
+	}
+
+	*frame_size = status;
+	return 1;
+}
+
+// Feed `packet` (when not NULL) to the video decoder and try to pull one
+// decoded frame into `frame`.
+// Returns 1 when a frame was received, and also when the decoder reports
+// AVERROR(EAGAIN); returns 0 when sending the packet fails or on any other
+// receive error. *frame_size is written only when a frame was received.
+// NOTE(review): in the EAGAIN case callers get 1 although `frame` holds no
+// new data -- confirm that this is intended.
+int decode_video_frame(AVCodecContext *codec, AVFrame *frame, int *frame_size, AVPacket *packet) {
+	if (packet != NULL && avcodec_send_packet(codec, packet) != 0) {
+		return 0;
+	}
+
+	int status = avcodec_receive_frame(codec, frame);
+	if (status < 0) {
+		return (status == AVERROR(EAGAIN)) ? 1 : 0;
+	}
+
+	*frame_size = status;
+	return 1;
+}
+
+// Open `filename` and prepare everything needed to decode it:
+//  - exactly one audio stream is required; at most one video stream is allowed
+//  - audio is resampled to signed 16-bit at settings->frequency, mono or
+//    stereo depending on settings->stereo
+//  - video, if present, is scaled to settings->video_width x video_height RGBA
+// On success the decoded-data buffers in `settings` start out empty.
+// Returns false on any failure. Whatever was set up before the failure stays
+// in settings->decoder_state_av.
+// NOTE(review): nothing is released on the failure paths here; confirm that
+// callers invoke close_av_data() after a failed open.
+bool open_av_data(const char *filename, settings_t *settings)
+{
+	av_decoder_state_t* av = &(settings->decoder_state_av);
+	av->video_next_pts = 0.0;
+	av->frame = NULL;
+	av->video_frame_src_size = 0;
+	av->video_frame_dst_size = 0;
+	av->audio_stream_index = -1;
+	av->video_stream_index = -1;
+	av->format = NULL;
+	av->audio_stream = NULL;
+	av->video_stream = NULL;
+	av->audio_codec_context = NULL;
+	av->video_codec_context = NULL;
+	av->audio_codec = NULL;
+	av->video_codec = NULL;
+	av->resampler = NULL;
+	av->scaler = NULL;
+
+	// Reset the output buffers up front so that they are well defined even
+	// when one of the early returns below is taken.
+	settings->audio_samples = NULL;
+	settings->audio_sample_count = 0;
+	settings->video_frames = NULL;
+	settings->video_frame_count = 0;
+
+	av->format = avformat_alloc_context();
+	if (avformat_open_input(&(av->format), filename, NULL, NULL)) {
+		return false;
+	}
+	if (avformat_find_stream_info(av->format, NULL) < 0) {
+		return false;
+	}
+
+	for (unsigned int i = 0; i < av->format->nb_streams; i++) {
+		if (av->format->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
+			if (av->audio_stream_index >= 0) {
+				fprintf(stderr, "open_av_data: found multiple audio tracks?\n");
+				return false;
+			}
+			av->audio_stream_index = (int)i;
+		}
+	}
+	if (av->audio_stream_index == -1) {
+		return false;
+	}
+
+	for (unsigned int i = 0; i < av->format->nb_streams; i++) {
+		if (av->format->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
+			if (av->video_stream_index >= 0) {
+				fprintf(stderr, "open_av_data: found multiple video tracks?\n");
+				return false;
+			}
+			av->video_stream_index = (int)i;
+		}
+	}
+
+	av->audio_stream = av->format->streams[av->audio_stream_index];
+	av->video_stream = (av->video_stream_index != -1 ? av->format->streams[av->video_stream_index] : NULL);
+	av->audio_codec = avcodec_find_decoder(av->audio_stream->codecpar->codec_id);
+	av->audio_codec_context = avcodec_alloc_context3(av->audio_codec);
+	if (av->audio_codec_context == NULL) {
+		return false;
+	}
+	if (avcodec_parameters_to_context(av->audio_codec_context, av->audio_stream->codecpar) < 0) {
+		return false;
+	}
+	if (avcodec_open2(av->audio_codec_context, av->audio_codec, NULL) < 0) {
+		return false;
+	}
+
+	av->resampler = swr_alloc();
+	if (av->resampler == NULL) {
+		// Without this check the option setters and swr_init() below would
+		// be handed a NULL context.
+		return false;
+	}
+	av_opt_set_int(av->resampler, "in_channel_count", av->audio_codec_context->channels, 0);
+	av_opt_set_int(av->resampler, "in_channel_layout", av->audio_codec_context->channel_layout, 0);
+	av_opt_set_int(av->resampler, "in_sample_rate", av->audio_codec_context->sample_rate, 0);
+	av_opt_set_sample_fmt(av->resampler, "in_sample_fmt", av->audio_codec_context->sample_fmt, 0);
+
+	av->sample_count_mul = settings->stereo ? 2 : 1;
+	av_opt_set_int(av->resampler, "out_channel_count", settings->stereo ? 2 : 1, 0);
+	av_opt_set_int(av->resampler, "out_channel_layout", settings->stereo ? AV_CH_LAYOUT_STEREO : AV_CH_LAYOUT_MONO, 0);
+	av_opt_set_int(av->resampler, "out_sample_rate", settings->frequency, 0);
+	av_opt_set_sample_fmt(av->resampler, "out_sample_fmt", AV_SAMPLE_FMT_S16, 0);
+
+	if (swr_init(av->resampler) < 0) {
+		return false;
+	}
+
+	if (av->video_stream != NULL) {
+		av->video_codec = avcodec_find_decoder(av->video_stream->codecpar->codec_id);
+		av->video_codec_context = avcodec_alloc_context3(av->video_codec);
+		if(av->video_codec_context == NULL) {
+			return false;
+		}
+		if (avcodec_parameters_to_context(av->video_codec_context, av->video_stream->codecpar) < 0) {
+			return false;
+		}
+		if (avcodec_open2(av->video_codec_context, av->video_codec, NULL) < 0) {
+			return false;
+		}
+
+		av->scaler = sws_getContext(
+			av->video_codec_context->width,
+			av->video_codec_context->height,
+			av->video_codec_context->pix_fmt,
+			settings->video_width,
+			settings->video_height,
+			AV_PIX_FMT_RGBA,
+			SWS_BICUBIC,
+			NULL,
+			NULL,
+			NULL);
+		if (av->scaler == NULL) {
+			// sws_scale() in the video path would otherwise be called with
+			// a NULL context.
+			return false;
+		}
+
+		// Four bytes per pixel.
+		av->video_frame_src_size = 4*av->video_codec_context->width*av->video_codec_context->height;
+		av->video_frame_dst_size = 4*settings->video_width*settings->video_height;
+	}
+
+	av->frame = av_frame_alloc();
+	if (av->frame == NULL) {
+		return false;
+	}
+
+	return true;
+}
+
+// Decode one audio packet, resample the result to the output format and
+// append it to settings->audio_samples.
+// After every append the buffer keeps room for another 4032 samples per
+// channel beyond audio_sample_count; poll_av_data() zero-fills that tail at
+// the end of the input.
+// On a resampler or allocation failure the frame is dropped, a message is
+// printed to stderr, and the data already buffered stays intact.
+static void poll_av_packet_audio(settings_t *settings, AVPacket *packet)
+{
+	av_decoder_state_t* av = &(settings->decoder_state_av);
+	int frame_size;
+
+	if (!decode_audio_frame(av->audio_codec_context, av->frame, &frame_size, packet)) {
+		return;
+	}
+
+	// Upper bound on what the resampler can emit for this frame, in samples
+	// per channel. It is used both to size the buffer and as the output
+	// capacity passed to swr_convert(); passing the input sample count there
+	// instead would leave data queued inside the resampler when upsampling.
+	int max_out_samples = swr_get_out_samples(av->resampler, av->frame->nb_samples);
+	if (max_out_samples < 0) {
+		fprintf(stderr, "poll_av_packet_audio: resampler error\n");
+		return;
+	}
+
+	size_t channels = (size_t)av->sample_count_mul;
+	size_t needed = (size_t)settings->audio_sample_count + ((size_t)max_out_samples + 4032) * channels;
+	int16_t *grown = realloc(settings->audio_samples, needed * sizeof(int16_t));
+	if (grown == NULL) {
+		// The old buffer is still valid and still owned by settings.
+		fprintf(stderr, "poll_av_packet_audio: out of memory\n");
+		return;
+	}
+	settings->audio_samples = grown;
+
+	// Resample straight into the tail of the buffer (interleaved S16, so a
+	// single output plane).
+	uint8_t *out_planes[1] = { (uint8_t *)(grown + settings->audio_sample_count) };
+	int converted = swr_convert(av->resampler, out_planes, max_out_samples, (const uint8_t**)av->frame->data, av->frame->nb_samples);
+	if (converted < 0) {
+		fprintf(stderr, "poll_av_packet_audio: resampler error\n");
+		return;
+	}
+
+	settings->audio_sample_count += converted * av->sample_count_mul;
+}
+
+// Decode one video packet and, if the frame is due, scale it to the output
+// size as RGBA and append it to settings->video_frames.
+// Frames are dropped when their PTS is negative or earlier than the next
+// expected output time, which lowers the frame rate to video_fps_num/den.
+// If the buffer cannot be grown the frame is dropped, a message is printed
+// to stderr, and the frames already buffered stay intact.
+static void poll_av_packet_video(settings_t *settings, AVPacket *packet)
+{
+	av_decoder_state_t* av = &(settings->decoder_state_av);
+	int frame_size;
+
+	if (!decode_video_frame(av->video_codec_context, av->frame, &frame_size, packet)) {
+		return;
+	}
+
+	// Presentation time of the decoded frame in seconds.
+	double pts = (((double)av->frame->pts)*(double)av->video_stream->time_base.num)/av->video_stream->time_base.den;
+
+	// Drop frames with negative PTS values
+	if (pts < 0.0) {
+		return;
+	}
+	// Drop frames that arrive before the next output slot is due.
+	if (settings->video_frame_count >= 1 && pts < av->video_next_pts) {
+		return;
+	}
+
+	// Size arithmetic is done in size_t: with many buffered frames the
+	// product does not fit in an int.
+	size_t frame_bytes = (size_t)av->video_frame_dst_size;
+	size_t frame_slot = (size_t)settings->video_frame_count;
+	uint8_t *grown = realloc(settings->video_frames, (frame_slot + 1) * frame_bytes);
+	if (grown == NULL) {
+		// The old buffer is still valid and still owned by settings.
+		fprintf(stderr, "poll_av_packet_video: out of memory\n");
+		return;
+	}
+	settings->video_frames = grown;
+
+	// The first accepted frame anchors the output timeline.
+	if (settings->video_frame_count < 1) {
+		av->video_next_pts = pts;
+	}
+
+	double pts_step = ((double)1.0*(double)settings->video_fps_den)/(double)settings->video_fps_num;
+	av->video_next_pts += pts_step;
+	// FIXME: increasing framerate doesn't fill it in with duplicate frames!
+	assert(av->video_next_pts > pts);
+
+	int dst_strides[1] = {
+		settings->video_width*4,
+	};
+	uint8_t *dst_pointers[1] = {
+		grown + frame_bytes*frame_slot,
+	};
+	sws_scale(av->scaler, av->frame->data, av->frame->linesize, 0, av->frame->height, dst_pointers, dst_strides);
+
+	settings->video_frame_count += 1;
+}
+
+// Route a demuxed packet to the audio or video path according to the stream
+// it belongs to. Packets from any other stream are ignored.
+static void poll_av_packet(settings_t *settings, AVPacket *packet)
+{
+	const av_decoder_state_t *decoder = &settings->decoder_state_av;
+	const int stream = packet->stream_index;
+
+	if (stream == decoder->audio_stream_index) {
+		poll_av_packet_audio(settings, packet);
+		return;
+	}
+	if (stream == decoder->video_stream_index) {
+		poll_av_packet_video(settings, packet);
+	}
+}
+
+// Read the next packet from the input and decode it into the buffers in
+// `settings`.
+// Returns true when a packet was read. Returns false when no further packet
+// can be read; in that case the audio buffer is padded with 4032 zero
+// samples per channel after the last decoded sample (audio_sample_count is
+// not changed by the padding).
+bool poll_av_data(settings_t *settings)
+{
+	av_decoder_state_t* av = &(settings->decoder_state_av);
+	AVPacket packet;
+
+	if (av_read_frame(av->format, &packet) >= 0) {
+		poll_av_packet(settings, &packet);
+		av_packet_unref(&packet);
+		return true;
+	}
+
+	// out is always padded out with 4032 "0" samples, this makes calculations elsewhere easier
+	size_t pad_samples = (size_t)4032 * (size_t)av->sample_count_mul;
+	if (settings->audio_samples == NULL) {
+		// No audio was decoded at all, so the buffer was never allocated.
+		// Create one that holds just the (already zeroed) padding instead
+		// of writing through a NULL pointer.
+		settings->audio_samples = calloc(pad_samples, sizeof(int16_t));
+		if (settings->audio_samples == NULL) {
+			fprintf(stderr, "poll_av_data: out of memory\n");
+		}
+		return false;
+	}
+
+	// The audio path always leaves room for this padding behind the
+	// decoded samples.
+	memset((settings->audio_samples) + (settings->audio_sample_count), 0, pad_samples * sizeof(int16_t));
+
+	return false;
+}
+
+// Decode more input until at least `needed_audio_samples` audio samples and
+// `needed_video_frames` video frames are buffered.
+// Returns true once both amounts are available, false if the input runs out
+// first.
+bool ensure_av_data(settings_t *settings, int needed_audio_samples, int needed_video_frames)
+{
+	for (;;) {
+		const bool enough_audio = settings->audio_sample_count >= needed_audio_samples;
+		const bool enough_video = settings->video_frame_count >= needed_video_frames;
+
+		if (enough_audio && enough_video) {
+			return true;
+		}
+		if (!poll_av_data(settings)) {
+			return false;
+		}
+	}
+}
+
+// Decode the entire input into memory, then report how much was loaded on
+// stderr.
+void pull_all_av_data(settings_t *settings)
+{
+	bool more_input;
+
+	do {
+		more_input = poll_av_data(settings);
+	} while (more_input);
+
+	fprintf(stderr, "Loaded %d samples.\n", settings->audio_sample_count);
+	fprintf(stderr, "Loaded %d frames.\n", settings->video_frame_count);
+}
+
+// Discard the oldest `retired_audio_samples` samples (counted as int16_t
+// values) and the oldest `retired_video_frames` frames, moving whatever
+// remains to the front of each buffer.
+// Retiring exactly everything that is buffered empties the buffer. Counts
+// of zero or less leave the corresponding buffer untouched.
+void retire_av_data(settings_t *settings, int retired_audio_samples, int retired_video_frames)
+{
+	av_decoder_state_t* av = &(settings->decoder_state_av);
+
+	assert(retired_audio_samples <= settings->audio_sample_count);
+	assert(retired_video_frames <= settings->video_frame_count);
+
+	if (retired_audio_samples > 0) {
+		int kept_samples = settings->audio_sample_count - retired_audio_samples;
+		if (kept_samples > 0) {
+			memmove(settings->audio_samples, settings->audio_samples + retired_audio_samples, (size_t)kept_samples * sizeof(int16_t));
+		}
+		// Update the count even when nothing is left to move; otherwise
+		// retiring the whole buffer would silently keep it.
+		settings->audio_sample_count = kept_samples > 0 ? kept_samples : 0;
+	}
+
+	if (retired_video_frames > 0) {
+		// Byte offsets are computed in size_t; frames * frame size does not
+		// always fit in an int.
+		size_t frame_bytes = (size_t)av->video_frame_dst_size;
+		int kept_frames = settings->video_frame_count - retired_video_frames;
+		if (kept_frames > 0) {
+			memmove(settings->video_frames, settings->video_frames + (size_t)retired_video_frames * frame_bytes, (size_t)kept_frames * frame_bytes);
+		}
+		settings->video_frame_count = kept_frames > 0 ? kept_frames : 0;
+	}
+}
+
+// Release everything open_av_data() set up, together with the decoded
+// audio/video buffers. Every pointer released here is left as NULL, so the
+// function copes with a partially initialised state and with being called
+// more than once.
+void close_av_data(settings_t *settings)
+{
+	av_decoder_state_t* av = &(settings->decoder_state_av);
+
+	av_frame_free(&(av->frame));
+	swr_free(&(av->resampler));
+
+	// The scaler and the video decoder exist only when the input has video.
+	sws_freeContext(av->scaler);
+	av->scaler = NULL;
+
+	// avcodec_free_context() also closes the codec.
+	avcodec_free_context(&(av->video_codec_context));
+	avcodec_free_context(&(av->audio_codec_context));
+
+	// The context was opened with avformat_open_input(), so it has to be
+	// closed with avformat_close_input(), which also closes the input.
+	avformat_close_input(&(av->format));
+	av->audio_stream = NULL;
+	av->video_stream = NULL;
+
+	free(settings->audio_samples);
+	settings->audio_samples = NULL;
+	free(settings->video_frames);
+	settings->video_frames = NULL;
+}
--- a/psxavenc/filefmt.c
+++ b/psxavenc/filefmt.c
@@ -0,0 +1,136 @@
+/*
+psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend
+
+Copyright (c) 2019, 2020 Adrian "asie" Siekierka
+Copyright (c) 2019 Ben "GreaseMonkey" Russell
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not
+   claim that you wrote the original software. If you use this software
+   in a product, an acknowledgment in the product documentation would be
+   appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be
+   misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "common.h"
+#include "libpsxav.h"
+
+// Build the libpsxav XA settings that correspond to the encoder settings.
+// FORMAT_XA selects PSX_AUDIO_XA_FORMAT_XA; every other format value selects
+// PSX_AUDIO_XA_FORMAT_XACD.
+static psx_audio_xa_settings_t settings_to_libpsxav_xa_audio(settings_t *settings) {
+	psx_audio_xa_settings_t xa;
+
+	if (settings->format == FORMAT_XA) {
+		xa.format = PSX_AUDIO_XA_FORMAT_XA;
+	} else {
+		xa.format = PSX_AUDIO_XA_FORMAT_XACD;
+	}
+
+	xa.channel_number = settings->channel_number;
+	xa.file_number = settings->file_number;
+	xa.stereo = settings->stereo;
+	xa.frequency = settings->frequency;
+	xa.bits_per_sample = settings->bits_per_sample;
+
+	return xa;
+}
+
+// Encode `audio_sample_count` samples block by block with
+// psx_audio_spu_encode() and write each encoded block to `output`.
+// Byte 1 of the first block is set to PSX_AUDIO_SPU_LOOP_START; byte 1 of
+// the last block is set to PSX_AUDIO_SPU_LOOP_END, unless that block is also
+// the first one. `settings` is not used.
+void encode_file_spu(int16_t *audio_samples, int audio_sample_count, settings_t *settings, FILE *output) {
+	const int block_samples = psx_audio_spu_get_samples_per_block();
+	psx_audio_encoder_state_t audio_state;
+	uint8_t buffer[16];
+	int position = 0;
+
+	memset(&audio_state, 0, sizeof(psx_audio_encoder_state_t));
+
+	while (position < audio_sample_count) {
+		const int next_position = position + block_samples;
+		const int remaining = audio_sample_count - position;
+		// The last block may be shorter than a full one.
+		const int chunk = (remaining > block_samples) ? block_samples : remaining;
+
+		int length = psx_audio_spu_encode(&audio_state, audio_samples + position, chunk, buffer);
+
+		if (position == 0) {
+			buffer[1] = PSX_AUDIO_SPU_LOOP_START;
+		} else if (next_position >= audio_sample_count) {
+			buffer[1] = PSX_AUDIO_SPU_LOOP_END;
+		}
+
+		fwrite(buffer, length, 1, output);
+		position = next_position;
+	}
+}
+
+// Encode audio sector by sector with psx_audio_xa_encode() and write each
+// 2352-byte buffer (or whatever length the encoder reports) to `output`.
+// The last sector is passed through psx_audio_xa_encode_finalize() before it
+// is written.
+// NOTE(review): `audio_sample_count` is stepped in units of samples per
+// sector while the read offset is multiplied by the channel count, so it
+// appears to be a per-channel count -- confirm against the caller.
+void encode_file_xa(int16_t *audio_samples, int audio_sample_count, settings_t *settings, FILE *output) {
+	const int channels = settings->stereo ? 2 : 1;
+	psx_audio_xa_settings_t xa_settings = settings_to_libpsxav_xa_audio(settings);
+	const int sector_samples = psx_audio_xa_get_samples_per_sector(xa_settings);
+	psx_audio_encoder_state_t audio_state;
+	uint8_t buffer[2352];
+	int position = 0;
+
+	memset(&audio_state, 0, sizeof(psx_audio_encoder_state_t));
+
+	while (position < audio_sample_count) {
+		const int next_position = position + sector_samples;
+		const int remaining = audio_sample_count - position;
+		// The last sector may carry fewer samples than a full one.
+		const int chunk = (remaining > sector_samples) ? sector_samples : remaining;
+
+		int length = psx_audio_xa_encode(xa_settings, &audio_state, audio_samples + (position * channels), chunk, buffer);
+
+		if (next_position >= audio_sample_count) {
+			psx_audio_xa_encode_finalize(xa_settings, buffer, length);
+		}
+
+		fwrite(buffer, length, 1, output);
+		position = next_position;
+	}
+}
+
+// Write the interleaved audio/video stream to `output` in groups of eight
+// 2352-byte sectors: sectors 0..6 of each group carry video, sector 7
+// carries one sector of encoded audio.
+// Input is decoded on demand; the loop ends as soon as ensure_av_data()
+// can no longer provide two sectors' worth of audio and two video frames.
+void encode_file_str(settings_t *settings, FILE *output) {
+	uint8_t buffer[2352*8];
+	uint8_t *const audio_sector = buffer + 2352 * 7;
+	vid_encoder_state_t *vid = &(settings->state_vid);
+	psx_audio_xa_settings_t xa_settings = settings_to_libpsxav_xa_audio(settings);
+	psx_audio_encoder_state_t audio_state;
+	const int audio_samples_per_sector = psx_audio_xa_get_samples_per_sector(xa_settings);
+	const int av_sample_mul = settings->stereo ? 2 : 1;
+	// FIXME: this needs an extra frame to prevent A/V desync
+	const int frames_needed = 2;
+	int group = 0;
+
+	memset(&audio_state, 0, sizeof(psx_audio_encoder_state_t));
+
+	vid->frame_index = 0;
+	vid->bits_value = 0;
+	vid->bits_left = 16;
+	vid->frame_block_index = 0;
+	vid->frame_block_count = 0;
+
+	vid->frame_block_overflow_num = 0;
+
+	// Number of total sectors per second: 150
+	// Proportion of sectors for video due to A/V interleave: 7/8
+	// 15FPS = (150*7/8/15) = 8.75 blocks per frame
+	vid->frame_block_base_overflow = 150*7*settings->video_fps_den;
+	vid->frame_block_overflow_den = 8*settings->video_fps_num;
+
+	while (ensure_av_data(settings, audio_samples_per_sector*av_sample_mul*frames_needed, 1*frames_needed)) {
+		// The audio sector is encoded first, straight into the last slot.
+		psx_audio_xa_encode(xa_settings, &audio_state, settings->audio_samples, audio_samples_per_sector, audio_sector);
+
+		// TODO: the final buffer
+		for (int k = 0; k < 7; k++) {
+			init_sector_buffer_video(buffer + 2352*k, settings);
+		}
+		encode_block_str(settings->video_frames, settings->video_frame_count, buffer, settings);
+
+		for (int k = 0; k < 8; k++) {
+			uint8_t *sector = buffer + 2352*k;
+			// Running sector number, starting at 150 (75*2).
+			const int t = k + group*8 + 75*2;
+			const int minutes = t/75/60;
+			const int seconds = (t/75)%60;
+			const int frames = t%75;
+
+			// Put the time in, two decimal digits per byte
+			sector[0x00C] = (minutes%10)|((minutes/10)<<4);
+			sector[0x00D] = (seconds%10)|((seconds/10)<<4);
+			sector[0x00E] = (frames%10)|((frames/10)<<4);
+
+			// Only the seven video sectors get a checksum here.
+			if (k != 7) {
+				calculate_edc_data(sector);
+			}
+		}
+
+		// One sector of audio is consumed per group; no video frames are
+		// retired here.
+		retire_av_data(settings, audio_samples_per_sector*av_sample_mul, 0);
+		fwrite(buffer, 2352*8, 1, output);
+		group++;
+	}
+}
--- a/psxavenc/mdec.c
+++ b/psxavenc/mdec.c
@@ -0,0 +1,644 @@
+/*
+psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend
+
+Copyright (c) 2019, 2020 Adrian "asie" Siekierka
+Copyright (c) 2019 Ben "GreaseMonkey" Russell
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not
+   claim that you wrote the original software. If you use this software
+   in a product, an acknowledgment in the product documentation would be
+   appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be
+   misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "common.h"
+
+// Variable-length code for every possible 16-bit uncompressed halfword,
+// filled in by init_dct_data():
+// high 8 bits = bit count
+// low 24 bits = value
+uint32_t huffman_encoding_map[0x10000];
+// NOTE(review): presumably set once init_dct_data() has run -- the code
+// that reads or writes this flag is not visible here.
+bool dct_done_init = false;
+
+// Expands to the two uncompressed halfwords for a (zero run, level) pair:
+// the run in the bits above bit 9, and +value / -value in the low 10 bits.
+#define MAKE_HUFFMAN_PAIR(zeroes, value) (((zeroes)<<10)|((+(value))&0x3FF)),(((zeroes)<<10)|((-(value))&0x3FF))
+// One row per variable-length code:
+//   c_bits      - code length in bits, not counting the trailing sign bit
+//   c_value     - the code itself, without the sign bit
+//   u_hword_pos - uncompressed halfword this code stands for (positive level)
+//   u_hword_neg - same run with the negated level
+// init_dct_data() appends a 0 (positive) or 1 (negative) bit to c_value.
+const struct {
+	int c_bits;
+	uint32_t c_value;
+	uint16_t u_hword_pos;
+	uint16_t u_hword_neg;
+} huffman_lookup[] = {
+	// Rows are listed in order of increasing code length.
+	2,0x3,MAKE_HUFFMAN_PAIR(0,1),
+	3,0x3,MAKE_HUFFMAN_PAIR(1,1),
+	4,0x4,MAKE_HUFFMAN_PAIR(0,2),
+	4,0x5,MAKE_HUFFMAN_PAIR(2,1),
+	5,0x05,MAKE_HUFFMAN_PAIR(0,3),
+	5,0x06,MAKE_HUFFMAN_PAIR(4,1),
+	5,0x07,MAKE_HUFFMAN_PAIR(3,1),
+	6,0x04,MAKE_HUFFMAN_PAIR(7,1),
+	6,0x05,MAKE_HUFFMAN_PAIR(6,1),
+	6,0x06,MAKE_HUFFMAN_PAIR(1,2),
+	6,0x07,MAKE_HUFFMAN_PAIR(5,1),
+	7,0x04,MAKE_HUFFMAN_PAIR(2,2),
+	7,0x05,MAKE_HUFFMAN_PAIR(9,1),
+	7,0x06,MAKE_HUFFMAN_PAIR(0,4),
+	7,0x07,MAKE_HUFFMAN_PAIR(8,1),
+	8,0x20,MAKE_HUFFMAN_PAIR(13,1),
+	8,0x21,MAKE_HUFFMAN_PAIR(0,6),
+	8,0x22,MAKE_HUFFMAN_PAIR(12,1),
+	8,0x23,MAKE_HUFFMAN_PAIR(11,1),
+	8,0x24,MAKE_HUFFMAN_PAIR(3,2),
+	8,0x25,MAKE_HUFFMAN_PAIR(1,3),
+	8,0x26,MAKE_HUFFMAN_PAIR(0,5),
+	8,0x27,MAKE_HUFFMAN_PAIR(10,1),
+	10,0x008,MAKE_HUFFMAN_PAIR(16,1),
+	10,0x009,MAKE_HUFFMAN_PAIR(5,2),
+	10,0x00A,MAKE_HUFFMAN_PAIR(0,7),
+	10,0x00B,MAKE_HUFFMAN_PAIR(2,3),
+	10,0x00C,MAKE_HUFFMAN_PAIR(1,4),
+	10,0x00D,MAKE_HUFFMAN_PAIR(15,1),
+	10,0x00E,MAKE_HUFFMAN_PAIR(14,1),
+	10,0x00F,MAKE_HUFFMAN_PAIR(4,2),
+	12,0x010,MAKE_HUFFMAN_PAIR(0,11),
+	12,0x011,MAKE_HUFFMAN_PAIR(8,2),
+	12,0x012,MAKE_HUFFMAN_PAIR(4,3),
+	12,0x013,MAKE_HUFFMAN_PAIR(0,10),
+	12,0x014,MAKE_HUFFMAN_PAIR(2,4),
+	12,0x015,MAKE_HUFFMAN_PAIR(7,2),
+	12,0x016,MAKE_HUFFMAN_PAIR(21,1),
+	12,0x017,MAKE_HUFFMAN_PAIR(20,1),
+	12,0x018,MAKE_HUFFMAN_PAIR(0,9),
+	12,0x019,MAKE_HUFFMAN_PAIR(19,1),
+	12,0x01A,MAKE_HUFFMAN_PAIR(18,1),
+	12,0x01B,MAKE_HUFFMAN_PAIR(1,5),
+	12,0x01C,MAKE_HUFFMAN_PAIR(3,3),
+	12,0x01D,MAKE_HUFFMAN_PAIR(0,8),
+	12,0x01E,MAKE_HUFFMAN_PAIR(6,2),
+	12,0x01F,MAKE_HUFFMAN_PAIR(17,1),
+	13,0x0010,MAKE_HUFFMAN_PAIR(10,2),
+	13,0x0011,MAKE_HUFFMAN_PAIR(9,2),
+	13,0x0012,MAKE_HUFFMAN_PAIR(5,3),
+	13,0x0013,MAKE_HUFFMAN_PAIR(3,4),
+	13,0x0014,MAKE_HUFFMAN_PAIR(2,5),
+	13,0x0015,MAKE_HUFFMAN_PAIR(1,7),
+	13,0x0016,MAKE_HUFFMAN_PAIR(1,6),
+	13,0x0017,MAKE_HUFFMAN_PAIR(0,15),
+	13,0x0018,MAKE_HUFFMAN_PAIR(0,14),
+	13,0x0019,MAKE_HUFFMAN_PAIR(0,13),
+	13,0x001A,MAKE_HUFFMAN_PAIR(0,12),
+	13,0x001B,MAKE_HUFFMAN_PAIR(26,1),
+	13,0x001C,MAKE_HUFFMAN_PAIR(25,1),
+	13,0x001D,MAKE_HUFFMAN_PAIR(24,1),
+	13,0x001E,MAKE_HUFFMAN_PAIR(23,1),
+	13,0x001F,MAKE_HUFFMAN_PAIR(22,1),
+	14,0x0010,MAKE_HUFFMAN_PAIR(0,31),
+	14,0x0011,MAKE_HUFFMAN_PAIR(0,30),
+	14,0x0012,MAKE_HUFFMAN_PAIR(0,29),
+	14,0x0013,MAKE_HUFFMAN_PAIR(0,28),
+	14,0x0014,MAKE_HUFFMAN_PAIR(0,27),
+	14,0x0015,MAKE_HUFFMAN_PAIR(0,26),
+	14,0x0016,MAKE_HUFFMAN_PAIR(0,25),
+	14,0x0017,MAKE_HUFFMAN_PAIR(0,24),
+	14,0x0018,MAKE_HUFFMAN_PAIR(0,23),
+	14,0x0019,MAKE_HUFFMAN_PAIR(0,22),
+	14,0x001A,MAKE_HUFFMAN_PAIR(0,21),
+	14,0x001B,MAKE_HUFFMAN_PAIR(0,20),
+	14,0x001C,MAKE_HUFFMAN_PAIR(0,19),
+	14,0x001D,MAKE_HUFFMAN_PAIR(0,18),
+	14,0x001E,MAKE_HUFFMAN_PAIR(0,17),
+	14,0x001F,MAKE_HUFFMAN_PAIR(0,16),
+	15,0x0010,MAKE_HUFFMAN_PAIR(0,40),
+	15,0x0011,MAKE_HUFFMAN_PAIR(0,39),
+	15,0x0012,MAKE_HUFFMAN_PAIR(0,38),
+	15,0x0013,MAKE_HUFFMAN_PAIR(0,37),
+	15,0x0014,MAKE_HUFFMAN_PAIR(0,36),
+	15,0x0015,MAKE_HUFFMAN_PAIR(0,35),
+	15,0x0016,MAKE_HUFFMAN_PAIR(0,34),
+	15,0x0017,MAKE_HUFFMAN_PAIR(0,33),
+	15,0x0018,MAKE_HUFFMAN_PAIR(0,32),
+	15,0x0019,MAKE_HUFFMAN_PAIR(1,14),
+	15,0x001A,MAKE_HUFFMAN_PAIR(1,13),
+	15,0x001B,MAKE_HUFFMAN_PAIR(1,12),
+	15,0x001C,MAKE_HUFFMAN_PAIR(1,11),
+	15,0x001D,MAKE_HUFFMAN_PAIR(1,10),
+	15,0x001E,MAKE_HUFFMAN_PAIR(1,9),
+	15,0x001F,MAKE_HUFFMAN_PAIR(1,8),
+	16,0x0010,MAKE_HUFFMAN_PAIR(1,18),
+	16,0x0011,MAKE_HUFFMAN_PAIR(1,17),
+	16,0x0012,MAKE_HUFFMAN_PAIR(1,16),
+	16,0x0013,MAKE_HUFFMAN_PAIR(1,15),
+	16,0x0014,MAKE_HUFFMAN_PAIR(6,3),
+	16,0x0015,MAKE_HUFFMAN_PAIR(16,2),
+	16,0x0016,MAKE_HUFFMAN_PAIR(15,2),
+	16,0x0017,MAKE_HUFFMAN_PAIR(14,2),
+	16,0x0018,MAKE_HUFFMAN_PAIR(13,2),
+	16,0x0019,MAKE_HUFFMAN_PAIR(12,2),
+	16,0x001A,MAKE_HUFFMAN_PAIR(11,2),
+	16,0x001B,MAKE_HUFFMAN_PAIR(31,1),
+	16,0x001C,MAKE_HUFFMAN_PAIR(30,1),
+	16,0x001D,MAKE_HUFFMAN_PAIR(29,1),
+	16,0x001E,MAKE_HUFFMAN_PAIR(28,1),
+	16,0x001F,MAKE_HUFFMAN_PAIR(27,1),
+};
+#undef MAKE_HUFFMAN_PAIR
+
+// 8x8 table of per-coefficient quantization values, stored row by row.
+// NOTE(review): the code that uses this table is not visible here; confirm
+// whether it is indexed in natural or zigzag order.
+const uint8_t quant_dec[8*8] = {
+	 2, 16, 19, 22, 26, 27, 29, 34,
+	16, 16, 22, 24, 27, 29, 34, 37,
+	19, 22, 26, 27, 29, 34, 34, 38,
+	22, 22, 26, 27, 29, 34, 37, 40,
+	22, 26, 27, 29, 32, 35, 40, 48,
+	26, 27, 29, 32, 35, 40, 48, 58,
+	26, 27, 29, 34, 38, 46, 56, 69,
+	27, 29, 35, 38, 46, 56, 69, 83,
+};
+
+// Permutation of 0..63 laid out as an 8x8 grid: entry [row*8 + column]
+// holds that cell's position along a diagonal zigzag path through the grid.
+// NOTE(review): the code that uses this table is not visible here.
+const uint8_t dct_zigzag_table[8*8] = {
+	0x00,0x01,0x05,0x06,0x0E,0x0F,0x1B,0x1C,
+	0x02,0x04,0x07,0x0D,0x10,0x1A,0x1D,0x2A,
+	0x03,0x08,0x0C,0x11,0x19,0x1E,0x29,0x2B,
+	0x09,0x0B,0x12,0x18,0x1F,0x28,0x2C,0x35,
+	0x0A,0x13,0x17,0x20,0x27,0x2D,0x34,0x36,
+	0x14,0x16,0x21,0x26,0x2E,0x33,0x37,0x3C,
+	0x15,0x22,0x25,0x2F,0x32,0x38,0x3B,0x3D,
+	0x23,0x24,0x30,0x31,0x39,0x3A,0x3E,0x3F,
+};
+
+// Inverse of dct_zigzag_table: entry [n] is the grid index (row*8 + column)
+// of the n-th position along the zigzag path, e.g. dct_zigzag_table[0x08]
+// is 0x02 and dct_zagzig_table[0x02] is 0x08.
+// NOTE(review): the code that uses this table is not visible here.
+const uint8_t dct_zagzig_table[8*8] = {
+	0x00,0x01,0x08,0x10,0x09,0x02,0x03,0x0A,
+	0x11,0x18,0x20,0x19,0x12,0x0B,0x04,0x05,
+	0x0C,0x13,0x1A,0x21,0x28,0x30,0x29,0x22,
+	0x1B,0x14,0x0D,0x06,0x07,0x0E,0x15,0x1C,
+	0x23,0x2A,0x31,0x38,0x39,0x32,0x2B,0x24,
+	0x1D,0x16,0x0F,0x17,0x1E,0x25,0x2C,0x33,
+	0x3A,0x3B,0x34,0x2D,0x26,0x1F,0x27,0x2E,
+	0x35,0x3C,0x3D,0x36,0x2F,0x37,0x3E,0x3F,
+};
+
+// 8x8 table of signed 16-bit constants, stored row by row.
+// NOTE(review): the values look like the DCT cosine basis in fixed point
+// (0x5A82 is about cos(pi/4) * 32768) -- confirm the scaling and the row /
+// column meaning against the code that uses it, which is not visible here.
+const int16_t dct_scale_table[8*8] = {
+	+0x5A82, +0x5A82, +0x5A82, +0x5A82, +0x5A82, +0x5A82, +0x5A82, +0x5A82,
+	+0x7D8A, +0x6A6D, +0x471C, +0x18F8, -0x18F9, -0x471D, -0x6A6E, -0x7D8B,
+	+0x7641, +0x30FB, -0x30FC, -0x7642, -0x7642, -0x30FC, +0x30FB, +0x7641,
+	+0x6A6D, -0x18F9, -0x7D8B, -0x471D, +0x471C, +0x7D8A, +0x18F8, -0x6A6E,
+	+0x5A82, -0x5A83, -0x5A83, +0x5A82, +0x5A82, -0x5A83, -0x5A83, +0x5A82,
+	+0x471C, -0x7D8B, +0x18F8, +0x6A6D, -0x6A6E, -0x18F9, +0x7D8A, -0x471D,
+	+0x30FB, -0x7642, +0x7641, -0x30FC, -0x30FC, +0x7641, -0x7642, +0x30FB,
+	+0x18F8, -0x471D, +0x6A6D, -0x7D8B, +0x7D8A, -0x6A6E, +0x471C, -0x18F9,
+};
+
+// Fill huffman_encoding_map, the halfword -> variable-length-code table.
+// Every halfword first gets a 22-bit escape code (the 6-bit prefix 0x01
+// followed by the 16-bit halfword itself); halfwords listed in
+// huffman_lookup are then overwritten with their shorter code plus a
+// trailing sign bit (0 = positive entry, 1 = negative entry).
+static void init_dct_data(void)
+{
+	const size_t lookup_count = sizeof(huffman_lookup)/sizeof(huffman_lookup[0]);
+	const uint32_t escape_length = 6+16;
+	const uint32_t escape_prefix = 0x01;
+
+	for (uint32_t hword = 0; hword <= 0xFFFF; hword++) {
+		huffman_encoding_map[hword] = (escape_length<<24)|(escape_prefix<<16)|hword;
+	}
+
+	for (size_t n = 0; n < lookup_count; n++) {
+		// One extra bit for the sign that follows the code.
+		const uint32_t length = (uint32_t)huffman_lookup[n].c_bits + 1;
+		const uint32_t code = huffman_lookup[n].c_value << 1;
+
+		huffman_encoding_map[huffman_lookup[n].u_hword_pos] = (length<<24)|code|0;
+		huffman_encoding_map[huffman_lookup[n].u_hword_neg] = (length<<24)|code|1;
+	}
+}
+
+// Write the 16-bit word under construction to state->unmuxed, low byte
+// first, if at least one bit has been put into it, then start a fresh,
+// empty word. Asserts that the output stays within both the unmuxed buffer
+// and the 2016 bytes available per block of the current frame.
+static void flush_bits(vid_encoder_state_t *state)
+{
+	const bool word_has_data = state->bits_left < 16;
+
+	if (word_has_data) {
+		const uint8_t low_byte = (uint8_t)state->bits_value;
+		const uint8_t high_byte = (uint8_t)(state->bits_value>>8);
+
+		assert(state->bytes_used < sizeof(state->unmuxed));
+		state->unmuxed[state->bytes_used] = low_byte;
+		state->bytes_used += 1;
+
+		assert(state->bytes_used < sizeof(state->unmuxed));
+		assert(state->bytes_used < 2016*state->frame_block_count);
+		state->unmuxed[state->bytes_used] = high_byte;
+		state->bytes_used += 1;
+	}
+
+	state->bits_value = 0;
+	state->bits_left = 16;
+}
+
+// Append the low `bits` bits of `val` to the bitstream, most significant
+// bit first. Bits are collected in state->bits_value, filled from the top
+// of the 16-bit word downwards; each completed word is written out through
+// flush_bits(). `val` must fit in `bits` bits.
+static void encode_bits(vid_encoder_state_t *state, int bits, uint32_t val)
+{
+	assert(val < (1<<bits));
+
+	// FIXME: for some reason the main logic breaks when bits > 16
+	// and I have no idea why, so I have to split this up --GM
+	if (bits > 16) {
+		// Emit the part above the low 16 bits first, then fall through
+		// with the low 16 bits.
+		encode_bits(state, bits-16, val>>16);
+		bits = 16;
+		val &= 0xFFFF;
+	}
+
+	// A completely full word is written out before anything is added.
+	if (state->bits_left == 0) {
+		flush_bits(state);
+	}
+
+	while (bits > state->bits_left) {
+		// Bits need truncating
+		// Only the top bits_left bits of val fit into the current word.
+		uint32_t outval = val;
+		outval >>= bits - state->bits_left;
+		assert(outval < (1<<16));
+		uint16_t old_value = state->bits_value;
+		// The new bits must land on positions that are still zero.
+		assert((state->bits_value & outval) == 0);
+		state->bits_value |= (uint16_t)outval;
+		//fprintf(stderr, "trunc %2d %2d %08X %04X %04X\n", bits, state->bits_left, val, old_value, state->bits_value);
+		// Keep only the bits that did not fit, then start a new word.
+		bits -= state->bits_left;
+		uint32_t mask = (1<<bits)-1;
+		val &= mask;
+		assert(mask >= 1);
+		assert(val < (1<<bits));
+		flush_bits(state);
+	}
+
+	if (bits >= 1) {
+		assert(bits <= 16);
+		// Bits may need shifting into place
+		// The value is placed directly below the bits already in the word.
+		uint32_t outval = val;
+		outval <<= state->bits_left - bits;
+		assert(outval < (1<<16));
+		uint16_t old_value = state->bits_value;
+		assert((state->bits_value & outval) == 0);
+		state->bits_value |= (uint16_t)outval;
+		//fprintf(stderr, "plop  %2d %2d %08X %04X %04X\n", bits, state->bits_left, val, state->bits_value);
+		state->bits_left -= bits;
+	}
+}
+
+// Write one run-length/level AC word to the bitstream using its precomputed
+// code from huffman_encoding_map (filled in by init_dct_data()).
+// Each map entry holds the code length in bits 24 and up and the code itself
+// in the low 24 bits; words without a dedicated code carry the 22-bit escape
+// form instead.
+static void encode_ac_value(vid_encoder_state_t *state, uint16_t value)
+{
+	uint32_t outword = huffman_encoding_map[value];
+	encode_bits(state, outword>>24, outword&0xFFFFFF);
+}
+
+// Run the 8x8 block through dct_scale_table twice (each pass writes its
+// result transposed, so the second pass covers the other axis), scale the
+// result down and zero every coefficient smaller than its quant_dec entry.
+// The block is modified in place; `state` is not used.
+static void transform_dct_block(vid_encoder_state_t *state, int32_t *block)
+{
+	const int shift = 14;
+	const int32_t rounding = 1<<(shift-1);
+	int32_t scratch[8*8];
+
+	for (int pass = 0; pass < 2; pass++) {
+		for (int row = 0; row < 8; row++) {
+			const int16_t *basis = &dct_scale_table[8*row];
+			for (int col = 0; col < 8; col++) {
+				const int32_t *src = &block[8*col];
+				int32_t sum = 0;
+				for (int k = 0; k < 8; k++) {
+					sum += src[k]*basis[k];
+				}
+				// Round to nearest while dropping the fixed-point fraction
+				scratch[8*row+col] = (sum + rounding)>>shift;
+			}
+		}
+		memcpy(block, scratch, sizeof(scratch));
+	}
+
+	// FIXME: Work out why the math has to go this way
+	// (the DC term ends up divided by 32 in total, every other term by 4)
+	block[0] /= 8;
+	for (int idx = 0; idx < 64; idx++) {
+		block[idx] /= 4;
+
+		// Anything under the quantisation threshold would quantise to zero
+		if (abs(block[idx]) < quant_dec[idx]) {
+			block[idx] = 0;
+		}
+	}
+}
+
+// Quantise one transformed 8x8 block in place and append it to the bitstream:
+// a 10-bit DC value, the run-length coded non-zero AC coefficients, then the
+// 2-bit end-of-block code. Also updates state->uncomp_hwords_used.
+static void encode_dct_block(vid_encoder_state_t *state, int32_t *block)
+{
+	// Quantise each coefficient and clamp it to the signed 10-bit range
+	for (int idx = 0; idx < 64; idx++) {
+		int32_t quantised = block[idx]/quant_dec[idx];
+
+		if (quantised < -0x200) {
+			quantised = -0x200;
+		}
+		if (quantised > +0x1FF) {
+			quantised = +0x1FF;
+		}
+		block[idx] = quantised;
+	}
+
+	// The DC coefficient goes out first as a raw 10-bit value
+	int dc_value = block[0];
+	encode_bits(state, 10, dc_value&0x3FF);
+
+	// Collect the AC coefficients in dct_zagzig_table order as
+	// (preceding zero count << 10) | 10-bit level
+	uint16_t rle_words[8*8];
+	int rle_count = 0;
+	int zero_run = 0;
+	for (int pos = 1; pos < 64; pos++) {
+		int src_index = dct_zagzig_table[pos];
+		if (block[src_index] != 0) {
+			rle_words[rle_count] = (zero_run<<10)|(block[src_index]&0x3FF);
+			rle_count++;
+			zero_run = 0;
+			state->uncomp_hwords_used += 1;
+		} else {
+			zero_run++;
+		}
+	}
+
+	// Huffman-code the collected words
+	for (int n = 0; n < rle_count; n++) {
+		encode_ac_value(state, rle_words[n]);
+	}
+
+	// End of block marker
+	encode_bits(state, 2, 0x2);
+
+	// Count two more halfwords for this block, then round the running total
+	// up to a multiple of 16
+	state->uncomp_hwords_used += 2;
+	state->uncomp_hwords_used = (state->uncomp_hwords_used+0xF)&~0xF;
+}
+
+// Zero out small AC coefficients of one block while the shared shedding
+// budget lasts.
+//   state:          unused
+//   block:          64 coefficients; index 0 (DC) is never touched
+//   min_val:        AC values with magnitude below this may be dropped
+//   values_to_shed: remaining number of values that may be dropped,
+//                   decremented for each one zeroed here
+// Returns the number of non-zero AC coefficients left, plus 2.
+static int reduce_dct_block(vid_encoder_state_t *state, int32_t *block, int32_t min_val, int *values_to_shed)
+{
+	int kept = 0;
+
+	for (int32_t *coeff = block+1; coeff != block+64; coeff++) {
+		if (*coeff == 0) {
+			continue;
+		}
+
+		if (*values_to_shed > 0 && abs(*coeff) < min_val) {
+			*coeff = 0;
+			*values_to_shed -= 1;
+			continue;
+		}
+
+		kept++;
+	}
+
+	// Two extra for the DC value and the end-of-block code
+	return kept+2;
+}
+
+// Fill blocks[] with the six DCT block pointers (order: Cr Cb [Y1|Y2\nY3|Y4])
+// of the 16x16 macroblock whose top-left pixel is (fx, fy).
+static void get_macroblock_dct_blocks(settings_t *settings, int fx, int fy, int32_t *blocks[6])
+{
+	int block_offs = 8*8*((fy>>4)*((settings->video_width+15)/16)+(fx>>4));
+	for (int i = 0; i < 6; i++) {
+		blocks[i] = settings->state_vid.dct_block_lists[i] + block_offs;
+	}
+}
+
+// Encode the frame at video_frames (4 bytes per pixel, first three used as
+// R, G, B) into settings->state_vid.unmuxed as a version 2 bitstream with an
+// 8-byte header, then call retire_av_data(settings, 0, 1).
+//   video_frames:      pixel data of the frame to encode
+//   video_frame_count: unused
+//   output:            unused
+//   settings:          video dimensions and encoder state; frame_block_count
+//                      must already hold the sector budget for this frame
+// Exits the process if the DCT work buffers cannot be allocated.
+static void encode_frame_str(uint8_t *video_frames, int video_frame_count, uint8_t *output, settings_t *settings)
+{
+	vid_encoder_state_t *vid = &(settings->state_vid);
+	const uint8_t *video_frame = video_frames;
+	int pitch = settings->video_width*4;
+
+	// Kept in the signature for the callers; not needed here
+	(void)video_frame_count;
+	(void)output;
+
+	if (!dct_done_init) {
+		init_dct_data();
+		dct_done_init = true;
+	}
+
+	// The six coefficient planes are allocated once and reused for every frame
+	if (vid->dct_block_lists[0] == NULL) {
+		int dct_block_count_x = (settings->video_width+15)/16;
+		int dct_block_count_y = (settings->video_height+15)/16;
+		size_t dct_block_size = (size_t)dct_block_count_x*dct_block_count_y*sizeof(int32_t)*8*8;
+		for (int i = 0; i < 6; i++) {
+			vid->dct_block_lists[i] = malloc(dct_block_size);
+			if (vid->dct_block_lists[i] == NULL) {
+				fprintf(stderr, "Out of memory allocating DCT blocks!\n");
+				exit(1);
+			}
+		}
+	}
+
+	memset(vid->unmuxed, 0, sizeof(vid->unmuxed));
+
+	vid->quant_scale = 1;
+	vid->uncomp_hwords_used = 0;
+	vid->bytes_used = 8; // leave room for the 8-byte header written at the end
+	vid->blocks_used = 0;
+
+	// TODO: non-16x16-aligned videos
+	assert((settings->video_width % 16) == 0);
+	assert((settings->video_height % 16) == 0);
+
+	// Do the initial transform
+	for(int fx = 0; fx < settings->video_width; fx += 16) {
+	for(int fy = 0; fy < settings->video_height; fy += 16) {
+		int32_t *blocks[6];
+		get_macroblock_dct_blocks(settings, fx, fy, blocks);
+
+		for(int y = 0; y < 8; y++) {
+		for(int x = 0; x < 8; x++) {
+			int k = y*8+x;
+
+			// Chroma: sum each 2x2 group of pixels so that the 16x16
+			// macroblock yields one 8x8 block per chroma plane
+			int cr = 0;
+			int cg = 0;
+			int cb = 0;
+			for(int cy = 0; cy < 2; cy++) {
+			for(int cx = 0; cx < 2; cx++) {
+				int coffs = pitch*(fy+y*2+cy) + 4*(fx+x*2+cx);
+				cr += video_frame[coffs+0];
+				cg += video_frame[coffs+1];
+				cb += video_frame[coffs+2];
+			}
+			}
+
+			// TODO: Get the real math for this
+			int cluma = cr+cg*2+cb;
+			blocks[0][k] = ((cr<<2) - cluma + (1<<(4-1)))>>4;
+			blocks[1][k] = ((cb<<2) - cluma + (1<<(4-1)))>>4;
+
+			// Luma: one sample per pixel, four 8x8 blocks per macroblock
+			for(int ly = 0; ly < 2; ly++) {
+			for(int lx = 0; lx < 2; lx++) {
+				int loffs = pitch*(fy+ly*8+y) + 4*(fx+lx*8+x);
+				int lr = video_frame[loffs+0];
+				int lg = video_frame[loffs+1];
+				int lb = video_frame[loffs+2];
+
+				// TODO: Get the real math for this
+				int lluma = (lr+lg*2+lb+2)-0x200;
+				if(lluma < -0x200) { lluma = -0x200; }
+				if(lluma > +0x1FF) { lluma = +0x1FF; }
+				lluma >>= 1;
+				blocks[2+2*ly+lx][k] = lluma;
+			}
+			}
+		}
+		}
+		for(int i = 0; i < 6; i++) {
+			transform_dct_block(vid, blocks[i]);
+		}
+	}
+	}
+
+	// Now reduce all the blocks
+	// TODO: Base this on actual bit count
+	const int accum_threshold = 1025*vid->frame_block_count;
+	// Every block always costs 2 (DC + end of block), so the total can never
+	// drop below this; without the check a too-small budget would loop forever
+	const int accum_floor = 2*6*(settings->video_width/16)*(settings->video_height/16);
+	int values_to_shed = 0;
+	for(int min_val = 0;; min_val += 1) {
+		int accum = 0;
+		for(int fx = 0; fx < settings->video_width; fx += 16) {
+		for(int fy = 0; fy < settings->video_height; fy += 16) {
+			int32_t *blocks[6];
+			get_macroblock_dct_blocks(settings, fx, fy, blocks);
+
+			const int luma_reduce_mul = 8;
+			const int chroma_reduce_mul = 8;
+			// Blocks 0 and 1 are the chroma planes, 2..5 are luma
+			for(int i = 6-1; i >= 0; i--) {
+				accum += reduce_dct_block(vid, blocks[i], (i < 2 ? min_val*chroma_reduce_mul+1 : min_val*luma_reduce_mul+1), &values_to_shed);
+			}
+		}
+		}
+
+		if(accum <= accum_threshold || accum <= accum_floor) {
+			break;
+		}
+
+		values_to_shed = accum - accum_threshold;
+	}
+
+	// Now encode all the blocks
+	for(int fx = 0; fx < settings->video_width; fx += 16) {
+	for(int fy = 0; fy < settings->video_height; fy += 16) {
+		int32_t *blocks[6];
+		get_macroblock_dct_blocks(settings, fx, fy, blocks);
+
+		for(int i = 0; i < 6; i++) {
+			encode_dct_block(vid, blocks[i]);
+		}
+	}
+	}
+
+	// Trailing 10-bit 0x1FF value followed by an end-of-block code
+	encode_bits(vid, 10, 0x1FF);
+	encode_bits(vid, 2, 0x2);
+	vid->uncomp_hwords_used += 2;
+	vid->uncomp_hwords_used = (vid->uncomp_hwords_used+0xF)&~0xF;
+
+	flush_bits(vid);
+
+	vid->blocks_used = ((vid->uncomp_hwords_used+0xF)&~0xF)>>4;
+
+	// We need a multiple of 4
+	vid->bytes_used = (vid->bytes_used+0x3)&~0x3;
+
+	// Build the demuxed header
+	vid->unmuxed[0x000] = (uint8_t)vid->blocks_used;
+	vid->unmuxed[0x001] = (uint8_t)(vid->blocks_used>>8);
+	vid->unmuxed[0x002] = (uint8_t)0x00;
+	vid->unmuxed[0x003] = (uint8_t)0x38;
+	vid->unmuxed[0x004] = (uint8_t)vid->quant_scale;
+	vid->unmuxed[0x005] = (uint8_t)(vid->quant_scale>>8);
+	vid->unmuxed[0x006] = 0x02; // Version 2
+	vid->unmuxed[0x007] = 0x00;
+
+	retire_av_data(settings, 0, 1);
+}
+
+// Fill seven consecutive 2352-byte sectors in `output` with video data.
+// Each sector gets a 32-byte header at offset 0x018 followed by the next
+// 2016-byte slice of the current frame's bitstream. Whenever the current
+// frame has no slices left, the next frame is encoded first.
+void encode_block_str(uint8_t *video_frames, int video_frame_count, uint8_t *output, settings_t *settings)
+{
+	vid_encoder_state_t *vid = &(settings->state_vid);
+	uint8_t header[32] = {0};
+
+	for (int sector = 0; sector < 7; sector++) {
+		// Move on to a new frame once every slice of the current one is out
+		while (vid->frame_block_index >= vid->frame_block_count) {
+			vid->frame_index++;
+			// TODO: work out an optimal block count for this
+			// TODO: calculate this all based on FPS
+			// The sector budget per frame is a fraction; the remainder is
+			// carried over in frame_block_overflow_num
+			vid->frame_block_overflow_num += vid->frame_block_base_overflow;
+			vid->frame_block_count = vid->frame_block_overflow_num / vid->frame_block_overflow_den;
+			vid->frame_block_overflow_num %= vid->frame_block_overflow_den;
+			vid->frame_block_index = 0;
+			encode_frame_str(video_frames, video_frame_count, output, settings);
+		}
+
+		int chunk_index = vid->frame_block_index;
+		int chunk_count = vid->frame_block_count;
+		uint8_t *sector_data = output + 2352*sector + 0x018;
+
+		// 0x00: MDEC0 register
+		header[0x000] = 0x60;
+		header[0x001] = 0x01;
+		header[0x002] = 0x01;
+		header[0x003] = 0x80;
+
+		// 0x04: muxed chunk index, 0x06: muxed chunk count
+		header[0x004] = (uint8_t)chunk_index;
+		header[0x005] = (uint8_t)(chunk_index>>8);
+		header[0x006] = (uint8_t)chunk_count;
+		header[0x007] = (uint8_t)(chunk_count>>8);
+
+		// 0x08: frame index
+		header[0x008] = (uint8_t)vid->frame_index;
+		header[0x009] = (uint8_t)(vid->frame_index>>8);
+		header[0x00A] = (uint8_t)(vid->frame_index>>16);
+		header[0x00B] = (uint8_t)(vid->frame_index>>24);
+
+		// 0x0C: demuxed bytes used as a multiple of 4
+		header[0x00C] = (uint8_t)vid->bytes_used;
+		header[0x00D] = (uint8_t)(vid->bytes_used>>8);
+		header[0x00E] = (uint8_t)(vid->bytes_used>>16);
+		header[0x00F] = (uint8_t)(vid->bytes_used>>24);
+
+		// 0x10: video frame width, 0x12: height
+		header[0x010] = (uint8_t)settings->video_width;
+		header[0x011] = (uint8_t)(settings->video_width>>8);
+		header[0x012] = (uint8_t)settings->video_height;
+		header[0x013] = (uint8_t)(settings->video_height>>8);
+
+		// 0x14: 32-byte blocks required for MDEC data
+		header[0x014] = (uint8_t)vid->blocks_used;
+		header[0x015] = (uint8_t)(vid->blocks_used>>8);
+
+		// 0x16: some weird thing
+		header[0x016] = 0x00;
+		header[0x017] = 0x38;
+
+		// 0x18: quantization scale
+		header[0x018] = (uint8_t)vid->quant_scale;
+		header[0x019] = (uint8_t)(vid->quant_scale>>8);
+
+		// 0x1A: version 2
+		header[0x01A] = 0x02;
+		header[0x01B] = 0x00;
+
+		memcpy(sector_data, header, sizeof(header));
+		memcpy(sector_data + 0x020, vid->unmuxed + 2016*chunk_index, 2016);
+
+		vid->frame_block_index++;
+	}
+}
--- a/psxavenc/psxavenc.c
+++ b/psxavenc/psxavenc.c
@@ -0,0 +1,187 @@
+/*
+psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend
+
+Copyright (c) 2019, 2020 Adrian "asie" Siekierka
+Copyright (c) 2019 Ben "GreaseMonkey" Russell
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not
+   claim that you wrote the original software. If you use this software
+   in a product, an acknowledgment in the product documentation would be
+   appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be
+   misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "common.h"
+
+// Print the command line usage summary to stderr.
+void print_help(void) {
+	static const char *const help_lines[] = {
+		"Usage: psxavenc [-f freq] [-b bitdepth] [-c channels] [-F num] [-C num] [-t xa|xacd|spu|str2] <in> <out>\n\n",
+		"    -f freq          Use specified frequency\n",
+		"    -t format        Use specified output type:\n",
+		"                       xa     [A.] .xa 2336-byte sectors\n",
+		"                       xacd   [A.] .xa 2352-byte sectors\n",
+		"                       spu    [A.] raw SPU-ADPCM data\n",
+		"                       str2   [AV] v2 .str video 2352-byte sectors\n",
+		"    -b bitdepth      Use specified bit depth (only 4 bits supported)\n",
+		"    -c channels      Use specified channel count (1 or 2)\n",
+		"    -F num           [.xa] Set the file number to num (0-255)\n",
+		"    -C num           [.xa] Set the channel number to num (0-31)\n",
+	};
+
+	for (size_t i = 0; i < sizeof(help_lines)/sizeof(help_lines[0]); i++) {
+		fputs(help_lines[i], stderr);
+	}
+}
+
+// Parse the command line options into `settings`.
+//   settings: pre-filled with defaults; updated with every option given
+//   argc/argv: as passed to main()
+// Returns the index of the first non-option argument (optind) on success,
+// or -1 after printing a message to stderr if an option is invalid or help
+// was requested.
+int parse_args(settings_t* settings, int argc, char** argv) {
+	int c;
+	while ((c = getopt(argc, argv, "t:f:b:c:F:C:h")) != -1) {
+		switch (c) {
+			case 't': {
+				if (strcmp(optarg, "xa") == 0) {
+					settings->format = FORMAT_XA;
+				} else if (strcmp(optarg, "xacd") == 0) {
+					settings->format = FORMAT_XACD;
+				} else if (strcmp(optarg, "spu") == 0) {
+					settings->format = FORMAT_SPU;
+				} else if (strcmp(optarg, "str2") == 0) {
+					settings->format = FORMAT_STR2;
+				} else {
+					fprintf(stderr, "Invalid format: %s\n", optarg);
+					return -1;
+				}
+			} break;
+			case 'f': {
+				// Range is checked after the loop, once the format is known
+				settings->frequency = atoi(optarg);
+			} break;
+			case 'b': {
+				int depth = atoi(optarg);
+				if (depth != 4) {
+					fprintf(stderr, "Invalid bit depth: %d\n", depth);
+					return -1;
+				}
+				settings->bits_per_sample = depth;
+			} break;
+			case 'c': {
+				int ch = atoi(optarg);
+				if (ch <= 0 || ch > 2) {
+					fprintf(stderr, "Invalid channel count: %d\n", ch);
+					return -1;
+				}
+				settings->stereo = (ch == 2 ? 1 : 0);
+			} break;
+			case 'F': {
+				// Validate before storing so the check does not depend on
+				// the width of the settings field
+				int file_number = atoi(optarg);
+				if (file_number < 0 || file_number > 255) {
+					fprintf(stderr, "Invalid file number: %d\n", file_number);
+					return -1;
+				}
+				settings->file_number = file_number;
+			} break;
+			case 'C': {
+				int channel_number = atoi(optarg);
+				if (channel_number < 0 || channel_number > 31) {
+					fprintf(stderr, "Invalid channel number: %d\n", channel_number);
+					return -1;
+				}
+				settings->channel_number = channel_number;
+			} break;
+			case '?':
+			case 'h':
+			default: {
+				print_help();
+				return -1;
+			}
+		}
+	}
+
+	// XA audio only exists at the two fixed sample rates
+	if (settings->format == FORMAT_XA || settings->format == FORMAT_XACD) {
+		if (settings->frequency != PSX_AUDIO_XA_FREQ_SINGLE && settings->frequency != PSX_AUDIO_XA_FREQ_DOUBLE) {
+			fprintf(stderr, "Invalid frequency: %d Hz\n", settings->frequency);
+			return -1;
+		}
+	}
+
+	// SPU output is always forced to mono
+	if (settings->format == FORMAT_SPU) {
+		settings->stereo = false;
+	}
+
+	return optind;
+}
+
+// Program entry point: set defaults, parse the command line, open the input,
+// run the encoder selected by the output format and write the output file.
+// Returns 0 on success and 1 on any error.
+int main(int argc, char **argv) {
+	settings_t settings;
+	int arg_offset;
+	FILE* output;
+
+	memset(&settings,0,sizeof(settings_t));
+
+	settings.file_number = 0;
+	settings.channel_number = 0;
+	settings.stereo = true;
+	settings.frequency = PSX_AUDIO_XA_FREQ_DOUBLE;
+	settings.bits_per_sample = 4;
+
+	settings.video_width = 320;
+	settings.video_height = 240;
+
+	settings.audio_samples = NULL;
+	settings.audio_sample_count = 0;
+	settings.video_frames = NULL;
+	settings.video_frame_count = 0;
+
+	// TODO: make this adjustable
+	// also for some reason ffmpeg seems to hard-code the framerate to 15fps
+	settings.video_fps_num = 15;
+	settings.video_fps_den = 1;
+	for(int i = 0; i < 6; i++) {
+		settings.state_vid.dct_block_lists[i] = NULL;
+	}
+
+	arg_offset = parse_args(&settings, argc, argv);
+	if (arg_offset < 0) {
+		return 1;
+	} else if (argc < arg_offset + 2) {
+		// Both the input and the output path are required
+		print_help();
+		return 1;
+	}
+
+	fprintf(stderr, "Using settings: %d Hz @ %d bit depth, %s. F%d C%d\n",
+		settings.frequency, settings.bits_per_sample,
+		settings.stereo ? "stereo" : "mono",
+		settings.file_number, settings.channel_number
+	);
+
+	bool did_open_data = open_av_data(argv[arg_offset + 0], &settings);
+	if (!did_open_data) {
+		fprintf(stderr, "Could not open input file!\n");
+		return 1;
+	}
+
+	output = fopen(argv[arg_offset + 1], "wb");
+	if (output == NULL) {
+		fprintf(stderr, "Could not open output file!\n");
+		// The input was opened successfully, so release it before leaving
+		close_av_data(&settings);
+		return 1;
+	}
+
+	// The audio encoders are given the sample count divided by the channel count
+	int av_sample_mul = settings.stereo ? 2 : 1;
+
+	switch (settings.format) {
+		case FORMAT_XA:
+		case FORMAT_XACD:
+			pull_all_av_data(&settings);
+			encode_file_xa(settings.audio_samples, settings.audio_sample_count / av_sample_mul, &settings, output);
+			break;
+		case FORMAT_SPU:
+			pull_all_av_data(&settings);
+			encode_file_spu(settings.audio_samples, settings.audio_sample_count / av_sample_mul, &settings, output);
+			break;
+		case FORMAT_STR2:
+			encode_file_str(&settings, output);
+			break;
+		default:
+			break;
+	}
+
+	// fclose flushes buffered data, so a failure here means a truncated file
+	int close_failed = (fclose(output) != 0);
+	close_av_data(&settings);
+	if (close_failed) {
+		fprintf(stderr, "Could not write output file!\n");
+		return 1;
+	}
+	return 0;
+}