diff --git a/libobs-d3d11/d3d11-subsystem.cpp b/libobs-d3d11/d3d11-subsystem.cpp
index da4eaeb6e3852685dcc75bd95928fced26a69353..a4953c83e94a1b603bef10556074a6aec6533713 100644
--- a/libobs-d3d11/d3d11-subsystem.cpp
+++ b/libobs-d3d11/d3d11-subsystem.cpp
@@ -367,10 +367,7 @@ void gs_device::UpdateBlendState()
 
 void gs_device::UpdateViewProjMatrix()
 {
-	matrix3 cur_matrix;
-	gs_matrix_get(&cur_matrix);
-
-	matrix4_from_matrix3(&curViewMatrix, &cur_matrix);
+	gs_matrix_get(&curViewMatrix);
 
 	/* negate Z col of the view matrix for right-handed coordinate system */
 	curViewMatrix.x.z = -curViewMatrix.x.z;
diff --git a/libobs-opengl/gl-subsystem.c b/libobs-opengl/gl-subsystem.c
index be89840f7358c84b2a2a0ca1451b49dac6295272..c3e92c7642d975a3f7bae994d04d396f645ed320 100644
--- a/libobs-opengl/gl-subsystem.c
+++ b/libobs-opengl/gl-subsystem.c
@@ -866,10 +866,8 @@ static inline bool can_render(device_t device)
 static void update_viewproj_matrix(struct gs_device *device)
 {
 	struct gs_shader *vs = device->cur_vertex_shader;
-	struct matrix3 cur_matrix;
-	gs_matrix_get(&cur_matrix);
+	gs_matrix_get(&device->cur_view);
 
-	matrix4_from_matrix3(&device->cur_view, &cur_matrix);
 	matrix4_mul(&device->cur_viewproj, &device->cur_view,
 			&device->cur_proj);
 	matrix4_transpose(&device->cur_viewproj, &device->cur_viewproj);
diff --git a/libobs/graphics/bounds.c b/libobs/graphics/bounds.c
index 93dbbfacb6d00789d708d307f379142525bd2a74..15aa37cbb01e4ebd30f1485891895b6f827caea4 100644
--- a/libobs/graphics/bounds.c
+++ b/libobs/graphics/bounds.c
@@ -17,6 +17,7 @@
 
 #include "bounds.h"
 #include "matrix3.h"
+#include "matrix4.h"
 #include "plane.h"
 
 void bounds_move(struct bounds *dst, const struct bounds *b,
@@ -83,7 +84,7 @@ void bounds_get_center(struct vec3 *dst, const struct bounds *b)
 }
 
 void bounds_transform(struct bounds *dst, const struct bounds *b,
-		const struct matrix3 *m)
+		const struct matrix4 *m)
 {
 	struct bounds temp;
 	bool b_init = false;
@@ -101,17 +102,54 @@ void bounds_transform(struct bounds *dst, const struct bounds *b,
 		} else {
 			if (p.x < temp.min.x)
 				temp.min.x = p.x;
-			else if(p.x > temp.max.x)
+			else if (p.x > temp.max.x)
+				temp.max.x = p.x;
+
+			if (p.y < temp.min.y)
+				temp.min.y = p.y;
+			else if (p.y > temp.max.y)
+				temp.max.y = p.y;
+
+			if (p.z < temp.min.z)
+				temp.min.z = p.z;
+			else if (p.z > temp.max.z)
+				temp.max.z = p.z;
+		}
+	}
+
+	bounds_copy(dst, &temp);
+}
+
+void bounds_transform3x4(struct bounds *dst, const struct bounds *b,
+		const struct matrix3 *m)
+{
+	struct bounds temp;
+	bool b_init = false;
+	int i;
+
+	for (i = 0; i < 8; i++) {
+		struct vec3 p;
+		bounds_get_point(&p, b, i);
+		vec3_transform3x4(&p, &p, m);
+
+		if (!b_init) {
+			vec3_copy(&temp.min, &p);
+			vec3_copy(&temp.max, &p);
+			b_init = true;
+		} else {
+			if (p.x < temp.min.x)
+				temp.min.x = p.x;
+			else if (p.x > temp.max.x)
 				temp.max.x = p.x;
 
-			if(p.y < temp.min.y)
+			if (p.y < temp.min.y)
 				temp.min.y = p.y;
-			else if(p.y > temp.max.y)
+			else if (p.y > temp.max.y)
 				temp.max.y = p.y;
 
-			if(p.z < temp.min.z)
+			if (p.z < temp.min.z)
 				temp.min.z = p.z;
-			else if(p.z > temp.max.z)
+			else if (p.z > temp.max.z)
 				temp.max.z = p.z;
 		}
 	}
@@ -227,6 +265,21 @@ bool bounds_intersects(const struct bounds *b, const struct bounds *test,
 }
 
 bool bounds_intersects_obb(const struct bounds *b, const struct bounds *test,
+		const struct matrix4 *m, float epsilon)
+{ /* Checks b against test in both spaces: inv(m)*test vs b, m*b vs test. */
+	struct bounds b_tr, test_tr;
+	struct matrix4 m_inv;
+
+	if (!matrix4_inv(&m_inv, m)) /* inverse failed: m_inv was not written */
+		return false; /* no usable inverse, report no intersection */
+	bounds_transform(&b_tr, b, m); /* b pushed through m */
+	bounds_transform(&test_tr, test, &m_inv); /* test pushed through inv(m) */
+
+	return bounds_intersects(b, &test_tr, epsilon) &&
+	       bounds_intersects(&b_tr, test, epsilon);
+}
+
+bool bounds_intersects_obb3x4(const struct bounds *b, const struct bounds *test,
 		const struct matrix3 *m, float epsilon)
 {
 	struct bounds b_tr, test_tr;
@@ -234,8 +287,8 @@ bool bounds_intersects_obb(const struct bounds *b, const struct bounds *test,
 
 	matrix3_transpose(&m_inv, m);
 
-	bounds_transform(&b_tr, b, m);
-	bounds_transform(&test_tr, test, &m_inv);
+	bounds_transform3x4(&b_tr, b, m);
+	bounds_transform3x4(&test_tr, test, &m_inv);
 
 	return bounds_intersects(b, &test_tr, epsilon) &&
 	       bounds_intersects(&b_tr, test, epsilon);
diff --git a/libobs/graphics/bounds.h b/libobs/graphics/bounds.h
index 40b0b2d199952633b61ab4b7228f5eaf9ce508a1..c47c2583d8492db774ffe59bb3e97cc4871c6497 100644
--- a/libobs/graphics/bounds.h
+++ b/libobs/graphics/bounds.h
@@ -72,6 +72,8 @@ EXPORT void bounds_get_center(struct vec3 *dst, const struct bounds *b);
  * the actual size becoming larger than it originally was.
  */
 EXPORT void bounds_transform(struct bounds *dst, const struct bounds *b,
+		const struct matrix4 *m);
+EXPORT void bounds_transform3x4(struct bounds *dst, const struct bounds *b,
 		const struct matrix3 *m);
 
 EXPORT bool bounds_intersection_ray(const struct bounds *b,
@@ -108,6 +110,9 @@ static inline bool bounds_vec3_inside(const struct bounds *b,
 EXPORT bool bounds_intersects(const struct bounds *b,
 		const struct bounds *test, float epsilon);
 EXPORT bool bounds_intersects_obb(const struct bounds *b,
+		const struct bounds *test, const struct matrix4 *m,
+		float epsilon);
+EXPORT bool bounds_intersects_obb3x4(const struct bounds *b,
 		const struct bounds *test, const struct matrix3 *m,
 		float epsilon);
 
diff --git a/libobs/graphics/graphics-internal.h b/libobs/graphics/graphics-internal.h
index acedfe5a2d53fe55bed7bc4449665b8e14d55ca7..4448392f6dff794921540be09e636eb39bf8740b 100644
--- a/libobs/graphics/graphics-internal.h
+++ b/libobs/graphics/graphics-internal.h
@@ -229,7 +229,7 @@ struct graphics_subsystem {
 
 	DARRAY(struct gs_rect) viewport_stack;
 
-	DARRAY(struct matrix3) matrix_stack;
+	DARRAY(struct matrix4) matrix_stack;
 	size_t                 cur_matrix;
 
 	struct matrix4         projection;
diff --git a/libobs/graphics/graphics.c b/libobs/graphics/graphics.c
index 12533615e2ddbf4f8af2fb00f27b0a5f46efa51e..b9911e029e2c44d1dc46ebe5e8fe7d84579a6225 100644
--- a/libobs/graphics/graphics.c
+++ b/libobs/graphics/graphics.c
@@ -87,9 +87,9 @@ static bool graphics_init_sprite_vb(struct graphics_subsystem *graphics)
 
 static bool graphics_init(struct graphics_subsystem *graphics)
 {
-	struct matrix3 top_mat;
+	struct matrix4 top_mat;
 
-	matrix3_identity(&top_mat);
+	matrix4_identity(&top_mat);
 	da_push_back(graphics->matrix_stack, &top_mat);
 
 	graphics->exports.device_entercontext(graphics->device);
@@ -203,7 +203,7 @@ graphics_t gs_getcontext(void)
 	return thread_graphics;
 }
 
-static inline struct matrix3 *top_matrix(graphics_t graphics)
+static inline struct matrix4 *top_matrix(graphics_t graphics)
 {
 	return graphics ? 
 		(graphics->matrix_stack.array + graphics->cur_matrix) : NULL;
@@ -215,9 +215,9 @@ void gs_matrix_push(void)
 	if (!graphics)
 		return;
 
-	struct matrix3 mat, *top_mat = top_matrix(graphics);
+	struct matrix4 mat, *top_mat = top_matrix(graphics);
 
-	memcpy(&mat, top_mat, sizeof(struct matrix3));
+	memcpy(&mat, top_mat, sizeof(struct matrix4));
 	da_push_back(graphics->matrix_stack, &mat);
 	graphics->cur_matrix++;
 }
@@ -239,97 +239,97 @@ void gs_matrix_pop(void)
 
 void gs_matrix_identity(void)
 {
-	struct matrix3 *top_mat = top_matrix(thread_graphics);
+	struct matrix4 *top_mat = top_matrix(thread_graphics);
 	if (top_mat)
-		matrix3_identity(top_mat);
+		matrix4_identity(top_mat);
 }
 
 void gs_matrix_transpose(void)
 {
-	struct matrix3 *top_mat = top_matrix(thread_graphics);
+	struct matrix4 *top_mat = top_matrix(thread_graphics);
 	if (top_mat)
-		matrix3_transpose(top_mat, top_mat);
+		matrix4_transpose(top_mat, top_mat);
 }
 
-void gs_matrix_set(const struct matrix3 *matrix)
+void gs_matrix_set(const struct matrix4 *matrix)
 {
-	struct matrix3 *top_mat = top_matrix(thread_graphics);
+	struct matrix4 *top_mat = top_matrix(thread_graphics);
 	if (top_mat)
-		matrix3_copy(top_mat, matrix);
+		matrix4_copy(top_mat, matrix);
 }
 
-void gs_matrix_get(struct matrix3 *dst)
+void gs_matrix_get(struct matrix4 *dst)
 {
-	struct matrix3 *top_mat = top_matrix(thread_graphics);
+	struct matrix4 *top_mat = top_matrix(thread_graphics);
 	if (top_mat)
-		matrix3_copy(dst, top_mat);
+		matrix4_copy(dst, top_mat); /* otherwise dst is left as it was */
 }
 
-void gs_matrix_mul(const struct matrix3 *matrix)
+void gs_matrix_mul(const struct matrix4 *matrix)
 {
-	struct matrix3 *top_mat = top_matrix(thread_graphics);
+	struct matrix4 *top_mat = top_matrix(thread_graphics);
 	if (top_mat)
-		matrix3_mul(top_mat, top_mat, matrix);
+		matrix4_mul(top_mat, top_mat, matrix);
 }
 
 void gs_matrix_rotquat(const struct quat *rot)
 {
-	struct matrix3 *top_mat = top_matrix(thread_graphics);
+	struct matrix4 *top_mat = top_matrix(thread_graphics);
 	if (top_mat)
-		matrix3_rotate(top_mat, top_mat, rot);
+		matrix4_rotate(top_mat, top_mat, rot);
 }
 
 void gs_matrix_rotaa(const struct axisang *rot)
 {
-	struct matrix3 *top_mat = top_matrix(thread_graphics);
+	struct matrix4 *top_mat = top_matrix(thread_graphics);
 	if (top_mat)
-		matrix3_rotate_aa(top_mat, top_mat, rot);
+		matrix4_rotate_aa(top_mat, top_mat, rot);
 }
 
 void gs_matrix_translate(const struct vec3 *pos)
 {
-	struct matrix3 *top_mat = top_matrix(thread_graphics);
+	struct matrix4 *top_mat = top_matrix(thread_graphics);
 	if (top_mat)
-		matrix3_translate(top_mat, top_mat, pos);
+		matrix4_translate3v(top_mat, top_mat, pos);
 }
 
 void gs_matrix_scale(const struct vec3 *scale)
 {
-	struct matrix3 *top_mat = top_matrix(thread_graphics);
+	struct matrix4 *top_mat = top_matrix(thread_graphics);
 	if (top_mat)
-		matrix3_scale(top_mat, top_mat, scale);
+		matrix4_scale(top_mat, top_mat, scale);
 }
 
 void gs_matrix_rotaa4f(float x, float y, float z, float angle)
 {
-	struct matrix3 *top_mat = top_matrix(thread_graphics);
+	struct matrix4 *top_mat = top_matrix(thread_graphics);
 	struct axisang aa;
 
 	if (top_mat) {
 		axisang_set(&aa, x, y, z, angle);
-		matrix3_rotate_aa(top_mat, top_mat, &aa);
+		matrix4_rotate_aa(top_mat, top_mat, &aa);
 	}
 }
 
 void gs_matrix_translate3f(float x, float y, float z)
 {
-	struct matrix3 *top_mat = top_matrix(thread_graphics);
+	struct matrix4 *top_mat = top_matrix(thread_graphics);
 	struct vec3 p;
 
 	if (top_mat) {
 		vec3_set(&p, x, y, z);
-		matrix3_translate(top_mat, top_mat, &p);
+		matrix4_translate3v(top_mat, top_mat, &p);
 	}
 }
 
 void gs_matrix_scale3f(float x, float y, float z)
 {
-	struct matrix3 *top_mat = top_matrix(thread_graphics);
+	struct matrix4 *top_mat = top_matrix(thread_graphics);
 	struct vec3 p;
 
 	if (top_mat) {
 		vec3_set(&p, x, y, z);
-		matrix3_scale(top_mat, top_mat, &p);
+		matrix4_scale(top_mat, top_mat, &p);
 	}
 }
 
diff --git a/libobs/graphics/graphics.h b/libobs/graphics/graphics.h
index 747a7a98ba96823a186e1710413356f377b82a42..f33b942c359bc87abef87f7376a685f5286d4599 100644
--- a/libobs/graphics/graphics.h
+++ b/libobs/graphics/graphics.h
@@ -441,9 +441,9 @@ EXPORT void gs_matrix_push(void);
 EXPORT void gs_matrix_pop(void);
 EXPORT void gs_matrix_identity(void);
 EXPORT void gs_matrix_transpose(void);
-EXPORT void gs_matrix_set(const struct matrix3 *matrix);
-EXPORT void gs_matrix_get(struct matrix3 *dst);
-EXPORT void gs_matrix_mul(const struct matrix3 *matrix);
+EXPORT void gs_matrix_set(const struct matrix4 *matrix);
+EXPORT void gs_matrix_get(struct matrix4 *dst);
+EXPORT void gs_matrix_mul(const struct matrix4 *matrix);
 EXPORT void gs_matrix_rotquat(const struct quat *rot);
 EXPORT void gs_matrix_rotaa(const struct axisang *rot);
 EXPORT void gs_matrix_translate(const struct vec3 *pos);
diff --git a/libobs/graphics/matrix3.c b/libobs/graphics/matrix3.c
index 3abdb58674795e899f16249bba5d134bcb952776..06326e71530f366e6ee4f2f3f7ad47340752b50e 100644
--- a/libobs/graphics/matrix3.c
+++ b/libobs/graphics/matrix3.c
@@ -37,21 +37,9 @@ void matrix3_from_quat(struct matrix3 *dst, const struct quat *q)
 	float wy = q->w * q->y * s;
 	float wz = q->w * q->z * s;
 
-	dst->x.x = 1.0f - (yy + zz);
-	dst->x.y = xy + wz;
-	dst->x.z = xz - wy;
-	dst->x.w = 0.0f;
-
-	dst->y.x = xy - wz;
-	dst->y.y = 1.0f - (xx + zz);
-	dst->y.z = yz + wx;
-	dst->y.w = 0.0f;
-
-	dst->z.x = xz + wy;
-	dst->z.y = yz - wx;
-	dst->z.z = 1.0f - (xx + yy);
-	dst->z.w = 0.0f;
-
+	vec3_set(&dst->x, 1.0f - (yy + zz), xy + wz, xz - wy);
+	vec3_set(&dst->y, xy - wz, 1.0f - (xx + zz), yz + wx);
+	vec3_set(&dst->z, xz + wy, yz - wx, 1.0f - (xx + yy));
 	vec3_zero(&dst->t);
 }
 
@@ -77,10 +65,19 @@ void matrix3_from_matrix4(struct matrix3 *dst, const struct matrix4 *m)
 void matrix3_mul(struct matrix3 *dst, const struct matrix3 *m1,
 		const struct matrix3 *m2)
 {
-	vec3_rotate(&dst->x, &m1->x, m2);
-	vec3_rotate(&dst->y, &m1->y, m2);
-	vec3_rotate(&dst->z, &m1->z, m2);
-	vec3_transform(&dst->t, &m1->t, m2);
+	if (dst == m2) { /* writing dst row by row would change m2 mid-way */
+		struct matrix3 temp;
+		vec3_rotate(&temp.x, &m1->x, m2);
+		vec3_rotate(&temp.y, &m1->y, m2);
+		vec3_rotate(&temp.z, &m1->z, m2);
+		vec3_transform3x4(&temp.t, &m1->t, m2);
+		matrix3_copy(dst, &temp); /* store only after m2 is fully read */
+	} else {
+		vec3_rotate(&dst->x, &m1->x, m2);
+		vec3_rotate(&dst->y, &m1->y, m2);
+		vec3_rotate(&dst->z, &m1->z, m2);
+		vec3_transform3x4(&dst->t, &m1->t, m2);
+	}
 }
 
 void matrix3_rotate(struct matrix3 *dst, const struct matrix3 *m,
@@ -123,7 +120,10 @@ void matrix3_transpose(struct matrix3 *dst, const struct matrix3 *m)
 
 void matrix3_inv(struct matrix3 *dst, const struct matrix3 *m)
 {
-	matrix4_inv((struct matrix4*)dst, (struct matrix4*)m);
+	struct matrix4 m4;
+	matrix4_from_matrix3(&m4, m); /* widen to 4x4; this sets t.w to 1 */
+	matrix4_inv((struct matrix4*)dst, &m4); /* NOTE(review): result unchecked */
+	dst->t.w = 0.0f; /* force t.w back to 0 after the 4x4 inverse */
 }
 
 void matrix3_mirror(struct matrix3 *dst, const struct matrix3 *m,
diff --git a/libobs/graphics/matrix3.h b/libobs/graphics/matrix3.h
index fecb06ec7d159e2d785f85093b078283e8c59c70..d04ec4f7f05a60fa93851d1c34f44c41d242c4a5 100644
--- a/libobs/graphics/matrix3.h
+++ b/libobs/graphics/matrix3.h
@@ -18,6 +18,7 @@
 #pragma once
 
 #include "vec3.h"
+#include "axisang.h"
 
 /* 3x4 Matrix */
 
@@ -25,7 +26,6 @@
 extern "C" {
 #endif
 
-struct axisang;
 struct matrix4;
 
 struct matrix3 {
@@ -79,6 +79,30 @@ EXPORT void matrix3_mirror(struct matrix3 *dst, const struct matrix3 *m,
 EXPORT void matrix3_mirrorv(struct matrix3 *dst, const struct matrix3 *m,
 		const struct vec3 *v);
 
+static inline void matrix3_translate3f(struct matrix3 *dst,
+		const struct matrix3 *m, float x, float y, float z)
+{ /* float-argument form of matrix3_translate() */
+	struct vec3 v;
+	vec3_set(&v, x, y, z);
+	matrix3_translate(dst, m, &v);
+}
+
+static inline void matrix3_rotate_aa4f(struct matrix3 *dst,
+		const struct matrix3 *m, float x, float y, float z, float rot)
+{ /* float-argument form of matrix3_rotate_aa() */
+	struct axisang aa;
+	axisang_set(&aa, x, y, z, rot); /* rot goes in the angle slot */
+	matrix3_rotate_aa(dst, m, &aa);
+}
+
+static inline void matrix3_scale3f(struct matrix3 *dst,
+		const struct matrix3 *m, float x, float y, float z)
+{ /* float-argument form of matrix3_scale() */
+	struct vec3 v;
+	vec3_set(&v, x, y, z);
+	matrix3_scale(dst, m, &v);
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/libobs/graphics/matrix4.c b/libobs/graphics/matrix4.c
index f837b0731c12ec7fba5532f451f33b1d3910c7ab..c8bb6bf55712a5820d81981af14f9fa5a01bbd94 100644
--- a/libobs/graphics/matrix4.c
+++ b/libobs/graphics/matrix4.c
@@ -18,6 +18,7 @@
 #include "math-defs.h"
 #include "matrix4.h"
 #include "matrix3.h"
+#include "quat.h"
 
 void matrix4_from_matrix3(struct matrix4 *dst, const struct matrix3 *m)
 {
@@ -28,6 +29,34 @@ void matrix4_from_matrix3(struct matrix4 *dst, const struct matrix3 *m)
 	dst->t.w = 1.0f;
 }
 
+void matrix4_from_quat(struct matrix4 *dst, const struct quat *q)
+{ /* Fills x/y/z from q's components and sets the t row to (0, 0, 0, 1). */
+	float norm = quat_dot(q, q);
+	float s = (norm > 0.0f) ? (2.0f/norm) : 0.0f; /* 0 guards the divide */
+
+	float xx = q->x * q->x * s;
+	float yy = q->y * q->y * s;
+	float zz = q->z * q->z * s;
+	float xy = q->x * q->y * s;
+	float xz = q->x * q->z * s;
+	float yz = q->y * q->z * s;
+	float wx = q->w * q->x * s;
+	float wy = q->w * q->y * s;
+	float wz = q->w * q->z * s;
+
+	vec4_set(&dst->x, 1.0f - (yy + zz), xy + wz, xz - wy, 0.0f);
+	vec4_set(&dst->y, xy - wz, 1.0f - (xx + zz), yz + wx, 0.0f);
+	vec4_set(&dst->z, xz + wy, yz - wx, 1.0f - (xx + yy), 0.0f);
+	vec4_set(&dst->t, 0.0f, 0.0f, 0.0f, 1.0f); /* no translation part */
+}
+
+void matrix4_from_axisang(struct matrix4 *dst, const struct axisang *aa)
+{ /* Converts aa to a quaternion, then reuses matrix4_from_quat(). */
+	struct quat q;
+	quat_from_axisang(&q, aa);
+	matrix4_from_quat(dst, &q);
+}
+
 void matrix4_mul(struct matrix4 *dst, const struct matrix4 *m1,
 		const struct matrix4 *m2)
 {
@@ -98,6 +127,57 @@ float matrix4_determinant(const struct matrix4 *m)
 	return result;
 }
 
+void matrix4_translate3v(struct matrix4 *dst, const struct matrix4 *m,
+		const struct vec3 *v)
+{ /* Multiplies m (left operand) by a matrix whose t row comes from v. */
+	struct matrix4 temp;
+	vec4_set(&temp.x, 1.0f, 0.0f, 0.0f, 0.0f);
+	vec4_set(&temp.y, 0.0f, 1.0f, 0.0f, 0.0f);
+	vec4_set(&temp.z, 0.0f, 0.0f, 1.0f, 0.0f);
+	vec4_from_vec3(&temp.t, v); /* vec4_from_vec3 sets w to 1.0f */
+
+	matrix4_mul(dst, m, &temp);
+}
+
+void matrix4_translate4v(struct matrix4 *dst, const struct matrix4 *m,
+		const struct vec4 *v)
+{ /* Same as the 3v form, but the t row is a straight copy of v. */
+	struct matrix4 temp;
+	vec4_set(&temp.x, 1.0f, 0.0f, 0.0f, 0.0f);
+	vec4_set(&temp.y, 0.0f, 1.0f, 0.0f, 0.0f);
+	vec4_set(&temp.z, 0.0f, 0.0f, 1.0f, 0.0f);
+	vec4_copy(&temp.t, v); /* v->w is taken as-is, not forced to 1 */
+
+	matrix4_mul(dst, m, &temp);
+}
+
+void matrix4_rotate(struct matrix4 *dst, const struct matrix4 *m,
+		const struct quat *q)
+{ /* Multiplies m (left operand) by the matrix built from q. */
+	struct matrix4 temp;
+	matrix4_from_quat(&temp, q);
+	matrix4_mul(dst, m, &temp);
+}
+
+void matrix4_rotate_aa(struct matrix4 *dst, const struct matrix4 *m,
+		const struct axisang *aa)
+{ /* Multiplies m (left operand) by the matrix built from aa. */
+	struct matrix4 temp;
+	matrix4_from_axisang(&temp, aa);
+	matrix4_mul(dst, m, &temp);
+}
+
+void matrix4_scale(struct matrix4 *dst, const struct matrix4 *m,
+		const struct vec3 *v)
+{ /* Multiplies m (left operand) by diag(v->x, v->y, v->z, 1). */
+	struct matrix4 temp;
+	vec4_set(&temp.x, v->x, 0.0f, 0.0f, 0.0f);
+	vec4_set(&temp.y, 0.0f, v->y, 0.0f, 0.0f);
+	vec4_set(&temp.z, 0.0f, 0.0f, v->z, 0.0f);
+	vec4_set(&temp.t, 0.0f, 0.0f, 0.0f, 1.0f); /* v->w is not read */
+	matrix4_mul(dst, m, &temp);
+}
+
 bool matrix4_inv(struct matrix4 *dst, const struct matrix4 *m)
 {
 	struct vec4 *dstv = (struct vec4 *)dst;
@@ -105,6 +185,14 @@ bool matrix4_inv(struct matrix4 *dst, const struct matrix4 *m)
 	float m3x3[9];
 	int   i, j, sign;
 
+	if (dst == m) {
+		struct matrix4 temp = *m;
+		return matrix4_inv(dst, &temp);
+	}
+
+	dstv = (struct vec4 *)dst;
+	det  = matrix4_determinant(m);
+
 	if (fabs(det) < 0.0005f)
 		return false;
 
diff --git a/libobs/graphics/matrix4.h b/libobs/graphics/matrix4.h
index 55d5c01999c76161fe9d3a82ba8a0c2940e40c6a..4c9426ae098901d774888157c8751f409f6a5e77 100644
--- a/libobs/graphics/matrix4.h
+++ b/libobs/graphics/matrix4.h
@@ -17,7 +17,9 @@
 
 #pragma once
 
+#include "vec3.h"
 #include "vec4.h"
+#include "axisang.h"
 
 /* 4x4 Matrix */
 
@@ -52,15 +54,52 @@ static inline void matrix4_identity(struct matrix4 *dst)
 }
 
 EXPORT void matrix4_from_matrix3(struct matrix4 *dst, const struct matrix3 *m);
+EXPORT void matrix4_from_quat(struct matrix4 *dst, const struct quat *q);
+EXPORT void matrix4_from_axisang(struct matrix4 *dst,
+		const struct axisang *aa);
 
 EXPORT void matrix4_mul(struct matrix4 *dst, const struct matrix4 *m1,
 		const struct matrix4 *m2);
 
 EXPORT float matrix4_determinant(const struct matrix4 *m);
 
+EXPORT void matrix4_translate3v(struct matrix4 *dst, const struct matrix4 *m,
+		const struct vec3 *v);
+EXPORT void matrix4_translate4v(struct matrix4 *dst, const struct matrix4 *m,
+		const struct vec4 *v);
+EXPORT void matrix4_rotate(struct matrix4 *dst, const struct matrix4 *m,
+		const struct quat *q);
+EXPORT void matrix4_rotate_aa(struct matrix4 *dst, const struct matrix4 *m,
+		const struct axisang *aa);
+EXPORT void matrix4_scale(struct matrix4 *dst, const struct matrix4 *m,
+		const struct vec3 *v);
 EXPORT bool matrix4_inv(struct matrix4 *dst, const struct matrix4 *m);
 EXPORT void matrix4_transpose(struct matrix4 *dst, const struct matrix4 *m);
 
+static inline void matrix4_translate3f(struct matrix4 *dst,
+		const struct matrix4 *m, float x, float y, float z)
+{ /* float-argument form of matrix4_translate3v() */
+	struct vec3 v;
+	vec3_set(&v, x, y, z);
+	matrix4_translate3v(dst, m, &v);
+}
+
+static inline void matrix4_rotate_aa4f(struct matrix4 *dst,
+		const struct matrix4 *m, float x, float y, float z, float rot)
+{ /* float-argument form of matrix4_rotate_aa() */
+	struct axisang aa;
+	axisang_set(&aa, x, y, z, rot); /* rot goes in the angle slot */
+	matrix4_rotate_aa(dst, m, &aa);
+}
+
+static inline void matrix4_scale3f(struct matrix4 *dst,
+		const struct matrix4 *m, float x, float y, float z)
+{ /* float-argument form of matrix4_scale() */
+	struct vec3 v;
+	vec3_set(&v, x, y, z);
+	matrix4_scale(dst, m, &v);
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/libobs/graphics/plane.c b/libobs/graphics/plane.c
index 19f677d4af1588717fd206e2e18eb633e70c404a..f8fb0cae17ee594e842df21274c608b0ebafba2e 100644
--- a/libobs/graphics/plane.c
+++ b/libobs/graphics/plane.c
@@ -34,14 +34,28 @@ void plane_from_tri(struct plane *dst,
 }
 
 void plane_transform(struct plane *dst, const struct plane *p,
-		const struct matrix3 *m)
+		const struct matrix4 *m)
 {
 	struct vec3 temp;
 
+	vec3_zero(&temp); /* start from the origin; transformed below */
+
 	vec3_transform(&dst->dir, &p->dir, m);
 	vec3_norm(&dst->dir, &dst->dir);
 
-	vec3_transform(&temp, &m->t, m);
+	vec3_transform(&temp, &temp, m); /* temp = where m sends the origin */
+	dst->dist = p->dist - vec3_dot(&dst->dir, &temp);
+}
+
+void plane_transform3x4(struct plane *dst, const struct plane *p,
+		const struct matrix3 *m)
+{ /* matrix3 variant; same steps the old matrix3 plane_transform used */
+	struct vec3 temp;
+
+	vec3_transform3x4(&dst->dir, &p->dir, m);
+	vec3_norm(&dst->dir, &dst->dir);
+
+	vec3_transform3x4(&temp, &m->t, m); /* m->t itself, run through m */
 	dst->dist = p->dist - vec3_dot(&dst->dir, &temp);
 }
 
diff --git a/libobs/graphics/plane.h b/libobs/graphics/plane.h
index e6bb9efc871ff639c9bae74f2ef5526ffb8d979c..439f4521d18ddb277a265fd414a657af00bf4e89 100644
--- a/libobs/graphics/plane.h
+++ b/libobs/graphics/plane.h
@@ -25,6 +25,7 @@ extern "C" {
 #endif
 
 struct matrix3;
+struct matrix4;
 
 struct plane {
 	struct vec3 dir;
@@ -57,6 +58,8 @@ EXPORT void plane_from_tri(struct plane *dst,
                            const struct vec3 *v3);
 
 EXPORT void plane_transform(struct plane *dst, const struct plane *p,
+		const struct matrix4 *m);
+EXPORT void plane_transform3x4(struct plane *dst, const struct plane *p,
 		const struct matrix3 *m);
 
 EXPORT bool plane_intersection_ray(const struct plane *p,
diff --git a/libobs/graphics/quat.c b/libobs/graphics/quat.c
index e660adb4b2bc28735a68f8180423e0d9d8a5f196..48af8a5fa6ddd1046fb87aa2cec637b31a2cea44 100644
--- a/libobs/graphics/quat.c
+++ b/libobs/graphics/quat.c
@@ -18,6 +18,7 @@
 #include "quat.h"
 #include "vec3.h"
 #include "matrix3.h"
+#include "matrix4.h"
 #include "axisang.h"
 
 static inline void quat_vec3(struct vec3 *v, const struct quat *q)
@@ -59,6 +60,11 @@ struct f4x4 {
 };
 
 void quat_from_matrix3(struct quat *dst, const struct matrix3 *m)
+{ /* NOTE(review): the cast assumes matrix3/matrix4 share a layout. */
+	quat_from_matrix4(dst, (const struct matrix4*)m);
+}
+
+void quat_from_matrix4(struct quat *dst, const struct matrix4 *m)
 {
 	float tr = (m->x.x + m->y.y + m->z.z);
 	float inv_half;
@@ -66,13 +72,13 @@ void quat_from_matrix3(struct quat *dst, const struct matrix3 *m)
 	int i,j,k;
 
 	if (tr > 0.0f) {
-		four_d = sqrtf(tr+1.0f);
-		dst->w = four_d*0.5f;
+		four_d = sqrtf(tr + 1.0f);
+		dst->w = four_d * 0.5f;
 
-		inv_half = 0.5f/four_d;
-		dst->x = (m->y.z - m->z.y)*inv_half;
-		dst->y = (m->z.x - m->x.z)*inv_half;
-		dst->z = (m->x.y - m->y.x)*inv_half;
+		inv_half = 0.5f / four_d;
+		dst->x = (m->y.z - m->z.y) * inv_half;
+		dst->y = (m->z.x - m->x.z) * inv_half;
+		dst->z = (m->x.y - m->y.x) * inv_half;
 	} else {
 		struct f4x4 *val = (struct f4x4*)m;
 
@@ -81,20 +87,20 @@ void quat_from_matrix3(struct quat *dst, const struct matrix3 *m)
 		if (m->z.z > val->ptr[i][i])
 			i = 2;
 
-		j = (i+1)%3;
-		k = (i+2)%3;
+		j = (i+1) % 3;
+		k = (i+2) % 3;
 
 		/* ---------------------------------- */
 
 		four_d = sqrtf((val->ptr[i][i] - val->ptr[j][j] -
 					val->ptr[k][k]) + 1.0f);
 
-		dst->ptr[i] = four_d*0.5f;
+		dst->ptr[i] = four_d * 0.5f;
 
-		inv_half = 0.5f/four_d;
-		dst->ptr[j]  = (val->ptr[i][j] + val->ptr[j][i])*inv_half;
-		dst->ptr[k]  = (val->ptr[i][k] + val->ptr[k][i])*inv_half;
-		dst->w =       (val->ptr[j][k] - val->ptr[k][j])*inv_half;
+		inv_half = 0.5f / four_d;
+		dst->ptr[j]  = (val->ptr[i][j] + val->ptr[j][i]) * inv_half;
+		dst->ptr[k]  = (val->ptr[i][k] + val->ptr[k][i]) * inv_half;
+		dst->w =       (val->ptr[j][k] - val->ptr[k][j]) * inv_half;
 	}
 }
 
diff --git a/libobs/graphics/quat.h b/libobs/graphics/quat.h
index 6e52dbf2399f2910b22427d8e06dc8f635627809..adecac1acd72f768d4cdf7814c6227f0bbadb947 100644
--- a/libobs/graphics/quat.h
+++ b/libobs/graphics/quat.h
@@ -35,6 +35,7 @@ extern "C" {
 #endif
 
 struct matrix3;
+struct matrix4;
 struct axisang;
 
 struct quat {
@@ -162,6 +163,7 @@ static inline bool quat_close(const struct quat *q1, const struct quat *q2,
 
 EXPORT void quat_from_axisang(struct quat *dst, const struct axisang *aa);
 EXPORT void quat_from_matrix3(struct quat *dst, const struct matrix3 *m);
+EXPORT void quat_from_matrix4(struct quat *dst, const struct matrix4 *m);
 
 EXPORT void quat_get_dir(struct vec3 *dst, const struct quat *q);
 EXPORT void quat_set_look_dir(struct quat *dst, const struct vec3 *dir);
diff --git a/libobs/graphics/vec3.c b/libobs/graphics/vec3.c
index 2fdf3b88c392de47acfaedf9c76a22fcbd96bf96..7ccd0ea96d322af80d244fa971c94c8ca418d229 100644
--- a/libobs/graphics/vec3.c
+++ b/libobs/graphics/vec3.c
@@ -16,12 +16,19 @@
 ******************************************************************************/
 
 #include "vec3.h"
+#include "vec4.h"
 #include "quat.h"
 #include "axisang.h"
 #include "plane.h"
 #include "matrix3.h"
 #include "math-extra.h"
 
+void vec3_from_vec4(struct vec3 *dst, const struct vec4 *v)
+{
+	dst->m = v->m; /* copy the whole m member in one assignment */
+	dst->w = 0.0f; /* then replace w with 0; there is no divide by w */
+}
+
 float vec3_plane_dist(const struct vec3 *v, const struct plane *p)
 {
 	return vec3_dot(v, &p->dir) - p->dist;
@@ -36,9 +43,19 @@ void vec3_rotate(struct vec3 *dst, const struct vec3 *v,
 	dst->x = vec3_dot(&temp, &m->x);
 	dst->y = vec3_dot(&temp, &m->y);
 	dst->z = vec3_dot(&temp, &m->z);
+	dst->w = 0.0f;
 }
 
 void vec3_transform(struct vec3 *dst, const struct vec3 *v,
+		const struct matrix4 *m)
+{ /* matrix4 path: widen v to a vec4, transform, narrow back to vec3 */
+	struct vec4 v4;
+	vec4_from_vec3(&v4, v); /* w becomes 1.0f */
+	vec4_transform(&v4, &v4, m);
+	vec3_from_vec4(dst, &v4); /* resulting w is dropped and stored as 0 */
+}
+
+void vec3_transform3x4(struct vec3 *dst, const struct vec3 *v,
 		const struct matrix3 *m)
 {
 	struct vec3 temp;
@@ -47,6 +64,7 @@ void vec3_transform(struct vec3 *dst, const struct vec3 *v,
 	dst->x = vec3_dot(&temp, &m->x);
 	dst->y = vec3_dot(&temp, &m->y);
 	dst->z = vec3_dot(&temp, &m->z);
+	dst->w = 0.0f;
 }
 
 void vec3_mirror(struct vec3 *dst, const struct vec3 *v, const struct plane *p)
diff --git a/libobs/graphics/vec3.h b/libobs/graphics/vec3.h
index bb065478d474e1f524a7abe6e1a4ff88d7931529..b0e38f348f68471089b44ba9055600beb70b6583 100644
--- a/libobs/graphics/vec3.h
+++ b/libobs/graphics/vec3.h
@@ -18,6 +18,7 @@
 #pragma once
 
 #include "math-defs.h"
+#include "vec4.h"
 #include <xmmintrin.h>
 
 #ifdef __cplusplus
@@ -26,6 +27,7 @@ extern "C" {
 
 struct plane;
 struct matrix3;
+struct matrix4;
 struct quat;
 
 struct vec3 {
@@ -53,6 +55,8 @@ static inline void vec3_copy(struct vec3 *dst, const struct vec3 *v)
 	dst->m = v->m;
 }
 
+EXPORT void vec3_from_vec4(struct vec3 *dst, const struct vec4 *v);
+
 static inline void vec3_add(struct vec3 *dst, const struct vec3 *v1,
 		const struct vec3 *v2)
 {
@@ -131,6 +135,7 @@ static inline void vec3_neg(struct vec3 *dst, const struct vec3 *v)
 	dst->x = -v->x;
 	dst->y = -v->y;
 	dst->z = -v->z;
+	dst->w = 0.0f;
 }
 
 static inline float vec3_len(const struct vec3 *v)
@@ -198,6 +203,7 @@ static inline void vec3_abs(struct vec3 *dst, const struct vec3 *v)
 	dst->x = fabsf(v->x);
 	dst->y = fabsf(v->y);
 	dst->z = fabsf(v->z);
+	dst->w = 0.0f;
 }
 
 static inline void vec3_floor(struct vec3 *dst, const struct vec3 *v)
@@ -205,6 +211,7 @@ static inline void vec3_floor(struct vec3 *dst, const struct vec3 *v)
 	dst->x = floorf(v->x);
 	dst->y = floorf(v->y);
 	dst->z = floorf(v->z);
+	dst->w = 0.0f;
 }
 
 static inline void vec3_ceil(struct vec3 *dst, const struct vec3 *v)
@@ -212,13 +219,17 @@ static inline void vec3_ceil(struct vec3 *dst, const struct vec3 *v)
 	dst->x = ceilf(v->x);
 	dst->y = ceilf(v->y);
 	dst->z = ceilf(v->z);
+	dst->w = 0.0f;
 }
 
 EXPORT float vec3_plane_dist(const struct vec3 *v, const struct plane *p);
 
+EXPORT void vec3_transform(struct vec3 *dst, const struct vec3 *v,
+		const struct matrix4 *m);
+
 EXPORT void vec3_rotate(struct vec3 *dst, const struct vec3 *v,
 		const struct matrix3 *m);
-EXPORT void vec3_transform(struct vec3 *dst, const struct vec3 *v,
+EXPORT void vec3_transform3x4(struct vec3 *dst, const struct vec3 *v,
 		const struct matrix3 *m);
 
 EXPORT void vec3_mirror(struct vec3 *dst, const struct vec3 *v,
diff --git a/libobs/graphics/vec4.c b/libobs/graphics/vec4.c
index 573851dff253b07d1097896adcb776917363f37a..d28aa012b2ec4a21267ff9fba89ec6df4c6c7466 100644
--- a/libobs/graphics/vec4.c
+++ b/libobs/graphics/vec4.c
@@ -16,17 +16,27 @@
 ******************************************************************************/
 
 #include "vec4.h"
+#include "vec3.h"
 #include "matrix4.h"
 
+void vec4_from_vec3(struct vec4 *dst, const struct vec3 *v)
+{
+	dst->m = v->m; /* copy the whole m member in one assignment */
+	dst->w = 1.0f; /* w is forced to 1.0f regardless of v->w */
+}
+
 void vec4_transform(struct vec4 *dst, const struct vec4 *v,
 		const struct matrix4 *m)
 {
 	struct vec4 temp;
+	struct matrix4 transpose;
+
+	matrix4_transpose(&transpose, m); /* recomputed on every call */
 
-	temp.x = vec4_dot(&m->x, v);
-	temp.y = vec4_dot(&m->y, v);
-	temp.z = vec4_dot(&m->z, v);
-	temp.w = vec4_dot(&m->t, v);
+	temp.x = vec4_dot(&transpose.x, v); /* presumably column x of m */
+	temp.y = vec4_dot(&transpose.y, v);
+	temp.z = vec4_dot(&transpose.z, v);
+	temp.w = vec4_dot(&transpose.t, v);
 
 	vec4_copy(dst, &temp);
 }
diff --git a/libobs/graphics/vec4.h b/libobs/graphics/vec4.h
index 207b27aec41def07daaf9d7e0095166bfd40098b..6bb8fa32818ecb4b1dc6d4fadcff94dccfcda4b2 100644
--- a/libobs/graphics/vec4.h
+++ b/libobs/graphics/vec4.h
@@ -24,6 +24,7 @@
 extern "C" {
 #endif
 
+struct vec3;
 struct matrix4;
 
 struct vec4 {
@@ -52,6 +53,8 @@ static inline void vec4_copy(struct vec4 *dst, const struct vec4 *v)
 	dst->m = v->m;
 }
 
+EXPORT void vec4_from_vec3(struct vec4 *dst, const struct vec3 *v);
+
 static inline void vec4_add(struct vec4 *dst, const struct vec4 *v1,
 		const struct vec4 *v2)
 {