From 7e8c82fae327dee84e93817c2f8aabc8e1e4a61a Mon Sep 17 00:00:00 2001 From: Miloslav Ciz Date: Sun, 23 Jul 2023 16:51:09 +0200 Subject: [PATCH] Add todos --- TODO.txt | 20 + map.h | 92 +- small3dlib.h | 2997 +++++++++++++++++++++++++++++++++++++++ tinyphysicsengine.h | 3303 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 6386 insertions(+), 26 deletions(-) create mode 100644 small3dlib.h create mode 100644 tinyphysicsengine.h diff --git a/TODO.txt b/TODO.txt index 99fbafc..f68ec33 100644 --- a/TODO.txt +++ b/TODO.txt @@ -1,4 +1,14 @@ - track size: 64x64x64 +- EFFICINT MAP DRAWING: + - map will be subdivided into subblocks (probably 16x16x16 or 8x8x8), only + nearest subblocks (and possibly only those in viewing direction will be + drawn) + - HERE IS THE BEAUTY: when building the map 3D model from map format, order + the triangle indices so that they are grouped by blocks in the index array, + i.e. if we draw first N triangles we actually draw the fist subblock, if + we draw the next N triangles we draw the second subblocks etc. Then just + create a function to draw Nth subblock and use this to only draw the + subblocks we want to draw. - Architecture (modules): - map: loads map n stuff - racing engine (depends on map): handles physics of car with given inputs @@ -39,6 +49,11 @@ will be preprocessed out to normal blocks when the map is loaded. This will compress the stored maps. - binary +- how to do accelerators? + - special material? + - maybe they could simply boost the car in its current direction? +- compile time option to choose how many maps to include (for platforms with + lower memory) - Environments: just different textures for a cube inside which the tarck is, the cube won't have the top side, texture can have transparency (sky see through) @@ -55,6 +70,11 @@ in some format) - maybe just do both? pass to client pixel drawing function both a 565 color and a simplified 1 byte color? +- How to visually represent checkpoints (and finish)? + - Could be kind of an arrow made of single tri above the block? + (try how it looks in Blender) + - Probably just a literal block (or pyramid) DRAWN WITH DITHERING and/or + blinking - Start and finish blocks will be just the first/last checkpoint blocks (no need for special start, finish and CP). - Or maybe not, we would need to somehow mark these with extra vars, plus we diff --git a/map.h b/map.h index e3a12c5..2cbce76 100644 --- a/map.h +++ b/map.h @@ -6,36 +6,61 @@ /** The map (track) module for Licar. - Map format is binary and consists of following values: + Map coordinates/size: + - map size is 64x64x64 blocks + - [0,0,0] is is bottom-left-front-most + - x goes right, y goes up, z goes forward + - coordinate number is a single number obtained as x + 64 * y + 64 * 64 * z - - 76, 77 (for "LM", magic number) - - ASCII map name - - 10 (separator) - - ASCII comment - - 10 (separator) - - block values, each one in the format: - - 1 byte type: says the type of block. If the highest bit is 0, the block is - normal, otherwise it is a special block (e.g. a "command") - - 3 bytes: A, B, C, such that: - - A, B and lowest 2 bits of C form the block coordinate number (A being - the lowest part etc.), a sequential number going from 0,0,0 in +X - direction, then +Y and then +Z. - - bits C2 and C3 say the block material - - highest 4 bits of C (C4, C5, C6, C7) say the block's transform: - - first if C4 is set, the block is flipped in the X direction - - then the block is rotated around vertical axis by 0, 90, 180 or 270 - degrees if C5C6 is 00, 01, 10 or 11. - - last if C7 is set, the block is flipped vertically - - 255 (terminator) + Map format is binary and consists of the following values: + - 76, 77 (for "LM", magic number) + - ASCII map name + - 10 (separator) + - ASCII comment + - 10 (separator) + - block values, each one in the format: + - 1 byte type: says the type of block. If the highest bit is 0, the block + is normal, otherwise it is a special block (e.g. a "command") + - 3 bytes: A, B, C, such that: + - A, B and lowest 2 bits of C form the block coordinate number (A being + the lowest part etc.) + - bits C2 and C3 say the block material + - highest 4 bits of C (C4, C5, C6, C7) say the block's transform: + - first if C4 is set, the block is flipped in the X direction + - then the block is rotated around vertical axis by 0, 90, 180 or 270 + degrees if C5C6 is 00, 01, 10 or 11. + - last if C7 is set, the block is flipped vertically + - 255 (terminator) + + In this format order of blocks matters, latter blocks will replace previous + blocks placed on the same coordinate. Internally the map will be preprocessed + to RAM when loaded so that thing like the magic number and special blocks are + removed and the remaining blocks will be sorted for fast block searching. */ -#define LCR_BLOCK_TRANSFORM_ROT_MASK 0x60 +#define LCR_BLOCK_TRANSFORM_ROT_MASK 0x60 -#define LCR_BLOCK_TRANSFORM_FLIP_H 0x10 -#define LCR_BLOCK_TRANSFORM_ROT_90 0x20 -#define LCR_BLOCK_TRANSFORM_ROT_180 0x40 -#define LCR_BLOCK_TRANSFORM_ROT_270 0x60 -#define LCR_BLOCK_TRANSFORM_FLIP_V 0x80 +#define LCR_BLOCK_TRANSFORM_FLIP_H 0x10 +#define LCR_BLOCK_TRANSFORM_ROT_90 0x20 +#define LCR_BLOCK_TRANSFORM_ROT_180 0x40 +#define LCR_BLOCK_TRANSFORM_ROT_270 0x60 +#define LCR_BLOCK_TRANSFORM_FLIP_V 0x80 + +#define LCR_BLOCK_XYZ_TO_COORD(x,y,z) + +#define LCR_MAP_MAGIC_NUMBER 'L', 'M' +#define LCR_MAP_TERMINATOR 0xff +#define LCR_MAP_BLOCK(t,x,y,z,r) 0 // TODO + +#define LCR_BLOCK_MATERIAL_CONCRETE 0x00 + +#define LCR_MAP_COUNT 1 + +#define LCR_BLOCK_FULL 0x00 ///< completely filled block +#define LCR_BLOCK_BOTTOM 0x01 ///< filled bottom half of block +#define LCR_BLOCK_LEFT 0x02 ///< filled left half of block +#define LCR_BLOCK_LEFT_FRONT 0x03 ///< filled left front quarter of block +#define LCR_BLOCK_BOTTOM_LEFT 0x04 ///< filled bottom left quarter of block typedef struct { @@ -43,4 +68,19 @@ typedef struct uint8_t coordMatTrans[3]; ///< coordinate, material and transform } LCR_MapBlock; +static const uint8_t LCR_map0[] = +{ + LCR_MAP_MAGIC_NUMBER, + 77, 48, 10 // map name: M0 + 10, // map comment: + + LCR_MAP_BLOCK( LCR_BLOCK_CUBE, 1, 2, 3, 0), + LCR_MAP_TERMINATOR +}; + +static const uint8_t *LCR_maps[LCR_MAP_COUNT] = +{ + LCR_map0 +}; + #endif // guard diff --git a/small3dlib.h b/small3dlib.h new file mode 100644 index 0000000..4b77365 --- /dev/null +++ b/small3dlib.h @@ -0,0 +1,2997 @@ +#ifndef SMALL3DLIB_H +#define SMALL3DLIB_H + +/* + Simple realtime 3D software rasterization renderer. It is fast, focused on + resource-limited computers, located in a single C header file, with no + dependencies, using only 32bit integer arithmetics. + + author: Miloslav Ciz + license: CC0 1.0 (public domain) + found at https://creativecommons.org/publicdomain/zero/1.0/ + + additional waiver of all IP + version: 0.905d + + Before including the library, define S3L_PIXEL_FUNCTION to the name of the + function you'll be using to draw single pixels (this function will be called + by the library to render the frames). Also either init S3L_resolutionX and + S3L_resolutionY or define S3L_RESOLUTION_X and S3L_RESOLUTION_Y. + + You'll also need to decide what rendering strategy and other settings you + want to use, depending on your specific usecase. You may want to use a + z-buffer (full or reduced, S3L_Z_BUFFER), sorted-drawing (S3L_SORT), or even + none of these. See the description of the options in this file. + + The rendering itself is done with S3L_drawScene, usually preceded by + S3L_newFrame (for clearing zBuffer etc.). + + The library is meant to be used in not so huge programs that use single + translation unit and so includes both declarations and implementation at once. + If you for some reason use multiple translation units (which include the + library), you'll have to handle this yourself (e.g. create a wrapper, manually + split the library into .c and .h etc.). + + -------------------- + + This work's goal is to never be encumbered by any exclusive intellectual + property rights. The work is therefore provided under CC0 1.0 + additional + WAIVER OF ALL INTELLECTUAL PROPERTY RIGHTS that waives the rest of + intellectual property rights not already waived by CC0 1.0. The WAIVER OF ALL + INTELLECTUAL PROPERTY RGHTS is as follows: + + Each contributor to this work agrees that they waive any exclusive rights, + including but not limited to copyright, patents, trademark, trade dress, + industrial design, plant varieties and trade secrets, to any and all ideas, + concepts, processes, discoveries, improvements and inventions conceived, + discovered, made, designed, researched or developed by the contributor either + solely or jointly with others, which relate to this work or result from this + work. Should any waiver of such right be judged legally invalid or + ineffective under applicable law, the contributor hereby grants to each + affected person a royalty-free, non transferable, non sublicensable, non + exclusive, irrevocable and unconditional license to this right. + + -------------------- + + CONVENTIONS: + + This library should never draw pixels outside the specified screen + boundaries, so you don't have to check this (that would cost CPU time)! + + You can safely assume that triangles are rasterized one by one and from top + down, left to right (so you can utilize e.g. various caches), and if sorting + is disabled the order of rasterization will be that specified in the scene + structure and model arrays (of course, some triangles and models may be + skipped due to culling etc.). + + Angles are in S3L_Units, a full angle (2 pi) is S3L_FRACTIONS_PER_UNITs. + + We use row vectors. + + In 3D space, a left-handed coord. system is used. One spatial unit is split + into S3L_FRACTIONS_PER_UNITs fractions (fixed point arithmetic). + + y ^ + | _ + | /| z + | / + | / + [0,0,0]-------> x + + Untransformed camera is placed at [0,0,0], looking forward along +z axis. The + projection plane is centered at [0,0,0], stretrinch from + -S3L_FRACTIONS_PER_UNIT to S3L_FRACTIONS_PER_UNIT horizontally (x), + vertical size (y) depends on the aspect ratio (S3L_RESOLUTION_X and + S3L_RESOLUTION_Y). Camera FOV is defined by focal length in S3L_Units. + + Rotations use Euler angles and are generally in the extrinsic Euler angles in + ZXY order (by Z, then by X, then by Y). Positive rotation about an axis + rotates CW (clock-wise) when looking in the direction of the axis. + + Coordinates of pixels on the screen start at the top left, from [0,0]. + + There is NO subpixel accuracy (screen coordinates are only integer). + + Triangle rasterization rules are these (mostly same as OpenGL, D3D etc.): + + - Let's define: + - left side: + - not exactly horizontal, and on the left side of triangle + - exactly horizontal and above the topmost + (in other words: its normal points at least a little to the left or + completely up) + - right side: not left side + - Pixel centers are at integer coordinates and triangle for drawing are + specified with integer coordinates of pixel centers. + - A pixel is rasterized: + - if its center is inside the triangle OR + - if its center is exactly on the triangle side which is left and at the + same time is not on the side that's right (case of a triangle that's on + a single line) OR + - if its center is exactly on the triangle corner of sides neither of which + is right. + + These rules imply among others: + + - Adjacent triangles don't have any overlapping pixels, nor gaps between. + - Triangles of points that lie on a single line are NOT rasterized. + - A single "long" triangle CAN be rasterized as isolated islands of pixels. + - Transforming (e.g. mirroring, rotating by 90 degrees etc.) a result of + rasterizing triangle A is NOT generally equal to applying the same + transformation to triangle A first and then rasterizing it. Even the number + of rasterized pixels is usually different. + - If specifying a triangle with integer coordinates (which we are), then: + - The bottom-most corner (or side) of a triangle is never rasterized + (because it is connected to a right side). + - The top-most corner can only be rasterized on completely horizontal side + (otherwise it is connected to a right side). + - Vertically middle corner is rasterized if and only if it is on the left + of the triangle and at the same time is also not the bottom-most corner. +*/ + +#include + +#ifdef S3L_RESOLUTION_X + #ifdef S3L_RESOLUTION_Y + #define S3L_MAX_PIXELS (S3L_RESOLUTION_X * S3L_RESOLUTION_Y) + #endif +#endif + +#ifndef S3L_RESOLUTION_X + #ifndef S3L_MAX_PIXELS + #error Dynamic resolution set (S3L_RESOLUTION_X not defined), but\ + S3L_MAX_PIXELS not defined! + #endif + + uint16_t S3L_resolutionX = 512; /**< If a static resolution is not set with + S3L_RESOLUTION_X, this variable can be + used to change X resolution at runtime, + in which case S3L_MAX_PIXELS has to be + defined (to allocate zBuffer etc.)! */ + #define S3L_RESOLUTION_X S3L_resolutionX +#endif + +#ifndef S3L_RESOLUTION_Y + #ifndef S3L_MAX_PIXELS + #error Dynamic resolution set (S3L_RESOLUTION_Y not defined), but\ + S3L_MAX_PIXELS not defined! + #endif + + uint16_t S3L_resolutionY = 512; /**< Same as S3L_resolutionX, but for Y + resolution. */ + #define S3L_RESOLUTION_Y S3L_resolutionY +#endif + +#ifndef S3L_USE_WIDER_TYPES + /** If true, the library will use wider data types which will largely supress + many rendering bugs and imprecisions happening due to overflows, but this will + also consumer more RAM and may potentially be slower on computers with smaller + native integer. */ + #define S3L_USE_WIDER_TYPES 0 +#endif + +#ifndef S3L_SIN_METHOD + /** Says which method should be used for computing sin/cos functions, possible + values: 0 (lookup table, takes more program memory), 1 (Bhaskara's + approximation, slower). This may cause the trigonometric functions give + slightly different results. */ + #define S3L_SIN_METHOD 0 +#endif + +/** Units of measurement in 3D space. There is S3L_FRACTIONS_PER_UNIT in one +spatial unit. By dividing the unit into fractions we effectively achieve a +fixed point arithmetic. The number of fractions is a constant that serves as +1.0 in floating point arithmetic (normalization etc.). */ + +typedef +#if S3L_USE_WIDER_TYPES + int64_t +#else + int32_t +#endif + S3L_Unit; + +/** How many fractions a spatial unit is split into, i.e. this is the fixed +point scaling. This is NOT SUPPOSED TO BE REDEFINED, so rather don't do it +(otherwise things may overflow etc.). */ +#define S3L_FRACTIONS_PER_UNIT 512 +#define S3L_F S3L_FRACTIONS_PER_UNIT + +typedef +#if S3L_USE_WIDER_TYPES + int32_t +#else + int16_t +#endif + S3L_ScreenCoord; + +typedef +#if S3L_USE_WIDER_TYPES + uint32_t +#else + uint16_t +#endif + S3L_Index; + +#ifndef S3L_NEAR_CROSS_STRATEGY + /** Specifies how the library will handle triangles that partially cross the + near plane. These are problematic and require special handling. Possible + values: + + 0: Strictly cull any triangle crossing the near plane. This will make such + triangles disappear. This is good for performance or models viewed only + from at least small distance. + 1: Forcefully push the vertices crossing near plane in front of it. This is + a cheap technique that can be good enough for displaying simple + environments on slow devices, but texturing and geometric artifacts/warps + will appear. + 2: Geometrically correct the triangles crossing the near plane. This may + result in some triangles being subdivided into two and is a little more + expensive, but the results will be geometrically correct, even though + barycentric correction is not performed so texturing artifacts will + appear. Can be ideal with S3L_FLAT. + 3: Perform both geometrical and barycentric correction of triangle crossing + the near plane. This is significantly more expensive but results in + correct rendering. */ + #define S3L_NEAR_CROSS_STRATEGY 0 +#endif + +#ifndef S3L_FLAT + /** If on, disables computation of per-pixel values such as barycentric + coordinates and depth -- these will still be available but will be the same + for the whole triangle. This can be used to create flat-shaded renders and + will be a lot faster. With this option on you will probably want to use + sorting instead of z-buffer. */ + #define S3L_FLAT 0 +#endif + +#if S3L_FLAT + #define S3L_COMPUTE_DEPTH 0 + #define S3L_PERSPECTIVE_CORRECTION 0 + // don't disable z-buffer, it makes sense to use it with no sorting +#endif + +#ifndef S3L_PERSPECTIVE_CORRECTION + /** Specifies what type of perspective correction (PC) to use. Remember this + is an expensive operation! Possible values: + + 0: No perspective correction. Fastest, inaccurate from most angles. + 1: Per-pixel perspective correction, accurate but very expensive. + 2: Approximation (computing only at every S3L_PC_APPROX_LENGTHth pixel). + Quake-style approximation is used, which only computes the PC after + S3L_PC_APPROX_LENGTH pixels. This is reasonably accurate and fast. */ + #define S3L_PERSPECTIVE_CORRECTION 0 +#endif + +#ifndef S3L_PC_APPROX_LENGTH + /** For S3L_PERSPECTIVE_CORRECTION == 2, this specifies after how many pixels + PC is recomputed. Should be a power of two to keep up the performance. + Smaller is nicer but slower. */ + #define S3L_PC_APPROX_LENGTH 32 +#endif + +#if S3L_PERSPECTIVE_CORRECTION + #define S3L_COMPUTE_DEPTH 1 // PC inevitably computes depth, so enable it +#endif + +#ifndef S3L_COMPUTE_DEPTH + /** Whether to compute depth for each pixel (fragment). Some other options + may turn this on automatically. If you don't need depth information, turning + this off can save performance. Depth will still be accessible in + S3L_PixelInfo, but will be constant -- equal to center point depth -- over + the whole triangle. */ + #define S3L_COMPUTE_DEPTH 1 +#endif + +#ifndef S3L_Z_BUFFER + /** What type of z-buffer (depth buffer) to use for visibility determination. + Possible values: + + 0: Don't use z-buffer. This saves a lot of memory, but visibility checking + won't be pixel-accurate and has to mostly be done by other means (typically + sorting). + 1: Use full z-buffer (of S3L_Units) for visibiltiy determination. This is the + most accurate option (and also a fast one), but requires a big amount of + memory. + 2: Use reduced-size z-buffer (of bytes). This is fast and somewhat accurate, + but inaccuracies can occur and a considerable amount of memory is + needed. */ + #define S3L_Z_BUFFER 0 +#endif + +#ifndef S3L_REDUCED_Z_BUFFER_GRANULARITY + /** For S3L_Z_BUFFER == 2 this sets the reduced z-buffer granularity. */ + #define S3L_REDUCED_Z_BUFFER_GRANULARITY 5 +#endif + +#ifndef S3L_STENCIL_BUFFER + /** Whether to use stencil buffer for drawing -- with this a pixel that would + be resterized over an already rasterized pixel (within a frame) will be + discarded. This is mostly for front-to-back sorted drawing. */ + #define S3L_STENCIL_BUFFER 0 +#endif + +#ifndef S3L_SORT + /** Defines how to sort triangles before drawing a frame. This can be used to + solve visibility in case z-buffer is not used, to prevent overwriting already + rasterized pixels, implement transparency etc. Note that for simplicity and + performance a relatively simple sorting is used which doesn't work completely + correctly, so mistakes can occur (even the best sorting wouldn't be able to + solve e.g. intersecting triangles). Note that sorting requires a bit of extra + memory -- an array of the triangles to sort -- the size of this array limits + the maximum number of triangles that can be drawn in a single frame + (S3L_MAX_TRIANGES_DRAWN). Possible values: + + 0: Don't sort triangles. This is fastest and doesn't use extra memory. + 1: Sort triangles from back to front. This can in most cases solve visibility + without requiring almost any extra memory compared to z-buffer. + 2: Sort triangles from front to back. This can be faster than back to front + because we prevent computing pixels that will be overwritten by nearer + ones, but we need a 1b stencil buffer for this (enable S3L_STENCIL_BUFFER), + so a bit more memory is needed. */ + #define S3L_SORT 0 +#endif + +#ifndef S3L_MAX_TRIANGES_DRAWN + /** Maximum number of triangles that can be drawn in sorted modes. This + affects the size of the cache used for triangle sorting. */ + #define S3L_MAX_TRIANGES_DRAWN 128 +#endif + +#ifndef S3L_NEAR + /** Distance of the near clipping plane. Points in front or EXATLY ON this + plane are considered outside the frustum. This must be >= 0. */ + #define S3L_NEAR (S3L_F / 4) +#endif + +#if S3L_NEAR <= 0 +#define S3L_NEAR 1 // Can't be <= 0. +#endif + +#ifndef S3L_NORMAL_COMPUTE_MAXIMUM_AVERAGE + /** Affects the S3L_computeModelNormals function. See its description for + details. */ + #define S3L_NORMAL_COMPUTE_MAXIMUM_AVERAGE 6 +#endif + +#ifndef S3L_FAST_LERP_QUALITY + /** Quality (scaling) of SOME (stepped) linear interpolations. 0 will most + likely be a tiny bit faster, but artifacts can occur for bigger tris, while + higher values can fix this -- in theory all higher values will have the same + speed (it is a shift value), but it mustn't be too high to prevent + overflow. */ + #define S3L_FAST_LERP_QUALITY 11 +#endif + +/** Vector that consists of four scalars and can represent homogenous + coordinates, but is generally also used as Vec3 and Vec2 for various + purposes. */ +typedef struct +{ + S3L_Unit x; + S3L_Unit y; + S3L_Unit z; + S3L_Unit w; +} S3L_Vec4; + +#define S3L_logVec4(v)\ + printf("Vec4: %d %d %d %d\n",((v).x),((v).y),((v).z),((v).w)) + +static inline void S3L_vec4Init(S3L_Vec4 *v); +static inline void S3L_vec4Set(S3L_Vec4 *v, S3L_Unit x, S3L_Unit y, + S3L_Unit z, S3L_Unit w); +static inline void S3L_vec3Add(S3L_Vec4 *result, S3L_Vec4 added); +static inline void S3L_vec3Sub(S3L_Vec4 *result, S3L_Vec4 substracted); +S3L_Unit S3L_vec3Length(S3L_Vec4 v); + +/** Normalizes Vec3. Note that this function tries to normalize correctly + rather than quickly! If you need to normalize quickly, do it yourself in a + way that best fits your case. */ +void S3L_vec3Normalize(S3L_Vec4 *v); + +/** Like S3L_vec3Normalize, but doesn't perform any checks on the input vector, + which is faster, but can be very innacurate or overflowing. You are supposed + to provide a "nice" vector (not too big or small). */ +static inline void S3L_vec3NormalizeFast(S3L_Vec4 *v); + +S3L_Unit S3L_vec2Length(S3L_Vec4 v); +void S3L_vec3Cross(S3L_Vec4 a, S3L_Vec4 b, S3L_Vec4 *result); +static inline S3L_Unit S3L_vec3Dot(S3L_Vec4 a, S3L_Vec4 b); + +/** Computes a reflection direction (typically used e.g. for specular component + in Phong illumination). The input vectors must be normalized. The result will + be normalized as well. */ +void S3L_reflect(S3L_Vec4 toLight, S3L_Vec4 normal, S3L_Vec4 *result); + +/** Determines the winding of a triangle, returns 1 (CW, clockwise), -1 (CCW, + counterclockwise) or 0 (points lie on a single line). */ +static inline int8_t S3L_triangleWinding( + S3L_ScreenCoord x0, + S3L_ScreenCoord y0, + S3L_ScreenCoord x1, + S3L_ScreenCoord y1, + S3L_ScreenCoord x2, + S3L_ScreenCoord y2); + +typedef struct +{ + S3L_Vec4 translation; + S3L_Vec4 rotation; /**< Euler angles. Rortation is applied in this order: + 1. z = by z (roll) CW looking along z+ + 2. x = by x (pitch) CW looking along x+ + 3. y = by y (yaw) CW looking along y+ */ + S3L_Vec4 scale; +} S3L_Transform3D; + +#define S3L_logTransform3D(t)\ + printf("Transform3D: T = [%d %d %d], R = [%d %d %d], S = [%d %d %d]\n",\ + (t).translation.x,(t).translation.y,(t).translation.z,\ + (t).rotation.x,(t).rotation.y,(t).rotation.z,\ + (t).scale.x,(t).scale.y,(t).scale.z) + +static inline void S3L_transform3DInit(S3L_Transform3D *t); + +void S3L_lookAt(S3L_Vec4 pointTo, S3L_Transform3D *t); + +void S3L_transform3DSet( + S3L_Unit tx, + S3L_Unit ty, + S3L_Unit tz, + S3L_Unit rx, + S3L_Unit ry, + S3L_Unit rz, + S3L_Unit sx, + S3L_Unit sy, + S3L_Unit sz, + S3L_Transform3D *t); + +/** Converts rotation transformation to three direction vectors of given length + (any one can be NULL, in which case it won't be computed). */ +void S3L_rotationToDirections( + S3L_Vec4 rotation, + S3L_Unit length, + S3L_Vec4 *forw, + S3L_Vec4 *right, + S3L_Vec4 *up); + +/** 4x4 matrix, used mostly for 3D transforms. The indexing is this: + matrix[column][row]. */ +typedef S3L_Unit S3L_Mat4[4][4]; + +#define S3L_logMat4(m)\ + printf("Mat4:\n %d %d %d %d\n %d %d %d %d\n %d %d %d %d\n %d %d %d %d\n"\ + ,(m)[0][0],(m)[1][0],(m)[2][0],(m)[3][0],\ + (m)[0][1],(m)[1][1],(m)[2][1],(m)[3][1],\ + (m)[0][2],(m)[1][2],(m)[2][2],(m)[3][2],\ + (m)[0][3],(m)[1][3],(m)[2][3],(m)[3][3]) + +/** Initializes a 4x4 matrix to identity. */ +static inline void S3L_mat4Init(S3L_Mat4 m); + +void S3L_mat4Copy(S3L_Mat4 src, S3L_Mat4 dst); + +void S3L_mat4Transpose(S3L_Mat4 m); + +void S3L_makeTranslationMat( + S3L_Unit offsetX, + S3L_Unit offsetY, + S3L_Unit offsetZ, + S3L_Mat4 m); + +/** Makes a scaling matrix. DON'T FORGET: scale of 1.0 is set with + S3L_FRACTIONS_PER_UNIT! */ +void S3L_makeScaleMatrix( + S3L_Unit scaleX, + S3L_Unit scaleY, + S3L_Unit scaleZ, + S3L_Mat4 m); + +/** Makes a matrix for rotation in the ZXY order. */ +void S3L_makeRotationMatrixZXY( + S3L_Unit byX, + S3L_Unit byY, + S3L_Unit byZ, + S3L_Mat4 m); + +void S3L_makeWorldMatrix(S3L_Transform3D worldTransform, S3L_Mat4 m); +void S3L_makeCameraMatrix(S3L_Transform3D cameraTransform, S3L_Mat4 m); + +/** Multiplies a vector by a matrix with normalization by + S3L_FRACTIONS_PER_UNIT. Result is stored in the input vector. */ +void S3L_vec4Xmat4(S3L_Vec4 *v, S3L_Mat4 m); + +/** Same as S3L_vec4Xmat4 but faster, because this version doesn't compute the + W component of the result, which is usually not needed. */ +void S3L_vec3Xmat4(S3L_Vec4 *v, S3L_Mat4 m); + +/** Multiplies two matrices with normalization by S3L_FRACTIONS_PER_UNIT. + Result is stored in the first matrix. The result represents a transformation + that has the same effect as applying the transformation represented by m1 and + then m2 (in that order). */ +void S3L_mat4Xmat4(S3L_Mat4 m1, S3L_Mat4 m2); + +typedef struct +{ + S3L_Unit focalLength; /**< Defines the field of view (FOV). 0 sets an + orthographics projection (scale is controlled + with camera's scale in its transform). */ + S3L_Transform3D transform; +} S3L_Camera; + +void S3L_cameraInit(S3L_Camera *camera); + +typedef struct +{ + uint8_t backfaceCulling; /**< What backface culling to use. Possible + values: + - 0 none + - 1 clock-wise + - 2 counter clock-wise */ + int8_t visible; /**< Can be used to easily hide the model. */ +} S3L_DrawConfig; + +void S3L_drawConfigInit(S3L_DrawConfig *config); + +typedef struct +{ + const S3L_Unit *vertices; + S3L_Index vertexCount; + const S3L_Index *triangles; + S3L_Index triangleCount; + S3L_Transform3D transform; + S3L_Mat4 *customTransformMatrix; /**< This can be used to override the + transform (if != 0) with a custom + transform matrix, which is more + general. */ + S3L_DrawConfig config; +} S3L_Model3D; ///< Represents a 3D model. + +void S3L_model3DInit( + const S3L_Unit *vertices, + S3L_Index vertexCount, + const S3L_Index *triangles, + S3L_Index triangleCount, + S3L_Model3D *model); + +typedef struct +{ + S3L_Model3D *models; + S3L_Index modelCount; + S3L_Camera camera; +} S3L_Scene; ///< Represent the 3D scene to be rendered. + +void S3L_sceneInit( + S3L_Model3D *models, + S3L_Index modelCount, + S3L_Scene *scene); + +typedef struct +{ + S3L_ScreenCoord x; ///< Screen X coordinate. + S3L_ScreenCoord y; ///< Screen Y coordinate. + + S3L_Unit barycentric[3]; /**< Barycentric coords correspond to the three + vertices. These serve to locate the pixel on a + triangle and interpolate values between its + three points. Each one goes from 0 to + S3L_FRACTIONS_PER_UNIT (including), but due to + rounding error may fall outside this range (you + can use S3L_correctBarycentricCoords to fix this + for the price of some performance). The sum of + the three coordinates will always be exactly + S3L_FRACTIONS_PER_UNIT. */ + S3L_Index modelIndex; ///< Model index within the scene. + S3L_Index triangleIndex; ///< Triangle index within the model. + uint32_t triangleID; /**< Unique ID of the triangle withing the whole + scene. This can be used e.g. by a cache to + quickly find out if a triangle has changed. */ + S3L_Unit depth; ///< Depth (only if depth is turned on). + S3L_Unit previousZ; /**< Z-buffer value (not necessarily world depth in + S3L_Units!) that was in the z-buffer on the + pixels position before this pixel was + rasterized. This can be used to set the value + back, e.g. for transparency. */ + S3L_ScreenCoord triangleSize[2]; /**< Rasterized triangle width and height, + can be used e.g. for MIP mapping. */ +} S3L_PixelInfo; /**< Used to pass the info about a rasterized pixel + (fragment) to the user-defined drawing func. */ + +static inline void S3L_pixelInfoInit(S3L_PixelInfo *p); + +/** Corrects barycentric coordinates so that they exactly meet the defined + conditions (each fall into <0,S3L_FRACTIONS_PER_UNIT>, sum = + S3L_FRACTIONS_PER_UNIT). Note that doing this per-pixel can slow the program + down significantly. */ +static inline void S3L_correctBarycentricCoords(S3L_Unit barycentric[3]); + +// general helper functions +static inline S3L_Unit S3L_abs(S3L_Unit value); +static inline S3L_Unit S3L_min(S3L_Unit v1, S3L_Unit v2); +static inline S3L_Unit S3L_max(S3L_Unit v1, S3L_Unit v2); +static inline S3L_Unit S3L_clamp(S3L_Unit v, S3L_Unit v1, S3L_Unit v2); +static inline S3L_Unit S3L_wrap(S3L_Unit value, S3L_Unit mod); +static inline S3L_Unit S3L_nonZero(S3L_Unit value); +static inline S3L_Unit S3L_zeroClamp(S3L_Unit value); + +S3L_Unit S3L_sin(S3L_Unit x); +S3L_Unit S3L_asin(S3L_Unit x); +static inline S3L_Unit S3L_cos(S3L_Unit x); + +S3L_Unit S3L_vec3Length(S3L_Vec4 v); +S3L_Unit S3L_sqrt(S3L_Unit value); + +/** Projects a single point from 3D space to the screen space (pixels), which + can be useful e.g. for drawing sprites. The w component of input and result + holds the point size. If this size is 0 in the result, the sprite is outside + the view. */ +void S3L_project3DPointToScreen( + S3L_Vec4 point, + S3L_Camera camera, + S3L_Vec4 *result); + +/** Computes a normalized normal of given triangle. */ +void S3L_triangleNormal(S3L_Vec4 t0, S3L_Vec4 t1, S3L_Vec4 t2, + S3L_Vec4 *n); + +/** Helper function for retrieving per-vertex indexed values from an array, + e.g. texturing (UV) coordinates. The 'indices' array contains three indices + for each triangle, each index pointing into 'values' array, which contains + the values, each one consisting of 'numComponents' components (e.g. 2 for + UV coordinates). The three values are retrieved into 'v0', 'v1' and 'v2' + vectors (into x, y, z and w, depending on 'numComponents'). This function is + meant to be used per-triangle (typically from a cache), NOT per-pixel, as it + is not as fast as possible! */ +void S3L_getIndexedTriangleValues( + S3L_Index triangleIndex, + const S3L_Index *indices, + const S3L_Unit *values, + uint8_t numComponents, + S3L_Vec4 *v0, + S3L_Vec4 *v1, + S3L_Vec4 *v2); + +/** Computes a normalized normal for every vertex of given model (this is + relatively slow and SHOUDN'T be done each frame). The dst array must have a + sufficient size preallocated! The size is: number of model vertices * 3 * + sizeof(S3L_Unit). Note that for advanced allowing sharp edges it is not + sufficient to have per-vertex normals, but must be per-triangle. This + function doesn't support this. + + The function computes a normal for each vertex by averaging normals of + the triangles containing the vertex. The maximum number of these triangle + normals that will be averaged is set with + S3L_NORMAL_COMPUTE_MAXIMUM_AVERAGE. */ +void S3L_computeModelNormals(S3L_Model3D model, S3L_Unit *dst, + int8_t transformNormals); + +/** Interpolated between two values, v1 and v2, in the same ratio as t is to + tMax. Does NOT prevent zero division. */ +static inline S3L_Unit S3L_interpolate( + S3L_Unit v1, + S3L_Unit v2, + S3L_Unit t, + S3L_Unit tMax); + +/** Same as S3L_interpolate but with v1 == 0. Should be faster. */ +static inline S3L_Unit S3L_interpolateFrom0( + S3L_Unit v2, + S3L_Unit t, + S3L_Unit tMax); + +/** Like S3L_interpolate, but uses a parameter that goes from 0 to + S3L_FRACTIONS_PER_UNIT - 1, which can be faster. */ +static inline S3L_Unit S3L_interpolateByUnit( + S3L_Unit v1, + S3L_Unit v2, + S3L_Unit t); + +/** Same as S3L_interpolateByUnit but with v1 == 0. Should be faster. */ +static inline S3L_Unit S3L_interpolateByUnitFrom0( + S3L_Unit v2, + S3L_Unit t); + +static inline S3L_Unit S3L_distanceManhattan(S3L_Vec4 a, S3L_Vec4 b); + +/** Returns a value interpolated between the three triangle vertices based on + barycentric coordinates. */ +static inline S3L_Unit S3L_interpolateBarycentric( + S3L_Unit value0, + S3L_Unit value1, + S3L_Unit value2, + S3L_Unit barycentric[3]); + +static inline void S3L_mapProjectionPlaneToScreen( + S3L_Vec4 point, + S3L_ScreenCoord *screenX, + S3L_ScreenCoord *screenY); + +/** Draws a triangle according to given config. The vertices are specified in + Screen Space space (pixels). If perspective correction is enabled, each + vertex has to have a depth (Z position in camera space) specified in the Z + component. */ +void S3L_drawTriangle( + S3L_Vec4 point0, + S3L_Vec4 point1, + S3L_Vec4 point2, + S3L_Index modelIndex, + S3L_Index triangleIndex); + +/** This should be called before rendering each frame. The function clears + buffers and does potentially other things needed for the frame. */ +void S3L_newFrame(void); + +void S3L_zBufferClear(void); +void S3L_stencilBufferClear(void); + +/** Writes a value (not necessarily depth! depends on the format of z-buffer) + to z-buffer (if enabled). Does NOT check boundaries! */ +void S3L_zBufferWrite(S3L_ScreenCoord x, S3L_ScreenCoord y, S3L_Unit value); + +/** Reads a value (not necessarily depth! depends on the format of z-buffer) + from z-buffer (if enabled). Does NOT check boundaries! */ +S3L_Unit S3L_zBufferRead(S3L_ScreenCoord x, S3L_ScreenCoord y); + +static inline void S3L_rotate2DPoint(S3L_Unit *x, S3L_Unit *y, S3L_Unit angle); + +/** Predefined vertices of a cube to simply insert in an array. These come with + S3L_CUBE_TRIANGLES and S3L_CUBE_TEXCOORDS. */ +#define S3L_CUBE_VERTICES(m)\ + /* 0 front, bottom, right */\ + m/2, -m/2, -m/2,\ + /* 1 front, bottom, left */\ +-m/2, -m/2, -m/2,\ + /* 2 front, top, right */\ + m/2, m/2, -m/2,\ + /* 3 front, top, left */\ +-m/2, m/2, -m/2,\ + /* 4 back, bottom, right */\ + m/2, -m/2, m/2,\ + /* 5 back, bottom, left */\ +-m/2, -m/2, m/2,\ + /* 6 back, top, right */\ + m/2, m/2, m/2,\ + /* 7 back, top, left */\ +-m/2, m/2, m/2 + +#define S3L_CUBE_VERTEX_COUNT 8 + +/** Predefined triangle indices of a cube, to be used with S3L_CUBE_VERTICES + and S3L_CUBE_TEXCOORDS. */ +#define S3L_CUBE_TRIANGLES\ + 3, 0, 2, /* front */\ + 1, 0, 3,\ + 0, 4, 2, /* right */\ + 2, 4, 6,\ + 4, 5, 6, /* back */\ + 7, 6, 5,\ + 3, 7, 1, /* left */\ + 1, 7, 5,\ + 6, 3, 2, /* top */\ + 7, 3, 6,\ + 1, 4, 0, /* bottom */\ + 5, 4, 1 + +#define S3L_CUBE_TRIANGLE_COUNT 12 + +/** Predefined texture coordinates of a cube, corresponding to triangles (NOT + vertices), to be used with S3L_CUBE_VERTICES and S3L_CUBE_TRIANGLES. */ +#define S3L_CUBE_TEXCOORDS(m)\ + 0,0, m,m, m,0,\ + 0,m, m,m, 0,0,\ + m,m, m,0, 0,m,\ + 0,m, m,0, 0,0,\ + m,0, 0,0, m,m,\ + 0,m, m,m, 0,0,\ + 0,0, 0,m, m,0,\ + m,0, 0,m, m,m,\ + 0,0, m,m, m,0,\ + 0,m, m,m, 0,0,\ + m,0, 0,m, m,m,\ + 0,0, 0,m, m,0 + +//============================================================================= +// privates + +#define S3L_UNUSED(what) (void)(what) ///< helper macro for unused vars + +#define S3L_HALF_RESOLUTION_X (S3L_RESOLUTION_X >> 1) +#define S3L_HALF_RESOLUTION_Y (S3L_RESOLUTION_Y >> 1) + +#define S3L_PROJECTION_PLANE_HEIGHT\ + ((S3L_RESOLUTION_Y * S3L_F * 2) / S3L_RESOLUTION_X) + +#if S3L_Z_BUFFER == 1 + #define S3L_MAX_DEPTH 2147483647 + S3L_Unit S3L_zBuffer[S3L_MAX_PIXELS]; + #define S3L_zBufferFormat(depth) (depth) +#elif S3L_Z_BUFFER == 2 + #define S3L_MAX_DEPTH 255 + uint8_t S3L_zBuffer[S3L_MAX_PIXELS]; + #define S3L_zBufferFormat(depth)\ + S3L_min(255,(depth) >> S3L_REDUCED_Z_BUFFER_GRANULARITY) +#endif + +#if S3L_Z_BUFFER +static inline int8_t S3L_zTest( + S3L_ScreenCoord x, + S3L_ScreenCoord y, + S3L_Unit depth) +{ + uint32_t index = y * S3L_RESOLUTION_X + x; + + depth = S3L_zBufferFormat(depth); + +#if S3L_Z_BUFFER == 2 + #define cmp <= /* For reduced z-buffer we need equality test, because + otherwise pixels at the maximum depth (255) would never be + drawn over the background (which also has the depth of + 255). */ +#else + #define cmp < /* For normal z-buffer we leave out equality test to not waste + time by drawing over already drawn pixls. */ +#endif + + if (depth cmp S3L_zBuffer[index]) + { + S3L_zBuffer[index] = depth; + return 1; + } + +#undef cmp + + return 0; +} +#endif + +S3L_Unit S3L_zBufferRead(S3L_ScreenCoord x, S3L_ScreenCoord y) +{ +#if S3L_Z_BUFFER + return S3L_zBuffer[y * S3L_RESOLUTION_X + x]; +#else + S3L_UNUSED(x); + S3L_UNUSED(y); + + return 0; +#endif +} + +void S3L_zBufferWrite(S3L_ScreenCoord x, S3L_ScreenCoord y, S3L_Unit value) +{ +#if S3L_Z_BUFFER + S3L_zBuffer[y * S3L_RESOLUTION_X + x] = value; +#else + S3L_UNUSED(x); + S3L_UNUSED(y); + S3L_UNUSED(value); +#endif +} + +#if S3L_STENCIL_BUFFER + #define S3L_STENCIL_BUFFER_SIZE\ + ((S3L_RESOLUTION_X * S3L_RESOLUTION_Y - 1) / 8 + 1) + +uint8_t S3L_stencilBuffer[S3L_STENCIL_BUFFER_SIZE]; + +static inline int8_t S3L_stencilTest( + S3L_ScreenCoord x, + S3L_ScreenCoord y) +{ + uint32_t index = y * S3L_RESOLUTION_X + x; + uint32_t bit = (index & 0x00000007); + index = index >> 3; + + uint8_t val = S3L_stencilBuffer[index]; + + if ((val >> bit) & 0x1) + return 0; + + S3L_stencilBuffer[index] = val | (0x1 << bit); + + return 1; +} +#endif + +#define S3L_COMPUTE_LERP_DEPTH\ + (S3L_COMPUTE_DEPTH && (S3L_PERSPECTIVE_CORRECTION == 0)) + +#define S3L_SIN_TABLE_LENGTH 128 + +#if S3L_SIN_METHOD == 0 +static const S3L_Unit S3L_sinTable[S3L_SIN_TABLE_LENGTH] = +{ + /* 511 was chosen here as a highest number that doesn't overflow during + compilation for S3L_F == 1024 */ + + (0*S3L_F)/511, (6*S3L_F)/511, + (12*S3L_F)/511, (18*S3L_F)/511, + (25*S3L_F)/511, (31*S3L_F)/511, + (37*S3L_F)/511, (43*S3L_F)/511, + (50*S3L_F)/511, (56*S3L_F)/511, + (62*S3L_F)/511, (68*S3L_F)/511, + (74*S3L_F)/511, (81*S3L_F)/511, + (87*S3L_F)/511, (93*S3L_F)/511, + (99*S3L_F)/511, (105*S3L_F)/511, + (111*S3L_F)/511, (118*S3L_F)/511, + (124*S3L_F)/511, (130*S3L_F)/511, + (136*S3L_F)/511, (142*S3L_F)/511, + (148*S3L_F)/511, (154*S3L_F)/511, + (160*S3L_F)/511, (166*S3L_F)/511, + (172*S3L_F)/511, (178*S3L_F)/511, + (183*S3L_F)/511, (189*S3L_F)/511, + (195*S3L_F)/511, (201*S3L_F)/511, + (207*S3L_F)/511, (212*S3L_F)/511, + (218*S3L_F)/511, (224*S3L_F)/511, + (229*S3L_F)/511, (235*S3L_F)/511, + (240*S3L_F)/511, (246*S3L_F)/511, + (251*S3L_F)/511, (257*S3L_F)/511, + (262*S3L_F)/511, (268*S3L_F)/511, + (273*S3L_F)/511, (278*S3L_F)/511, + (283*S3L_F)/511, (289*S3L_F)/511, + (294*S3L_F)/511, (299*S3L_F)/511, + (304*S3L_F)/511, (309*S3L_F)/511, + (314*S3L_F)/511, (319*S3L_F)/511, + (324*S3L_F)/511, (328*S3L_F)/511, + (333*S3L_F)/511, (338*S3L_F)/511, + (343*S3L_F)/511, (347*S3L_F)/511, + (352*S3L_F)/511, (356*S3L_F)/511, + (361*S3L_F)/511, (365*S3L_F)/511, + (370*S3L_F)/511, (374*S3L_F)/511, + (378*S3L_F)/511, (382*S3L_F)/511, + (386*S3L_F)/511, (391*S3L_F)/511, + (395*S3L_F)/511, (398*S3L_F)/511, + (402*S3L_F)/511, (406*S3L_F)/511, + (410*S3L_F)/511, (414*S3L_F)/511, + (417*S3L_F)/511, (421*S3L_F)/511, + (424*S3L_F)/511, (428*S3L_F)/511, + (431*S3L_F)/511, (435*S3L_F)/511, + (438*S3L_F)/511, (441*S3L_F)/511, + (444*S3L_F)/511, (447*S3L_F)/511, + (450*S3L_F)/511, (453*S3L_F)/511, + (456*S3L_F)/511, (459*S3L_F)/511, + (461*S3L_F)/511, (464*S3L_F)/511, + (467*S3L_F)/511, (469*S3L_F)/511, + (472*S3L_F)/511, (474*S3L_F)/511, + (476*S3L_F)/511, (478*S3L_F)/511, + (481*S3L_F)/511, (483*S3L_F)/511, + (485*S3L_F)/511, (487*S3L_F)/511, + (488*S3L_F)/511, (490*S3L_F)/511, + (492*S3L_F)/511, (494*S3L_F)/511, + (495*S3L_F)/511, (497*S3L_F)/511, + (498*S3L_F)/511, (499*S3L_F)/511, + (501*S3L_F)/511, (502*S3L_F)/511, + (503*S3L_F)/511, (504*S3L_F)/511, + (505*S3L_F)/511, (506*S3L_F)/511, + (507*S3L_F)/511, (507*S3L_F)/511, + (508*S3L_F)/511, (509*S3L_F)/511, + (509*S3L_F)/511, (510*S3L_F)/511, + (510*S3L_F)/511, (510*S3L_F)/511, + (510*S3L_F)/511, (510*S3L_F)/511 +}; +#endif + +#define S3L_SIN_TABLE_UNIT_STEP\ + (S3L_F / (S3L_SIN_TABLE_LENGTH * 4)) + +void S3L_vec4Init(S3L_Vec4 *v) +{ + v->x = 0; v->y = 0; v->z = 0; v->w = S3L_F; +} + +void S3L_vec4Set(S3L_Vec4 *v, S3L_Unit x, S3L_Unit y, S3L_Unit z, S3L_Unit w) +{ + v->x = x; + v->y = y; + v->z = z; + v->w = w; +} + +void S3L_vec3Add(S3L_Vec4 *result, S3L_Vec4 added) +{ + result->x += added.x; + result->y += added.y; + result->z += added.z; +} + +void S3L_vec3Sub(S3L_Vec4 *result, S3L_Vec4 substracted) +{ + result->x -= substracted.x; + result->y -= substracted.y; + result->z -= substracted.z; +} + +void S3L_mat4Init(S3L_Mat4 m) +{ + #define M(x,y) m[x][y] + #define S S3L_F + + M(0,0) = S; M(1,0) = 0; M(2,0) = 0; M(3,0) = 0; + M(0,1) = 0; M(1,1) = S; M(2,1) = 0; M(3,1) = 0; + M(0,2) = 0; M(1,2) = 0; M(2,2) = S; M(3,2) = 0; + M(0,3) = 0; M(1,3) = 0; M(2,3) = 0; M(3,3) = S; + + #undef M + #undef S +} + +void S3L_mat4Copy(S3L_Mat4 src, S3L_Mat4 dst) +{ + for (uint8_t j = 0; j < 4; ++j) + for (uint8_t i = 0; i < 4; ++i) + dst[i][j] = src[i][j]; +} + +S3L_Unit S3L_vec3Dot(S3L_Vec4 a, S3L_Vec4 b) +{ + return (a.x * b.x + a.y * b.y + a.z * b.z) / S3L_F; +} + +void S3L_reflect(S3L_Vec4 toLight, S3L_Vec4 normal, S3L_Vec4 *result) +{ + S3L_Unit d = 2 * S3L_vec3Dot(toLight,normal); + + result->x = (normal.x * d) / S3L_F - toLight.x; + result->y = (normal.y * d) / S3L_F - toLight.y; + result->z = (normal.z * d) / S3L_F - toLight.z; +} + +void S3L_vec3Cross(S3L_Vec4 a, S3L_Vec4 b, S3L_Vec4 *result) +{ + result->x = a.y * b.z - a.z * b.y; + result->y = a.z * b.x - a.x * b.z; + result->z = a.x * b.y - a.y * b.x; +} + +void S3L_triangleNormal(S3L_Vec4 t0, S3L_Vec4 t1, S3L_Vec4 t2, S3L_Vec4 *n) +{ + #define ANTI_OVERFLOW 32 + + t1.x = (t1.x - t0.x) / ANTI_OVERFLOW; + t1.y = (t1.y - t0.y) / ANTI_OVERFLOW; + t1.z = (t1.z - t0.z) / ANTI_OVERFLOW; + + t2.x = (t2.x - t0.x) / ANTI_OVERFLOW; + t2.y = (t2.y - t0.y) / ANTI_OVERFLOW; + t2.z = (t2.z - t0.z) / ANTI_OVERFLOW; + + #undef ANTI_OVERFLOW + + S3L_vec3Cross(t1,t2,n); + + S3L_vec3Normalize(n); +} + +void S3L_getIndexedTriangleValues( + S3L_Index triangleIndex, + const S3L_Index *indices, + const S3L_Unit *values, + uint8_t numComponents, + S3L_Vec4 *v0, + S3L_Vec4 *v1, + S3L_Vec4 *v2) +{ + uint32_t i0, i1; + S3L_Unit *value; + + i0 = triangleIndex * 3; + i1 = indices[i0] * numComponents; + value = (S3L_Unit *) v0; + + if (numComponents > 4) + numComponents = 4; + + for (uint8_t j = 0; j < numComponents; ++j) + { + *value = values[i1]; + i1++; + value++; + } + + i0++; + i1 = indices[i0] * numComponents; + value = (S3L_Unit *) v1; + + for (uint8_t j = 0; j < numComponents; ++j) + { + *value = values[i1]; + i1++; + value++; + } + + i0++; + i1 = indices[i0] * numComponents; + value = (S3L_Unit *) v2; + + for (uint8_t j = 0; j < numComponents; ++j) + { + *value = values[i1]; + i1++; + value++; + } +} + +void S3L_computeModelNormals(S3L_Model3D model, S3L_Unit *dst, + int8_t transformNormals) +{ + S3L_Index vPos = 0; + + S3L_Vec4 n; + + n.w = 0; + + S3L_Vec4 ns[S3L_NORMAL_COMPUTE_MAXIMUM_AVERAGE]; + S3L_Index normalCount; + + for (uint32_t i = 0; i < model.vertexCount; ++i) + { + normalCount = 0; + + for (uint32_t j = 0; j < model.triangleCount * 3; j += 3) + { + if ( + (model.triangles[j] == i) || + (model.triangles[j + 1] == i) || + (model.triangles[j + 2] == i)) + { + S3L_Vec4 t0, t1, t2; + uint32_t vIndex; + + #define getVertex(n)\ + vIndex = model.triangles[j + n] * 3;\ + t##n.x = model.vertices[vIndex];\ + vIndex++;\ + t##n.y = model.vertices[vIndex];\ + vIndex++;\ + t##n.z = model.vertices[vIndex]; + + getVertex(0) + getVertex(1) + getVertex(2) + + #undef getVertex + + S3L_triangleNormal(t0,t1,t2,&(ns[normalCount])); + + normalCount++; + + if (normalCount >= S3L_NORMAL_COMPUTE_MAXIMUM_AVERAGE) + break; + } + } + + n.x = S3L_F; + n.y = 0; + n.z = 0; + + if (normalCount != 0) + { + // compute average + + n.x = 0; + + for (uint8_t i = 0; i < normalCount; ++i) + { + n.x += ns[i].x; + n.y += ns[i].y; + n.z += ns[i].z; + } + + n.x /= normalCount; + n.y /= normalCount; + n.z /= normalCount; + + S3L_vec3Normalize(&n); + } + + dst[vPos] = n.x; + vPos++; + + dst[vPos] = n.y; + vPos++; + + dst[vPos] = n.z; + vPos++; + } + + S3L_Mat4 m; + + S3L_makeWorldMatrix(model.transform,m); + + if (transformNormals) + for (S3L_Index i = 0; i < model.vertexCount * 3; i += 3) + { + n.x = dst[i]; + n.y = dst[i + 1]; + n.z = dst[i + 2]; + + S3L_vec4Xmat4(&n,m); + + dst[i] = n.x; + dst[i + 1] = n.y; + dst[i + 2] = n.z; + } +} + +void S3L_vec4Xmat4(S3L_Vec4 *v, S3L_Mat4 m) +{ + S3L_Vec4 vBackup; + + vBackup.x = v->x; + vBackup.y = v->y; + vBackup.z = v->z; + vBackup.w = v->w; + + #define dotCol(col)\ + ((vBackup.x * m[col][0]) +\ + (vBackup.y * m[col][1]) +\ + (vBackup.z * m[col][2]) +\ + (vBackup.w * m[col][3])) / S3L_F + + v->x = dotCol(0); + v->y = dotCol(1); + v->z = dotCol(2); + v->w = dotCol(3); +} + +void S3L_vec3Xmat4(S3L_Vec4 *v, S3L_Mat4 m) +{ + S3L_Vec4 vBackup; + + #undef dotCol + #define dotCol(col)\ + (vBackup.x * m[col][0]) / S3L_F +\ + (vBackup.y * m[col][1]) / S3L_F +\ + (vBackup.z * m[col][2]) / S3L_F +\ + m[col][3] + + vBackup.x = v->x; + vBackup.y = v->y; + vBackup.z = v->z; + vBackup.w = v->w; + + v->x = dotCol(0); + v->y = dotCol(1); + v->z = dotCol(2); + v->w = S3L_F; +} + +#undef dotCol + +S3L_Unit S3L_abs(S3L_Unit value) +{ + return value * (((value >= 0) << 1) - 1); +} + +S3L_Unit S3L_min(S3L_Unit v1, S3L_Unit v2) +{ + return v1 >= v2 ? v2 : v1; +} + +S3L_Unit S3L_max(S3L_Unit v1, S3L_Unit v2) +{ + return v1 >= v2 ? v1 : v2; +} + +S3L_Unit S3L_clamp(S3L_Unit v, S3L_Unit v1, S3L_Unit v2) +{ + return v >= v1 ? (v <= v2 ? v : v2) : v1; +} + +S3L_Unit S3L_zeroClamp(S3L_Unit value) +{ + return (value * (value >= 0)); +} + +S3L_Unit S3L_wrap(S3L_Unit value, S3L_Unit mod) +{ + return value >= 0 ? (value % mod) : (mod + (value % mod) - 1); +} + +S3L_Unit S3L_nonZero(S3L_Unit value) +{ + return (value + (value == 0)); +} + +S3L_Unit S3L_interpolate(S3L_Unit v1, S3L_Unit v2, S3L_Unit t, S3L_Unit tMax) +{ + return v1 + ((v2 - v1) * t) / tMax; +} + +S3L_Unit S3L_interpolateByUnit(S3L_Unit v1, S3L_Unit v2, S3L_Unit t) +{ + return v1 + ((v2 - v1) * t) / S3L_F; +} + +S3L_Unit S3L_interpolateByUnitFrom0(S3L_Unit v2, S3L_Unit t) +{ + return (v2 * t) / S3L_F; +} + +S3L_Unit S3L_interpolateFrom0(S3L_Unit v2, S3L_Unit t, S3L_Unit tMax) +{ + return (v2 * t) / tMax; +} + +S3L_Unit S3L_distanceManhattan(S3L_Vec4 a, S3L_Vec4 b) +{ + return + S3L_abs(a.x - b.x) + + S3L_abs(a.y - b.y) + + S3L_abs(a.z - b.z); +} + +void S3L_mat4Xmat4(S3L_Mat4 m1, S3L_Mat4 m2) +{ + S3L_Mat4 mat1; + + for (uint16_t row = 0; row < 4; ++row) + for (uint16_t col = 0; col < 4; ++col) + mat1[col][row] = m1[col][row]; + + for (uint16_t row = 0; row < 4; ++row) + for (uint16_t col = 0; col < 4; ++col) + { + m1[col][row] = 0; + + for (uint16_t i = 0; i < 4; ++i) + m1[col][row] += + (mat1[i][row] * m2[col][i]) / S3L_F; + } +} + +S3L_Unit S3L_sin(S3L_Unit x) +{ +#if S3L_SIN_METHOD == 0 + x = S3L_wrap(x / S3L_SIN_TABLE_UNIT_STEP,S3L_SIN_TABLE_LENGTH * 4); + int8_t positive = 1; + + if (x < S3L_SIN_TABLE_LENGTH) + { + } + else if (x < S3L_SIN_TABLE_LENGTH * 2) + { + x = S3L_SIN_TABLE_LENGTH * 2 - x - 1; + } + else if (x < S3L_SIN_TABLE_LENGTH * 3) + { + x = x - S3L_SIN_TABLE_LENGTH * 2; + positive = 0; + } + else + { + x = S3L_SIN_TABLE_LENGTH - (x - S3L_SIN_TABLE_LENGTH * 3) - 1; + positive = 0; + } + + return positive ? S3L_sinTable[x] : -1 * S3L_sinTable[x]; +#else + int8_t sign = 1; + + if (x < 0) // odd function + { + x *= -1; + sign = -1; + } + + x %= S3L_F; + + if (x > S3L_F / 2) + { + x -= S3L_F / 2; + sign *= -1; + } + + S3L_Unit tmp = S3L_F - 2 * x; + + #define _PI2 ((S3L_Unit) (9.8696044 * S3L_F)) + return sign * // Bhaskara's approximation + (((32 * x * _PI2) / S3L_F) * tmp) / + ((_PI2 * (5 * S3L_F - (8 * x * tmp) / + S3L_F)) / S3L_F); + #undef _PI2 +#endif +} + +S3L_Unit S3L_asin(S3L_Unit x) +{ +#if S3L_SIN_METHOD == 0 + x = S3L_clamp(x,-S3L_F,S3L_F); + + int8_t sign = 1; + + if (x < 0) + { + sign = -1; + x *= -1; + } + + int16_t low = 0, high = S3L_SIN_TABLE_LENGTH -1, middle; + + while (low <= high) // binary search + { + middle = (low + high) / 2; + + S3L_Unit v = S3L_sinTable[middle]; + + if (v > x) + high = middle - 1; + else if (v < x) + low = middle + 1; + else + break; + } + + middle *= S3L_SIN_TABLE_UNIT_STEP; + + return sign * middle; +#else + S3L_Unit low = -1 * S3L_F / 4, + high = S3L_F / 4, + middle; + + while (low <= high) // binary search + { + middle = (low + high) / 2; + + S3L_Unit v = S3L_sin(middle); + + if (v > x) + high = middle - 1; + else if (v < x) + low = middle + 1; + else + break; + } + + return middle; +#endif +} + +S3L_Unit S3L_cos(S3L_Unit x) +{ + return S3L_sin(x + S3L_F / 4); +} + +void S3L_correctBarycentricCoords(S3L_Unit barycentric[3]) +{ + barycentric[0] = S3L_clamp(barycentric[0],0,S3L_F); + barycentric[1] = S3L_clamp(barycentric[1],0,S3L_F); + + S3L_Unit d = S3L_F - barycentric[0] - barycentric[1]; + + if (d < 0) + { + barycentric[0] += d; + barycentric[2] = 0; + } + else + barycentric[2] = d; +} + +void S3L_makeTranslationMat( + S3L_Unit offsetX, + S3L_Unit offsetY, + S3L_Unit offsetZ, + S3L_Mat4 m) +{ + #define M(x,y) m[x][y] + #define S S3L_F + + M(0,0) = S; M(1,0) = 0; M(2,0) = 0; M(3,0) = 0; + M(0,1) = 0; M(1,1) = S; M(2,1) = 0; M(3,1) = 0; + M(0,2) = 0; M(1,2) = 0; M(2,2) = S; M(3,2) = 0; + M(0,3) = offsetX; M(1,3) = offsetY; M(2,3) = offsetZ; M(3,3) = S; + + #undef M + #undef S +} + +void S3L_makeScaleMatrix( + S3L_Unit scaleX, + S3L_Unit scaleY, + S3L_Unit scaleZ, + S3L_Mat4 m) +{ + #define M(x,y) m[x][y] + + M(0,0) = scaleX; M(1,0) = 0; M(2,0) = 0; M(3,0) = 0; + M(0,1) = 0; M(1,1) = scaleY; M(2,1) = 0; M(3,1) = 0; + M(0,2) = 0; M(1,2) = 0; M(2,2) = scaleZ; M(3,2) = 0; + M(0,3) = 0; M(1,3) = 0; M(2,3) = 0; M(3,3) = S3L_F; + + #undef M +} + +void S3L_makeRotationMatrixZXY( + S3L_Unit byX, + S3L_Unit byY, + S3L_Unit byZ, + S3L_Mat4 m) +{ + byX *= -1; + byY *= -1; + byZ *= -1; + + S3L_Unit sx = S3L_sin(byX); + S3L_Unit sy = S3L_sin(byY); + S3L_Unit sz = S3L_sin(byZ); + + S3L_Unit cx = S3L_cos(byX); + S3L_Unit cy = S3L_cos(byY); + S3L_Unit cz = S3L_cos(byZ); + + #define M(x,y) m[x][y] + #define S S3L_F + + M(0,0) = (cy * cz) / S + (sy * sx * sz) / (S * S); + M(1,0) = (cx * sz) / S; + M(2,0) = (cy * sx * sz) / (S * S) - (cz * sy) / S; + M(3,0) = 0; + + M(0,1) = (cz * sy * sx) / (S * S) - (cy * sz) / S; + M(1,1) = (cx * cz) / S; + M(2,1) = (cy * cz * sx) / (S * S) + (sy * sz) / S; + M(3,1) = 0; + + M(0,2) = (cx * sy) / S; + M(1,2) = -1 * sx; + M(2,2) = (cy * cx) / S; + M(3,2) = 0; + + M(0,3) = 0; + M(1,3) = 0; + M(2,3) = 0; + M(3,3) = S3L_F; + + #undef M + #undef S +} + +S3L_Unit S3L_sqrt(S3L_Unit value) +{ + int8_t sign = 1; + + if (value < 0) + { + sign = -1; + value *= -1; + } + + uint32_t result = 0; + uint32_t a = value; + uint32_t b = 1u << 30; + + while (b > a) + b >>= 2; + + while (b != 0) + { + if (a >= result + b) + { + a -= result + b; + result = result + 2 * b; + } + + b >>= 2; + result >>= 1; + } + + return result * sign; +} + +S3L_Unit S3L_vec3Length(S3L_Vec4 v) +{ + return S3L_sqrt(v.x * v.x + v.y * v.y + v.z * v.z); +} + +S3L_Unit S3L_vec2Length(S3L_Vec4 v) +{ + return S3L_sqrt(v.x * v.x + v.y * v.y); +} + +void S3L_vec3Normalize(S3L_Vec4 *v) +{ + #define SCALE 16 + #define BOTTOM_LIMIT 16 + #define UPPER_LIMIT 900 + + /* Here we try to decide if the vector is too small and would cause + inaccurate result due to very its inaccurate length. If so, we scale + it up. We can't scale up everything as big vectors overflow in length + calculations. */ + + if ( + S3L_abs(v->x) <= BOTTOM_LIMIT && + S3L_abs(v->y) <= BOTTOM_LIMIT && + S3L_abs(v->z) <= BOTTOM_LIMIT) + { + v->x *= SCALE; + v->y *= SCALE; + v->z *= SCALE; + } + else if ( + S3L_abs(v->x) > UPPER_LIMIT || + S3L_abs(v->y) > UPPER_LIMIT || + S3L_abs(v->z) > UPPER_LIMIT) + { + v->x /= SCALE; + v->y /= SCALE; + v->z /= SCALE; + } + + #undef SCALE + #undef BOTTOM_LIMIT + #undef UPPER_LIMIT + + S3L_Unit l = S3L_vec3Length(*v); + + if (l == 0) + return; + + v->x = (v->x * S3L_F) / l; + v->y = (v->y * S3L_F) / l; + v->z = (v->z * S3L_F) / l; +} + +void S3L_vec3NormalizeFast(S3L_Vec4 *v) +{ + S3L_Unit l = S3L_vec3Length(*v); + + if (l == 0) + return; + + v->x = (v->x * S3L_F) / l; + v->y = (v->y * S3L_F) / l; + v->z = (v->z * S3L_F) / l; +} + +void S3L_transform3DInit(S3L_Transform3D *t) +{ + S3L_vec4Init(&(t->translation)); + S3L_vec4Init(&(t->rotation)); + t->scale.x = S3L_F; + t->scale.y = S3L_F; + t->scale.z = S3L_F; + t->scale.w = 0; +} + +/** Performs perspecive division (z-divide). Does NOT check for division by + zero. */ +static inline void S3L_perspectiveDivide(S3L_Vec4 *vector, + S3L_Unit focalLength) +{ + if (focalLength == 0) + return; + + vector->x = (vector->x * focalLength) / vector->z; + vector->y = (vector->y * focalLength) / vector->z; +} + +void S3L_project3DPointToScreen( + S3L_Vec4 point, + S3L_Camera camera, + S3L_Vec4 *result) +{ + // TODO: hotfix to prevent a mapping bug probably to overlfows + S3L_Vec4 toPoint = point, camForw; + + S3L_vec3Sub(&toPoint,camera.transform.translation); + + S3L_vec3Normalize(&toPoint); + + S3L_rotationToDirections(camera.transform.rotation,S3L_FRACTIONS_PER_UNIT, + &camForw,0,0); + + if (S3L_vec3Dot(toPoint,camForw) < S3L_FRACTIONS_PER_UNIT / 6) + { + result->z = -1; + result->w = 0; + return; + } + // end of hotfix + S3L_Mat4 m; + S3L_makeCameraMatrix(camera.transform,m); + + S3L_Unit s = point.w; + + point.w = S3L_F; + + S3L_vec3Xmat4(&point,m); + + point.z = S3L_nonZero(point.z); + + S3L_perspectiveDivide(&point,camera.focalLength); + + S3L_ScreenCoord x, y; + + S3L_mapProjectionPlaneToScreen(point,&x,&y); + + result->x = x; + result->y = y; + result->z = point.z; + + result->w = + (point.z <= 0) ? 0 : + ( + camera.focalLength > 0 ?( + (s * camera.focalLength * S3L_RESOLUTION_X) / + (point.z * S3L_F)) : + ((camera.transform.scale.x * S3L_RESOLUTION_X) / S3L_F) + ); +} + +void S3L_lookAt(S3L_Vec4 pointTo, S3L_Transform3D *t) +{ + S3L_Vec4 v; + + v.x = pointTo.x - t->translation.x; + v.y = pointTo.z - t->translation.z; + + S3L_Unit dx = v.x; + S3L_Unit l = S3L_vec2Length(v); + + dx = (v.x * S3L_F) / S3L_nonZero(l); // normalize + + t->rotation.y = -1 * S3L_asin(dx); + + if (v.y < 0) + t->rotation.y = S3L_F / 2 - t->rotation.y; + + v.x = pointTo.y - t->translation.y; + v.y = l; + + l = S3L_vec2Length(v); + + dx = (v.x * S3L_F) / S3L_nonZero(l); + + t->rotation.x = S3L_asin(dx); +} + +void S3L_transform3DSet( + S3L_Unit tx, + S3L_Unit ty, + S3L_Unit tz, + S3L_Unit rx, + S3L_Unit ry, + S3L_Unit rz, + S3L_Unit sx, + S3L_Unit sy, + S3L_Unit sz, + S3L_Transform3D *t) +{ + t->translation.x = tx; + t->translation.y = ty; + t->translation.z = tz; + + t->rotation.x = rx; + t->rotation.y = ry; + t->rotation.z = rz; + + t->scale.x = sx; + t->scale.y = sy; + t->scale.z = sz; +} + +void S3L_cameraInit(S3L_Camera *camera) +{ + camera->focalLength = S3L_F; + S3L_transform3DInit(&(camera->transform)); +} + +void S3L_rotationToDirections( + S3L_Vec4 rotation, + S3L_Unit length, + S3L_Vec4 *forw, + S3L_Vec4 *right, + S3L_Vec4 *up) +{ + S3L_Mat4 m; + + S3L_makeRotationMatrixZXY(rotation.x,rotation.y,rotation.z,m); + + if (forw != 0) + { + forw->x = 0; + forw->y = 0; + forw->z = length; + S3L_vec3Xmat4(forw,m); + } + + if (right != 0) + { + right->x = length; + right->y = 0; + right->z = 0; + S3L_vec3Xmat4(right,m); + } + + if (up != 0) + { + up->x = 0; + up->y = length; + up->z = 0; + S3L_vec3Xmat4(up,m); + } +} + +void S3L_pixelInfoInit(S3L_PixelInfo *p) +{ + p->x = 0; + p->y = 0; + p->barycentric[0] = S3L_F; + p->barycentric[1] = 0; + p->barycentric[2] = 0; + p->modelIndex = 0; + p->triangleIndex = 0; + p->triangleID = 0; + p->depth = 0; + p->previousZ = 0; +} + +void S3L_model3DInit( + const S3L_Unit *vertices, + S3L_Index vertexCount, + const S3L_Index *triangles, + S3L_Index triangleCount, + S3L_Model3D *model) +{ + model->vertices = vertices; + model->vertexCount = vertexCount; + model->triangles = triangles; + model->triangleCount = triangleCount; + model->customTransformMatrix = 0; + + S3L_transform3DInit(&(model->transform)); + S3L_drawConfigInit(&(model->config)); +} + +void S3L_sceneInit( + S3L_Model3D *models, + S3L_Index modelCount, + S3L_Scene *scene) +{ + scene->models = models; + scene->modelCount = modelCount; + S3L_cameraInit(&(scene->camera)); +} + +void S3L_drawConfigInit(S3L_DrawConfig *config) +{ + config->backfaceCulling = 2; + config->visible = 1; +} + +#ifndef S3L_PIXEL_FUNCTION + #error Pixel rendering function (S3L_PIXEL_FUNCTION) not specified! +#endif + +static inline void S3L_PIXEL_FUNCTION(S3L_PixelInfo *pixel); // forward decl + +/** Serves to accelerate linear interpolation for performance-critical + code. Functions such as S3L_interpolate require division to compute each + interpolated value, while S3L_FastLerpState only requires a division for + the initiation and a shift for retrieving each interpolated value. + + S3L_FastLerpState stores a value and a step, both scaled (shifted by + S3L_FAST_LERP_QUALITY) to increase precision. The step is being added to the + value, which achieves the interpolation. This will only be useful for + interpolations in which we need to get the interpolated value in every step. + + BEWARE! Shifting a negative value is undefined, so handling shifting of + negative values has to be done cleverly. */ +typedef struct +{ + S3L_Unit valueScaled; + S3L_Unit stepScaled; +} S3L_FastLerpState; + +#define S3L_getFastLerpValue(state)\ + (state.valueScaled >> S3L_FAST_LERP_QUALITY) + +#define S3L_stepFastLerp(state)\ + state.valueScaled += state.stepScaled + +static inline S3L_Unit S3L_interpolateBarycentric( + S3L_Unit value0, + S3L_Unit value1, + S3L_Unit value2, + S3L_Unit barycentric[3]) +{ + return + ( + (value0 * barycentric[0]) + + (value1 * barycentric[1]) + + (value2 * barycentric[2]) + ) / S3L_F; +} + +void S3L_mapProjectionPlaneToScreen( + S3L_Vec4 point, + S3L_ScreenCoord *screenX, + S3L_ScreenCoord *screenY) +{ + *screenX = + S3L_HALF_RESOLUTION_X + + (point.x * S3L_HALF_RESOLUTION_X) / S3L_F; + + *screenY = + S3L_HALF_RESOLUTION_Y - + (point.y * S3L_HALF_RESOLUTION_X) / S3L_F; +} + +void S3L_zBufferClear(void) +{ +#if S3L_Z_BUFFER + for (uint32_t i = 0; i < S3L_RESOLUTION_X * S3L_RESOLUTION_Y; ++i) + S3L_zBuffer[i] = S3L_MAX_DEPTH; +#endif +} + +void S3L_stencilBufferClear(void) +{ +#if S3L_STENCIL_BUFFER + for (uint32_t i = 0; i < S3L_STENCIL_BUFFER_SIZE; ++i) + S3L_stencilBuffer[i] = 0; +#endif +} + +void S3L_newFrame(void) +{ + S3L_zBufferClear(); + S3L_stencilBufferClear(); +} + +/* the following serves to communicate info about if the triangle has been split + and how the barycentrics should be remapped. */ +uint8_t _S3L_projectedTriangleState = 0; // 0 = normal, 1 = cut, 2 = split + +#if S3L_NEAR_CROSS_STRATEGY == 3 +S3L_Vec4 _S3L_triangleRemapBarycentrics[6]; +#endif + +void S3L_drawTriangle( + S3L_Vec4 point0, + S3L_Vec4 point1, + S3L_Vec4 point2, + S3L_Index modelIndex, + S3L_Index triangleIndex) +{ + S3L_PixelInfo p; + S3L_pixelInfoInit(&p); + p.modelIndex = modelIndex; + p.triangleIndex = triangleIndex; + p.triangleID = (modelIndex << 16) | triangleIndex; + + S3L_Vec4 *tPointSS, *lPointSS, *rPointSS; /* points in Screen Space (in + S3L_Units, normalized by + S3L_F) */ + + S3L_Unit *barycentric0; // bar. coord that gets higher from L to R + S3L_Unit *barycentric1; // bar. coord that gets higher from R to L + S3L_Unit *barycentric2; // bar. coord that gets higher from bottom up + + // sort the vertices: + + #define assignPoints(t,a,b)\ + {\ + tPointSS = &point##t;\ + barycentric2 = &(p.barycentric[t]);\ + if (S3L_triangleWinding(point##t.x,point##t.y,point##a.x,point##a.y,\ + point##b.x,point##b.y) >= 0)\ + {\ + lPointSS = &point##a; rPointSS = &point##b;\ + barycentric0 = &(p.barycentric[b]);\ + barycentric1 = &(p.barycentric[a]);\ + }\ + else\ + {\ + lPointSS = &point##b; rPointSS = &point##a;\ + barycentric0 = &(p.barycentric[a]);\ + barycentric1 = &(p.barycentric[b]);\ + }\ + } + + if (point0.y <= point1.y) + { + if (point0.y <= point2.y) + assignPoints(0,1,2) + else + assignPoints(2,0,1) + } + else + { + if (point1.y <= point2.y) + assignPoints(1,0,2) + else + assignPoints(2,0,1) + } + + #undef assignPoints + +#if S3L_FLAT + *barycentric0 = S3L_F / 3; + *barycentric1 = S3L_F / 3; + *barycentric2 = S3L_F - 2 * (S3L_F / 3); +#endif + + p.triangleSize[0] = rPointSS->x - lPointSS->x; + p.triangleSize[1] = + (rPointSS->y > lPointSS->y ? rPointSS->y : lPointSS->y) - tPointSS->y; + + // now draw the triangle line by line: + + S3L_ScreenCoord splitY; // Y of the vertically middle point of the triangle + S3L_ScreenCoord endY; // bottom Y of the whole triangle + int splitOnLeft; /* whether splitY is the y coord. of left or right + point */ + + if (rPointSS->y <= lPointSS->y) + { + splitY = rPointSS->y; + splitOnLeft = 0; + endY = lPointSS->y; + } + else + { + splitY = lPointSS->y; + splitOnLeft = 1; + endY = rPointSS->y; + } + + S3L_ScreenCoord currentY = tPointSS->y; + + /* We'll be using an algorithm similar to Bresenham line algorithm. The + specifics of this algorithm are among others: + + - drawing possibly NON-CONTINUOUS line + - NOT tracing the line exactly, but rather rasterizing one the right + side of it, according to the pixel CENTERS, INCLUDING the pixel + centers + + The principle is this: + + - Move vertically by pixels and accumulate the error (abs(dx/dy)). + - If the error is greater than one (crossed the next pixel center), keep + moving horizontally and substracting 1 from the error until it is less + than 1 again. + - To make this INTEGER ONLY, scale the case so that distance between + pixels is equal to dy (instead of 1). This way the error becomes + dx/dy * dy == dx, and we're comparing the error to (and potentially + substracting) 1 * dy == dy. */ + + int16_t + /* triangle side: + left right */ + lX, rX, // current x position on the screen + lDx, rDx, // dx (end point - start point) + lDy, rDy, // dy (end point - start point) + lInc, rInc, // direction in which to increment (1 or -1) + lErr, rErr, // current error (Bresenham) + lErrCmp, rErrCmp, // helper for deciding comparison (> vs >=) + lErrAdd, rErrAdd, // error value to add in each Bresenham cycle + lErrSub, rErrSub; // error value to substract when moving in x direction + + S3L_FastLerpState lSideFLS, rSideFLS; + +#if S3L_COMPUTE_LERP_DEPTH + S3L_FastLerpState lDepthFLS, rDepthFLS; + + #define initDepthFLS(s,p1,p2)\ + s##DepthFLS.valueScaled = p1##PointSS->z << S3L_FAST_LERP_QUALITY;\ + s##DepthFLS.stepScaled = ((p2##PointSS->z << S3L_FAST_LERP_QUALITY) -\ + s##DepthFLS.valueScaled) / (s##Dy != 0 ? s##Dy : 1); +#else + #define initDepthFLS(s,p1,p2) ; +#endif + + /* init side for the algorithm, params: + s - which side (l or r) + p1 - point from (t, l or r) + p2 - point to (t, l or r) + down - whether the side coordinate goes top-down or vice versa */ + #define initSide(s,p1,p2,down)\ + s##X = p1##PointSS->x;\ + s##Dx = p2##PointSS->x - p1##PointSS->x;\ + s##Dy = p2##PointSS->y - p1##PointSS->y;\ + initDepthFLS(s,p1,p2)\ + s##SideFLS.stepScaled = (S3L_F << S3L_FAST_LERP_QUALITY)\ + / (s##Dy != 0 ? s##Dy : 1);\ + s##SideFLS.valueScaled = 0;\ + if (!down)\ + {\ + s##SideFLS.valueScaled =\ + S3L_F << S3L_FAST_LERP_QUALITY;\ + s##SideFLS.stepScaled *= -1;\ + }\ + s##Inc = s##Dx >= 0 ? 1 : -1;\ + if (s##Dx < 0)\ + {s##Err = 0; s##ErrCmp = 0;}\ + else\ + {s##Err = s##Dy; s##ErrCmp = 1;}\ + s##ErrAdd = S3L_abs(s##Dx);\ + s##ErrSub = s##Dy != 0 ? s##Dy : 1; /* don't allow 0, could lead to an + infinite substracting loop */ + + #define stepSide(s)\ + while (s##Err - s##Dy >= s##ErrCmp)\ + {\ + s##X += s##Inc;\ + s##Err -= s##ErrSub;\ + }\ + s##Err += s##ErrAdd; + + initSide(r,t,r,1) + initSide(l,t,l,1) + +#if S3L_PERSPECTIVE_CORRECTION + /* PC is done by linearly interpolating reciprocals from which the corrected + velues can be computed. See + http://www.lysator.liu.se/~mikaelk/doc/perspectivetexture/ */ + + #if S3L_PERSPECTIVE_CORRECTION == 1 + #define Z_RECIP_NUMERATOR\ + (S3L_F * S3L_F * S3L_F) + #elif S3L_PERSPECTIVE_CORRECTION == 2 + #define Z_RECIP_NUMERATOR\ + (S3L_F * S3L_F) + #endif + /* ^ This numerator is a number by which we divide values for the + reciprocals. For PC == 2 it has to be lower because linear interpolation + scaling would make it overflow -- this results in lower depth precision + in bigger distance for PC == 2. */ + + S3L_Unit + tPointRecipZ, lPointRecipZ, rPointRecipZ, /* Reciprocals of the depth of + each triangle point. */ + lRecip0, lRecip1, rRecip0, rRecip1; /* Helper variables for swapping + the above after split. */ + + tPointRecipZ = Z_RECIP_NUMERATOR / S3L_nonZero(tPointSS->z); + lPointRecipZ = Z_RECIP_NUMERATOR / S3L_nonZero(lPointSS->z); + rPointRecipZ = Z_RECIP_NUMERATOR / S3L_nonZero(rPointSS->z); + + lRecip0 = tPointRecipZ; + lRecip1 = lPointRecipZ; + rRecip0 = tPointRecipZ; + rRecip1 = rPointRecipZ; + + #define manageSplitPerspective(b0,b1)\ + b1##Recip0 = b0##PointRecipZ;\ + b1##Recip1 = b1##PointRecipZ;\ + b0##Recip0 = b0##PointRecipZ;\ + b0##Recip1 = tPointRecipZ; +#else + #define manageSplitPerspective(b0,b1) ; +#endif + + // clip to the screen in y dimension: + + endY = S3L_min(endY,S3L_RESOLUTION_Y); + + /* Clipping above the screen (y < 0) can't be easily done here, will be + handled inside the loop. */ + + while (currentY < endY) /* draw the triangle from top to bottom -- the + bottom-most row is left out because, following + from the rasterization rules (see start of the + file), it is to never be rasterized. */ + { + if (currentY == splitY) // reached a vertical split of the triangle? + { + #define manageSplit(b0,b1,s0,s1)\ + S3L_Unit *tmp = barycentric##b0;\ + barycentric##b0 = barycentric##b1;\ + barycentric##b1 = tmp;\ + s0##SideFLS.valueScaled = (S3L_F\ + << S3L_FAST_LERP_QUALITY) - s0##SideFLS.valueScaled;\ + s0##SideFLS.stepScaled *= -1;\ + manageSplitPerspective(s0,s1) + + if (splitOnLeft) + { + initSide(l,l,r,0); + manageSplit(0,2,r,l) + } + else + { + initSide(r,r,l,0); + manageSplit(1,2,l,r) + } + } + + stepSide(r) + stepSide(l) + + if (currentY >= 0) /* clipping of pixels whose y < 0 (can't be easily done + outside the loop because of the Bresenham-like + algorithm steps) */ + { + p.y = currentY; + + // draw the horizontal line + +#if !S3L_FLAT + S3L_Unit rowLength = S3L_nonZero(rX - lX - 1); // prevent zero div + + #if S3L_PERSPECTIVE_CORRECTION + S3L_Unit lOverZ, lRecipZ, rOverZ, rRecipZ, lT, rT; + + lT = S3L_getFastLerpValue(lSideFLS); + rT = S3L_getFastLerpValue(rSideFLS); + + lOverZ = S3L_interpolateByUnitFrom0(lRecip1,lT); + lRecipZ = S3L_interpolateByUnit(lRecip0,lRecip1,lT); + + rOverZ = S3L_interpolateByUnitFrom0(rRecip1,rT); + rRecipZ = S3L_interpolateByUnit(rRecip0,rRecip1,rT); + #else + S3L_FastLerpState b0FLS, b1FLS; + + #if S3L_COMPUTE_LERP_DEPTH + S3L_FastLerpState depthFLS; + + depthFLS.valueScaled = lDepthFLS.valueScaled; + depthFLS.stepScaled = + (rDepthFLS.valueScaled - lDepthFLS.valueScaled) / rowLength; + #endif + + b0FLS.valueScaled = 0; + b1FLS.valueScaled = lSideFLS.valueScaled; + + b0FLS.stepScaled = rSideFLS.valueScaled / rowLength; + b1FLS.stepScaled = -1 * lSideFLS.valueScaled / rowLength; + #endif +#endif + + // clip to the screen in x dimension: + + S3L_ScreenCoord rXClipped = S3L_min(rX,S3L_RESOLUTION_X), + lXClipped = lX; + + if (lXClipped < 0) + { + lXClipped = 0; + +#if !S3L_PERSPECTIVE_CORRECTION && !S3L_FLAT + b0FLS.valueScaled -= lX * b0FLS.stepScaled; + b1FLS.valueScaled -= lX * b1FLS.stepScaled; + + #if S3L_COMPUTE_LERP_DEPTH + depthFLS.valueScaled -= lX * depthFLS.stepScaled; + #endif +#endif + } + +#if S3L_PERSPECTIVE_CORRECTION + S3L_ScreenCoord i = lXClipped - lX; /* helper var to save one + substraction in the inner + loop */ +#endif + +#if S3L_PERSPECTIVE_CORRECTION == 2 + S3L_FastLerpState + depthPC, // interpolates depth between row segments + b0PC, // interpolates barycentric0 between row segments + b1PC; // interpolates barycentric1 between row segments + + /* ^ These interpolate values between row segments (lines of pixels + of S3L_PC_APPROX_LENGTH length). After each row segment perspective + correction is recomputed. */ + + depthPC.valueScaled = + (Z_RECIP_NUMERATOR / + S3L_nonZero(S3L_interpolate(lRecipZ,rRecipZ,i,rowLength))) + << S3L_FAST_LERP_QUALITY; + + b0PC.valueScaled = + ( + S3L_interpolateFrom0(rOverZ,i,rowLength) + * depthPC.valueScaled + ) / (Z_RECIP_NUMERATOR / S3L_F); + + b1PC.valueScaled = + ( + (lOverZ - S3L_interpolateFrom0(lOverZ,i,rowLength)) + * depthPC.valueScaled + ) / (Z_RECIP_NUMERATOR / S3L_F); + + int8_t rowCount = S3L_PC_APPROX_LENGTH; +#endif + +#if S3L_Z_BUFFER + uint32_t zBufferIndex = p.y * S3L_RESOLUTION_X + lXClipped; +#endif + + // draw the row -- inner loop: + for (S3L_ScreenCoord x = lXClipped; x < rXClipped; ++x) + { + int8_t testsPassed = 1; + +#if S3L_STENCIL_BUFFER + if (!S3L_stencilTest(x,p.y)) + testsPassed = 0; +#endif + p.x = x; + +#if S3L_COMPUTE_DEPTH + #if S3L_PERSPECTIVE_CORRECTION == 1 + p.depth = Z_RECIP_NUMERATOR / + S3L_nonZero(S3L_interpolate(lRecipZ,rRecipZ,i,rowLength)); + #elif S3L_PERSPECTIVE_CORRECTION == 2 + if (rowCount >= S3L_PC_APPROX_LENGTH) + { + // init the linear interpolation to the next PC correct value + + rowCount = 0; + + S3L_Unit nextI = i + S3L_PC_APPROX_LENGTH; + + if (nextI < rowLength) + { + S3L_Unit nextDepthScaled = + ( + Z_RECIP_NUMERATOR / + S3L_nonZero(S3L_interpolate(lRecipZ,rRecipZ,nextI,rowLength)) + ) << S3L_FAST_LERP_QUALITY; + + depthPC.stepScaled = + (nextDepthScaled - depthPC.valueScaled) / S3L_PC_APPROX_LENGTH; + + S3L_Unit nextValue = + ( + S3L_interpolateFrom0(rOverZ,nextI,rowLength) + * nextDepthScaled + ) / (Z_RECIP_NUMERATOR / S3L_F); + + b0PC.stepScaled = + (nextValue - b0PC.valueScaled) / S3L_PC_APPROX_LENGTH; + + nextValue = + ( + (lOverZ - S3L_interpolateFrom0(lOverZ,nextI,rowLength)) + * nextDepthScaled + ) / (Z_RECIP_NUMERATOR / S3L_F); + + b1PC.stepScaled = + (nextValue - b1PC.valueScaled) / S3L_PC_APPROX_LENGTH; + } + else + { + /* A special case where we'd be interpolating outside the triangle. + It seems like a valid approach at first, but it creates a bug + in a case when the rasaterized triangle is near screen 0 and can + actually never reach the extrapolated screen position. So we + have to clamp to the actual end of the triangle here. */ + + S3L_Unit maxI = S3L_nonZero(rowLength - i); + + S3L_Unit nextDepthScaled = + ( + Z_RECIP_NUMERATOR / + S3L_nonZero(rRecipZ) + ) << S3L_FAST_LERP_QUALITY; + + depthPC.stepScaled = + (nextDepthScaled - depthPC.valueScaled) / maxI; + + S3L_Unit nextValue = + ( + rOverZ + * nextDepthScaled + ) / (Z_RECIP_NUMERATOR / S3L_F); + + b0PC.stepScaled = + (nextValue - b0PC.valueScaled) / maxI; + + b1PC.stepScaled = + -1 * b1PC.valueScaled / maxI; + } + } + + p.depth = S3L_getFastLerpValue(depthPC); + #else + p.depth = S3L_getFastLerpValue(depthFLS); + S3L_stepFastLerp(depthFLS); + #endif +#else // !S3L_COMPUTE_DEPTH + p.depth = (tPointSS->z + lPointSS->z + rPointSS->z) / 3; +#endif + +#if S3L_Z_BUFFER + p.previousZ = S3L_zBuffer[zBufferIndex]; + + zBufferIndex++; + + if (!S3L_zTest(p.x,p.y,p.depth)) + testsPassed = 0; +#endif + + if (testsPassed) + { +#if !S3L_FLAT + #if S3L_PERSPECTIVE_CORRECTION == 0 + *barycentric0 = S3L_getFastLerpValue(b0FLS); + *barycentric1 = S3L_getFastLerpValue(b1FLS); + #elif S3L_PERSPECTIVE_CORRECTION == 1 + *barycentric0 = + ( + S3L_interpolateFrom0(rOverZ,i,rowLength) + * p.depth + ) / (Z_RECIP_NUMERATOR / S3L_F); + + *barycentric1 = + ( + (lOverZ - S3L_interpolateFrom0(lOverZ,i,rowLength)) + * p.depth + ) / (Z_RECIP_NUMERATOR / S3L_F); + #elif S3L_PERSPECTIVE_CORRECTION == 2 + *barycentric0 = S3L_getFastLerpValue(b0PC); + *barycentric1 = S3L_getFastLerpValue(b1PC); + #endif + + *barycentric2 = + S3L_F - *barycentric0 - *barycentric1; +#endif + +#if S3L_NEAR_CROSS_STRATEGY == 3 + if (_S3L_projectedTriangleState != 0) + { + S3L_Unit newBarycentric[3]; + + newBarycentric[0] = S3L_interpolateBarycentric( + _S3L_triangleRemapBarycentrics[0].x, + _S3L_triangleRemapBarycentrics[1].x, + _S3L_triangleRemapBarycentrics[2].x, + p.barycentric); + + newBarycentric[1] = S3L_interpolateBarycentric( + _S3L_triangleRemapBarycentrics[0].y, + _S3L_triangleRemapBarycentrics[1].y, + _S3L_triangleRemapBarycentrics[2].y, + p.barycentric); + + newBarycentric[2] = S3L_interpolateBarycentric( + _S3L_triangleRemapBarycentrics[0].z, + _S3L_triangleRemapBarycentrics[1].z, + _S3L_triangleRemapBarycentrics[2].z, + p.barycentric); + + p.barycentric[0] = newBarycentric[0]; + p.barycentric[1] = newBarycentric[1]; + p.barycentric[2] = newBarycentric[2]; + } +#endif + S3L_PIXEL_FUNCTION(&p); + } // tests passed + +#if !S3L_FLAT + #if S3L_PERSPECTIVE_CORRECTION + i++; + #if S3L_PERSPECTIVE_CORRECTION == 2 + rowCount++; + + S3L_stepFastLerp(depthPC); + S3L_stepFastLerp(b0PC); + S3L_stepFastLerp(b1PC); + #endif + #else + S3L_stepFastLerp(b0FLS); + S3L_stepFastLerp(b1FLS); + #endif +#endif + } // inner loop + } // y clipping + +#if !S3L_FLAT + S3L_stepFastLerp(lSideFLS); + S3L_stepFastLerp(rSideFLS); + + #if S3L_COMPUTE_LERP_DEPTH + S3L_stepFastLerp(lDepthFLS); + S3L_stepFastLerp(rDepthFLS); + #endif +#endif + + ++currentY; + } // row drawing + + #undef manageSplit + #undef initPC + #undef initSide + #undef stepSide + #undef Z_RECIP_NUMERATOR +} + +void S3L_rotate2DPoint(S3L_Unit *x, S3L_Unit *y, S3L_Unit angle) +{ + if (angle < S3L_SIN_TABLE_UNIT_STEP) + return; // no visible rotation + + S3L_Unit angleSin = S3L_sin(angle); + S3L_Unit angleCos = S3L_cos(angle); + + S3L_Unit xBackup = *x; + + *x = + (angleCos * (*x)) / S3L_F - + (angleSin * (*y)) / S3L_F; + + *y = + (angleSin * xBackup) / S3L_F + + (angleCos * (*y)) / S3L_F; +} + +void S3L_makeWorldMatrix(S3L_Transform3D worldTransform, S3L_Mat4 m) +{ + S3L_makeScaleMatrix( + worldTransform.scale.x, + worldTransform.scale.y, + worldTransform.scale.z, + m); + + S3L_Mat4 t; + + S3L_makeRotationMatrixZXY( + worldTransform.rotation.x, + worldTransform.rotation.y, + worldTransform.rotation.z, + t); + + S3L_mat4Xmat4(m,t); + + S3L_makeTranslationMat( + worldTransform.translation.x, + worldTransform.translation.y, + worldTransform.translation.z, + t); + + S3L_mat4Xmat4(m,t); +} + +void S3L_mat4Transpose(S3L_Mat4 m) +{ + S3L_Unit tmp; + + for (uint8_t y = 0; y < 3; ++y) + for (uint8_t x = 1 + y; x < 4; ++x) + { + tmp = m[x][y]; + m[x][y] = m[y][x]; + m[y][x] = tmp; + } +} + +void S3L_makeCameraMatrix(S3L_Transform3D cameraTransform, S3L_Mat4 m) +{ + S3L_makeTranslationMat( + -1 * cameraTransform.translation.x, + -1 * cameraTransform.translation.y, + -1 * cameraTransform.translation.z, + m); + + S3L_Mat4 r; + + S3L_makeRotationMatrixZXY( + cameraTransform.rotation.x, + cameraTransform.rotation.y, + cameraTransform.rotation.z, + r); + + S3L_mat4Transpose(r); // transposing creates an inverse transform + + S3L_Mat4 s; + + S3L_makeScaleMatrix( + cameraTransform.scale.x, + cameraTransform.scale.y, + cameraTransform.scale.z,s); + + S3L_mat4Xmat4(m,r); + S3L_mat4Xmat4(m,s); +} + +int8_t S3L_triangleWinding( + S3L_ScreenCoord x0, + S3L_ScreenCoord y0, + S3L_ScreenCoord x1, + S3L_ScreenCoord y1, + S3L_ScreenCoord x2, + S3L_ScreenCoord y2) +{ + int32_t winding = + (y1 - y0) * (x2 - x1) - (x1 - x0) * (y2 - y1); + // ^ cross product for points with z == 0 + + return winding > 0 ? 1 : (winding < 0 ? -1 : 0); +} + +/** Checks if given triangle (in Screen Space) is at least partially visible, + i.e. returns false if the triangle is either completely outside the frustum + (left, right, top, bottom, near) or is invisible due to backface culling. */ +static inline int8_t S3L_triangleIsVisible( + S3L_Vec4 p0, + S3L_Vec4 p1, + S3L_Vec4 p2, + uint8_t backfaceCulling) +{ + #define clipTest(c,cmp,v)\ + (p0.c cmp (v) && p1.c cmp (v) && p2.c cmp (v)) + + if ( // outside frustum? +#if S3L_NEAR_CROSS_STRATEGY == 0 + p0.z <= S3L_NEAR || p1.z <= S3L_NEAR || p2.z <= S3L_NEAR || + // ^ partially in front of NEAR? +#else + clipTest(z,<=,S3L_NEAR) || // completely in front of NEAR? +#endif + clipTest(x,<,0) || + clipTest(x,>=,S3L_RESOLUTION_X) || + clipTest(y,<,0) || + clipTest(y,>,S3L_RESOLUTION_Y) + ) + return 0; + + #undef clipTest + + if (backfaceCulling != 0) + { + int8_t winding = + S3L_triangleWinding(p0.x,p0.y,p1.x,p1.y,p2.x,p2.y); + + if ((backfaceCulling == 1 && winding > 0) || + (backfaceCulling == 2 && winding < 0)) + return 0; + } + + return 1; +} + +#if S3L_SORT != 0 +typedef struct +{ + uint8_t modelIndex; + S3L_Index triangleIndex; + uint16_t sortValue; +} _S3L_TriangleToSort; + +_S3L_TriangleToSort S3L_sortArray[S3L_MAX_TRIANGES_DRAWN]; +uint16_t S3L_sortArrayLength; +#endif + +void _S3L_projectVertex(const S3L_Model3D *model, S3L_Index triangleIndex, + uint8_t vertex, S3L_Mat4 projectionMatrix, S3L_Vec4 *result) +{ + uint32_t vertexIndex = model->triangles[triangleIndex * 3 + vertex] * 3; + + result->x = model->vertices[vertexIndex]; + result->y = model->vertices[vertexIndex + 1]; + result->z = model->vertices[vertexIndex + 2]; + result->w = S3L_F; // needed for translation + + S3L_vec3Xmat4(result,projectionMatrix); + + result->w = result->z; + /* We'll keep the non-clamped z in w for sorting. */ +} + +void _S3L_mapProjectedVertexToScreen(S3L_Vec4 *vertex, S3L_Unit focalLength) +{ + vertex->z = vertex->z >= S3L_NEAR ? vertex->z : S3L_NEAR; + /* ^ This firstly prevents zero division in the follwoing z-divide and + secondly "pushes" vertices that are in front of near a little bit forward, + which makes them behave a bit better. If all three vertices end up exactly + on NEAR, the triangle will be culled. */ + + S3L_perspectiveDivide(vertex,focalLength); + + S3L_ScreenCoord sX, sY; + + S3L_mapProjectionPlaneToScreen(*vertex,&sX,&sY); + + vertex->x = sX; + vertex->y = sY; +} + +/** Projects a triangle to the screen. If enabled, a triangle can be potentially + subdivided into two if it crosses the near plane, in which case two projected + triangles are returned (the info about splitting or cutting the triangle is + passed in global variables, see above). */ +void _S3L_projectTriangle( + const S3L_Model3D *model, + S3L_Index triangleIndex, + S3L_Mat4 matrix, + uint32_t focalLength, + S3L_Vec4 transformed[6]) +{ + _S3L_projectVertex(model,triangleIndex,0,matrix,&(transformed[0])); + _S3L_projectVertex(model,triangleIndex,1,matrix,&(transformed[1])); + _S3L_projectVertex(model,triangleIndex,2,matrix,&(transformed[2])); + _S3L_projectedTriangleState = 0; + +#if S3L_NEAR_CROSS_STRATEGY == 2 || S3L_NEAR_CROSS_STRATEGY == 3 + uint8_t infront = 0; + uint8_t behind = 0; + uint8_t infrontI[3]; + uint8_t behindI[3]; + + for (uint8_t i = 0; i < 3; ++i) + if (transformed[i].z < S3L_NEAR) + { + infrontI[infront] = i; + infront++; + } + else + { + behindI[behind] = i; + behind++; + } + +#if S3L_NEAR_CROSS_STRATEGY == 3 + for (int i = 0; i < 3; ++i) + S3L_vec4Init(&(_S3L_triangleRemapBarycentrics[i])); + + _S3L_triangleRemapBarycentrics[0].x = S3L_F; + _S3L_triangleRemapBarycentrics[1].y = S3L_F; + _S3L_triangleRemapBarycentrics[2].z = S3L_F; +#endif + +#define interpolateVertex \ + S3L_Unit ratio =\ + ((transformed[be].z - S3L_NEAR) * S3L_F) /\ + (transformed[be].z - transformed[in].z);\ + transformed[in].x = transformed[be].x - \ + ((transformed[be].x - transformed[in].x) * ratio) /\ + S3L_F;\ + transformed[in].y = transformed[be].y -\ + ((transformed[be].y - transformed[in].y) * ratio) /\ + S3L_F;\ + transformed[in].z = S3L_NEAR;\ + if (beI != 0) {\ + beI->x = (beI->x * ratio) / S3L_F;\ + beI->y = (beI->y * ratio) / S3L_F;\ + beI->z = (beI->z * ratio) / S3L_F;\ + ratio = S3L_F - ratio;\ + beI->x += (beB->x * ratio) / S3L_F;\ + beI->y += (beB->y * ratio) / S3L_F;\ + beI->z += (beB->z * ratio) / S3L_F; } + + if (infront == 2) + { + // shift the two vertices forward along the edge + for (uint8_t i = 0; i < 2; ++i) + { + uint8_t be = behindI[0], in = infrontI[i]; + +#if S3L_NEAR_CROSS_STRATEGY == 3 + S3L_Vec4 *beI = &(_S3L_triangleRemapBarycentrics[in]), + *beB = &(_S3L_triangleRemapBarycentrics[be]); +#else + S3L_Vec4 *beI = 0, *beB = 0; +#endif + + interpolateVertex + + _S3L_projectedTriangleState = 1; + } + } + else if (infront == 1) + { + // create another triangle and do the shifts + transformed[3] = transformed[behindI[1]]; + transformed[4] = transformed[infrontI[0]]; + transformed[5] = transformed[infrontI[0]]; + +#if S3L_NEAR_CROSS_STRATEGY == 3 + _S3L_triangleRemapBarycentrics[3] = + _S3L_triangleRemapBarycentrics[behindI[1]]; + _S3L_triangleRemapBarycentrics[4] = + _S3L_triangleRemapBarycentrics[infrontI[0]]; + _S3L_triangleRemapBarycentrics[5] = + _S3L_triangleRemapBarycentrics[infrontI[0]]; +#endif + + for (uint8_t i = 0; i < 2; ++i) + { + uint8_t be = behindI[i], in = i + 4; + +#if S3L_NEAR_CROSS_STRATEGY == 3 + S3L_Vec4 *beI = &(_S3L_triangleRemapBarycentrics[in]), + *beB = &(_S3L_triangleRemapBarycentrics[be]); +#else + S3L_Vec4 *beI = 0, *beB = 0; +#endif + + interpolateVertex + } + +#if S3L_NEAR_CROSS_STRATEGY == 3 + _S3L_triangleRemapBarycentrics[infrontI[0]] = + _S3L_triangleRemapBarycentrics[4]; +#endif + + transformed[infrontI[0]] = transformed[4]; + + _S3L_mapProjectedVertexToScreen(&transformed[3],focalLength); + _S3L_mapProjectedVertexToScreen(&transformed[4],focalLength); + _S3L_mapProjectedVertexToScreen(&transformed[5],focalLength); + + _S3L_projectedTriangleState = 2; + } + +#undef interpolateVertex +#endif // S3L_NEAR_CROSS_STRATEGY == 2 + + _S3L_mapProjectedVertexToScreen(&transformed[0],focalLength); + _S3L_mapProjectedVertexToScreen(&transformed[1],focalLength); + _S3L_mapProjectedVertexToScreen(&transformed[2],focalLength); +} + +void S3L_drawScene(S3L_Scene scene) +{ + S3L_Mat4 matFinal, matCamera; + S3L_Vec4 transformed[6]; // transformed triangle coords, for 2 triangles + + const S3L_Model3D *model; + S3L_Index modelIndex, triangleIndex; + + S3L_makeCameraMatrix(scene.camera.transform,matCamera); + +#if S3L_SORT != 0 + uint16_t previousModel = 0; + S3L_sortArrayLength = 0; +#endif + + for (modelIndex = 0; modelIndex < scene.modelCount; ++modelIndex) + { + if (!scene.models[modelIndex].config.visible) + continue; + +#if S3L_SORT != 0 + if (S3L_sortArrayLength >= S3L_MAX_TRIANGES_DRAWN) + break; + + previousModel = modelIndex; +#endif + + if (scene.models[modelIndex].customTransformMatrix == 0) + S3L_makeWorldMatrix(scene.models[modelIndex].transform,matFinal); + else + { + S3L_Mat4 *m = scene.models[modelIndex].customTransformMatrix; + + for (int8_t j = 0; j < 4; ++j) + for (int8_t i = 0; i < 4; ++i) + matFinal[i][j] = (*m)[i][j]; + } + + S3L_mat4Xmat4(matFinal,matCamera); + + S3L_Index triangleCount = scene.models[modelIndex].triangleCount; + + triangleIndex = 0; + + model = &(scene.models[modelIndex]); + + while (triangleIndex < triangleCount) + { + /* Some kind of cache could be used in theory to not project perviously + already projected vertices, but after some testing this was abandoned, + no gain was seen. */ + + _S3L_projectTriangle(model,triangleIndex,matFinal, + scene.camera.focalLength,transformed); + + if (S3L_triangleIsVisible(transformed[0],transformed[1],transformed[2], + model->config.backfaceCulling)) + { +#if S3L_SORT == 0 + // without sorting draw right away + S3L_drawTriangle(transformed[0],transformed[1],transformed[2],modelIndex, + triangleIndex); + + if (_S3L_projectedTriangleState == 2) // draw potential subtriangle + { +#if S3L_NEAR_CROSS_STRATEGY == 3 + _S3L_triangleRemapBarycentrics[0] = _S3L_triangleRemapBarycentrics[3]; + _S3L_triangleRemapBarycentrics[1] = _S3L_triangleRemapBarycentrics[4]; + _S3L_triangleRemapBarycentrics[2] = _S3L_triangleRemapBarycentrics[5]; +#endif + + S3L_drawTriangle(transformed[3],transformed[4],transformed[5], + modelIndex, triangleIndex); + } +#else + + if (S3L_sortArrayLength >= S3L_MAX_TRIANGES_DRAWN) + break; + + // with sorting add to a sort list + S3L_sortArray[S3L_sortArrayLength].modelIndex = modelIndex; + S3L_sortArray[S3L_sortArrayLength].triangleIndex = triangleIndex; + S3L_sortArray[S3L_sortArrayLength].sortValue = S3L_zeroClamp( + transformed[0].w + transformed[1].w + transformed[2].w) >> 2; + /* ^ + The w component here stores non-clamped z. + + As a simple approximation we sort by the triangle center point, + which is a mean coordinate -- we don't actually have to divide by 3 + (or anything), that is unnecessary for sorting! We shift by 2 just + as a fast operation to prevent overflow of the sum over uint_16t. */ + + S3L_sortArrayLength++; +#endif + } + + triangleIndex++; + } + } + +#if S3L_SORT != 0 + + #if S3L_SORT == 1 + #define cmp < + #else + #define cmp > + #endif + + /* Sort the triangles. We use insertion sort, because it has many advantages, + especially for smaller arrays (better than bubble sort, in-place, stable, + simple, ...). */ + + for (int16_t i = 1; i < S3L_sortArrayLength; ++i) + { + _S3L_TriangleToSort tmp = S3L_sortArray[i]; + + int16_t j = i - 1; + + while (j >= 0 && S3L_sortArray[j].sortValue cmp tmp.sortValue) + { + S3L_sortArray[j + 1] = S3L_sortArray[j]; + j--; + } + + S3L_sortArray[j + 1] = tmp; + } + + #undef cmp + + for (S3L_Index i = 0; i < S3L_sortArrayLength; ++i) // draw sorted triangles + { + modelIndex = S3L_sortArray[i].modelIndex; + triangleIndex = S3L_sortArray[i].triangleIndex; + + model = &(scene.models[modelIndex]); + + if (modelIndex != previousModel) + { + // only recompute the matrix when the model has changed + S3L_makeWorldMatrix(model->transform,matFinal); + S3L_mat4Xmat4(matFinal,matCamera); + previousModel = modelIndex; + } + + /* Here we project the points again, which is redundant and slow as they've + already been projected above, but saving the projected points would + require a lot of memory, which for small resolutions could be even + worse than z-bufer. So this seems to be the best way memory-wise. */ + + _S3L_projectTriangle(model,triangleIndex,matFinal,scene.camera.focalLength, + transformed); + + S3L_drawTriangle(transformed[0],transformed[1],transformed[2],modelIndex, + triangleIndex); + + if (_S3L_projectedTriangleState == 2) + { +#if S3L_NEAR_CROSS_STRATEGY == 3 + _S3L_triangleRemapBarycentrics[0] = _S3L_triangleRemapBarycentrics[3]; + _S3L_triangleRemapBarycentrics[1] = _S3L_triangleRemapBarycentrics[4]; + _S3L_triangleRemapBarycentrics[2] = _S3L_triangleRemapBarycentrics[5]; +#endif + + S3L_drawTriangle(transformed[3],transformed[4],transformed[5], + modelIndex, triangleIndex); + } + } +#endif +} + +#endif // guard diff --git a/tinyphysicsengine.h b/tinyphysicsengine.h new file mode 100644 index 0000000..ff0e8c1 --- /dev/null +++ b/tinyphysicsengine.h @@ -0,0 +1,3303 @@ +#ifndef _TINYPHYSICSENGINE_H +#define _TINYPHYSICSENGINE_H + +/** + tinyphysicsengine (TPE) + + Simple/suckless header-only hybrid 3D physics engine with no floating point, + only 32 bit int arithmetic, similar to e.g. small3dlib. + + Conventions and formats are the same or similar to those of small3dlib so as + to make them easily integrate with each other. + + The library works with bodies made of spheres connected by elastic springs, + i.e. soft bodies which however behave as "stiff" bodies by default and can + be used to fake rigid body physics as well. Bodies are placed in environemnts + specified by a distance function that allows to implement any mathematical + shape. + + Orientations/rotations are in extrinsic Euler angles in the ZXY order (by Z, + then by X, then by Y), if not mentioned otherwise. Angles are in TPE_Units, + TPE_FRACTIONS_PER_UNIT is full angle (2 PI). Sometimes rotations can also be + specified in the "about axis" format: here the object is rotated CW by given + axis by an angle that's specified by the magnitude of the vector. + + Where it matters (e.g. rotations about axes) we consider a left-handed coord. + system (x right, y up, z forward). + + ------------------------------------------------------------------------------ + + by drummyfish, 2022 + + version 0.8d + + This work's goal is to never be encumbered by any exclusive intellectual + property rights. The work is therefore provided under CC0 1.0 + (https://creativecommons.org/publicdomain/zero/1.0/) + additional WAIVER OF + ALL INTELLECTUAL PROPERTY RIGHTS that waives the rest of intellectual property + rights not already waived by CC0 1.0. The WAIVER OF ALL INTELLECTUAL PROPERTY + RGHTS is as follows: + + Each contributor to this work agrees that they waive any exclusive rights, + including but not limited to copyright, patents, trademark, trade dress, + industrial design, plant varieties and trade secrets, to any and all ideas, + concepts, processes, discoveries, improvements and inventions conceived, + discovered, made, designed, researched or developed by the contributor either + solely or jointly with others, which relate to this work or result from this + work. Should any waiver of such right be judged legally invalid or + ineffective under applicable law, the contributor hereby grants to each + affected person a royalty-free, non transferable, non sublicensable, non + exclusive, irrevocable and unconditional license to this right. +*/ + +#include + +typedef int32_t TPE_Unit; ///< Basic fixed point unit type. +typedef int16_t TPE_UnitReduced; ///< Like TPE_Unit but saving space + +#define TPE_FRACTIONS_PER_UNIT 512 ///< one fixed point unit, don't change +#define TPE_F TPE_FRACTIONS_PER_UNIT ///< short for TPE_FRACTIONS_PER_UNIT +#define TPE_JOINT_SIZE_MULTIPLIER 32 ///< joint size is scaled (size saving) + +#define TPE_INFINITY 2147483647 + +#define TPE_JOINT_SIZE(joint) ((joint).sizeDivided * TPE_JOINT_SIZE_MULTIPLIER) + +#ifndef TPE_APPROXIMATE_LENGTH + #define TPE_APPROXIMATE_LENGTH 0 /**< whether or not use length/distance + approximation rather than exact + calculation (1 is faster but less + accurate), beware of possible lower + stability */ +#endif + +#if !TPE_APPROXIMATE_LENGTH + #define TPE_DISTANCE TPE_dist + #define TPE_LENGTH TPE_vec3Len +#else + #define TPE_DISTANCE TPE_distApprox + #define TPE_LENGTH TPE_vec3LenApprox +#endif + +#ifndef TPE_LOG + #define TPE_LOG(s) ; // redefine to some print function to show debug logs +#endif + +#ifndef TPE_LOW_SPEED +/** Speed, in TPE_Units per ticks, that is considered low (used e.g. for auto + deactivation of bodies). */ + #define TPE_LOW_SPEED 30 +#endif + +#ifndef TPE_RESHAPE_TENSION_LIMIT +/** Tension limit, in TPE_Units, after which a non-soft body will be reshaped. + Smaller number will keep more stable shapes but will cost more performance. */ + #define TPE_RESHAPE_TENSION_LIMIT 20 +#endif + +#ifndef TPE_RESHAPE_ITERATIONS +/** How many iterations of reshaping will be performed by the step function if + the body's shape needs to be reshaped. Greater number will keep shapes more + stable but will cost some performance. */ + #define TPE_RESHAPE_ITERATIONS 3 +#endif + +#ifndef TPE_DEACTIVATE_AFTER +/** After how many ticks of low speed should a body be disabled. This mustn't + be greater than 255. */ + #define TPE_DEACTIVATE_AFTER 128 +#endif + +#ifndef TPE_LIGHT_DEACTIVATION +/** When a body is activated by a collision, its deactivation counter will be + set to this value, i.e. after a collision the body will be prone to deactivate + sooner than normally. This is to handle situations with many bodies touching + each other that would normally keep activating each other, never coming to + rest. */ + #define TPE_LIGHT_DEACTIVATION \ + (TPE_DEACTIVATE_AFTER - TPE_DEACTIVATE_AFTER / 10) +#endif + +#ifndef TPE_TENSION_ACCELERATION_DIVIDER +/** Number by which the base acceleration (TPE_FRACTIONS_PER_UNIT per tick + squared) caused by the connection tension will be divided. This should be + power of 2. */ + #define TPE_TENSION_ACCELERATION_DIVIDER 32 +#endif + +#ifndef TPE_TENSION_ACCELERATION_THRESHOLD +/** Limit within which acceleration caused by connection tension won't be + applied. */ + #define TPE_TENSION_ACCELERATION_THRESHOLD 5 +#endif + +#ifndef TPE_TENSION_GREATER_ACCELERATION_THRESHOLD +/** Connection tension threshold after which twice as much acceleration will + be applied. This helps prevent diverting joints that are "impaled" by + environment.*/ + #define TPE_TENSION_GREATER_ACCELERATION_THRESHOLD \ + (TPE_TENSION_ACCELERATION_THRESHOLD * 3) +#endif + +#ifndef TPE_COLLISION_RESOLUTION_ITERATIONS +/** Maximum number of iterations to try to uncollide two colliding bodies. */ + #define TPE_COLLISION_RESOLUTION_ITERATIONS 16 +#endif + +#ifndef TPE_COLLISION_RESOLUTION_MARGIN +/** Margin, in TPE_Units, by which a body will be shifted back to get out of + collision. */ + #define TPE_COLLISION_RESOLUTION_MARGIN (TPE_F / 64) +#endif + +#ifndef TPE_NONROTATING_COLLISION_RESOLVE_ATTEMPTS +/** Number of times a collision of nonrotating bodies with environment will be + attempted to resolve. This probably won't have great performance implications + as complex collisions of this kind should be relatively rare. */ + #define TPE_NONROTATING_COLLISION_RESOLVE_ATTEMPTS 8 +#endif + +#ifndef TPE_APPROXIMATE_NET_SPEED +/** Whether to use a fast approximation for calculating net speed of bodies + which increases performance a bit. */ + #define TPE_APPROXIMATE_NET_SPEED 1 +#endif + +#define TPE_PRINTF_VEC3(v) printf("[%d %d %d]",(v).x,(v).y,(v).z); + +typedef struct +{ + TPE_Unit x; + TPE_Unit y; + TPE_Unit z; +} TPE_Vec3; + +/** Keeps given point within specified axis-aligned box. This can be used e.g. + to smooth rendered movement of jittering physics bodies. */ +TPE_Vec3 TPE_vec3KeepWithinBox(TPE_Vec3 point, TPE_Vec3 boxCenter, + TPE_Vec3 boxMaxVect); + +TPE_Vec3 TPE_vec3KeepWithinDistanceBand(TPE_Vec3 point, TPE_Vec3 center, + TPE_Unit minDistance, TPE_Unit maxDistance); + +TPE_Vec3 TPE_vec3(TPE_Unit x, TPE_Unit y, TPE_Unit z); +TPE_Vec3 TPE_vec3Minus(TPE_Vec3 v1, TPE_Vec3 v2); +TPE_Vec3 TPE_vec3Plus(TPE_Vec3 v1, TPE_Vec3 v2); +TPE_Vec3 TPE_vec3Cross(TPE_Vec3 v1, TPE_Vec3 v2); +TPE_Vec3 TPE_vec3Project(TPE_Vec3 v, TPE_Vec3 base); +TPE_Vec3 TPE_vec3ProjectNormalized(TPE_Vec3 v, TPE_Vec3 baseNormalized); +TPE_Vec3 TPE_vec3Times(TPE_Vec3 v, TPE_Unit units); +TPE_Vec3 TPE_vec3TimesPlain(TPE_Vec3 v, TPE_Unit q); +TPE_Vec3 TPE_vec3Normalized(TPE_Vec3 v); + +TPE_Unit TPE_vec3Dot(TPE_Vec3 v1, TPE_Vec3 v2); +TPE_Unit TPE_vec3Len(TPE_Vec3 v); +TPE_Unit TPE_vec3LenApprox(TPE_Vec3 v); + +/** Returns an angle in TPE_Units (see angle conventions) of a 2D vector with + the X axis, CCW. */ +TPE_Unit TPE_vec2Angle(TPE_Unit x, TPE_Unit y); + +/** Keeps given value within specified range. This can be used e.g. for movement + smoothing. */ +TPE_Unit TPE_keepInRange(TPE_Unit x, TPE_Unit xMin, TPE_Unit xMax); + +static inline TPE_Unit TPE_abs(TPE_Unit x); +static inline TPE_Unit TPE_max(TPE_Unit a, TPE_Unit b); +static inline TPE_Unit TPE_min(TPE_Unit a, TPE_Unit b); +static inline TPE_Unit TPE_nonZero(TPE_Unit x); +static inline TPE_Unit TPE_dist(TPE_Vec3 p1, TPE_Vec3 p2); +static inline TPE_Unit TPE_distApprox(TPE_Vec3 p1, TPE_Vec3 p2); +TPE_Unit TPE_sqrt(TPE_Unit x); + +/** Compute sine, TPE_FRACTIONS_PER_UNIT as argument corresponds to 2 * PI + radians. Returns a number from -TPE_FRACTIONS_PER_UNIT to + TPE_FRACTIONS_PER_UNIT. */ +TPE_Unit TPE_sin(TPE_Unit x); +TPE_Unit TPE_cos(TPE_Unit x); +TPE_Unit TPE_atan(TPE_Unit x); + +typedef struct +{ + TPE_Vec3 position; + TPE_UnitReduced velocity[3]; ///< not TPE_Vec3 to save size + uint8_t sizeDivided; /**< size (radius, ...), for saving space divided by + TPE_JOINT_SIZE_MULTIPLIER */ +} TPE_Joint; + +typedef struct +{ + uint8_t joint1; + uint8_t joint2; + uint16_t length; ///< connection's preferred length, uint16_t saves space +} TPE_Connection; + +#define TPE_BODY_FLAG_DEACTIVATED 1 /**< Not being updated due to low energy, + "sleeping", will be woken by + collisions etc. */ +#define TPE_BODY_FLAG_NONROTATING 2 /**< When set, the body won't rotate, + will only move linearly. Here the + velocity of the body's first joint + is the velocity of the whole + body. */ +#define TPE_BODY_FLAG_DISABLED 4 /**< Disabled, not taking part in + simulation. */ +#define TPE_BODY_FLAG_SOFT 8 /**< Soft connections, effort won't be + made to keep the body's shape. */ +#define TPE_BODY_FLAG_SIMPLE_CONN 16 /**< Simple connections, don't zero out + antagonist forces or apply + connection friction, can increase + performance. */ +#define TPE_BODY_FLAG_ALWAYS_ACTIVE 32 /**< Will never deactivate due to low + energy. */ + +/** Function used for defining static environment, working similarly to an SDF + (signed distance function). The parameters are: 3D point P, max distance D. + The function should behave like this: if P is inside the solid environment + volume, P will be returned; otherwise closest point (by Euclidean distance) to + the solid environment volume from P will be returned, except for a case when + this closest point would be further away than D, in which case any arbitrary + point further away than D may be returned (this allows for optimizations). */ +typedef TPE_Vec3 (*TPE_ClosestPointFunction)(TPE_Vec3, TPE_Unit); + +/** Function that can be used as a joint-joint or joint-environment collision + callback, parameters are following: body1 index, joint1 index, body2 index, + joint2 index, collision world position. If body1 index is the same as body1 + index, then collision type is body-environment, otherwise it is body-body + type. The function has to return either 1 if the collision is to be allowed + or 0 if it is to be discarded. This can besides others be used to disable + collisions between some bodies. */ +typedef uint8_t (*TPE_CollisionCallback)(uint16_t, uint16_t, uint16_t, uint16_t, + TPE_Vec3); + +/** Function used by the debug drawing functions to draw individual pixels to + the screen. The parameters are following: pixel x, pixel y, pixel color. */ +typedef void (*TPE_DebugDrawFunction)(uint16_t, uint16_t, uint8_t); + +/** Physics body made of spheres (each of same weight but possibly different + radia) connected by elastic springs. */ +typedef struct +{ + TPE_Joint *joints; + uint8_t jointCount; + TPE_Connection *connections; + uint8_t connectionCount; + TPE_UnitReduced jointMass; ///< mass of a single joint + TPE_UnitReduced friction; ///< friction of each joint + TPE_UnitReduced elasticity; ///< elasticity of each joint + uint8_t flags; + uint8_t deactivateCount; +} TPE_Body; + +typedef struct +{ + TPE_Body *bodies; + uint16_t bodyCount; + TPE_ClosestPointFunction environmentFunction; + TPE_CollisionCallback collisionCallback; +} TPE_World; + +/** Tests the mathematical validity of given closest point function (function + representing the physics environment), i.e. whether for example approaching + some closest point in a straight line keeps approximately the same closest + point. Note that this function may take a long time to complete, especially + with higher gridResolution values and more complex environment functions. You + should use this function to test your environment function, especially if you + create functions for your own shapes etc. The cornerFrom and cornerTo points + are corners of an axis-aligned box within which testing will take place, + gridResolution defines numbers of points (i.e. step length) along each + dimension to test (recommended e.g. 64), allowedError says error within which + points will be considered the same (recommended range approx. 10 to 200). If + testing is successful, 1 is returned, otherwise 0 is returned and the point + around which error was detected is returned in errorPoint (unless the pointer + is 0 in which case it is ignored). */ +uint8_t TPE_testClosestPointFunction(TPE_ClosestPointFunction f, + TPE_Vec3 cornerFrom, TPE_Vec3 cornerTo, uint8_t gridResolution, + TPE_UnitReduced allowedError, TPE_Vec3 *errorPoint); + +void TPE_bodyInit(TPE_Body *body, + TPE_Joint *joints, uint8_t jointCount, + TPE_Connection *connections, uint8_t connectionCount, + TPE_Unit mass); + +void TPE_worldInit(TPE_World *world, + TPE_Body *bodies, uint16_t bodyCount, + TPE_ClosestPointFunction environmentFunction); + +/** Gets orientation (rotation) of a body from a position of three of its + joints. The vector from joint1 to joint2 is considered the body's forward + direction, the vector from joint1 to joint3 its right direction. The returned + rotation is in Euler angles (see rotation conventions). */ +TPE_Vec3 TPE_bodyGetRotation(const TPE_Body *body, uint16_t joint1, + uint16_t joint2, uint16_t joint3); + +void TPE_vec3Normalize(TPE_Vec3 *v); + +/** Rotates a 3D point by given Euler angle rotation (see rotation + conventions). */ +TPE_Vec3 TPE_pointRotate(TPE_Vec3 point, TPE_Vec3 rotation); + +/** Returns an inverse rotation to given rotation, in Euler angles (see rotation + conventions). */ +TPE_Vec3 TPE_rotationInverse(TPE_Vec3 rotation); + +/** Returns a connection tension, i.e. a signed percentage difference against + desired length (TPE_FRACTIONS_PER_UNIT means 100%). */ +static inline TPE_Unit TPE_connectionTension(TPE_Unit length, + TPE_Unit desiredLength); + +/** Rotates a rotation specified in Euler angles by given axis + angle (see + rotation conventions). Returns a rotation in Eurler angles. */ +TPE_Vec3 TPE_rotationRotateByAxis(TPE_Vec3 rotation, TPE_Vec3 rotationByAxis); + +/** Computes the formula of a 1D collision of rigid bodies. */ +void TPE_getVelocitiesAfterCollision(TPE_Unit *v1, TPE_Unit *v2, TPE_Unit m1, + TPE_Unit m2, TPE_Unit elasticity); + +/** Computes orientation/rotation (see docs for orientation format) from two + vectors (which should be at least close to being perpendicular and do NOT + need to be normalized). This can be used to determine orientation of a body + from a relative position of its joints. */ +TPE_Vec3 TPE_rotationFromVecs(TPE_Vec3 forward, TPE_Vec3 right); + +TPE_Joint TPE_joint(TPE_Vec3 position, TPE_Unit size); + +/** Mostly for internal use, resolves a potential collision of two joints in a + way that keeps the joints outside provided environment (if the function + pointer is not 0). Returns 1 if joints collided or 0 otherwise. */ +uint8_t TPE_jointsResolveCollision(TPE_Joint *j1, TPE_Joint *j2, + TPE_Unit mass1, TPE_Unit mass2, TPE_Unit elasticity, TPE_Unit friction, + TPE_ClosestPointFunction env); + +/** Mostly for internal use, tests and potentially resolves a collision between + a joint and environment, returns 0 if no collision happened, 1 if it happened + and was resolved normally and 2 if it couldn't be resolved normally. */ +uint8_t TPE_jointEnvironmentResolveCollision(TPE_Joint *joint, TPE_Unit + elasticity, TPE_Unit friction, TPE_ClosestPointFunction env); + +/** Tests whether a body is currently colliding with the environment. */ +uint8_t TPE_bodyEnvironmentCollide(const TPE_Body *body, + TPE_ClosestPointFunction env); + +/** Mostly for internal use, tests and potentially resolves a collision of a + body with the environment, returns 1 if collision happened or 0 otherwise. */ +uint8_t TPE_bodyEnvironmentResolveCollision(TPE_Body *body, + TPE_ClosestPointFunction env); + +TPE_Vec3 TPE_bodyGetLinearVelocity(const TPE_Body *body); + +/** Computes the minimum bounding box of given body. */ +void TPE_bodyGetAABB(const TPE_Body *body, TPE_Vec3 *vMin, TPE_Vec3 *vMax); + +/** Computes a bounding sphere of a body which is not minimal but faster to + compute than the minimum bounding sphere. */ +void TPE_bodyGetFastBSphere(const TPE_Body *body, TPE_Vec3 *center, + TPE_Unit *radius); + +/** Computes the minimum bounding sphere of a body (there is another function + for a faster approximate bounding sphere). */ +void TPE_bodyGetBSphere(const TPE_Body *body, TPE_Vec3 *center, + TPE_Unit *radius); + +uint8_t TPE_checkOverlapAABB(TPE_Vec3 v1Min, TPE_Vec3 v1Max, TPE_Vec3 v2Min, + TPE_Vec3 v2Max); + +/** Mostly for internal use, checks and potentiall resolves collision of two + bodies so as to keep them outside given environment. Returns 1 if collision + happened or 0 otherwise. */ +uint8_t TPE_bodiesResolveCollision(TPE_Body *b1, TPE_Body *b2, + TPE_ClosestPointFunction env); + +/** Pins a joint of a body to specified location in space (sets its location + and zeros its velocity). */ +void TPE_jointPin(TPE_Joint *joint, TPE_Vec3 position); + +/** "Fakes" a rotation of a moving sphere by rotating it in the direction of + its movement; this can create the illusion of the sphere actually rotating + due to friction even if the physics sphere object (a body with a single joint) + isn't rotating at all. Returns a rotation in the "about axis" format (see + library conventions). */ +TPE_Vec3 TPE_fakeSphereRotation(TPE_Vec3 position1, TPE_Vec3 position2, + TPE_Unit radius); + +/** Casts a ray against environment and returns the closest hit of a surface. If + no surface was hit, a vector with all elements equal to TPE_INFINITY will be + returned. The function internally works differently for outside rays (rays + cast from the outside of the environment) and inside rays. Outside rays can + be traced with raymarching and will be processed very quickly and precisely; + in this case if any intersection is found, the function will try to return a + point outside (not guaranteed) the environment that's just in front of the hit + surface. Inside rays are difficult and slow to trace because environment + function won't provide distance, so the results aren't guaranteed to be + precise (the ray may miss some intersections); here rays will be traced by + given step (insideStepSize) and eventually iterated a bit towards the + intersection -- if any intersection is found, the function will try to return + a point inside (not guaranteed) the environment just before the hit + surface. */ +TPE_Vec3 TPE_castEnvironmentRay(TPE_Vec3 rayPos, TPE_Vec3 rayDir, + TPE_ClosestPointFunction environment, TPE_Unit insideStepSize, + TPE_Unit rayMarchMaxStep, uint32_t maxSteps); + +/** Casts a ray against bodies in a world (ignoring the environment), returns + the position of the closest hit as well as the hit body's index in bodyIndex + (unless the bodyIndex pointer is 0 in which case it is ignored). Similarly + with jointIndex. If no hit is found a vector with all elements equal to + TPE_INFINITY will be returned and bodyIndex will be -1. A specific body can be + excluded with excludeBody (negative value will just make this parameter + ignored). */ +TPE_Vec3 TPE_castBodyRay(TPE_Vec3 rayPos, TPE_Vec3 rayDir, int16_t excludeBody, + const TPE_World *world, int16_t *bodyIndex, int16_t *jointIndex); + +/** Performs one step (tick, frame, ...) of the physics world simulation + including updating positions and velocities of bodies, collision detection and + resolution, possible reshaping or deactivation of inactive bodies etc. The + time length of the step is relative to all other units but it's ideal if it is + 1/30th of a second. */ +void TPE_worldStep(TPE_World *world); + +void TPE_worldDeactivateAll(TPE_World *world); +void TPE_worldActivateAll(TPE_World *world); + +TPE_Unit TPE_worldGetNetSpeed(const TPE_World *world); +TPE_Unit TPE_bodyGetNetSpeed(const TPE_Body *body); +TPE_Unit TPE_bodyGetAverageSpeed(const TPE_Body *body); +void TPE_bodyMultiplyNetSpeed(TPE_Body *body, TPE_Unit factor); +void TPE_bodyLimitAverageSpeed(TPE_Body *body, TPE_Unit speedMin, + TPE_Unit speedMax); + +/** Deactivates a body (puts it to sleep until another collision or force wake + up). */ +void TPE_bodyDeactivate(TPE_Body *body); + +static inline uint8_t TPE_bodyIsActive(const TPE_Body *body); + +/** Attempts to shift the joints of a soft body so that the tension of all + springs becomes zero while keeping the joints near their current position. + This function performs one iteration of the equalizing algorithm and doesn't + guarantee a perfect solution, it may help to run multiple iterations (call + this function multiple times). */ +void TPE_bodyReshape(TPE_Body *body, TPE_ClosestPointFunction + environmentFunction); + +/** Mostly for internal use, performs some "magic" on body connections, mainly + cancelling out of velocities going against each other and also applying + connection friction in soft bodies. The strong parameter indicates if the + body is soft or not. */ +void TPE_bodyCancelOutVelocities(TPE_Body *body, uint8_t strong); + +/** Moves a body by certain offset. */ +void TPE_bodyMoveBy(TPE_Body *body, TPE_Vec3 offset); + +/** Moves a body (its center of mass) to given position. */ +void TPE_bodyMoveTo(TPE_Body *body, TPE_Vec3 position); + +/** Zeros velocities of all soft body joints. */ +void TPE_bodyStop(TPE_Body *body); + +void TPE_bodyActivate(TPE_Body *body); + +/** Adds velocity to a soft body. */ +void TPE_bodyAccelerate(TPE_Body *body, TPE_Vec3 velocity); + +void TPE_bodyApplyGravity(TPE_Body *body, TPE_Unit downwardsAccel); + +/** Adds angular velocity to a soft body. The rotation vector specifies the axis + of rotation by its direction and angular velocity by its magnitude (magnitude + of TPE_FRACTIONS_PER_UNIT will add linear velocity of TPE_FRACTIONS_PER_UNIT + per tick to a point in the distance of TPE_FRACTIONS_PER_UNIT from the + rotation axis). */ +void TPE_bodySpin(TPE_Body *body, TPE_Vec3 rotation); + +/** Same as TPE_bodySpin but additionally allows to specify the center of + the spin. */ +void TPE_bodySpinWithCenter(TPE_Body *body, TPE_Vec3 rotation, TPE_Vec3 center); + +/** Instantly rotates a body about an axis (see library conventions for + the rotation format). */ +void TPE_bodyRotateByAxis(TPE_Body *body, TPE_Vec3 rotation); + +/** Computes the center of mass of a body. This averages the position of all + joints; note that if you need, you may estimate the center of the body faster, + e.g. by taking a position of a single "center joint", or averaging just 2 + extreme points. */ +TPE_Vec3 TPE_bodyGetCenterOfMass(const TPE_Body *body); + +/** Draws a debug view of a 3D physics world using a provided pixel drawing + function. This can be used to overlay a simple visualization of the physics + objects to your main render, to spot exact borders of objects etc. The + function draws simple dotted lines and circles with different "colors" for + different types of objects (joints, connections, environemnt). camPos, camRot + and camView should match the camera settings of your main renderer. CamView.x + is horizontal resolution in pixels, camView.y is the vertical resolution, + CamView.z says the camera focal length (~FOV) in TPE_Units (0 means + orthographic projection). envGridRes is the resolution of an environment probe + grid (the function will probe points in space and draw borders of the physics + environemnt), envGridSize is the size (int TPE_Units) of the grid cell. Note + the function may be slow (reducing envGridRes can help, workable value can be + e.g. 16). */ +void TPE_worldDebugDraw(TPE_World *world, TPE_DebugDrawFunction drawFunc, + TPE_Vec3 camPos, TPE_Vec3 camRot, TPE_Vec3 camView, uint16_t envGridRes, + TPE_Unit envGridSize); + +#define TPE_DEBUG_COLOR_CONNECTION 0 +#define TPE_DEBUG_COLOR_JOINT 1 +#define TPE_DEBUG_COLOR_ENVIRONMENT 2 +#define TPE_DEBUG_COLOR_INACTIVE 3 + +uint32_t TPE_jointHash(const TPE_Joint *joint); +uint32_t TPE_connectionHash(const TPE_Connection *connection); +uint32_t TPE_bodyHash(const TPE_Body *body); + +/** Computes 32 bit hash of the world, useful for checking if two states of the + world differ. The function takes into account most of the relevant state but + possibly not all of it, for details check the code. */ +uint32_t TPE_worldHash(const TPE_World *world); + +// FUNCTIONS FOR GENERATING BODIES + +void TPE_makeBox(TPE_Joint joints[8], TPE_Connection connections[16], + TPE_Unit width, TPE_Unit depth, TPE_Unit height, TPE_Unit jointSize); +void TPE_makeCenterBox(TPE_Joint joints[9], TPE_Connection connections[18], + TPE_Unit width, TPE_Unit depth, TPE_Unit height, TPE_Unit jointSize); +void TPE_makeRect(TPE_Joint joints[4], TPE_Connection connections[6], + TPE_Unit width, TPE_Unit depth, TPE_Unit jointSize); +void TPE_makeTriangle(TPE_Joint joints[3], TPE_Connection connections[3], + TPE_Unit sideLength, TPE_Unit jointSize); +void TPE_makeCenterRect(TPE_Joint joints[5], TPE_Connection connections[8], + TPE_Unit width, TPE_Unit depth, TPE_Unit jointSize); +void TPE_makeCenterRectFull(TPE_Joint joints[5], TPE_Connection connections[10], + TPE_Unit width, TPE_Unit depth, TPE_Unit jointSize); +void TPE_make2Line(TPE_Joint joints[2], TPE_Connection connections[1], + TPE_Unit length, TPE_Unit jointSize); + +// FUNCTIONS FOR BUILDING ENVIRONMENT + +TPE_Vec3 TPE_envAABoxInside(TPE_Vec3 point, TPE_Vec3 center, TPE_Vec3 size); +TPE_Vec3 TPE_envAABox(TPE_Vec3 point, TPE_Vec3 center, TPE_Vec3 maxCornerVec); +TPE_Vec3 TPE_envBox(TPE_Vec3 point, TPE_Vec3 center, TPE_Vec3 maxCornerVec, + TPE_Vec3 rotation); +TPE_Vec3 TPE_envSphere(TPE_Vec3 point, TPE_Vec3 center, TPE_Unit radius); +TPE_Vec3 TPE_envSphereInside(TPE_Vec3 point, TPE_Vec3 center, TPE_Unit radius); +TPE_Vec3 TPE_envHalfPlane(TPE_Vec3 point, TPE_Vec3 center, TPE_Vec3 normal); +TPE_Vec3 TPE_envGround(TPE_Vec3 point, TPE_Unit height); +TPE_Vec3 TPE_envInfiniteCylinder(TPE_Vec3 point, TPE_Vec3 center, TPE_Vec3 + direction, TPE_Unit radius); +TPE_Vec3 TPE_envCylinder(TPE_Vec3 point, TPE_Vec3 center, TPE_Vec3 direction, + TPE_Unit radius); +TPE_Vec3 TPE_envCone(TPE_Vec3 point, TPE_Vec3 center, TPE_Vec3 direction, + TPE_Unit radius); +TPE_Vec3 TPE_envLineSegment(TPE_Vec3 point, TPE_Vec3 a, TPE_Vec3 b); +TPE_Vec3 TPE_envHeightmap(TPE_Vec3 point, TPE_Vec3 center, TPE_Unit gridSize, + TPE_Unit (*heightFunction)(int32_t x, int32_t y), TPE_Unit maxDist); + +/** Environment function for triagnular prism, e.g. for ramps. The sides array + contains three 2D coordinates of points of the triangle in given plane with + respect to the center. WARNING: the points must be specified in counter + clowckwise direction! The direction var specified axis direction (0, 1 or + 2).*/ +TPE_Vec3 TPE_envAATriPrism(TPE_Vec3 point, TPE_Vec3 center, + const TPE_Unit sides[6], TPE_Unit depth, uint8_t direction); + +/* The following are helper macros for creating a union of shapes inside an + environment function and accelerating them with bounding volumes. */ + +#define TPE_ENV_START(test,point) TPE_Vec3 _pBest = test, _pTest; \ + TPE_Unit _dBest = TPE_DISTANCE(_pBest,point), _dTest; \ + (void)(_pBest); (void)(_dBest); (void)(_dTest); (void)(_pTest); // supress war + +#define TPE_ENV_NEXT(test,point) \ + { if (_pBest.x == point.x && _pBest.y == point.y && _pBest.z == point.z) \ + return _pBest; \ + _pTest = test; _dTest = TPE_DISTANCE(_pTest,point); \ + if (_dTest < _dBest) { _pBest = _pTest; _dBest = _dTest; } } + +#define TPE_ENV_END return _pBest; + +#define TPE_ENV_BCUBE_TEST(bodyBCubeC,bodyBCubeR,envBCubeC,envBCubeR) ( \ + (TPE_abs(envBCubeC.x - bodyBCubeC.x) <= ((bodyBCubeR) + (envBCubeR))) && \ + (TPE_abs(envBCubeC.y - bodyBCubeC.y) <= ((bodyBCubeR) + (envBCubeR))) && \ + (TPE_abs(envBCubeC.z - bodyBCubeC.z) <= ((bodyBCubeR) + (envBCubeR)))) + +#define TPE_ENV_BSPHERE_TEST(bodyBSphereC,bodyBSphereR,envBSphereC,envBSphereR)\ + (TPE_DISTANCE(bodyBSphereC,envBSphereC) <= ((bodyBSphereR) + (envBSphereR))) + +//------------------------------------------------------------------------------ +// privates: + +uint16_t _TPE_body1Index, _TPE_body2Index, _TPE_joint1Index, _TPE_joint2Index; +TPE_CollisionCallback _TPE_collisionCallback; + +static inline TPE_Unit TPE_nonZero(TPE_Unit x) +{ + return x != 0 ? x : 1; +} + +static inline TPE_Unit TPE_connectionTension(TPE_Unit length, + TPE_Unit desiredLength) +{ + return (length * TPE_F) / desiredLength + - TPE_F; +} + +TPE_Joint TPE_joint(TPE_Vec3 position, TPE_Unit size) +{ + TPE_Joint result; + + result.velocity[0] = 0; + result.velocity[1] = 0; + result.velocity[2] = 0; + + result.position = position; + + size /= TPE_JOINT_SIZE_MULTIPLIER; + + if (size > 0xff) + { + TPE_LOG("WARNING: joint size too big in TPE_joint"); + } + + result.sizeDivided = size; + + return result; +} + +TPE_Vec3 TPE_vec3(TPE_Unit x, TPE_Unit y, TPE_Unit z) +{ + TPE_Vec3 r; + + r.x = x; + r.y = y; + r.z = z; + + return r; +} + +TPE_Unit TPE_sqrt(TPE_Unit x) +{ + int8_t sign = 1; + + if (x < 0) + { + sign = -1; + x *= -1; + } + + uint32_t result = 0; + uint32_t a = x; + uint32_t b = 1u << 30; + + while (b > a) + b >>= 2; + + while (b != 0) + { + if (a >= result + b) + { + a -= result + b; + result = result + 2 * b; + } + + b >>= 2; + result >>= 1; + } + + return result * sign; +} + +TPE_Unit TPE_vec3Len(TPE_Vec3 v) +{ +#define ANTI_OVERFLOW 25000 + if (v.x < ANTI_OVERFLOW && v.x > -1 * ANTI_OVERFLOW && + v.y < ANTI_OVERFLOW && v.y > -1 * ANTI_OVERFLOW && + v.z < ANTI_OVERFLOW && v.z > -1 * ANTI_OVERFLOW) + { + return TPE_sqrt(v.x * v.x + v.y * v.y + v.z * v.z); + } + else + { + v.x /= 32; v.y /= 32; v.z /= 32; + return TPE_sqrt(v.x * v.x + v.y * v.y + v.z * v.z) * 32; + } +#undef ANTI_OVERFLOW +} + +TPE_Unit TPE_vec3LenApprox(TPE_Vec3 v) +{ + // 48 sided polyhedron approximation + + if (v.x < 0) v.x *= -1; + if (v.y < 0) v.y *= -1; + if (v.z < 0) v.z *= -1; + + if (v.x < v.y) // order the coordinates + { + if (v.x < v.z) + { + if (v.y < v.z) + { // v.x < v.y < v.z + int32_t t = v.x; v.x = v.z; v.z = t; + } + else + { // v.x < v.z < v.y + int32_t t = v.x; v.x = v.y; v.y = t; + t = v.z; v.z = v.y; v.y = t; + } + } + else + { // v.z < v.x < v.y + int32_t t = v.x; v.x = v.y; v.y = t; + } + } + else + { + if (v.y < v.z) + { + if (v.x < v.z) + { // v.y < v.x < v.z + int32_t t = v.y; v.y = v.z; v.z = t; + t = v.x; v.x = v.y; v.y = t; + } + else + { // v.y < v.z < v.x + int32_t t = v.y; v.y = v.z; v.z = t; + } + } + } + + return (893 * v.x + 446 * v.y + 223 * v.z) / 1024; +} + +TPE_Unit TPE_dist(TPE_Vec3 p1, TPE_Vec3 p2) +{ + p1 = TPE_vec3Minus(p1,p2); + return TPE_vec3Len(p1); +} + +TPE_Unit TPE_distApprox(TPE_Vec3 p1, TPE_Vec3 p2) +{ + p1 = TPE_vec3Minus(p1,p2); + return TPE_vec3LenApprox(p1); +} + +void TPE_bodyInit(TPE_Body *body, + TPE_Joint *joints, uint8_t jointCount, + TPE_Connection *connections, uint8_t connectionCount, + TPE_Unit mass) +{ + body->joints = joints; + body->jointCount = jointCount; + body->connections = connections; + body->connectionCount = connectionCount; + body->deactivateCount = 0; + body->friction = TPE_F / 2; + body->elasticity = TPE_F / 2; + body->flags = 0; + body->jointMass = TPE_nonZero(mass / jointCount); + + for (uint32_t i = 0; i < connectionCount; ++i) + { + TPE_Unit d = TPE_DISTANCE( + joints[connections[i].joint1].position, + joints[connections[i].joint2].position); + + if (d > 0xffff) + { + TPE_LOG("WARNING: joint distance too long in TPE_bodyInit"); + } + + connections[i].length = d != 0 ? d : 1; // prevent later division by zero + } +} + +void TPE_worldInit(TPE_World *world, TPE_Body *bodies, uint16_t bodyCount, + TPE_ClosestPointFunction environmentFunction) +{ + world->bodies = bodies; + world->bodyCount = bodyCount; + world->environmentFunction = environmentFunction; + world->collisionCallback = 0; +} + +#define C(n,a,b) connections[n].joint1 = a; connections[n].joint2 = b; + +void TPE_make2Line(TPE_Joint joints[2], TPE_Connection connections[1], + TPE_Unit length, TPE_Unit jointSize) +{ + joints[0] = TPE_joint(TPE_vec3(length / 2,0,0),jointSize); + joints[1] = TPE_joint(TPE_vec3(length / -2,0,0),jointSize); + C(0, 0,1) +} + +void TPE_makeRect(TPE_Joint joints[4], TPE_Connection connections[6], + TPE_Unit width, TPE_Unit depth, TPE_Unit jointSize) +{ + width /= 2; + depth /= 2; + + for (uint8_t i = 0; i < 4; ++i) + joints[i] = TPE_joint(TPE_vec3((i % 2) ? -1 * width : width, + 0,(i / 2) ? - 1 * depth : depth),jointSize); + + C(0, 0,1) C(1, 0,2) C (2, 3,1) C(3, 3,2) + C(4, 0,3) C(5, 1,2) +} + +void TPE_makeCenterRect(TPE_Joint joints[5], TPE_Connection connections[8], + TPE_Unit width, TPE_Unit depth, TPE_Unit jointSize) +{ + TPE_makeRect(joints,connections,width,depth,jointSize); + + joints[4] = TPE_joint(TPE_vec3(0,0,0),jointSize); + + C(6, 0,4) C(7, 3,4) +} + +void TPE_makeCenterRectFull(TPE_Joint joints[5], TPE_Connection connections[10], + TPE_Unit width, TPE_Unit depth, TPE_Unit jointSize) +{ + TPE_makeCenterRect(joints,connections,width,depth,jointSize); + C(8, 1,4) C(9, 2,4) +} + +void TPE_makeTriangle(TPE_Joint joints[3], TPE_Connection connections[3], + TPE_Unit sideLength, TPE_Unit jointSize) +{ + joints[0] = TPE_joint(TPE_vec3(sideLength / 2,0, + TPE_sqrt((sideLength * sideLength) / 2) / 2),jointSize); + + joints[1] = joints[0]; + joints[1].position.x *= -1; + joints[2] = TPE_joint(TPE_vec3(0,0,-1 * joints[0].position.z),jointSize); + + C(0, 0,1) C(1, 1,2) C(2, 2,0) +} + +void TPE_makeBox(TPE_Joint joints[8], TPE_Connection connections[16], + TPE_Unit width, TPE_Unit depth, TPE_Unit height, TPE_Unit jointSize) +{ + width /= 2; + depth /= 2; + height /= 2; + + for (uint8_t i = 0; i < 8; ++i) + joints[i] = TPE_joint( + TPE_vec3( + (i % 2) ? width : (-1 * width), + ((i >> 2) % 2) ? height : (-1 * height), + ((i >> 1) % 2) ? depth : (-1 * depth)), + jointSize); + + C(0, 0,1) C(1, 1,3) C(2, 3,2) C(3, 2,0) // top + C(4, 4,5) C(5, 5,7) C(6, 7,6) C(7, 6,4) // bottom + C(8, 0,4) C(9, 1,5) C(10,3,7) C(11,2,6) // middle + C(12,0,7) C(13,1,6) C(14,2,5) C(15,3,4) // diagonal +} + +void TPE_makeCenterBox(TPE_Joint joints[9], TPE_Connection connections[18], + TPE_Unit width, TPE_Unit depth, TPE_Unit height, TPE_Unit jointSize) +{ + TPE_makeBox(joints,connections,width,depth,height,jointSize); + + joints[8] = TPE_joint(TPE_vec3(0,0,0),jointSize); + + C(16, 0,8) C(17, 7,8) +} + +#undef C + +void TPE_bodyDeactivate(TPE_Body *body) +{ + body->flags |= TPE_BODY_FLAG_DEACTIVATED; +} + +void TPE_worldStep(TPE_World *world) +{ + _TPE_collisionCallback = world->collisionCallback; + + for (uint16_t i = 0; i < world->bodyCount; ++i) + { + TPE_Body *body = world->bodies + i; + + if (body->flags & (TPE_BODY_FLAG_DEACTIVATED | TPE_BODY_FLAG_DISABLED)) + continue; + + TPE_Joint *joint = body->joints, *joint2; + + TPE_Vec3 origPos = body->joints[0].position; + + for (uint16_t j = 0; j < body->jointCount; ++j) // apply velocities + { + // non-rotating bodies will copy the 1st joint's velocity + + if (body->flags & TPE_BODY_FLAG_NONROTATING) + for (uint8_t k = 0; k < 3; ++k) + joint->velocity[k] = body->joints[0].velocity[k]; + + joint->position.x += joint->velocity[0]; + joint->position.y += joint->velocity[1]; + joint->position.z += joint->velocity[2]; + + joint++; + } + + TPE_Connection *connection = body->connections; + + TPE_Vec3 aabbMin, aabbMax; + + TPE_bodyGetAABB(body,&aabbMin,&aabbMax); + + _TPE_body1Index = i; + + _TPE_body2Index = _TPE_body1Index; + + uint8_t collided = + TPE_bodyEnvironmentResolveCollision(body,world->environmentFunction); + + if (body->flags & TPE_BODY_FLAG_NONROTATING) + { + /* Non-rotating bodies may end up still colliding after environment coll + resolvement (unlike rotating bodies where each joint is ensured separately + to not collide). So if still in collision, we try a few more times. If not + successful, we simply undo any shifts we've done. This should absolutely + prevent any body escaping out of environment bounds. */ + + for (uint8_t i = 0; i < TPE_NONROTATING_COLLISION_RESOLVE_ATTEMPTS; ++i) + { + if (!collided) + break; + + collided = + TPE_bodyEnvironmentResolveCollision(body,world->environmentFunction); + } + + if (collided && + TPE_bodyEnvironmentCollide(body,world->environmentFunction)) + TPE_bodyMoveBy(body,TPE_vec3Minus(origPos,body->joints[0].position)); + } + else // normal, rotating bodies + { + TPE_Unit bodyTension = 0; + + for (uint16_t j = 0; j < body->connectionCount; ++j) // joint tension + { + joint = &(body->joints[connection->joint1]); + joint2 = &(body->joints[connection->joint2]); + + TPE_Vec3 dir = TPE_vec3Minus(joint2->position,joint->position); + + TPE_Unit tension = TPE_connectionTension(TPE_LENGTH(dir), + connection->length); + + bodyTension += tension > 0 ? tension : -tension; + + if (tension > TPE_TENSION_ACCELERATION_THRESHOLD || + tension < -1 * TPE_TENSION_ACCELERATION_THRESHOLD) + { + TPE_vec3Normalize(&dir); + + if (tension > TPE_TENSION_GREATER_ACCELERATION_THRESHOLD || + tension < -1 * TPE_TENSION_GREATER_ACCELERATION_THRESHOLD) + { + /* apply twice the acceleration after a second threshold, not so + elegant but seems to work :) */ + dir.x *= 2; + dir.y *= 2; + dir.z *= 2; + } + + dir.x /= TPE_TENSION_ACCELERATION_DIVIDER; + dir.y /= TPE_TENSION_ACCELERATION_DIVIDER; + dir.z /= TPE_TENSION_ACCELERATION_DIVIDER; + + if (tension < 0) + { + dir.x *= -1; + dir.y *= -1; + dir.z *= -1; + } + + joint->velocity[0] += dir.x; + joint->velocity[1] += dir.y; + joint->velocity[2] += dir.z; + + joint2->velocity[0] -= dir.x; + joint2->velocity[1] -= dir.y; + joint2->velocity[2] -= dir.z; + } + + connection++; + } + + if (body->connectionCount > 0) + { + uint8_t hard = !(body->flags & TPE_BODY_FLAG_SOFT); + + if (hard) + { + TPE_bodyReshape(body,world->environmentFunction); + + bodyTension /= body->connectionCount; + + if (bodyTension > TPE_RESHAPE_TENSION_LIMIT) + for (uint8_t k = 0; k < TPE_RESHAPE_ITERATIONS; ++k) + TPE_bodyReshape(body,world->environmentFunction); + } + + if (!(body->flags & TPE_BODY_FLAG_SIMPLE_CONN)) + TPE_bodyCancelOutVelocities(body,hard); + } + } + + for (uint16_t j = 0; j < world->bodyCount; ++j) + { + if (j > i || (world->bodies[j].flags & TPE_BODY_FLAG_DEACTIVATED)) + { + // firstly quick-check collision of body AA bounding boxes + + TPE_Vec3 aabbMin2, aabbMax2; + TPE_bodyGetAABB(&world->bodies[j],&aabbMin2,&aabbMax2); + + _TPE_body2Index = j; + + if (TPE_checkOverlapAABB(aabbMin,aabbMax,aabbMin2,aabbMax2) && + TPE_bodiesResolveCollision(body,world->bodies + j, + world->environmentFunction)) + { + TPE_bodyActivate(body); + body->deactivateCount = TPE_LIGHT_DEACTIVATION; + + TPE_bodyActivate(world->bodies + j); + world->bodies[j].deactivateCount = TPE_LIGHT_DEACTIVATION; + } + } + } + + if (!(body->flags & TPE_BODY_FLAG_ALWAYS_ACTIVE)) + { + if (body->deactivateCount >= TPE_DEACTIVATE_AFTER) + { + TPE_bodyStop(body); + body->deactivateCount = 0; + body->flags |= TPE_BODY_FLAG_DEACTIVATED; + } + else if (TPE_bodyGetAverageSpeed(body) <= TPE_LOW_SPEED) + body->deactivateCount++; + else + body->deactivateCount = 0; + } + } +} + +void TPE_bodyActivate(TPE_Body *body) +{ + // the if check has to be here, don't remove it + + if (body->flags & TPE_BODY_FLAG_DEACTIVATED) + { + TPE_bodyStop(body); + body->flags &= ~TPE_BODY_FLAG_DEACTIVATED; + body->deactivateCount = 0; + } +} + +TPE_Unit TPE_bodyGetNetSpeed(const TPE_Body *body) +{ +#if TPE_APPROXIMATE_NET_SPEED + TPE_Vec3 netV = TPE_vec3(0,0,0); + + const TPE_Joint *joint = body->joints; + + for (uint16_t i = 0; i < body->jointCount; ++i) + { + netV.x += TPE_abs(joint->velocity[0]); + netV.y += TPE_abs(joint->velocity[1]); + netV.z += TPE_abs(joint->velocity[2]); + + joint++; + } + + return TPE_vec3LenApprox(netV); +#else + TPE_Unit velocity = 0; + + const TPE_Joint *joint = body->joints; + + for (uint16_t i = 0; i < body->jointCount; ++i) + { + velocity += TPE_LENGTH( + TPE_vec3(joint->velocity[0],joint->velocity[1],joint->velocity[2])); + + joint++; + } + + return velocity; +#endif +} + +TPE_Unit TPE_bodyGetAverageSpeed(const TPE_Body *body) +{ + return TPE_bodyGetNetSpeed(body) / body->jointCount; +} + +void TPE_bodyMultiplyNetSpeed(TPE_Body *body, TPE_Unit factor) +{ + TPE_Joint *joint = body->joints; + + for (uint16_t j = 0; j < body->jointCount; ++j) + { + for (uint8_t k = 0; k < 3; ++k) + joint->velocity[k] = + (((TPE_Unit) joint->velocity[k]) * factor) / + TPE_F; + + joint++; + } +} + +void TPE_bodyLimitAverageSpeed(TPE_Body *body, TPE_Unit speedMin, + TPE_Unit speedMax) +{ + for (uint8_t i = 0; i < 16; ++i) + { + TPE_Unit speed = TPE_bodyGetAverageSpeed(body); + + if (speed >= speedMin && speed <= speedMax) + return; + + TPE_Unit fraction = + (((speedMax + speedMin) / 2) * TPE_F) / + TPE_nonZero(speed); + + TPE_bodyMultiplyNetSpeed(body,fraction); + } +} + +void TPE_bodyCancelOutVelocities(TPE_Body *body, uint8_t strong) +{ + for (uint16_t i = 0; i < body->connectionCount; ++i) + { + TPE_Connection *c = &body->connections[i]; + + TPE_Joint *j1 = &(body->joints[c->joint1]); + TPE_Joint *j2 = &(body->joints[c->joint2]); + + TPE_Vec3 dir = TPE_vec3Minus(j2->position,j1->position); + + TPE_Unit len = TPE_nonZero(TPE_LENGTH(dir)); + + uint8_t cancel = 1; + + if (strong) + { + TPE_Unit tension = TPE_connectionTension(len,c->length); + + cancel = tension <= TPE_TENSION_ACCELERATION_THRESHOLD && + tension >= -1 * TPE_TENSION_ACCELERATION_THRESHOLD; + } + + if (cancel) + { + TPE_Vec3 + v1 = TPE_vec3(j1->velocity[0],j1->velocity[1],j1->velocity[2]), + v2 = TPE_vec3(j2->velocity[0],j2->velocity[1],j2->velocity[2]); + + dir.x = (dir.x * TPE_F) / len; // normalize + dir.y = (dir.y * TPE_F) / len; + dir.z = (dir.z * TPE_F) / len; + + v1 = TPE_vec3ProjectNormalized(v1,dir); + v2 = TPE_vec3ProjectNormalized(v2,dir); + + TPE_Vec3 avg = TPE_vec3Plus(v1,v2); + + avg.x /= 2; + avg.y /= 2; + avg.z /= 2; + + if (strong) + { + j1->velocity[0] = j1->velocity[0] - v1.x + avg.x; + j1->velocity[1] = j1->velocity[1] - v1.y + avg.y; + j1->velocity[2] = j1->velocity[2] - v1.z + avg.z; + + j2->velocity[0] = j2->velocity[0] - v2.x + avg.x; + j2->velocity[1] = j2->velocity[1] - v2.y + avg.y; + j2->velocity[2] = j2->velocity[2] - v2.z + avg.z; + } + else + { + j1->velocity[0] = j1->velocity[0] - v1.x + (v1.x * 3 + avg.x) / 4; + j1->velocity[1] = j1->velocity[1] - v1.y + (v1.y * 3 + avg.y) / 4; + j1->velocity[2] = j1->velocity[2] - v1.z + (v1.z * 3 + avg.z) / 4; + + j2->velocity[0] = j2->velocity[0] - v2.x + (v2.x * 3 + avg.x) / 4; + j2->velocity[1] = j2->velocity[1] - v2.y + (v2.y * 3 + avg.y) / 4; + j2->velocity[2] = j2->velocity[2] - v2.z + (v2.z * 3 + avg.z) / 4; + } + } + } +} + +void TPE_bodyReshape(TPE_Body *body, + TPE_ClosestPointFunction environmentFunction) +{ + for (uint16_t i = 0; i < body->connectionCount; ++i) + { + TPE_Connection *c = &body->connections[i]; + + TPE_Joint *j1 = &(body->joints[c->joint1]); + TPE_Joint *j2 = &(body->joints[c->joint2]); + + TPE_Vec3 dir = TPE_vec3Minus(j2->position,j1->position); + + TPE_Vec3 middle = TPE_vec3Plus(j1->position,j2->position); + + middle.x /= 2; + middle.y /= 2; + middle.z /= 2; + + TPE_vec3Normalize(&dir); + + dir.x = (dir.x * c->length) / TPE_F; + dir.y = (dir.y * c->length) / TPE_F; + dir.z = (dir.z * c->length) / TPE_F; + + TPE_Vec3 positionBackup = j1->position; + + j1->position.x = middle.x - dir.x / 2; + j1->position.y = middle.y - dir.y / 2; + j1->position.z = middle.z - dir.z / 2; + + if (environmentFunction != 0 && TPE_LENGTH(TPE_vec3Minus(j1->position, + environmentFunction(j1->position,TPE_JOINT_SIZE(*j1)))) + < TPE_JOINT_SIZE(*j1)) + j1->position = positionBackup; + + positionBackup = j2->position; + + j2->position.x = j1->position.x + dir.x; + j2->position.y = j1->position.y + dir.y; + j2->position.z = j1->position.z + dir.z; + + if (environmentFunction != 0 && TPE_LENGTH(TPE_vec3Minus(j2->position, + environmentFunction(j2->position,TPE_JOINT_SIZE(*j2)))) + < TPE_JOINT_SIZE(*j2)) + j2->position = positionBackup; + } +} + +TPE_Vec3 TPE_vec3Plus(TPE_Vec3 v1, TPE_Vec3 v2) +{ + v1.x += v2.x; + v1.y += v2.y; + v1.z += v2.z; + + return v1; +} + +TPE_Vec3 TPE_vec3Minus(TPE_Vec3 v1, TPE_Vec3 v2) +{ + v1.x -= v2.x; + v1.y -= v2.y; + v1.z -= v2.z; + + return v1; +} + +void TPE_vec3Normalize(TPE_Vec3 *v) +{ + TPE_Unit l = TPE_LENGTH(*v); + + if (l == 0) + *v = TPE_vec3(TPE_F,0,0); + else + { + if (l < 16) // too short vec would cause inacurracte normalization + { + v->x *= 8; + v->y *= 8; + v->z *= 8; + l = TPE_LENGTH(*v); + } + + v->x = (v->x * TPE_F) / l; + v->y = (v->y * TPE_F) / l; + v->z = (v->z * TPE_F) / l; + } +} + +TPE_Vec3 TPE_bodyGetRotation(const TPE_Body *body, uint16_t joint1, + uint16_t joint2, uint16_t joint3) +{ + return TPE_rotationFromVecs( + TPE_vec3Minus( + body->joints[joint2].position, + body->joints[joint1].position), + TPE_vec3Minus( + body->joints[joint3].position, + body->joints[joint1].position)); +} + +TPE_Vec3 TPE_bodyGetCenterOfMass(const TPE_Body *body) +{ + // note that joint sizes don't play a role as all weight the same + + TPE_Vec3 result = TPE_vec3(0,0,0); + + const TPE_Joint *j = body->joints; + + for (uint16_t i = 0; i < body->jointCount; ++i) + { + result = TPE_vec3Plus(result,j->position); + j++; + } + + result.x /= body->jointCount; + result.y /= body->jointCount; + result.z /= body->jointCount; + + return result; +} + +void TPE_bodySpinWithCenter(TPE_Body *body, TPE_Vec3 rotation, TPE_Vec3 center) +{ + for (uint16_t i = 0; i < body->jointCount; ++i) + { + TPE_Joint *j = body->joints + i; + + TPE_Vec3 toPoint = TPE_vec3Minus(j->position,center); + + toPoint = TPE_vec3Project(toPoint,rotation); + toPoint = TPE_vec3Plus(center,toPoint); + toPoint = TPE_vec3Minus(j->position,toPoint); + toPoint = TPE_vec3Cross(toPoint,rotation); + + j->velocity[0] += toPoint.x; + j->velocity[1] += toPoint.y; + j->velocity[2] += toPoint.z; + } +} + +void TPE_bodySpin(TPE_Body *body, TPE_Vec3 rotation) +{ + TPE_bodySpinWithCenter(body,rotation,TPE_bodyGetCenterOfMass(body)); +} + +TPE_Vec3 _TPE_rotateByAxis(TPE_Vec3 p, TPE_Vec3 axisNormalized, TPE_Unit angle) +{ + TPE_Vec3 projected = TPE_vec3ProjectNormalized(p,axisNormalized); + + TPE_Vec3 a = TPE_vec3Minus(p,projected); + + if (a.x == 0 && a.y == 0 && a.z == 0) + return p; + + TPE_Vec3 b = TPE_vec3Cross(a,axisNormalized); + + return TPE_vec3Plus(projected,TPE_vec3Plus( + TPE_vec3Times(a,TPE_cos(angle)), + TPE_vec3Times(b,TPE_sin(angle)))); +} + +void TPE_bodyRotateByAxis(TPE_Body *body, TPE_Vec3 rotation) +{ + TPE_Vec3 bodyCenter = TPE_bodyGetCenterOfMass(body); + TPE_Unit angle = TPE_LENGTH(rotation); + + TPE_vec3Normalize(&rotation); + + for (uint16_t i = 0; i < body->jointCount; ++i) + { + TPE_Vec3 toPoint = TPE_vec3Minus(body->joints[i].position,bodyCenter); + body->joints[i].position = TPE_vec3Plus(bodyCenter, + _TPE_rotateByAxis(toPoint,rotation,angle)); + } +} + +TPE_Vec3 TPE_vec3Cross(TPE_Vec3 v1, TPE_Vec3 v2) +{ + TPE_Vec3 r; + + r.x = (v1.y * v2.z - v1.z * v2.y) / TPE_F; + r.y = (v1.z * v2.x - v1.x * v2.z) / TPE_F; + r.z = (v1.x * v2.y - v1.y * v2.x) / TPE_F; + + return r; +} + +TPE_Vec3 TPE_vec3ProjectNormalized(TPE_Vec3 v, TPE_Vec3 baseNormalized) +{ + TPE_Vec3 r; + + TPE_Unit p = TPE_vec3Dot(v,baseNormalized); + + r.x = (p * baseNormalized.x) / TPE_F; + r.y = (p * baseNormalized.y) / TPE_F; + r.z = (p * baseNormalized.z) / TPE_F; + + return r; +} + +TPE_Vec3 TPE_vec3Project(TPE_Vec3 v, TPE_Vec3 base) +{ + TPE_vec3Normalize(&base); + + return TPE_vec3ProjectNormalized(v,base); +} + +void TPE_bodyMoveBy(TPE_Body *body, TPE_Vec3 offset) +{ + for (uint16_t i = 0; i < body->jointCount; ++i) + body->joints[i].position = TPE_vec3Plus(body->joints[i].position, + offset); +} + +void TPE_bodyApplyGravity(TPE_Body *body, TPE_Unit downwardsAccel) +{ + if ((body->flags & TPE_BODY_FLAG_DEACTIVATED) || + (body->flags & TPE_BODY_FLAG_DISABLED)) + return; + + for (uint16_t i = 0; i < body->jointCount; ++i) + body->joints[i].velocity[1] -= downwardsAccel; +} + +void TPE_bodyAccelerate(TPE_Body *body, TPE_Vec3 velocity) +{ + TPE_bodyActivate(body); + + for (uint16_t i = 0; i < body->jointCount; ++i) + { + body->joints[i].velocity[0] += velocity.x; + body->joints[i].velocity[1] += velocity.y; + body->joints[i].velocity[2] += velocity.z; + } +} + +void TPE_bodyStop(TPE_Body *body) +{ + for (uint16_t i = 0; i < body->jointCount; ++i) + { + body->joints[i].velocity[0] = 0; + body->joints[i].velocity[1] = 0; + body->joints[i].velocity[2] = 0; + } +} + +void _TPE_bodyNonrotatingJointCollided(TPE_Body *b, int16_t jointIndex, + TPE_Vec3 origPos, uint8_t success) +{ + origPos = TPE_vec3Minus(b->joints[jointIndex].position,origPos); + + for (uint16_t i = 0; i < b->jointCount; ++i) + if (i != jointIndex) + { + b->joints[i].position = TPE_vec3Plus(b->joints[i].position,origPos); + + if (success) + for (uint8_t j = 0; j < 3; ++j) + b->joints[i].velocity[j] = b->joints[jointIndex].velocity[j]; + } +} + +TPE_Unit TPE_vec3Dot(TPE_Vec3 v1, TPE_Vec3 v2) +{ + return (v1.x * v2.x + v1.y * v2.y + v1.z * v2.z) / TPE_F; +} + +TPE_Unit TPE_cos(TPE_Unit x) +{ + return TPE_sin(x + TPE_F / 4); +} + +TPE_Unit TPE_sin(TPE_Unit x) +{ + int8_t sign = 1; + + if (x < 0) // odd function + { + x *= -1; + sign = -1; + } + + x %= TPE_F; + + if (x > TPE_F / 2) + { + x -= TPE_F / 2; + sign *= -1; + } + + TPE_Unit tmp = TPE_F - 2 * x; + + #define _PI2 5053 // 9.8696044 * TPE_F + return sign * // Bhaskara's approximation + (((32 * x * _PI2) / TPE_F) * tmp) / + ((_PI2 * (5 * TPE_F - (8 * x * tmp) / + TPE_F)) / TPE_F); + #undef _PI2 +} + +uint8_t TPE_bodiesResolveCollision(TPE_Body *b1, TPE_Body *b2, + TPE_ClosestPointFunction env) +{ + uint8_t r = 0; + + for (uint16_t i = 0; i < b1->jointCount; ++i) + for (uint16_t j = 0; j < b2->jointCount; ++j) + { + TPE_Vec3 origPos2 = b2->joints[j].position; + TPE_Vec3 origPos1 = b1->joints[i].position; + + _TPE_joint1Index = i; + _TPE_joint2Index = j; + + if (TPE_jointsResolveCollision(&(b1->joints[i]),&(b2->joints[j]), + b1->jointMass,b2->jointMass,(b1->elasticity + b2->elasticity) / 2, + (b1->friction + b2->friction) / 2,env)) + { + r = 1; + + if (b1->flags & TPE_BODY_FLAG_NONROTATING) + _TPE_bodyNonrotatingJointCollided(b1,i,origPos1,1); + + if (b2->flags & TPE_BODY_FLAG_NONROTATING) + _TPE_bodyNonrotatingJointCollided(b2,j,origPos2,1); + } + } + + return r; +} + +uint8_t TPE_jointsResolveCollision(TPE_Joint *j1, TPE_Joint *j2, + TPE_Unit mass1, TPE_Unit mass2, TPE_Unit elasticity, TPE_Unit friction, + TPE_ClosestPointFunction env) +{ + TPE_Vec3 dir = TPE_vec3Minus(j2->position,j1->position); + + TPE_Unit d = TPE_LENGTH(dir) - TPE_JOINT_SIZE(*j1) - TPE_JOINT_SIZE(*j2); + + if (d < 0) // collision? + { + if (_TPE_collisionCallback != 0 && !_TPE_collisionCallback( + _TPE_body1Index,_TPE_joint1Index,_TPE_body2Index,_TPE_joint2Index, + TPE_vec3Plus(j1->position,dir))) + return 0; + + TPE_Vec3 + pos1Backup = j1->position, + pos2Backup = j2->position; + + // separate joints, the shift distance will depend on the weight ratio: + + d = -1 * d + TPE_COLLISION_RESOLUTION_MARGIN; + + TPE_vec3Normalize(&dir); + + TPE_Unit ratio = (mass2 * TPE_F) / + TPE_nonZero(mass1 + mass2); + + TPE_Unit shiftDistance = (ratio * d) / TPE_F; + + TPE_Vec3 shift = TPE_vec3Times(dir,shiftDistance); + + j1->position = TPE_vec3Minus(j1->position,shift); + + shiftDistance = d - shiftDistance; + + shift = TPE_vec3Times(dir,shiftDistance); + + j2->position = TPE_vec3Plus(j2->position,shift); + + // compute new velocities: + + TPE_Unit v1, v2; + + TPE_Vec3 vel = TPE_vec3(j1->velocity[0],j1->velocity[1],j1->velocity[2]); + + vel = TPE_vec3Project(vel,dir); + + j1->velocity[0] = j1->velocity[0] - vel.x; + j1->velocity[1] = j1->velocity[1] - vel.y; + j1->velocity[2] = j1->velocity[2] - vel.z; + + /* friction explanation: Not physically correct (doesn't depend on load), + friction basically means we weighted average the velocities of the bodies + in the direction perpendicular to the hit normal, in the ratio of their + masses, friction coefficient just says how much of this effect we apply + (it multiplies the friction vectors we are subtracting) */ + + TPE_Vec3 frictionVec = + TPE_vec3(j1->velocity[0],j1->velocity[1],j1->velocity[2]); + + v1 = TPE_vec3Dot(vel,dir); + vel = TPE_vec3(j2->velocity[0],j2->velocity[1],j2->velocity[2]); + vel = TPE_vec3Project(vel,dir); + + j2->velocity[0] = j2->velocity[0] - vel.x; + j2->velocity[1] = j2->velocity[1] - vel.y; + j2->velocity[2] = j2->velocity[2] - vel.z; + + frictionVec = TPE_vec3Minus( + TPE_vec3(j2->velocity[0],j2->velocity[1],j2->velocity[2]), + frictionVec); + + v2 = TPE_vec3Dot(vel,dir); + + TPE_getVelocitiesAfterCollision(&v1,&v2,mass1,mass2,elasticity); + + vel = TPE_vec3Times(dir,v1); + +#define assignVec(j,i,d,o) \ + j->velocity[i] = j->velocity[i] + vel.d o (((frictionVec.d * ratio) / \ + TPE_F) * friction) / TPE_F; + + assignVec(j1,0,x,+) + assignVec(j1,1,y,+) + assignVec(j1,2,z,+) + + vel = TPE_vec3Times(dir,v2); + + ratio = TPE_F - ratio; + + assignVec(j2,0,x,-) + assignVec(j2,1,y,-) + assignVec(j2,2,z,-) + +#undef assignVec + + if (env != 0) + { + // ensure the joints aren't colliding with environment + + if (TPE_jointEnvironmentResolveCollision(j1,elasticity,friction,env) == 2) + j1->position = pos1Backup; + + if (TPE_jointEnvironmentResolveCollision(j2,elasticity,friction,env) == 2) + j2->position = pos2Backup; + } + + return 1; + } + + return 0; +} + +TPE_Vec3 TPE_vec3Times(TPE_Vec3 v, TPE_Unit units) +{ + v.x = (v.x * units) / TPE_F; + v.y = (v.y * units) / TPE_F; + v.z = (v.z * units) / TPE_F; + + return v; +} + +TPE_Vec3 TPE_vec3TimesPlain(TPE_Vec3 v, TPE_Unit q) +{ + v.x *= q; + v.y *= q; + v.z *= q; + + return v; +} + +void TPE_getVelocitiesAfterCollision(TPE_Unit *v1, TPE_Unit *v2, + TPE_Unit m1, TPE_Unit m2, TPE_Unit elasticity) +{ + /* In the following a lot of TPE_F cancel out, feel free to + check if confused. */ + + TPE_Unit m1Pm2 = TPE_nonZero(m1 + m2); + TPE_Unit v2Mv1 = TPE_nonZero(*v2 - *v1); + + TPE_Unit m1v1Pm2v2 = ((m1 * *v1) + (m2 * *v2)); + + *v1 = (((elasticity * m2 / TPE_F) * v2Mv1) + + m1v1Pm2v2) / m1Pm2; + + *v2 = (((elasticity * m1 / TPE_F) * -1 * v2Mv1) + + m1v1Pm2v2) / m1Pm2; +} + +uint8_t TPE_jointEnvironmentResolveCollision(TPE_Joint *joint, + TPE_Unit elasticity, TPE_Unit friction, TPE_ClosestPointFunction env) +{ + TPE_Vec3 toJoint = + TPE_vec3Minus(joint->position,env(joint->position,TPE_JOINT_SIZE(*joint))); + + TPE_Unit len = TPE_LENGTH(toJoint); + + if (len <= TPE_JOINT_SIZE(*joint)) + { + if (_TPE_collisionCallback != 0) + if (!_TPE_collisionCallback(_TPE_body1Index, + _TPE_joint1Index,_TPE_body2Index,_TPE_joint2Index, + TPE_vec3Minus(joint->position,toJoint))) + return 0; + + // colliding + + TPE_Vec3 positionBackup = joint->position, shift; + uint8_t success = 0; + + if (len > 0) + { + /* Joint center is still outside the geometry so we can determine the + normal and use it to shift it outside. This can still leave the joint + colliding though, so try to repeat it a few times. */ + + for (int i = 0; i < TPE_COLLISION_RESOLUTION_ITERATIONS; ++i) + { + shift = toJoint; + + TPE_vec3Normalize(&shift); + + shift = TPE_vec3Times(shift,TPE_JOINT_SIZE(*joint) - len + + TPE_COLLISION_RESOLUTION_MARGIN); + + joint->position = TPE_vec3Plus(joint->position,shift); + + toJoint = TPE_vec3Minus(joint->position,env(joint->position, + TPE_JOINT_SIZE(*joint))); + + len = TPE_LENGTH(toJoint); // still colliding? + + if (len >= TPE_JOINT_SIZE(*joint)) + { + success = 1; + break; + } + } + } + + if (!success) + { + /* Shifting along normal was unsuccessfull, now try different approach: + shift back by joint velocity. */ + + shift = TPE_vec3(-1 * joint->velocity[0],-1 * joint->velocity[1], + -1 * joint->velocity[2]); + + for (int i = 0; i < TPE_COLLISION_RESOLUTION_ITERATIONS; ++i) + { + joint->position = TPE_vec3Plus(joint->position,shift); + + toJoint = TPE_vec3Minus(joint->position, + env(joint->position,TPE_JOINT_SIZE(*joint))); + + len = TPE_LENGTH(toJoint); // still colliding? + + if (len >= TPE_JOINT_SIZE(*joint)) + { + success = 1; + break; + } + + shift.x /= 2; // decrease the step a bit + shift.y /= 2; + shift.z /= 2; + } + } + + if (success) + { + TPE_Vec3 vel = TPE_vec3(joint->velocity[0],joint->velocity[1], + joint->velocity[2]); + + vel = TPE_vec3Project(vel,shift); // parallel part of velocity + + TPE_Vec3 vel2 = TPE_vec3Minus( // perpendicular part of velocity + TPE_vec3(joint->velocity[0],joint->velocity[1],joint->velocity[2]),vel); + + vel2 = TPE_vec3Times(vel2,friction); + + vel = TPE_vec3Times(vel,TPE_F + elasticity); + + joint->velocity[0] -= vel.x + vel2.x; + joint->velocity[1] -= vel.y + vel2.y; + joint->velocity[2] -= vel.z + vel2.z; + } + else + { + TPE_LOG("WARNING: joint-environment collision couldn't be resolved"); + + joint->position = positionBackup; + joint->velocity[0] = 0; + joint->velocity[1] = 0; + joint->velocity[2] = 0; + + return 2; + } + + return 1; + } + + return 0; +} + +uint8_t TPE_bodyEnvironmentCollide(const TPE_Body *body, + TPE_ClosestPointFunction env) +{ + for (uint16_t i = 0; i < body->jointCount; ++i) + { + const TPE_Joint *joint = body->joints + i; + + TPE_Unit size = TPE_JOINT_SIZE(*joint); + + if (TPE_DISTANCE(joint->position,env(joint->position,size)) <= size) + return 1; + } + + return 0; +} + +void TPE_bodyGetFastBSphere(const TPE_Body *body, TPE_Vec3 *center, + TPE_Unit *radius) +{ + TPE_Vec3 b; + + TPE_bodyGetAABB(body,center,&b); + + center->x = (center->x + b.x) / 2; + center->y = (center->y + b.y) / 2; + center->z = (center->z + b.z) / 2; + + *radius = TPE_DISTANCE(*center,b); +} + +void TPE_bodyGetBSphere(const TPE_Body *body, TPE_Vec3 *center, + TPE_Unit *radius) +{ + *radius = TPE_INFINITY; + *center = TPE_bodyGetCenterOfMass(body); + + const TPE_Joint *j = body->joints; + + for (uint16_t i = 0; i < body->jointCount; ++i) + { + TPE_Vec3 diff; + + TPE_Unit js = TPE_JOINT_SIZE(*j); + + /* Sadly we have to have these conditions here which slow this down. If we + were only computing a BB sphere of a point cloud, we wouldn't have to + compute abs vals (as squaring would effectively compute them), but here + we need to add joint size which needs to know about the sign. */ + + diff.x = ((center->x > j->position.x) ? + (center->x - j->position.x) : (j->position.x - center->x)) + js; + + diff.y = ((center->y > j->position.y) ? + (center->y - j->position.y) : (j->position.y - center->y)) + js; + + diff.z = ((center->z > j->position.z) ? + (center->z - j->position.z) : (j->position.z - center->z)) + js; + + TPE_Unit distSquared = + diff.x * diff.x + diff.y * diff.y + diff.z * diff.z; + + if (distSquared < *radius) + *radius = distSquared; + + j++; + } + + *radius = TPE_sqrt(*radius); +} + +uint8_t TPE_bodyEnvironmentResolveCollision(TPE_Body *body, + TPE_ClosestPointFunction env) +{ + TPE_Vec3 c; + TPE_Unit d; + + TPE_bodyGetFastBSphere(body,&c,&d); + + if (TPE_DISTANCE(c,env(c,d)) > d) + return 0; + + // now test the full body collision: + + uint8_t collision = 0; + + for (uint16_t i = 0; i < body->jointCount; ++i) + { + TPE_Vec3 previousPos = body->joints[i].position; + + _TPE_joint1Index = i; + + uint8_t r = TPE_jointEnvironmentResolveCollision( + body->joints + i,body->elasticity,body->friction,env); + + if (r) + { + collision = 1; + + if (body->flags & TPE_BODY_FLAG_NONROTATING) + _TPE_bodyNonrotatingJointCollided(body,i,previousPos,r == 1); + } + } + + return collision; +} + +TPE_Vec3 TPE_vec3Normalized(TPE_Vec3 v) +{ + TPE_vec3Normalize(&v); + return v; +} + +TPE_Unit TPE_atan(TPE_Unit x) +{ + /* atan approximation by polynomial + WARNING: this will break with different value of TPE_FRACTIONS_PER_UNIT */ + + TPE_Unit sign = 1, x2 = x * x; + + if (x < 0) + { + x *= -1; + sign = -1; + } + + if (x > 30000) // anti overflow + return sign * (TPE_F / 4); + + return sign * + (307 * x + x2) / ((267026 + 633 * x + x2) / 128); +} + +void _TPE_vec2Rotate(TPE_Unit *x, TPE_Unit *y, TPE_Unit angle) +{ + TPE_Unit tmp = *x; + + TPE_Unit s = TPE_sin(angle); + TPE_Unit c = TPE_cos(angle); + + *x = (c * *x - s * *y) / TPE_F; + *y = (s * tmp + c * *y) / TPE_F; +} + +TPE_Unit TPE_vec2Angle(TPE_Unit x, TPE_Unit y) +{ + TPE_Unit r = 0; + + if (x != 0) + { + r = TPE_atan((y * TPE_F) / x); + + if (x < 0) + r += TPE_F / 2; + else if (r < 0) + r += TPE_F; + } + else + { + if (y < 0) + r = (3 * TPE_F) / 4; + else if (y > 0) + r = TPE_F / 4; + // else (y == 0) r stays 0 + } + + return r; +} + +TPE_Vec3 TPE_rotationFromVecs(TPE_Vec3 forward, TPE_Vec3 right) +{ + TPE_Vec3 result; + + // get rotation around Y: + + result.y = TPE_vec2Angle(forward.z,-1 * forward.x); + + // now rotate back by this angle to align with x = 0 plane: + + _TPE_vec2Rotate(&forward.z,&forward.x,result.y); + _TPE_vec2Rotate(&right.z,&right.x,result.y); + + // now do the same for the second axis: + + result.x = + TPE_vec2Angle(forward.z,forward.y); + + _TPE_vec2Rotate(&right.z,&right.y,-1 * result.x); + + result.z = TPE_vec2Angle(right.x,-1 * right.y); + + return result; +} + +TPE_Vec3 _TPE_project3DPoint(TPE_Vec3 p, TPE_Vec3 camPos, TPE_Vec3 camRot, + TPE_Vec3 camView) +{ + // transform to camera space: + + p = TPE_vec3Minus(p,camPos); + + _TPE_vec2Rotate(&p.z,&p.x,camRot.y); + _TPE_vec2Rotate(&p.z,&p.y,-1 * camRot.x); + _TPE_vec2Rotate(&p.y,&p.x,-1 * camRot.z); + + if (p.z <= 0) + return p; + + if (camView.z != 0) + { + // perspective + + p.x = (p.x * camView.z) / p.z; + p.y = (p.y * camView.z) / p.z; + + p.x = camView.x / 2 + (p.x * camView.x) / (2 * TPE_F); + p.y = camView.y / 2 - (p.y * camView.x) / (2 * TPE_F); + // ^ x here intentional + } + else + { + // ortho + + p.x = camView.x / 2 + p.x; + p.y = camView.y / 2 - p.y; + } + + return p; +} + +void _TPE_drawDebugPixel( + TPE_Unit x, TPE_Unit y, TPE_Unit w, TPE_Unit h, uint8_t c, + TPE_DebugDrawFunction f) +{ + if (x >= 0 && x < w && y >= 0 && y < h) + f(x,y,c); +} + +void TPE_worldDebugDraw(TPE_World *world, TPE_DebugDrawFunction drawFunc, + TPE_Vec3 camPos, TPE_Vec3 camRot, TPE_Vec3 camView, uint16_t envGridRes, + TPE_Unit envGridSize) +{ +#define Z_LIMIT 250 + if (world->environmentFunction != 0) + { + // environment: + + TPE_Vec3 testPoint; + + TPE_Unit gridHalfSize = (envGridSize * envGridRes) / 2; + + TPE_Vec3 center; + + if (envGridRes != 0) + { + center = TPE_vec3(0,TPE_sin(camRot.x),TPE_cos(camRot.x)); + + _TPE_vec2Rotate(¢er.x,¢er.z,camRot.y); + + center = TPE_vec3Times(center,gridHalfSize); + center = TPE_vec3Plus(camPos,center); + + center.x = (center.x / envGridSize) * envGridSize; + center.y = (center.y / envGridSize) * envGridSize; + center.z = (center.z / envGridSize) * envGridSize; + } + + testPoint.y = center.y - gridHalfSize; + + for (uint8_t j = 0; j < envGridRes; ++j) + { + testPoint.x = center.x - gridHalfSize; + + for (uint8_t k = 0; k < envGridRes; ++k) + { + testPoint.z = center.z - gridHalfSize; + + for (uint8_t l = 0; l < envGridRes; ++l) + { + TPE_Vec3 r = world->environmentFunction(testPoint,envGridSize); + + if (r.x != testPoint.x || r.y != testPoint.y || r.z != testPoint.z) + { + r = _TPE_project3DPoint(r,camPos,camRot,camView); + + if (r.z > Z_LIMIT) + _TPE_drawDebugPixel(r.x,r.y,camView.x,camView.y, + TPE_DEBUG_COLOR_ENVIRONMENT,drawFunc); + } + + testPoint.z += envGridSize; + } + + testPoint.x += envGridSize; + } + + testPoint.y += envGridSize; + } + } + + for (uint16_t i = 0; i < world->bodyCount; ++i) + { + // connections: + for (uint16_t j = 0; j < world->bodies[i].connectionCount; ++j) + { + TPE_Vec3 + p1 = world->bodies[i].joints[ + world->bodies[i].connections[j].joint1].position, + p2 = world->bodies[i].joints[ + world->bodies[i].connections[j].joint2].position; + + p1 = _TPE_project3DPoint(p1,camPos,camRot,camView); + p2 = _TPE_project3DPoint(p2,camPos,camRot,camView); + + if (p1.z <= Z_LIMIT || p2.z <= Z_LIMIT) + continue; + + TPE_Vec3 diff = TPE_vec3Minus(p2,p1); + +#define SEGS 16 + + uint8_t c = (world->bodies[i].flags & TPE_BODY_FLAG_DEACTIVATED) ? + TPE_DEBUG_COLOR_INACTIVE : TPE_DEBUG_COLOR_CONNECTION; + + for (uint16_t k = 0; k < SEGS; ++k) + { + p2.x = p1.x + (diff.x * k) / SEGS; + p2.y = p1.y + (diff.y * k) / SEGS; + + _TPE_drawDebugPixel(p2.x,p2.y,camView.x,camView.y,c,drawFunc); + } +#undef SEGS + } + + // joints: + for (uint16_t j = 0; j < world->bodies[i].jointCount; ++j) + { + TPE_Vec3 p = _TPE_project3DPoint(world->bodies[i].joints[j].position, + camPos,camRot,camView); + + if (p.z > Z_LIMIT) + { + uint8_t color = (world->bodies[i].flags & TPE_BODY_FLAG_DEACTIVATED) ? + TPE_DEBUG_COLOR_INACTIVE : TPE_DEBUG_COLOR_JOINT; + + _TPE_drawDebugPixel(p.x,p.y,camView.x,camView.y,color,drawFunc); + + TPE_Unit size = TPE_JOINT_SIZE(world->bodies[i].joints[j]); + + if (camView.z != 0) // not ortho? + { + size /= 2; + size = (size * camView.x) / TPE_F; + size = (size * camView.z) / p.z; + } + +#define SEGS 4 + for (uint8_t k = 0; k < SEGS + 1; ++k) + { + TPE_Unit + dx = (TPE_sin(TPE_F * k / (8 * SEGS)) * size) + / TPE_F, + dy = (TPE_cos(TPE_F * k / (8 * SEGS)) * size) + / TPE_F; + +#define dp(a,b,c,d) \ + _TPE_drawDebugPixel(p.x a b,p.y c d,camView.x,camView.y,color,drawFunc); + dp(+,dx,+,dy) dp(+,dx,-,dy) dp(-,dx,+,dy) dp(-,dx,-,dy) + dp(+,dy,+,dx) dp(+,dy,-,dx) dp(-,dy,+,dx) dp(-,dy,-,dx) +#undef dp +#undef SEGS + } + } + } + } +#undef Z_LIMIT +} + +TPE_Vec3 TPE_envBox(TPE_Vec3 point, TPE_Vec3 center, TPE_Vec3 maxCornerVec, + TPE_Vec3 rotation) +{ + point = TPE_pointRotate(TPE_vec3Minus(point,center), + TPE_rotationInverse(rotation)); + + return TPE_vec3Plus(center,TPE_pointRotate(TPE_envAABox(point,TPE_vec3(0,0,0), + maxCornerVec),rotation)); +} + +TPE_Vec3 TPE_envAABox(TPE_Vec3 point, TPE_Vec3 center, TPE_Vec3 maxCornerVec) +{ + TPE_Vec3 shifted = TPE_vec3Minus(point,center); + int8_t sign[3] = {1, 1, 1}; + + if (shifted.x < 0) + { + shifted.x *= -1; + sign[0] = -1; + } + + if (shifted.y < 0) + { + shifted.y *= -1; + sign[1] = -1; + } + + if (shifted.z < 0) + { + shifted.z *= -1; + sign[2] = -1; + } + + uint8_t region = + (shifted.x > maxCornerVec.x) | + ((shifted.y > maxCornerVec.y) << 1) | + ((shifted.z > maxCornerVec.z) << 2); + + switch (region) + { +#define align(c,i) point.c = center.c + sign[i] * maxCornerVec.c + + case 0x01: align(x,0); break; + case 0x02: align(y,1); break; + case 0x04: align(z,2); break; + + case 0x03: align(x,0); align(y,1); break; + case 0x05: align(x,0); align(z,2); break; + case 0x06: align(y,1); align(z,2); break; + + case 0x07: align(x,0); align(y,1); align(z,2); break; + default: break; + +#undef align + } + + return point; +} + +TPE_Vec3 TPE_envAABoxInside(TPE_Vec3 point, TPE_Vec3 center, TPE_Vec3 size) +{ + size.x /= 2; + size.y /= 2; + size.z /= 2; + + TPE_Vec3 shifted = TPE_vec3Minus(point,center); + + TPE_Vec3 a = TPE_vec3Minus(size,shifted), + b = TPE_vec3Plus(shifted,size); + + int8_t sx = 1, sy = 1, sz = 1; + + if (b.x < a.x) + { + a.x = b.x; + sx = -1; + } + + if (b.y < a.y) + { + a.y = b.y; + sy = -1; + } + + if (b.z < a.z) + { + a.z = b.z; + sz = -1; + } + + if (a.x < 0 || a.y < 0 || a.z < 0) + return point; + + if (a.x < a.y) + { + if (a.x < a.z) + point.x = center.x + sx * size.x; + else + point.z = center.z + sz * size.z; + } + else + { + if (a.y < a.z) + point.y = center.y + sy * size.y; + else + point.z = center.z + sz * size.z; + } + + return point; +} + +TPE_Vec3 TPE_envSphereInside(TPE_Vec3 point, TPE_Vec3 center, TPE_Unit radius) +{ + TPE_Vec3 shifted = TPE_vec3Minus(point,center); + + TPE_Unit l = TPE_LENGTH(shifted); + + if (l >= radius) + return point; + else if (l < 0) + return TPE_vec3(center.x + radius,center.y,center.z); + + TPE_vec3Normalize(&shifted); + + return TPE_vec3Plus(center,TPE_vec3Times(shifted,radius)); +} + +TPE_Vec3 TPE_envSphere(TPE_Vec3 point, TPE_Vec3 center, TPE_Unit radius) +{ + TPE_Vec3 dir = TPE_vec3Minus(point,center); + + TPE_Unit l = TPE_LENGTH(dir); + + if (l <= radius) + return point; + + dir.x = (dir.x * radius) / l; + dir.y = (dir.y * radius) / l; + dir.z = (dir.z * radius) / l; + + return TPE_vec3Plus(center,dir); +} + +TPE_Vec3 TPE_envHalfPlane(TPE_Vec3 point, TPE_Vec3 center, TPE_Vec3 normal) +{ + TPE_Vec3 point2 = TPE_vec3Minus(point,center); + + TPE_Unit tmp = + point2.x * normal.x + point2.y * normal.y + point2.z * normal.z; + + if (tmp < 0) + return point; + + TPE_Unit l = TPE_LENGTH(normal); + + tmp /= l; + + normal.x = (normal.x * TPE_F) / l; + normal.y = (normal.y * TPE_F) / l; + normal.z = (normal.z * TPE_F) / l; + + return TPE_vec3Minus(point, + TPE_vec3Times(normal,tmp)); +} + +uint8_t TPE_checkOverlapAABB(TPE_Vec3 v1Min, TPE_Vec3 v1Max, TPE_Vec3 v2Min, + TPE_Vec3 v2Max) +{ + TPE_Unit dist; + +#define test(c) \ + dist = v1Min.c + v1Max.c - v2Max.c - v2Min.c; \ + if (dist < 0) dist *= -1; \ + if (dist > v1Max.c - v1Min.c + v2Max.c - v2Min.c) return 0; + + test(x) + test(y) + test(z) + +#undef test + + return 1; +} + +void TPE_bodyGetAABB(const TPE_Body *body, TPE_Vec3 *vMin, TPE_Vec3 *vMax) +{ + *vMin = body->joints[0].position; + *vMax = *vMin; + + TPE_Unit js = TPE_JOINT_SIZE(body->joints[0]); + + vMin->x -= js; + vMin->y -= js; + vMin->z -= js; + + vMax->x += js; + vMax->y += js; + vMax->z += js; + + for (uint16_t i = 1; i < body->jointCount; ++i) + { + TPE_Unit v; + + js = TPE_JOINT_SIZE(body->joints[i]); + +#define test(c) \ + v = body->joints[i].position.c - js; \ + if (v < vMin->c) \ + vMin->c = v; \ + v += 2 * js; \ + if (v > vMax->c) \ + vMax->c = v; + + test(x) + test(y) + test(z) + +#undef test + } +} + +void TPE_jointPin(TPE_Joint *joint, TPE_Vec3 position) +{ + joint->position = position; + joint->velocity[0] = 0; + joint->velocity[1] = 0; + joint->velocity[2] = 0; +} + +TPE_Vec3 TPE_pointRotate(TPE_Vec3 point, TPE_Vec3 rotation) +{ + _TPE_vec2Rotate(&point.y,&point.x,rotation.z); + _TPE_vec2Rotate(&point.z,&point.y,rotation.x); + _TPE_vec2Rotate(&point.x,&point.z,rotation.y); + + return point; +} + +TPE_Vec3 TPE_rotationInverse(TPE_Vec3 rotation) +{ + /* If r1 = (X,Y,Z) is rotation in convention ABC then r1^-1 = (-X,-Y,-Z) in + convention CBA is its inverse rotation. We exploit this, i.e. we rotate + forward/right vectors in opposite axis order and then turn the result + into normal rotation/orientation. */ + + TPE_Vec3 f = TPE_vec3(0,0,TPE_F); + TPE_Vec3 r = TPE_vec3(TPE_F,0,0); + + rotation.x *= -1; + rotation.y *= -1; + rotation.z *= -1; + + _TPE_vec2Rotate(&f.x,&f.z,rotation.y); + _TPE_vec2Rotate(&f.z,&f.y,rotation.x); + _TPE_vec2Rotate(&f.y,&f.x,rotation.z); + + _TPE_vec2Rotate(&r.x,&r.z,rotation.y); + _TPE_vec2Rotate(&r.z,&r.y,rotation.x); + _TPE_vec2Rotate(&r.y,&r.x,rotation.z); + + return TPE_rotationFromVecs(f,r); +} + +TPE_Vec3 TPE_rotationRotateByAxis(TPE_Vec3 rotation, TPE_Vec3 rotationByAxis) +{ + TPE_Vec3 f = TPE_pointRotate(TPE_vec3(0,0,TPE_F),rotation); + TPE_Vec3 r = TPE_pointRotate(TPE_vec3(TPE_F,0,0),rotation); + + TPE_Unit a = TPE_LENGTH(rotationByAxis); + TPE_vec3Normalize(&rotationByAxis); + + f = _TPE_rotateByAxis(f,rotationByAxis,a); + r = _TPE_rotateByAxis(r,rotationByAxis,a); + + return TPE_rotationFromVecs(f,r); +} + +TPE_Unit TPE_keepInRange(TPE_Unit x, TPE_Unit xMin, TPE_Unit xMax) +{ + return x > xMin ? (x < xMax ? x : xMax) : xMin; +} + +TPE_Vec3 TPE_vec3KeepWithinDistanceBand(TPE_Vec3 point, TPE_Vec3 center, + TPE_Unit minDistance, TPE_Unit maxDistance) +{ + TPE_Vec3 toPoint = TPE_vec3Minus(point,center); + + TPE_Unit l = TPE_LENGTH(toPoint); + + if (l <= maxDistance) + { + if (l >= minDistance) + return point; + + l = minDistance; + } + else + l = maxDistance; + + return TPE_vec3Plus(center, + TPE_vec3Times(TPE_vec3Normalized(toPoint),l)); +} + +TPE_Vec3 TPE_vec3KeepWithinBox(TPE_Vec3 point, TPE_Vec3 boxCenter, + TPE_Vec3 boxMaxVect) +{ + point.x = TPE_keepInRange(point.x, + boxCenter.x - boxMaxVect.x,boxCenter.x + boxMaxVect.x); + + point.y = TPE_keepInRange(point.y, + boxCenter.y - boxMaxVect.y,boxCenter.y + boxMaxVect.y); + + point.z = TPE_keepInRange(point.z, + boxCenter.z - boxMaxVect.z,boxCenter.z + boxMaxVect.z); + + return point; +} + +TPE_Vec3 TPE_envInfiniteCylinder(TPE_Vec3 point, TPE_Vec3 center, TPE_Vec3 + direction, TPE_Unit radius) +{ + TPE_Vec3 d = TPE_vec3Minus(point,center); + d = TPE_vec3Minus(d,TPE_vec3Project(d,direction)); + + TPE_Unit l = TPE_LENGTH(d); + + if (l <= radius) + return point; + + radius = l - radius; + + d.x = (d.x * radius) / l; + d.y = (d.y * radius) / l; + d.z = (d.z * radius) / l; + + return TPE_vec3Minus(point,d); +} + +TPE_Vec3 TPE_envCylinder(TPE_Vec3 point, TPE_Vec3 center, TPE_Vec3 direction, + TPE_Unit radius) +{ + point = TPE_vec3Minus(point,center); + + TPE_Vec3 projected = TPE_vec3Project(point,direction); + + point = TPE_envInfiniteCylinder(point,TPE_vec3(0,0,0),direction,radius); + + TPE_Unit lDir = TPE_nonZero(TPE_LENGTH(direction)); + + TPE_Unit lDiff = TPE_LENGTH(projected) - lDir; + + if (lDiff > 0) + { + direction.x = (direction.x * lDiff) / lDir; + direction.y = (direction.y * lDiff) / lDir; + direction.z = (direction.z * lDiff) / lDir; + + point = (TPE_vec3Dot(projected,direction)) >= 0 ? + TPE_vec3Minus(point,direction) : TPE_vec3Plus(point,direction); + } + + return TPE_vec3Plus(center,point); +} + +TPE_Vec3 TPE_fakeSphereRotation(TPE_Vec3 position1, TPE_Vec3 position2, + TPE_Unit radius) +{ + TPE_Vec3 m; + + m.x = position1.z - position2.z; + m.y = 0; + m.z = position2.x - position1.x; + + TPE_Unit l = TPE_sqrt(m.x * m.x + m.z * m.z); + + if (l == 0) + return TPE_vec3(0,0,0); + + TPE_Unit d = (TPE_DISTANCE(position1,position2) * + TPE_F) / (radius * 4); + + m.x = (m.x * d) / l; + m.z = (m.z * d) / l; + + return m; +} + +TPE_Vec3 TPE_castEnvironmentRay(TPE_Vec3 rayPos, TPE_Vec3 rayDir, + TPE_ClosestPointFunction environment, TPE_Unit insideStepSize, + TPE_Unit rayMarchMaxStep, uint32_t maxSteps) +{ + TPE_Vec3 p = rayPos; + TPE_Vec3 p2 = environment(rayPos,rayMarchMaxStep); + TPE_Unit totalD = 0; + + TPE_vec3Normalize(&rayDir); + + uint8_t found = 0; // 0 = nothing found, 1 = out/in found, 2 = in/out found + + if (p2.x != p.x || p2.y != p.y || p2.z != p.z) + { + // outside ray: ray march + + for (uint32_t i = 0; i < maxSteps; ++i) + { + TPE_Unit d = TPE_DISTANCE(p,p2); + + if (d > rayMarchMaxStep) + d = rayMarchMaxStep; + + totalD += d; + + p2 = TPE_vec3Plus(rayPos,TPE_vec3Times(rayDir,totalD)); + + if (d == 0 || + (p2.x == p.x && p2.y == p.y && p2.z == p.z)) + return p2; // point not inside env but dist == 0, ideal case + + TPE_Vec3 pTest = environment(p2,rayMarchMaxStep); + + if (pTest.x == p2.x && pTest.y == p2.y && pTest.z == p2.z) + { + // stepped into env, will have to iterate + found = 1; + break; + } + + p = p2; + p2 = pTest; + } + } + else if (insideStepSize != 0) + { + // inside ray: iterate by fixed steps + + for (uint32_t i = 0; i < maxSteps; ++i) + { + totalD += insideStepSize; + + p2 = TPE_vec3Plus(rayPos,TPE_vec3Times(rayDir,totalD)); + + TPE_Vec3 pTest = environment(p2,16); + + if (p2.x != pTest.x || p2.y != pTest.y || p2.z != pTest.z) + { + found = 2; + break; + } + + p = p2; + p2 = pTest; + } + } + + if (found) + { + /* Here we've found two points (p, p2), each one the other side of the + env surface. Now iterate (binary search) to find the exact surface + pos. */ + + for (uint8_t i = 0; i < 128; ++i) // upper limit just in case + { + TPE_Vec3 middle = TPE_vec3Plus(p,p2); + + middle.x /= 2; + middle.y /= 2; + middle.z /= 2; + + if ((middle.x == p.x && middle.y == p.y && middle.z == p.z) || + (middle.x == p2.x && middle.y == p2.y && middle.z == p2.z)) + break; // points basically next to each other, don't continue + + TPE_Vec3 pTest = environment(middle,16); // 16: just a small number + + if ((found == 1) == + (pTest.x == middle.x && pTest.y == middle.y && pTest.z == middle.z)) + p2 = middle; + else + p = middle; + } + + return (found == 1) ? p : p2; + } + + return TPE_vec3(TPE_INFINITY,TPE_INFINITY,TPE_INFINITY); +} + +TPE_Vec3 TPE_castBodyRay(TPE_Vec3 rayPos, TPE_Vec3 rayDir, int16_t excludeBody, + const TPE_World *world, int16_t *bodyIndex, int16_t *jointIndex) +{ + TPE_Vec3 bestP = TPE_vec3(TPE_INFINITY,TPE_INFINITY,TPE_INFINITY); + TPE_Unit bestD = TPE_INFINITY; + + if (bodyIndex != 0) + *bodyIndex = -1; + + if (jointIndex != 0) + *jointIndex = -1; + + TPE_vec3Normalize(&rayDir); + + for (uint16_t i = 0; i < world->bodyCount; ++i) + { + TPE_Vec3 c, p; + TPE_Unit r, d; + + TPE_bodyGetFastBSphere(&world->bodies[i],&c,&r); + + c = TPE_vec3Minus(c,rayPos); + p = TPE_vec3ProjectNormalized(c,rayDir); + + if (TPE_vec3Dot(p,rayDir) >= 0) // point is in ray's forward dir? + { + d = TPE_DISTANCE(p,c); + + if (d <= r) + { + // bounding sphere hit, now check all joints: + + const TPE_Joint *joint = world->bodies[i].joints; + + for (uint16_t j = 0; j < world->bodies[i].jointCount; ++j) + { + c = joint->position; + c = TPE_vec3Minus(c,rayPos); + p = TPE_vec3ProjectNormalized(c,rayDir); + + if (TPE_vec3Dot(p,rayDir) >= 0) + { + d = TPE_DISTANCE(p,c); + TPE_Unit js = TPE_JOINT_SIZE(*joint); + + if (d <= js) + { + // joint hit, compute exact coordinates: + + if (bodyIndex != 0) + *bodyIndex = i; + + if (jointIndex != 0) + *jointIndex = j; + + c = TPE_vec3Times(rayDir,TPE_sqrt(js * js - d * d)); + // ^ offset vector to two intersections + p = TPE_vec3Plus(p,rayPos); + + TPE_Vec3 + i1 = TPE_vec3Plus(p,c), // intersection points + i2 = TPE_vec3Minus(p,c); + + d = TPE_DISTANCE(rayPos,i1); + TPE_Unit d2 = TPE_DISTANCE(rayPos,i2); + + if (d2 < d) // take the closer one + { + d = d2; + i1 = i2; + } + + if (d < bestD) + { + bestD = d; + bestP = i1; + } + } + } + + joint++; + } + } + } + } + + return bestP; +} + +void TPE_worldDeactivateAll(TPE_World *world) +{ + for (uint16_t i = 0; i < world->bodyCount; ++i) + TPE_bodyDeactivate(&world->bodies[i]); +} + +void TPE_worldActivateAll(TPE_World *world) +{ + for (uint16_t i = 0; i < world->bodyCount; ++i) + TPE_bodyActivate(&world->bodies[i]); +} + +TPE_Unit TPE_worldGetNetSpeed(const TPE_World *world) +{ + TPE_Unit result = 0; + + for (uint16_t i = 0; i < world->bodyCount; ++i) + result += TPE_bodyGetNetSpeed(world->bodies + i); + + return result; +} + +TPE_Vec3 TPE_bodyGetLinearVelocity(const TPE_Body *body) +{ + TPE_Vec3 r = TPE_vec3(0,0,0); + + for (uint16_t i = 0; i < body->jointCount; ++i) + { + TPE_UnitReduced *v = body->joints[i].velocity; + r = TPE_vec3Plus(r,TPE_vec3(v[0],v[1],v[2])); + } + + r.x /= body->jointCount; + r.y /= body->jointCount; + r.z /= body->jointCount; + + return r; +} + +TPE_Unit TPE_abs(TPE_Unit x) +{ + return x >= 0 ? x : (-1 * x); +} + +TPE_Unit TPE_max(TPE_Unit a, TPE_Unit b) +{ + return (a > b) ? a : b; +} + +TPE_Unit TPE_min(TPE_Unit a, TPE_Unit b) +{ + return (a < b) ? a : b; +} + +TPE_Vec3 TPE_envAATriPrism(TPE_Vec3 point, TPE_Vec3 center, + const TPE_Unit sides[6], TPE_Unit depth, uint8_t direction) +{ + point = TPE_vec3Minus(point,center); + + if (direction == 1) + { + TPE_Unit tmp = point.z; + point.z = point.y; + point.y = tmp; + } + else if (direction == 2) + { + TPE_Unit tmp = point.z; + point.z = point.x; + point.x = tmp; + } + + depth /= 2; + + if (point.z > depth) + point.z = depth; + else + { + depth *= -1; + + if (point.z < depth) + point.z = depth; + } + + for (uint8_t i = 0; i < 6; i += 2) + { + uint8_t i2 = i < 4 ? i + 2 : 0; + + TPE_Vec3 p = + TPE_envHalfPlane(point,TPE_vec3(sides[i],sides[i + 1],0), + TPE_vec3(sides[i2 + 1] - sides[i + 1],sides[i] - sides[i2],0)); + + if (p.x != point.x || p.y != point.y) + { + point = p; + + if ( // dot product to determine which side the point is on + (sides[i2] - sides[i]) * (point.x - sides[i]) + + (sides[i2 + 1] - sides[i + 1]) * (point.y - sides[i + 1]) < 0) + { + point.x = sides[i]; point.y = sides[i + 1]; + } + else if ( // same but for the other vertex + (sides[i] - sides[i2]) * (point.x - sides[i2]) + + (sides[i + 1] - sides[i2 + 1]) * (point.y - sides[i2 + 1]) < 0) + { + point.x = sides[i2]; point.y = sides[i2 + 1]; + } + + break; + } + } + + if (direction == 1) + { + TPE_Unit tmp = point.z; + point.z = point.y; + point.y = tmp; + } + else if (direction == 2) + { + TPE_Unit tmp = point.z; + point.z = point.x; + point.x = tmp; + } + + return TPE_vec3Plus(point,center); +} + +TPE_Vec3 TPE_envGround(TPE_Vec3 point, TPE_Unit height) +{ + if (point.y > height) + point.y = height; + + return point; +} + +uint32_t _TPE_hash(uint32_t n) +{ + // parameters found by hash-prospector project + n = 250009959 * (n ^ (n >> 17)); + n = 2626308659 * (n ^ (n >> 15)); + return n ^ (n >> 16); +} + +uint32_t TPE_jointHash(const TPE_Joint *joint) +{ + uint32_t + r = _TPE_hash(joint->position.x); + r = _TPE_hash(r ^ joint->position.y); + r = _TPE_hash(r ^ joint->position.z); + r = _TPE_hash(r ^ + (((uint32_t) joint->velocity[0]) | + (((uint32_t) joint->velocity[1]) << 16))); + r = _TPE_hash(r ^ + (((uint32_t) joint->velocity[2]) | + ((uint32_t) joint->sizeDivided))); + + return r; +} + +uint32_t TPE_connectionHash(const TPE_Connection *connection) +{ + return _TPE_hash( + ((uint32_t) connection->length) | + (((uint32_t) connection->joint1) << 16) | + (((uint32_t) connection->joint2) << 24)); +} + +uint32_t TPE_bodyHash(const TPE_Body *body) +{ + uint32_t r = _TPE_hash( + ((uint32_t) body->jointMass) | + (((uint32_t) body->flags) << 16) | + (((uint32_t) body->deactivateCount) << 24)) ^ + _TPE_hash( + ((uint32_t) body->friction) | + (((uint32_t) body->elasticity) << 16)); + + for (uint8_t i = 0; i < body->jointCount; ++i) + r = _TPE_hash(r ^ TPE_jointHash(&body->joints[i])); + + for (uint8_t i = 0; i < body->connectionCount; ++i) + r = _TPE_hash(r ^ TPE_connectionHash(&body->connections[i])); + + return r; +} + +uint32_t TPE_worldHash(const TPE_World *world) +{ + uint32_t r = 0; + + for (uint8_t i = 0; i < world->bodyCount; ++i) + r = _TPE_hash(r ^ TPE_bodyHash(&world->bodies[i])); + + return r; +} + +void TPE_bodyMoveTo(TPE_Body *body, TPE_Vec3 position) +{ + position = TPE_vec3Minus(position,TPE_bodyGetCenterOfMass(body)); + + for (uint8_t i = 0; i < body->jointCount; ++i) + body->joints[i].position = TPE_vec3Plus(body->joints[i].position,position); +} + +uint8_t TPE_testClosestPointFunction(TPE_ClosestPointFunction f, + TPE_Vec3 cornerFrom, TPE_Vec3 cornerTo, uint8_t gridResolution, + TPE_UnitReduced allowedError, TPE_Vec3 *errorPoint) +{ + TPE_Vec3 p; + + cornerTo = TPE_vec3Minus(cornerTo,cornerFrom); + + for (uint16_t z = 0; z < gridResolution; ++z) + { + p.z = cornerFrom.z + (z * cornerTo.z) / gridResolution; + + for (uint16_t y = 0; y < gridResolution; ++y) + { + p.y = cornerFrom.y + (y * cornerTo.y) / gridResolution; + + for (uint16_t x = 0; x < gridResolution; ++x) + { + p.x = cornerFrom.x + (x * cornerTo.x) / gridResolution; + + TPE_Vec3 p2 = f(p,TPE_INFINITY); + + if (p.x != p2.x || p.y != p2.y || p.z != p2.z) // only test outside + { + // 1st try to approach the closest point and see if it stays the same: + + TPE_Vec3 p3 = p; + + for (uint8_t i = 0; i < 3; ++i) + { + p3 = + TPE_vec3((p3.x + p2.x) / 2,(p3.y + p2.y) / 2,(p3.z + p2.z) / 2); + + TPE_Vec3 p4 = f(p3,TPE_INFINITY); + + if (TPE_abs(p4.x - p2.x) + TPE_abs(p4.y - p2.y) + + TPE_abs(p4.z - p2.z) > allowedError) // taxicab dist. for speed + { + if (errorPoint != 0) + *errorPoint = p; + + return 0; + } + } + + // now test 8 points inside the sphere of radius: + + TPE_Unit d = TPE_DISTANCE(p,p2); + + p3.z = p.z - d / 2; + + for (uint8_t zz = 0; zz < 2; ++zz) + { + p3.y = p.y - d / 2; + + for (uint8_t yy = 0; yy < 2; ++yy) + { + p3.x = p.x - d / 2; + + for (uint8_t zz = 0; zz < 2; ++zz) + { + if (TPE_DISTANCE(p,f(p3,TPE_INFINITY)) + allowedError < d) + { + /* In the sphere of distance radius to the original point's + closest point we've gotten a closer point which should + never happen. */ + + if (errorPoint != 0) + *errorPoint = p; + + return 0; + } + + p3.x += d; + } + + p3.y += d; + } + + p3.z += d; + } + } + } + } + } + + return 1; +} + +TPE_Vec3 TPE_envLineSegment(TPE_Vec3 point, TPE_Vec3 a, TPE_Vec3 b) +{ + point = TPE_vec3Minus(point,a); + + b = TPE_vec3Minus(b,a); + + point = TPE_vec3Project(point,b); + + if (TPE_vec3Dot(point,b) < 0) + point = TPE_vec3(0,0,0); + else if (TPE_abs(point.x) + TPE_abs(point.y) + TPE_abs(point.z) > + TPE_abs(b.x) + TPE_abs(b.y) + TPE_abs(b.z)) + point = b; + + point = TPE_vec3Plus(point,a); + + return point; +} + +TPE_Vec3 TPE_envHeightmap(TPE_Vec3 point, TPE_Vec3 center, TPE_Unit gridSize, + TPE_Unit (*heightFunction)(int32_t x, int32_t y), TPE_Unit maxDist) +{ + point = TPE_vec3Minus(point,center); + + TPE_Vec3 closestP = TPE_vec3(TPE_INFINITY,TPE_INFINITY,TPE_INFINITY); + TPE_Unit closestD = TPE_INFINITY; + + int16_t startSquareX = point.x / gridSize - (point.x < 0), + startSquareY = point.z / gridSize - (point.z < 0); + + int16_t squareX = startSquareX, + squareY = startSquareY; + + uint8_t spiralDir = 1; + uint16_t spiralStep = 1, spiralStepsLeft = 1; + + TPE_Vec3 // 4 corners of the current square + bl = TPE_vec3(squareX * gridSize,heightFunction(squareX,squareY), + squareY * gridSize), + br = TPE_vec3(bl.x + gridSize,heightFunction(squareX + 1,squareY),bl.z), + tl = TPE_vec3(bl.x,heightFunction(squareX,squareY + 1),bl.z + gridSize), + tr = TPE_vec3(br.x,heightFunction(squareX + 1,squareY + 1),tl.z); + + for (uint16_t i = 0; i < 1024; ++i) // while (1) should work in theory but... + { + if ((TPE_min(TPE_abs(squareX - startSquareX), + TPE_abs(squareY - startSquareY)) - 1) * gridSize + > TPE_min(maxDist,closestD)) + break; // here we can no longer find the dist we're looking for => end + + for (uint8_t j = 0; j < 2; ++j) // check the two triangles of the segment + { + TPE_Vec3 testP = TPE_envHalfPlane(point,j == 0 ? bl : tr, + TPE_vec3Normalized(j == 0 ? + TPE_vec3Cross(TPE_vec3Minus(tl,bl),TPE_vec3Minus(br,bl)) : + TPE_vec3Cross(TPE_vec3Minus(br,tr),TPE_vec3Minus(tl,tr)))); + + TPE_Unit testD = TPE_DISTANCE(testP,point); + + if (testD < closestD) + { + if (j == 0 ? // point is inside the triangle? + (testP.x >= bl.x && testP.z >= bl.z && + (testP.x - bl.x <= tl.z - testP.z)) : + (testP.x <= tr.x && testP.z <= tr.z && + (testP.x - bl.x >= tl.z - testP.z))) + { + closestP = testP; + closestD = testD; + } + else + { + // point outside the triangle, check individual boundary sides +#define testEdge(a,b) \ + testP = TPE_envLineSegment(point,a,b); testD = TPE_DISTANCE(testP,point); \ + if (testD < closestD) { closestP = testP; closestD = testD; } + + testEdge(j == 0 ? bl : tr,br) + testEdge(j == 0 ? bl : tr,tl) + testEdge(br,tl) + +#undef testEdge + } + } + } + + // now step to another square, in spiralling way: + + switch (spiralDir) + { + case 0: // moving up + squareY++; + + bl = tl; br = tr; + tl = TPE_vec3(bl.x,heightFunction(squareX,squareY + 1),bl.z + gridSize); + tr = TPE_vec3(br.x,heightFunction(squareX + 1,squareY + 1),bl.z + + gridSize); + + break; + + case 1: // moving right + squareX++; + + bl = br; tl = tr; + tr = TPE_vec3(tl.x + gridSize,heightFunction(squareX + 1,squareY + 1), + tl.z); + br = TPE_vec3(bl.x + gridSize,heightFunction(squareX + 1,squareY),bl.z); + + break; + + case 2: // moving down + squareY--; + + tl = bl; tr = br; + bl = TPE_vec3(tl.x,heightFunction(squareX,squareY),tl.z - gridSize); + br = TPE_vec3(tr.x,heightFunction(squareX + 1,squareY),tr.z - gridSize); + + break; + + case 3: // moving left + squareX--; + + br = bl; tr = tl; + tl = TPE_vec3(tr.x - gridSize,heightFunction(squareX,squareY + 1),tr.z); + bl = TPE_vec3(br.x - gridSize,heightFunction(squareX,squareY),br.z); + + break; + + default: break; + } + + spiralStepsLeft--; + + if (spiralStepsLeft == 0) + { + spiralDir = spiralDir != 0 ? spiralDir - 1 : 3; + + if (spiralDir == 3 || spiralDir == 1) + spiralStep++; + + spiralStepsLeft = spiralStep; + } + } + + return TPE_vec3Plus(closestP,center); +} + +TPE_Vec3 TPE_envCone(TPE_Vec3 point, TPE_Vec3 center, TPE_Vec3 direction, + TPE_Unit radius) +{ + point = TPE_vec3Minus(point,center); + + if (TPE_vec3Dot(point,direction) <= 0) + { + // underneath the cone + + direction.x *= -1; + direction.y *= -1; + direction.z *= -1; + + point = TPE_envHalfPlane(point,TPE_vec3(0,0,0),direction); + + TPE_Unit dist = TPE_LENGTH(point); + + if (dist > radius) + { + point.x = (point.x * radius) / dist; + point.y = (point.y * radius) / dist; + point.z = (point.z * radius) / dist; + } + } + else + { + TPE_Unit height = TPE_LENGTH(direction); + + TPE_Vec3 helper = TPE_vec3Project(point,direction); + TPE_Unit y = TPE_LENGTH(helper); + + helper = TPE_vec3Minus(point,helper); + + TPE_Unit x = TPE_LENGTH(helper); + + if (x < 20) + { + // for such small distance big numeric errors occur in the other branch + if (y >= height) + point = direction; + } + else + { + TPE_Unit scaledRadius = radius - ((y * radius) / height); + + if (y > height || x > scaledRadius) // outside? + { + if (x <= 0) + { + TPE_LOG("WARNING: arithmetic error in envCone (library bug)"); + x = 1; // shouldn't happen but just in case, to prevent div by zero + } + + helper.x = (helper.x * radius) / x; + helper.y = (helper.y * radius) / x; + helper.z = (helper.z * radius) / x; + + point = TPE_envLineSegment(point,helper,direction); + } + } + } + + return TPE_vec3Plus(point,center); +} + +static inline uint8_t TPE_bodyIsActive(const TPE_Body *body) +{ + return !(body->flags & TPE_BODY_FLAG_DEACTIVATED); +} + +#endif // guard