From b2f745b90ead224d31840c7cf59088f615a255d2 Mon Sep 17 00:00:00 2001 From: "Earle F. Philhower, III" Date: Fri, 1 Feb 2019 13:51:20 -0800 Subject: [PATCH 1/4] Allow unaligned input/output to SPI::transferBytes Fixes #4967 Support any alignment of input and output pointers and transfer lengths in SPI::transferBytes. Use 32-bit transfers and FIFO as much as possible. --- libraries/SPI/SPI.cpp | 101 +++++++++++++++++++++++++++++------------- 1 file changed, 70 insertions(+), 31 deletions(-) diff --git a/libraries/SPI/SPI.cpp b/libraries/SPI/SPI.cpp index 0f1786a4df..c2d743a60c 100644 --- a/libraries/SPI/SPI.cpp +++ b/libraries/SPI/SPI.cpp @@ -509,9 +509,6 @@ void SPIClass::writePattern(const uint8_t * data, uint8_t size, uint32_t repeat) } /** - * Note: - * in and out need to be aligned to 32Bit - * or you get an Fatal exception (9) * @param out uint8_t * * @param in uint8_t * * @param size uint32_t @@ -531,48 +528,90 @@ void SPIClass::transferBytes(const uint8_t * out, uint8_t * in, uint32_t size) { } /** - * Note: - * in and out need to be aligned to 32Bit - * or you get an Fatal exception (9) * @param out uint8_t * * @param in uint8_t * * @param size uint8_t (max 64) */ void SPIClass::transferBytes_(const uint8_t * out, uint8_t * in, uint8_t size) { - while(SPI1CMD & SPIBUSY) {} - // Set in/out Bits to transfer + while(SPI1CMD & SPIBUSY) {} // Make sure we're IDLE on the SPI - setDataBits(size * 8); + int bytesToTransfer = size; // How much to read/write from the FIFO in bytes - volatile uint32_t * fifoPtr = &SPI1W0; - uint8_t dataSize = ((size + 3) / 4); + if (out && !in) { + int bytesLeft = size; - if(out) { - uint32_t * dataPtr = (uint32_t*) out; - while(dataSize--) { - *fifoPtr = *dataPtr; - dataPtr++; - fifoPtr++; + // Only transmitting, get out 32b aligned + while (bytesLeft && ((uint32_t)out & 3)) { + SPI.transfer(*(out++)); + bytesLeft--; } - } else { - // no out data only read fill with dummy data! - while(dataSize--) { - *fifoPtr = 0xFFFFFFFF; - fifoPtr++; + + if (!bytesLeft) return; + bytesToTransfer = bytesLeft; + + // Now do 32b writes until we have 0 or fewer bytes left + // Note we can read and store past the end of string because it will not be sent (setDataBits) + uint32_t *dataPtr = (uint32_t*)out; + volatile uint32_t *fifoPtr = &SPI1W0; + while (bytesLeft > 0) { + *(fifoPtr++) = *(dataPtr++); + bytesLeft -= 4; } + // Remainder will be sent out of loaded FIFO below + } else if (in && !out) { + int bytesLeft = size; + + // Only receiving, get in 32b aligned + while (bytesLeft && ((uint32_t)in & 3)) { + *(in++) = SPI.transfer(0xff); + bytesLeft--; + } + + if (!bytesLeft) return; + bytesToTransfer = bytesLeft; + + // Now send 32b writes of all 0xff, actual read will be done below + // Note we can write add'l 0xffs and they won't be sent thanks to setDataBits below + volatile uint32_t *fifoPtr = &SPI1W0; + while (bytesLeft > 0) { + *(fifoPtr++) = 0xffffffff; + bytesLeft -= 4; + } + } else if (in && out && (((uint32_t)in & 3) || ((uint32_t)out & 3))) { + // Bidirectional and we have one or both pointers misaligned + while (size) { + *(in++) = SPI.transfer(*(out++)); + size--; + } + // All the data was transferred byte-wise, we're done + return; } + // At this point we've got aligned (or null) in and out and the FIFO is filled + // with whatever data we're going to transmit as aligned + + // Set in/out bits to transfer + while (SPI1CMD & SPIBUSY) {} // Paranoia, make sure SPI is idle + setDataBits(bytesToTransfer * 8); + + // Start the transfer and wait for done SPI1CMD |= SPIBUSY; - while(SPI1CMD & SPIBUSY) {} + while (SPI1CMD & SPIBUSY) { /* noop, waiting */ } + + if (in) { + // Bulk read out by 4 until we have 0..3 left + uint32_t *dataPtr = (uint32_t*)in; + volatile uint32_t *fifoPtr = &SPI1W0; + while (bytesToTransfer >= 4) { + *(dataPtr++) = *(fifoPtr++); + bytesToTransfer -= 4; + in += 4; // Keep track of the in ptr for any stragglers + } - if(in) { - uint32_t * dataPtr = (uint32_t*) in; - fifoPtr = &SPI1W0; - dataSize = ((size + 3) / 4); - while(dataSize--) { - *dataPtr = *fifoPtr; - dataPtr++; - fifoPtr++; + // Bytewise read out the remainder + volatile uint8_t *fifoPtrB = (uint8_t*)fifoPtr; + while (bytesToTransfer--) { + *(in++) = *(fifoPtrB++); } } } From 06d609df8f03dd7c834eb3778ea72799f6e4213e Mon Sep 17 00:00:00 2001 From: "Earle F. Philhower, III" Date: Fri, 1 Feb 2019 23:50:29 -0800 Subject: [PATCH 2/4] Refactor misaligned transfer, avoid RMW to FIFO The SPI FIFO can't properly do RMW (i.e. bytewise updates) because when you read the FIFO you are actually reading the SPI read data, not what was written into the write FIFO. Refactor the transferBytes to take account of this. For aligned input and outputs, perform as before (but handle non-x4 sizes properly). For misaligned inputs, if it's unidirectional then do bytewise until the direction data pointer is aligned and then do 32b accesses. Fod bidirectional and misaligned inputs, copy the output data to an aligned buffer, do the transfer, then copy the read back data from temp aligned buffer to the real input buffer. --- libraries/SPI/SPI.cpp | 132 +++++++++++++++++++++--------------------- libraries/SPI/SPI.h | 1 + 2 files changed, 68 insertions(+), 65 deletions(-) diff --git a/libraries/SPI/SPI.cpp b/libraries/SPI/SPI.cpp index c2d743a60c..1222e4fc80 100644 --- a/libraries/SPI/SPI.cpp +++ b/libraries/SPI/SPI.cpp @@ -528,94 +528,96 @@ void SPIClass::transferBytes(const uint8_t * out, uint8_t * in, uint32_t size) { } /** + * Note: + * in and out need to be aligned to 32Bit + * or you get an Fatal exception (9) * @param out uint8_t * * @param in uint8_t * * @param size uint8_t (max 64) */ -void SPIClass::transferBytes_(const uint8_t * out, uint8_t * in, uint8_t size) { - while(SPI1CMD & SPIBUSY) {} // Make sure we're IDLE on the SPI - - int bytesToTransfer = size; // How much to read/write from the FIFO in bytes - if (out && !in) { - int bytesLeft = size; - - // Only transmitting, get out 32b aligned - while (bytesLeft && ((uint32_t)out & 3)) { - SPI.transfer(*(out++)); - bytesLeft--; - } +void SPIClass::transferBytesAligned_(const uint8_t * out, uint8_t * in, uint8_t size) { + if (!size) + return; - if (!bytesLeft) return; - bytesToTransfer = bytesLeft; + while(SPI1CMD & SPIBUSY) {} + // Set in/out Bits to transfer - // Now do 32b writes until we have 0 or fewer bytes left - // Note we can read and store past the end of string because it will not be sent (setDataBits) - uint32_t *dataPtr = (uint32_t*)out; - volatile uint32_t *fifoPtr = &SPI1W0; - while (bytesLeft > 0) { - *(fifoPtr++) = *(dataPtr++); - bytesLeft -= 4; - } - // Remainder will be sent out of loaded FIFO below - } else if (in && !out) { - int bytesLeft = size; - - // Only receiving, get in 32b aligned - while (bytesLeft && ((uint32_t)in & 3)) { - *(in++) = SPI.transfer(0xff); - bytesLeft--; - } + setDataBits(size * 8); - if (!bytesLeft) return; - bytesToTransfer = bytesLeft; + volatile uint32_t *fifoPtr = &SPI1W0; + uint8_t dataSize = ((size + 3) / 4); - // Now send 32b writes of all 0xff, actual read will be done below - // Note we can write add'l 0xffs and they won't be sent thanks to setDataBits below - volatile uint32_t *fifoPtr = &SPI1W0; - while (bytesLeft > 0) { - *(fifoPtr++) = 0xffffffff; - bytesLeft -= 4; + if (out) { + uint32_t *dataPtr = (uint32_t*) out; + while (dataSize--) { + *fifoPtr = *dataPtr; + dataPtr++; + fifoPtr++; } - } else if (in && out && (((uint32_t)in & 3) || ((uint32_t)out & 3))) { - // Bidirectional and we have one or both pointers misaligned - while (size) { - *(in++) = SPI.transfer(*(out++)); - size--; + } else { + // no out data only read fill with dummy data! + while (dataSize--) { + *fifoPtr = 0xFFFFFFFF; + fifoPtr++; } - // All the data was transferred byte-wise, we're done - return; } - // At this point we've got aligned (or null) in and out and the FIFO is filled - // with whatever data we're going to transmit as aligned - - // Set in/out bits to transfer - while (SPI1CMD & SPIBUSY) {} // Paranoia, make sure SPI is idle - setDataBits(bytesToTransfer * 8); - - // Start the transfer and wait for done SPI1CMD |= SPIBUSY; - while (SPI1CMD & SPIBUSY) { /* noop, waiting */ } + while(SPI1CMD & SPIBUSY) {} if (in) { - // Bulk read out by 4 until we have 0..3 left - uint32_t *dataPtr = (uint32_t*)in; - volatile uint32_t *fifoPtr = &SPI1W0; - while (bytesToTransfer >= 4) { + uint32_t *dataPtr = (uint32_t*) in; + fifoPtr = &SPI1W0; + dataSize = size; + while (dataSize >= 4) { *(dataPtr++) = *(fifoPtr++); - bytesToTransfer -= 4; - in += 4; // Keep track of the in ptr for any stragglers + dataSize -= 4; + in += 4; } - - // Bytewise read out the remainder - volatile uint8_t *fifoPtrB = (uint8_t*)fifoPtr; - while (bytesToTransfer--) { + volatile uint8_t *fifoPtrB = (volatile uint8_t *)fifoPtr; + while (dataSize--) { *(in++) = *(fifoPtrB++); } } } + +void SPIClass::transferBytes_(const uint8_t * out, uint8_t * in, uint8_t size) { + if (!((uint32_t)out & 3) && !((uint32_t)in & 3)) { + // Input and output are both 32b aligned or NULL + transferBytesAligned_(out, in, size); + } else if (!out && ((uint32_t)in & 3)) { + // Input only and misaligned, do bytewise until in aligned + while (size && ((uint32_t)in & 3)) { + *(in++) = transfer(0xff); + size--; + } + transferBytesAligned_(out, in, size); + } else if (!in && ((uint32_t)out & 3)) { + // Output only and misaligned, bytewise xmit until aligned + while (size && ((uint32_t)out & 3)) { + transfer(*(out++)); + size--; + } + transferBytesAligned_(out, in, size); + } else { + // HW FIFO has 64b limit, so just align in RAM and then send to FIFO aligned + uint8_t outAligned[64]; // Stack vars will be 32b aligned + uint8_t inAligned[64]; // Stack vars will be 32b aligned + if (out) { + memcpy(outAligned, out, size); + } else { + memset(outAligned, 0xff, size); // 0xff = no xmit data + } + transferBytesAligned_(outAligned, inAligned, size); + if (in) { + memcpy(in, inAligned, size); + } + } +} + + #if !defined(NO_GLOBAL_INSTANCES) && !defined(NO_GLOBAL_SPI) SPIClass SPI; #endif diff --git a/libraries/SPI/SPI.h b/libraries/SPI/SPI.h index 84b619f670..e8fb8a3902 100644 --- a/libraries/SPI/SPI.h +++ b/libraries/SPI/SPI.h @@ -79,6 +79,7 @@ class SPIClass { uint8_t pinSet; void writeBytes_(const uint8_t * data, uint8_t size); void transferBytes_(const uint8_t * out, uint8_t * in, uint8_t size); + void transferBytesAligned_(const uint8_t * out, uint8_t * in, uint8_t size); inline void setDataBits(uint16_t bits); }; From 8ea94fff52a005af33597b92da680d0ae1048856 Mon Sep 17 00:00:00 2001 From: "Earle F. Philhower, III" Date: Sat, 2 Feb 2019 22:22:39 -0800 Subject: [PATCH 3/4] Fix comments, clean condition checks, save stack Add more comments and adjust naming to be more informative in transferBytes_ and *aligned_. Save 64bytes of stack in double misaligned case. --- libraries/SPI/SPI.cpp | 50 +++++++++++++++++++------------------------ 1 file changed, 22 insertions(+), 28 deletions(-) diff --git a/libraries/SPI/SPI.cpp b/libraries/SPI/SPI.cpp index 1222e4fc80..caa44cc461 100644 --- a/libraries/SPI/SPI.cpp +++ b/libraries/SPI/SPI.cpp @@ -546,20 +546,18 @@ void SPIClass::transferBytesAligned_(const uint8_t * out, uint8_t * in, uint8_t setDataBits(size * 8); volatile uint32_t *fifoPtr = &SPI1W0; - uint8_t dataSize = ((size + 3) / 4); if (out) { + uint8_t outSize = ((size + 3) / 4); uint32_t *dataPtr = (uint32_t*) out; - while (dataSize--) { - *fifoPtr = *dataPtr; - dataPtr++; - fifoPtr++; + while (outSize--) { + *(fifoPtr++) = *(dataPtr++); } } else { + uint8_t outSize = ((size + 3) / 4); // no out data only read fill with dummy data! - while (dataSize--) { - *fifoPtr = 0xFFFFFFFF; - fifoPtr++; + while (outSize--) { + *(fifoPtr++) = 0xFFFFFFFF; } } @@ -569,14 +567,15 @@ void SPIClass::transferBytesAligned_(const uint8_t * out, uint8_t * in, uint8_t if (in) { uint32_t *dataPtr = (uint32_t*) in; fifoPtr = &SPI1W0; - dataSize = size; - while (dataSize >= 4) { + int inSize = size; + // Unlike outSize above, inSize tracks *bytes* since we must transfer only the requested bytes to the app to avoid overwriting other vars. + while (inSize >= 4) { *(dataPtr++) = *(fifoPtr++); - dataSize -= 4; + inSize -= 4; in += 4; } volatile uint8_t *fifoPtrB = (volatile uint8_t *)fifoPtr; - while (dataSize--) { + while (inSize--) { *(in++) = *(fifoPtrB++); } } @@ -587,33 +586,28 @@ void SPIClass::transferBytes_(const uint8_t * out, uint8_t * in, uint8_t size) { if (!((uint32_t)out & 3) && !((uint32_t)in & 3)) { // Input and output are both 32b aligned or NULL transferBytesAligned_(out, in, size); - } else if (!out && ((uint32_t)in & 3)) { + } else if (!out) { // Input only and misaligned, do bytewise until in aligned while (size && ((uint32_t)in & 3)) { *(in++) = transfer(0xff); - size--; + size--; } transferBytesAligned_(out, in, size); - } else if (!in && ((uint32_t)out & 3)) { + } else if (!in) { // Output only and misaligned, bytewise xmit until aligned while (size && ((uint32_t)out & 3)) { transfer(*(out++)); - size--; + size--; } transferBytesAligned_(out, in, size); } else { - // HW FIFO has 64b limit, so just align in RAM and then send to FIFO aligned - uint8_t outAligned[64]; // Stack vars will be 32b aligned - uint8_t inAligned[64]; // Stack vars will be 32b aligned - if (out) { - memcpy(outAligned, out, size); - } else { - memset(outAligned, 0xff, size); // 0xff = no xmit data - } - transferBytesAligned_(outAligned, inAligned, size); - if (in) { - memcpy(in, inAligned, size); - } + // HW FIFO has 64b limit and ::transferBytes breaks up large xfers into 64byte chunks before calling this function + // We know at this point it is a bidirectional transfer + uint8_t aligned[64]; // Stack vars will be 32b aligned + // No need for separate out and in aligned copies, we can overwrite our out copy with the input data safely + memcpy(aligned, out, size); + transferBytesAligned_(aligned, aligned, size); + memcpy(in, aligned, size); } } From fc35ecb1682907d88cfb81d38a2ddb4dd7feb995 Mon Sep 17 00:00:00 2001 From: "Earle F. Philhower, III" Date: Sat, 2 Feb 2019 23:52:08 -0800 Subject: [PATCH 4/4] Optimize misaligned transfers, reduce code size On any misaligned input or output, always use a temp buffer. No need for special casing and bytewise ::transfer(). This should be faster as bytewise ::transfer involves a significant number of IO register accesses and setup. Thanks to @devyte for the suggestion. --- libraries/SPI/SPI.cpp | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/libraries/SPI/SPI.cpp b/libraries/SPI/SPI.cpp index caa44cc461..f67c6d69fc 100644 --- a/libraries/SPI/SPI.cpp +++ b/libraries/SPI/SPI.cpp @@ -586,28 +586,18 @@ void SPIClass::transferBytes_(const uint8_t * out, uint8_t * in, uint8_t size) { if (!((uint32_t)out & 3) && !((uint32_t)in & 3)) { // Input and output are both 32b aligned or NULL transferBytesAligned_(out, in, size); - } else if (!out) { - // Input only and misaligned, do bytewise until in aligned - while (size && ((uint32_t)in & 3)) { - *(in++) = transfer(0xff); - size--; - } - transferBytesAligned_(out, in, size); - } else if (!in) { - // Output only and misaligned, bytewise xmit until aligned - while (size && ((uint32_t)out & 3)) { - transfer(*(out++)); - size--; - } - transferBytesAligned_(out, in, size); } else { // HW FIFO has 64b limit and ::transferBytes breaks up large xfers into 64byte chunks before calling this function - // We know at this point it is a bidirectional transfer - uint8_t aligned[64]; // Stack vars will be 32b aligned + // We know at this point at least one direction is misaligned, so use temporary buffer to align everything // No need for separate out and in aligned copies, we can overwrite our out copy with the input data safely - memcpy(aligned, out, size); - transferBytesAligned_(aligned, aligned, size); - memcpy(in, aligned, size); + uint8_t aligned[64]; // Stack vars will be 32b aligned + if (out) { + memcpy(aligned, out, size); + } + transferBytesAligned_(out ? aligned : nullptr, in ? aligned : nullptr, size); + if (in) { + memcpy(in, aligned, size); + } } }