-
Notifications
You must be signed in to change notification settings - Fork 143
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
STM32 DMA rework #371
STM32 DMA rework #371
Conversation
For testing I use a derived SPI: namespace modm::platform
{
template <class DmaController>
class SpiMaster1_Dma : public SpiMaster1
{
using DmaMapping = typename DmaController::template RequestMapping<Peripheral::Spi1>;
using RxChannel = DmaMapping::Rx;
using TxChannel = DmaMapping::Tx;
public:
template< class SystemClock, baudrate_t baudrate, percent_t tolerance=pct(5) >
static void
initialize()
{
using modm::platform::DmaBase;
RxChannel::configure(DmaBase::DataTransferDirection::PeripheralToMemory,
DmaBase::MemoryDataSize::Byte, DmaBase::PeripheralDataSize::Byte,
DmaBase::MemoryIncrementMode::Increment, DmaBase::PeripheralIncrementMode::Fixed,
DmaBase::Priority::High);
RxChannel::setSourceAddress(SPI1_BASE + 0x0c);
RxChannel::setTransferErrorIrqHandler(handleTransferError);
RxChannel::setTransferCompleteIrqHandler(handleReceiveComplete);
RxChannel::enableInterruptVector();
RxChannel::enableInterrupt(DmaBase::Interrupt::Error | DmaBase::Interrupt::TransferComplete);
RxChannel::template setPeripheralRequest<DmaMapping::RxRequest>();
TxChannel::configure(DmaBase::DataTransferDirection::MemoryToPeripheral,
DmaBase::MemoryDataSize::Byte, DmaBase::PeripheralDataSize::Byte,
DmaBase::MemoryIncrementMode::Increment, DmaBase::PeripheralIncrementMode::Fixed,
DmaBase::Priority::High);
TxChannel::setDestinationAddress(SPI1_BASE + 0x0c);
TxChannel::setTransferErrorIrqHandler(handleTransferError);
TxChannel::setTransferCompleteIrqHandler(handleTransmitComplete);
TxChannel::enableInterruptVector();
TxChannel::enableInterrupt(DmaBase::Interrupt::Error | DmaBase::Interrupt::TransferComplete);
TxChannel::template setPeripheralRequest<DmaMapping::TxRequest>();
SpiMaster1::initialize<SystemClock, baudrate, tolerance>();
SpiHal1::setRxFifoThreshold(SpiHal1::RxFifoThreshold::QuarterFull);
}
static uint8_t
transferBlocking(uint8_t data)
{
return RF_CALL_BLOCKING(transfer(data));
}
static void
transferBlocking(uint8_t *tx, uint8_t *rx, std::size_t length)
{
RF_CALL_BLOCKING(transfer(tx, rx, length));
}
static modm::ResumableResult<uint8_t>
transfer(uint8_t data)
{
// this is a manually implemented "fast resumable function"
// there is no context or nesting protection, since we don't need it.
// there are only two states encoded into 1 bit (LSB of state):
// 1. waiting to start, and
// 2. waiting to finish.
// LSB != Bit0 ?
if ( !(state & Bit0) )
{
// disable DMA for single byte transfer
SpiHal1::disableInterrupt(SpiBase::Interrupt::TxDmaEnable | SpiBase::Interrupt::RxDmaEnable);
// wait for previous transfer to finish
if (!SpiHal1::isTransmitRegisterEmpty())
return {modm::rf::Running};
// start transfer by copying data into register
SpiHal1::write(data);
// set LSB = Bit0
state |= Bit0;
}
if (!SpiHal1::isReceiveRegisterNotEmpty())
return {modm::rf::Running};
SpiHal1::read(data);
// transfer finished
state &= ~Bit0;
return {modm::rf::Stop, data};
}
static modm::ResumableResult<void>
transfer(uint8_t *tx, uint8_t *rx, std::size_t length)
{
// this is a manually implemented "fast resumable function"
// there is no context or nesting protection, since we don't need it.
// there are only two states encoded into 1 bit (Bit1 of state):
// 1. initialize index, and
// 2. wait for 1-byte transfer to finish.
// we are only interested in Bit1
switch(state & Bit1)
{
case 0:
// we will only visit this state once
state |= Bit1;
error = false;
SpiHal1::enableInterrupt(SpiBase::Interrupt::TxDmaEnable | SpiBase::Interrupt::RxDmaEnable);
if (tx) {
TxChannel::setSourceAddress(uint32_t(tx));
TxChannel::setMemoryIncrementMode(true);
} else {
TxChannel::setSourceAddress(uint32_t(&dmaDummy));
TxChannel::setMemoryIncrementMode(false);
}
if (rx) {
RxChannel::setDestinationAddress(uint32_t(rx));
RxChannel::setMemoryIncrementMode(true);
} else {
RxChannel::setDestinationAddress(uint32_t(&dmaDummy));
RxChannel::setMemoryIncrementMode(false);
}
RxChannel::setDataLength(length);
receiveComplete = false;
RxChannel::start();
TxChannel::setDataLength(length);
transmitComplete = false;
TxChannel::start();
[[fallthrough]];
default:
while (true) {
if (error)
break;
if (not transmitComplete and not receiveComplete)
return { modm::rf::Running };
if (SpiHal1::getInterruptFlags() & SpiBase::InterruptFlag::FifoTxLevel)
return { modm::rf::Running };
if (SpiHal1::getInterruptFlags() & SpiBase::InterruptFlag::Busy)
return { modm::rf::Running };
if (SpiHal1::getInterruptFlags() & SpiBase::InterruptFlag::FifoRxLevel)
return { modm::rf::Running };
break;
}
SpiHal1::disableInterrupt(SpiBase::Interrupt::TxDmaEnable | SpiBase::Interrupt::RxDmaEnable);
// clear the state
state &= ~Bit1;
return {modm::rf::Stop};
}
}
private:
static void
handleTransferError()
{
SpiHal1::disableInterrupt(SpiBase::Interrupt::TxDmaEnable | SpiBase::Interrupt::RxDmaEnable);
RxChannel::stop();
TxChannel::stop();
error = true;
}
static void
handleReceiveComplete()
{
RxChannel::stop();
receiveComplete = true;
}
static void
handleTransmitComplete()
{
TxChannel::stop();
transmitComplete = true;
}
static bool error;
static bool transmitComplete;
static bool receiveComplete;
// needed for transfer where no RX or TX buffers are given
static uint8_t dmaDummy;
};
template <class DmaController>
bool
SpiMaster1_Dma<DmaController>::error(false);
template <class DmaController>
bool
SpiMaster1_Dma<DmaController>::receiveComplete(false);
template <class DmaController>
bool
SpiMaster1_Dma<DmaController>::transmitComplete(false);
template <class DmaController>
uint8_t
SpiMaster1_Dma<DmaController>::dmaDummy(0);
} The following files require a change to make the above SPI work: spi_base_hpp.in enum class
Interrupt : uint32_t
{
RxBufferNotEmpty = SPI_CR2_RXNEIE,
TxBufferEmpty = SPI_CR2_TXEIE,
Error = SPI_CR2_ERRIE,
RxDmaEnable = SPI_CR2_RXDMAEN,
TxDmaEnable = SPI_CR2_TXDMAEN,
}; Add Fifo threshold enum: enum class
RxFifoThreshold : uint32_t
{
HalfFull = 0,
QuarterFull = SPI_CR2_FRXTH,
}; spi_hal_impl.hpp void
setRxFifoThreshold(RxFifoThreshold threshold)
{
SPI{{ id }}->CR2 = (SPI{{ id }}->CR2 & ~static_cast<uint32_t>(RxFifoThreshold::QuarterFull))
| static_cast<uint32_t>(threshold);
} |
Did you had time to look into it? |
In my code is an example how the device file data can be accessed: https://github.com/modm-io/modm/pull/358/files#diff-592bf27d51037d3004e1948047e9ef08R59-R94 I'll take a look on your code later. |
Looks to me I'd need to prepare the data in the module.lb file and pass it to the templates, correct? I thought, when I do this line properties["dma"] = device.get_driver("dma") in module.lb, then I could access everything of the dma section in the templates directly. But I have no clue how. |
You get the raw tree data structure converted to Python, you can dump it via: import pprint
pp = pprint.PrettyPrinter()
pp.pprint(device.get_driver("dma"))
Unfortunately this is it, there is only raw access to the data and thus I recommend converting this in Python to a simpler data structure for use in Jinja. Consider the devices files to be internal, such a verbose access wasn't intended to be used to much, and I plan to move common conversion algorithms from modm into modm-devices with a proper Python API to make this easier. It'll also decouple the XML data structure format from the Python API format. I just need to fix all the other things in modm, then I can fix that 😇. |
Thanks. Will get back to it once I finished the other task (most likely next weekend). |
Finally managed to walk through the device tree and get the information into the templates. Came across some inconsistencies, where the signal driver name has no instance (when there is only one) in the DMA data, but the peripheral has a "1" added. Also changed the way of using it a bit. E.g. for SPI it looks like: using DmaRx = modm::platform::Dma1::Channel<modm::platform::DmaBase::Channel::Channel2>;
using DmaTx = modm::platform::Dma1::Channel<modm::platform::DmaBase::Channel::Channel3>;
using Spi = modm::platform::SpiMaster1_Dma<DmaRx, DmaTx>; And the SPI class is modified to: template <class DmaRx, class DmaTx>
class SpiMaster1_Dma : public SpiMaster1
{
using RxChannel = typename DmaRx::template RequestMapping<Peripheral::Spi1, DmaBase::Signal::Rx>::Channel;
using TxChannel = typename DmaTx::template RequestMapping<Peripheral::Spi1, DmaBase::Signal::Tx>::Channel;
static constexpr DmaBase::Request RxRequest = DmaRx::template RequestMapping<Peripheral::Spi1, DmaBase::Signal::Rx>::Request;
static constexpr DmaBase::Request TxRequest = DmaTx::template RequestMapping<Peripheral::Spi1, DmaBase::Signal::Tx>::Request;
...
}; The RequestMapping struct makes sure, that the DMA channel is valid for this hardware. PS: I know I still need to update the modm base, so I expect tests to fail. |
I had another look into the F76x data sheet. The DMA works more or less the same, major difference is the naming they use. In F76x data sheet they talk about streams and channels, while for L4 they use channels and requests. At the end it is the same and I propose to stick with one naming convention. We develop code that will be used on L4 and F7, so I want to avoid different names. |
Rebased the branch to latest develop. |
Ciao, any comments on the concept or the code? Otherwise I'd start adding documentation and make it ready for merging. |
I'm not very knowledgable or experienced on DMA, so I can't really contribute beyond a superficial review, which I don't have any complaints about. However, I very much appreciate that you used the data in modm-devices, since this makes it easier to port to and verify other DMA implementations later. I would just run with this and deal with issues down the (porting) line later, it's not like any feature in xpcc/modm was ever perfect from the very start, and it's unrealistic to expect it to be. cc @rleh what say you? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm sorry I haven't been able to get a closer look at your great new DMA driver. I will try to read and review the code in the next days.
Regarding the naming: I strongly advocate to clearly name the things that ST inconsistently names as channels, streams or requests. Either as "slots" (common in literature) or as "channels" (so called in Linux). The naming should be well documented for microcontrollers other than STM32 and to avoid confusion in the future.
Have been looking around and would propose to keep channels as name. Surely it needs to be documented for MCU using a different terminology. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice work! Is there a particular reason to restrict the driver to the L431, L432, L443 or is this just the hardware you work on? Probably it wouldn't hurt to also enable other controllers from the same reference manual. They should work identically.
Yes, currently working an a L431. It is created with the L432 and L443 from the same device XML file, that's why I restricted it to those in the beginning. As my project proceeds I'll add other MCU as well (in a few weeks I'll start on a F765). I looked into the data sheets and expanded support to few more L4, where the DMA chapters are equal. |
It would be fine for me to merge this with just a few STM32L4 devices. We (or someone else) can add (and test!) support for more devices later. |
Is there any example you @mikewolfram could commit to this branch? |
If there are on more objections let’s merge it and I’ll contribute the SPI with DMA next. What else needs to be done to have it ready for merging? |
Do we need both static void setAddresses(uint32_t sourceAddress, uint32_t destinationAddress); and static void setSourceAddress(uint32_t address);
static void setDestinationAddress(uint32_t address); ? Besides: I would prefer some kind of pointer as argument, see above. |
* @param[in] increment Enable/disable | ||
*/ | ||
static void | ||
setPeripheralIncrementMode(bool increment) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure if I would prefer a less hardware-specific abstraction (setSourceIncrementMode(...)
/ setDestinationIncrementMode(...)
), even if this would add complexity here...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
See my comment here.
@mikewolfram I am wondering if we could extend your proposal to handle the case of a user trying to use the same DMA channels with different peripherals in one program. If we had an SPI driver like your example and a DAC driver both using DMA1 channel 3, it would fail at runtime in a probably non-obvious way. With all the template magic and power of C++ at hand, we probably could do better. When all the DMA configuration details are abstracted away, it feels to easy to me to overlook the case. If we chose to not permit this, we could detect it at build time. If we wanted to allow the use case of channels shared between peripherals, we could at least have a run-time mechanism that asserts on concurrent use (or something more sophisticated). |
Good point @chris-durand! I guess it is nearly impossible to detect multiple usages of DMA channels in the compiler across multiple compilation units. Which brings me to an idea: Similar to this hacked heap-is-not-available linker warning we could get the linker to throw an error message if DMA channels are used twice. Do we have use-cases where one DMA channel is used by multiple peripherals? In this case only a runtime-solution would be possible. |
I would remove the combined one because it is possible to get the order wrong, e.g. |
I removed the combined one and looking at the comments about the peripheral/memory data size and increment mode functions I wonder if it wouldn't be better to rename the address functions to setPeripheralAddress() and setMemoryAddress()? That way the checks whether the direction has been set could be removed. Since DMA is mostly (?) used with peripherals the only thing where the user has to think a bit more is when using DMA between memories. |
I cannot imagine how that will work unless an instance of the DMA channel is created. What could be done is to check the DMA configuration register at runtime and throw an error if it isn't 0. And a way to reset it in case someone wants to use the channel for something else. We cannot take all the responsibilities from the users, there are too many cases and possibilities. Working with DMA requires him to look into the manual anyway to figure out the right channel (I haven't found a way yet to assign default channels to peripheral from device files), so he should be able to avoid duplicate uses. |
Nah, the DMA can do all four combinations, Mem2Mem, Per2Per, Mem2Per and Per2Mem. In detail, regarding DMA channel/requests assignment: A complete solution to this resource problem would require using some form of shared data at compile time over multiple compilation units to operate on the whole picture. The compiler doesn't support that via distributed constexpr (sadly, cos that would be amazing). Similarly to the SystemClock, which describes a whole system state in one place, we would need a global DMA config (perhaps with hints for runtime priority arbitration), but I think this is only user-friendly with an external GUI tool (for a HTML+JS clone of CubeMX using modm-devices), that understands the whole device configuration and outputs modm-flavoured C++ code for GPIO, DMA, SystemClock. Btw, even "simple" things like GPIO<->peripheral connections aren't really safe, due to a lack of compile-time statefulness. This makes me quite sad, but of course I understand the technical limitations of compiler architecture. As pointed out before, the linker can help here, but it's quite a high-effort solution. Uart1::connect<GpioD0::Tx>();
Uart1::connect<GpioA0::Tx>(); // Reconnecting, but no compile time warning possible I'm all for doing sanity checks in C++, but a fully fledged solution will requires some centralized approach (sadly), which is out-of-scope of this PR and probably out-of-motivation of Mike ;-P In general: I'm worried about the tendency of modm PRs to go stale after a while, so I'd prefer to merge a solution that works now within some limited scope (here some L4 devices) and then collect more experience with this code to improve later. lbuild supports this "conditional composition" of modm features very well, so there should be no adverse impact on user code (apart from the DMA only being available for some devices). This isn't a weakness either, I don't recall a single feature I added, that I didn't have to rework again later. Fazit: I'm very ok with merging "feature-incomplete" PRs. |
The data sheets I read so far were of STM32 only supporting Mem2Mem, Per2Mem and Mem2Per. I haven't seen one supporting Per2Per. When a channel is configured one has to tell the direction, which is either Per2Mem, Mem2Per or Mem2Mem (one the MCU I know of). In that case the specific functions make sense. But whether to use setSourceAddress() and setDestinationAddress() or setPeripheralAddress() and setMemoryAddress() is a minor detail at the end of the day. I'm with you, using DMA is a bit more sophisticated. For most of the users it will be hidden in the hardware driver, all he has to do is to provide which channels to use. |
I think still open is the question whether to name the methods setSourceAddress() and setDestinationAddress() or stick with the setPeripheralAddress() and setMemoryAddress()? Would be great to get this done soon. I want to provide the SPI DMA as well. |
Just stay with the name you prefer. |
Well, then I'd change it to setPeripheralAddress() and setMemoryAddress(), which seems more reasonable since we also use PerihperalDataSize and MemoryDataSize. One more question, do I need to rebase this PR on the latest develop for merging? |
Maintainer says "yes please". |
So we will merge this first and later you add the DMA-accelerated SPI driver and an example in another pull request? |
Exactly. |
29083f1
to
6f7f5f8
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Go go go!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Go!
Working on a new implementation of DMA for STM32. Started with support for L4 (L431). As discussed in #358 this PR is to discuss and finish the work.
Things to do: