Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 76 additions & 73 deletions Marlin/src/HAL/HAL_AVR/math_AVR.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,92 +23,95 @@
#ifndef _MATH_AVR_H_
#define _MATH_AVR_H_

// Wraps one assembly instruction as a string fragment: a leading space, the
// instruction text, then "\n\t" so consecutive fragments form separate lines.
// NOTE(review): the asm blocks in this file invoke A(...), not a(...) - confirm
// that A is defined elsewhere, or that this macro was meant to be named A.
#define a(CODE) " " CODE "\n\t"

/**
* Optimized math functions for AVR
*/

// intRes = longIn1 * longIn2 >> 24
// uses:
// r26 to store 0
// r27 to store bits 16-23 of the 48bit result. The top bit is used to round the two byte result.
// A[tmp] to store 0
// B[tmp] to store bits 16-23 of the 48bit result. The top bit is used to round the two byte result.
// Note that the lower two bytes and the upper byte of the 48-bit result are not calculated.
// This can cause the result to be off by one, as the lower bytes may cause carries into the upper ones.
// B0 A0 are bits 24-39 and are the returned value
// C1 B1 A1 is longIn1
// D2 C2 B2 A2 is longIn2
// B A are bits 24-39 and are the returned value
// C B A is longIn1
// D C B A is longIn2
//
// Statement-macro form. Operand %0 is intRes, %1 is longIn1 and %2 is longIn2.
// Only bytes %A1..%C1 of longIn1 are used, so it is treated as a 24-bit value.
// The 8x8 partial products that overlap bits 16-39 of the product are summed:
// r27 accumulates bits 16-23, %A0/%B0 accumulate bits 24-39, and r26 is held
// at zero so that "adc reg, r26" just propagates the carry upward.
// r26 and r27 are declared as clobbers. r1 is cleared at the end because every
// mul overwrites r1:r0.
// NOTE(review): "lsr r27" shifts bit 0 of r27 into carry, whereas the comment
// above says the top bit is used for rounding - confirm which was intended.
#define MultiU24X32toH16(intRes, longIn1, longIn2) \
asm volatile ( \
A("clr r26") \
A("mul %A1, %B2") \
A("mov r27, r1") \
A("mul %B1, %C2") \
A("movw %A0, r0") \
A("mul %C1, %C2") \
A("add %B0, r0") \
A("mul %C1, %B2") \
A("add %A0, r0") \
A("adc %B0, r1") \
A("mul %A1, %C2") \
A("add r27, r0") \
A("adc %A0, r1") \
A("adc %B0, r26") \
A("mul %B1, %B2") \
A("add r27, r0") \
A("adc %A0, r1") \
A("adc %B0, r26") \
A("mul %C1, %A2") \
A("add r27, r0") \
A("adc %A0, r1") \
A("adc %B0, r26") \
A("mul %B1, %A2") \
A("add r27, r1") \
A("adc %A0, r26") \
A("adc %B0, r26") \
A("lsr r27") \
A("adc %A0, r26") \
A("adc %B0, r26") \
A("mul %D2, %A1") \
A("add %A0, r0") \
A("adc %B0, r1") \
A("mul %D2, %B1") \
A("add %B0, r0") \
A("clr r1") \
: \
"=&r" (intRes) \
: \
"d" (longIn1), \
"d" (longIn2) \
: \
"r26" , "r27" \
)
/**
* Multiply the low 24 bits of longIn1 by the 32-bit longIn2 and return
* bits 24-39 of the product, computed with AVR mul instructions.
*
* Byte naming in the comments below: A is the least significant byte, so
* longIn1 = C1:B1:A1 and longIn2 = D2:C2:B2:A2.
*
* A1*A2 and C1*D2 are never computed, and only the high byte of A1*B2 and
* B1*A2 is used, so carries out of the lowest bytes are not accounted for.
*
* @param longIn1 multiplicand; only bytes A..C are read
* @param longIn2 multiplier; all four bytes are read
* @return bits 24-39 of the product, plus the carry produced by "lsr tmp2"
*/
static FORCE_INLINE uint16_t MultiU24X32toH16(uint32_t longIn1, uint32_t longIn2) {
register uint8_t tmp1;
register uint8_t tmp2;
register uint16_t intRes;
__asm__ __volatile__(
// tmp1 = 0; it is only ever used as the zero operand of adc to propagate carries
A("clr %[tmp1]")
// A1*B2 spans bits 8-23: keep just its high byte (bits 16-23) in tmp2
A("mul %A[longIn1], %B[longIn2]")
A("mov %[tmp2], r1")
// B1*C2 spans bits 24-39: it seeds the 16-bit result
A("mul %B[longIn1], %C[longIn2]")
A("movw %A[intRes], r0")
// C1*C2 spans bits 32-47: only its low byte lands inside the result
A("mul %C[longIn1], %C[longIn2]")
A("add %B[intRes], r0")
// C1*B2 spans bits 24-39: add both bytes to the result
A("mul %C[longIn1], %B[longIn2]")
A("add %A[intRes], r0")
A("adc %B[intRes], r1")
// A1*C2 spans bits 16-31: low byte into tmp2, high byte into the result
A("mul %A[longIn1], %C[longIn2]")
A("add %[tmp2], r0")
A("adc %A[intRes], r1")
A("adc %B[intRes], %[tmp1]")
// B1*B2 spans bits 16-31: same treatment
A("mul %B[longIn1], %B[longIn2]")
A("add %[tmp2], r0")
A("adc %A[intRes], r1")
A("adc %B[intRes], %[tmp1]")
// C1*A2 spans bits 16-31: same treatment
A("mul %C[longIn1], %A[longIn2]")
A("add %[tmp2], r0")
A("adc %A[intRes], r1")
A("adc %B[intRes], %[tmp1]")
// B1*A2 spans bits 8-23: only its high byte (bits 16-23) is added to tmp2
A("mul %B[longIn1], %A[longIn2]")
A("add %[tmp2], r1")
A("adc %A[intRes], %[tmp1]")
A("adc %B[intRes], %[tmp1]")
// Rounding step: lsr moves bit 0 of tmp2 into carry, which is then added to the result.
// NOTE(review): the comment above this function says the top bit of this byte is used
// for rounding; lsr uses the bottom bit - confirm which was intended.
A("lsr %[tmp2]")
A("adc %A[intRes], %[tmp1]")
A("adc %B[intRes], %[tmp1]")
// D2*A1 spans bits 24-39: add both bytes to the result
A("mul %D[longIn2], %A[longIn1]")
A("add %A[intRes], r0")
A("adc %B[intRes], r1")
// D2*B1 spans bits 32-47: only its low byte lands inside the result
A("mul %D[longIn2], %B[longIn1]")
A("add %B[intRes], r0")
// mul left a product byte in r1; zero it again (avr-gcc expects r1 to hold 0)
A("clr r1")
// Outputs are early-clobber ("&") because they are written before all inputs are consumed
: [intRes] "=&r" (intRes),
[tmp1] "=&r" (tmp1),
[tmp2] "=&r" (tmp2)
: [longIn1] "d" (longIn1),
[longIn2] "d" (longIn2)
// add/adc/lsr/mul modify the status flags
: "cc"
);
return intRes;
}

// intRes = charIn1 * intIn2 >> 8 (the upper 16 bits of the 24 bit result)
// uses:
// r26 to store 0
// r27 to store the byte 1 of the 24 bit result
// Statement-macro form. Operand %0 is intRes, %1 is charIn1 and %2 is intIn2.
// Only the low byte of charIn1 (%A1) is used. It is multiplied by both bytes of
// intIn2, and bytes 1-2 of the 24-bit product end up in intRes, i.e. the
// product shifted right by 8.
// r26 is zeroed so that "adc reg, r26" just propagates carries; it is the only
// declared clobber. r1 is cleared at the end because mul overwrites r1:r0.
// NOTE(review): "lsr r0" shifts bit 0 of the lowest product byte into carry
// before it is added to the result; rounding to nearest would use bit 7 -
// confirm intent.
#define MultiU16X8toH16(intRes, charIn1, intIn2) \
asm volatile ( \
A("clr r26") \
A("mul %A1, %B2") \
A("movw %A0, r0") \
A("mul %A1, %A2") \
A("add %A0, r1") \
A("adc %B0, r26") \
A("lsr r0") \
A("adc %A0, r26") \
A("adc %B0, r26") \
A("clr r1") \
: \
"=&r" (intRes) \
: \
"d" (charIn1), \
"d" (intIn2) \
: \
"r26" \
)

/**
* Multiply the 8-bit charIn1 by the 16-bit intIn2 and return bytes 1-2 of
* the 24-bit product, i.e. (charIn1 * intIn2) >> 8, computed with AVR mul
* instructions.
*
* @param charIn1 8-bit multiplicand
* @param intIn2 16-bit multiplier
* @return the upper 16 bits of the 24-bit product, plus the carry produced
* by "lsr r0"
*/
static FORCE_INLINE uint16_t MultiU16X8toH16(uint8_t charIn1, uint16_t intIn2) {
register uint8_t tmp;
register uint16_t intRes;
__asm__ __volatile__ (
// tmp = 0; it is only used as the zero operand of adc to propagate carries
A("clr %[tmp]")
// charIn1 * high byte of intIn2 spans bits 8-23: it seeds the result
A("mul %[charIn1], %B[intIn2]")
A("movw %A[intRes], r0")
// charIn1 * low byte of intIn2 spans bits 0-15: add its high byte to the result
A("mul %[charIn1], %A[intIn2]")
A("add %A[intRes], r1")
A("adc %B[intRes], %[tmp]")
// Rounding step: lsr moves bit 0 of the lowest product byte into carry.
// NOTE(review): rounding to nearest would use bit 7 of r0 instead - confirm intent.
A("lsr r0")
A("adc %A[intRes], %[tmp]")
A("adc %B[intRes], %[tmp]")
// mul left a product byte in r1; zero it again (avr-gcc expects r1 to hold 0)
A("clr r1")
// Outputs are early-clobber ("&") because they are written before all inputs are consumed
: [intRes] "=&r" (intRes),
[tmp] "=&r" (tmp)
: [charIn1] "d" (charIn1),
[intIn2] "d" (intIn2)
// add/adc/lsr/mul modify the status flags
: "cc"
);
return intRes;
}

#endif // _MATH_AVR_H_
8 changes: 5 additions & 3 deletions Marlin/src/HAL/math_32bit.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,13 @@
#ifndef MATH_32BIT_H
#define MATH_32BIT_H

#include "../core/macros.h"

/**
* Math helper functions for 32 bit CPUs
*/

// intRes = (longIn1 * longIn2 + 0x80000000) >> 32
// The product is formed in 64 bits, half an output LSB is added for rounding,
// and bits 32 and up are kept. Every argument is parenthesized so that an
// expression argument such as (a + b) is multiplied as a whole.
#define MultiU32X32toH32(intRes, longIn1, longIn2) ((intRes) = ((uint64_t)(longIn1) * (longIn2) + 0x80000000) >> 32)
// intRes = (longIn1 * longIn2 + 0x00800000) >> 24
// Same scheme with a 24-bit shift: half an output LSB is 0x00800000.
#define MultiU32X24toH32(intRes, longIn1, longIn2) ((intRes) = ((uint64_t)(longIn1) * (longIn2) + 0x00800000) >> 24)
/**
 * Fixed-point multiply for 32-bit CPUs.
 *
 * Forms the full 64-bit product of the two inputs, adds 0x00800000
 * (half of the 24-bit shift step) so the result is rounded, and returns
 * the product shifted right by 24 bits, truncated to 32 bits.
 */
static FORCE_INLINE uint32_t MultiU32X24toH32(uint32_t longIn1, uint32_t longIn2) {
  const uint64_t product = (uint64_t)longIn1 * longIn2;
  const uint64_t rounded = product + 0x00800000;
  return rounded >> 24;
}

#endif // MATH_32BIT_H
23 changes: 9 additions & 14 deletions Marlin/src/module/stepper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1158,6 +1158,12 @@ HAL_STEP_TIMER_ISR {
HAL_timer_isr_epilogue(STEP_TIMER_NUM);
}

// Selects the rate multiply helper that matches the CPU width.
// The expansion deliberately has NO trailing semicolon: the macro is used
// inside larger expressions (e.g. STEP_MULTIPLY(t, rate) + initial_rate), and
// a semicolon in the body would end the statement early and silently discard
// everything after the call.
#ifdef CPU_32_BIT
#define STEP_MULTIPLY(A,B) MultiU32X24toH32(A, B)
#else
#define STEP_MULTIPLY(A,B) MultiU24X32toH16(A, B)
#endif

void Stepper::isr() {

#define ENDSTOP_NOMINAL_OCR_VAL 1500 * HAL_TICKS_PER_US // Check endstops every 1.5ms to guarantee two stepper ISRs within 5ms for BLTouch
Expand Down Expand Up @@ -1525,14 +1531,7 @@ void Stepper::isr() {
? _eval_bezier_curve(acceleration_time)
: current_block->cruise_rate;
#else
#ifdef CPU_32_BIT
MultiU32X24toH32(acc_step_rate, acceleration_time, current_block->acceleration_rate);
#else
MultiU24X32toH16(acc_step_rate, acceleration_time, current_block->acceleration_rate);
#endif
acc_step_rate += current_block->initial_rate;

// upper limit
acc_step_rate = STEP_MULTIPLY(acceleration_time, current_block->acceleration_rate) + current_block->initial_rate;
NOMORE(acc_step_rate, current_block->nominal_rate);
#endif

Expand Down Expand Up @@ -1576,18 +1575,14 @@ void Stepper::isr() {
#else

// Using the old trapezoidal control
#ifdef CPU_32_BIT
MultiU32X24toH32(step_rate, deceleration_time, current_block->acceleration_rate);
#else
MultiU24X32toH16(step_rate, deceleration_time, current_block->acceleration_rate);
#endif

step_rate = STEP_MULTIPLY(deceleration_time, current_block->acceleration_rate);
if (step_rate < acc_step_rate) { // Still decelerating?
step_rate = acc_step_rate - step_rate;
NOLESS(step_rate, current_block->final_rate);
}
else
step_rate = current_block->final_rate;

#endif

// step_rate to timer interval
Expand Down
20 changes: 10 additions & 10 deletions Marlin/src/module/stepper.h
Original file line number Diff line number Diff line change
Expand Up @@ -340,24 +340,24 @@ class Stepper {

#ifdef CPU_32_BIT
// In case of high-performance processor, it is able to calculate in real-time
const uint32_t MIN_TIME_PER_STEP = (HAL_STEPPER_TIMER_RATE) / ((STEP_DOUBLER_FREQUENCY) * 2);
const uint32_t min_time_per_step = (HAL_STEPPER_TIMER_RATE) / ((STEP_DOUBLER_FREQUENCY) * 2);
timer = uint32_t(HAL_STEPPER_TIMER_RATE) / step_rate;
NOLESS(timer, MIN_TIME_PER_STEP); // (STEP_DOUBLER_FREQUENCY * 2 kHz - this should never happen)
NOLESS(timer, min_time_per_step); // (STEP_DOUBLER_FREQUENCY * 2 kHz - this should never happen)
#else
NOLESS(step_rate, F_CPU / 500000);
step_rate -= F_CPU / 500000; // Correct for minimal speed
if (step_rate >= (8 * 256)) { // higher step rate
unsigned short table_address = (unsigned short)&speed_lookuptable_fast[(unsigned char)(step_rate >> 8)][0];
unsigned char tmp_step_rate = (step_rate & 0x00FF);
unsigned short gain = (unsigned short)pgm_read_word_near(table_address + 2);
MultiU16X8toH16(timer, tmp_step_rate, gain);
timer = (unsigned short)pgm_read_word_near(table_address) - timer;
uint8_t tmp_step_rate = (step_rate & 0x00FF);
uint16_t table_address = (uint16_t)&speed_lookuptable_fast[(uint8_t)(step_rate >> 8)][0];
uint16_t gain = (uint16_t)pgm_read_word_near(table_address + 2);
timer = MultiU16X8toH16(tmp_step_rate, gain);
timer = (uint16_t)pgm_read_word_near(table_address) - timer;
}
else { // lower step rates
unsigned short table_address = (unsigned short)&speed_lookuptable_slow[0][0];
uint16_t table_address = (uint16_t)&speed_lookuptable_slow[0][0];
table_address += ((step_rate) >> 1) & 0xFFFC;
timer = (unsigned short)pgm_read_word_near(table_address);
timer -= (((unsigned short)pgm_read_word_near(table_address + 2) * (unsigned char)(step_rate & 0x0007)) >> 3);
timer = (uint16_t)pgm_read_word_near(table_address);
timer -= (((uint16_t)pgm_read_word_near(table_address + 2) * (uint8_t)(step_rate & 0x0007)) >> 3);
}
if (timer < 100) { // (20kHz - this should never happen)
timer = 100;
Expand Down