From c945d04ea53bc262048746de9d1db9b7b469c092 Mon Sep 17 00:00:00 2001 From: Patrick Seewald Date: Fri, 26 Aug 2016 22:08:27 +0200 Subject: [PATCH] Extract stable and useful features from cp2k fprettify Examples and improved tester add README.md --- README.md | 54 ++ examples/fortran_after.f90 | 257 ++++++ examples/fortran_before.f90 | 258 ++++++ formatting/__init__.py | 0 formatting/normalizeFortranFile.py | 1149 -------------------------- formatting/reformatFortranFile.py | 908 -------------------- formatting/replacer.py | 39 - formatting/selftest.py | 262 ------ fparse_utils.py | 155 ++++ fprettify.py | 1227 +++++++++++++++++++++------- setup.py | 6 +- 11 files changed, 1669 insertions(+), 2646 deletions(-) create mode 100644 README.md create mode 100644 examples/fortran_after.f90 create mode 100644 examples/fortran_before.f90 delete mode 100644 formatting/__init__.py delete mode 100644 formatting/normalizeFortranFile.py delete mode 100644 formatting/reformatFortranFile.py delete mode 100644 formatting/replacer.py delete mode 100644 formatting/selftest.py create mode 100644 fparse_utils.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..a1f034d --- /dev/null +++ b/README.md @@ -0,0 +1,54 @@ +# fprettify + +fprettify is an auto-formatter for modern Fortran code that imposes strict whitespace formatting. + + +## Features + +* Auto-indentation. +* Line continuations are aligned with the previous opening delimiter `(`, `[` or `(/` or with an assignment operator `=` or `=>`. If none of the above is present, a default hanging indent is applied. +* All operators are surrounded by exactly one whitespace character, except for arithmetic operators. +* Removal of extraneous whitespace and consecutive blank lines. +* Works only for modern Fortran (Fortran 90 upwards). + + +## Requirements + +Python 2.7 or Python 3.x + + +## Examples + +Have a look at examples/fortran_after.f90 to see reformatted Fortran code. + + +## Installation + +``` +./setup install +``` + +For local installation, use `--user` option. + + +## Usage + +``` +fprettify file1, file2, ... +``` +The default indent is 3. If you prefer something else, use `--indent=` argument. + +For editor integration, use +``` +fprettify --no-report-errors +``` + +For more information, read +``` +fprettify --help +``` + + +## Trivia + +fprettify is part of the coding conventions of [CP2K](https://www.cp2k.org/) and thus tested with a large code base. Compared with CP2K's internal version (cp2k branch), this version is reduced in functionality. It contains only stable and general features that don't rely on specific coding conventions. diff --git a/examples/fortran_after.f90 b/examples/fortran_after.f90 new file mode 100644 index 0000000..11bc51a --- /dev/null +++ b/examples/fortran_after.f90 @@ -0,0 +1,257 @@ +module prettify_selftest + implicit none + private + public :: dp, test_routine, & + test_function, test_type, str_function + integer, parameter :: dp = selected_real_kind(15, 307) + type test_type + real(kind=dp) :: r = 1.0d-3 + integer :: i + end type test_type + +contains + + subroutine test_routine( & + r, i, j, k, l) + integer, intent(in) :: r, i, j, k + integer, intent(out) :: l + + l = test_function(r, i, j, k) + end & + subroutine + + pure function test_function(r, i, j, & + k) & + result(l) + integer, intent(in) :: r, i, j, k + integer :: l + + l = r + i + j + k + end function + function & + str_function(a) result(l) + character(len=*) :: a + integer :: l + + if (len(a) < 5) then + l = 0 + else + l = 1 + endif + end function + +end module + +program example_prog + use example, only: dp, test_routine, test_function, test_type,str_function + + implicit none + integer :: r, i, j, k, l, my_integer, m + integer, dimension(5) :: arr + integer, dimension(20) :: big_arr + integer :: endif + type(test_type) :: t + real(kind=dp) :: r1, r2, r3, r4, r5, r6 + integer, pointer :: point + + point => null() + +! 1) white space formatting ! +!***************************! +! example 1.1 + r = 1; i = -2; j = 3; k = 4; l = 5 + r2 = 0.0_dp; r3 = 1.0_dp; r4 = 2.0_dp; r5 = 3.0_dp; r6 = 4.0_dp + r1 = -(r2**i*(r3 + r5*(-r4) - r6)) - 2.e+2 + if (r .eq. 2 .and. r <= 5) i = 3 + write (*, *) (merge(3, 1, i <= 2)) + write (*, *) test_function(r, i, j, k) + t%r = 4.0_dp + t%i = str_function("t % i = ") + +! example 1.2 + my_integer = 2 + i = 3 + j = 5 + + big_arr = [1, 2, 3, 4, 5, & + 6, 7, 8, 9, 10, & + 11, 12, 13, 14, 15, & + 16, 17, 18, 19, 20] + +! example 1.3: disabling auto-formatter: + my_integer = 2 !& + i = 3 !& + j = 5 !& + +!&< + my_integer = 2 + i = 3 + j = 5 +!&> + + big_arr = [ 1, 2, 3, 4, 5, & !& + 6, 7, 8, 9, 10, & !& + 11, 12, 13, 14, 15, & !& + 16, 17, 18, 19, 20] !& + +! example 1.4: + + big_arr = [1, 2, 3, 4, 5,& + & 6, 7, 8, 9, 10, & + & 11, 12, 13, 14, 15,& + &16, 17, 18, 19, 20] + +! 2) auto indentation for loops ! +!*******************************! + +! example 2.1 + l = 0 + do r = 1, 10 + select case (r) + case (1) + do_label: do i = 1, 100 + if (i <= 2) then + m = 0 + do while (m < 4) + m = m + 1 + do k = 1, 3 + if (k == 1) l = l + 1 + end do + enddo + endif + enddo do_label + case (2) + l = i + j + k + end select + enddo + +! example 2.2 + do m = 1, 2 + do r = 1, 3 + write (*, *) r + do k = 1, 4 + do l = 1, 3 + do i = 4, 5 + do my_integer = 1, 1 + do j = 1, 2 + write (*, *) test_function(m, r, k, l) + i + enddo + enddo + enddo + enddo + enddo + enddo + enddo + +! 3) auto alignment for linebreaks ! +!************************************! + +! example 3.1 + l = test_function(1, 2, test_function(1, 2, 3, 4), 4) + 3*(2 + 1) + + l = test_function(1, 2, test_function(1, 2, 3, 4), 4) + & + 3*(2 + 1) + + l = test_function(1, 2, & + test_function(1, 2, 3, 4), 4) + & + 3*(2 + 1) + + l = test_function(1, 2, & + test_function(1, 2, 3, & + 4), 4) + & + 3*(2 + 1) + +! example 3.2 + arr = [1, (/3, 4, 5/), 6] + [1, 2, 3, 4, 5] + + arr = [1, (/3, 4, 5/), & + 6] + [1, 2, 3, 4, 5] + + arr = [1, (/3, 4, 5/), & + 6] + & + [1, 2, 3, 4, 5] + + arr = [1, (/3, 4, & + 5/), & + 6] + & + [1, 2, 3, 4, 5] + +! example 3.3 + l = test_function(1, 2, & + 3, 4) + + l = test_function( & + 1, 2, 3, 4) + + arr = [1, 2, & + 3, 4, 5] + arr = [ & + 1, 2, 3, 4, 5] + +! 4) more complex formatting and tricky test cases ! +!**************************************************! + +! example 4.1 + l = 0 + do r = 1, 10 + select case (r) + case (1) + do i = 1, 100; if (i <= 2) then ! comment + do j = 1, 5 + do k = 1, 3 + l = l + 1 +! unindented comment + ! indented comment + end do; enddo + elseif (.not. j == 4) then + my_integer = 4 + else + write (*, *) " hello" + endif + enddo + case (2) + l = i + j + k + end select + enddo + +! example 4.2 + if ( & + l == & + 111) & + then + do k = 1, 2 + if (k == 1) & + l = test_function(1, & + test_function(r=4, i=5, & + j=6, k=test_function(1, 2*(3*(1 + 1)), str_function(")a!(b['(;=dfe"), & + 9) + & + test_function(1, 2, 3, 4)), 9, 10) & + ! test_function(1,2,3,4)),9,10) & + ! +13*str_function('') + str_function('"') + + 13*str_function('') + str_function('"') + end & ! comment + ! comment + do + endif + +! example 4.3 + arr = [1, (/3, 4, & + 5/), & + 6] + & + [1, 2, 3, 4, 5]; arr = [1, 2, & + 3, 4, 5] + +! example 4.4 + endif = 3 + if (endif == 2) then + endif = 5 + else if (endif == 3) then + write (*, *) endif + endif + +! example 4.5 + do i = 1, 2; if (.true.) then + write (*, *) "hello" + endif; enddo + +end program diff --git a/examples/fortran_before.f90 b/examples/fortran_before.f90 new file mode 100644 index 0000000..6ae6a18 --- /dev/null +++ b/examples/fortran_before.f90 @@ -0,0 +1,258 @@ +module prettify_selftest + implicit none + private + public :: dp, test_routine, & + test_function, test_type, str_function + integer, parameter :: dp = selected_real_kind ( 15 , 307) + type test_type + real (kind =dp ) :: r = 1.0d-3 + integer :: i + end type test_type + +contains + + + subroutine test_routine( & + r, i, j, k, l) + integer, intent(in) :: r, i, j, k + integer, intent (out) :: l + + l = test_function(r,i,j,k) + end & +subroutine + + pure function test_function(r, i, j, & + k) & + result(l) + integer, intent(in) :: r, i, j, k + integer :: l + + l=r + i +j +k + end function + function & + str_function(a) result(l) + character(len=*) :: a + integer :: l + + if(len(a)<5)then + l=0 + else + l=1 + endif +end function + +end module + +program example_prog + use example, only: dp, test_routine, test_function, test_type,str_function + + implicit none + integer :: r,i,j,k,l,my_integer,m + integer, dimension(5) :: arr + integer, dimension(20) :: big_arr +integer :: endif + type(test_type) :: t +real(kind=dp) :: r1, r2, r3, r4, r5, r6 + integer, pointer :: point + + point=> null( ) + +! 1) white space formatting ! +!***************************! +! example 1.1 + r=1;i=-2;j=3;k=4;l=5 + r2 = 0.0_dp; r3= 1.0_dp; r4 =2.0_dp; r5=3.0_dp; r6 = 4.0_dp + r1=-(r2**i*(r3+r5*(-r4)-r6))-2.e+2 + if( r.eq.2.and.r<=5) i=3 + write(*, *)(merge(3, 1, i<=2)) + write(*, *)test_function(r,i,j , k) + t % r = 4.0_dp + t%i = str_function( "t % i = " ) + +! example 1.2 + my_integer=2 + i=3 + j=5 + + big_arr = [1, 2, 3, 4, 5, & + 6, 7, 8, 9, 10, & + 11, 12, 13, 14, 15, & + 16, 17, 18, 19, 20] + +! example 1.3: disabling auto-formatter: + my_integer = 2 !& + i = 3 !& + j = 5 !& + +!&< + my_integer = 2 + i = 3 + j = 5 +!&> + + big_arr = [ 1, 2, 3, 4, 5, & !& + 6, 7, 8, 9, 10, & !& + 11, 12, 13, 14, 15, & !& + 16, 17, 18, 19, 20] !& + +! example 1.4: + + big_arr = [1, 2, 3, 4, 5,& + & 6, 7, 8, 9, 10, & + & 11, 12, 13, 14, 15,& + &16, 17, 18, 19, 20] + +! 2) auto indentation for loops ! +!*******************************! + +! example 2.1 + l = 0 + do r= 1 , 10 + select case (r) + case(1) + do_label: do i = 1,100 + if (i<=2) then + m =0 + do while(m <4) + m =m+1 + do k=1,3 + if (k==1) l =l +1 + end do + enddo + endif + enddo do_label + case ( 2 ) + l=i + j + k + end select + enddo + +! example 2.2 + do m = 1, 2 + do r = 1, 3 + write (*, *) r + do k = 1, 4 + do l = 1, 3 + do i = 4, 5 + do my_integer = 1, 1 + do j = 1, 2 + write (*, *) test_function(m, r, k, l) + i + enddo + enddo + enddo + enddo + enddo + enddo + enddo + +! 3) auto alignment for linebreaks ! +!************************************! + +! example 3.1 + l = test_function(1, 2, test_function(1, 2, 3, 4), 4) + 3 *(2+1) + + l = test_function (1, 2, test_function(1,2, 3, 4),4) +& + 3*(2+ 1 ) + + l = test_function(1, 2, & + test_function(1, 2, 3, 4), 4)+ & + 3 * (2+1) + + l = test_function(1, 2, & + test_function(1, 2, 3, & + 4), 4) + & + 3*(2 + 1) + +! example 3.2 + arr = [1, (/3,4, 5/), 6] + [ 1, 2,3, 4,5 ] + + arr = [1,(/ 3, 4, 5 /) , & + 6] +[1,2, 3, 4, 5 ] + + arr = [1,(/3,4,5/), & + 6]+ & + [1, 2, 3, 4, 5] + + arr = [1, (/3, 4, & + 5/), & + 6] + & + [1, 2,3, 4, 5 ] + +! example 3.3 + l = test_function(1, 2, & + 3, 4) + + l = test_function( & + 1, 2, 3, 4) + + arr = [1, 2, & + 3, 4, 5] + arr = [ & + 1, 2, 3, 4, 5] + +! 4) more complex formatting and tricky test cases ! +!**************************************************! + +! example 4.1 + l = 0 + do r = 1, 10 + select case ( r ) + case( 1) + do i=1,100;if (i<=2) then! comment + do j = 1,5 + do k= 1, 3 + l = l + 1 +! unindented comment + ! indented comment + end do; enddo + elseif ( .not. j ==4 ) then + my_integer = 4 + else + write (*,*) " hello" + endif + enddo + case(2 ) + l = i+ j + k + end select + enddo + +! example 4.2 + if ( & + l == & + 111) & + then + do k = 1, 2 + if (k == 1) & + l = test_function(1, & + test_function(r=4, i=5, & + j=6, k=test_function(1,2*(3*(1 +1)), str_function ( ")a!(b['(;=dfe"), & + 9) + & + test_function(1, 2, 3, 4)), 9, 10) & + ! test_function(1,2,3,4)),9,10) & + ! +13*str_function('') + str_function('"') + + 13*str_function('') + str_function('"') + end & ! comment + ! comment + do + endif + +! example 4.3 + arr = [1,( /3,4, & + 5 /),& + 6 ]+ & + [1,2, 3, 4,5] ; arr = [1, 2,& + 3, 4, 5] + +! example 4.4 + endif = 3 + if(endif==2)then + endif=5 + else if(endif==3)then + write(*,*)endif + endif + +! example 4.5 + do i=1,2;if(.true.)then + write(*, *)"hello" + endif; enddo + + end program diff --git a/formatting/__init__.py b/formatting/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/formatting/normalizeFortranFile.py b/formatting/normalizeFortranFile.py deleted file mode 100644 index e17ef34..0000000 --- a/formatting/normalizeFortranFile.py +++ /dev/null @@ -1,1149 +0,0 @@ -import sys -import re -import string -from sys import argv -from collections import deque -try: - from cStringIO import StringIO -except ImportError: - from io import StringIO - -rUse = 0 -rVar = 0 -varRe = re.compile(r" *(?P[a-zA-Z_0-9]+) *(?P(?:\((?P(?:[^()]+|\((?:[^()]+|\([^()]*\))*\))*)\))? *(?:= *(?P(:?[^\"',()]+|\((?:[^()\"']+|\([^()\"']*\)|\"[^\"]*\"|'[^']*')*\)|\"[^\"]*\"|'[^']*')+))?)? *(?:(?P,)|\n?) *", re.IGNORECASE) -useParseRe = re.compile( - r" *use +(?P[a-zA-Z_][a-zA-Z_0-9]*)(?P *, *only *:)? *(?P.*)$", - flags=re.IGNORECASE) -commonUsesRe = re.compile( - "^#include *\"([^\"]*(cp_common_uses.f90|base_uses.f90))\"") -localNameRe = re.compile( - " *(?P[a-zA-Z_0-9]+)(?: *= *> *[a-zA-Z_0-9]+)? *$") -typeRe = re.compile(r" *(?Pinteger(?: *\* *[0-9]+)?|logical|character(?: *\* *[0-9]+)?|real(?: *\* *[0-9]+)?|complex(?: *\* *[0-9]+)?|type) *(?P\((?:[^()]+|\((?:[^()]+|\([^()]*\))*\))*\))? *(?P(?: *, *[a-zA-Z_0-9]+(?: *\((?:[^()]+|\((?:[^()]+|\([^()]*\))*\))*\))?)+)? *(?P::)?(?P[^\n]+)\n?", re.IGNORECASE) # $ -indentSize = 2 -decllinelength = 100 -decloffset = 50 - -ompDirRe = re.compile(r"^\s*(!\$omp)", re.IGNORECASE) -ompRe = re.compile(r"^\s*(!\$)", re.IGNORECASE) - - -class CharFilter(object): - """ - An iterator to wrap the iterator returned by `enumerate` - and ignore comments and characters inside strings - """ - - def __init__(self, it): - self._it = it - self._instring = '' - - def __iter__(self): - return self - - def __next__(self): - """ python 3 version""" - pos, char = next(self._it) - if not self._instring and char == '!': - raise StopIteration - - # detect start/end of a string - if char == '"' or char == "'": - if self._instring == char: - self._instring = '' - elif not self._instring: - self._instring = char - - if self._instring: - return self.__next__() - - return (pos, char) - - def next(self): - """ python 2 version""" - pos, char = self._it.next() - if not self._instring and char == '!': - raise StopIteration - - # detect start/end of a string - if char == '"' or char == "'": - if self._instring == char: - self._instring = '' - elif not self._instring: - self._instring = char - - if self._instring: - return self.next() - - return (pos, char) - - -class InputStream(object): - """ - Class to read logical Fortran lines from a Fortran file. - """ - - def __init__(self, infile): - self.line_buffer = deque([]) - self.infile = infile - self.line_nr = 0 - - def nextFortranLine(self): - """Reads a group of connected lines (connected with &, separated by newline or semicolon) - returns a touple with the joined line, and a list with the original lines. - Doesn't support multiline character constants! - """ - lineRe = re.compile( - # $ - r"(?:(?P#.*\n?)| *(&)?(?P(?:!\$|[^&!\"']+|\"[^\"]*\"|'[^']*')*)(?P&)? *(?P!.*)?\n?)", - re.IGNORECASE) - joinedLine = "" - comments = [] - lines = [] - continuation = 0 - - while 1: - if not self.line_buffer: - line = self.infile.readline().replace("\t", 8 * " ") - self.line_nr += 1 - # convert OMP-conditional fortran statements into normal fortran statements - # but remember to convert them back - is_omp_conditional = False - omp_indent = 0 - if ompRe.match(line): - omp_indent = len(line) - len(line.lstrip(' ')) - line = ompRe.sub('', line, count=1) - is_omp_conditional = True - line_start = 0 - for pos, char in CharFilter(enumerate(line)): - if char == ';' or pos + 1 == len(line): - self.line_buffer.append(omp_indent * ' ' + '!$' * is_omp_conditional + - line[line_start:pos + 1]) - omp_indent = 0 - is_omp_conditional = False - line_start = pos + 1 - if(line_start < len(line)): - # line + comment - self.line_buffer.append('!$' * is_omp_conditional + - line[line_start:]) - - if self.line_buffer: - line = self.line_buffer.popleft() - - if not line: - break - - lines.append(line) - m = lineRe.match(line) - if not m or m.span()[1] != len(line): - # FIXME: does not handle line continuation of - # omp conditional fortran statements - # starting with an ampersand. - raise SyntaxError("unexpected line format:" + repr(line)) - if m.group("preprocessor"): - if len(lines) > 1: - raise SyntaxError( - "continuation to a preprocessor line not supported " + repr(line)) - comments.append(line) - break - coreAtt = m.group("core") - if ompRe.match(coreAtt) and joinedLine.strip(): - # remove omp '!$' for line continuation - coreAtt = ompRe.sub('', coreAtt, count=1).lstrip() - joinedLine = joinedLine.rstrip("\n") + coreAtt - if coreAtt and not coreAtt.isspace(): - continuation = 0 - if m.group("continue"): - continuation = 1 - if line.lstrip().startswith('!') and not ompRe.search(line): - comments.append(line.rstrip('\n')) - elif m.group("comment"): - comments.append(m.group("comment")) - else: - comments.append('') - if not continuation: - break - return (joinedLine, comments, lines) - - -def parseRoutine(inFile): - """Parses a routine""" - startRe = re.compile( - r" *(?:recursive +|pure +|elemental +)*(?:subroutine|function)", re.IGNORECASE) - endRe = re.compile(r" *end\s*(?:subroutine|function)", re.IGNORECASE) - startRoutineRe = re.compile( - r" *(?:recursive +|pure +|elemental +)*(?Psubroutine|function) +(?P[a-zA-Z_][a-zA-Z_0-9]*) *(?:\((?P[^()]*)\))? *(?:result *\( *(?P[a-zA-Z_][a-zA-Z_0-9]*) *\))? *(?:bind *\([^()]+\))? *\n?", re.IGNORECASE) # $ - typeBeginRe = re.compile(r" *(?Pinteger(?: *\* *[0-9]+)?|logical|character(?: *\* *[0-9]+)?|real(?: *\* *[0-9]+)?|complex(?: *\* *[0-9]+)?|type)[,( ]", - re.IGNORECASE) - attributeRe = re.compile( - r" *, *(?P[a-zA-Z_0-9]+) *(?:\( *(?P(?:[^()]+|\((?:[^()]+|\([^()]*\))*\))*)\))? *", re.IGNORECASE) - ignoreRe = re.compile(r" *(?:|implicit +none *)$", re.IGNORECASE) - interfaceStartRe = re.compile(r" *interface *$", re.IGNORECASE) - interfaceEndRe = re.compile(r" *end +interface *$", re.IGNORECASE) - routine = {'preRoutine': [], - 'core': [], - 'strippedCore': [], - 'begin': [], - 'end': [], - 'preDeclComments': [], - 'declarations': [], - 'declComments': [], - 'postDeclComments': [], - 'parsedDeclarations': [], - 'postRoutine': [], - 'kind': None, 'name': None, 'arguments': None, 'result': None, - 'interfaceCount': 0, - 'use': [] - } - includeRe = re.compile( - r"#? *include +[\"'](?P.+)[\"'] *$", re.IGNORECASE) - stream = InputStream(inFile) - while 1: - (jline, _, lines) = stream.nextFortranLine() - if len(lines) == 0: - break - if startRe.match(jline): - break - routine['preRoutine'].extend(lines) - m = includeRe.match(lines[0]) - if m: - try: - subF = file(m.group('file')) - subStream = InputStream(subF) - while 1: - (subjline, _, sublines) = subStream.nextFortranLine() - if not sublines: - break - routine['strippedCore'].append(subjline) - subF.close() - except: - import traceback - sys.stderr.write( - "error trying to follow include " + m.group('file') + '\n') - sys.stderr.write( - "warning this might lead to the removal of used variables\n") - traceback.print_exc() - if jline: - routine['begin'] = lines - m = startRoutineRe.match(jline) - if not m or m.span()[1] != len(jline): - raise SyntaxError( - "unexpected subroutine start format:" + repr(lines)) - routine['name'] = m.group('name') - routine['kind'] = m.group('kind') - if (m.group('arguments') and m.group('arguments').strip()): - routine['arguments'] = map(lambda x: x.strip(), - m.group('arguments').split(",")) - if (m.group('result')): - routine['result'] = m.group('result') - if (not routine['result'])and(routine['kind'].lower() == "function"): - routine['result'] = routine['name'] - while 1: - (jline, comment_list, lines) = stream.nextFortranLine() - comments = '\n'.join(_ for _ in comment_list) - if len(lines) == 0: - break - if lines[0].lower().startswith("#include"): - break - if not ignoreRe.match(jline): - if typeBeginRe.match(jline): - if routine['postDeclComments']: - routine['declComments'].extend(routine['postDeclComments']) - routine['postDeclComments'] = [] - - if typeBeginRe.match(jline): - m = typeRe.match(jline) - if (m.group('type').lower() == 'type' and - not m.group('parameters')): - break - if not m or m.span()[1] != len(jline): - raise SyntaxError("unexpected type format:" + repr(jline)) - decl = {'type': m.group("type"), - 'parameters': None, - 'attributes': [], - 'vars': []} - if m.group('parameters'): - decl['parameters'] = (m.group("parameters").replace(" ", ""). - replace(",", ", ")) - str = m.group("attributes") - while(str): - m2 = attributeRe.match(str) - if not m2: - raise SyntaxError("unexpected attribute format " + - repr(str) + " in " + repr(lines)) - decl['attributes'].append(m2.group().replace(" ", ""). - replace(",", ", ")[2:]) - str = str[m2.span()[1]:] - str = m.group("vars") - while 1: - m2 = varRe.match(str) - if not m2: - raise SyntaxError("unexpected var format " + - repr(str) + " in " + repr(lines)) - var = m2.group("var") - if m2.group("param"): - var += "(" + m2.group("param") + ")" - if m2.group("value"): - var += " = " - var += m2.group("value") - decl['vars'].append(var) - str = str[m2.span()[1]:] - if not m2.group("continue"): - if str: - raise SyntaxError("error parsing vars (leftover=" + - repr(str) + ") in " + repr(lines)) - break - routine['parsedDeclarations'].append(decl) - elif interfaceStartRe.match(jline): - istart = lines - interfaceDeclFile = StringIO() - while 1: - (jline, _, lines) = stream.nextFortranLine() - if interfaceEndRe.match(jline): - iend = lines - break - interfaceDeclFile.writelines(lines) - interfaceDeclFile = StringIO(interfaceDeclFile.getvalue()) - iroutines = [] - while 1: - iroutine = parseRoutine(interfaceDeclFile) - if not iroutine['kind']: - if len(iroutines) == 0: - interfaceDeclFile.seek(0) - raise SyntaxError("error parsing interface:" + - repr(interfaceDeclFile.read())) - iroutines[-1]['postRoutine'].extend( - iroutine['preRoutine']) - break - iroutines.append(iroutine) - for iroutine in iroutines: - routine['interfaceCount'] += 1 - decl = {'type': 'z_interface%02d' % (routine['interfaceCount']), - 'parameters': None, - 'attributes': [], - 'vars': [iroutine['name']], - 'iroutine': iroutine, - 'istart': istart, - 'iend': iend - } - routine['parsedDeclarations'].append(decl) - elif useParseRe.match(jline): - routine['use'].append("".join(lines)) - else: - break - routine['declarations'].append("".join(lines)) - if (len(routine['parsedDeclarations']) == 0 and len(routine['use']) == 0 and - not re.match(" *implicit +none *$", jline, re.IGNORECASE)): - routine['preDeclComments'].append("".join(lines)) - else: - routine['postDeclComments'].append(comments) - containsRe = re.compile(r" *contains *$", re.IGNORECASE) - - while len(lines) > 0: - if endRe.match(jline): - routine['end'] = lines - break - routine['strippedCore'].append(jline) - routine['core'].append("".join(lines)) - if containsRe.match(lines[0]): - break - m = includeRe.match(lines[0]) - if m: - try: - subF = file(m.group('file')) - subStream = InputStream(subF) - while 1: - (subjline, _, sublines) = subStream.nextFortranLine() - if not sublines: - break - routine['strippedCore'].append(subjline) - subF.close() - except: - import traceback - sys.stderr.write( - "error trying to follow include " + m.group('file') + '\n') - sys.stderr.write( - "warning this might lead to the removal of used variables\n") - traceback.print_exc() - (jline, _, lines) = stream.nextFortranLine() - return routine - - -def findWord(word, text, options=re.IGNORECASE): - """Returns the position of word in text or -1 if not found. - A match is valid only if it is a whole word (i.e. findWord('try','retry') - returns false)""" - wordRe = re.compile("(? 100000: - raise Error("could not enforce all constraints") - m = varRe.match(declarations[idecl2]['vars'][ivar2]) - if (ivar == 0 and - findWord(m.group('var').lower(), typeParam) != -1): - declarations.insert( - idecl2 + 1, declarations[idecl]) - del declarations[idecl] - ivar = 0 - moved = 1 - break - if rest and findWord(m.group('var').lower(), rest) != -1: - if len(declarations[idecl]['vars']) > 1: - newDecl = {} - newDecl.update(declarations[idecl]) - newDecl['vars'] = [ - declarations[idecl]['vars'][ivar]] - declarations.insert(idecl2 + 1, newDecl) - del declarations[idecl]['vars'][ivar] - else: - declarations.insert(idecl2 + 1, - declarations[idecl]) - del declarations[idecl] - ivar = 0 - moved = 1 - break - if moved: - break - if not moved: - ivar += 1 - idecl += 1 - - for i in range(len(declarations) - 1, 0, -1): - if (declarations[i]['normalizedType'].lower() == - declarations[i - 1]['normalizedType'].lower()): - declarations[i - 1]['vars'].extend(declarations[i]['vars']) - del declarations[i] - - -def sortDeclarations(declarations): - """sorts, compacts declarations and respects dependencies - normalizedType has to be defined for the declarations""" - - declarations.sort(key=lambda x: x['normalizedType'].lower()) - - for i in range(len(declarations) - 1, 0, -1): - if (declarations[i]['normalizedType'].lower() == - declarations[i - 1]['normalizedType'].lower()): - declarations[i - 1]['vars'].extend(declarations[i]['vars']) - del declarations[i] - - for decl in declarations: - decl['vars'].sort(key=lambda x: x.lower()) - enforceDeclDependecies(declarations) - - -def writeRoutine(routine, outFile): - """writes the given routine to outFile""" - outFile.writelines(routine["preRoutine"]) - outFile.writelines(routine["begin"]) - outFile.writelines(routine["declarations"]) - outFile.writelines(routine["core"]) - outFile.writelines(routine["end"]) - outFile.writelines(routine["postRoutine"]) - - -def writeInCols(dLine, indentCol, maxCol, indentAtt, file): - """writes out the strings (trying not to cut them) in dLine up to maxCol - indenting each newline with indentCol. - The '&' of the continuation line is at maxCol. - indentAtt is the actual intent, and the new indent is returned""" - - strRe = re.compile(r"('[^'\n]*'|\"[^\"\n]*\")") - nonWordRe = re.compile(r"(\(/|/\)|[^-+a-zA-Z0-9_.])") - maxSize = maxCol - indentCol - 1 - tol = min(maxSize / 6, 6) + indentCol - for fragment in dLine: - if indentAtt + len(fragment) < maxCol: - file.write(fragment) - indentAtt += len(fragment) - elif len(fragment.lstrip()) <= maxSize: - file.write("&\n" + (" " * indentCol)) - file.write(fragment.lstrip()) - indentAtt = indentCol + len(fragment.lstrip()) - else: - sPieces = strRe.split(fragment) - for sPiece in sPieces: - if sPiece and (not (sPiece[0] == '"' or sPiece[0] == "'")): - subPieces = nonWordRe.split(sPiece) - else: - subPieces = [sPiece] - for subPiece in subPieces: - if indentAtt == indentCol: - file.write(subPiece.lstrip()) - indentAtt += len(subPiece.lstrip()) - elif indentAtt < tol or indentAtt + len(subPiece) < maxCol: - file.write(subPiece) - indentAtt += len(subPiece) - else: - file.write("&\n" + (" " * indentCol)) - file.write(subPiece.lstrip()) - indentAtt = indentCol + len(subPiece.lstrip()) - return indentAtt - - -def writeCompactDeclaration(declaration, file): - """Writes a declaration in a compact way""" - d = declaration - if d.has_key('iroutine'): - file.writelines(d['istart']) - writeRoutine(d['iroutine'], file) - file.writelines(d['iend']) - else: - if len(d['vars']) > 0: - decl = " " * indentSize * 2 + d['type'] - if d['parameters']: # do not drop empty parameter lists? - decl += d['parameters'] - if d['attributes']: - for a in d['attributes']: - decl += ", " + a - decl += " :: " - - dLine = [decl] - for var in d['vars']: - cur_len = sum([len(l) for l in dLine]) - if(len(dLine) > 1 and cur_len + len(var) > 600): - writeInCols(dLine, 3 * indentSize, decllinelength, 0, file) - file.write("\n") - dLine = [decl] - if(len(dLine) > 1): - dLine[-1] += ", " - dLine.append(var) - writeInCols(dLine, 3 * indentSize, decllinelength, 0, file) - file.write("\n") - - -def writeExtendedDeclaration(declaration, file): - """Writes a declaration in a nicer way (using more space)""" - d = declaration - if len(d['vars']) == 0: - return - if d.has_key('iroutine'): - file.writelines(d['istart']) - writeRoutine(d['iroutine'], file) - file.writelines(d['iend']) - else: - dLine = [] - dLine.append(" " * indentSize * 2 + d['type']) - if d['parameters']: # do not drop empty parameter lists? - dLine.append(d['parameters']) - if d['attributes']: - for a in d['attributes']: - dLine[-1:] = [dLine[-1] + ", "] - dLine.append(a) - - indentAtt = writeInCols(dLine, 3 * indentSize, - decloffset + 1 + 2 * indentSize, 0, file) - file.write(" " * (decloffset + 2 * indentSize - indentAtt)) - file.write(" :: ") - indentAtt = decloffset + 8 - - dLine = [] - for var in d['vars'][:-1]: - dLine.append(var + ", ") - dLine.append(d['vars'][-1]) - - writeInCols(dLine, decloffset + 4 + 2 * indentSize, - decllinelength, indentAtt, file) - file.write("\n") - - -def writeDeclarations(parsedDeclarations, file): - """Writes the declarations to the given file""" - for d in parsedDeclarations: - maxLenVar = 0 - totalLen = 0 - for v in d['vars']: - maxLenVar = max(maxLenVar, len(v)) - totalLen += len(v) - if maxLenVar > 30 or totalLen > decllinelength - 4: - writeCompactDeclaration(d, file) - else: - writeExtendedDeclaration(d, file) - - -def cleanDeclarations(routine, logFile=sys.stderr): - """cleans up the declaration part of the given parsed routine - removes unused variables""" - global rVar - containsRe = re.compile(r" *contains *$", re.IGNORECASE) - if routine['core']: - if containsRe.match(routine['core'][-1]): - logFile.write("*** routine %s contains other routines ***\n*** declarations not cleaned ***\n" % - (routine['name'])) - return - commentToRemoveRe = re.compile( - r" *! *(?:interface|arguments|parameters|locals?|\** *local +variables *\**|\** *local +parameters *\**) *$", re.IGNORECASE) - nullifyRe = re.compile( - r" *nullify *\(([^()]+)\) *\n?", re.IGNORECASE | re.MULTILINE) - - if not routine['kind']: - return - if (routine['core']): - if re.match(" *type *[a-zA-Z_]+ *$", routine['core'][0], re.IGNORECASE): - logFile.write("*** routine %s contains local types, not fully cleaned ***\n" % - (routine['name'])) - if re.match(" *import+ *$", routine['core'][0], re.IGNORECASE): - logFile.write("*** routine %s contains import, not fully cleaned ***\n" % - (routine['name'])) - if re.search("^#", "".join(routine['declarations']), re.MULTILINE): - logFile.write("*** routine %s declarations contain preprocessor directives ***\n*** declarations not cleaned ***\n" % ( - routine['name'])) - return - try: - rest = "".join(routine['strippedCore']).lower() - nullifys = ",".join(nullifyRe.findall(rest)) - rest = nullifyRe.sub("", rest) - paramDecl = [] - decls = [] - for d in routine['parsedDeclarations']: - d['normalizedType'] = d['type'] - if d['parameters']: - d['normalizedType'] += d['parameters'] - if (d["attributes"]): - d['attributes'].sort(key=lambda x: x.lower()) - d['normalizedType'] += ', ' - d['normalizedType'] += ', '.join(d['attributes']) - if "parameter" in map(str.lower, d['attributes']): - paramDecl.append(d) - else: - decls.append(d) - - sortDeclarations(paramDecl) - sortDeclarations(decls) - has_routinen = 0 - pos_routinep = -1 - for d in paramDecl: - for i in range(len(d['vars'])): - v = d['vars'][i] - m = varRe.match(v) - lowerV = m.group("var").lower() - if lowerV == "routinen": - has_routinen = 1 - d['vars'][i] = "routineN = '" + routine['name'] + "'" - elif lowerV == "routinep": - pos_routinep = i - d['vars'][i] = "routineP = moduleN//':'//routineN" - if not has_routinen and pos_routinep >= 0: - d['vars'].insert( - pos_routinep, "routineN = '" + routine['name'] + "'") - - if routine['arguments']: - routine['lowercaseArguments'] = map( - lambda x: x.lower(), routine['arguments']) - else: - routine['lowercaseArguments'] = [] - if routine['result']: - routine['lowercaseArguments'].append(routine['result'].lower()) - argDeclDict = {} - localDecl = [] - for d in decls: - localD = {} - localD.update(d) - localD['vars'] = [] - argD = None - for v in d['vars']: - m = varRe.match(v) - lowerV = m.group("var").lower() - if lowerV in routine['lowercaseArguments']: - argD = {} - argD.update(d) - argD['vars'] = [v] - if argDeclDict.has_key(lowerV): - raise SyntaxError( - "multiple declarations not supported. var=" + v + - " declaration=" + str(d) + "routine=" + routine['name']) - argDeclDict[lowerV] = argD - else: - pos = findWord(lowerV, rest) - if (pos != -1): - localD['vars'].append(v) - else: - if findWord(lowerV, nullifys) != -1: - if not rmNullify(lowerV, routine['core']): - raise SyntaxError( - "could not remove nullify of " + lowerV + - " as expected, routine=" + routine['name']) - logFile.write("removed var %s in routine %s\n" % - (lowerV, routine['name'])) - rVar += 1 - if (len(localD['vars'])): - localDecl.append(localD) - argDecl = [] - for arg in routine['lowercaseArguments']: - if argDeclDict.has_key(arg): - argDecl.append(argDeclDict[arg]) - else: - sys.stderr.write("warning, implicitly typed argument '" + - arg + "' in routine " + routine['name'] + '\n') - if routine['kind'].lower() == 'function': - aDecl = argDecl[:-1] - else: - aDecl = argDecl - - # try to have arg/param/local, but checks for dependencies arg/param - # and param/local - argDecl.extend(paramDecl) - enforceDeclDependecies(argDecl) - splitPos = 0 - for i in range(len(argDecl) - 1, -1, -1): - if not 'parameter' in map(str.lower, argDecl[i]['attributes']): - splitPos = i + 1 - break - paramDecl = argDecl[splitPos:] - argDecl = argDecl[:splitPos] - paramDecl.extend(localDecl) - enforceDeclDependecies(paramDecl) - splitPos = 0 - for i in range(len(paramDecl) - 1, -1, -1): - if 'parameter' in map(str.lower, paramDecl[i]['attributes']): - splitPos = i + 1 - break - localDecl = paramDecl[splitPos:] - paramDecl = paramDecl[:splitPos] - - newDecl = StringIO() - for comment in routine['preDeclComments']: - if not commentToRemoveRe.match(comment): - newDecl.write(comment) - newDecl.writelines(routine['use']) - writeDeclarations(argDecl, newDecl) - if argDecl and paramDecl: - newDecl.write("\n") - writeDeclarations(paramDecl, newDecl) - if (argDecl or paramDecl) and localDecl: - newDecl.write("\n") - writeDeclarations(localDecl, newDecl) - if argDecl or paramDecl or localDecl: - newDecl.write("\n") - wrote = 0 - for comment in routine['declComments']: - if comment.strip() and not commentToRemoveRe.match(comment): - newDecl.write(comment.strip()) - newDecl.write("\n") - wrote = 1 - if wrote: - newDecl.write("\n") - routine['declarations'] = [newDecl.getvalue()] - except: - if routine.has_key('name'): - logFile.write("**** exception cleaning routine " + - routine['name'] + " ****") - logFile.write("parsedDeclartions=" + - str(routine['parsedDeclarations'])) - raise - - newDecl = StringIO() - if routine['postDeclComments']: - comment_start = 0 - for comment in routine['postDeclComments']: - if comment.strip(): - break - else: - comment_start += 1 - - for comment in routine['postDeclComments'][comment_start:]: - if not commentToRemoveRe.match(comment): - newDecl.write(comment) - newDecl.write("\n") - routine['declarations'][0] += newDecl.getvalue() - - -def rmNullify(var, strings): - removed = 0 - var = var.lower() - nullifyRe = re.compile(r" *nullify *\(", re.IGNORECASE) - nullify2Re = re.compile( - r"(?P *nullify *\()(?P[^()!&]+)\)", re.IGNORECASE) - - for i in range(len(strings) - 1, -1, -1): - line = strings[i] - comments = [] - if nullifyRe.match(line) and findWord(var, line) != -1: - core = "" - comments = [] - for l in line.splitlines(): - pos = l.find("&") - pos2 = l.find("!") - if pos == -1: - if pos2 == -1: - core += l - else: - core += l[:pos2] - comments.append(l[pos2:] + "\n") - else: - core += l[:pos] - if pos2 != -1: - comments.append(l[pos2:] + "\n") - m = nullify2Re.match(core) - if not m: - raise SyntaxError("could not match nullify to " + repr(core) + - "in" + repr(line)) - allVars = [] - vars = m.group("vars") - v = map(string.strip, vars.split(",")) - removedNow = 0 - for j in range(len(v) - 1, -1, -1): - if findWord(var, v[j].lower()) != -1: - del v[j] - removedNow = 1 - if removedNow: - if len(v) == 0: - if not comments: - del strings[i] - else: - strings[i] = "".join(comments) - else: - for j in range(len(v) - 1): - v[j] += ", " - v[-1] += ")" - newS = StringIO() - v.insert(0, m.group("nullif")) - writeInCols(v, len(v[0]) - - len(v[0].lstrip()) + 5, 77, 0, newS) - newS.write("\n") - if comments: - for c in comments: - newS.write(c) - strings[i] = newS.getvalue() - removed += 1 - return removed - - -def parseUse(inFile): - """Parses the use statements in inFile - The parsing stops at the first non use statement. - Returns something like: - ([{'module':'module1','only':['el1','el2=>el3']},...], - '! comment1\\n!comment2...\\n', - 'last line (the line that stopped the parsing)') - """ - lineNr = 0 - preComments = [] - modules = [] - origLines = [] - commonUses = "" - stream = InputStream(inFile) - while 1: - (jline, comment_list, lines) = stream.nextFortranLine() - comments = '\n'.join(_ for _ in comment_list if _) - lineNr = lineNr + len(lines) - if not lines: - break - origLines.append("".join(lines)) - # parse use - m = useParseRe.match(jline) - if m: - useAtt = {'module': m.group('module'), 'comments': []} - - if m.group('only'): - useAtt['only'] = map(string.strip, - string.split(m.group('imports'), ',')) - else: - useAtt['renames'] = map(string.strip, - string.split(m.group('imports'), ',')) - if useAtt['renames'] == [""]: - del useAtt['renames'] - if comments: - useAtt['comments'].append(comments) - # add use to modules - modules.append(useAtt) - elif jline and not jline.isspace(): - break - else: - if comments and commonUsesRe.match(comments): - commonUses += "".join(lines) - elif len(modules) == 0: - preComments.append(("".join(lines))) - elif comments: - modules[-1]['comments'].append(comments) - - return {'modules': modules, 'preComments': preComments, 'commonUses': commonUses, - 'postLine': "".join(lines), 'origLines': origLines[:-1]} - - -def normalizeModules(modules): - """Sorts the modules and their export and removes duplicates. - renames aren't sorted correctly""" - # orders modules - modules.sort(key=lambda x: x['module']) - for i in range(len(modules) - 1, 0, -1): - if modules[i]['module'].lower() == modules[i - 1]['module'].lower(): - if not (modules[i - 1].has_key('only') and - modules[i].has_key('only')): - raise SyntaxError('rejoining of module ' + - str(modules[i]['module']) + - ' failed as at least one of the use is not a use ...,only:') - modules[i - 1]['only'].extend(modules[i]['only']) - del modules[i] - # orders imports - for m in modules: - if m.has_key('only'): - m['only'].sort() - for i in range(len(m['only']) - 1, 0, -1): - if m['only'][i - 1].lower() == m['only'][i].lower(): - del m['only'][i] - - -def writeUses(modules, outFile): - """Writes the use declaration using a long or short form depending on how - many only statements there are""" - for m in modules: - if m.has_key('only') and len(m['only']) > 8: - writeUseShort(m, outFile) - else: - writeUseLong(m, outFile) - - -def writeUseLong(m, outFile): - """Writes a use declaration in a nicer, but longer way""" - if m.has_key('only'): - outFile.write(indentSize * ' ' + "USE " + m['module'] + "," + - string.rjust('ONLY: ', 38 - len(m['module']))) - if m['only']: - outFile.write(m['only'][0]) - for i in range(1, len(m['only'])): - outFile.write(",&\n" + string.ljust("", 43 + - indentSize) + m['only'][i]) - else: - outFile.write(indentSize * ' ' + "USE " + m['module']) - if m.has_key('renames') and m['renames']: - outFile.write("," + string.ljust("", 38) + - m['renames'][0]) - for i in range(1, len(m['renames'])): - outFile.write(",&\n" + string.ljust("", 43 + - indentSize) + m['renames'][i]) - if m['comments']: - outFile.write("\n") - outFile.write('\n'.join(m['comments'])) - outFile.write("\n") - - -def writeUseShort(m, file): - """Writes a use declaration in a compact way""" - uLine = [] - if m.has_key('only'): - file.write(indentSize * ' ' + "USE " + m['module'] + "," + - string.rjust('ONLY: &\n', 40 - len(m['module']))) - for k in m['only'][:-1]: - uLine.append(k + ", ") - uLine.append(m['only'][-1]) - uLine[0] = " " * (5 + indentSize) + uLine[0] - elif m.has_key('renames') and m['renames']: - uLine.append(indentSize * ' ' + "USE " + m['module'] + ", ") - for k in m['renames'][:-1]: - uLine.append(k + ", ") - uLine.append(m['renames'][-1]) - else: - uLine.append(indentSize * ' ' + "USE " + m['module']) - writeInCols(uLine, 5 + indentSize, decllinelength, 0, file) - if m['comments']: - file.write("\n") - file.write('\n'.join(m['comments'])) - file.write("\n") - - -def prepareImplicitUses(modules): - """Transforms a modulesDict into an implictUses (dictionary of module names - each containing a dictionary with the only, and the special key '_WHOLE_' - wich is true if the whole mosule is implicitly present""" - mods = {} - for m in modules: - m_name = m['module'].lower() - if (not mods.has_key(m_name)): - mods[m['module']] = {'_WHOLE_': 0} - m_att = mods[m_name] - if m.has_key('only'): - for k in m['only']: - m = localNameRe.match(k) - if not m: - raise SyntaxError('could not parse use only:' + repr(k)) - impAtt = m.group('localName').lower() - m_att[impAtt] = 1 - else: - m_att['_WHOLE_'] = 1 - return mods - - -def cleanUse(modulesDict, rest, implicitUses=None, logFile=sys.stderr): - """Removes the unneded modules (the ones that are not used in rest)""" - global rUse - exceptions = {} - modules = modulesDict['modules'] - rest = rest.lower() - for i in range(len(modules) - 1, -1, -1): - m_att = {} - m_name = modules[i]['module'].lower() - if implicitUses and implicitUses.has_key(m_name): - m_att = implicitUses[m_name] - if m_att.has_key('_WHOLE_') and m_att['_WHOLE_']: - rUse += 1 - logFile.write("removed USE of module " + m_name + "\n") - del modules[i] - elif modules[i].has_key("only"): - els = modules[i]['only'] - for j in range(len(els) - 1, -1, -1): - m = localNameRe.match(els[j]) - if not m: - raise SyntaxError( - 'could not parse use only:' + repr(els[j])) - impAtt = m.group('localName').lower() - if m_att.has_key(impAtt): - rUse += 1 - logFile.write("removed USE " + m_name + - ", only: " + repr(els[j]) + "\n") - del els[j] - elif not exceptions.has_key(impAtt): - if findWord(impAtt, rest) == -1: - rUse += 1 - logFile.write("removed USE " + m_name + - ", only: " + repr(els[j]) + "\n") - del els[j] - if len(modules[i]['only']) == 0: - if modules[i]['comments']: - modulesDict['preComments'].extend( - map(lambda x: x + "\n", modules[i]['comments'])) - del modules[i] - - -def resetModuleN(moduleName, lines): - "resets the moduleN variable to the module name in the lines lines" - moduleNRe = re.compile(r".*:: *moduleN *= *(['\"])[a-zA-Z_0-9]+\1", - flags=re.IGNORECASE) - for i in range(len(lines)): - lines[i] = moduleNRe.sub( - " " * indentSize + "CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = '" + - moduleName + "'", - lines[i]) - - -def rewriteFortranFile(inFile, outFile, indent, decl_linelength, decl_offset, logFile=sys.stderr, orig_filename=None): - """rewrites the use statements and declarations of inFile to outFile. - It sorts them and removes the repetitions.""" - import os.path - - global indentSize - global decloffset - global decllinelength - indentSize = indent - decloffset = decl_offset - decllinelength = decl_linelength - - moduleRe = re.compile(r" *(?:module|program) +(?P[a-zA-Z_][a-zA-Z_0-9]*) *(?:!.*)?$", - flags=re.IGNORECASE) - coreLines = [] - while 1: - line = inFile.readline() - if not line: - break - if line[0] == '#': - coreLines.append(line) - outFile.write(line) - m = moduleRe.match(line) - if m: - if not orig_filename: - orig_filename = inFile.name - fn = os.path.basename(orig_filename).rsplit(".", 1)[0] - break - try: - modulesDict = parseUse(inFile) - routines = [] - coreLines.append(modulesDict['postLine']) - routine = parseRoutine(inFile) - coreLines.extend(routine['preRoutine']) - if m: - resetModuleN(m.group('moduleName'), routine['preRoutine']) - routines.append(routine) - while routine['kind']: - routine = parseRoutine(inFile) - routines.append(routine) - map(lambda x: cleanDeclarations(x, logFile), routines) - for routine in routines: - coreLines.extend(routine['declarations']) - coreLines.extend(routine['strippedCore']) - rest = "".join(coreLines) - nonStPrep = 0 - for line in modulesDict['origLines']: - if (re.search('^#', line) and not commonUsesRe.match(line)): - sys.stderr.write('noMatch ' + repr(line) + '\n') - nonStPrep = 1 - if nonStPrep: - logFile.write( - "*** use statements contains preprocessor directives, not cleaning ***\n") - outFile.writelines(modulesDict['origLines']) - else: - implicitUses = None - if modulesDict['commonUses']: - try: - inc_fn = commonUsesRe.match( - modulesDict['commonUses']).group(1) - inc_absfn = os.path.join( - os.path.dirname(orig_filename), inc_fn) - f = file(inc_absfn) - implicitUsesRaw = parseUse(f) - f.close() - implicitUses = prepareImplicitUses( - implicitUsesRaw['modules']) - except: - sys.stderr.write( - "ERROR trying to parse use statements contained in common uses precompiler file " + inc_absfn + '\n') - raise - cleanUse(modulesDict, rest, - implicitUses=implicitUses, logFile=logFile) - normalizeModules(modulesDict['modules']) - outFile.writelines(modulesDict['preComments']) - writeUses(modulesDict['modules'], outFile) - outFile.write(modulesDict['commonUses']) - if modulesDict['modules']: - outFile.write('\n') - outFile.write(modulesDict['postLine']) - for routine in routines: - writeRoutine(routine, outFile) - except: - import traceback - logFile.write('-' * 60 + "\n") - traceback.print_exc(file=logFile) - logFile.write('-' * 60 + "\n") - - logFile.write("Processing file '" + orig_filename + "'\n") - raise - -# EOF diff --git a/formatting/reformatFortranFile.py b/formatting/reformatFortranFile.py deleted file mode 100644 index a75accf..0000000 --- a/formatting/reformatFortranFile.py +++ /dev/null @@ -1,908 +0,0 @@ -""" - Impose white space conventions and indentation based on scopes / subunits - - normalization of white spaces supported for following operators: - - relational operators: - .EQ. .NE. .LT. .LE. .GT. .GE. - == /= < <= > >= - - logical operators: - .AND. .OR. .EQV. .NEQV. - .NOT. - - bracket delimiters - - commas and semicolons: - - arithmetic operators: - * / ** + - - - other operators: - % - (sign) = (function argument) - = (assignment) => (pointer assignment) - - supported criteria for alignment / indentation: - Fortran lines: - - if, else, endif - - do, enddo - - select case, case, end select - - subroutine, end subroutine - - function, end function - - module, end module - - program, end program - - interface, end interface - - type, end type - Actual lines (parts of Fortran lines separated by linebreaks): - - bracket delimiters (.), (/./), and [.] - - assignments by value = and pointer =>. - - LIMITATIONS - - assumes that all subunits are explicitly ended within same file, no treatment of #include statements - - can not deal with f77 constructs (files are ignored) -""" - -import re -import sys -import os -from formatting.normalizeFortranFile import useParseRe, typeRe, InputStream, CharFilter, ompRe, ompDirRe - -#========================================================================= -# constants, mostly regular expressions - -RE_FLAGS = re.IGNORECASE # all regex should be case insensitive - -FORTRAN_DEFAULT_ERROR_MESSAGE = " Syntax error - this formatter can not handle invalid Fortran files." -FORMATTER_ERROR_MESSAGE = " Wrong usage of formatting-specific directives '&', '!&', '!&<' or '!&>'." - -EOL_STR = r"\s*;?\s*$" # end of fortran line -EOL_SC = r"\s*;\s*$" # whether line is ended with semicolon -SOL_STR = r"^\s*" # start of fortran line - -F77_STYLE = re.compile(r"^\s*\d", RE_FLAGS) - -# regular expressions for parsing statements that start, continue or end a -# subunit: -IF_RE = re.compile( - SOL_STR + r"(\w+\s*:)?\s*IF\s*\(.+\)\s*THEN" + EOL_STR, RE_FLAGS) -ELSE_RE = re.compile( - SOL_STR + r"ELSE(\s*IF\s*\(.+\)\s*THEN)?" + EOL_STR, RE_FLAGS) -ENDIF_RE = re.compile(SOL_STR + r"END\s*IF(\s+\w+)?" + EOL_STR, RE_FLAGS) - -DO_RE = re.compile(SOL_STR + r"(\w+\s*:)?\s*DO(" + EOL_STR + r"|\s)", RE_FLAGS) -ENDDO_RE = re.compile(SOL_STR + r"END\s*DO(\s+\w+)?" + EOL_STR, RE_FLAGS) - -SELCASE_RE = re.compile( - SOL_STR + r"SELECT\s*CASE\s*\(.+\)" + EOL_STR, RE_FLAGS) -CASE_RE = re.compile(SOL_STR + r"CASE\s*(\(.+\)|DEFAULT)" + EOL_STR, RE_FLAGS) -ENDSEL_RE = re.compile(SOL_STR + r"END\s*SELECT" + EOL_STR, RE_FLAGS) - -SUBR_RE = re.compile( - r"^([^\"'!]* )?SUBROUTINE\s+\w+\s*(\(.*\))?" + EOL_STR, RE_FLAGS) -ENDSUBR_RE = re.compile( - SOL_STR + r"END\s*SUBROUTINE(\s+\w+)?" + EOL_STR, RE_FLAGS) - -FCT_RE = re.compile( - r"^([^\"'!]* )?FUNCTION\s+\w+\s*(\(.*\))?(\s*RESULT\s*\(\w+\))?" + EOL_STR, RE_FLAGS) -ENDFCT_RE = re.compile( - SOL_STR + r"END\s*FUNCTION(\s+\w+)?" + EOL_STR, RE_FLAGS) - -MOD_RE = re.compile(SOL_STR + r"MODULE\s+\w+" + EOL_STR, RE_FLAGS) -ENDMOD_RE = re.compile(SOL_STR + r"END\s*MODULE(\s+\w+)?" + EOL_STR, RE_FLAGS) - -TYPE_RE = re.compile( - SOL_STR + r"TYPE(\s*,\s*BIND\s*\(\s*C\s*\))?(\s*::\s*|\s+)\w+" + EOL_STR, RE_FLAGS) -ENDTYPE_RE = re.compile(SOL_STR + r"END\s*TYPE(\s+\w+)?" + EOL_STR, RE_FLAGS) - -PROG_RE = re.compile(SOL_STR + r"PROGRAM\s+\w+" + EOL_STR, RE_FLAGS) -ENDPROG_RE = re.compile( - SOL_STR + r"END\s*PROGRAM(\s+\w+)?" + EOL_STR, RE_FLAGS) - -INTERFACE_RE = re.compile( - r"^([^\"'!]* )?INTERFACE(\s+\w+)?" + EOL_STR, RE_FLAGS) -ENDINTERFACE_RE = re.compile( - SOL_STR + r"END\s*INTERFACE(\s+\w+)?" + EOL_STR, RE_FLAGS) - -CONTAINS_RE = re.compile(SOL_STR + r"CONTAINS" + EOL_STR, RE_FLAGS) - -PUBLIC_RE = re.compile(SOL_STR + r"PUBLIC\s*::") - -# intrinsic statements with parenthesis notation that are not functions -INTR_STMTS_PAR = "(ALLOCATE|DEALLOCATE|REWIND|BACKSPACE|INQUIRE|OPEN|CLOSE|WRITE|READ|FORALL|WHERE|NULLIFY)" - -# regular expressions for parsing linebreaks -LINEBREAK_STR = r"(&)[\s]*(?:!.*)?$" - -# regular expressions for parsing operators -# Note: +/- in real literals and sign operator is ignored -PLUSMINUS_RE = re.compile( - r"(?<=[\w\)\]])(?(?!=)|>=))\s*", RE_FLAGS) -LOG_OP_RE = re.compile(r"\s*(\.(?:AND|OR|EQV|NEQV)\.)\s*", RE_FLAGS) - -# regular expressions for parsing delimiters -DEL_OPEN_STR = r"(\(\/?|\[)" -DEL_OPEN_RE = re.compile(DEL_OPEN_STR, RE_FLAGS) -DEL_CLOSE_STR = r"(\/?\)|\])" -DEL_CLOSE_RE = re.compile(DEL_CLOSE_STR, RE_FLAGS) - -# empty line regex, treats omp statements as exception -EMPTY_RE = re.compile(SOL_STR + r"(![^\$].*)?$", RE_FLAGS) - -# two-sided operators -LR_OPS_RE = [REL_OP_RE, LOG_OP_RE, PLUSMINUS_RE] - -# markups to deactivate formatter -NO_ALIGN_RE = re.compile(SOL_STR + r"&\s*[^\s*]+") - -# combine regex that define subunits -NEW_SCOPE_RE = [IF_RE, DO_RE, SELCASE_RE, SUBR_RE, - FCT_RE, MOD_RE, PROG_RE, INTERFACE_RE, TYPE_RE] -CONTINUE_SCOPE_RE = [ELSE_RE, None, CASE_RE, CONTAINS_RE, - CONTAINS_RE, CONTAINS_RE, CONTAINS_RE, None, None] -END_SCOPE_RE = [ENDIF_RE, ENDDO_RE, ENDSEL_RE, ENDSUBR_RE, - ENDFCT_RE, ENDMOD_RE, ENDPROG_RE, ENDINTERFACE_RE, ENDTYPE_RE] - -#========================================================================= - - -class F90Indenter(object): - """ - Parses encapsulation of subunits / scopes line by line and updates the indentation. - """ - - def __init__(self, filename): - self._scope_storage = [] - self._indent_storage = [0] - self._filename = filename - self._line_indents = [] - self._aligner = F90Aligner(filename) - - def process_lines_of_fline(self, f_line, lines, rel_ind, rel_ind_con, line_nr, manual_lines_indent=None): - """ - Process all lines that belong to a Fortran line `f_line`, impose a relative indent of `rel_ind` for - current Fortran line, and `rel_ind_con` for line continuation. By default line continuations are - auto-aligned by F90Aligner - manual offsets can be set by manual_lines_indents. - """ - - self._line_indents = [0] * len(lines) - br_indent_list = [0] * len(lines) - line_indents = self._line_indents - scopes = self._scope_storage - indents = self._indent_storage - filename = self._filename - - # check statements that start new scope - is_new = False - valid_new = False - - debug = False - - for new_n, newre in enumerate(NEW_SCOPE_RE): - if newre.search(f_line) and not END_SCOPE_RE[new_n].search(f_line): - what_new = new_n - is_new = True - valid_new = True - scopes.append(what_new) - if debug: - sys.stderr.write(f_line + '\n') - break - - # check statements that continue scope - is_con = False - valid_con = False - for con_n, conre in enumerate(CONTINUE_SCOPE_RE): - if conre is not None and conre.search(f_line): - what_con = con_n - is_con = True - if len(scopes) > 0: - what = scopes[-1] - if what == what_con: - valid_con = True - if debug: - sys.stderr.write(f_line + '\n') - break - - # check statements that end scope - is_end = False - valid_end = False - for end_n, endre in enumerate(END_SCOPE_RE): - if endre.search(f_line): - what_end = end_n - is_end = True - if len(scopes) > 0: - what = scopes.pop() - if what == what_end: - valid_end = True - if debug: - sys.stderr.write(f_line + '\n') - break - - # deal with line breaks - if not manual_lines_indent: - self._aligner.process_lines_of_fline( - f_line, lines, rel_ind_con, line_nr) - br_indent_list = self._aligner.get_lines_indent() - else: - br_indent_list = manual_lines_indent - - for pos in range(0, len(lines) - 1): - line_indents[pos + 1] = br_indent_list[pos + 1] - - if is_new: - if not valid_new: - raise SyntaxError(filename + ':' + str(line_nr) + - ':' + FORTRAN_DEFAULT_ERROR_MESSAGE) - else: - line_indents = [ind + indents[-1] for ind in line_indents] - old_ind = indents[-1] - - rel_ind += old_ind # prevent originally unindented do / if blocks from being indented - indents.append(rel_ind) - - elif is_con: - if not valid_con: - raise SyntaxError(filename + ':' + str(line_nr) + - ':' + FORTRAN_DEFAULT_ERROR_MESSAGE) - else: - line_indents = [ind + indents[-2] for ind in line_indents] - - elif is_end: - if not valid_end: - raise SyntaxError(filename + ':' + str(line_nr) + - ':' + FORTRAN_DEFAULT_ERROR_MESSAGE) - else: - line_indents = [ind + indents[-2] for ind in line_indents] - indents.pop() - else: - line_indents = [ind + indents[-1] for ind in line_indents] - - self._line_indents = line_indents - - def get_fline_indent(self): - """ - after processing, retrieve the indentation of the full Fortran line. - """ - return self._indent_storage[-1] - - def get_lines_indent(self): - """ - after processing, retrieve the indents of all line parts. - """ - return self._line_indents - -#========================================================================= - - -class F90Aligner(object): - """ - Alignment of continuations of a broken line, based on the following heuristics - if line break in brackets: We are parsing the level of nesting and align to most inner bracket delimiter. - else if line is an assignment: alignment to '=' or '=>'. - note: assignment operator recognized as any '=' that is not - part of another operator and that is not enclosed in bracket - else if line is a declaration: alignment to '::' - else default indent - """ - - def __init__(self, filename): - self._filename = filename - self.__init_line(0) - - def __init_line(self, line_nr): - self._line_nr = line_nr - self._line_indents = [0] - self._level = 0 - self._br_indent_list = [0] - - def process_lines_of_fline(self, f_line, lines, rel_ind, line_nr): - """ - process all lines that belong to a Fortran line `f_line`, `rel_ind` is the relative indentation size. - """ - - self.__init_line(line_nr) - - is_decl = typeRe.match(f_line) or PUBLIC_RE.match(f_line) - for pos, line in enumerate(lines): - self.__align_line_continuations( - line, is_decl, rel_ind, self._line_nr + pos) - if pos + 1 < len(lines): - self._line_indents.append(self._br_indent_list[-1]) - - if (len(self._br_indent_list) > 2 or self._level): - raise SyntaxError(self._filename + ':' + str(self._line_nr) + - ':' + FORTRAN_DEFAULT_ERROR_MESSAGE) - - def get_lines_indent(self): - """ - after processing, retrieve the indents of all line parts. - """ - return self._line_indents - - def __align_line_continuations(self, line, is_decl, indent_size, line_nr): - - indent_list = self._br_indent_list - level = self._level - filename = self._filename - - pos_eq = 0 - pos_ldelim = [] - pos_rdelim = [] - ldelim = [] - rdelim = [] - - # find delimiters that are not ended on this line. - # find proper alignment to most inner delimiter - # or alignment to assignment operator - rel_ind = indent_list[-1] # indentation of prev. line - - instring = '' - end_of_delim = -1 - - for pos, char in CharFilter(enumerate(line)): - - what_del_open = None - what_del_close = None - if not pos == end_of_delim: - what_del_open = DEL_OPEN_RE.match(line[pos:pos + 2]) - what_del_close = DEL_CLOSE_RE.match(line[pos:pos + 2]) - - if not instring and what_del_open: - what_del_open = what_del_open.group() - end_of_delim = pos + len(what_del_open) - 1 - level += 1 - indent_list.append(pos + len(what_del_open) + rel_ind) - pos_ldelim.append(pos) - ldelim.append(what_del_open) - if not instring and what_del_close: - what_del_close = what_del_close.group() - end_of_delim = pos + len(what_del_close) - 1 - level += -1 - indent_list.pop() - if level < 0: - raise SyntaxError(filename + ':' + str(line_nr) + - ':' + FORTRAN_DEFAULT_ERROR_MESSAGE) - if pos_ldelim: - pos_ldelim.pop() - what_del_open = ldelim.pop() - valid = False - if what_del_open == r"(": - valid = what_del_close == r")" - if what_del_open == r"(/": - valid = what_del_close == r"/)" - if what_del_open == r"[": - valid = what_del_close == r"]" - if not valid: - raise SyntaxError( - filename + ':' + str(line_nr) + ':' + FORTRAN_DEFAULT_ERROR_MESSAGE) - else: - pos_rdelim.append(pos) - rdelim.append(what_del_close) - if not instring and not level: - if not is_decl and char == '=': - if not REL_OP_RE.match(line[max(0, pos - 1):min(pos + 2, len(line))]): - if pos_eq > 0: - raise SyntaxError( - filename + ':' + str(line_nr) + ':' + FORTRAN_DEFAULT_ERROR_MESSAGE) - is_pointer = line[pos + 1] == '>' - pos_eq = pos + 1 - # don't align if assignment operator directly before - # line break - if not re.search(r"=>?\s*" + LINEBREAK_STR, line, RE_FLAGS): - indent_list.append( - pos_eq + 1 + is_pointer + indent_list[-1]) - elif is_decl and line[pos:pos + 2] == '::': - if not re.search(r"::\s*" + LINEBREAK_STR, line, RE_FLAGS): - indent_list.append(pos + 3 + indent_list[-1]) - - # Don't align if delimiter opening directly before line break - if level and re.search(DEL_OPEN_STR + r"\s*" + LINEBREAK_STR, line, RE_FLAGS): - if len(indent_list) > 1: - indent_list[-1] = indent_list[-2] - else: - indent_list[-1] = 0 - - if not indent_list[-1]: - indent_list[-1] = indent_size - - self._level = level - -#========================================================================= - - -def inspect_ffile_format(infile, indent_size): - """ - Determine indentation by inspecting original Fortran file (mainly for finding - aligned blocks of DO/IF statements). Also check if - it has f77 constructs. - """ - - adopt = indent_size <= 0 - - is_f90 = True - indents = [] - stream = InputStream(infile) - prev_offset = 0 - first_indent = -1 - while 1: - f_line, _, lines = stream.nextFortranLine() - if not lines: - break - - offset = len(lines[0]) - len(lines[0].lstrip(' ')) - if f_line.strip() and first_indent == -1: - first_indent = offset - indents.append(offset - prev_offset) - if not adopt: # do not adopt indentations but impose fixed rel. ind. - # but don't impose indentation for blocked do/if constructs - if prev_offset != offset or (not IF_RE.search(f_line) and not DO_RE.search(f_line)): - indents[-1] = indent_size - prev_offset = offset - - # can not handle f77 style constructs - if F77_STYLE.search(f_line): - is_f90 = False - return indents, first_indent, is_f90 - -#========================================================================= - - -def format_single_fline(f_line, whitespace, linebreak_pos, ampersand_sep, filename, line_nr, auto_format=True): - """ - format a single Fortran line - imposes white space formatting - and inserts linebreaks. - Takes a logical Fortran line `f_line` as input as well as the positions - of the linebreaks (`linebreak_pos`), and the number of separating whitespace - characters before ampersand (`ampersand_sep`). - `filename` and `line_nr` just for error messages. - The higher `whitespace`, the more white space characters inserted - - whitespace = 0, 1, 2 are currently supported. - auto formatting can be turned off by setting `auto_format` to False. - """ - - # define whether to put whitespaces around operators: - # 0: comma, semicolon - # 1: assignment operators - # 2: relational operators - # 3: logical operators - # 4: arithm. operators plus and minus - if whitespace == 0: - spacey = [0, 0, 0, 0, 0] - elif whitespace == 1: - spacey = [1, 1, 1, 1, 0] - elif whitespace == 2: - spacey = [1, 1, 1, 1, 1] - else: - raise NotImplementedError("unknown value for whitespace") - - line = f_line - line_orig = line - - # rm extraneous whitespace chars, except for declarations - line_ftd = '' - pos_prev = -1 - for pos, char in CharFilter(enumerate(line)): - is_decl = line[pos:].lstrip().startswith('::') or line[ - :pos].rstrip().endswith('::') - if char == ' ': - # remove double spaces - if line_ftd and (re.search(r'[\w"]', line_ftd[-1]) or is_decl): - line_ftd = line_ftd + char - else: - if line_ftd and line_ftd[-1] == ' ' and (not re.search(r'[\w"]', char) and not is_decl): - line_ftd = line_ftd[:-1] # remove spaces except between words - line_ftd = line_ftd + line[pos_prev + 1:pos + 1] - pos_prev = pos - line = line_ftd - - pos_eq = [] - end_of_delim = -1 - level = 0 - for pos, char in CharFilter(enumerate(line)): - # offset w.r.t. unformatted line - offset = len(line_ftd) - len(line) - - # format delimiters - what_del_open = None - what_del_close = None - if pos > end_of_delim: - what_del_open = DEL_OPEN_RE.match( - line[pos:pos + 2]) # opening delimiter token - what_del_close = DEL_CLOSE_RE.match( - line[pos:pos + 2]) # closing delimiter token - - if what_del_open or what_del_close: - sep1 = 0 - sep2 = 0 - - if what_del_open: - delim = what_del_open.group() - else: - delim = what_del_close.group() - - lhs = line_ftd[:pos + offset] - rhs = line_ftd[pos + len(delim) + offset:] - - # format opening delimiters - if what_del_open: - level += 1 # new scope - # add separating whitespace before opening delimiter - # with some exceptions: - if (not re.search(r"(" + DEL_OPEN_STR + r"|[\w\*/=\+\-:])\s*$", line[:pos], RE_FLAGS) and - not EMPTY_RE.search(line[:pos])) or \ - re.search(SOL_STR + r"(\w+\s*:)?(ELSE)?\s*IF\s*$", line[:pos], RE_FLAGS) or \ - re.search(SOL_STR + r"(\w+\s*:)?\s*DO\s+WHILE\s*$", line[:pos], RE_FLAGS) or \ - re.search(SOL_STR + r"(SELECT)?\s*CASE\s*", line[:pos], RE_FLAGS) or \ - re.search(r"\b" + INTR_STMTS_PAR + r"\s*$", line[:pos], RE_FLAGS): - sep1 = 1 - - # format closing delimiters - else: - level += -1 # close scope - # add separating whitespace after closing delimiter - # with some exceptions: - if not re.search(r"^\s*(" + DEL_CLOSE_STR + r"|[,%:/\*])", line[pos + 1:], RE_FLAGS): - sep2 = 1 - elif re.search(r"^\s*::", line[pos + 1:], RE_FLAGS): - sep2 = len(rhs) - len(rhs.lstrip(' ')) - - # where delimiter token ends - end_of_delim = pos + len(delim) - 1 - - line_ftd = lhs.rstrip(' ') + ' ' * sep1 + \ - delim + ' ' * sep2 + rhs.lstrip(' ') - - # format commas and semicolons - if char == ',' or char == ';': - lhs = line_ftd[:pos + offset] - rhs = line_ftd[pos + 1 + offset:] - line_ftd = lhs.rstrip(' ') + char + ' ' * \ - spacey[0] + rhs.lstrip(' ') - line_ftd = line_ftd.rstrip(' ') - - # format .NOT. - if re.match(r"\.NOT\.", line[pos:pos + 5], RE_FLAGS): - lhs = line_ftd[:pos + offset] - rhs = line_ftd[pos + 5 + offset:] - line_ftd = lhs.rstrip( - ' ') + line[pos:pos + 5] + ' ' * spacey[3] + rhs.lstrip(' ') - - # strip whitespaces from '=' and prepare assignment operator - # formatting - if char == '=': - if not REL_OP_RE.search(line[pos - 1:pos + 2]): - lhs = line_ftd[:pos + offset] - rhs = line_ftd[pos + 1 + offset:] - line_ftd = lhs.rstrip(' ') + '=' + rhs.lstrip(' ') - if not level: # remember position of assignment operator - pos_eq.append(len(lhs.rstrip(' '))) - - line = line_ftd - - # format assignments - for pos in pos_eq: - offset = len(line_ftd) - len(line) - is_pointer = line[pos + 1] == '>' - lhs = line_ftd[:pos + offset] - rhs = line_ftd[pos + 1 + is_pointer + offset:] - if is_pointer: - assign_op = '=>' # pointer assignment - else: - assign_op = '=' # assignment - line_ftd = lhs.rstrip( - ' ') + ' ' * spacey[1] + assign_op + ' ' * spacey[1] + rhs.lstrip(' ') - # offset w.r.t. unformatted line - - line = line_ftd - - # for more advanced replacements we separate comments and strings - line_parts = [] - str_end = -1 - instring = '' - for pos, char in enumerate(line): - if char == '"' or char == "'": # skip string - if not instring: - str_start = pos - line_parts.append(line[str_end + 1:str_start]) - instring = char - elif instring == char: - str_end = pos - line_parts.append(line[str_start:str_end + 1]) - instring = '' - if pos == len(line) - 1: - line_parts.append(line[str_end + 1:]) - - # Two-sided operators - for n_op, lr_re in enumerate(LR_OPS_RE): - for pos, part in enumerate(line_parts): - if not re.match(r"['\"!]", part, RE_FLAGS): # exclude comments, strings - partsplit = lr_re.split(part) - line_parts[pos] = (' ' * spacey[n_op + 2]).join(partsplit) - - line = ''.join(line_parts) - - # format ':' for labels - for newre in NEW_SCOPE_RE[0:2]: - if newre.search(line) and re.search(SOL_STR + r"\w+\s*:", line): - line = ': '.join(_.strip() for _ in line.split(':', 1)) - - if not auto_format: - line = line_orig - - # Now it gets messy - we need to shift line break positions from original - # to reformatted line - pos_new = 0 - pos_old = 0 - linebreak_pos.sort(reverse=True) - linebreak_pos_ftd = [] - while 1: - if pos_new == len(line) or pos_old == len(line_orig): - break - assert line[pos_new] is line_orig[pos_old] - if linebreak_pos and pos_old > linebreak_pos[-1]: - linebreak_pos.pop() - linebreak_pos_ftd.append(pos_new) - continue - if line[pos_new] is line_orig[pos_old]: - pos_new += 1 - while pos_new < len(line) and line[pos_new] is ' ': - pos_new += 1 - pos_old += 1 - while pos_old < len(line_orig) and line_orig[pos_old] is ' ': - pos_old += 1 - elif line[pos_new] is ' ': - pos_new += 1 - elif line_orig[pos_old] is ' ': - pos_old += 1 - else: - assert False - - linebreak_pos_ftd.insert(0, 0) - - # We do not insert ampersands in empty lines and comments lines - lines_out = [line[l:r].rstrip(' ') + ' ' * ampersand_sep[pos] + '&' * min(1, r - l) - for pos, (l, r) in enumerate(zip(linebreak_pos_ftd[0:-1], linebreak_pos_ftd[1:]))] - - lines_out.append(line[linebreak_pos_ftd[-1]:]) - - if level != 0: - raise SyntaxError(filename + ':' + str(line_nr) + - ':' + FORTRAN_DEFAULT_ERROR_MESSAGE) - - return lines_out - -#========================================================================= - - -def reformat_ffile(infile, outfile, logFile=sys.stderr, indent_size=2, whitespace=2, orig_filename=None): - """ - main method to be invoked for formatting a Fortran file. - """ - debug = False - - # don't change original indentation if rel-indents set to 0 - adopt_indents = indent_size <= 0 - - if not orig_filename: - orig_filename = infile.name - - indenter = F90Indenter(orig_filename) - - req_indents, first_indent, is_f90 = inspect_ffile_format( - infile, indent_size) - infile.seek(0) - - if not is_f90: - logFile.write("*** " + orig_filename + - ": formatter can not handle f77 constructs. ***\n") - outfile.write(infile.read()) # does not handle f77 constructs - return - - nfl = 0 # fortran line counter - - do_indent = True - use_same_line = False - stream = InputStream(infile) - skip_blank = False - in_manual_block = False - - while 1: - f_line, comments, lines = stream.nextFortranLine() - if not lines: - break - - comment_lines = [] - for line, comment in zip(lines, comments): - has_comment = bool(comment.strip()) - sep = has_comment and not comment.strip() == line.strip() - if line.strip(): # empty lines between linebreaks are ignored - comment_lines.append(' ' * sep + comment.strip()) - - orig_lines = lines - nfl += 1 - - auto_align = not any(NO_ALIGN_RE.search(_) for _ in lines) - auto_format = not (in_manual_block or any( - _.lstrip().startswith('!&') for _ in comment_lines)) - if not auto_format: - auto_align = False - if (len(lines)) == 1: - valid_directive = True - if lines[0].strip().startswith('!&<'): - if in_manual_block: - valid_directive = False - else: - in_manual_block = True - if lines[0].strip().startswith('!&>'): - if not in_manual_block: - valid_directive = False - else: - in_manual_block = False - if not valid_directive: - raise SyntaxError(orig_filename + ':' + str(stream.line_nr) + - ':' + FORMATTER_ERROR_MESSAGE) - - indent = [0] * len(lines) - - is_omp_conditional = False - - if ompRe.match(f_line) and not ompDirRe.match(f_line): - # convert OMP-conditional fortran statements into normal fortran statements - # but remember to convert them back - f_line = ompRe.sub(' ', f_line, count=1) - lines = [ompRe.sub(' ', l, count=1) for l in lines] - is_omp_conditional = True - - is_empty = EMPTY_RE.search(f_line) # blank line or comment only line - - if useParseRe.match(f_line): - do_indent = False - elif ompDirRe.match(f_line): - # move '!$OMP' to line start, otherwise don't format omp directives - lines = ['!$OMP' + (len(l) - len(l.lstrip())) * - ' ' + ompDirRe.sub('', l, count=1) for l in lines] - do_indent = False - elif lines[0].startswith('#'): # preprocessor macros - assert len(lines) == 1 - do_indent = False - elif EMPTY_RE.search(f_line): # empty lines including comment lines - assert len(lines) == 1 - if any(comments): - if lines[0].startswith('!'): - # don't indent unindented comments - do_indent = False - else: - indent[0] = indenter.get_fline_indent() - elif skip_blank: - continue - else: - do_indent = False - - lines = [l.strip(' ') for l in lines] - else: - - manual_lines_indent = [] - if not auto_align: - manual_lines_indent = [ - len(l) - len(l.lstrip(' ').lstrip('&')) for l in lines] - manual_lines_indent = [ind - manual_lines_indent[0] - for ind in manual_lines_indent] - - # ampersands at line starts are remembered (pre_ampersand) and recovered later; - # define the desired number of separating whitespaces before ampersand at line end (ampersand_sep): - # - insert one whitespace character before ampersand as default formatting - # - don't do this if next line starts with an ampersand but remember the original formatting - # this "special rule" is necessary since ampersands starting a line can be used to break literals, - # so inserting a whitespace in this case leads to invalid syntax. - - pre_ampersand = [] - ampersand_sep = [] - sep_next = None - for pos, line in enumerate(lines): - m = re.search(SOL_STR + r'(&\s*)', line) - if m: - pre_ampersand.append(m.group(1)) - sep = len( - re.search(r'(\s*)&[\s]*(?:!.*)?$', lines[pos - 1]).group(1)) - ampersand_sep.append(sep) - else: - pre_ampersand.append('') - if pos > 0: - ampersand_sep.append(1) - - lines = [l.strip(' ').strip('&') for l in lines] - f_line = f_line.strip(' ') - - # find linebreak positions - linebreak_pos = [] - for pos, line in enumerate(lines): - found = None - for char_pos, char in CharFilter(enumerate(line)): - if char == "&": - found = char_pos - if found: - linebreak_pos.append(found) - elif line.lstrip(' ').startswith('!'): - linebreak_pos.append(0) - - linebreak_pos = [sum(linebreak_pos[0:_ + 1]) - - 1 for _ in range(0, len(linebreak_pos))] - - lines = format_single_fline( - f_line, whitespace, linebreak_pos, ampersand_sep, orig_filename, stream.line_nr, auto_format) - - # we need to insert comments in formatted lines - for pos, (line, comment) in enumerate(zip(lines, comment_lines)): - if pos < len(lines) - 1: - has_nl = True - else: - has_nl = not re.search(EOL_SC, line) - lines[pos] = lines[pos].rstrip(' ') + comment + '\n' * has_nl - - try: - rel_indent = req_indents[nfl] - except IndexError: - rel_indent = 0 - - indenter.process_lines_of_fline( - f_line, lines, rel_indent, indent_size, stream.line_nr, manual_lines_indent) - indent = indenter.get_lines_indent() - - # recover ampersands at line start - for pos, line in enumerate(lines): - amp_insert = pre_ampersand[pos] - if amp_insert: - indent[pos] += -1 - lines[pos] = amp_insert + line - - lines = [re.sub(r"\s+$", '\n', l, RE_FLAGS) - for l in lines] # deleting trailing whitespaces - - for ind, line, orig_line in zip(indent, lines, orig_lines): - # get actual line length excluding comment: - line_length = 0 - for line_length, _ in CharFilter(enumerate(line)): - pass - line_length += 1 - - if do_indent: - ind_use = ind + first_indent - else: - if use_same_line: - ind_use = 1 - else: - ind_use = 0 - if ind_use + line_length <= 133: # 132 plus 1 newline char - outfile.write('!$' * is_omp_conditional + ' ' * - (ind_use - 2 * is_omp_conditional + - len(line) - len(line.lstrip(' '))) + line.lstrip(' ')) - elif line_length <= 133: - outfile.write('!$' * is_omp_conditional + ' ' * - (133 - 2 * is_omp_conditional - - len(line.lstrip(' '))) + line.lstrip(' ')) - if not typeRe.match(f_line): - logFile.write("*** " + orig_filename + ":" + str(stream.line_nr) + - ": auto indentation failed due to 132 chars limit, line should be splitted. ***\n") - else: - outfile.write(orig_line) - logFile.write("*** " + orig_filename + ":" + str(stream.line_nr) + - (": auto indentation and whitespace formatting failed due to 132 chars limit, line should be splitted. ***\n")) - if debug: - sys.stderr.write(' ' * ind_use + line + '\n') - - # no indentation of semicolon separated lines - if re.search(r";\s*$", f_line, RE_FLAGS): - do_indent = False - use_same_line = True - else: - do_indent = True - use_same_line = False - - # rm subsequent blank lines - skip_blank = is_empty and not any(comments) - -try: - any -except NameError: - def any(iterable): - for element in iterable: - if element: - return True - return False - -# EOF diff --git a/formatting/replacer.py b/formatting/replacer.py deleted file mode 100644 index 8096efa..0000000 --- a/formatting/replacer.py +++ /dev/null @@ -1,39 +0,0 @@ -import re -import sys - -repl = { - 'routine_name': 'routineN', - 'module_name': 'moduleN' -} -specialRepl = None -# { re.compile(r"(.*:: *moduleN) *= *(['\"])[a-zA-Z_0-9]+\2",flags=re.IGNORECASE):r"character(len=*), parameter :: moduleN = '__MODULE_NAME__'" } - - -def replaceWords(infile, outfile, replacements=repl, - specialReplacements=specialRepl, - logFile=sys.stderr): - """Replaces the words in infile writing the output to outfile. - - replacements is a dictionary with the words to replace. - specialReplacements is a dictionary with general regexp replacements. - """ - lineNr = 0 - nonWordRe = re.compile(r"(\W+)") - - while 1: - line = infile.readline() - lineNr = lineNr + 1 - if not line: - break - - if specialReplacements: - for subs in specialReplacements.keys(): - line = subs.sub(specialReplacements[subs], line) - - tokens = nonWordRe.split(line) - for token in tokens: - if token in replacements.keys(): - outfile.write(replacements[token]) - else: - outfile.write(token) -# EOF diff --git a/formatting/selftest.py b/formatting/selftest.py deleted file mode 100644 index 7d8434a..0000000 --- a/formatting/selftest.py +++ /dev/null @@ -1,262 +0,0 @@ - -content = """ -MODULE prettify_selftest - IMPLICIT NONE - PRIVATE - PUBLIC :: dp, test_routine, & - test_function, test_type, str_function - INTEGER, PARAMETER ::dp = SELECTED_REAL_KIND(15, 307) - TYPE test_type - REAL(KIND=dp) :: r = 1.0D-3 - INTEGER :: i - END TYPE test_type - -CONTAINS - - SUBROUTINE test_routine( & - r, i, j, k, l) - INTEGER, INTENT(IN) :: r, i, j, k - INTEGER, INTENT(OUT) :: l - - l = test_function(r, i, j, k) - END & - SUBROUTINE - - PURE FUNCTION test_function(r, i, j, & - k) & - RESULT(l) - INTEGER, INTENT(IN) :: r, i, j, k - INTEGER :: l - - l = r+i+j+k - END FUNCTION - FUNCTION & - str_function(a) RESULT(l) - CHARACTER(LEN=*) :: a - INTEGER :: l - - IF (LEN(a) < 5) THEN - l = 0 - ELSE - l = 1 - ENDIF - END FUNCTION - -END MODULE - -PROGRAM example_prog - USE example, ONLY: dp, test_routine, test_function, test_type,str_function - - IMPLICIT NONE - INTEGER :: r, i, j, k, l, my_integer, m - INTEGER, DIMENSION(5) :: arr - INTEGER, DIMENSION(20) :: big_arr - INTEGER :: ENDIF - TYPE(test_type) :: t - REAL(KIND=dp) :: r1, r2, r3, r4, r5, r6 - INTEGER, POINTER :: point - - point => NULL() - -! 1) white space formatting ! -!***************************! -! example 1.1 - r = 1; i = -2; j = 3; k = 4; l = 5 - r2 = 0.0_dp; r3 = 1.0_dp; r4 = 2.0_dp; r5 = 3.0_dp; r6 = 4.0_dp - r1 = -(r2**i*(r3+r5*(-r4)-r6))-2.e+2 - IF (r .EQ. 2 .AND. r <= 5) i = 3 - WRITE (*, *) (MERGE(3, 1, i <= 2)) - WRITE (*, *) test_function(r, i, j, k) - t%r = 4.0_dp - t%i = str_function("t % i = ") - -! example 1.2 - my_integer = 2 - i = 3 - j = 5 - - big_arr = [1, 2, 3, 4, 5, & - 6, 7, 8, 9, 10, & - 11, 12, 13, 14, 15, & - 16, 17, 18, 19, 20] - -! example 1.3: disabling auto-formatter: - my_integer = 2 !& - i = 3 !& - j = 5 !& - -!&< - my_integer = 2 - i = 3 - j = 5 -!&> - - big_arr = [ 1, 2, 3, 4, 5, & !& - 6, 7, 8, 9, 10, & !& - 11, 12, 13, 14, 15, & !& - 16, 17, 18, 19, 20] !& - -! example 1.4: - - big_arr = [1, 2, 3, 4, 5,& - & 6, 7, 8, 9, 10, & - & 11, 12, 13, 14, 15,& - &16, 17, 18, 19, 20] - -! 2) auto indentation for loops ! -!*******************************! - -! example 2.1 - l = 0 - DO r = 1, 10 - SELECT CASE (r) - CASE (1) - do_label: DO i = 1, 100 - IF (i <= 2) THEN - m = 0 - DO WHILE (m < 4) - m = m+1 - DO k = 1, 3 - IF (k == 1) l = l+1 - END DO - ENDDO - ENDIF - ENDDO do_label - CASE (2) - l = i+j+k - END SELECT - ENDDO - -! example 2.2 - DO m = 1, 2 - DO r = 1, 3 - WRITE (*, *) r - DO k = 1, 4 - DO l = 1, 3 - DO i = 4, 5 - DO my_integer = 1, 1 - DO j = 1, 2 - WRITE (*, *) test_function(m, r, k, l)+i - ENDDO - ENDDO - ENDDO - ENDDO - ENDDO - ENDDO - ENDDO - -! 3) auto alignment for linebreaks ! -!************************************! - -! example 3.1 - l = test_function(1, 2, test_function(1, 2, 3, 4), 4)+3*(2+1) - - l = test_function(1, 2, test_function(1, 2, 3, 4), 4)+ & - 3*(2+1) - - l = test_function(1, 2, & - test_function(1, 2, 3, 4), 4)+ & - 3*(2+1) - - l = test_function(1, 2, & - test_function(1, 2, 3, & - 4), 4)+ & - 3*(2+1) - -! example 3.2 - arr = [1, (/3, 4, 5/), 6]+[1, 2, 3, 4, 5] - - arr = [1, (/3, 4, 5/), & - 6]+[1, 2, 3, 4, 5] - - arr = [1, (/3, 4, 5/), & - 6]+ & - [1, 2, 3, 4, 5] - - arr = [1, (/3, 4, & - 5/), & - 6]+ & - [1, 2, 3, 4, 5] - -! example 3.3 - l = test_function(1, 2, & - 3, 4) - - l = test_function( & - 1, 2, 3, 4) - - arr = [1, 2, & - 3, 4, 5] - arr = [ & - 1, 2, 3, 4, 5] - -! 4) More complex formatting and tricky test cases ! -!**************************************************! - -! example 4.1 - l = 0 - DO r = 1, 10 - SELECT CASE (r) - CASE (1) - DO i = 1, 100; IF (i <= 2) THEN ! comment - DO j = 1, 5 - DO k = 1, 3 - l = l+1 -! unindented comment - ! indented comment - END DO; ENDDO - ELSEIF (.NOT. j == 4) THEN - my_integer = 4 - ELSE - WRITE (*, *) "hello" - ENDIF - ENDDO - CASE (2) - l = i+j+k - END SELECT - ENDDO - -! example 4.2 - IF ( & - l == & - 111) & - THEN - DO k = 1, 2 - IF (k == 1) & - l = test_function(1, & - test_function(r=4, i=5, & - j=6, k=test_function(1, 2*(3*(1+1)), str_function(")a!(b['(;=dfe"), & - 9)+ & - test_function(1, 2, 3, 4)), 9, 10) & - ! test_function(1,2,3,4)),9,10) & - ! +13*str_function('') + str_function('"') - +13*str_function('')+str_function('"') - END & ! comment - ! comment - DO - ENDIF - -! example 4.3 - arr = [1, (/3, 4, & - 5/), & - 6]+ & - [1, 2, 3, 4, 5]; arr = [1, 2, & - 3, 4, 5] - -! example 4.4 - ENDIF = 3 - IF (ENDIF == 2) THEN - ENDIF = 5 - ELSE IF (ENDIF == 3) THEN - WRITE (*, *) ENDIF - ENDIF - -! example 4.5 - DO i = 1, 2; IF (.TRUE.) THEN - WRITE (*, *) "hello" - ENDIF; ENDDO - -END PROGRAM -""" - -# EOF diff --git a/fparse_utils.py b/fparse_utils.py new file mode 100644 index 0000000..4167bef --- /dev/null +++ b/fparse_utils.py @@ -0,0 +1,155 @@ +import re +import string +from collections import deque +try: + from cStringIO import StringIO +except ImportError: + from io import StringIO + +USE_PARSE_RE = re.compile( + r" *use +(?P[a-zA-Z_][a-zA-Z_0-9]*)(?P *, *only *:)? *(?P.*)$", + flags=re.IGNORECASE) +VAR_DECL_RE = re.compile(r" *(?Pinteger(?: *\* *[0-9]+)?|logical|character(?: *\* *[0-9]+)?|real(?: *\* *[0-9]+)?|complex(?: *\* *[0-9]+)?|type) *(?P\((?:[^()]+|\((?:[^()]+|\([^()]*\))*\))*\))? *(?P(?: *, *[a-zA-Z_0-9]+(?: *\((?:[^()]+|\((?:[^()]+|\([^()]*\))*\))*\))?)+)? *(?P::)?(?P[^\n]+)\n?", re.IGNORECASE) # $ + +OMP_DIR_RE = re.compile(r"^\s*(!\$omp)", re.IGNORECASE) +OMP_RE = re.compile(r"^\s*(!\$)", re.IGNORECASE) + + +class CharFilter(object): + """ + An iterator to wrap the iterator returned by `enumerate` + and ignore comments and characters inside strings + """ + + def __init__(self, it): + self._it = it + self._instring = '' + + def __iter__(self): + return self + + def __next__(self): + """ python 3 version""" + pos, char = next(self._it) + if not self._instring and char == '!': + raise StopIteration + + # detect start/end of a string + if char == '"' or char == "'": + if self._instring == char: + self._instring = '' + elif not self._instring: + self._instring = char + + if self._instring: + return self.__next__() + + return (pos, char) + + def next(self): + """ python 2 version""" + pos, char = self._it.next() + if not self._instring and char == '!': + raise StopIteration + + # detect start/end of a string + if char == '"' or char == "'": + if self._instring == char: + self._instring = '' + elif not self._instring: + self._instring = char + + if self._instring: + return self.next() + + return (pos, char) + + +class InputStream(object): + """ + Class to read logical Fortran lines from a Fortran file. + """ + + def __init__(self, infile): + self.line_buffer = deque([]) + self.infile = infile + self.line_nr = 0 + + def nextFortranLine(self): + """Reads a group of connected lines (connected with &, separated by newline or semicolon) + returns a touple with the joined line, and a list with the original lines. + Doesn't support multiline character constants! + """ + lineRe = re.compile( + # $ + r"(?:(?P#.*\n?)| *(&)?(?P(?:!\$|[^&!\"']+|\"[^\"]*\"|'[^']*')*)(?P&)? *(?P!.*)?\n?)", + re.IGNORECASE) + joinedLine = "" + comments = [] + lines = [] + continuation = 0 + + while 1: + if not self.line_buffer: + line = self.infile.readline().replace("\t", 8 * " ") + self.line_nr += 1 + # convert OMP-conditional fortran statements into normal fortran statements + # but remember to convert them back + is_omp_conditional = False + omp_indent = 0 + if OMP_RE.match(line): + omp_indent = len(line) - len(line.lstrip(' ')) + line = OMP_RE.sub('', line, count=1) + is_omp_conditional = True + line_start = 0 + for pos, char in CharFilter(enumerate(line)): + if char == ';' or pos + 1 == len(line): + self.line_buffer.append(omp_indent * ' ' + '!$' * is_omp_conditional + + line[line_start:pos + 1]) + omp_indent = 0 + is_omp_conditional = False + line_start = pos + 1 + if(line_start < len(line)): + # line + comment + self.line_buffer.append('!$' * is_omp_conditional + + line[line_start:]) + + if self.line_buffer: + line = self.line_buffer.popleft() + + if not line: + break + + lines.append(line) + m = lineRe.match(line) + if not m or m.span()[1] != len(line): + # FIXME: does not handle line continuation of + # omp conditional fortran statements + # starting with an ampersand. + raise SyntaxError("unexpected line format:" + repr(line)) + if m.group("preprocessor"): + if len(lines) > 1: + raise SyntaxError( + "continuation to a preprocessor line not supported " + repr(line)) + comments.append(line) + break + coreAtt = m.group("core") + if OMP_RE.match(coreAtt) and joinedLine.strip(): + # remove omp '!$' for line continuation + coreAtt = OMP_RE.sub('', coreAtt, count=1).lstrip() + joinedLine = joinedLine.rstrip("\n") + coreAtt + if coreAtt and not coreAtt.isspace(): + continuation = 0 + if m.group("continue"): + continuation = 1 + if line.lstrip().startswith('!') and not OMP_RE.search(line): + comments.append(line.rstrip('\n')) + elif m.group("comment"): + comments.append(m.group("comment")) + else: + comments.append('') + if not continuation: + break + return (joinedLine, comments, lines) + + diff --git a/fprettify.py b/fprettify.py index 27683a5..7d04feb 100755 --- a/fprettify.py +++ b/fprettify.py @@ -1,372 +1,1023 @@ #!/usr/bin/env python +""" + Impose white space conventions and indentation based on scopes / subunits + + normalization of white spaces supported for following operators: + - relational operators: + .EQ. .NE. .LT. .LE. .GT. .GE. + == /= < <= > >= + - logical operators: + .AND. .OR. .EQV. .NEQV. + .NOT. + - bracket delimiters + - commas and semicolons: + - arithmetic operators: + * / ** + - + - other operators: + % - (sign) = (function argument) + = (assignment) => (pointer assignment) + + supported criteria for alignment / indentation: + Fortran lines: + - if, else, endif + - do, enddo + - select case, case, end select + - subroutine, end subroutine + - function, end function + - module, end module + - program, end program + - interface, end interface + - type, end type + Actual lines (parts of Fortran lines separated by linebreaks): + - bracket delimiters (.), (/./), and [.] + - assignments by value = and pointer =>. + + LIMITATIONS + - assumes that all subunits are explicitly ended within same file, no treatment of #include statements + - can not deal with f77 constructs (files are ignored) +""" -import sys import re -import tempfile +import sys import os -import os.path import tempfile +from fparse_utils import USE_PARSE_RE, VAR_DECL_RE, InputStream, CharFilter, OMP_RE, OMP_DIR_RE +#========================================================================= +# constants, mostly regular expressions + +RE_FLAGS = re.IGNORECASE # all regex should be case insensitive + +FORTRAN_DEFAULT_ERROR_MESSAGE = " Syntax error - this formatter can not handle invalid Fortran files." +FORMATTER_ERROR_MESSAGE = " Wrong usage of formatting-specific directives '&', '!&', '!&<' or '!&>'." + +EOL_STR = r"\s*;?\s*$" # end of fortran line +EOL_SC = r"\s*;\s*$" # whether line is ended with semicolon +SOL_STR = r"^\s*" # start of fortran line + +F77_STYLE = re.compile(r"^\s*\d", RE_FLAGS) + +# regular expressions for parsing statements that start, continue or end a +# subunit: +IF_RE = re.compile( + SOL_STR + r"(\w+\s*:)?\s*IF\s*\(.+\)\s*THEN" + EOL_STR, RE_FLAGS) +ELSE_RE = re.compile( + SOL_STR + r"ELSE(\s*IF\s*\(.+\)\s*THEN)?" + EOL_STR, RE_FLAGS) +ENDIF_RE = re.compile(SOL_STR + r"END\s*IF(\s+\w+)?" + EOL_STR, RE_FLAGS) + +DO_RE = re.compile(SOL_STR + r"(\w+\s*:)?\s*DO(" + EOL_STR + r"|\s)", RE_FLAGS) +ENDDO_RE = re.compile(SOL_STR + r"END\s*DO(\s+\w+)?" + EOL_STR, RE_FLAGS) + +SELCASE_RE = re.compile( + SOL_STR + r"SELECT\s*CASE\s*\(.+\)" + EOL_STR, RE_FLAGS) +CASE_RE = re.compile(SOL_STR + r"CASE\s*(\(.+\)|DEFAULT)" + EOL_STR, RE_FLAGS) +ENDSEL_RE = re.compile(SOL_STR + r"END\s*SELECT" + EOL_STR, RE_FLAGS) + +SUBR_RE = re.compile( + r"^([^\"'!]* )?SUBROUTINE\s+\w+\s*(\(.*\))?" + EOL_STR, RE_FLAGS) +ENDSUBR_RE = re.compile( + SOL_STR + r"END\s*SUBROUTINE(\s+\w+)?" + EOL_STR, RE_FLAGS) + +FCT_RE = re.compile( + r"^([^\"'!]* )?FUNCTION\s+\w+\s*(\(.*\))?(\s*RESULT\s*\(\w+\))?" + EOL_STR, RE_FLAGS) +ENDFCT_RE = re.compile( + SOL_STR + r"END\s*FUNCTION(\s+\w+)?" + EOL_STR, RE_FLAGS) + +MOD_RE = re.compile(SOL_STR + r"MODULE\s+\w+" + EOL_STR, RE_FLAGS) +ENDMOD_RE = re.compile(SOL_STR + r"END\s*MODULE(\s+\w+)?" + EOL_STR, RE_FLAGS) + +TYPE_RE = re.compile( + SOL_STR + r"TYPE(\s*,\s*BIND\s*\(\s*C\s*\))?(\s*::\s*|\s+)\w+" + EOL_STR, RE_FLAGS) +ENDTYPE_RE = re.compile(SOL_STR + r"END\s*TYPE(\s+\w+)?" + EOL_STR, RE_FLAGS) + +PROG_RE = re.compile(SOL_STR + r"PROGRAM\s+\w+" + EOL_STR, RE_FLAGS) +ENDPROG_RE = re.compile( + SOL_STR + r"END\s*PROGRAM(\s+\w+)?" + EOL_STR, RE_FLAGS) + +INTERFACE_RE = re.compile( + r"^([^\"'!]* )?INTERFACE(\s+\w+)?" + EOL_STR, RE_FLAGS) +ENDINTERFACE_RE = re.compile( + SOL_STR + r"END\s*INTERFACE(\s+\w+)?" + EOL_STR, RE_FLAGS) + +CONTAINS_RE = re.compile(SOL_STR + r"CONTAINS" + EOL_STR, RE_FLAGS) + +PUBLIC_RE = re.compile(SOL_STR + r"PUBLIC\s*::", RE_FLAGS) + +# intrinsic statements with parenthesis notation that are not functions +INTR_STMTS_PAR = "(ALLOCATE|DEALLOCATE|REWIND|BACKSPACE|INQUIRE|OPEN|CLOSE|WRITE|READ|FORALL|WHERE|NULLIFY)" + +# regular expressions for parsing linebreaks +LINEBREAK_STR = r"(&)[\s]*(?:!.*)?$" + +# regular expressions for parsing operators +# Note: +/- in real literals and sign operator is ignored +PLUSMINUS_RE = re.compile( + r"(?<=[\w\)\]])(?(?!=)|>=))\s*", RE_FLAGS) +LOG_OP_RE = re.compile(r"\s*(\.(?:AND|OR|EQV|NEQV)\.)\s*", RE_FLAGS) + +# regular expressions for parsing delimiters +DEL_OPEN_STR = r"(\(\/?|\[)" +DEL_OPEN_RE = re.compile(DEL_OPEN_STR, RE_FLAGS) +DEL_CLOSE_STR = r"(\/?\)|\])" +DEL_CLOSE_RE = re.compile(DEL_CLOSE_STR, RE_FLAGS) + +# empty line regex, treats omp statements as exception +EMPTY_RE = re.compile(SOL_STR + r"(![^\$].*)?$", RE_FLAGS) + +# two-sided operators +LR_OPS_RE = [REL_OP_RE, LOG_OP_RE, PLUSMINUS_RE] + +# markups to deactivate formatter +NO_ALIGN_RE = re.compile(SOL_STR + r"&\s*[^\s*]+") + +# combine regex that define subunits +NEW_SCOPE_RE = [IF_RE, DO_RE, SELCASE_RE, SUBR_RE, + FCT_RE, MOD_RE, PROG_RE, INTERFACE_RE, TYPE_RE] +CONTINUE_SCOPE_RE = [ELSE_RE, None, CASE_RE, CONTAINS_RE, + CONTAINS_RE, CONTAINS_RE, CONTAINS_RE, None, None] +END_SCOPE_RE = [ENDIF_RE, ENDDO_RE, ENDSEL_RE, ENDSUBR_RE, + ENDFCT_RE, ENDMOD_RE, ENDPROG_RE, ENDINTERFACE_RE, ENDTYPE_RE] + +#========================================================================= + + +class F90Indenter(object): + """ + Parses encapsulation of subunits / scopes line by line and updates the indentation. + """ + + def __init__(self, filename): + self._scope_storage = [] + self._indent_storage = [0] + self._filename = filename + self._line_indents = [] + self._aligner = F90Aligner(filename) + + def process_lines_of_fline(self, f_line, lines, rel_ind, rel_ind_con, line_nr, manual_lines_indent=None): + """ + Process all lines that belong to a Fortran line `f_line`, impose a relative indent of `rel_ind` for + current Fortran line, and `rel_ind_con` for line continuation. By default line continuations are + auto-aligned by F90Aligner - manual offsets can be set by manual_lines_indents. + """ + + self._line_indents = [0] * len(lines) + br_indent_list = [0] * len(lines) + line_indents = self._line_indents + scopes = self._scope_storage + indents = self._indent_storage + filename = self._filename + + # check statements that start new scope + is_new = False + valid_new = False + + debug = False + + for new_n, newre in enumerate(NEW_SCOPE_RE): + if newre.search(f_line) and not END_SCOPE_RE[new_n].search(f_line): + what_new = new_n + is_new = True + valid_new = True + scopes.append(what_new) + if debug: + sys.stderr.write(f_line + '\n') + break + + # check statements that continue scope + is_con = False + valid_con = False + for con_n, conre in enumerate(CONTINUE_SCOPE_RE): + if conre is not None and conre.search(f_line): + what_con = con_n + is_con = True + if len(scopes) > 0: + what = scopes[-1] + if what == what_con: + valid_con = True + if debug: + sys.stderr.write(f_line + '\n') + break + + # check statements that end scope + is_end = False + valid_end = False + for end_n, endre in enumerate(END_SCOPE_RE): + if endre.search(f_line): + what_end = end_n + is_end = True + if len(scopes) > 0: + what = scopes.pop() + if what == what_end: + valid_end = True + if debug: + sys.stderr.write(f_line + '\n') + break + + # deal with line breaks + if not manual_lines_indent: + self._aligner.process_lines_of_fline( + f_line, lines, rel_ind_con, line_nr) + br_indent_list = self._aligner.get_lines_indent() + else: + br_indent_list = manual_lines_indent + + for pos in range(0, len(lines) - 1): + line_indents[pos + 1] = br_indent_list[pos + 1] + + if is_new: + if not valid_new: + raise SyntaxError(filename + ':' + str(line_nr) + + ':' + FORTRAN_DEFAULT_ERROR_MESSAGE) + else: + line_indents = [ind + indents[-1] for ind in line_indents] + old_ind = indents[-1] + + rel_ind += old_ind # prevent originally unindented do / if blocks from being indented + indents.append(rel_ind) + + elif is_con: + if not valid_con: + raise SyntaxError(filename + ':' + str(line_nr) + + ':' + FORTRAN_DEFAULT_ERROR_MESSAGE) + else: + line_indents = [ind + indents[-2] for ind in line_indents] + + elif is_end: + if not valid_end: + raise SyntaxError(filename + ':' + str(line_nr) + + ':' + FORTRAN_DEFAULT_ERROR_MESSAGE) + else: + line_indents = [ind + indents[-2] for ind in line_indents] + indents.pop() + else: + line_indents = [ind + indents[-1] for ind in line_indents] + + self._line_indents = line_indents + + def get_fline_indent(self): + """ + after processing, retrieve the indentation of the full Fortran line. + """ + return self._indent_storage[-1] + + def get_lines_indent(self): + """ + after processing, retrieve the indents of all line parts. + """ + return self._line_indents + +#========================================================================= + + +class F90Aligner(object): + """ + Alignment of continuations of a broken line, based on the following heuristics + if line break in brackets: We are parsing the level of nesting and align to most inner bracket delimiter. + else if line is an assignment: alignment to '=' or '=>'. + note: assignment operator recognized as any '=' that is not + part of another operator and that is not enclosed in bracket + else if line is a declaration: alignment to '::' + else default indent + """ + + def __init__(self, filename): + self._filename = filename + self.__init_line(0) + + def __init_line(self, line_nr): + self._line_nr = line_nr + self._line_indents = [0] + self._level = 0 + self._br_indent_list = [0] + + def process_lines_of_fline(self, f_line, lines, rel_ind, line_nr): + """ + process all lines that belong to a Fortran line `f_line`, `rel_ind` is the relative indentation size. + """ + + self.__init_line(line_nr) + + is_decl = VAR_DECL_RE.match(f_line) or PUBLIC_RE.match(f_line) + for pos, line in enumerate(lines): + self.__align_line_continuations( + line, is_decl, rel_ind, self._line_nr + pos) + if pos + 1 < len(lines): + self._line_indents.append(self._br_indent_list[-1]) + + if (len(self._br_indent_list) > 2 or self._level): + raise SyntaxError(self._filename + ':' + str(self._line_nr) + + ':' + FORTRAN_DEFAULT_ERROR_MESSAGE) + + def get_lines_indent(self): + """ + after processing, retrieve the indents of all line parts. + """ + return self._line_indents + + def __align_line_continuations(self, line, is_decl, indent_size, line_nr): + + indent_list = self._br_indent_list + level = self._level + filename = self._filename + + pos_eq = 0 + pos_ldelim = [] + pos_rdelim = [] + ldelim = [] + rdelim = [] + + # find delimiters that are not ended on this line. + # find proper alignment to most inner delimiter + # or alignment to assignment operator + rel_ind = indent_list[-1] # indentation of prev. line + + instring = '' + end_of_delim = -1 + + for pos, char in CharFilter(enumerate(line)): + + what_del_open = None + what_del_close = None + if not pos == end_of_delim: + what_del_open = DEL_OPEN_RE.match(line[pos:pos + 2]) + what_del_close = DEL_CLOSE_RE.match(line[pos:pos + 2]) + + if not instring and what_del_open: + what_del_open = what_del_open.group() + end_of_delim = pos + len(what_del_open) - 1 + level += 1 + indent_list.append(pos + len(what_del_open) + rel_ind) + pos_ldelim.append(pos) + ldelim.append(what_del_open) + if not instring and what_del_close: + what_del_close = what_del_close.group() + end_of_delim = pos + len(what_del_close) - 1 + level += -1 + indent_list.pop() + if level < 0: + raise SyntaxError(filename + ':' + str(line_nr) + + ':' + FORTRAN_DEFAULT_ERROR_MESSAGE) + if pos_ldelim: + pos_ldelim.pop() + what_del_open = ldelim.pop() + valid = False + if what_del_open == r"(": + valid = what_del_close == r")" + if what_del_open == r"(/": + valid = what_del_close == r"/)" + if what_del_open == r"[": + valid = what_del_close == r"]" + if not valid: + raise SyntaxError( + filename + ':' + str(line_nr) + ':' + FORTRAN_DEFAULT_ERROR_MESSAGE) + else: + pos_rdelim.append(pos) + rdelim.append(what_del_close) + if not instring and not level: + if not is_decl and char == '=': + if not REL_OP_RE.match(line[max(0, pos - 1):min(pos + 2, len(line))]): + if pos_eq > 0: + raise SyntaxError( + filename + ':' + str(line_nr) + ':' + FORTRAN_DEFAULT_ERROR_MESSAGE) + is_pointer = line[pos + 1] == '>' + pos_eq = pos + 1 + # don't align if assignment operator directly before + # line break + if not re.search(r"=>?\s*" + LINEBREAK_STR, line, RE_FLAGS): + indent_list.append( + pos_eq + 1 + is_pointer + indent_list[-1]) + elif is_decl and line[pos:pos + 2] == '::': + if not re.search(r"::\s*" + LINEBREAK_STR, line, RE_FLAGS): + indent_list.append(pos + 3 + indent_list[-1]) + + # Don't align if delimiter opening directly before line break + if level and re.search(DEL_OPEN_STR + r"\s*" + LINEBREAK_STR, line, RE_FLAGS): + if len(indent_list) > 1: + indent_list[-1] = indent_list[-2] + else: + indent_list[-1] = 0 + + if not indent_list[-1]: + indent_list[-1] = indent_size + + self._level = level + +#========================================================================= + + +def inspect_ffile_format(infile, indent_size): + """ + Determine indentation by inspecting original Fortran file (mainly for finding + aligned blocks of DO/IF statements). Also check if + it has f77 constructs. + """ + + adopt = indent_size <= 0 + + is_f90 = True + indents = [] + stream = InputStream(infile) + prev_offset = 0 + first_indent = -1 + while 1: + f_line, _, lines = stream.nextFortranLine() + if not lines: + break + + offset = len(lines[0]) - len(lines[0].lstrip(' ')) + if f_line.strip() and first_indent == -1: + first_indent = offset + indents.append(offset - prev_offset) + if not adopt: # do not adopt indentations but impose fixed rel. ind. + # but don't impose indentation for blocked do/if constructs + if prev_offset != offset or (not IF_RE.search(f_line) and not DO_RE.search(f_line)): + indents[-1] = indent_size + prev_offset = offset + + # can not handle f77 style constructs + if F77_STYLE.search(f_line): + is_f90 = False + return indents, first_indent, is_f90 + +#========================================================================= -try: - from hashlib import md5 -except ImportError: - from md5 import new as md5 -from formatting import normalizeFortranFile -from formatting import replacer -from formatting import reformatFortranFile -from formatting import selftest +def format_single_fline(f_line, whitespace, linebreak_pos, ampersand_sep, filename, line_nr, auto_format=True): + """ + format a single Fortran line - imposes white space formatting + and inserts linebreaks. + Takes a logical Fortran line `f_line` as input as well as the positions + of the linebreaks (`linebreak_pos`), and the number of separating whitespace + characters before ampersand (`ampersand_sep`). + `filename` and `line_nr` just for error messages. + The higher `whitespace`, the more white space characters inserted - + whitespace = 0, 1, 2 are currently supported. + auto formatting can be turned off by setting `auto_format` to False. + """ + # define whether to put whitespaces around operators: + # 0: comma, semicolon + # 1: assignment operators + # 2: relational operators + # 3: logical operators + # 4: arithm. operators plus and minus + if whitespace == 0: + spacey = [0, 0, 0, 0, 0] + elif whitespace == 1: + spacey = [1, 1, 1, 1, 0] + elif whitespace == 2: + spacey = [1, 1, 1, 1, 1] + else: + raise NotImplementedError("unknown value for whitespace") + + line = f_line + line_orig = line + + # rm extraneous whitespace chars, except for declarations + line_ftd = '' + pos_prev = -1 + for pos, char in CharFilter(enumerate(line)): + is_decl = line[pos:].lstrip().startswith('::') or line[ + :pos].rstrip().endswith('::') + if char == ' ': + # remove double spaces + if line_ftd and (re.search(r'[\w"]', line_ftd[-1]) or is_decl): + line_ftd = line_ftd + char + else: + if line_ftd and line_ftd[-1] == ' ' and (not re.search(r'[\w"]', char) and not is_decl): + line_ftd = line_ftd[:-1] # remove spaces except between words + line_ftd = line_ftd + line[pos_prev + 1:pos + 1] + pos_prev = pos + line = line_ftd + + pos_eq = [] + end_of_delim = -1 + level = 0 + for pos, char in CharFilter(enumerate(line)): + # offset w.r.t. unformatted line + offset = len(line_ftd) - len(line) + + # format delimiters + what_del_open = None + what_del_close = None + if pos > end_of_delim: + what_del_open = DEL_OPEN_RE.match( + line[pos:pos + 2]) # opening delimiter token + what_del_close = DEL_CLOSE_RE.match( + line[pos:pos + 2]) # closing delimiter token + + if what_del_open or what_del_close: + sep1 = 0 + sep2 = 0 + + if what_del_open: + delim = what_del_open.group() + else: + delim = what_del_close.group() + + lhs = line_ftd[:pos + offset] + rhs = line_ftd[pos + len(delim) + offset:] + + # format opening delimiters + if what_del_open: + level += 1 # new scope + # add separating whitespace before opening delimiter + # with some exceptions: + if (not re.search(r"(" + DEL_OPEN_STR + r"|[\w\*/=\+\-:])\s*$", line[:pos], RE_FLAGS) and + not EMPTY_RE.search(line[:pos])) or \ + re.search(SOL_STR + r"(\w+\s*:)?(ELSE)?\s*IF\s*$", line[:pos], RE_FLAGS) or \ + re.search(SOL_STR + r"(\w+\s*:)?\s*DO\s+WHILE\s*$", line[:pos], RE_FLAGS) or \ + re.search(SOL_STR + r"(SELECT)?\s*CASE\s*", line[:pos], RE_FLAGS) or \ + re.search(r"\b" + INTR_STMTS_PAR + r"\s*$", line[:pos], RE_FLAGS): + sep1 = 1 + + # format closing delimiters + else: + level += -1 # close scope + # add separating whitespace after closing delimiter + # with some exceptions: + if not re.search(r"^\s*(" + DEL_CLOSE_STR + r"|[,%:/\*])", line[pos + 1:], RE_FLAGS): + sep2 = 1 + elif re.search(r"^\s*::", line[pos + 1:], RE_FLAGS): + sep2 = len(rhs) - len(rhs.lstrip(' ')) + + # where delimiter token ends + end_of_delim = pos + len(delim) - 1 -operatorsStr = r"\.(?:and|eqv?|false|g[et]|l[et]|n(?:e(?:|qv)|ot)|or|true)\." + line_ftd = lhs.rstrip(' ') + ' ' * sep1 + \ + delim + ' ' * sep2 + rhs.lstrip(' ') -keywordsStr = "(?:a(?:llocat(?:able|e)|ssign(?:|ment))|c(?:a(?:ll|se)|haracter|lose|o(?:m(?:mon|plex)|nt(?:ains|inue))|ycle)|d(?:ata|eallocate|imension|o(?:|uble))|e(?:lse(?:|if|where)|n(?:d(?:|do|file|if)|try)|quivalence|x(?:it|ternal))|f(?:or(?:all|mat)|unction)|goto|i(?:f|mplicit|n(?:clude|quire|t(?:e(?:ger|nt|rface)|rinsic)))|logical|module|n(?:amelist|one|ullify)|o(?:nly|p(?:en|erator|tional))|p(?:a(?:rameter|use)|ointer|r(?:ecision|i(?:nt|vate)|o(?:cedure|gram))|ublic)|re(?:a[dl]|cursive|sult|turn|wind)|s(?:ave|e(?:lect|quence)|top|ubroutine)|t(?:arget|hen|ype)|use|w(?:h(?:ere|ile)|rite))" + # format commas and semicolons + if char == ',' or char == ';': + lhs = line_ftd[:pos + offset] + rhs = line_ftd[pos + 1 + offset:] + line_ftd = lhs.rstrip(' ') + char + ' ' * \ + spacey[0] + rhs.lstrip(' ') + line_ftd = line_ftd.rstrip(' ') -intrinsic_procStr = r"(?:a(?:bs|c(?:har|os)|djust[lr]|i(?:mag|nt)|ll(?:|ocated)|n(?:int|y)|s(?:in|sociated)|tan2?)|b(?:it_size|test)|c(?:eiling|har|mplx|o(?:njg|sh?|unt)|shift)|d(?:ate_and_time|ble|i(?:gits|m)|ot_product|prod)|e(?:oshift|psilon|xp(?:|onent))|f(?:loor|raction)|huge|i(?:a(?:char|nd)|b(?:clr|its|set)|char|eor|n(?:dex|t)|or|shftc?)|kind|l(?:bound|en(?:|_trim)|g[et]|l[et]|og(?:|10|ical))|m(?:a(?:tmul|x(?:|exponent|loc|val))|erge|in(?:|exponent|loc|val)|od(?:|ulo)|vbits)|n(?:earest|int|ot)|p(?:ack|r(?:e(?:cision|sent)|oduct))|r(?:a(?:dix|n(?:dom_(?:number|seed)|ge))|e(?:peat|shape)|rspacing)|s(?:ca(?:le|n)|e(?:lected_(?:int_kind|real_kind)|t_exponent)|hape|i(?:gn|nh?|ze)|p(?:acing|read)|qrt|um|ystem_clock)|t(?:anh?|iny|r(?:ans(?:fer|pose)|im))|u(?:bound|npack)|verify)(?= *\()" + # format .NOT. + if re.match(r"\.NOT\.", line[pos:pos + 5], RE_FLAGS): + lhs = line_ftd[:pos + offset] + rhs = line_ftd[pos + 5 + offset:] + line_ftd = lhs.rstrip( + ' ') + line[pos:pos + 5] + ' ' * spacey[3] + rhs.lstrip(' ') -ompDir = r"(?:atomic|barrier|c(?:apture|ritical)|do|end|flush|if|master|num_threads|ordered|parallel|read|s(?:ection(?:|s)|ingle)|t(?:ask(?:|wait|yield)|hreadprivate)|update|w(?:orkshare|rite)|!\$omp)" + # strip whitespaces from '=' and prepare assignment operator + # formatting + if char == '=': + if not REL_OP_RE.search(line[pos - 1:pos + 2]): + lhs = line_ftd[:pos + offset] + rhs = line_ftd[pos + 1 + offset:] + line_ftd = lhs.rstrip(' ') + '=' + rhs.lstrip(' ') + if not level: # remember position of assignment operator + pos_eq.append(len(lhs.rstrip(' '))) -ompClause = r"(?:a|co(?:llapse|py(?:in|private))|default|fi(?:nal|rstprivate)|i(?:and|eor|or)|lastprivate|m(?:ax|ergeable|in)|n(?:one|owait)|ordered|private|reduction|shared|untied|\.(?:and|eqv|neqv|or)\.)" + line = line_ftd -ompEnv = r"omp_(?:dynamic|max_active_levels|n(?:ested|um_threads)|proc_bind|s(?:tacksize|chedule)|thread_limit|wait_policy)" + # format assignments + for pos in pos_eq: + offset = len(line_ftd) - len(line) + is_pointer = line[pos + 1] == '>' + lhs = line_ftd[:pos + offset] + rhs = line_ftd[pos + 1 + is_pointer + offset:] + if is_pointer: + assign_op = '=>' # pointer assignment + else: + assign_op = '=' # assignment + line_ftd = lhs.rstrip( + ' ') + ' ' * spacey[1] + assign_op + ' ' * spacey[1] + rhs.lstrip(' ') + # offset w.r.t. unformatted line -# FIXME: does not correctly match operator '.op.' if it is not separated -# by whitespaces. -toUpcaseRe = re.compile("(?" + operatorsStr + - "|" + keywordsStr + "|" + intrinsic_procStr + - ")(?![A-Za-z0-9_%])", flags=re.IGNORECASE) -toUpcaseOMPRe = re.compile("(?" - + ompDir + "|" + ompClause + "|" + ompEnv + - ")(?![A-Za-z0-9_%])", flags=re.IGNORECASE) -linePartsRe = re.compile("(?P[^\"'!]*)(?P!.*)?" + - "(?P(?P[\"']).*?(?P=qchar))?") + line = line_ftd + # for more advanced replacements we separate comments and strings + line_parts = [] + str_end = -1 + instring = '' + for pos, char in enumerate(line): + if char == '"' or char == "'": # skip string + if not instring: + str_start = pos + line_parts.append(line[str_end + 1:str_start]) + instring = char + elif instring == char: + str_end = pos + line_parts.append(line[str_start:str_end + 1]) + instring = '' + if pos == len(line) - 1: + line_parts.append(line[str_end + 1:]) -def upcaseStringKeywords(line): - """Upcases the fortran keywords, operators and intrinsic routines - in line""" - res = "" - start = 0 - while start < len(line): - m = linePartsRe.match(line[start:]) - if not m: - raise SyntaxError("Syntax error, open string") - res = res + toUpcaseRe.sub(lambda match: match.group("toUpcase").upper(), - m.group("commands")) - if m.group("comment"): - res = res + m.group("comment") - if m.group("string"): - res = res + m.group("string") - start = start + m.end() - return res + # Two-sided operators + for n_op, lr_re in enumerate(LR_OPS_RE): + for pos, part in enumerate(line_parts): + if not re.match(r"['\"!]", part, RE_FLAGS): # exclude comments, strings + partsplit = lr_re.split(part) + line_parts[pos] = (' ' * spacey[n_op + 2]).join(partsplit) + line = ''.join(line_parts) -def upcaseOMP(line): - """Upcases OpenMP stuff.""" - return toUpcaseOMPRe.sub(lambda match: match.group("toUpcase").upper(), line) + # format ':' for labels + for newre in NEW_SCOPE_RE[0:2]: + if newre.search(line) and re.search(SOL_STR + r"\w+\s*:", line): + line = ': '.join(_.strip() for _ in line.split(':', 1)) + if not auto_format: + line = line_orig -def upcaseKeywords(infile, outfile, upcase_omp, logFile=sys.stderr): - """Writes infile to outfile with all the fortran keywords upcased""" + # Now it gets messy - we need to shift line break positions from original + # to reformatted line + pos_new = 0 + pos_old = 0 + linebreak_pos.sort(reverse=True) + linebreak_pos_ftd = [] while 1: - line = infile.readline() - if not line: + if pos_new == len(line) or pos_old == len(line_orig): break - line = upcaseStringKeywords(line) - if upcase_omp: - if normalizeFortranFile.ompDirRe.match(line): - line = upcaseOMP(line) - outfile.write(line) - - -def prettifyFile(infile, filename, normalize_use, decl_linelength, decl_offset, - reformat, indent, whitespace, upcase_keywords, - upcase_omp, replace, logFile): - """prettifyes the fortran source in infile into a temporary file that is - returned. It can be the same as infile. - if normalize_use normalizes the use statements (defaults to true) - if upcase_keywords upcases the keywords (defaults to true) - if replace does the replacements contained in replacer.py (defaults - to false) - - does not close the input file""" - ifile = infile - orig_filename = filename - tmpfile = None - max_pretty_iter = 5 - n_pretty_iter = 0 - - while True: - n_pretty_iter += 1 - hash_prev = md5() - hash_prev.update(ifile.read().encode("utf8")) - ifile.seek(0) - try: - if replace: - tmpfile2 = tempfile.TemporaryFile(mode="w+") - replacer.replaceWords(ifile, tmpfile2, logFile=logFile) - tmpfile2.seek(0) - if tmpfile: - tmpfile.close() - tmpfile = tmpfile2 - ifile = tmpfile - if reformat: # reformat needs to be done first - tmpfile2 = tempfile.TemporaryFile(mode="w+") - reformatFortranFile.reformat_ffile(ifile, tmpfile2, logFile=logFile, - indent_size=indent, whitespace=whitespace, - orig_filename=orig_filename) - tmpfile2.seek(0) - if tmpfile: - tmpfile.close() - tmpfile = tmpfile2 - ifile = tmpfile - if normalize_use: - tmpfile2 = tempfile.TemporaryFile(mode="w+") - normalizeFortranFile.rewriteFortranFile(ifile, tmpfile2, indent, - decl_linelength, decl_offset, - logFile, orig_filename=orig_filename) - tmpfile2.seek(0) - if tmpfile: - tmpfile.close() - tmpfile = tmpfile2 - ifile = tmpfile - if upcase_keywords: - tmpfile2 = tempfile.TemporaryFile(mode="w+") - upcaseKeywords(ifile, tmpfile2, upcase_omp, logFile) - tmpfile2.seek(0) - if tmpfile: - tmpfile.close() - tmpfile = tmpfile2 - ifile = tmpfile - hash_next = md5() - hash_next.update(ifile.read().encode("utf8")) - ifile.seek(0) - if hash_prev.digest() == hash_next.digest(): - return ifile - elif n_pretty_iter >= max_pretty_iter: - raise RuntimeError( - "Prettify did not converge in", max_pretty_iter, "steps.") - except: - logFile.write("error processing file '" + infile.name + "'\n") - raise - - -def prettfyInplace(fileName, bkDir=None, stdout=False, **kwargs): - """Same as prettify, but inplace, replaces only if needed""" - - if fileName == 'stdin': - infile = os.tmpfile() + assert line[pos_new] is line_orig[pos_old] + if linebreak_pos and pos_old > linebreak_pos[-1]: + linebreak_pos.pop() + linebreak_pos_ftd.append(pos_new) + continue + if line[pos_new] is line_orig[pos_old]: + pos_new += 1 + while pos_new < len(line) and line[pos_new] is ' ': + pos_new += 1 + pos_old += 1 + while pos_old < len(line_orig) and line_orig[pos_old] is ' ': + pos_old += 1 + elif line[pos_new] is ' ': + pos_new += 1 + elif line_orig[pos_old] is ' ': + pos_old += 1 + else: + assert False + + linebreak_pos_ftd.insert(0, 0) + + # We do not insert ampersands in empty lines and comments lines + lines_out = [line[l:r].rstrip(' ') + ' ' * ampersand_sep[pos] + '&' * min(1, r - l) + for pos, (l, r) in enumerate(zip(linebreak_pos_ftd[0:-1], linebreak_pos_ftd[1:]))] + + lines_out.append(line[linebreak_pos_ftd[-1]:]) + + if level != 0: + raise SyntaxError(filename + ':' + str(line_nr) + + ':' + FORTRAN_DEFAULT_ERROR_MESSAGE) + + return lines_out + +#========================================================================= + + +def reformat_inplace(filename, stdout=False, **kwargs): + if filename == 'stdin': + infile = tempfile.TemporaryFile(mode='r+') infile.write(sys.stdin.read()) else: - infile = open(fileName, 'r') + infile = open(filename, 'r') if stdout: - outfile = prettifyFile(infile=infile, filename=fileName, **kwargs) - outfile.seek(0) - sys.stdout.write(outfile.read()) - outfile.close() + newfile = tempfile.TemporaryFile(mode='r+') + reformat_ffile(infile=infile, outfile=newfile, **kwargs) + newfile.seek(0) + sys.stdout.write(newfile.read()) return + else: + outfile = tempfile.TemporaryFile(mode='r+') + reformat_ffile(infile=infile, outfile=outfile, **kwargs) + infile.close() + outfile.seek(0) + newfile = open(filename, 'w') + newfile.write(outfile.read()) - if bkDir and not os.path.exists(bkDir): - os.mkdir(bkDir) - if bkDir and not os.path.isdir(bkDir): - raise Error("bk-dir must be a directory, was " + bkDir) - outfile = prettifyFile(infile=infile, filename=fileName, **kwargs) - if (infile == outfile): - return +def reformat_ffile(infile, outfile, logFile=sys.stderr, indent_size=3, whitespace=2, orig_filename=None): + """ + main method to be invoked for formatting a Fortran file. + """ + debug = False + + # don't change original indentation if rel-indents set to 0 + adopt_indents = indent_size <= 0 + + if not orig_filename: + orig_filename = infile.name + + indenter = F90Indenter(orig_filename) + + infile.seek(0) + req_indents, first_indent, is_f90 = inspect_ffile_format( + infile, indent_size) infile.seek(0) - outfile.seek(0) - same = 1 + + if not is_f90: + logFile.write("*** " + orig_filename + + ": formatter can not handle f77 constructs. ***\n") + outfile.write(infile.read()) # does not handle f77 constructs + return + + nfl = 0 # fortran line counter + + do_indent = True + use_same_line = False + stream = InputStream(infile) + skip_blank = False + in_manual_block = False while 1: - l1 = outfile.readline() - l2 = infile.readline() - if (l1 != l2): - same = 0 + f_line, comments, lines = stream.nextFortranLine() + if not lines: break - if not l1: - break - if (not same): - bkFile = None - if bkDir: - bkName = os.path.join(bkDir, os.path.basename(fileName)) - bName = bkName - i = 0 - while os.path.exists(bkName): - i += 1 - bkName = bName + "." + str(i) - bkFile = file(bkName, "w") - infile.seek(0) - if bkFile: - bkFile.write(infile.read()) - bkFile.close() - outfile.seek(0) - newFile = file(fileName, 'w') - newFile.write(outfile.read()) - newFile.close() - infile.close() - outfile.close() + + comment_lines = [] + for line, comment in zip(lines, comments): + has_comment = bool(comment.strip()) + sep = has_comment and not comment.strip() == line.strip() + if line.strip(): # empty lines between linebreaks are ignored + comment_lines.append(' ' * sep + comment.strip()) + + orig_lines = lines + nfl += 1 + + auto_align = not any(NO_ALIGN_RE.search(_) for _ in lines) + auto_format = not (in_manual_block or any( + _.lstrip().startswith('!&') for _ in comment_lines)) + if not auto_format: + auto_align = False + if (len(lines)) == 1: + valid_directive = True + if lines[0].strip().startswith('!&<'): + if in_manual_block: + valid_directive = False + else: + in_manual_block = True + if lines[0].strip().startswith('!&>'): + if not in_manual_block: + valid_directive = False + else: + in_manual_block = False + if not valid_directive: + raise SyntaxError(orig_filename + ':' + str(stream.line_nr) + + ':' + FORMATTER_ERROR_MESSAGE) + + indent = [0] * len(lines) + + is_omp_conditional = False + + if OMP_RE.match(f_line) and not OMP_DIR_RE.match(f_line): + # convert OMP-conditional fortran statements into normal fortran statements + # but remember to convert them back + f_line = OMP_RE.sub(' ', f_line, count=1) + lines = [OMP_RE.sub(' ', l, count=1) for l in lines] + is_omp_conditional = True + + is_empty = EMPTY_RE.search(f_line) # blank line or comment only line + + if USE_PARSE_RE.match(f_line): + do_indent = False + elif OMP_DIR_RE.match(f_line): + # move '!$OMP' to line start, otherwise don't format omp directives + lines = ['!$OMP' + (len(l) - len(l.lstrip())) * + ' ' + OMP_DIR_RE.sub('', l, count=1) for l in lines] + do_indent = False + elif lines[0].startswith('#'): # preprocessor macros + assert len(lines) == 1 + do_indent = False + elif EMPTY_RE.search(f_line): # empty lines including comment lines + assert len(lines) == 1 + if any(comments): + if lines[0].startswith('!'): + # don't indent unindented comments + do_indent = False + else: + indent[0] = indenter.get_fline_indent() + elif skip_blank: + continue + else: + do_indent = False + + lines = [l.strip(' ') for l in lines] + else: + + manual_lines_indent = [] + if not auto_align: + manual_lines_indent = [ + len(l) - len(l.lstrip(' ').lstrip('&')) for l in lines] + manual_lines_indent = [ind - manual_lines_indent[0] + for ind in manual_lines_indent] + + # ampersands at line starts are remembered (pre_ampersand) and recovered later; + # define the desired number of separating whitespaces before ampersand at line end (ampersand_sep): + # - insert one whitespace character before ampersand as default formatting + # - don't do this if next line starts with an ampersand but remember the original formatting + # this "special rule" is necessary since ampersands starting a line can be used to break literals, + # so inserting a whitespace in this case leads to invalid syntax. + + pre_ampersand = [] + ampersand_sep = [] + sep_next = None + for pos, line in enumerate(lines): + m = re.search(SOL_STR + r'(&\s*)', line) + if m: + pre_ampersand.append(m.group(1)) + sep = len( + re.search(r'(\s*)&[\s]*(?:!.*)?$', lines[pos - 1]).group(1)) + ampersand_sep.append(sep) + else: + pre_ampersand.append('') + if pos > 0: + ampersand_sep.append(1) + + lines = [l.strip(' ').strip('&') for l in lines] + f_line = f_line.strip(' ') + + # find linebreak positions + linebreak_pos = [] + for pos, line in enumerate(lines): + found = None + for char_pos, char in CharFilter(enumerate(line)): + if char == "&": + found = char_pos + if found: + linebreak_pos.append(found) + elif line.lstrip(' ').startswith('!'): + linebreak_pos.append(0) + + linebreak_pos = [sum(linebreak_pos[0:_ + 1]) - + 1 for _ in range(0, len(linebreak_pos))] + + lines = format_single_fline( + f_line, whitespace, linebreak_pos, ampersand_sep, orig_filename, stream.line_nr, auto_format) + + # we need to insert comments in formatted lines + for pos, (line, comment) in enumerate(zip(lines, comment_lines)): + if pos < len(lines) - 1: + has_nl = True + else: + has_nl = not re.search(EOL_SC, line) + lines[pos] = lines[pos].rstrip(' ') + comment + '\n' * has_nl + + try: + rel_indent = req_indents[nfl] + except IndexError: + rel_indent = 0 + + indenter.process_lines_of_fline( + f_line, lines, rel_indent, indent_size, stream.line_nr, manual_lines_indent) + indent = indenter.get_lines_indent() + + # recover ampersands at line start + for pos, line in enumerate(lines): + amp_insert = pre_ampersand[pos] + if amp_insert: + indent[pos] += -1 + lines[pos] = amp_insert + line + + lines = [re.sub(r"\s+$", '\n', l, RE_FLAGS) + for l in lines] # deleting trailing whitespaces + + for ind, line, orig_line in zip(indent, lines, orig_lines): + # get actual line length excluding comment: + line_length = 0 + for line_length, _ in CharFilter(enumerate(line)): + pass + line_length += 1 + + if do_indent: + ind_use = ind + first_indent + else: + if use_same_line: + ind_use = 1 + else: + ind_use = 0 + if ind_use + line_length <= 133: # 132 plus 1 newline char + outfile.write('!$' * is_omp_conditional + ' ' * + (ind_use - 2 * is_omp_conditional + + len(line) - len(line.lstrip(' '))) + line.lstrip(' ')) + elif line_length <= 133: + outfile.write('!$' * is_omp_conditional + ' ' * + (133 - 2 * is_omp_conditional - + len(line.lstrip(' '))) + line.lstrip(' ')) + logFile.write("*** " + orig_filename + ":" + str(stream.line_nr) + + ": auto indentation failed due to 132 chars limit, line should be splitted. ***\n") + else: + outfile.write(orig_line) + logFile.write("*** " + orig_filename + ":" + str(stream.line_nr) + + (": auto indentation and whitespace formatting failed due to 132 chars limit, line should be splitted. ***\n")) + if debug: + sys.stderr.write(' ' * ind_use + line + '\n') + + # no indentation of semicolon separated lines + if re.search(r";\s*$", f_line, RE_FLAGS): + do_indent = False + use_same_line = True + else: + do_indent = True + use_same_line = False + + # rm subsequent blank lines + skip_blank = is_empty and not any(comments) def main(argv=None): if argv is None: argv = sys.argv - defaultsDict = {'upcase': 1, 'normalize-use': 1, 'omp-upcase': 1, - 'decl-linelength': 100, 'decl-offset': 50, - 'reformat': 1, 'indent': 3, 'whitespace': 1, - 'replace': 1, - 'stdout': 0, - 'do-backup': 0, - 'backup-dir': 'preprettify', - 'report-errors': 1} - - usageDesc = ("usage:\nfprettify" +""" - [--[no-]upcase] [--[no-]normalize-use] [--[no-]omp-upcase] [--[no-]replace] - [--[no-]reformat] [--indent=3] [--whitespace=1] [--help] - [--[no-]stdout] [--[no-]do-backup] [--backup-dir=bk_dir] [--[no-]report-errors] file1 [file2 ...] + defaultsDict = {'indent': 3, 'whitespace': 2, + 'stdout': 0, 'report-errors': 1} + + usageDesc = ("usage:\n" + argv[0] + """ + [--indent=3] [--whitespace=2] + [--[no-]stdout] [--[no-]report-errors] file1 [file2 ...] + [--help] Auto-format F90 source file1, file2, ...: If no files are given, stdin is used. - --normalize-use - Sorting and alignment of variable declarations and USE statements, removal of unused list entries. - The line length of declarations is controlled by --decl-linelength=n, the offset of the variable list - is controlled by --decl-offset=n. - --reformat Auto-indentation, auto-alignment and whitespace formatting. - Amount of whitespace controlled by --whitespace = 0, 1, 2. + Amount of whitespace controlled by --whitespace=0,1,2. For indenting with a relative width of n columns specify --indent=n. For manual formatting of specific lines: - disable auto-alignment by starting line continuation with an ampersand '&'. - completely disable reformatting by adding a comment '!&'. For manual formatting of a code block, use: - start a manually formatted block with a '!&<' comment and close it with a '!&>' comment. - --upcase - Upcasing fortran keywords. - --omp-upcase - Upcasing OMP directives. - --replace - If requested the replacements performed by the replacer.py script are also performed. Note: these replacements are specific to CP2K. --stdout write output to stdout - --[no-]do-backup - store backups of original files in backup-dir (--backup-dir option) --[no-]report-errors report warnings and errors - Note: for editor integration, use options --no-normalize-use --no-report-errors - Defaults: """ + str(defaultsDict)) - replace = None if "--help" in argv: sys.stderr.write(usageDesc + '\n') return(0) args = [] for arg in argv[1:]: m = re.match( - r"--(no-)?(normalize-use|upcase|omp-upcase|replace|reformat|stdout|do-backup|report-errors)", arg) + r"--(no-)?(stdout|report-errors)", arg) if m: defaultsDict[m.groups()[1]] = not m.groups()[0] else: m = re.match( - r"--(indent|whitespace|decl-linelength|decl-offset)=(.*)", arg) + r"--(indent|whitespace)=(.*)", arg) if m: defaultsDict[m.groups()[0]] = int(m.groups()[1]) else: - m = re.match(r"--(backup-dir)=(.*)", arg) - if m: - path = os.path.abspath(os.path.expanduser(m.groups()[1])) - defaultsDict[m.groups()[0]] = path + if arg.startswith('--'): + sys.stderr.write('unknown option ' + arg + '\n') else: - if arg.startswith('--'): - sys.stderr.write('unknown option ' + arg + '\n') - else: - args.append(arg) - bkDir = '' - if defaultsDict['do-backup']: - bkDir = defaultsDict['backup-dir'] - if bkDir and not os.path.exists(bkDir): - # Another parallel running instance might just have created the - # dir. - try: - os.mkdir(bkDir) - except: - assert(os.path.exists(bkDir)) - if bkDir and not os.path.isdir(bkDir): - sys.stderr.write("bk-dir must be a directory" + '\n') - sys.stderr.write(usageDesc + '\n') - else: - failure = 0 - if not args: - args = ['stdin'] - for fileName in args: - if not os.path.isfile(fileName) and not fileName == 'stdin': - sys.stderr.write("file " + fileName + " does not exists!\n") - else: - stdout = defaultsDict['stdout'] or fileName == 'stdin' - try: - logFile = sys.stderr if defaultsDict[ - 'report-errors'] else open(os.devnull, "w") - prettfyInplace(fileName, bkDir=bkDir, - stdout=stdout, - logFile=logFile, - normalize_use=defaultsDict[ - 'normalize-use'], - decl_linelength=defaultsDict[ - 'decl-linelength'], - decl_offset=defaultsDict[ - 'decl-offset'], - reformat=defaultsDict['reformat'], - indent=defaultsDict['indent'], - whitespace=defaultsDict[ - 'whitespace'], - upcase_keywords=defaultsDict[ - 'upcase'], - upcase_omp=defaultsDict[ - 'omp-upcase'], - replace=defaultsDict['replace']) - except: - failure += 1 - import traceback - sys.stderr.write('-' * 60 + "\n") - traceback.print_exc(file=sys.stderr) - sys.stderr.write('-' * 60 + "\n") - sys.stderr.write( - "Processing file '" + fileName + "'\n") - return(failure > 0) - -#========================================================================= + args.append(arg) + failure = 0 + if not args: + args = ['stdin'] + for filename in args: + if not os.path.isfile(filename) and not filename == 'stdin': + sys.stderr.write("file " + filename + " does not exists!\n") + else: + stdout = defaultsDict['stdout'] or filename == 'stdin' + try: + logFile = sys.stderr if defaultsDict[ + 'report-errors'] else open(os.devnull, "w") + reformat_inplace(filename, + stdout=stdout, + logFile=logFile, + indent_size=defaultsDict['indent'], + whitespace=defaultsDict['whitespace']) + except: + failure += 1 + import traceback + sys.stderr.write('-' * 60 + "\n") + traceback.print_exc(file=sys.stderr) + sys.stderr.write('-' * 60 + "\n") + sys.stderr.write( + "Processing file '" + filename + "'\n") + return(failure > 0) def run_selftest(): - # create temporary file with example code - fn = os.path.join(tempfile.gettempdir(), "prettify_selftest.F") - ref = selftest.content - f = open(fn, "w") - f.write(ref) - f.close() - - # call prettify - rtn = main([sys.argv[0], fn]) + infile = open('examples/fortran_before.f90', mode='r') + outfile = tempfile.NamedTemporaryFile(mode='r+') + outfile.write(infile.read()) + outfile.seek(0) + + rtn = main([sys.argv[0], outfile.name]) assert(rtn == 0) - # check if file was altered - result = open(fn).read() - for i, (l1, l2) in enumerate(zip(result.split("\n"), ref.split("\n"))): + ref = open('examples/fortran_after.f90', 'r').read() + result = outfile.read() + for i, (l1, l2) in enumerate(zip(result.split('\n'), ref.split('\n'))): if(l1 != l2): print("Error: Line %d is not invariant." % i) print("before: " + l1) print("after : " + l2) - os.remove(fn) return(1) - os.remove(fn) print("Prettify selftest passed.") return(0) #========================================================================= + if(__name__ == '__main__'): if(len(sys.argv) == 2 and sys.argv[-1] == "--selftest"): rtn = run_selftest() @@ -374,4 +1025,14 @@ def run_selftest(): rtn = main() sys.exit(rtn) + +try: + any +except NameError: + def any(iterable): + for element in iterable: + if element: + return True + return False + # EOF diff --git a/setup.py b/setup.py index 7a84a46..b035216 100755 --- a/setup.py +++ b/setup.py @@ -7,9 +7,5 @@ author='Patrick Seewald, Ole Schuett, Mohamed Fawzi', license = 'GPL', entry_points={'console_scripts': ['fprettify = fprettify:main']}, - py_modules=['fprettify', - 'formatting.normalizeFortranFile', - 'formatting.reformatFortranFile', - 'formatting.replacer', - 'formatting.selftest'] + py_modules=['fprettify','fparse_utils'] )