Skip to content

Commit ebcf3ca

Browse files
committed
Added a script called nrofftomdconversion.py that will convert nroff files to md files with only occasional hiccups. Note: need to install pandoc to run the script
Signed-off-by: Colton Kammes <[email protected]>
1 parent ee3bd58 commit ebcf3ca

File tree

1 file changed

+352
-0
lines changed

1 file changed

+352
-0
lines changed

contrib/nroff-to-md-conversion.py

Lines changed: 352 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,352 @@
1+
'''
2+
Usage information:
3+
The script must be run from the directory that contains the man page folder.
4+
The first line after the imports specifies which folder contains the man pages,
5+
the program will go through this folder and look for the file/files and convert
6+
them into a markdown format.
7+
8+
The two options to run the script are:
9+
python3 nroff-to-md-conversion.py -f myfile.3in
10+
converts a single file in the subdirectory from nroff to markdown
11+
12+
python3 nroff-to-md-conversion.py --all
13+
converts every file in the subdirectory from nroff to markdown
14+
'''
15+
16+
import os # os.system(command), os.chdir(folder)
17+
import argparse
18+
import re
19+
20+
# Folder (relative to the current working directory) that holds the man pages.
manPageDir = "man3"

# determine what format of the seealso is outputted:
#   True  -> [`NAME(3)`](./?file=NAME.md)
#   False -> [`NAME(3)`](NAME.html)
newLinks = True

# The command line is parsed before changing directory so that `--help` and
# usage errors still work when the man page folder does not exist.
parser = argparse.ArgumentParser()
parser.add_argument('-f', dest='file', type=str, help='.3in file to convert to markdown')
parser.add_argument('--all', dest='convertAll', action="store_true", default=False, help='option to batch convert all files in subdirectory')
args = parser.parse_args()

# Every file name used from here on is resolved inside the man page folder.
os.chdir(manPageDir)
29+
30+
def writeLines(lines, filename):
    '''
    Write every string in `lines` to `filename`, replacing previous content.

    The strings are written back to back; no separator or newline is added,
    so each entry is expected to carry its own line ending.
    '''
    with open(filename, "w") as out:
        out.writelines(lines)
43+
44+
def allUpper(line):
    '''
    Helper for adjustMarkdown: tell whether `line` looks like an all-caps title.

    Trailing whitespace is ignored. An empty line is never a title. Only
    alphabetic characters are inspected, so punctuation such as '-' or digits
    do not matter; a non-empty line without any letters therefore counts as
    upper case.
    '''
    stripped = line.rstrip()
    if not stripped:
        return False
    return all(ch.isupper() for ch in stripped if ch.isalpha())
62+
63+
def addLink(mpiFile):
    '''
    Build the markdown link line for one entry of the see also section.

    Trailing whitespace, every space and every backslash are removed from
    `mpiFile`; the result is used both as link text and as link target.
    Depending on the module-level `newLinks` flag the returned line is

        [`NAME(3)`](./?file=NAME.md)      (newLinks is true)
        [`NAME(3)`](NAME.html)            (newLinks is false)

    followed by a newline.
    '''
    pageName = mpiFile.rstrip().replace(" ", "").replace("\\", "")
    template = "[`{}(3)`](./?file={}.md)\n" if newLinks else "[`{}(3)`]({}.html)\n"
    return template.format(pageName, pageName)
86+
87+
def startOfCodeBlock(line):
    '''
    Helper for adjustMarkdown: return the opening code fence for a heading.

    `line` is the heading that precedes the code block (for example
    "## C Syntax"). The returned string always ends with a newline:

        heading mentions "Fortran"  -> "```fortran\\n"
        heading contains a 'C'      -> "```c\\n"
        anything else               -> "```\\n"

    A string is always returned, so the caller can append the result to its
    output unconditionally.
    '''
    # "Fortran" is tested first: a Fortran heading may also contain a capital
    # C (e.g. "Fortran Constants") and must not be labelled as C.
    if 'Fortran' in line:
        return "```fortran\n"
    if 'C' in line:
        return "```c\n"
    # Unknown language: fall back to a plain fence instead of returning None.
    return "```\n"
93+
94+
def adjustWords(words):
    '''
    Add appropriate `` around function names and parameters.

    `words` is a line already split on single spaces. Each word is treated
    as follows (first matching rule wins):

      * contains '_'            -> function name: wrapped in backticks; one
                                   trailing non-alphanumeric character (a
                                   punctuation mark or the line's newline)
                                   is moved outside the backticks
      * starts and ends with '*' -> parameter: the asterisks become
                                   backticks; a non-alphanumeric character
                                   just before the closing asterisk is moved
                                   behind it
      * contains a backslash    -> the backslashes are removed

    Empty words (produced by consecutive spaces) are passed through
    untouched. The input list is not modified.

    Returns the words joined with single spaces.
    '''
    adjusted = []
    for word in words:
        # Splitting on ' ' yields empty tokens for runs of spaces; they have
        # no first/last character to inspect, so keep them as they are. The
        # join below then restores the original spacing.
        if not word:
            adjusted.append(word)
            continue

        last_mark = ''
        size = len(word)

        if '_' in word:
            # Move the punctuation out of ``
            if not word[size - 1].isalnum():
                last_mark = word[size - 1]
                word = word[:size - 1]
            word = '`{}`'.format(word)
        elif word[0] == '*' and word[size - 1] == '*':
            # Explicit indices (not word[-2]) so that a lone '*' wraps around
            # to its only character instead of raising.
            if not word[size - 2].isalnum():
                last_mark = word[size - 2]
                word = word[:size - 2] + word[size - 1:]
            word = word.replace('*', '`')
        elif '\\' in word:
            # Delete unnecessary escape signs
            word = word.replace('\\', '')

        adjusted.append(word + last_mark)

    return ' '.join(adjusted)
117+
118+
def checkBreak(line):
    '''
    Add a newline in front of a code block line if necessary.

    A blank line is inserted before `line` when it starts a new group inside
    a code block, i.e. when it contains one of

      * "#include"                     (beginning of c)
      * "USE MPI" or "USE mpi_f08"     (beginning of fortran / fortran2008)
      * " MPI_"                        (beginning of a function in c)
      * "MPI_" without any ':'         (beginning of a function in fortran)

    Otherwise the line is returned unchanged.
    '''
    startsNewGroup = (
        "#include" in line
        or "USE MPI" in line
        or "USE mpi_f08" in line
        or " MPI_" in line
        or ("MPI_" in line and ':' not in line)
    )
    return "\n" + line if startsNewGroup else line
141+
142+
def _seeAlsoLinks(sourceLine):
    '''
    Return the link lines for one source line of the see also section.

    When the line names several MPI pages, one addLink() line per name is
    returned in reading order. Otherwise the whole line is handed to
    addLink(), which strips spaces and backslashes itself.
    '''
    # Digits are part of the pattern because MPI names may contain them
    # (e.g. MPI_Comm_c2f).
    names = re.findall(r'MPI_[a-zA-Z0-9_]+', sourceLine)
    if len(names) > 1:
        return [addLink(name) for name in names]
    return [addLink(sourceLine)]


def adjustMarkdown(filename):
    '''
    Read the markdown file `filename` and return the cleaned-up lines.

    The file is scanned line by line; the first line is never emitted
    itself, it is only looked at as the text above a heading underline.
    For every non-blank line one of these rules applies:

      * "====" underline      -> the previous line becomes a heading: '# '
                                 when it is all upper case, '## ' otherwise.
                                 An open code block is closed first.
      * four-space indent     -> code, outside normal text sections. The
                                 first code line after a '##' heading is
                                 preceded by the fence from startOfCodeBlock().
      * line containing "**"  -> emitted with every '*' removed.
      * term followed two lines later by a ':' line
                              -> one "* `term` : text" bullet; the term is
                                 remembered and later set in fixed-width
                                 font wherever it appears as a whole word.
      * other text            -> kept only inside normal text sections (all
                                 '# ' sections whose title contains neither
                                 'Syntax' nor 'Parameter').

    Outside code blocks, parameter bullets and the see also part the line is
    passed through adjustWords(). Tabs and backslashes are removed
    everywhere. In the see also part every line containing "MPI_" is
    replaced by addLink() lines.

    Returns the new list of lines (each carrying its own line ending). The
    file itself is not modified.
    '''
    with open(filename, "r") as fh:
        workingLines = fh.readlines()

    newLines = []
    fixedWidthWords = []

    inCodeBlock = False
    addText = False
    parameterLine = False
    # Normal text section includes all sections except for Parameter and Syntax
    normalText = False
    seeAlso = False

    for i in range(1, len(workingLines)):
        current = workingLines[i]
        previous = workingLines[i - 1]
        line = ""

        # delete unnecessary blank lines
        if current.isspace():
            continue

        # titles
        if "====" in current:
            if inCodeBlock:
                newLines.append("```\n")
                newLines.append('\n')
                inCodeBlock = False

            addText = False

            # if all caps, then heading 1
            if allUpper(previous):
                if "SEE ALSO" in previous:
                    seeAlso = True
                # add a new line before each title except the first one
                if previous != "NAME\n":
                    line += '\n'
                line += '# ' + previous.title() + '\n'

                # Mark whether this is a normal text section
                normalText = 'Syntax' not in line and 'Parameter' not in line
            # else, heading 2
            else:
                line = '## ' + previous.title() + '\n'

        # indented blocks: the test must look for the same four-character
        # indent that is sliced off below
        elif "    " in current and not normalText:
            inCodeBlock = True
            if len(newLines) > 1:
                if "##" in newLines[-1]:
                    # first code line after a language heading: open the fence
                    # (plain fence if the helper cannot name a language)
                    newLines.append(startOfCodeBlock(newLines[-1]) or "```\n")
                    line = current[4:]
                else:
                    line = checkBreak(current[4:])
                    # When continuing a line in a code block, use six spaces
                    # instead of a tab
                    if line[0] == '\t':
                        line = '      ' + line[1:]
            else:
                # NOTE(review): indented text before any output exists is
                # flagged with a marker line; intent unclear - confirm whether
                # it should be dropped instead.
                print("HERE")
                line = "-----------HERE----------------"

        # non-indented blocks
        # check to make sure not going out of bounds
        elif i + 2 < len(workingLines):
            # get name at beginning
            if "**" in current:
                line = current.replace("*", "")

            # handle ':' sections
            elif workingLines[i + 2][0] == ':':
                parameterLine = True
                term = current.rstrip()
                # ticks are added here; they will not be added later
                line = '* `' + term + '`' + ' : ' + workingLines[i + 2][4:]
                # remember the word to syntax highlight it in other lines
                fixedWidthWords.append(term)

            # text blocks below description and errors
            elif len(newLines) > 2:
                # If the text is not in a parameter or syntax section, add it
                if normalText:
                    addText = True

                # filter headers and blank lines
                if addText and not allUpper(current):
                    # A backslash right before the line ending marks a list of
                    # references; from here on lines naming MPI pages are
                    # turned into links further down.
                    # NOTE(review): the flag is never reset - confirm this is
                    # wanted for sections following such a list.
                    if current[len(current) - 2] == '\\':
                        seeAlso = True
                    line = current

            # if a normal text is under name section, also add it to newLines
            elif normalText and not current.isupper():
                line = current

        else:
            line = current

        if not inCodeBlock and not parameterLine and not seeAlso:
            # adjust words for each line
            try:
                line = adjustWords(line.split(' '))
            except IndexError:
                # adjustWords() may fail on the empty tokens produced by runs
                # of spaces; such a line is kept as it is
                pass

            # make things in fixedWidthWords fixed-width font if needed; the
            # surrounding spaces are required (and preserved) to prevent
            # things like `comm`unicator
            for word in fixedWidthWords:
                pattern = r'(?<= )' + re.escape(word) + r'(?= )'
                line = re.sub(pattern, lambda match, w=word: '`' + w + '`', line)

        # replace any remaining tabs with spaces
        line = line.replace("\t", " ")

        # remove any unwanted backslashes
        line = line.replace("\\", "")

        # get rid of all * characters that aren't required <- doesn't work if
        # there are code blocks in the description
        if not inCodeBlock and not parameterLine:
            line = line.replace("*", "")

        # see also entries: names are taken from the source line, not from
        # the processed text, and emitted in reading order
        if seeAlso and "MPI_" in current:
            links = _seeAlsoLinks(current)
            newLines.extend(links[:-1])
            line = links[-1]

        # finally, add line
        if line:
            newLines.append(line)

        # at the end of the line, reset the line tag for the next iteration
        parameterLine = False

    return newLines
323+
324+
def runPandoc(file):
    '''
    Convert the nroff man page `file` to standalone markdown with pandoc.

    The output is written next to the input; its name is `file` with the
    last three characters replaced by "md" (MPI_Bcast.3in -> MPI_Bcast.md).

    Raises:
        FileNotFoundError: pandoc is not installed / not on the PATH.
        subprocess.CalledProcessError: pandoc exited with an error.
    '''
    import subprocess  # local import: only this function needs it

    target = file[:-3] + "md"
    # An argument list (no shell) keeps file names with spaces or shell
    # metacharacters intact.
    subprocess.run(
        ["pandoc", file, "-f", "man", "-t", "markdown", "-s", "-o", target],
        check=True,
    )
328+
329+
330+
def convert(nroffFilename):
    '''
    Calls all methods to convert from .3in to md.

    pandoc first produces the markdown file (same name with the last three
    characters replaced by "md"); that file is then read, cleaned up by
    adjustMarkdown() and overwritten with the result.
    '''
    mdFilename = nroffFilename[:-3] + "md"
    runPandoc(nroffFilename)
    writeLines(adjustMarkdown(mdFilename), mdFilename)
339+
340+
def convertAll():
    '''
    Convert every .3in file of the current directory to markdown.

    Files are processed in sorted order. A file that cannot be converted is
    reported on stdout together with the reason and the batch continues with
    the next one.
    '''
    for filename in sorted(os.listdir()):
        # Only real man page sources: the output name is derived by cutting
        # the last three characters, so e.g. "page.3in.bak" must be skipped.
        if not filename.endswith(".3in"):
            continue
        try:
            convert(filename)
        except Exception as err:
            # Best effort per file; KeyboardInterrupt still stops the batch.
            print("Couldn't convert", filename, "-", err)
347+
348+
# Entry point: batch mode takes precedence over a single file.
if args.convertAll:
    convertAll()
elif args.file:
    convert(args.file)
else:
    # Neither option was given: report a usage error instead of failing
    # inside convert() with a missing file name.
    parser.error("nothing to convert: pass -f FILE or --all")
352+

0 commit comments

Comments
 (0)