summary | refs | log | tree | commit | diff
path: root/pygnulib
diff options
context:
space:
mode:
author Bruno Haible <bruno@clisp.org> 2022-08-07 22:53:32 +0200
committer Bruno Haible <bruno@clisp.org> 2022-08-07 23:04:30 +0200
commit 81b8c4d5565dbbea10eb3561063d2e8da52148d7 (patch)
tree 2e1054c688f7d41bf10a9ea2527f9438b1979e74 /pygnulib
parent 586000e597d4ef7cba8de869a15ad5c922a2010c (diff)
download gnulib-81b8c4d5565dbbea10eb3561063d2e8da52148d7.tar.gz
gnulib-tool.py: Fix section extraction from module descriptions.
The code with self.content.split(section)[-1] was broken because it recognizes an indented section label.
Similar code with ('\n' + self.content).split('\n' + section)[-1] would still be broken because it recognizes an indented section label in the first line of the file.
The code with section_label_regex was broken because sometimes it returns the second-to-last section with the given label, not the last one.
Also, whitespace after the colon was not ignored.

* pygnulib/GLModuleSystem.py (GLModule.__init__): Dissect the module description's contents immediately, once only, in a reliable way.
(GLModule.getDescription, GLModule.getComment): Simplify.
(GLModule.getStatus): Simplify. Return a string.
(GLModule.getStatuses): New function. Return a list.
(GLModule.getNotice, GLModule.getApplicability, GLModule.getFiles, GLModule.getDependencies, GLModule.getAutoconfSnippet_Early, GLModule.getAutoconfSnippet, GLModule.getAutomakeSnippet_Conditional, GLModule.getInclude, GLModule.getLink, GLModule.getLicense_Raw): Simplify.
(GLModule.getLicense): Remove whitespace after calling getLicense_Raw.
(GLModule.getMaintainer): Simplify.
(GLModuleTable.transitive_closure): Call getStatuses() instead of getStatus().
* pygnulib/GLEmiter.py: Likewise.
* gnulib-tool.py (main): For --extract-description, --extract-comment, --extract-status, --extract-notice, --extract-autoconf-snippet, --extract-automake-snippet, --extract-include-directive, --extract-link-directive, --extract-maintainer, don't add an extra newline after the snippet.
Diffstat (limited to 'pygnulib')
-rw-r--r-- pygnulib/GLEmiter.py 4
-rw-r--r-- pygnulib/GLModuleSystem.py 387
2 files changed, 100 insertions, 291 deletions
diff --git a/pygnulib/GLEmiter.py b/pygnulib/GLEmiter.py
index e0164ed7cc..61cabc92fa 100644
--- a/pygnulib/GLEmiter.py
+++ b/pygnulib/GLEmiter.py
@@ -974,9 +974,9 @@ AC_DEFUN([%V1%_LIBSOURCES], [
# Skip the contents if it's entirely empty.
if snippet.strip():
# Check status of the module.
- status = module.getStatus()
+ statuses = module.getStatuses()
islongrun = False
- for word in status:
+ for word in statuses:
if word == 'longrunning-test':
islongrun = True
break
diff --git a/pygnulib/GLModuleSystem.py b/pygnulib/GLModuleSystem.py
index ec2ff0c35c..32a133009a 100644
--- a/pygnulib/GLModuleSystem.py
+++ b/pygnulib/GLModuleSystem.py
@@ -177,14 +177,11 @@ class GLModule(object):
path. GLModule can get all information about module, get its dependencies,
files, etc.'''
- section_label_regex = '(?:Description:|Comment:|Status:|Notice:|Applicability:|\
-Files:|Depends-on:|configure\\.ac-early:|configure\\.ac:|Makefile\\.am:|\
-Include:|Link:|License:|Maintainer:)'
-
section_label_pattern = \
re.compile('^(Description|Comment|Status|Notice|Applicability|'
+ 'Files|Depends-on|configure\\.ac-early|configure\\.ac|'
- + 'Makefile\\.am|Include|Link|License|Maintainer):$')
+ + 'Makefile\\.am|Include|Link|License|Maintainer):$',
+ re.M)
def __init__(self, config, path, patched=False):
'''GLModule.__init__(config, path[, patched]) -> GLModule
@@ -209,8 +206,20 @@ Include:|Link:|License:|Maintainer:)'
self.config = config
self.filesystem = GLFileSystem(self.config)
self.modulesystem = GLModuleSystem(self.config)
+ # Read the module description file into memory.
with codecs.open(path, 'rb', 'UTF-8') as file:
self.content = file.read().replace('\r\n', '\n')
+ # Dissect it into sections.
+ self.sections = dict()
+ last_section_label = None
+ last_section_start = 0
+ for match in GLModule.section_label_pattern.finditer(self.content):
+ if last_section_label != None:
+ self.sections[last_section_label] = self.content[last_section_start : match.start()]
+ last_section_label = match.group(1)
+ last_section_start = match.end() + 1
+ if last_section_label != None:
+ self.sections[last_section_label] = self.content[last_section_start:]
def __eq__(self, module):
'''x.__eq__(y) <==> x==y'''
@@ -378,117 +387,51 @@ Include:|Link:|License:|Maintainer:)'
'''GLModule.getDescription() -> str
Return description of the module.'''
- section = 'Description:'
- if 'description' not in self.cache:
- if section not in self.content:
- result = ''
- else: # if section in self.content
- pattern = '^%s[\t ]*(.*?)%s' % (section, GLModule.section_label_regex)
- pattern = re.compile(pattern, re.S | re.M)
- result = pattern.findall(self.content)
- if type(result) is list:
- if not result:
- result = ''
- else: # if result
- result = result[-1]
- result = result.strip()
- self.cache['description'] = result
- return self.cache['description']
+ return self.sections.get('Description', '')
def getComment(self):
'''GLModule.getComment() -> str
Return comment to module.'''
- section = 'Comment:'
- if 'comment' not in self.cache:
- if section not in self.content:
- result = ''
- else: # if section in self.content
- pattern = '^%s[\t ]*(.*?)%s' % (section, GLModule.section_label_regex)
- pattern = re.compile(pattern, re.S | re.M)
- result = pattern.findall(self.content)
- if type(result) is list:
- if not result:
- result = ''
- else: # if result
- result = result[-1]
- result = result.strip()
- self.cache['comment'] = result
- return self.cache['comment']
+ return self.sections.get('Comment', '')
def getStatus(self):
'''GLModule.getStatus() -> str
Return module status.'''
- section = 'Status:'
- if 'status' not in self.cache:
- if section not in self.content:
- result = ''
- else: # if section in self.content
- snippet = self.content.split(section)[-1]
- lines = [ '%s\n' % line
- for line in snippet.split('\n') ]
- parts = list()
- for line in lines:
- findflag = GLModule.section_label_pattern.findall(line)
- if findflag:
- break
- parts += [line]
- result = [ part.strip()
- for part in parts
- if part.strip() ]
- self.cache['status'] = list(result)
- return list(self.cache['status'])
+ return self.sections.get('Status', '')
+
+ def getStatuses(self):
+ '''GLModule.getStatuses() -> list
+
+ Return module status.'''
+ if 'statuses' not in self.cache:
+ snippet = self.getStatus()
+ result = [ line.strip()
+ for line in snippet.split('\n')
+ if line.strip() ]
+ self.cache['statuses'] = result
+ return self.cache['statuses']
def getNotice(self):
'''GLModule.getNotice() -> str
Return notice to module.'''
- section = 'Notice:'
- if 'notice' not in self.cache:
- if section not in self.content:
- result = ''
- else: # if section in self.content
- snippet = self.content.split(section)[-1]
- lines = [ '%s\n' % line
- for line in snippet.split('\n') ]
- parts = list()
- for line in lines:
- findflag = GLModule.section_label_pattern.findall(line)
- if findflag:
- break
- parts += [line]
- result = ''.join(parts)
- self.cache['notice'] = result
- return self.cache['notice']
+ return self.sections.get('Notice', '')
def getApplicability(self):
'''GLModule.getApplicability() -> str
Return applicability of module.'''
- section = 'Applicability:'
if 'applicability' not in self.cache:
- if section not in self.content:
- result = ''
- else: # if section in self.content
- snippet = self.content.split(section)[-1]
- lines = [ '%s\n' % line
- for line in snippet.split('\n') ]
- parts = list()
- for line in lines:
- findflag = GLModule.section_label_pattern.findall(line)
- if findflag:
- break
- parts += [line]
- parts = [ part.strip()
- for part in parts ]
- result = ''.join(parts)
- if not result.strip():
- if self.getName().endswith('-tests'):
+ result = self.sections.get('Applicability', '')
+ result = result.strip()
+ if not result:
+ # The default is 'main' or 'tests', depending on the module's name.
+ if self.isTests():
result = 'tests'
- else: # if not self.getName().endswith('-tests')
+ else:
result = 'main'
- result = result.strip()
self.cache['applicability'] = result
return self.cache['applicability']
@@ -497,115 +440,56 @@ Include:|Link:|License:|Maintainer:)'
Return list of files.
GLConfig: ac_version.'''
- ac_version = self.config['ac_version']
- section = 'Files:'
- result = list()
if 'files' not in self.cache:
- if section not in self.content:
- result = list()
- else: # if section in self.content
- snippet = self.content.split(section)[-1]
- lines = [ '%s\n' % line
- for line in snippet.split('\n') ]
- parts = list()
- for line in lines:
- findflag = GLModule.section_label_pattern.findall(line)
- if findflag:
- break
- parts += [line]
- result = [ part.strip()
- for part in parts
- if part.strip() ]
- result += [joinpath('m4', '00gnulib.m4')]
- result += [joinpath('m4', 'zzgnulib.m4')]
- result += [joinpath('m4', 'gnulib-common.m4')]
- self.cache['files'] = list(result)
- return list(self.cache['files'])
+ snippet = self.sections.get('Files', '')
+ result = [ line.strip()
+ for line in snippet.split('\n')
+ if line.strip() ]
+ result.append(joinpath('m4', '00gnulib.m4'))
+ result.append(joinpath('m4', 'zzgnulib.m4'))
+ result.append(joinpath('m4', 'gnulib-common.m4'))
+ self.cache['files'] = result
+ return self.cache['files']
def getDependencies(self):
'''GLModule.getDependencies() -> list
Return list of dependencies.
GLConfig: localpath.'''
- result = list()
- section = 'Depends-on:'
if 'dependencies' not in self.cache:
- if section not in self.content:
- depmodules = list()
- else: # if section in self.content
- snippet = self.content.split(section)[-1]
- lines = [ '%s\n' % line
- for line in snippet.split('\n') ]
- parts = list()
- for line in lines:
- findflag = GLModule.section_label_pattern.findall(line)
- if findflag:
- break
- parts += [line]
- modules = ''.join(parts)
- modules = [ line
- for line in modules.split('\n')
- if line.strip() ]
- modules = [ module
- for module in modules
- if not module.startswith('#') ]
- for line in modules:
- split = [ part
- for part in line.split(' ')
- if part.strip() ]
- if len(split) == 1:
- module = line.strip()
- condition = None
- else: # if len(split) != 1
- module = split[0]
- condition = split[1]
- result += [tuple([self.modulesystem.find(module), condition])]
+ snippet = self.sections.get('Depends-on', '')
+ modules = [ line.strip()
+ for line in snippet.split('\n')
+ if line.strip() ]
+ modules = [ module
+ for module in modules
+ if not module.startswith('#') ]
+ result = list()
+ for line in modules:
+ split = [ part
+ for part in line.split(' ')
+ if part.strip() ]
+ if len(split) == 1:
+ module = line.strip()
+ condition = None
+ else: # if len(split) != 1
+ module = split[0]
+ condition = split[1]
+ result += [tuple([self.modulesystem.find(module), condition])]
self.cache['dependencies'] = result
- return list(self.cache['dependencies'])
+ return self.cache['dependencies']
def getAutoconfSnippet_Early(self):
'''GLModule.getAutoconfSnippet_Early() -> str
Return autoconf-early snippet.'''
- section = 'configure.ac-early:'
- if 'autoconf-early' not in self.cache:
- if section not in self.content:
- result = ''
- else: # if section in self.content
- snippet = self.content.split(section)[-1]
- lines = [ '%s\n' % line
- for line in snippet.split('\n') ]
- parts = list()
- for line in lines:
- findflag = GLModule.section_label_pattern.findall(line)
- if findflag:
- break
- parts += [line]
- result = ''.join(parts)
- self.cache['autoconf-early'] = result
- return self.cache['autoconf-early']
+ return self.sections.get('configure.ac-early', '')
def getAutoconfSnippet(self):
'''GLModule.getAutoconfSnippet() -> str
Return autoconf snippet.'''
- section = 'configure.ac:'
- if 'autoconf' not in self.cache:
- if section not in self.content:
- result = ''
- else: # if section in self.content
- snippet = self.content.split(section)[-1]
- lines = [ '%s\n' % line
- for line in snippet.split('\n') ]
- parts = list()
- for line in lines:
- findflag = GLModule.section_label_pattern.findall(line)
- if findflag:
- break
- parts += [line]
- result = ''.join(parts)
- self.cache['autoconf'] = result
- return self.cache['autoconf']
+ return self.sections.get('configure.ac', '')
def getAutomakeSnippet(self):
'''getAutomakeSnippet() -> str
@@ -625,23 +509,7 @@ Include:|Link:|License:|Maintainer:)'
'''GLModule.getAutomakeSnippet_Conditional() -> str
Return conditional automake snippet.'''
- section = 'Makefile.am:'
- if 'makefile-conditional' not in self.cache:
- if section not in self.content:
- result = ''
- else: # if section in self.content
- snippet = self.content.split(section)[-1]
- lines = [ '%s\n' % line
- for line in snippet.split('\n') ]
- parts = list()
- for line in lines:
- findflag = GLModule.section_label_pattern.findall(line)
- if findflag:
- break
- parts += [line]
- result = ''.join(parts)
- self.cache['makefile-conditional'] = result
- return self.cache['makefile-conditional']
+ return self.sections.get('Makefile.am', '')
def getAutomakeSnippet_Unconditional(self):
'''GLModule.getAutomakeSnippet_Unconditional() -> str
@@ -718,24 +586,10 @@ Include:|Link:|License:|Maintainer:)'
'''GLModule.getInclude() -> str
Return include directive.'''
- section = 'Include:'
if 'include' not in self.cache:
- if section not in self.content:
- result = ''
- else: # if section in self.content
- snippet = self.content.split(section)[-1]
- lines = [ '%s\n' % line
- for line in snippet.split('\n') ]
- parts = list()
- for line in lines:
- findflag = GLModule.section_label_pattern.findall(line)
- if findflag:
- break
- parts += [line]
- result = ''.join(parts)
- result = result.strip()
- pattern = re.compile('^(["<].*[>"])', re.M)
- result = pattern.sub('#include \\1', result)
+ snippet = self.sections.get('Include', '')
+ pattern = re.compile('^(["<])', re.M)
+ result = pattern.sub('#include \\1', snippet)
self.cache['include'] = result
return self.cache['include']
@@ -743,64 +597,36 @@ Include:|Link:|License:|Maintainer:)'
'''GLModule.getLink() -> str
Return link directive.'''
- section = 'Link:'
- if 'link' not in self.cache:
- parts = list()
- if section in self.content:
- snippet = self.content.split(section)[-1]
- lines = [ '%s\n' % line
- for line in snippet.split('\n') ]
- for line in lines:
- findflag = GLModule.section_label_pattern.findall(line)
- if findflag:
- break
- parts += [line]
- parts = [ part.strip()
- for part in parts
- if part.strip() ]
- # result = ' '.join(parts)
- self.cache['link'] = parts
- return self.cache['link']
-
- def getLicense(self):
- '''GLModule.getLicense(self) -> str
-
- Get license and warn user if module lacks a license.'''
- if str(self) == 'parse-datetime':
- # This module is under a weaker license only for the purpose of some
- # users who hand-edit it and don't use gnulib-tool. For the regular
- # gnulib users they are under a stricter license.
- return 'GPL'
- else:
- license = self.getLicense_Raw()
- if not self.isTests():
- if not license:
- if self.config['errors']:
- raise GLError(18, str(self))
- else: # if not self.config['errors']
- sys.stderr.write('gnulib-tool: warning: module %s lacks a license\n' % str(self))
- if not license:
- license = 'GPL'
- return license
+ return self.sections.get('Link', '')
def getLicense_Raw(self):
'''GLModule.getLicense_Raw() -> str
Return module license.'''
- section = 'License:'
+ return self.sections.get('License', '')
+
+ def getLicense(self):
+ '''GLModule.getLicense(self) -> str
+
+ Get license and warn user if module lacks a license.'''
if 'license' not in self.cache:
- if section not in self.content:
- result = ''
- else: # if section in self.content
- pattern = '^%s[\t ]*(.*?)%s' % (section, GLModule.section_label_regex)
- pattern = re.compile(pattern, re.S | re.M)
- result = pattern.findall(self.content)
- if type(result) is list:
- if not result:
- result = ''
- else: # if result
- result = result[-1]
- result = result.strip()
+ result = None
+ if str(self) == 'parse-datetime':
+ # This module is under a weaker license only for the purpose of some
+ # users who hand-edit it and don't use gnulib-tool. For the regular
+ # gnulib users they are under a stricter license.
+ result = 'GPL'
+ else:
+ license = self.getLicense_Raw().strip()
+ if not self.isTests():
+ if not license:
+ if self.config['errors']:
+ raise GLError(18, str(self))
+ else: # if not self.config['errors']
+ sys.stderr.write('gnulib-tool: warning: module %s lacks a license\n' % str(self))
+ if not license:
+ license = 'GPL'
+ result = license
self.cache['license'] = result
return self.cache['license']
@@ -808,24 +634,7 @@ Include:|Link:|License:|Maintainer:)'
'''GLModule.getMaintainer() -> str
Return maintainer directive.'''
- section = 'Maintainer:'
- if 'maintainer' not in self.cache:
- if section not in self.content:
- result = ''
- else: # if section in self.content
- snippet = self.content.split(section)[-1]
- lines = [ '%s\n' % line
- for line in snippet.split('\n') ]
- parts = list()
- for line in lines:
- findflag = GLModule.section_label_pattern.findall(line)
- if findflag:
- break
- parts += [line]
- result = ''.join(parts)
- result = result.strip()
- self.cache['maintainer'] = result
- return self.cache['maintainer']
+ return self.sections.get('Maintainer', '')
#===============================================================================
@@ -1002,8 +811,8 @@ class GLModuleTable(object):
conditions += [None]
for depmodule in depmodules:
include = True
- status = depmodule.getStatus()
- for word in status:
+ statuses = depmodule.getStatuses()
+ for word in statuses:
if word == 'obsolete':
if not self.config.checkInclTestCategory(TESTS['obsolete']):
include = False