# Parse an L<> formatting code in POD text. # # This module implements parsing of the text of an L<> formatting code as # defined in perlpodspec. It should be suitable for any POD formatter. It # exports only one function, parselink(), which returns the five-item parse # defined in perlpodspec. # # SPDX-License-Identifier: GPL-1.0-or-later OR Artistic-1.0-Perl ############################################################################## # Modules and declarations ############################################################################## package Pod::ParseLink; use 5.010; use strict; use warnings; use Exporter; our @ISA = qw(Exporter); our @EXPORT = qw(parselink); our $VERSION = '5.01'; ############################################################################## # Implementation ############################################################################## # Parse the name and section portion of a link into a name and section. sub _parse_section { my ($link) = @_; $link =~ s/^\s+//; $link =~ s/\s+$//; # If the whole link is enclosed in quotes, interpret it all as a section # even if it contains a slash. return (undef, $1) if ($link =~ /^"\s*(.*?)\s*"$/); # Split into page and section on slash, and then clean up quoting in the # section. If there is no section and the name contains spaces, also # guess that it's an old section link. my ($page, $section) = split (/\s*\/\s*/, $link, 2); $section =~ s/^"\s*(.*?)\s*"$/$1/ if $section; if ($page && $page =~ / / && !defined ($section)) { $section = $page; $page = undef; } else { $page = undef unless $page; $section = undef unless $section; } return ($page, $section); } # Infer link text from the page and section. sub _infer_text { my ($page, $section) = @_; my $inferred; if ($page && !$section) { $inferred = $page; } elsif (!$page && $section) { $inferred = '"' . $section . '"'; } elsif ($page && $section) { $inferred = '"' . $section . '" in ' . $page; } return $inferred; } # Given the contents of an L<> formatting code, parse it into the link text, # the possibly inferred link text, the name or URL, the section, and the type # of link (pod, man, or url). sub parselink { my ($link) = @_; $link =~ s/\s+/ /g; my $text; if ($link =~ /\|/) { ($text, $link) = split (/\|/, $link, 2); } if ($link =~ /\A\w+:[^:\s]\S*\Z/) { my $inferred; if (defined ($text) && length ($text) > 0) { return ($text, $text, $link, undef, 'url'); } else { return ($text, $link, $link, undef, 'url'); } } else { my ($name, $section) = _parse_section ($link); my $inferred; if (defined ($text) && length ($text) > 0) { $inferred = $text; } else { $inferred = _infer_text ($name, $section); } my $type = ($name && $name =~ /\(\S*\)/) ? 'man' : 'pod'; return ($text, $inferred, $name, $section, $type); } } ############################################################################## # Module return value and documentation ############################################################################## # Ensure we evaluate to true. 1; __END__ =for stopwords markup Allbery URL =head1 NAME Pod::ParseLink - Parse an LEE formatting code in POD text =head1 SYNOPSIS use Pod::ParseLink; my $link = get_link(); my ($text, $inferred, $name, $section, $type) = parselink($link); =head1 DESCRIPTION This module only provides a single function, parselink(), which takes the text of an LEE formatting code and parses it. It returns the anchor text for the link (if any was given), the anchor text possibly inferred from the name and section, the name or URL, the section if any, and the type of link. The type will be one of C, C, or C, indicating a URL, a link to a POD page, or a link to a Unix manual page. Parsing is implemented per L. For backward compatibility, links where there is no section and name contains spaces, or links where the entirety of the link (except for the anchor text if given) is enclosed in double-quotes are interpreted as links to a section (LE/sectionE). The inferred anchor text is implemented per L: L => L L => L<"section"|/section> L => L<"section" in name|name/section> The name may contain embedded EEE and ZEE formatting codes, and the section, anchor text, and inferred anchor text may contain any formatting codes. Any double quotes around the section are removed as part of the parsing, as is any leading or trailing whitespace. If the text of the LEE escape is entirely enclosed in double quotes, it's interpreted as a link to a section for backward compatibility. No attempt is made to resolve formatting codes. This must be done after calling parselink() (since EEE formatting codes can be used to escape characters that would otherwise be significant to the parser and resolving them before parsing would result in an incorrect parse of a formatting code like: LbarEslash> which should be interpreted as a link to the C POD page and not as a link to the C section of the C POD page with an anchor text of C. Note that not only the anchor text will need to have formatting codes expanded, but so will the target of the link (to deal with EEE and ZEE formatting codes), and special handling of the section may be necessary depending on whether the translator wants to consider markup in sections to be significant when resolving links. See L for more information. =head1 AUTHOR Russ Allbery =head1 COPYRIGHT AND LICENSE Copyright 2001, 2008, 2009, 2014, 2018-2019, 2022 Russ Allbery This program is free software; you may redistribute it and/or modify it under the same terms as Perl itself. =head1 SEE ALSO L The current version of this module is always available from its web site at L. =cut # Local Variables: # copyright-at-end-flag: t # End: