AI
Animation
CGI
Compression
Console
Database
Debug
Dialects
Dialogs
Editor
Email
Encryption
Extension
External Library
File
File Handling
Files
Financial
FTP
Game
Games
Graphics
GUI
HTML
HTTP
Internet
LDC
Markup
Math
Module
Network
Networking
None
Other - Net
Parse
Patch
Printing
Protocol
Rebol
Scheme
Scientific
SDK
Security
Shell
Sound
SQL
TCP
Testing
Text
Text Processing
UI
User Interface
Util
Utility
VID
Visualization
Web
Win API
X-File
XML
REBOL [
	; Script header: descriptive metadata only, no executable code.
	Title: "Extract URLs"
	File: %extract-urls.r
	Version: 1.0.0
	Home: http://www.ross-gill.com/
	Date: 29-Nov-2009
	Purpose: "To identify and extract URIs from plain text"
	Author: "Christopher Ross-Gill"

	; Classification block (level / platform / type / domain / ...).
	; NOTE(review): presumably consumed by a script-library indexer -- confirm.
	Library: [
		level: 'intermediate
		platform: 'all
		type: [function module]
		domain: [markup parse text text-processing web]
		tested-under: [view 2.7.6.2.4 view 2.100.95.2.5]
		support: none
		license: 'cc-by-sa
		; Background article on the liberal URL-matching regex (per the link itself).
		see-also: http://daringfireball.net/2009/11/liberal_regex_for_matching_urls
	]
]

; extract-urls -- split plain text into a block of string! and url! values.
;
; The result keeps document order: every recognised URI becomes a url! value,
; and every non-empty run of text between URIs becomes a string!.
; An empty input yields an empty block.
;
; The words below live in a private `use` context shared by all calls;
; `out` is reset at the start of every call.
extract-urls: use [out rule word uri space punct chars][
	; regex \w
	word: charset [#"_" #"0" - #"9" #"A" - #"Z" #"a" - #"z"]

	; Characters that always terminate a URI: whitespace plus ()<>.
	; CR (^M) is included so CRLF-terminated text does not leak a
	; carriage return into the extracted link.
	space: charset "^/^-^M ()<>"

	; POSIX [:punct:] minus ()<> (held in `space`) and minus "_" (held in `word`).
	; Includes the double quote and the backslash so that a closing quote
	; after a link is treated as trailing punctuation, not as part of it.
	punct: charset "!^"#$%&'`*+,-./:;=?@[\]^^{|}~"

	; Anything that is neither a delimiter nor punctuation.
	chars: complement union space punct

	uri: [
		; scheme followed by ":/" or "://", or a bare "www." prefix
		[some [word | "-"] ":/" opt "/" | "www."]
		; body: runs of ordinary characters, optionally joined by punctuation;
		; an iteration that ends in punctuation fails and is rolled back, so
		; the link never ends on punctuation other than "/"
		some [opt [some punct] some chars opt "/"]
		; optional trailing "(words)" group
		opt [any punct "(" some word ")"]
	]

	rule: use [emit-link emit-text link mk ex][
		; mk -- start of the text not yet emitted
		; ex -- position where the current match attempt (or the end) begins
		emit-link: [(append out to-url link)]
		emit-text: [(unless mk = ex [append out copy/part mk ex])]

		[
			mk: any [
				; a URI: flush the pending text, emit the link, restart the text mark
				ex: copy link uri emit-text emit-link mk:
				; not a URI here: skip the rest of this run of word characters
				; so the next attempt starts on a word boundary (regex \b);
				; punctuation and delimiters are skipped one at a time below,
				; which lets a URI directly after a quote, bracket or colon be found
				| some word
				| skip
			]
			; flush whatever text follows the last link
			ex: emit-text
		]
	]

	func [
		"Separates URLs from plain text"
		txt [string!] "Text to be scanned for URLs"
	][
		out: copy []
		; the rule consumes the whole input, so parse succeeds and `out` is returned
		if parse/all txt rule [out]
	]
]

            
            
        
Copyright © 2018 Rebol Software Foundation