REBOL [
date: 17/01/2011
author: "nicolas"
file: %download-page.r
title: "Download page"
version: 0.2.1
purpose: {
Download all elements of a URL into a tmp directory:
- CSS scripts
- Javascript scripts
- Images
Return the page size.
}
comments: {
This function does not handle caching like a web browser, nor parallel downloading of resources.
Parallelism should be simulated: in a future release, make it work with 3 tiny servers
- one for downloading images
- one for downloading CSS scripts
- one for downloading JavaScript scripts
And manage the cache.
Does not download other resources such as Adobe Flash content.
}
category: [internet]
library: [
level: 'beginner
platform: 'all
type: [internet html]
domain: [internet html]
tested-under: [win]
support: none
license: 'public-domain
see-also: none
]
history: [
0.2.1 17/01/2011 "Secure downloading with attempt"
0.2.0 15/01/2011 "Make a release on rebol.org"
0.1.0 05/01/2011 "Creation of the script"
]
usage: {
>> url-page: to-url ask "URL ? "
>> print ["Page size" download-page/temp-dir url-page %page1 "kb."]
}
]
download-page: funct [url [url!] /temp-dir dir [file!]] [
either temp-dir [tmp: copy dir][tmp: %tmp]
; kb?: size of D expressed in kilobytes, rounded to the nearest 0.1.
;   d - a file! or url! (measured through the size field of INFO?),
;       or any other value, which is measured with LENGTH?.
; Returns the measured size divided by 1024 and rounded to one decimal place.
; NOTE(review): INFO? is not checked for a NONE result here, so a missing
; file or unreachable URL would presumably raise on the size access -
; confirm that callers only pass resources that exist.
kb?: funct [d] [
    ; Pick the raw size first, then convert and round in one place.
    raw-size: either any [file? d url? d] [
        meta: info? d
        meta/size
    ][
        length? d
    ]
    round/to raw-size / 1024 .1
]
remove-comments: funct [page] [
foreach line page [
t: mold first page
if not none? find t "