AI
Animation
CGI
Compression
Console
Database
Debug
Dialects
Dialogs
Editor
Email
Encryption
Extension
External Library
File
File Handling
Files
Financial
FTP
Game
Games
Graphics
GUI
HTML
HTTP
Internet
LDC
Markup
Math
Module
Network
Networking
None
Other - Net
Parse
Patch
Printing
Protocol
Rebol
Scheme
Scientific
SDK
Security
Shell
Sound
SQL
TCP
Testing
Text
Text Processing
UI
User Interface
Util
Utility
VID
Visualization
Web
Win API
X-File
XML
REBOL [
    File: %split.r
    Date: 6-May-2012
    Title: "Split.r"
    Author: "Gregg Irwin"
    Purpose: {
        Provide functions to split a series into pieces, according to 
        different criteria and needs.
    }
    library: [
        level: 'intermediate
        platform: 'all
        type: [module function dialect]
        domain: [parse dialects text text-processing]
        tested-under: [View 2.7.8 on Win7]
        license: 'MIT
    ]
]

use [map-each test split-tests] [

    ; R3-compatible map-each interface.
    ;
    ; This is local to the context for SPLIT, because it is not designed
    ; to be fully R3 MAP-EACH compatible. For that, you should look at
    ; Brian Hawley's %r2-forward.r.
    ;
    ; What happens if the result of the DO is unset!? For now, we'll
    ; ignore unset values. The example case being SPLIT, which uses
    ; MAP-EACH with an unset value for negative numeric vals used to
    ; skip in the series.
    map-each: func [
        'word 
        data [block!] 
        body [block!]
        /local tmp
    ] [
        collect compose/deep [
            repeat (word) data [
                set/any 'tmp do bind/copy body (to lit-word! word)
                if value? 'tmp [keep/only :tmp]
            ]
        ]
    ]

    ; R2 version.
    ;
    ; There are differences from the version in R3 which, itself, will likely
    ; need to be revisited due to changes in R3.
    split: func [
        [catch]
        {Split a series into pieces; fixed or variable size, fixed number, or at delimiters}
        series [series!] "The series to split"
        dlm [block! integer! char! bitset! any-string!] "Split size, delimiter(s), or rule(s)."
        /into {If dlm is an integer, split into n pieces, rather than pieces of length n.}
        /local size count mk1 mk2 res piece-size fill-val add-fill-val type
    ][
        ; This is here becaus using "to series", which should work, fails if the
        ; target type is paren!. If we ignore that case, all the "to type" bits
        ; can go away completely.
        type: type? series
        
        either all [block? dlm parse dlm [some integer!]] [
            map-each len dlm [
                either positive? len [
                    to type copy/part series series: skip series len
                ] [
                    series: skip series negate len
                    ()
                ]
            ]
        ] [
            size: dlm
            res: collect [
                parse/all series case [
                    all [integer? size into] [
                        if size < 1 [throw make error! compose  [script invalid-arg size]]
                        count: size - 1
                        ; Max 1 is to catch when size is larger than the series, giving us 0.
                        piece-size: max 1 round/down divide length? series size
                        [
                            count [copy series piece-size skip (keep/only to type series)]
                            copy series to end (keep/only to type series)
                        ]
                    ]
                    integer? dlm [
                        if size < 1 [throw make error! compose  [script invalid-arg size]]
                        [any [copy series 1 size skip (keep/only to type series)]]
                    ]
                    'else [
                        ; This is quite a bit different under R2, in order to stop 
                        ; at the end properly and collect the final value.
                        [
                            any [
                                mk1: [
                                    to dlm mk2: dlm (keep to type copy/part mk1 mk2)
                                    | to end mk2: (keep to type copy mk1) skip
                                ]
                            ]
                        ]
                    ]
                ]
            ]
            ;-- Special processing, to handle cases where the spec'd more items in
            ;   /into than the series contains (so we want to append empty items),
            ;   or where the dlm was a char/string/charset and it was the last char
            ;   (so we want to append an empty field that the above rule misses).
            fill-val: make type none
            add-fill-val: does [append/only res fill-val]
            case [
                all [integer? size  into] [
                    ; If the result is too short, i.e., less items than 'size, add
                    ; empty items to fill it to 'size.
                    ; We loop here, because insert/dup doesn't copy the value inserted.
                    if size > length? res [
                        loop (size - length? res) [add-fill-val]
                    ]
                ]
                ; integer? dlm [
                ; ]
                'else [ ; = any [bitset? dlm  any-string? dlm  char? dlm]
                    ; If the last thing in the series is a delimiter, there is an
                    ; implied empty field after it, which we add here.
                    case [
                        bitset? dlm [
                            ; ATTEMPT is here because LAST will return NONE for an 
                            ; empty series, and finding none in a bitest is not allowed.
                            if attempt [find dlm last series] [add-fill-val]
                        ]
                        ; These cases are now handled, under R2, by the parse rule, and
                        ; no longer require special handling.
                        ;char? dlm [
                        ;    if dlm = last series [add-fill-val]
                        ;]
                        ;string? dlm [
                        ;    if all [
                        ;        find series dlm
                        ;        empty? find/last/tail series dlm
                        ;    ] [add-fill-val]
                        ;]
                    ]
                ]
            ]
                    
            res
            
        ]
    ]

    test: func [block expected-result /local res err] [
        ;print ['TEST tab mold block mold expected-result]
        if error? set/any 'err try [
            res: do block
            ;print [mold/only :block newline tab mold res]
            if res <> expected-result [print [tab 'FAILED! tab 'expected mold expected-result]]
        ][
            print [mold/only :block newline "ERROR!" mold disarm err]
        ]
    ]

    split-tests: [    
        >> [split "1234567812345678" 4]     == ["1234" "5678" "1234" "5678"]
        
        >> [split "1234567812345678" 3]     == ["123" "456" "781" "234" "567" "8"]
        >> [split "1234567812345678" 5]     == ["12345" "67812" "34567" "8"]
        
        >> [split/into [1 2 3 4 5 6] 2]       == [[1 2 3] [4 5 6]]
        >> [split/into "1234567812345678" 2]  == ["12345678" "12345678"]
        >> [split/into "1234567812345678" 3]  == ["12345" "67812" "345678"]
        >> [split/into "1234567812345678" 5]  == ["123" "456" "781" "234" "5678"]
        
        ; Dlm longer than series
        >> [split/into "123" 6]             == ["1" "2" "3" "" "" ""] ;or ["1" "2" "3"]
        >> [split/into [1 2 3] 6]           == [[1] [2] [3] [] [] []] ;or [1 2 3]
        >> [split/into first [(1 2 3)] 6]   == [(1) (2) (3) () () () ] ;or [1 2 3]
        ;>> [split/into [1 2 3] 6]     == [[1] [2] [3] none none none] ;or [1 2 3]
        
        
        >> [split [1 2 3 4 5 6] [2 1 3]]                  == [[1 2] [3] [4 5 6]]
        >> [split "1234567812345678" [4 4 2 2 1 1 1 1]]   == ["1234" "5678" "12" "34" "5" "6" "7" "8"]
        >> [split first [(1 2 3 4 5 6 7 8 9)] 3]          == [(1 2 3) (4 5 6) (7 8 9)]
        >> [split #{0102030405060708090A} [4 3 1 2]]      == [#{01020304} #{050607} #{08} #{090A}]
        
        >> [split [1 2 3 4 5 6] [2 1]]      == [[1 2] [3]]
        
        >> [split [1 2 3 4 5 6] [2 1 3 5]]  == [[1 2] [3] [4 5 6] []]
        
        >> [split [1 2 3 4 5 6] [2 1 6]]    == [[1 2] [3] [4 5 6]]
        
        ; Old design for negative skip vals
        ;>> [split [1 2 3 4 5 6] [3 2 2 -2 2 -4 3]]    == [[1 2 3] [4 5] [6] [5 6] [3 4 5]]
        ; New design for negative skip vals
        >> [split [1 2 3 4 5 6] [2 -2 2]]   == [[1 2] [5 6]]
        
        >> [split "abc,de,fghi,jk" #","]            == ["abc" "de" "fghi" "jk"]
        >> [split "abc
de
fghi
jk"
] == ["abc" "de" "fghi" "jk"] >> [split "a.b.c" "."] == ["a" "b" "c"] >> [split "c c" " "] == ["c" "c"] >> [split "1,2,3" " "] == ["1,2,3"] >> [split "1,2,3" ","] == ["1" "2" "3"] >> [split "1,2,3," ","] == ["1" "2" "3" ""] >> [split "1,2,3," #","] == ["1" "2" "3" ""] >> [split "1..2..3" ".."] == ["1" "2" "3"] >> [split "1..2..3.." ".."] == ["1" "2" "3" ""] ; Doesn't work under R2, because PARSE doesn't support [to charset!] ; or [to block!]. ; Need to look at Ladislav's parse enhancements to see about that. ;>> [split "1,2,3," charset ",."] == ["1" "2" "3" ""] ;>> [split "1.2,3." charset ",."] == ["1" "2" "3" ""] ; This doesn't work under R2 either. ;>> [split "-a-a" ["a"]] == ["-" "-"] ;>> [split "-a-a'" ["a"]] == ["-" "-" "'"] ;>> [split "abc^M^Jde^Mfghi^Jjk" [crlf | #"^M" | newline]] == ["abc" "de" "fghi" "jk"] ;>> [split "abc de fghi jk" [some #" "]] == ["abc" "de" "fghi" "jk"] ] run-split-tests: has [chevron =test =expected-result n] [ chevron: (to lit-word! ">>") n: 0 prin "Tests parsed successfully: " print parse split-tests [ some [ chevron set =test block! '== set =expected-result block! (n: n + 1 test =test =expected-result) ] ] print [n "tests run"] ] ]
Copyright © 2018 Rebol Software Foundation