IMPORT t.bas
' Print the text-bearing elements of sample.xml as "TAG = value", one per line.
' The document is cut at every ">" so each piece is either something that
' starts with "<" (after trimming) or element text followed by "</TAG".
xml = t::LoadString("sample.xml")
SPLITA xml BY ">" TO a
FOR x = 0 TO UBOUND(a)
  ' Pieces that begin with "<" carry no element text, so they are skipped.
  IF LEFT(TRIM(a[x]),1) <> "<" THEN
    p = INSTR(a[x],"<")
    ' INSTR yields undef when the piece holds no "<" at all (for example
    ' trailing whitespace after the final tag). Printing in that case would
    ' emit a meaningless " = " line, so only pieces with a closing tag print.
    IF ISDEFINED(p) THEN
      ' p+2 steps over "</" so MID returns the tag name; LEFT returns the text.
      PRINT MID(a[x],p+2) & " = " & LEFT(a[x],p-1),"\n"
    END IF
  END IF
NEXT
LN = 62981
PTYP = RESI
LAG = 27022
ST = CT
LP = 599950.00
SP = 0.00
OLP = 599950.00
HSN = 4538
STR = 23rd
SSUF = Ave
DRS = SW
CIT = Seattle
STA = WA
ZIP = 98126
BR = 5.00
BTH = 3.50
ASF = 3650
LSF = 4800
UD = 2010-04-24 14:59:25
AR = 140
DSRNUM = 7215
LDR = 2010-04-24 00:00:00
LD = 2010-04-24 00:00:00
CLO = 1800-01-01 00:00:00
YBT = 2010
LO = 1401
TAX = 1773600264
MAP = 594
GRDX = G
GRDY = 4
SAG = 0
SO = 0
NIA = Y
MR = Third of three New Contemporary Homes w/fantastic open floor plans and great level and fenced backyards.These homes have wonderful tall ceilings,designer paint,fully wrapped windows,solid core/glass int doors and top of the line strand Bamboo flrs.The kitchen is an entertainers dream w/an enormous open eating bar,honed granite counters,custom wood cabinets,top of the line stainless steel appls and french doors to the ent backyard. Quality and Designer features from top to Bottom, a must see!!
LONG = -122.362210
LAT = 47.561975
PDR = 1800-01-01 00:00:00
CLA = 0
SHOADR = Y
DD = From Delridge Way head east on Oregon which becomes 23rd.
AVDT = 1800-01-01 00:00:00
INDT = 1800-01-01 00:00:00
COU = King
CDOM = 0
CTDT = 2010-04-24 00:00:00
SCA = 0
SCO = 0
SD = SEA
SDT = 2010-04-24 00:00:00
MAPBOOK = THOM
DSR = Pigeon Point
QBT = 0
COLO = 0
PIC = 1
ARC = K
BDL = 2
BDM = 0
BDU = 3
BLD = JDR Development Inc
BLK = 14
BUS = Y
DNO = L
DRM = M
ENT = M
F17 = A
FAM = M
FBG = 0
FBL = 1
FBM = 0
FBT = 3
FBU = 2
FP = 1
FPL = 0
FPM = 1
FPU = 0
GAR = 2
HBG = 0
HBL = 0
HBM = 1
HBT = 1
HBU = 0
HOD = 0
KES = M
LRM = M
LT = 16
MBD = U
MOR = 0
NC = U
POC = SEA
PRJ = Cottage Grove # 3
PTO = Y
TQBT = 0
RRM = L
SAP = 0
SFF = 0
SFS = Per Builder Plans
SFU = 0
SML = Y
SNR = N
STY = 18
SWC = SEA
TBG = 0
TBL = 0
TBM = 0
TBU = 0
TX = 0
TXY = 0
UTR = U
WAC = SEA
APS = A|D|E|F|G
BDI = E
BSM = A|B
ENS = B
EXT = J|E
FEA = A|D|F|G|J|M|P|T
FLS = J|A|G
FND = E|F
GR = C
HTC = B
LDE = H|J
LTV = E|F
POS = A
RF = C
SIT = G|H|M|Y|N
SWR = A
TRM = B|C
VEW = D|L
WAS = D
ZJD = A
ZNC = SF 5000
ProhibitBLOG = Y
AllowAVM = Y
PARQ = N
BREO = N
Latitude = 32.9659843
Longitude = 96.74525
AllocationFactor = 0.002192
FipsCode = 48
PlaceName = RICHARDSON
StateCode = TX
Day = Sunday, June 07, 2009
WeatherImage = url
MaxTemperatureF = 94
MinTemperatureF = 74
MaxTemperatureC = 34
MinTemperatureC = 23
Day = Monday, June 08, 2009
WeatherImage = url
MaxTemperatureF = 94
MinTemperatureF = 74
MaxTemperatureC = 34
MinTemperatureC = 23
Day = Tuesday, June 09, 2009
WeatherImage = url
MaxTemperatureF = 95
MinTemperatureF = 76
MaxTemperatureC = 35
MinTemperatureC = 24
Day = Wednesday, June 10, 2009
WeatherImage = url
MaxTemperatureF = 93
MinTemperatureF = 74
MaxTemperatureC = 34
MinTemperatureC = 23
Day = Thursday, June 11, 2009
WeatherImage = url
MaxTemperatureF = 93
MinTemperatureF = 73
MaxTemperatureC = 34
MinTemperatureC = 23
Day = Friday, June 12, 2009
WeatherImage = url
MaxTemperatureF = 93
MinTemperatureF = 73
MaxTemperatureC = 34
MinTemperatureC = 23
Day = Saturday, June 13, 2009
WeatherImage = url
MaxTemperatureF = 94
MinTemperatureF = 73
MaxTemperatureC = 34
MinTemperatureC = 23
$execon
' Print the elements of residential.xml as "TAG = value", one per line.
CONST SourceXml = "residential.xml"
dim s as PCHAR, i, lineCount, dest$[8192]
' Size the text buffer from the file length instead of a fixed 8192 bytes,
' so files larger than 8 KB no longer overrun it.
redim s * LOF(SourceXml)+1
s$ = LoadFile$(SourceXml)
' Split into lines once, up front, rather than inside the loop bound.
lineCount = split(dest$,s$,lf$)
' The loop starts at index 2 and stops 3 short of the line count, so the
' leading and trailing wrapper lines of the document are not processed.
for i = 2 to lineCount-3
' Keep only what precedes the closing tag: "<TAG>value".
dest$[i]= left$(dest$[i],instr(dest$[i],"</")-1)
' "<TAG>value" -> "<TAG = value" -> "TAG = value".
replace ">" with " = " in dest$[i]
remove "<" from dest$[i]
' Lines that ended up empty are not printed.
if len(dest$[i]) then print trim$(dest$[i])
next
free(s)
$execon "-lmxml"
' NOTE(review): "-lmxml" is presumably passed on to the link step so the
' Mini-XML library is linked in - confirm against the BCX $execon docs.
$nomain
' NOTE(review): $nomain presumably stops BCX from generating its own main();
' this program defines main() itself further down - confirm.
$HEADER
/* Mini-XML declarations (mxml_node_t, mxml_index_t and the mxml* functions). */
#include <mxml.h>
/* Short aliases for the Mini-XML pointer types. */
typedef mxml_node_t* XMLROOT;
typedef XMLROOT XMLNODE;
typedef mxml_index_t* XMLINDEX;
$HEADER
' Entry point: parses residential.xml with Mini-XML, walks an index of its
' nodes and prints "name = value" for each indexed node whose first
' descendant holds non-empty text.
' Returns 0 on success, 1 when the document cannot be parsed or indexed.
FUNCTION main(argc as INTEGER, argv as PCHAR PTR) as INTEGER
dim tree as XMLROOT, node as XMLNODE, sub_node as XMLNODE,ind as XMLINDEX
dim category$, value$
OPEN "residential.xml" FOR INPUT AS xmlFile
tree = mxmlLoadFile(NULL, xmlFile, MXML_OPAQUE_CALLBACK)
CLOSE xmlFile
' mxmlLoadFile returns NULL when the document cannot be parsed.
if tree = NULL then
print "Unable to parse residential.xml"
FUNCTION = 1
end if
ind = mxmlIndexNew(tree, NULL,NULL)
' Without an index there is nothing to enumerate; release the tree and stop.
if ind = NULL then
mxmlDelete(tree)
FUNCTION = 1
end if
mxmlIndexReset(ind)
' The index holds num_nodes entries, so enumerate exactly that many times.
for integer cnt = 0 to ind->num_nodes - 1
node = mxmlIndexEnum(ind)
if node = NULL then iterate
sub_node = mxmlWalkNext(node, tree,MXML_DESCEND_FIRST)
' mxmlWalkNext returns NULL when the walk is exhausted; skip instead of
' dereferencing a NULL pointer below.
if sub_node = NULL then iterate
category$ = trim$(node->value.opaque$)
value$ = trim$(sub_node->value.opaque$)
' Only nodes with a non-empty value are reported.
if value$ != NUL$ then print category$;" = ";value$
next
mxmlIndexDelete(ind)
mxmlDelete(tree)
FUNCTION = 0
END FUNCTION
APS = A|D|E|F|G
AR = 140
ARC = K
ASF = 3650
AVDT = 1800-01-01 00:00:00
AllowAVM = Y
BDI = E
BDL = 2
BDM = 0
BDU = 3
BLD = JDR Development Inc
BLK = 14
BR = 5.00
BREO = N
BSM = A|B
BTH = 3.50
BUS = Y
CDOM = 0
CIT = Seattle
CLA = 0
CLO = 1800-01-01 00:00:00
COLO = 0
COU = King
CTDT = 2010-04-24 00:00:00
DD = From Delridge Way head east on Oregon which becomes 23rd.
DNO = L
DRM = M
DRS = SW
DSR = Pigeon Point
DSRNUM = 7215
ENS = B
ENT = M
EXT = J|E
F17 = A
FAM = M
FBG = 0
FBL = 1
FBM = 0
FBT = 3
FBU = 2
FEA = A|D|F|G|J|M|P|T
FLS = J|A|G
FND = E|F
FP = 1
FPL = 0
FPM = 1
FPU = 0
GAR = 2
GR = C
GRDX = G
GRDY = 4
HBG = 0
HBL = 0
HBM = 1
HBT = 1
HBU = 0
HOD = 0
HSN = 4538
HTC = B
INDT = 1800-01-01 00:00:00
KES = M
LAG = 27022
LAT = 47.561975
LD = 2010-04-24 00:00:00
LDE = H|J
LDR = 2010-04-24 00:00:00
LN = 62981
LO = 1401
LONG = -122.362210
LP = 599950.00
LRM = M
LSF = 4800
LT = 16
LTV = E|F
MAP = 594
MAPBOOK = THOM
MBD = U
MOR = 0
MR = Third of three New Contemporary Homes w/fantastic open floor plans and great level and fenced backyards.These homes have wonderful tall ceilings,designer paint,fully wrapped windows,solid core/glass int doors and top of the line strand Bamboo flrs.The kitchen is an entertainers dream w/an enormous open eating bar,honed granite counters,custom wood cabinets,top of the line stainless steel appls and french doors to the ent backyard. Quality and Designer features from top to Bottom, a must see!!
NC = U
NIA = Y
OLP = 599950.00
PARQ = N
PDR = 1800-01-01 00:00:00
PIC = 1
POC = SEA
POS = A
PRJ = Cottage Grove # 3
PTO = Y
PTYP = RESI
ProhibitBLOG = Y
QBT = 0
RF = C
RRM = L
SAG = 0
SAP = 0
SCA = 0
SCO = 0
SD = SEA
SDT = 2010-04-24 00:00:00
SFF = 0
SFS = Per Builder Plans
SFU = 0
SHOADR = Y
SIT = G|H|M|Y|N
SML = Y
SNR = N
SO = 0
SP = 0.00
SSUF = Ave
ST = CT
STA = WA
STR = 23rd
STY = 18
SWC = SEA
SWR = A
TAX = 1773600264
TBG = 0
TBL = 0
TBM = 0
TBU = 0
TQBT = 0
TRM = B|C
TX = 0
TXY = 0
UD = 2010-04-24 14:59:25
UTR = U
VEW = D|L
WAC = SEA
WAS = D
YBT = 2010
ZIP = 98126
ZJD = A
ZNC = SF 5000
I have a HUGE need for a reliable XML parser (data extractor) for Linux. I remember you created a ScriptBasic extension module for mini-XML to replace the GNOME libxml2 module that seemed buggy. Did you release the source to your SB version of mini-XML? I would like to give it a try under Ubuntu 32 if at all possible.
I noticed your first attempt at a brute force XML parser but the Achilles Heel of that approach with BCX is that you have to know how big to DIM your work array. Under SB, array allocation is dynamic and the array that SPLITA created for the 10 MB XML I tried my tinyXML parser on must have over a million elements. As you say, I need a bit more control than just a utility that acts as a filter stripping XML tags top down.
You can use dynamic strings.
Or, you could always get the file size, and redimension a C string.....
$execon
' Dump the elements of residential.xml as "TAG = value" lines, holding the
' file text in a buffer sized from the file's length.
CONST FName = "residential.xml"
dim buf as PCHAR, idx, cut, rows$[MAX_PATH]
' One extra byte beyond the file length leaves room for the terminator.
redim buf * LOF(FName)+1
buf$ = LoadFile$(FName)
' Indexes below 2 and the last entries of the split are left out.
for idx = 2 to split(rows$,buf$,lf$)-3
  ' Everything from the closing tag onward is dropped.
  cut = instr(rows$[idx],"</")-1
  rows$[idx] = left$(rows$[idx],cut)
  ' Turn "<TAG>value" into "TAG = value".
  replace ">" with " = " in rows$[idx]
  remove "<" from rows$[idx]
  ' Rows that are now empty produce no output.
  if len(rows$[idx]) then
    print trim$(rows$[idx])
  end if
next
free(buf)