1 # -*- encoding: utf-8 -*-
6 from __future__ import with_statement
8 import urllib, urlparse
14 from cStringIO import StringIO
15 from gettext import gettext as _
16 from itertools import *
17 from functools import *
18 from subprocess import *
21 from amara.xslt import transform
22 from amara.xpath.util import simplify
23 from amara.bindery import html
24 from amara.lib.util import *
26 from akara.services import simple_service, response
# Regular expressions for pulling label definitions out of SPSS syntax (.SPS) text.
# They are written as raw strings so that regex escapes such as \s, \w and \. reach
# the re module untouched instead of relying on Python passing unknown string
# escapes through. Inside a raw string \' keeps the backslash, and the regex
# engine reads \' as a literal single quote, so matching is unchanged.

# A whole VARIABLE LABELS command, terminated by a period; group 1 is the run of
# name/"label" pairs.
VAR_PAT = re.compile(r'VARIABLE\s+LABELS\s+(((\w+)\s+"([^"]+)"\s*)+)\.')
# One name/"label" pair: group 1 is the variable name, group 2 the label text.
VAR_DEF_PAT = re.compile(r'(\w+)\s+"([^"]+)"')
# A whole VALUE LABELS command, terminated by a period; group 1 is the run of
# /variable definition sets.
VALUE_PAT = re.compile(r'VALUE\s+LABELS\s+((/(\w+)\s+(\'(\w+)\'\s+"([^"]+)"\s*)+)+)\.')
# One /variable definition set: group 1 is the variable name, group 2 the run of
# 'code' "label" pairs belonging to it.
VALUE_DEF_SET_PAT = re.compile(r'/(\w+)\s+((\'(\w+)\'\s+"([^"]+)"\s*)+)')
# One 'code' "label" pair: group 1 is the value code, group 2 the label text.
VALUE_DEF_PAT = re.compile(r'\'(\w+)\'\s+"([^"]+)"')
# Type tag set on every item, and the top-level keys under which the variable
# and value label tables are emitted in the JSON produced by spss2json.
VALUE_SET_TYPE = 'value_set'
VARIABLE_LABELS_TYPE = 'variable_labels'
VALUE_LABELS_TYPE = 'value_labels'
39 #R_SCRIPT = '''library(foreign)
40 #mydata <- read.spss(file='%s')
44 R_SCRIPT = '''library(Hmisc)
45 mydata <- spss.get(file='%s')
# Value of the 'r_command' key in the Akara module configuration, defaulting to 'r'.
# NOTE(review): presumably the command used to start R from parse_spss; the line that
# assigns the command line is not visible in this excerpt -- confirm.
50 R_FILE_CMD = AKARA_MODULE_CONFIG.get('r_command', 'r')
52 #Not running from Akara
# Message passed to assert_not_equal in spss2json when the POST has no 'POR' field;
# wrapped in gettext's _ so it can be translated.
55 POR_REQUIRED = _("The 'POR' POST parameter is mandatory.")
# Service identifier handed to the simple_service decorator on spss2json.
57 SERVICE_ID = 'http://purl.org/akara/services/builtin/spss.json'
@simple_service('POST', SERVICE_ID, 'spss.json', 'application/json')
def spss2json(body, ctype, **params):
    '''
    Convert an uploaded SPSS portable file to JSON, using GNU R for the conversion.

    The POST body must be multipart/form-data.  The 'POR' field (the SPSS
    portable file) is mandatory.  The 'SPSS' field (the original .SPS syntax
    file) is optional; when given it is used to try to guess long labels.

    Returns a JSON string with three top-level keys: 'items' (the data rows,
    each tagged with a synthetic id, label and type), 'variable_labels' and
    'value_labels'.

    Sample requests:

    curl -F "POR=@foo.por" http://localhost:8880/spss.json
    curl -F "POR=@foo.por" -F "SPSS=@foo.sps" http://localhost:8880/spss.json
    '''
    # Background reading:
    # * [[http://wiki.math.yorku.ca/index.php/R:_Data_conversion_from_SPSS|R: Data conversion from SPSS]]
    fields = cgi.FieldStorage(fp=body, environ=WSGI_ENVIRON)
    por = fields.getvalue('POR')
    assert_not_equal(por, None, msg=POR_REQUIRED)
    spss = fields.getvalue('SPSS')

    items, varlabels, valuelabels = parse_spss(por, spss)

    # Tag each row in place with a positional identifier ('_0', '_1', ...) that
    # doubles as its label, plus the shared item type.
    for index, record in enumerate(items):
        tag = '_' + str(index)
        record['id'] = tag
        record['label'] = tag
        record['type'] = VALUE_SET_TYPE

    payload = {
        'items': items,
        VARIABLE_LABELS_TYPE: varlabels,
        VALUE_LABELS_TYPE: valuelabels,
    }
    return simplejson.dumps(payload, indent=4)
95 def parse_spss(spss_por, spss_syntax=None):
97 Uses GNU R to convert SPSS to a simple Python data structure
98 Optionally tries to guess long labels from an original .SPS file
# NOTE(review): several lines of this function are absent from this excerpt (for
# example where varlabels/valuelabels are created, any guards on spss_syntax or
# matchinfo, and where cmdline is assigned); confirm the notes below against
# the complete file.
# Find the VARIABLE LABELS command in the syntax text and record each
# variable name -> label pair in varlabels.
103 matchinfo = VAR_PAT.search(spss_syntax)
105 #print >> sys.stderr, matchinfo.groups
106 defns = matchinfo.group(1)
107 for defn in VAR_DEF_PAT.finditer(defns):
108 varlabels[defn.group(1)] = defn.group(2)
# Same for the VALUE LABELS command: each /variable definition set becomes a
# mapping of value code -> label, stored in valuelabels under the variable name.
110 matchinfo = VALUE_PAT.search(spss_syntax)
111 defsets = matchinfo.group(1)
112 for defset in VALUE_DEF_SET_PAT.finditer(defsets):
114 for defn in VALUE_DEF_PAT.finditer(defset.group(2)):
115 valuelabelset[defn.group(1)] = defn.group(2)
116 valuelabels[defset.group(1)] = valuelabelset
118 #print >> sys.stderr, varlabels
119 #print >> sys.stderr, valuelabels
121 #print >> sys.stderr, por[:100]
122 #print >> sys.stderr, spss[:100]
# Write the portable-file data to a temporary .por file; its path (temp[1]) is
# substituted into R_SCRIPT below so that R can read the data from disk.
123 temp = tempfile.mkstemp(suffix=".por")
124 os.write(temp[0], spss_por)
# Run cmdline through the shell with all three standard streams piped.
127 process = Popen(cmdline, stdin=PIPE, stdout=PIPE, stderr=PIPE, universal_newlines=True, shell=True)
# Feed the R script on stdin; stdout is captured as csvdata and stderr as perr.
129 csvdata, perr = process.communicate(input=R_SCRIPT%temp[1])
# Failure path: echo the script and the captured stderr, then raise ValueError.
# NOTE(review): the condition guarding this path is not visible here; the error
# message suggests it tests for empty output -- confirm.
133 print >> sys.stderr, R_SCRIPT%temp[1]
134 print >> sys.stderr, perr
136 raise ValueError('Empty output from the command line. Probably a failure. Command line: "%s"'%cmdline)
# Label lookup used for each cell below (k is the column name, v the stripped cell
# text): when valuelabels has an entry for this column and value, return that label.
139 if k in valuelabels and v in valuelabels[k]:
140 return valuelabels[k][v]
# Parse the captured output as semicolon-delimited CSV; each row becomes a dict
# whose cell values are stripped and then passed through the label lookup.
144 r_reader = csv.DictReader(csvdata.splitlines(), delimiter=';')
146 dict(((k, value(k, v.strip())) for (k, v) in row.iteritems()))
# Result is a 3-tuple: the row dicts, the variable labels, the value labels.
150 return (rows, varlabels, valuelabels)