python - Sending large amount of data from one system to another
I am actually trying to send trained data from system 1 to system 2, so that kNN classification can be done on system 2. I am finding it difficult to send the trained data because it is large. Is there a way to send this bulky data from one system to the other through a socket?
System 1
import sys
import time
import pickle
from sklearn.datasets import load_files
from sklearn.neighbors import KNeighborsClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from socket import socket, gethostbyname, AF_INET, SOCK_DGRAM

PORT_NUMBER = 5000
hostName = gethostbyname('0.0.0.0')

mySocket = socket(AF_INET, SOCK_DGRAM)
mySocket.bind((hostName, PORT_NUMBER))
print("Test server listening on port {0}".format(PORT_NUMBER))

# Handshake with the client
(data, addr) = mySocket.recvfrom(15)
print data
mySocket.sendto("connected...", addr)

# Receive the pickled training labels
(data, addr) = mySocket.recvfrom(20000000)
msg = pickle.loads(data)
twenty_train = msg
mySocket.sendto("one", addr)

# Receive the pickled tf-idf training matrix
(data, addr) = mySocket.recvfrom(300000000)
ms = pickle.loads(data)
X_train_tfidf = ms

knn = KNeighborsClassifier(n_neighbors=3)
clf = knn.fit(X_train_tfidf, twenty_train)

# Read the new document to classify and send it to the client
f = open(sys.argv[1], 'r')
g = f.read()
ans = g.strip('\n')
if ans.endswith(' '):
    ans = ans.rstrip(' ')
docs_new = [ans]
mySocket.sendto(ans, addr)

# Receive the pickled tf-idf vector of the new document
(data, addr) = mySocket.recvfrom(1000000)
msg2 = pickle.loads(data)
X_new_tfidf = msg2
mySocket.sendto("two", addr)

predicted = clf.predict(X_new_tfidf)

# Receive the category names and print the prediction
(data, addr) = mySocket.recvfrom(100000)
msg3 = pickle.loads(data)
names = msg3
for doc, category in zip(docs_new, predicted):
    print('%r => %s' % (doc, names[category]))
sys.exit()
System 2
import sys
import pickle
import time
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.datasets import load_files
from sklearn.neighbors import KNeighborsClassifier
from socket import socket, AF_INET, SOCK_DGRAM

SERVER_IP = '10.0.8.132'
PORT_NUMBER = 5000
print("Test client sending packets to IP {0}, via port {1}\n".format(SERVER_IP, PORT_NUMBER))

sock = socket(AF_INET, SOCK_DGRAM)
sock.connect((SERVER_IP, PORT_NUMBER))
sock.send("connecting...")
(msg, addr) = sock.recvfrom(15)
print(msg)

print "The categories are:"
categories = ['terrorism', 'jellikettu']
print(categories)

# Load the training documents from the 'dataset' directory
ans = 'dataset'
ans = ans.strip('\n')
if ans.endswith(' '):
    ans = ans.rstrip(' ')
twenty_train = load_files(ans, description=None, categories=categories,
                          load_content=True, shuffle=True, encoding='utf-8',
                          decode_error='ignore', random_state=42)

# Vectorize the documents and send the pickled labels to the server
count_vect = CountVectorizer()
X_train_counts = count_vect.fit_transform(twenty_train.data)
sock.sendto(pickle.dumps(twenty_train.target), addr)

(ms, addr) = sock.recvfrom(2000000)

# Build the tf-idf matrix and send it (this is where the error occurs)
tfidf_transformer = TfidfTransformer()
X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)
sock.sendto(pickle.dumps(X_train_tfidf), addr)

# Receive the new document, transform it and send its tf-idf vector back
(ans, addr) = sock.recvfrom(2000)
docs_new = [ans]
X_new_counts = count_vect.transform(docs_new)
X_new_tfidf = tfidf_transformer.transform(X_new_counts)
sock.sendto(pickle.dumps(X_new_tfidf), addr)

(m, addr) = sock.recvfrom(2000000)
sock.sendto(pickle.dumps(twenty_train.target_names), addr)

print >>sys.stderr, 'closing socket'
sock.close()
sys.exit()
Error
Traceback (most recent call last):
  File "cl.py", line 43, in <module>
    sock.sendto(pickle.dumps(X_train_tfidf), addr)
socket.error: [Errno 90] Message too long
Yes. You should use a SOCK_STREAM (TCP) socket to send large data. Using a SOCK_DGRAM (UDP) socket means that each message stands alone and must fit within the maximum size of a UDP datagram (just under 64K). If you use a TCP session, there is no limit on the size you can transmit.
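For illustration only, switching the two scripts to TCP might start out like the sketch below. The addresses and port are the ones from the question; everything else is an assumption, not the original code.

from socket import socket, AF_INET, SOCK_STREAM

# System 1 (server side): listen for a TCP connection instead of binding a UDP socket.
server = socket(AF_INET, SOCK_STREAM)
server.bind(('0.0.0.0', 5000))
server.listen(1)
conn, addr = server.accept()       # 'conn' is a connected byte stream to the client

# System 2 (client side): connect to the server and write to the stream.
client = socket(AF_INET, SOCK_STREAM)
client.connect(('10.0.8.132', 5000))
client.sendall(b'connecting...')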
You will need to frame the individual messages yourself, since TCP doesn't maintain message boundaries. That's typically done by sending some kind of header in front of each message so that the receiver knows how much to read before decoding; in this case, you want to ensure you have received the entire data block before calling pickle.loads. The header can be as simple as a single 32-bit integer containing the length of the remaining message. (It is probably best to put it in binary so you know how big it [the length] is. You can use the struct module's pack and unpack for that.)
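As a minimal sketch of that framing (the helper names send_msg, recv_exact and recv_msg are mine, not a standard API):

import struct

def send_msg(conn, payload):
    # Prefix the pickled payload with a 4-byte big-endian length header.
    conn.sendall(struct.pack('>I', len(payload)) + payload)

def recv_exact(conn, n):
    # TCP is a byte stream, so keep reading until exactly n bytes have arrived.
    buf = b''
    while len(buf) < n:
        chunk = conn.recv(n - len(buf))
        if not chunk:
            raise EOFError('socket closed before the full message arrived')
        buf += chunk
    return buf

def recv_msg(conn):
    # Read the 4-byte header first, then the body it announces.
    (length,) = struct.unpack('>I', recv_exact(conn, 4))
    return recv_exact(conn, length)

With helpers like these, the sender would call send_msg(sock, pickle.dumps(X_train_tfidf)) and the receiver pickle.loads(recv_msg(conn)), instead of a single sendto/recvfrom pair per block.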
An alternative is to create a brand new connection for every data block sent: i.e. connect, send the data, close. That way, the receiver can keep receiving until it gets EOF, at which point it knows it has the entire data block.
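A rough sketch of that variant (again, the function names are just for illustration, and 'listener' is assumed to be a bound, listening TCP socket):

import pickle
from socket import socket, AF_INET, SOCK_STREAM

def send_block(host, port, obj):
    # Open a fresh connection, send the whole pickled block, then close.
    s = socket(AF_INET, SOCK_STREAM)
    s.connect((host, port))
    s.sendall(pickle.dumps(obj))
    s.close()                      # closing signals EOF to the receiver

def recv_block(listener):
    # Accept one connection and read until EOF, then unpickle the result.
    conn, addr = listener.accept()
    chunks = []
    while True:
        chunk = conn.recv(4096)
        if not chunk:              # empty read: peer closed, block complete
            break
        chunks.append(chunk)
    conn.close()
    return pickle.loads(b''.join(chunks))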