$ls -l `which sh`
/bin/sh -> dash
$sudo dpkg-reconfigure dash #Select "no" when you're asked
[...]
$ls -l `which sh`
/bin/sh -> bash
reference:
https://stackoverflow.com/questions/13702425/source-command-not-found-in-sh-shell
2017年7月31日 星期一
caffe
圖像轉DB
convert_imageset [FLAGS] ROOTFOLDER/ LISTFILE DB_NAME
create_filelist.sh
#!/usr/bin/env sh
# Build $DATA/train.txt, the list file read by convert_imageset.
# Each output line is "<image file name> <label>": cat images get label 1,
# bike images get label 2.
DATA=examples/images

echo "Create train.txt..."
# Start from an empty list so repeated runs do not accumulate entries.
rm -f "$DATA/train.txt"

# The -name patterns are quoted so that find receives them verbatim; unquoted,
# the shell would expand them against the current directory first.
# cut keeps the third '/'-separated field, i.e. the file name for images that
# sit directly inside $DATA (a two-component path).
find "$DATA" -name '*cat.jpg' | cut -d '/' -f3 | sed 's/$/ 1/' >> "$DATA/train.txt"
find "$DATA" -name '*bike.jpg' | cut -d '/' -f3 | sed 's/$/ 2/' >> "$DATA/train.txt"

echo "Done.."
create_lmdb.sh
#!/usr/bin/env sh
# Convert the images listed in $DATA/train.txt into an LMDB database,
# shuffling the entries and resizing every image to 256x256.
DATA=examples/images

# Remove any database left over from a previous run.
rm -rf "$DATA/img_train_lmdb"

# Arguments: [FLAGS] ROOTFOLDER/ LISTFILE DB_NAME
build/tools/convert_imageset --shuffle \
    --resize_height=256 --resize_width=256 \
    /home/xxx/caffe/examples/images/ "$DATA/train.txt" "$DATA/img_train_lmdb"
計算mean值
/opt/caffe/build/tools/compute_image_mean ./my_data/img_train_lmdb ./my_caffe/my_mean.binaryproto
convert_mean.py
#!/usr/bin/env python
"""Convert a Caffe mean file (.binaryproto) into a NumPy .npy file.

Usage: python convert_mean.py mean.binaryproto mean.npy
"""
import sys

import numpy as np


def main(argv):
    """Run the conversion.

    Args:
        argv: Full argument vector: program name, input .binaryproto path,
            output .npy path.

    Returns:
        0 on success, 1 when the argument count is wrong.
    """
    if len(argv) != 3:
        print("Usage: python convert_mean.py mean.binaryproto mean.npy",
              file=sys.stderr)
        return 1

    # Imported here so the argument check above does not depend on Caffe
    # being importable.
    import caffe

    blob = caffe.proto.caffe_pb2.BlobProto()
    # The context manager closes the input file even if parsing fails.
    with open(argv[1], 'rb') as mean_file:
        blob.ParseFromString(mean_file.read())

    arr = np.array(caffe.io.blobproto_to_array(blob))
    # Save only the first entry along the leading axis, as the original did.
    np.save(argv[2], arr[0])
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
2017年7月27日 星期四
pyodbc connect to sql server
Install:
https://github.com/mkleehammer/pyodbc/wiki/Install
code:
import pyodbc

# Length caps applied before inserting. 1024 is the Comment limit ("db size");
# 32 is the cap the original code intended for Model.
MODEL_MAX_LEN = 32
COMMENT_MAX_LEN = 1024
# Number of rows sent per executemany() call.
BATCH_SIZE = 10

# connect to db
conn = pyodbc.connect(
    r'DRIVER={ODBC Driver 13 for SQL Server};'
    r'SERVER=127.0.0.1;'
    r'DATABASE=DB_table;'
    r'UID=yasam;'
    r'PWD=password'
)
cursor = conn.cursor()

# One "?" placeholder per column: the driver binds the values, so quotes in
# the data need no manual '' escaping and cannot alter the statement.
sqlInsert = (
    "INSERT INTO [dbo].[test_table]"
    "(RecordID,Model,SubmitDate,CountryCode,Score,Comment) "
    "VALUES (?,?,?,?,?,?)"
)

# `df` is defined elsewhere; each item is expected to be a sequence ordered
# like the column list: RecordID, Model, SubmitDate, CountryCode, Score, Comment.
tList = []
for i, d in enumerate(df):
    print(i)
    row = list(d)  # copy so the caller's data is not modified
    row[1] = row[1][:MODEL_MAX_LEN]    # Model (index 1, not SubmitDate at index 2)
    row[5] = row[5][:COMMENT_MAX_LEN]  # Comment
    row[4] = str(row[4])               # Score: float to str
    tList.append(row)
    if len(tList) == BATCH_SIZE:
        cursor.executemany(sqlInsert, tList)  # batch insert
        tList = []

if tList:
    cursor.executemany(sqlInsert, tList)  # last, partial batch
conn.commit()
https://github.com/mkleehammer/pyodbc/wiki/Install
code:
import pyodbc

# Length caps applied before inserting. 1024 is the Comment limit ("db size");
# 32 is the cap the original code intended for Model.
MODEL_MAX_LEN = 32
COMMENT_MAX_LEN = 1024
# Number of rows sent per executemany() call.
BATCH_SIZE = 10

# connect to db
conn = pyodbc.connect(
    r'DRIVER={ODBC Driver 13 for SQL Server};'
    r'SERVER=127.0.0.1;'
    r'DATABASE=DB_table;'
    r'UID=yasam;'
    r'PWD=password'
)
cursor = conn.cursor()

# One "?" placeholder per column: the driver binds the values, so quotes in
# the data need no manual '' escaping and cannot alter the statement.
sqlInsert = (
    "INSERT INTO [dbo].[test_table]"
    "(RecordID,Model,SubmitDate,CountryCode,Score,Comment) "
    "VALUES (?,?,?,?,?,?)"
)

# `df` is defined elsewhere; each item is expected to be a sequence ordered
# like the column list: RecordID, Model, SubmitDate, CountryCode, Score, Comment.
tList = []
for i, d in enumerate(df):
    print(i)
    row = list(d)  # copy so the caller's data is not modified
    row[1] = row[1][:MODEL_MAX_LEN]    # Model (index 1, not SubmitDate at index 2)
    row[5] = row[5][:COMMENT_MAX_LEN]  # Comment
    row[4] = str(row[4])               # Score: float to str
    tList.append(row)
    if len(tList) == BATCH_SIZE:
        cursor.executemany(sqlInsert, tList)  # batch insert
        tList = []

if tList:
    cursor.executemany(sqlInsert, tList)  # last, partial batch
conn.commit()
reference:
pyodbc 用法
https://my.oschina.net/zhengyijie/blog/35587
Inserting multiple rows in a single SQL query
https://stackoverflow.com/questions/452859/inserting-multiple-rows-in-a-single-sql-query
Pyodbc query string quote escaping
使用兩個''避免或用?方式
unicode 問題(沒遇到)
http://blog.csdn.net/samed/article/details/50539742
2017年7月13日 星期四
python3 gensim id2token not found
# Read every file under ./data (sorted for a stable order) and stack the
# resulting frames into one DataFrame.
rspList = sorted(glob.glob('./data/*'))
df = pd.concat([pd.read_csv(path) for path in rspList])

# Words that are filtered out of every sentence.
stoplist = set('i am you are he she is a for of the and to in'.split())

# Keep only rows that carry feedback; '\\N' marks "no response".
feedback = df['translated_feedback']
sents = feedback[feedback != '\\N']

# Tokenise each sentence: apply trans_table (defined elsewhere; used here to
# strip punctuation), lowercase, split on whitespace, drop stopwords.
# Sentences left with no tokens are skipped.
texts = []
for sent in sents.values:
    words = [word for word in sent.translate(trans_table).lower().split()
             if word not in stoplist]
    if words:
        texts.append(words)

# Count how often each token occurs across all sentences.
feq = defaultdict(int)
for token in (tok for text in texts for tok in text):
    feq[token] += 1

# Remove tokens that occur only once.
texts = [list(filter(lambda token: feq[token] > 1, text)) for text in texts]

# Build and persist the token dictionary.
dic = corpora.Dictionary(texts)
dic.save('./dictionary.dict')

# Build the bag-of-words corpus and write it in Matrix Market format.
corpus = list(map(dic.doc2bow, texts))
corpora.MmCorpus.serialize('./corpus.mm', corpus)
When I try to read id2token from dic, it is an empty dictionary {}
dic.id2token
{}
I have to traverse (iterate over) the dic once first
# Walk through every (id, token) pair once; the values are not needed,
# only the iteration itself, after which dic.id2token is filled in.
for _ in dic.items():
    pass
dic.id2token
{0:'yes',1:'got',2:'it'}
# Read every file listed in rspList (defined elsewhere) and stack the
# resulting frames into one DataFrame.
df = pd.concat([pd.read_csv(path) for path in rspList])

# Words that are filtered out of every sentence.
stoplist = set('i am you are he she is a for of the and to in'.split())

# Keep only rows that carry feedback; '\\N' marks "no response".
feedback = df['translated_feedback']
sents = feedback[feedback != '\\N']

# Tokenise each sentence: apply trans_table (defined elsewhere; used here to
# strip punctuation), lowercase, split on whitespace, drop stopwords.
# Sentences left with no tokens are skipped.
texts = []
for sent in sents.values:
    words = [word for word in sent.translate(trans_table).lower().split()
             if word not in stoplist]
    if words:
        texts.append(words)

# Count how often each token occurs across all sentences.
feq = defaultdict(int)
for token in (tok for text in texts for tok in text):
    feq[token] += 1

# Remove tokens that occur only once.
texts = [list(filter(lambda token: feq[token] > 1, text)) for text in texts]

# Build and persist the token dictionary.
dic = corpora.Dictionary(texts)
dic.save('./dictionary.dict')

# Build the bag-of-words corpus and write it in Matrix Market format.
corpus = list(map(dic.doc2bow, texts))
corpora.MmCorpus.serialize('./corpus.mm', corpus)
When I try to read id2token from dic, it is an empty dictionary {}
dic.id2token
{}
I have to traverse (iterate over) the dic once first
# Walk through every (id, token) pair once; the values are not needed,
# only the iteration itself, after which dic.id2token is filled in.
for _ in dic.items():
    pass
dic.id2token
{0:'yes',1:'got',2:'it'}
訂閱:
文章 (Atom)