# Created by Leo from: C:\Python23\Tom\leo\Image_seeker.leo
# << ImgQueryLib declarations >>
"""
/***************************************************************************
    Image Querying :: python database engine and wavelet/image transforms
                             -------------------
    begin                : Sat Dec 14 2002
    copyright            : (C) 2002 by Ricardo Niederberger Cabral
    email                : nieder@mail.ru
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************
 *   Wavelet algorithms, metric and query ideas based on the paper         *
 *   Fast Multiresolution Image Querying                                   *
 *   by Charles E. Jacobs, Adam Finkelstein and David H. Salesin.          *
 *                                                                         *
 ***************************************************************************/

TODO:
    - Try using the same query and standard transforms on another color
      space and then using a weighted sum as the final candidate score
"""

# sys and traceback are imported first so that the failure handler below can
# always report the problem, whichever of the remaining imports is missing.
import sys
import traceback

try:
    import time
    import os
    import bisect
    import md5
    import marshal  # for loading and saving img databases
    from string import *
    import Image
    # from wxPython.wx import *
    # import urllib2
    import StringIO
except ImportError:
    traceback.print_exc()
    print("Error importing the necessary python modules. Unable to continue.")
    sys.exit()

#try:
#    from qt import *
#except:
#    traceback.print_exc()
#    print "You system doesn't seem to have PyQT installed. Please install it before running this application. Please, see http://www.riverbankcomputing.co.uk/pyqt/download.php"
#    sys.exit()

####### DEFINES
# thats what QT seems to support. gif and jpg may not be present.
# TODO: detect it
supported_ext = ["jpg", "gif", "bmp", "png", "xbm", "pnm"]
# name of the db section files added manually will appear when doing a
# browse by dir.
manually_added = "Manually added/"

####### GLOBAL VARS
# Maps a coefficient index (row * 128 + col) to the weight bin used by
# Engine.query(). Filled in place by initBin() at the end of this module.
bin = []

# Side length of the square every image is scaled to before the transform,
# and the resulting number of coefficients per color channel.
_SIDE = 128
_NUM_COEFS = _SIDE * _SIDE
# -- end -- << ImgQueryLib declarations >>


def initBin():
    """Initialize the table telling which coeff bin to use for a given
    wavelet coef.

    The table is rebuilt in place, so calling this more than once does not
    grow it and existing references to ``bin`` stay valid.
    """
    bin[:] = [min(max(i, j), 5) for i in range(_SIDE) for j in range(_SIDE)]


class sortpair:
    """Helper class used when sorting the list of top X matches.

    ``pair`` is ``[item, score]``. Instances order by ascending score, which
    is what Engine.query() relies on: the lowest (most negative) score is the
    best match and must sort first.
    """

    def __init__(self, a, b):
        self.pair = [a, b]

    def __lt__(self, other):
        return self.pair[1] < other.pair[1]


class ImgDB:
    """Image database: file list, coefficient buckets, directory tree,
    similarity groups and per-file metadata, persisted with ``marshal``."""

    def __init__(self, ndbname="", chgCb=None):
        """ndbname is the filename for a database to open on object creation
        chgCb is a callback function invoked on every db change
        """
        # saved on the beginning of marshalled db files. For version db
        # detection and handling
        self.dbversion = 2
        # pairs, [filename, average YIQ color]. A removed file keeps its
        # slot (bucket entries are list indexes) with filename set to "".
        # TODO: change self.files into a dictionary. So the checking done on
        # self.addFile for re-adding an existing file is faster.
        self.files = []
        self.meta = {}  # key is filename, value is dictionary
        # coef buckets: 16384 entries of [color][positive, negative] lists
        # holding indexes into self.files
        self.buckets = []
        self.dirs = {manually_added: []}  # key is dir name, value is filename list
        # key is dir name, value is parent dir name. None if root dir
        self.dirtree = {manually_added: None}
        self.fname = ndbname  # the filename of the currently opened database
        self.opened = 0  # whether a db has been successfully opened
        self.dirty = 0  # has db been changed since last save ?
        self.groups = {}  # key is filename, value is list of similar images fnames

        #### Thumbnail stuff
        # paths for storing thumbnails. Calculated only once here.
        self.thdir = os.path.expanduser(os.path.join("~", ".thumbnails", ""))
        self.thdir2 = os.path.expanduser(
            os.path.join("~", ".thumbnails", "normal", ""))

        #### image metadata:
        self.imageParms = ["Description", "Dimensions", "Filesize", "Filename"]
        # blank dictionary with available keys. To be copied later to every
        # new db file
        self.blankMetaDict = {}
        for par in self.imageParms:
            self.blankMetaDict[par] = None

        # just to make sure the thumbnail dirs exist, so there is no need to
        # check over and over on self.getThumb(). Parent first.
        for thumbDir in (self.thdir, self.thdir2):
            if not os.path.exists(thumbDir):
                os.mkdir(thumbDir)

        self.changeCallback = chgCb
        if ndbname:
            self.opendb()
        else:
            self.reset()
        self.eng = Engine(self)

    def _notifyChange(self):
        """Invoke the change callback, if one was supplied."""
        if self.changeCallback:
            self.changeCallback()

    def _thumbPath(self, fname):
        """Return the thumbnail cache path for fname: the md5 of its file://
        URI, following http://triq.net/~pearl/thumbnail-spec/"""
        return self.thdir2 + md5.new("file://" + fname).hexdigest() + ".png"

    def _hasFile(self, fname):
        """Return 1 if fname is already listed in self.files, else 0."""
        for entry in self.files:
            if entry[0] == fname:
                return 1
        return 0

    def remove_dead(self):
        """Drop every database file that no longer exists on disk.

        The entry keeps its slot in self.files (bucket indexes depend on
        positions) but its filename becomes "", which Engine.query() skips.
        """
        for entry in self.files:
            deadName = entry[0]
            # "" marks an entry that was already removed earlier.
            if not deadName or os.path.exists(deadName):
                continue
            print("Removed from database the dead file: " + deadName)
            entry[0] = ""
            # Use the remembered name: entry[0] is already blank here.
            for names in self.dirs.values():
                try:
                    names.remove(deadName)
                except ValueError:
                    pass  # file was not listed under this dir
            self.dirty = 1
        self._notifyChange()

    def rescan(self):
        """recalculate coefficients for all images on the database and
        regenerate the db buckets

        Not implemented: currently this only fires the change callback.
        """
        # TODO: store filesize to determine if a file should be rescanned or
        # not ? this feature doesn't seem to be needed, as its very unlikely
        # that images will be changed, only removed.
        self._notifyChange()

    def reset(self):
        """reset current db, should be called on File-> New"""
        self.buckets = []
        self.files = []
        self.meta = {}  # otherwise metadata of the previous db would linger
        self.opened = 1
        self.dirty = 1
        self.dirs = {manually_added: []}  # key is dir name, value is filename list
        # key is dir name, value is parent dir name. None if root dir
        self.dirtree = {}
        self.groups = {}  # key is filename, value is list of similar images
        self._notifyChange()

    def opendb(self):
        """Load the database stored in self.fname.

        Returns 1 on success. On any failure the db is reset to an empty one
        and 0 is returned. Version 1 files carry no metadata; it is rebuilt
        from the file list.

        NOTE: marshal is not safe against malicious data; only open database
        files from a trusted source.
        """
        print("Opening database file " + self.fname + " ...")
        rdbversion = 1  # database version read from db file
        try:
            f = open(self.fname, "rb")
            try:
                rdbversion = marshal.load(f)
                self.files = marshal.load(f)
                self.buckets = marshal.load(f)
                self.dirs = marshal.load(f)
                self.dirtree = marshal.load(f)
                self.groups = marshal.load(f)
                if rdbversion > 1:
                    self.meta = marshal.load(f)
            finally:
                f.close()
            self.opened = 1
        except Exception:
            print("Error opening database, starting with an empty one.")
            self.reset()
            return 0

        if rdbversion == 1:
            # import a dbversion 1 db and increment it to the next version so
            # the next check catches it and does the proper imports for the
            # next version
            print("Importing version 1 image database")
            rdbversion = rdbversion + 1
            for fil in self.files:
                name = fil[0]
                info = self.blankMetaDict.copy()
                try:
                    info["Filesize"] = str(os.stat(name).st_size)
                except OSError:
                    pass  # file vanished since the db was written
                info["Filename"] = os.path.split(name)[-1]
                self.meta[name] = info
        print("Done.")
        self._notifyChange()
        return 1

    def changefname(self, nname):
        """Set the filename the database will be saved to and mark it dirty."""
        self.fname = nname
        self.dirty = 1
        self._notifyChange()

    def savedb(self):
        """Write the database to self.fname. Returns 1 on success, 0 on error."""
        try:
            f = open(self.fname, "wb")
            try:
                # Order matters: opendb() reads the parts back in sequence.
                for part in (self.dbversion, self.files, self.buckets,
                             self.dirs, self.dirtree, self.groups, self.meta):
                    marshal.dump(part, f)
            finally:
                f.close()
        except Exception:
            traceback.print_exc()
            print("Error saving database.")
            return 0
        self.dirty = 0
        print("Saved database file:  " + self.fname)
        return 1

    def updateGroups(self, app, simthresd=50):
        """groups by similarity. app is the main application (for
        processEvents calls; may be None). simthresd is the similarity
        threshold for choosing groups

        Rebuilds self.groups: each file not yet claimed by an earlier group
        becomes a key whose value lists the files scoring above the threshold
        against it.
        """
        print("Grouping with threshold of  %s" % (simthresd,))
        self.groups = {}
        self.dirty = 1
        solved = {}  # filenames already placed in some group (used as a set)
        for entry in self.files:
            if app:
                app.processEvents()
            name = entry[0]
            if not name or name in solved:  # "" is a removed file
                continue
            solved[name] = 1
            members = []
            self.groups[name] = members
            aImage = self.getThumb(name)
            if aImage is None:
                print("Autogroupping error: Unable to generate thumbnail")
                continue
            results = self.eng.query(aImage, 50)
            if not results:
                print("exception: image returned no similar img")
                continue
            for res in results:
                # scores are negative for good matches, hence the sign flip
                if -res.pair[1] > simthresd:
                    members.append(res.pair[0])
                    solved[res.pair[0]] = 1

    def hashStr(self, str):
        """Return the hexadecimal md5 digest of str."""
        return md5.new(str).hexdigest()

    def getThumb(self, fname):
        """returns a 128x128 image of the file fname. If a thumbnail is found,
        open it, otherwise create one. Using the standard from
        http://triq.net/~pearl/thumbnail-spec/

        Returns None when the image cannot be loaded or a bad cached
        thumbnail cannot be replaced.
        """
        # TODO: not taking in consideration that fname may contain ~. Decided
        # not to do a os.path.expanduser because that may slow down this code
        thname = self._thumbPath(fname)
        if not os.path.exists(thname):
            try:
                # resize() returns a new image; it does not work in place.
                aImage = Image.open(fname).convert("RGB").resize((_SIDE, _SIDE))
            except Exception:
                traceback.print_exc()
                print("Error loading image file while creating a thumbnail for it:" + fname)
                return None
            try:
                aImage.save(thname, "PNG")
            except (IOError, OSError):
                pass  # the cache is best effort; the image is still usable
            return aImage

        # load it from thumbnail file
        try:
            aImage = Image.open(thname)
            cachedSize = tuple(aImage.size)
        except Exception:
            aImage = None
            cachedSize = None
        if cachedSize != (_SIDE, _SIDE):
            # TODO: if the thumbnail available is not 128x128 I'll be
            # unpolite and replace it with a 128x128 one
            try:
                os.remove(thname)
            except OSError:
                print("Error regenerating thumbnail for " + thname)
                return None
            return self.getThumb(fname)
        if aImage.mode != "RGB":
            aImage = aImage.convert("RGB")  # calcHaar needs 3 color bands
        return aImage

    def _storeImage(self, aImage, imgDims, fname, isDir):
        """Index an already scaled 128x128 RGB image under fname: signature,
        buckets, dir listing, thumbnail cache and metadata."""
        sig = self.eng.calcHaar(aImage, 40)
        if not len(sig):
            print("Error adding file  " + fname)
            return
        data = sig[1]
        if not self.buckets:
            for i in range(_NUM_COEFS):
                # color[positive, negative]
                self.buckets.append([[[], []], [[], []], [[], []]])
        lenf = len(self.files)  # index the new file will get
        for i in range(_NUM_COEFS):
            coef = data[i]
            slot = self.buckets[i]
            for c in range(3):
                if coef[c] > 0:
                    slot[c][0].append(lenf)
                elif coef[c] < 0:
                    slot[c][1].append(lenf)
        self.files.append([fname, sig[0]])
        self.dirty = 1

        if not isDir:  # file being manually added.
            self.dirtree[manually_added] = None
            self.dirs.setdefault(manually_added, []).append(fname)
        else:
            self.dirs.setdefault(isDir, []).append(fname)

        ## save thumbnail
        thname = self._thumbPath(fname)
        if not os.path.exists(thname):
            try:
                aImage.save(thname, "PNG")
            except (IOError, OSError):
                # The file is already indexed; a missing cache entry is
                # regenerated later by getThumb().
                print("Error saving thumbnail for " + fname)

        ## init metadata
        try:
            info = self.blankMetaDict.copy()
            self.meta[fname] = info
            info["Filename"] = os.path.split(fname)[-1]
            info["Filesize"] = str(os.stat(fname).st_size)
            info["Dimensions"] = str(imgDims[0]) + " x " + str(imgDims[1])
        except Exception:
            print("Error gathering metadata for  " + fname)
            traceback.print_exc()

    def addFile(self, fname, isDir="", callBack=None):
        """adds this filename to database. First, loading it, then
        calculating its haar transform and then adding the image index to all
        respective buckets.
        Each self.files list element is [filename, avg lum]
        isDir should be set to the path file if this file is being added as a
        batch dir
        callBack is [MainApp, QProgress] (currently unused here)
        """
        if not fname:
            print("Attempt to open an empty file string")
            return ()
        if self._hasFile(fname):
            print(fname + " already on database.")
            return
        try:
            aImage = Image.open(fname).convert('RGB')
            imgDims = aImage.size  # original dimensions, kept as metadata
            aImage = aImage.resize((_SIDE, _SIDE))
        except Exception:
            traceback.print_exc()
            print("Error opening/decoding image file " + fname)
            return
        self._storeImage(aImage, imgDims, fname, isDir)

    def addImage(self, aImage, fname):
        """adds an already loaded image to the database under the name fname.
        The image is scaled to 128x128, its haar transform is calculated and
        the image index is added to all respective buckets.
        The file is listed under the manually added section.
        """
        if not fname:
            print("Attempt to open an empty file string")
            return ()
        if self._hasFile(fname):
            print(fname + " already on database.")
            return
        imgDims = aImage.size
        if aImage.mode != "RGB":
            aImage = aImage.convert("RGB")  # calcHaar needs 3 color bands
        aImage = aImage.resize((_SIDE, _SIDE))
        self._storeImage(aImage, imgDims, fname, "")

    def addURL(self, fname, isDir="", callBack=None):
        """adds this URL to database.

        Not implemented: the body below is disabled, so this does nothing and
        returns None. Parameters mirror addFile().
        """
        # rootpath ='c:\\python22\\urlimg'
        # try:
        #     tom = urllib2.urlopen(fname)
        #     tomstr = tom.read()
        #     #typofimage = os.path.splitext(path)[1]
        #     #print typofimage
        #     aImage = WXToPIL(wxImageFromStream(StringIO.StringIO(tomstr)),'RGB')
        #     newpath = os.path.join(rootpath,os.path.split(fname)[1])
        #     del tom
        #     aImage.save(newpath)
        #     print 'new path',newpath
        #     self.addFile(newpath)
        #     return aImage
        #     del aImage
        # except:
        #     traceback.print_exc()
        #     print "Error opening/decoding image file " + fname
        #     return

    def extIsImg(self, file):
        """tests if this file has a supported image format extension.
        Returns 1 or 0."""
        dot = file.rfind(".")
        if dot == -1:
            return 0
        if file[dot + 1:].lower() in supported_ext:
            return 1
        return 0

    def addDir(self, path, recursive=0, callBack=None, calledRecurs=0):
        """Add all the files on this dir.

        recursive: also descend into sub directories (hidden ones excluded).
        callBack: optional [MainApp, QProgress] pair used for progress
        reporting and cancellation.
        calledRecurs: set on recursive calls so only the top dir becomes a
        root of self.dirtree.
        Returns 1 if an image was found directly in path, 0 if not, and None
        when the dir is empty or the operation was cancelled.
        """
        print("Adding dir  " + path)
        dfiles = os.listdir(path)
        if not dfiles:
            return
        if not path.endswith('/'):
            path = path + '/'
        hasImage = 0
        if callBack:
            callBack[1].setLabelText("Adding directory \"" + path + "\" ...")
            callBack[1].reset()
            callBack[1].setTotalSteps(len(dfiles))
        self.dirs.setdefault(path, [])
        for file in dfiles:
            if callBack:
                callBack[1].setProgress(callBack[1].progress() + 1)
                callBack[0].processEvents()
                if callBack[1].wasCancelled():
                    self._notifyChange()
                    return
            if recursive and os.path.isdir(path + file):
                if file[0] == '.':
                    # do not add dirs starting with . (also excludes
                    # .thumbnail dirs)
                    continue
                self.addDir(path + file, 1, callBack, 1)
                # update paths tree
                self.dirs.setdefault(path, [])
                self.dirtree[path + file + "/"] = path
                continue
            if self.extIsImg(file):
                # its a file, now just check if its an image
                if file[0] == '.':
                    continue  # do not add imgs starting with .
                hasImage = 1
                self.addFile(path + file, path)
        if not calledRecurs:
            self.dirtree[path] = None
        print("Finished adding directory.")
        return hasImage


def cenRoot(i):
    """Scale a 3-channel pixel by 1/sqrt(128) (row normalization)."""
    return [i[0] / 11.314, i[1] / 11.314, i[2] / 11.314]


def cenRoot2(i):
    """Scale a single channel value by 1/sqrt(128) (column normalization)."""
    return i / 11.314


class Engine:
    """Wavelet signature calculation and querying against an ImgDB."""

    def __init__(self, ndb):
        self.curdb = ndb

    def calcHaar(self, aImage, M=40):
        """Calculate the truncated Haar wavelet signature of aImage.

        aImage must be a 128x128 image whose pixels have at least 3 bands
        (RGB). M (>= 1) controls how many of the largest coefficients per
        channel survive truncation.
        Returns (avgl, data): avgl is the [Y, I, Q] value at index 0 of the
        transform and data is a list of 16384 [Y, I, Q] coefficients where
        the ones that did not survive are zeroed.
        """
        # convert to yiq colorspace
        data = []
        for i in aImage.getdata():
            data.append([
                (i[0] * 0.299 + i[1] * 0.587 + i[2] * 0.114) / 256.0,
                (i[0] * 0.596 + i[1] * (-0.274) + i[2] * (-0.322)) / 256.0,
                (i[0] * 0.212 + i[1] * (-0.523) + i[2] * 0.311) / 256.0])

        # decompose rows
        for row in range(_SIDE):
            base = row * _SIDE
            # A=A/root(h)
            data[base:base + _SIDE] = [cenRoot(px)
                                       for px in data[base:base + _SIDE]]
            h = _SIDE
            while h > 1:
                h = h // 2
                Ab = 2 * h * [0]
                for i in range(h):
                    a = data[base + 2 * i]
                    b = data[base + 2 * i + 1]
                    # averages go to the first half, differences to the second
                    Ab[i] = [(a[0] + b[0]) / 1.414,
                             (a[1] + b[1]) / 1.414,
                             (a[2] + b[2]) / 1.414]
                    Ab[i + h] = [(a[0] - b[0]) / 1.414,
                                 (a[1] - b[1]) / 1.414,
                                 (a[2] - b[2]) / 1.414]
                data[base:base + 2 * h] = Ab

        # decompose cols
        for col in range(_SIDE):
            # A=A/root(h)
            for w in range(_SIDE):
                data[w * _SIDE + col] = [cenRoot2(v)
                                         for v in data[w * _SIDE + col]]
            h = _SIDE
            while h > 1:
                h = h // 2
                Ab = 2 * h * [0]
                for i in range(h):
                    a = data[2 * i * _SIDE + col]
                    b = data[(2 * i + 1) * _SIDE + col]
                    Ab[i] = [(a[0] + b[0]) / 1.414,
                             (a[1] + b[1]) / 1.414,
                             (a[2] + b[2]) / 1.414]
                    Ab[i + h] = [(a[0] - b[0]) / 1.414,
                                 (a[1] - b[1]) / 1.414,
                                 (a[2] - b[2]) / 1.414]
                for w in range(2 * h):
                    data[w * _SIDE + col] = Ab[w]

        avgl = data[0]  # average luminance

        # get largest M coeffs: each list is kept sorted and at length M, so
        # after the scan its first element is the M-th largest magnitude.
        best = [list(range(-9999, -9999 + M)) for c in range(3)]
        for c in range(3):
            top = best[c]
            for px in data:
                bisect.insort_left(top, abs(px[c]))
                del top[0]

        # truncate: keep a coefficient only if it is strictly above the
        # per-channel threshold, zero it otherwise
        data = [[(abs(px[0]) > best[0][0]) * px[0],
                 (abs(px[1]) > best[1][0]) * px[1],
                 (abs(px[2]) > best[2][0]) * px[2]] for px in data]
        return (avgl, data)

    def imagefun(self, pixal):
        """Debug helper: print pixal."""
        print(pixal)

    def query(self, aImage, numres=10, scanned=1):
        """Return the database images most similar to aImage.

        aImage must be a 128x128 RGB image. scanned selects the weight table
        (one for scanned images, the other otherwise).
        Returns a list of at most numres sortpair objects, best match first,
        with pair == [filename, score]; lower scores mean more similar.
        The single best match is always dropped, on the assumption that it is
        the query image itself. An empty database yields [].
        """
        if scanned:
            w = [[5.00, 19.21, 34.37], [0.83, 1.26, 0.36], [1.01, 0.44, 0.45],
                 [0.52, 0.53, 0.14], [0.47, 0.28, 0.18], [0.3, 0.14, 0.27]]
        else:
            w = [[4.04, 15.14, 22.62], [0.78, 0.92, 0.40], [0.46, 0.53, 0.63],
                 [0.42, 0.26, 0.25], [0.41, 0.14, 0.15], [0.32, 0.07, 0.38]]
        files = self.curdb.files
        buckets = self.curdb.buckets
        if not files or not buckets:
            return []  # nothing to compare against
        sig = self.calcHaar(aImage)
        if not len(sig):  # error opening file
            return []
        data = sig[1]

        # start from the weighted difference of the average colors
        score = len(files) * [0]
        for i in range(len(files)):
            score[i] = sortpair(i, 0)
            for c in range(3):
                score[i].pair[1] = (score[i].pair[1] +
                                    w[0][c] * abs(files[i][1][c] - sig[0][c]))

        # reward every db image sharing a coefficient of the same sign
        for i in range(_NUM_COEFS):
            for c in range(3):
                if data[i][c]:
                    pn = 0
                    if data[i][c] < 0:
                        pn = 1
                    for buck in buckets[i][c][pn]:
                        score[buck].pair[1] = score[buck].pair[1] - w[bin[i]][c]

        # get best results: keep a sorted window of the numres+1 lowest scores
        best = []
        numres = numres + 1
        for i in range(len(files)):
            if not len(files[i][0]):
                continue  # removed file
            bisect.insort_left(best, score[i])
            if len(best) > numres:
                del best[numres]
        best = best[1:]  # the top hit is taken to be the query image itself

        # set pair[0] to filename (fullpath)
        for hit in best:
            hit.pair[0] = files[hit.pair[0]][0]
        return best

    def textquery(self, txt, numres=10):
        """Return up to numres sortpair(filename, 0) objects for the files
        having txt as a substring of any of their metadata fields."""
        res = []
        if not txt:
            return res
        meta = self.curdb.meta
        for fil in meta.keys():
            for field in self.curdb.imageParms:
                try:
                    value = meta[fil][field]
                    matched = value and value.find(txt) != -1
                except (KeyError, TypeError, AttributeError, UnicodeError):
                    continue  # missing or non-text field
                if matched:
                    res.append(sortpair(fil, 0))
                    if len(res) >= numres:
                        return res
                    break  # one hit per file is enough
        return res


initBin()