#############################################################################
##
## RISImporty.py
##
## I wrote a quick python script that reads RIS format files containing
## one or more references and inserts them into the default
## OpenOffice.org Bibliography/'biblio' database. The setup of this
## script is relatively straight-forward, and the current RIS tag to
## 'biblio' column mapping has worked great for me for the few
## engineering electronic portals that I use (e.g., IEEEXplore, Inspec,
## MathSciNet). Additionally, as I will briefly explain here, the mapping
## (in some cases relatively advanced) can be easily customized to fit
## your need (i.e., different reference sources and their slightly
## different interpretations of the RIS format).
##
## Copyright (c) 2005 Michael Sowka
##
#############################################################################
##
## Modified by David Wilson 11/11/2006 dnwilson@bigpond.com
##
## Added mapping of RIS tag KY to Annote, to deal with Zotero's use of the
## KY tag
##
#############################################################################

import getopt
import re
import sys

# The UNO bridge is only needed for the actual database insert.  Importing it
# leniently lets the parsing/mapping code (and the -t test mode) run with a
# Python that has no UNO runtime; insertToOOo() refuses to run without it.
try:
    import uno
    # import com.sun.star.sdbcx.Privilege   TODO: why can't I do this?
    from com.sun.star.sdb.CommandType import TABLE
except ImportError:
    uno = None
    TABLE = None

# the mapping of RIS article types to OOo biblio types
#   ABST:  Abstract                  -> Miscellaneous
#   ADVS:  Audiovisual               -> Miscellaneous
#   ART:   Art Work                  -> Miscellaneous
#   BILL:  Bill/Resolution           -> Miscellaneous
#   BOOK:  Book, Whole               -> Book
#   CASE:  Case                      -> Miscellaneous
#   CHAP:  Chapter                   -> Book excerpt with title
#   COMP:  Computer program          -> Miscellaneous
#   CONF:  Conference proceedings    -> Conference proceedings
#   CTLG:  Catalog                   -> Brochures
#   DATA:  Data file                 -> Miscellaneous
#   ELEC:  Electronic Citation       -> WWW document
#   GEN:   Generic                   -> Miscellaneous
#   HEAR:  Hearing                   -> Miscellaneous
#   ICOMM: Internet Communication    -> e-mail
#   INPR:  In Press                  -> Unpublished, ***CHECK***
#   JFULL: Journal, Full             -> Journal
#   JOUR:  Journal                   -> Journal
#   MAP:   Map                       -> Miscellaneous
#   MGZN:  Magazine                  -> Journal
#   MPCT:  Motion picture            -> Miscellaneous
#   MUSIC: Music score               -> Miscellaneous
#   NEWS:  Newspaper                 -> Miscellaneous
#   PAMP:  Pamphlet                  -> Brochures
#   PAT:   Patent                    -> Miscellaneous
#   PCOMM: Personal communication    -> e-mail
#   RPRT:  Report                    -> Research report
#   SER:   Serial (Book, Monograph)  -> Miscellaneous
#   SLIDE: Slide                     -> Miscellaneous
#   SOUND: Sound recording           -> Miscellaneous
#   STAT:  Statute                   -> Miscellaneous
#   THES:  Thesis                    -> Thesis
#   UNBIL: Unenacted bill/resolution -> Miscellaneous
#   UNPB:  Unpublished work          -> Unpublished
#   VIDEO: Video recording           -> Miscellaneous
#   STD:   Standard                  -> Techn. documentation (!UNDOCUMENTED!)
typeMapping = {
    "ABST": 10, "ADVS": 10, "ART": 10, "BILL": 10, "BOOK": 1,
    "CASE": 10, "CHAP": 5, "COMP": 10, "CONF": 3, "CTLG": 2,
    "DATA": 10, "ELEC": 16, "GEN": 10, "HEAR": 10, "ICOMM": 15,
    "INPR": 14, "JFULL": 7, "JOUR": 7, "MAP": 10, "MGZN": 7,
    "MPCT": 10, "MUSIC": 10, "NEWS": 10, "PAMP": 2, "PAT": 10,
    "PCOMM": 15, "RPRT": 13, "SER": 10, "SLIDE": 10, "SOUND": 10,
    "STAT": 10, "THES": 9, "UNBIL": 10, "UNPB": 14, "VIDEO": 10,
    "STD": 8,
}

# the mapping of RIS tags to OOo biblio db columns
#   BT - title for book references
#   CT - title for chapter references
#   T3 - series title
#   Y2 - this could be YYYY/MM/DD, CHECK
#   RP - the reprint status of a reference
#   SP - start page number, CHECK
#   EP - end page number, CHECK
#   KW - assuming annotations "is" keywords
#   ET - edition, ?UNDOCUMENTED?
#   U1 - a sort of ISBN for articles
#   U2 - Keywords -> Annote
#   JA - Periodical (abbreviation) -> Journal (!UNDOCUMENTED!)
fieldMapping = {
    "TY": "Type", "TI": "Title", "BT": "Booktitle", "CT": "Chapter",
    "T1": "Title", "T2": "Title", "T3": "Series", "AU": "Author",
    "A1": "Author", "ED": "Editor", "A2": "Editor",
    "A3": "Editor", "PY": "Year", "Y1": "Year", "Y2": "Month",
    "RP": "Note", "N1": "Note", "AB": "Custom1",
    "N2": "Custom1", "SP": "Pages", "EP": "Pages",
    "JF": "Journal", "JO": "Journal", "VL": "Volume",
    "IS": "Number", "AD": "School", "CP": "Address",
    "CY": "Address", "PB": "Publisher", "SN": "ISBN",
    "UR": "URL", "KW": "Annote", "U1": "ISBN",
    "U2": "Annote", "U3": "Custom3", "U4": "Custom4",
    "U5": "Custom5", "M1": "Custom3", "M2": "Custom4",
    "M3": "Custom5", "ID": "Identifier", "ET": "Edition",
    "JA": "Journal", "KY": "Annote",
}

# the type (RIS TY) sensitive mapping of RIS tags to OOo biblio db columns
sensFieldMapping = {
    ("CONF", "CT"): "Title",
    ("RPRT", "CT"): "Title",
    ("ELEC", "CT"): "Title",
}

# A tagged RIS line: two-character tag, spaces, a dash, then the value.
# search() (not match()) is used on purpose so that leading junk such as a
# byte-order mark in front of the first tag does not hide the line.
_TAG_LINE = re.compile(r'(?P<tag>[A-Z0-9]{2}) +- *(?P<value>.*)')

_RULE = "*********************************************"


def _report(message):
    """Print *message* framed by two rules of asterisks."""
    # A single parenthesised string keeps this valid on Python 2 and 3.
    print("\n%s\n %s\n%s\n" % (_RULE, message, _RULE))


def parseRISFile(fileName):
    """Read the RIS file *fileName* and return its references.

    Each reference is a dict mapping a RIS tag to its value.  A tag that
    occurs several times, or whose value continues on following untagged
    lines, has the pieces joined with ", ".  The "ER" tag ends a reference
    and is not stored.  A final reference that is not closed by "ER" is
    still returned.  Untagged text that appears before any tag of a
    reference is ignored, because there is nothing to attach it to.
    """
    # the list of references to be returned
    references = []
    # a reference placeholder, and the tag a continuation line belongs to
    ref = {}
    tag = None

    with open(fileName, 'r') as risFile:
        for line in risFile:
            matching = _TAG_LINE.search(line)
            if matching is not None:
                tag = matching.group("tag")
                # check if this is the terminating tag
                if tag == "ER":
                    references.append(ref)
                    ref = {}
                    tag = None
                    continue
                # otherwise, store the tag and value; rstrip drops a stray
                # "\r" or trailing blanks that would end up in the database
                value = matching.group("value").rstrip()
                if tag in ref:
                    ref[tag] += ", " + value
                else:
                    ref[tag] = value
            elif line.strip() != "" and tag is not None:
                # didn't match, but not empty, i.e., value cont'd
                ref[tag] += ", " + line.strip()

    # keep a trailing record that has no terminating ER line
    if ref:
        references.append(ref)
    return references


def transformRISToOOo(referencesRIS):
    """Map parsed RIS references onto OOo 'biblio' columns.

    *referencesRIS* is a list of tag -> value dicts as returned by
    parseRISFile(); the dicts are not modified.  Returns a list of
    column -> value dicts.  "Type" holds the integer from typeMapping;
    every other column holds the RIS value, with values that land in the
    same column joined with ", ".

    A reference whose TY tag is missing or not in typeMapping is reported
    and mapped without a "Type" column and without type-sensitive
    overrides.  An unrecognized tag is reported and ends the program via
    sys.exit(1).
    """
    # the list of OOo format references to be returned
    referencesOOo = []

    for refRIS in referencesRIS:
        # a OOo references placeholder
        refOOo = {}

        # check if there exists an ID, if not make one up
        # TODO: do this according to the "AUTH05" 'standard'
        # NOTE(review): the placeholder depends only on the number of tags,
        # so two references can receive the same identifier.
        if "ID" not in refRIS:
            refOOo["Identifier"] = "bla" + str(len(refRIS))

        # there must be a type tag... else complain about it
        typeRIS = refRIS.get("TY")
        if typeRIS in typeMapping:
            refOOo["Type"] = typeMapping[typeRIS]
        else:
            if typeRIS is None:
                _report("ERROR: The RIS tag TY is missing")
            else:
                _report("ERROR: The RIS type %s is not recognized!" % typeRIS)
            # no usable type: disable the type-sensitive lookups below
            typeRIS = None

        for tag in refRIS:
            # the type has been dealt with above
            if tag == "TY":
                continue

            # map the RIS tag to OOo column, checking the sensitive mapping
            # first
            if (typeRIS, tag) in sensFieldMapping:
                column = sensFieldMapping[(typeRIS, tag)]
            elif tag in fieldMapping:
                column = fieldMapping[tag]
            else:
                _report("ERROR: The RIS tag %s is not recognized!" % tag)
                sys.exit(1)

            if column not in refOOo:
                refOOo[column] = refRIS[tag]
            else:
                _report("NOTE, COMBINING: %s AND %s"
                        % (refOOo[column], refRIS[tag]))
                refOOo[column] += ", " + refRIS[tag]

        referencesOOo.append(refOOo)

    return referencesOOo


def insertToOOo(references, host="localhost", port=2002):
    """Insert *references* as new rows of the 'biblio' table.

    Connects to an office instance listening on a socket at *host*:*port*,
    opens the "biblio" table of the "Bibliography" data source as a row set
    and adds one row per reference, writing each column -> value pair with
    updateString().  The row set is disposed even if an insert fails.

    Raises ImportError when the UNO runtime could not be imported, and
    ValueError when a reference names a column the table does not have.
    """
    if uno is None:
        raise ImportError("the 'uno' module is required to insert references")

    url = ("uno:socket,host=%s,port=%s;urp;StarOffice.ComponentContext"
           % (host, port))

    localCtx = uno.getComponentContext()
    resolver = localCtx.ServiceManager.createInstanceWithContext(
        "com.sun.star.bridge.UnoUrlResolver", localCtx)
    ctx = resolver.resolve(url)

    rowSet = ctx.ServiceManager.createInstanceWithContext(
        "com.sun.star.sdb.RowSet", ctx)
    try:
        rowSet.DataSourceName = "Bibliography"
        rowSet.CommandType = TABLE
        rowSet.Command = "biblio"
        rowSet.execute()

        # map the table column names to their (1-based) indices once,
        # keeping the first index should a name occur twice
        rsMetaData = rowSet.getMetaData()
        columnIndex = {}
        for i in range(1, rsMetaData.getColumnCount() + 1):
            columnIndex.setdefault(rsMetaData.getColumnName(i), i)

        for ref in references:
            rowSet.moveToInsertRow()
            for column in ref:
                if column not in columnIndex:
                    raise ValueError(
                        "column %r is not in the biblio table" % (column,))
                rowSet.updateString(columnIndex[column], ref[column])
            rowSet.insertRow()
    finally:
        rowSet.dispose()


def main(argv=None):
    """Run the importer; *argv* defaults to the command line arguments.

    Usage: RISImporty.py [-t] FILE.  With -t the file is parsed and mapped
    but nothing is inserted.  When several file names are given the last
    one is used.  Returns the process exit status.
    """
    if argv is None:
        argv = sys.argv[1:]

    # gnu_getopt accepts the -t switch before or after the file name
    optlist, args = getopt.gnu_getopt(argv, "t")
    if not args:
        print("Usage: RISImporty.py [-t] FILE")
        return 2

    fileName = args[-1]
    print("Loading RIS file: %s" % fileName)
    references = transformRISToOOo(parseRISFile(fileName))

    # do the actual insert if we're not testing
    if ("-t", "") in optlist:
        print("NOTE: This was a test, values weren't actually inserted.")
    else:
        insertToOOo(references)
    return 0


if __name__ == "__main__":
    sys.exit(main())