#!/bin/tcsh -f
#(ie run the cshell on this but don't read the .cshrc)
#
# medquery: convert a saved PubMed page, a PubMed ID (PMID) or a PubMed
# Central ID (PMCID) into a BibTeX entry.  This first part prints the
# version, sets the configuration variables, prints the help text when no
# argument is given, and turns the first argument into $pubmedpage
# (an existing file, or a generated one-line "fake" page for a bare id).
echo version = 1.89 of medquery 2022 Oct 27
# 2022 Oct 27, 1.89: give note part of doi in $doifile when doi updates
# 2022 Oct 13, 1.88: medlinebib now fixed to produce \url, not \htmladdnormallink
# 2022 Jan 10, 1.87: doi not being recorded in /tmp/doi? no ... actually missing
# 2021 Sep 23, 1.86: \htmladdnormallink -> \url and remove 2nd url
# 2021 May 15, 1.85: if no comment with date, use pdfdate to construct it
# 2021 Mar 15, 1.84: grep: invalid repetition count(s) because of macOS
# 2020 Sep 09, 1.83: abandon wget, use curl as default
# 2019 Jun 05, 1.82: clean up
# 2019 Apr 30, 1.81: if there is no doi, try to get one using pmiddoi
# 2019 Apr 30, 1.80: replace wget with curl if there is no wget
# 2018 Aug 06, 1.79: backup
# 2018 Aug 06, 1.78: say bibkey for doi
# 2018 Aug 03, 1.77: say 'got doi' only once
# 2018 Jul 23, 1.76: capture pmcid along with doi if it is new
# 2018 Jun 19, 1.75: upgrade the generated medlinebibp
# 2018 Jan 13, 1.74: Schneider2006 should be only one url ...
# 2018 Jan 13, 1.73: refining ...
# 2018 Jan 13, 1.72: also give htmladdnormallink give new doi material!
# 2018 Jan 13, 1.71: give new doi material!
# 2017 Jul 07, 1.70: fix documentation single quotes on sig
# 2017 Mar 02, 1.69: add to dependencies
# 2017 Jan 25, 1.68: spelling correction
# 2017 Jan 18, 1.66: smarter todo/missing statement
# 2017 Jan 15, 1.65: clearly report what sd compares
# 2016 Sep 08, 1.64: todo or MISSING
# 2016 Aug 05, 1.63: pubmed changed the way the pmid is reported - broken
#   problem was in yvp
# 2016 Jul 29, 1.62: https
# 2016 Jun 22, 1.61: debug
# 2016 Jun 22, 1.60: debug
# 2016 Jun 22, 1.59: if already in allbib, compare and look for todos
# 2016 Jan 20, 1.58: key check must include ends: '{',','
# 2016 Jan 14, 1.57: put a copy of the query in /tmp
# 2015 Dec 10, 1.56: reduce say of all.bib
# 2015 Oct 22, 1.55: reset to -4 for wget
# 2015 Oct 14, 1.53: unmatched '
# 2015 Oct 14, 1.52: .checkref to check against main bibliography
# 2015 Sep 14, 1.51: report exact wget string; removing -4 flag now works!!!!
# 2015 Mar 02, 1.50: add -4 flag to wget to avoid eutil delays
# 2015 Feb 26, 1.49: pretend to be mozilla browser to avoid eutil delays?
# 2015 Jan 29, 1.48: remotesay problem
# 2015 Jan 28, 1.47: include start and end times for time record
# 2015 Jan 28, 1.46: instrument medquery to record time if > 1 second
# 2014 Feb 19, 1.45: remove dashes from names
# 2013 Aug 26, 1.44: remove query2
# 2013 Jun 17, 1.43: upgrade website reference
# 2011 Oct 05, 1.42: remove underscore (_) from key names!
# 2010 Sep 04, 1.41: update documentation
# 2010 Aug 31, 1.40: update address
# 2009 Apr 22, 1.39: if error
# 2008 Aug 15, 1.38: clean up use of files.
# 2008 Jul 31, 1.37: more unmatched quote hunt, -q involved ... got it!
# 2008 Jul 30, 1.36: unmatched quote hunt
# 2008 Jun 25, 1.35: bibkey produced
# 2008 Jun 10, 1.34: -q
# 2008 Jun 07, 1.33: allow PubmedCentral numbers like PMC178597
# 2008 May 28, 1.32: wget refused by pubmed! Message:
#   1: id: 16790843 Error occurred: Error 111 (Connection refused)
#   change wget call
# 2008 Apr 16, 1.31: document 'pure' better
# 2007 Nov 14, 1.30: better failure message; pmid gives result!
# 2007 Nov 14, 1.29: documentation of what medquery looks for
# 2007 Jun 11, 1.28: documentation
# 2007 Jun 11, 1.27: cleanup
# 2007 Jun 11, 1.26: require argument for file name
# 2006 Jul 19, 1.25: fails - PubMed changed again? "ERROR 500: Server Error"
# 2006 Jun 6, 1.24: Bus error crash
# 2006 Mar 5, 1.23: crash?
# 2005 Aug 18, 1.22: upgrade medlinebibp
# 2005 May 11, 1.21: save the PAGE now (as query.fcgi.html or query.fcgi)
# 2004 May 15, 1.20: pubmedgrab 12086598 crashes - [] problem
# 2004 May 13, 1.19: handle 'PMID: 13918161 PubMed - OLDMEDLINE for Pre1966'
# 2004 Apr 22, 1.18: eutils now functional
# 2004 Apr 7, 1.17: use E-utilities to get entry more cleanly (failed)
# 2004 Mar 18, 1.16: handle ... to get PMID properly
#   NOTE(review): a word (probably an HTML tag) is missing from the 1.16 entry
# 2003 Jul 15, 1.15: now working with "E-Utilities"
# 2003 Jul 15, 1.14: PubMed format changed to "E-Utilities" - broke this script!
# 2002 May 4, 1.13: handle bibquery being empty when medlinebib fails
# 2001 May 24, 1.12: pubmed format changed! This fixes it
# 2001 Mar 29, 1.11: make medquery handle html if the person uses that to save.
# 2001 Mar 29, 1.10: rename query0 query2
# 2000 Jan 24, 1.05: use pmid preferentially
# 1999 Nov 22, 1.03: medquery now uses query.fcgi from the new pubmed
# origin 1999 Sep 5 from mq

# Variables to record delays
set timing = 1 # default is to report time
set timereport = medquery-time.txt
set alloweddelay = 2 # seconds allowed for initial wget

# temporary files:
# $doifile is emptied on every run, including help and '-q' runs.
set doifile = /tmp/doi
echo -n '' > $doifile
set tmpsdiff = "/tmp/`whoami`sdiff-.medquery"

# Location that PDFs are stored using the ~/now directory
set pdflocation = none
if (-d ~/now) then
  if (-d ~/now/pdf) then
    set pdflocation = ~/now/pdf
  endif
endif

# The tilde stays literal inside the double quotes; it is expanded later,
# where $dotmedquery is used unquoted.
set dotmedquery = "~/.medquery"

# 2019 Apr 30: the output files should be local, not '~/'!
#set bibformat = ~/bibformat # original
#set bibkey = ~/bibkey # original
#set bib = ~/bib # original
set bibformat = bibformat # revised 2019 Apr 30
set bibkey = bibkey # revised 2019 Apr 30
set bib = bib # revised 2019 Apr 30

# A lone '-q' means the calling script had an empty argument: leave quietly
# instead of printing the whole help text.
if ($#argv == 1) then
  if ("$1" == "-q") then
    echo "Quitting by '-q' to prevent giving entire help list."
    exit
  endif
endif

if ($#argv == 0) then
  # No argument at all: print the help text and stop.
  echo 'usage: medquery [pubmed page OR pubmed ID OR pubmed central ID]'
  echo 'Convert a saved PubMed reference page into BibTeX format'
  echo
  echo 'The medquery script accepts a single argument, the name of the file'
  echo 'containing a PubMed web page.'
  echo '(Note: The only item that medquery cares about on the page is the'
  echo 'PubMed id. It looks for the string "PMID: " at the beginning'
  echo 'of a line, followed by the pubmed ID number.'
  echo 'It uses only that number for further processing.'
  echo 'You can make a file containing "PMID: [number]" alone.)'
  echo
  echo 'If you give a PubMed Id as the argument, then the program'
  echo 'constructs the necessary files and will give you the results.'
  echo
  echo 'In the current directory several files are created:'
  echo ' - query (the original saved PubMed page is moved here)'
  echo ' - The bibformat file will contain the last entry.'
  echo ' - The bibkey file will contain the key for the last entry.'
  echo ' - The bib file will contain all entries.'
  echo ' - The medlinebibp file controls the medlinebib program.'
  echo 'Note that the bib file is always appended to,'
  echo 'so results from previous runs will be included.'
  # echo 'Spaces in the key name are repaced in medlinebib by dashes.'
  # 2014 Feb 19:
  echo 'Spaces in the key name are removed so that the key name has two'
  echo 'parts in medlinebib. This makes picking up the key name easier'
  echo 'by cut/paste.' # zzz
  echo
  echo 'PubMed is a database of biology-related references at'
  # echo 'https://www.ncbi.nlm.nih.gov/PubMed/medline.html' # gone: 2007jun11
  echo 'https://www.ncbi.nlm.nih.gov/sites/entrez?db=pubmed'
  echo
  echo 'Using your browser, save the entire web page that contains a'
  echo 'single reference of interest and medquery will extract the'
  echo 'PubMed ID (PMID) from that. Then medquery obtains the medline'
  echo 'format file (using curl or wget) and converts it automatically to'
  echo 'BibTeX format.'
  echo
  echo 'The key of the entry is generated from the first author,'
  echo 'the last author and the year.'
  echo 'You may need to edit the entry to make sure that italics'
  echo 'and special symbols are correct.'
  echo
  echo '* Information about LaTeX (a typesetting language) and BibTeX'
  echo '(a database language for references in papers) is at:'
  echo 'https://alum.mit.edu/www/toms/latex.html'
  echo
  echo '* Medquery uses the program medlinebib:'
  echo 'https://alum.mit.edu/www/toms/delila/medlinebib.html'
  echo
  echo '* Medquery uses curl by default now.'
  echo 'It can also use wget if there is no curl is availble:'
  echo 'https://alum.mit.edu/www/toms/wget.html'
  echo
  echo '* You can use medquery with atchange:'
  echo 'https://alum.mit.edu/www/toms/atchange.html'
  echo "To use it, put an 'automate' in your home directory containing"
  echo 'three lines:'
  echo ''
  echo 'query'
  echo ' clear'
  echo ' medquery query -q'
  echo ''
  echo 'This assumes that the file saved from PubMed is called "query"'
  echo 'Start the automation by typing'
  echo ''
  echo ' atchange automate'
  echo ''
  echo 'Then find your reference in PubMed and simply save it to your'
  echo 'home directory in the name "query".'
  echo
  echo 'PubMed changes their format once in a while so this script'
  echo 'needs to be updated frequenty.'
  echo
  echo 'If the argument is "-q", medquery quits to prevent'
  echo 'this error message from being shown when an argument'
  echo 'is empty in a script.'
  echo
  echo 'If the user has defined a ~/.medquery file, then this ie'
  echo 'expected to contain the location of bibliography file(s)'
  echo 'to check for duplicate key names in.'
  echo
  echo 'If the user has a ~/now directory with a link pdf pointing'
  echo 'to the location of pdfs, then when a bibentry does not have a'
  # Double quotes so that the real file name is shown; in single quotes
  # the text "$doifile" was printed literally.
  echo "dated comment, one can be constructed in $doifile"
  echo
  # Report the threshold that is actually tested ($alloweddelay), which is
  # not one second.
  echo "If the curl/wget takes more than $alloweddelay seconds, the time will be"
  echo "recorded into the file $timereport"
  echo
  echo 'Dependencies:'
  echo ' medlinebib'
  echo ' curl'
  echo ' wget (if no curl is available)'
  echo 'Dependencies for the time report:'
  echo ' tomdate'
  echo ' diffdate'
  echo ' clmn'
  echo 'Dependencies for capturing new doi data:'
  echo ' rb'
  echo ' heta'
  echo ' remotesay'
  echo
  echo 'Thomas D. Schneider, Ph.D.'
  echo 'toms@alum.mit.edu'
  echo 'https://alum.mit.edu/www/toms '
  exit
else
  # The first argument is either the name of a saved page or a bare id.
  set pubmedpage = "$1"
  if ("$pubmedpage" == '') then
    echo "empty argument for medquery"
    exit
  endif
  if !(-f "$pubmedpage") then
    echo "there is no file named $pubmedpage"
    # is it a number?
    # Strip all digits: nothing left means a PMID, "PMC" left means a PMCID.
    # The argument is quoted so that blanks or glob characters in it cannot
    # abort the script before the message below is printed.
    set pure = `echo "$pubmedpage" | tr -d '0123456789'`
    echo "Remove digits from argument, is it a pure number? Result: '$pure'"
    # if ("$pure" == "") then
    if (("$pure" == "")||("$pure" == "PMC")) then
      # example of Pubmedcentral number:
      # PMC178597
      # Write a one-line page holding only the id; it is parsed later just
      # like a saved page.
      set tmp = "/tmp/`whoami`.medquery"
      if ("$pure" == "PMC") then
        echo "PMC id found, so creating 'fake' pubmed page"
        echo "PMCID: $pubmedpage" > $tmp
      else
        echo "Pure number found, so creating 'fake' pubmed page"
        echo "PMID: $pubmedpage" > $tmp
      endif
      set pubmedpage = $tmp
    else
      echo "This is not a pure number, can't be a PMID."
      exit
    endif
  else
    echo "Using $1 as the pubmed page"
  endif
endif

# ******************************************************************************
# 2007 Jun 11: NOTE that the name has apparently changed again, to
# entrez.html
# 2008 Aug 15: they abandoned that and now give [PMID].html
# ******************************************************************************
# 2003 July 15
#
# https://www.nlm.nih.gov/pubs/techbull/ma03/ma03_technote.html#eutil
# PubMed® to Complete Transition to E-Utilities
# and Manually Constructed URLs
#
# April 03, 2003 [posted]
#
# In July 2002, NCBI announced the availability of new
# programming for the Entrez Utilities (E-Utilities) and informed
# utility users that they should convert URLs to the new format by
# the end of 2002.
#
# NCBI will phase out the old utilities completely in June 2003. This
# may affect customers of some products such as EndNote®,
# ProCite®, and Reference Manager®. Please contact user support
# for your respective product if you have questions. Questions
# concerning the use of E-Utilities can be sent to:
# eutilities@ncbi.nlm.nih.gov.
#
# If you have manually created links to PubMed that contain the
# string: /htbin-post/, these should be changed to follow the
# specifications provided on the page, Linking to PubMed and other
# Entrez Databases. These changes must be in place prior to June
# 2003.
#
# Entrez Utilities (E-Utilities)
# https://www.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html
#
# Linking to PubMed and other Entrez Databases.
# https://www.ncbi.nlm.nih.gov/entrez/query/static/linking.html
# ******************************************************************************

# Main part of medquery.  Starting from $pubmedpage it:
#   1. moves the page to ./query and extracts the PMID (or PMCID / UI),
#   2. fetches the MEDLINE-format record with curl (wget as fallback),
#   3. runs medlinebib to create bibformat, appends it to bib, writes bibkey,
#   4. tries to replace a MISSING doi by means of pmiddoi,
#   5. compares the new entry with every bibliography listed in ~/.medquery.

# $query is the name of the file used by medlinebib:
set query = query
set query1 = /tmp/`whoami`-1.medquery
set query2 = /tmp/`whoami`-2.medquery

# Give $reportingtime a value on every path.
set reportingtime = 0

if (-f "$pubmedpage") then
  # Under atchange the argument already is "query"; moving a file onto
  # itself only produces an error message from mv.
  if ("$pubmedpage" != "$query") then
    mv "$pubmedpage" $query
  endif
endif

if (-f $query) then

  # medlinebib reads its parameters from ./medlinebibp; create a default one
  # when it is missing.  The here-document text must stay in column 0.
  if !(-f medlinebibp) then
    echo 'creating new medlinebibp file'
    cat > medlinebibp << EOF
2.08 version of medlinebibp that this parameter file is designed for.
n 'd' = debug
n 'e' = do everything
f 'f' = use final author, otherwise second author
d 'd' = double dash page numbers: 1--5, otherwise single dash.
70 The title line size, in characters.
f date in a comment: ' ': none; f: full
MISSING
EOF
  endif

  # detect html form
  # The pattern that used to be grepped for here has been lost (the line
  # read: grep "" $query), and an empty pattern matches every line, so every
  # non-empty file was treated as HTML and the plain-text branch below could
  # never run.
  # NOTE(review): '<html' (any case) is assumed to be a suitable marker for a
  # saved web page - confirm against the original script.
  set htmlhits = `grep -i -c '<html' $query`
  if ($htmlhits == 0) then
    echo the file is not html
    # the tr changes control M's to returns
    # in case the mac format was used
    # cat $query
    # extract the id line:
    # 2001 May 24: bug version:
    # set line = `cat "$query" | tr " " "\n" | tr ";" "," | grep "PMID:"`
    # remove brackets they just introduced!!
    # set line = `cat "$query" | tr " " "\n" | tr -d '[]' | tr ";" "," | grep "PMID:"`
    # normal line:
    # PMID: 3357886 PubMed - indexed for MEDLINE
    # ancient line:
    # PMID: 13918161 PubMed - OLDMEDLINE for Pre1966
    #echo "*******************************************************************"
    #cat "$query"
    #echo "*******************************************************************"
    #exit
    # set line = "`grep '^PMID: ' $query`"
    # 2008 Jun 07 accoutn for PMCID;
    set line = "`egrep '(^PMID|^PMCID): ' $query`"
    # tr -d '[]' | tr ";" "," | `
    echo The PMID or PMCID containing line is:
    echo "$line"
    # clean the line:
    # set pmid0 = `echo $line | tr "," "\n" | grep "PMID: " | sed -e "s/PMID: //"`
    # remove the new junk they put in just before 2001 May 24
    # set pmid = `echo $pmid0 | sed -e "s/PubMed - indexed for MEDLINE//"`
    # that's not enough, they change the message,
    # eg PMID: 11358999 [PubMed - in process]
    # echo "PMID line is '$pmid0'"
    # set pmid = `echo $pmid0 | tr -d "A-Z"`
    # naw... try again:
    # from man tr:
    # When the -c option is specified with -d, all characters except those
    # specified by string1 will be deleted. The contents of string2 will be
    # ignored, unless the -s option is also specified.
    # so... delete everything on the line EXCEPT digits:
    # set pmid = `echo $line | tr -cd "[:digit:]"`
    # that fails on the ancient line that has numbers!!
    # The id is the second blank-separated word of the line.
    set pmid = `echo "$line" | tr " " '\012' | head -2| tail -1`
    echo "PMID is '$pmid'"
    # find the UI if it is on the line:
    set ui = `echo $line | tr "," "\n" | grep "UI: " | sed -e "s/UI: //"`
    echo " UI is '$ui'"
  else
    echo the file is html
    # Put every tag and bracket on a line of its own, then take the first
    # line that mentions PMID.
    set pmid = \
      `cat $query | tr '<>[]' "\n\n\n\n" | grep PMID | sed -e "s/PMID: //"|head -1`
    echo PMID is '"'$pmid'"'
    # No UI for HTML:
    set ui = ""
  endif

  # see what we found and act accordingly: give preference to PMID
  if ("$pmid" == "") then
    if ("$ui" == "") then
      echo "PMID and UI are missing"
      echo "HALT"
      exit
    else
      set uid = "$ui"
      # $dbkind was never set on this path, so the echo below stopped the
      # script with "dbkind: Undefined variable."
      # NOTE(review): assumes a UI should be looked up in db=pubmed - confirm.
      set dbkind = pubmed
    endif
  else
    # set uid = "$pmid"
    if (`echo $pmid | grep PMC` == '') then
      set dbkind = pubmed
      set uid = "$pmid"
    else
      set dbkind = pmc
      set uid = `echo "$pmid" | sed "s/PMC//"`
    endif
  endif

  echo
  echo ID used is: $uid
  echo kind of database is: $dbkind

  # ***************************************************************************
  # given the $uid, grab the entry
  # ***************************************************************************
  # NEW FUNCTIONAL METHOD: using eutils
  # 2004 Apr 22
  # Retmode & rettype are outlined in the EFetch documentation at:
  # https://eutils.ncbi.nlm.nih.gov/entrez/query/static/efetchlit_help.ht-
  # ml#Retrieval Mode
  #
  # If you add retmode=text, you should get what you want.
  # https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&i-
  # d=14602927&r ettype=medline&retmode=text
  #
  # Please forward any questions about e-utilities to:
  # eutilities@ncbi.nlm.nih.gov.
  #
  # Sincerely,
  # N. Ruiz
  # National Library of Medicine
  # 2008 Jun 7
  # help page for efetch at NCBI:
  # https://www.ncbi.nlm.nih.gov/entrez/query/static/efetch_help.html
  # Obtain the medline format entry for the reference
  # using retmode=text:
  if ($dbkind == 'pubmed') then
    # original:
    set reportingtime = 0
    if ($timing) then
      set begtime = `tomdate`
    endif
    set wgetstring = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=${uid}&rettype=medline&retmode=text"
    #
    # wget -O $query1 "$wgetstring"
    #
    # 2015 Feb 26: PRETEND TO BE A MOZILLA BROWSER
    # 2015 Mar 02: -4 to "to force an IPv4 connection"
    # 2015 Sep 14: this FAILED REPEATEDLY SUDDENLY!
    # echo "wget -4 --user-agent=mozilla -O $query1 $wgetstring"
    # wget -4 --user-agent=mozilla -O $query1 "$wgetstring"
    # echo "wget --user-agent=mozilla -O $query1 $wgetstring"
    # wget --user-agent=mozilla -O $query1 "$wgetstring"
    # test:
    # wget --user-agent=mozilla -O /tmp/test 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=25486560&rettype=medline&retmode=text'
    #
    # 2015 Oct 22: 75 second delays again! add -4 ...
    # wget -4 --user-agent=mozilla -O /tmp/test 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=25486560&rettype=medline&retmode=text'
    #
    # 2020 Sep 09 - wget failed 'Unable to establish SSL connection.'
    # on toot but not lap or egg. Use curl as default.
    # if (`which wget|grep '/'|wc -l` > 0) then
    if (`which curl|grep '/'|wc -l` == 0) then
      echo "No curl available, using wget"
      if (`which wget|grep '/'|wc -l` == 0) then
        echo "Neither curl nor wget are availble. Halting"
        exit
      endif
      echo "wget -4 --user-agent=mozilla -O $query1 $wgetstring"
      wget -4 --user-agent=mozilla -O $query1 "$wgetstring"
    else
      # 2019 Apr 30: replace wget with curl
      set Q = '"'
      echo "curl --user-agent mozilla -o $query1 ${Q}$wgetstring${Q}"
      curl --user-agent mozilla -o $query1 "$wgetstring"
    endif
    echo "$wgetstring"
    # sleep 2 # use this to test the timing trigger
    if ($timing) then
      # Append a record to $timereport when the fetch took longer than
      # $alloweddelay.
      set endtime = `tomdate`
      set t = `diffdate $begtime $endtime | clmn 1`
      if ("$t" > "$alloweddelay") then
        echo "" >> $timereport
        echo "$wgetstring" >> $timereport
        echo -n "$begtime $endtime " >> $timereport
        diffdate $begtime $endtime >> $timereport
        set reportingtime = 1
      endif
    endif
  else
    # PubMed Central path: fetch, show the result and stop (see the
    # message at the end of this branch).
    echo ====================
    if ($dbkind == 'pmc') then
      # wget -O $query1 "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=$dbkind&id=${uid}&rettype=medline&retmode=text"
      if (`which wget|grep '/'|wc -l` > 0) then
        wget -4 -O $query1 "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=$dbkind&id=${uid}&rettype=medline&retmode=text"
        echo wget -4 -O $query1 "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=$dbkind&id=${uid}&rettype=medline&retmode=text"
      else
        # 2019 Apr 30: replace wget with curl
        set Q = '"'
        # The closing double quote was missing from this echo.
        echo "curl -o $query1 ${Q}https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=$dbkind&id=${uid}&rettype=medline&retmode=text${Q}"
        curl -o $query1 "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=$dbkind&id=${uid}&rettype=medline&retmode=text"
      endif
    endif
    echo ====================
    cat $query1
    # NOTE(review): 'a' is not defined in this script; presumably a local
    # command of the author - confirm.
    a $query1 > /dev/null
    echo ====================
    echo not funtional as of 2008 jun 8, email sent
    exit
  endif

  # 2008 May 28
  # error!!
  # for call:
  # medquery 16790843
  # 1: id: 16790843 Error occurred: Error 111 (Connection refused)
  # try setting agent:
  # wget --user-agent=seamonkey -O $query1 "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=${uid}&rettype=medline&retmode=text"
  # that failed.
  # 2004 Mar 18: Keep the html so that one keeps the PMID:
  # this is now handled by medlinebib
  cat $query1 |\
    cat > $query
  echo ----============================= medline format:
  cat $query
  echo ----=============================
  if (`cat $query |wc -c` == 0) then
    echo "$query file is empty\!"
    echo "for PMID ${uid}"
    exit
  endif

  echo begin running ----- medlinebib
  medlinebib
  echo done running ----- medlinebib

  if !(-f bibformat) then
    echo 'The medlinebib program failed to produce a bibformat file!'
    exit
  endif

  # Disabled stop-gap from 1.86, kept for reference.
  if (0) then
    # 2022 Oct 13, 1.88: medlinebib now fixed to produce \url, not \htmladdnormallink
    # 2021 Sep 23
    echo "*** Temporary code until I can compile medlinebib again."
    echo "*** \htmladdnormallink -> \url"
    echo "*** remove second URL from note"
    echo "*** BEFORE:"
    heta -a bibformat
    set tb = /tmp/`whoami`-tmp.mq
    mv bibformat $tb
    # cat $tb |
    # sed 's/\\htmladdnormallink/\\url/' |
    # cat > bibformat
    echo '' > bibformat # first line is blank but foreach misses it
    set skipnext = 0
    foreach line ("`cat $tb`")
      if (`echo "$line"|grep '^note = '|wc -l` > 0) then
        echo "$line" |\
          sed 's/\\htmladdnormallink/\\url/' |\
          cat >> bibformat
        set skipnext = 1
      else
        if ($skipnext == 0) then
          echo "$line" >> bibformat
        endif
        set skipnext = 0
      endif
    end
    echo "*** AFTER:"
    sd $tb bibformat
    heta -a bibformat
  endif

  set filesize = `cat bibformat | wc -c | tr -d " "`
  if ($filesize == 0) then
    echo 'The medlinebib program failed: the bibformat file is empty!'
    exit
  endif
  if ($filesize < 5) then
    echo 'The medlinebib program failed:'
    echo "filesize is $filesize"
    echo 'The bibformat file is NEARLY empty!'
    exit
  endif

  cat bibformat >> bib
  echo "THE BIBLIOGRAPHY is IN FILE $bibformat"
  echo "CONCATENATED BIBLIOGRAPHIES ARE IN FILE $bib"
  # The key is what follows the last '{' on the second line of bibformat,
  # with commas removed.
  cat bibformat |\
    head -2 |\
    tail -1 |\
    tr '{' '\n' |\
    tr -d ',' |\
    tail -1 |\
    cat > bibkey
  echo "The key for this entry is in $bibkey"
  echo "---- bibformat file contains: ----"
  cat bibformat
  echo "----------------------------------"
  echo " "
  # remove query file so that it is not in the way for the next file
  # mv $query $query2
  # $query is just a copy of $query1 so just delete it!!!!
  cp $query /tmp
  rm -f $query
  # echo "The query file used by medlinebib was moved to $query2"
else
  # There is no query file, but we don't want to say this because
  # it is designed to be used with atchange. When the file is moved away
  # atchange will call medquery and we should just end gracefully.
  echo "Medquery is DONE - there is no query file."
  echo ""
  # Stop here.  Previously the script went on to test $reportingtime, which
  # is unset on this path, and died with "Undefined variable".
  exit
endif

if ($reportingtime) then
  echo "Time for wget/curl exceeded ${alloweddelay} seconds"
  echo "Result reported in file ${timereport}:"
  tail -2 $timereport
  remotesay "`tail -1 $timereport | clmn 3` seconds"
endif

# If medlinebib marked the doi as MISSING, ask pmiddoi for one and patch it
# into $bibformat.
if (`grep doi.org $bibformat|grep MISSING|wc -l` > 0) then
  echo "*** The new entry does NOT have a doi ..."
  echo "*** See if there is a doi link on the pubmed page using pmiddoi:"
  if (`which pmiddoi|grep '/'|wc -l` > 0) then
    # set pmid = ${pmid}5
    echo "pmid: $pmid"
    set tmppmiddoi = /tmp/`whoami`-pmiddoi.medquery
    pmiddoi $pmid > $tmppmiddoi
    cat $tmppmiddoi
    set newdoi = `tail -1 $tmppmiddoi`
    echo "newdoi: '$newdoi'"
    if ("$newdoi" != '') then
      set fixdoi = `echo "$newdoi"|sed 's/dx.doi.org/doi.org/'`
      # echo "fixdoi: '$fixdoi'"
      set missingdoi = 'https://doi.org/\\todobf{MISSING}'
      set tmpoldbibformat = /tmp/`whoami`-bibformat.medquery
      set tmpfixbibformat = /tmp/`whoami`-fixedbibformat.medquery
      # Earlier experiments that sat inside this pipeline:
      #sed "s|\\todobf{MISSING}|molly cat|"
      #sed "s|\\todobf{MISSING}|molly cat|"
      #sed "s|{MISSING}|sky bird|"
      #sed "s|MISSING|fun city|"
      cat $bibformat |\
        sed "s|$missingdoi|$fixdoi|" |\
        cat > $tmpfixbibformat
      # heta -a $tmpfixbibformat
      if (`diff $bibformat $tmpfixbibformat|wc -l` > 0) then
        echo "The fixed doi is: '$fixdoi'"
        echo "The doi was identified by pmiddoi. Old format:"
        cp $bibformat $tmpoldbibformat
        heta -a $tmpoldbibformat
        echo "New bibformat format:"
        cp $tmpfixbibformat $bibformat
        heta -a $bibformat
      endif
    else
      echo "There is no doi on the PubMed page of $pmid - giving up"
    endif
  else
    echo "You need pmiddoi to get DOIs from LinkOut on pubmed pages."
  endif
endif

# ~/.medquery lists bibliography files; report whether the new key is
# already present in each and collect doi/pmcid/comment text in $doifile.
if (-f $dotmedquery) then
  # From here on $bibkey holds the key itself, not the file name.
  set bibkey = `cat $bibkey`
  set and = /tmp/`whoami`.and # file produced by the 'and' script
  foreach bibfile (`cat $dotmedquery`)
    # Disabled debugging code, kept for reference.
    if (0) then
      # 2021 Mar 15 this now fails with 'grep: invalid repetition count(s)'
      # if (`grep "\{${bibkey}," $bibfile|wc -l` > 0) then
      # tests:
      echo BUBBA 2
      echo "BUBBA 3 grep \{${bibkey}, $bibfile"
      grep "\{${bibkey}," $bibfile|wc -l
      echo "BUBBA 3.5"
      grep "${bibkey}," $bibfile|wc -l
      echo "BUBBA 3.6"
      grep "\{${bibkey}," $bibfile
      echo "BUBBA 3.7"
      grep "\{${bibkey}," $bibfile
      echo "BUBBA 3.8"
      set BACKSLASH = '\'
      grep "$BACKSLASH$bibkey," $bibfile
      echo "BUBBA 3.9"
      grep "$BACKSLASH${bibkey}," $bibfile
      echo "BUBBA 4.0"
    endif
    if (`grep "$bibkey," $bibfile|wc -l` > 0) then
      set shortbibfile = `echo "$bibfile"|tr '/' '\n'|tail -1`
      # $s is the summary that is printed and spoken at the end of the loop.
      set s = "$bibkey is already in $shortbibfile"
      # the ',' makes it less likely to pick up other entries "from ..."
      # 2018 Aug 06: ',' was not in use!
      # echo "and $bibkey,"
      # and "$bibkey" article
      # 2018 Aug 06: use '\{'?
      # echo "and \{$bibkey,"
      # No, the \ is ignored and the shell objects to the 'Missing }'!
      # Use the trailing comma to try to get the bibkey to be unique.
      and "${bibkey}," article
      echo "----- comparison: ----- sd $and $bibformat -----"
      sd $and $bibformat
      if (`cat $and |grep 'todo'|wc -l` > 0) then
        set s = "$s but it has a 'todo'"
        set hastodo = 1
      else
        set hastodo = 0
      endif
      if (`cat $and |grep 'MISSING'|wc -l` > 0) then
        if ($hastodo) then
          set s = "$s and a 'MISSING'"
        else
          set s = "$s but it has a 'MISSING'"
        endif
      endif
      if (`grep doi.org $and|wc -l` == 0) then
        echo "The current entry does not have doi ..."
        if (`grep doi.org $bibformat|wc -l` > 0) then
          echo "The new entry DOES have doi ..."
          # change tabs to spaces:
          # (this tr had become ' ' -> ' ', which changes nothing; the
          # patterns used on $tmpsdiff below look for blanks around the
          # sdiff markers)
          sdiff $and $bibformat |\
            tr '\t' ' ' |\
            cat > $tmpsdiff
          echo -n '' > $doifile
          #
          # 2018 Jul 23 SPECIAL: NEW: capture pmcid too
          set pmcid = pmcid
          if (`cat $tmpsdiff| egrep '\||>'|grep $pmcid|wc -l` > 0) then
            # 2022 Oct 27 don't add pmcid if it is MISSING:
            grep "$pmcid" $bibformat |\
              grep -v MISSING >> $doifile
            echo "GRABBING PMCID TOO"
          endif
          #
          set h = htmladdnormallink
          if (`cat $tmpsdiff| egrep '\||>'|grep $h|wc -l` > 0) then
            set Q = '"'
            echo "Provide the $h"
            echo "note = $Q\$h" >> $doifile
          #else
          # echo -n '' > $doifile
          endif
          #echo "tmpsdiff = $tmpsdiff"
          #heta -a $tmpsdiff
          # 2022 Oct 27 add the note part for the user:
          echo 'note = "\url' >> $doifile
          # Keep the changed/added sdiff lines, then isolate the doi http:
          # one word per line, keeping only the doi.org link that is not
          # MISSING.
          # (formerly also considered: grep -v 'comment = ')
          cat $tmpsdiff |\
            egrep ' > | \| ' |\
            sed 's/ > //' |\
            rev |\
            rb |\
            rev |\
            tr ' ' '\n' |\
            grep 'http' |\
            grep 'doi.org' |\
            grep -v 'MISSING' |\
            cat >> $doifile
          echo "The doi text is in $doifile"
          heta -a $doifile
          if (`cat $tmpsdiff|grep MISSING|grep doi.org|wc -l` > 0) then
            set s = "MISSING doi for $bibkey"
          else
            set s = "got doi for $bibkey"
          endif
        endif
      endif
      if (`grep '^comment = "' $and|wc -l` == 0) then
        echo "The current entry does not have a comment ..."
        # 2021 May 15, 1.85:
        # if no comment with date, use pdfdate to construct it
        set bibkeypdf = $pdflocation/${bibkey}.pdf
        if (-f $bibkeypdf) then
          echo "The pdf exists at $bibkeypdf"
          #pdfdate $bibkeypdf
          set originalpdfdate = `pdfdate $bibkey`
          echo "Original pdf date = $originalpdfdate"
          echo -n 'comment = "' >> $doifile
          echo -n "$originalpdfdate" >> $doifile
          echo ' date from original PDF",' >> $doifile
          echo "Revise the ${doifile}:"
          heta -a $doifile
        endif
      endif
    else
      echo "$bibkey is not already in $bibfile"
      set s = ''
    endif
    if ("$s" != '') then
      echo
      echo "$s"
      remotesay "$s" &
    endif
  end
endif

echo; echo "$0 end"
exit
# ********************************************************************************
# ********************************************************************************
# ********************************************************************************
# ********************************************************************************