Nov 1, 2006 python

Since string operations are frequently used, I wrote a quick cheat-sheet/reference guide.

Python string

""
	# can include ' inside (but an unescaped " cannot appear inside).
' '
	# can include "" inside.
' ' vs " "
	There is no difference between the two
""" ... """
	# long string which can extend to multiple lines.
" "+" "
	# Concat string.  (Use .join for faster op when joining lots of strings)
	print "abc","def","123"
r"" : raw string literal, no need to escape characters
	r"this\is\still\good\to"
	It is often used for regular expression.
	p = re.compile (r'(\b\w)+\s')
	
str(numericVar or Number) -> "number" in string
str( ) : (python3 is all unicode)
	by default, unicode are in UTF-8.  
	unless changed with # -*- coding: <encoding name> -*- at top of source code

len(' ')
	#get length of string
if not myStr:  # best way to check for empty string
	if var could be some other type than string, then use if myStr =="".
a[n]
	#get nth char of string. Exceeding range will generate error. Cannot change the string by index because string is immutable.
a[-n]
	#get nth char starting from rightmost.
a[i:j]
	# slice, from i to j. i,j can be negative or out of bounds.
	a[:10]; # from index 0 up to, but not including, 10.
"i" in "team" => False
for a in Var: do_something(a);
	#iterate through string to get each char

" "*n
	#string repetition
	"a"*3 # output: aaa. PERL uses 'x'
"str"%(tuple)
	# sim to sprintf(), returns a str, uses same ctrl char in sprintf
	# %m.pd, m=min length, p=precision, d=directive(s,d,f,etc)
	"a=%s,b=%s"%(a,b)  # returns 'a=5,b=7' str
	"%f is big\n"%(3.14)  #
	# %c=char, %f=float w/out exponent
	# %e=float w/ exponent, %g=float w/ or w/out exp
	# %o = octal w/out leading 0, %x=hex w/out leading 0x,
	# %#o= octal w/ leading 0, %#x=hex w/ leading 0x
	# %d=signed decimal, %u=unsigned integer
	# %5d = length of field is 5
	# %4.4f= 4.4 float (ex: 32.1445)
	# %8s=8 character long str
	# "%-5d"  left justified
	# "%+5d" right justified, forces "+" on positive
	# "%#g" % 12  forces a decimal point : 12.0000
	# "% d" precedes a positive number w/ a blank space
	# "%05d" leading empty spaces are filled with '0'.
	
	"This is %s and I am %i" % ("Dan", 42)
	
str%dict
	# sim to str%tuple but uses dict's key value to subst the val
	info={"name":"dave","age":5,"hobby":"tv"}
	"%(name)s is %(age)d yrs old and likes %(hobby)s"%info

oct(x)
	# converts an integer to its octal string representation
hex(x)
	# converts an integer to its hex string representation
chr(x)
	# convert ascii code to str
	chr(90)  # output: 'Z'
ord('a')
	# returns ascii # of char
raw=r'this\t\n\that'   #output: 'this\t\n\that'
multi=""" this is multiple
		line string """

s.lower(), s.upper() : returns lower-case, upper-case
	newStr = s.lower()
s.strip() : strip whitespace at start and end
	newStr=s.strip()
s.isalpha(), s.isdigit(), s.isspace()
s.startswith('abc'), s.endswith('xyz')  : True if starts or ends with the string
s.find('txt'[, start[, end]]): returns index of string if found, -1 if not found
	finds the lowest index.  For highest index, use rfind()
s.rfind('txt'[, start[, end]]):
s.replace('old','new') : search and replace ALL occurrence
s.split('delim'): splits string based on delimiter
	s.split(): assumes all whitespace as delimiter
	"a,b,c".split(',') ==> ['a','b','c']
    "a,b,c".split('\t') # split using tab as delimiter
    * for more advanced split, use re.split() [[python regex]] 
delim.join([lists]): joins using delimiter string
	'--'.join(['a','b','c']) ==> 'a--b--c'
s[sliceBegin:sliceEnd]
	Hello = 01234, or -5,-4,-3,-2,-1
	'Hello'[1:4] ==> 'ell'
	'Hello'[-4] ==> 'e'
	'Hello'[1:] => 1 to end
	'Hello'[:] => whole string
	'Hello'[2:1000] => 2 to end of string
	'Hello'[-2:] => "lo"
	'Hello'[:-2] => "Hel"

s.count(".")  # count # of occurrences of a char in string.

% operator :  printf-like 
	s = "%s is %d years old" % ('james',25)
	line-break '\' continuation doesn't work with %. Must enclose whole line with ( )
	s = ( "%s is %d years old" % 
		('james',25) )
    new python, use .format() instead

Unicode (python 2)

In python3, no need to specify unicode.

u""  (python 2)
	# unicode string. Use \u00xx to add hex unicode char. 
	\u0020=space char(32)
	u'abc'
	str(u"abc")

ur""
	# raw unicode str. Doesn't interpret \ special char. Good if lots of \ are used.
unicode('abc') ==> u'abc'
	type(s) =><type 'unicode'>

unicodeStr = u'Unicode \u018e \xf1'

Convert Unicode string to encoding

u'Hello'.encode('utf-8')

Convert a utf-8 encoded string into internal Unicode format

s = unicode('Hello','utf-8')

s = 'Hello'.decode('utf-8')  #same as above, but slower

Print

print "abc"    # prints a line and includes newline.
print "Jan\nFeb\n"  #\n = newline (but will be printed raw when using %r)
	\\ = \
	\n = newline
	\t = tab
	\" = double quote, \' = single quote
	\N{name} named unicode character
	\uxxxx unicode char
	
" this is 5\" long"   # \" , escape double quote inside string using \

# 3 quotes, can do multiple lines. Quotes inside need no escaping, so no need to \"
print """
	this is "something" strange.
"""	

print without newline.

#py2 use "," at end. However, this adds a blank space at end.
print "....",

for x in range(0,5):
    print x,
# ==> "0 1 2 3 4"

#py3 
print ("...",end="")

printing in place

#(py3)
#http://stackoverflow.com/a/5291396
import time
for x in range (0,5):  
    b = "Loading" + "." * x
    print (b, end="\r")
    time.sleep(1)

# (py2,3) - less elegant
import sys,time

a = 0  
for x in range (0,3):  
    a = a + 1  
    b = ("Loading" + "." * a)
    # \r prints a carriage return first, so `b` is printed on top of the previous line.
    sys.stdout.write('\r'+b)
    time.sleep(0.5)
print(a)

alternative, works on both py2 and py3?

sys.stdout.write('....')

Formatting

choices, has more than what I have written

old way

print ".... %s" % myStrVar

%s = string, %d = decimal, %f = fixed ,  %g = general numbers (picks between f & e)
%r = raw (repr), should use only for debugging / inspection

multiple format: print "... %r %r %r" % (5,6,7)

formatter = "%r %r"
print formatter % (1,2) ==> "1 2"

new way : use format()

See http://pyformat.info/ & https://news.ycombinator.com/item?id=9382055

'{} {}'.format('Apple','Banana')  #==> "Apple Banana"
'{1} {0}'.format('Apple','Banana') #==> "Banana Apple"

'{name} is {age} years old'.format(name="Dan",age=10)

string module (python 2; most of these functions were removed in python 3 - use the str methods instead)

import string
string.find(var,"searchText");
	# find string and returns index position
	# Returns -1 if none.
string.strip("str")
	# returns str with leading & trailing whitespace removed
string.upper("");
	# returns uppercase of string
string.lower("");
	# returns lowercase of string
string.ljust("",size);
	# left justify a string based on size, fills rest with blanks
	# sim to "%-5s" in printf for size=5
string.rjust("",size);
	# sim to "%5s" in printf for size=5
string.center("",size);
string.zfill(number,size);
	# instead fills leading space with 0 for numbers
	string.zfill(1.2,5) #output '001.2'
	string.zfill("31",5) #"00031"
string.join(["str1","str2",...])
	# joins list of strings.... separated by ' ', returns joined str
string.joinfields(["str1","str2",...], separator)
	# same as PERL join()
string.splitfields("str",separator)
	# same as PERL split()
	# returns list of strings
string.replace(str,old,new)
	# replace ("abc,def,ghi",",","_") -> "abc_def_ghi" replaces , with _

stdin, stdout

keyboard input

a=raw_input("prompt")
	# get input from user like PERL <STDIN>, returns str w/out NL.
	age=raw_input("enter your age");

age=raw_input()
name=raw_input("name? ")

also see stdin

StringIO

Simulate stdin/stdout?

def main():
    import StringIO
    sys.stdin = StringIO.StringIO(test_text)
    mapper()
    sys.stdin = sys.__stdin__

Others

sys.stdin.readline()

Tricks and tips

Reverse letters in string

revstr = ''.join(reversed("somestring"))

Sort letters in string

alphabetical = ''.join(sorted("somestring"))
alpha_case_insensitive = ''.join(sorted("something", key=str.lower))  # sorts ignoring case, keeps original case

Reverse words in string

revword = " ".join(mystr.split()[::-1])

Debugging messages, use warnings

import warnings
...
warnings.warn("Some warning",DeprecationWarning)