| 1 | """Common operations on Posix pathnames.
|
| 2 |
|
| 3 | Instead of importing this module directly, import os and refer to
|
| 4 | this module as os.path. The "os.path" name is an alias for this
|
| 5 | module on Posix systems; on other systems (e.g. Mac, Windows),
|
| 6 | os.path provides the same operations in a manner specific to that
|
| 7 | platform, and is an alias to another module (e.g. macpath, ntpath).
|
| 8 |
|
| 9 | Some of this can actually be useful on non-Posix systems too, e.g.
|
| 10 | for manipulation of the pathname component of URLs.
|
| 11 | """
|
| 12 |
|
| 13 | import os
|
| 14 | import sys
|
| 15 | import stat
|
| 16 | import genericpath
|
| 17 | import warnings
|
| 18 | from genericpath import *
|
| 19 | from genericpath import _unicode
|
| 20 |
|
# Names exported by a star-import of this module.
__all__ = [
    # pure string manipulation
    "normcase", "isabs", "join", "splitdrive", "split", "splitext",
    "basename", "dirname", "commonprefix",
    # file-system queries
    "getsize", "getmtime", "getatime", "getctime",
    "islink", "exists", "lexists", "isdir", "isfile", "ismount",
    # traversal, expansion and normalization
    "walk", "expanduser", "expandvars", "normpath", "abspath",
    # identity tests
    "samefile", "sameopenfile", "samestat",
    # constants
    "curdir", "pardir", "sep", "pathsep", "defpath", "altsep", "extsep",
    "devnull",
    # later additions
    "realpath", "supports_unicode_filenames", "relpath",
]
|
| 28 |
|
# Path-related constants for Posix.

# Names of the current and the parent directory.
curdir = '.'
pardir = '..'

# Separators: between a name and its extension, between path components,
# and between the entries of a search path.  There is no alternative
# component separator.
extsep = '.'
sep = '/'
altsep = None
pathsep = ':'

# Default executable search path and the null device.
defpath = ':/bin:/usr/bin'
devnull = '/dev/null'
|
| 38 |
|
| 39 | # Normalize the case of a pathname. Trivial in Posix, string.lower on Mac.
|
| 40 | # On MS-DOS this may also turn slashes into backslashes; however, other
|
| 41 | # normalizations (such as optimizing '../' away) are not allowed
|
| 42 | # (another function should be defined to do that).
|
| 43 |
|
def normcase(s):
    """Return the pathname as given; case normalization is a no-op on Posix."""
    # Nothing to fold: the argument is handed back untouched.
    return s
|
| 47 |
|
| 48 |
|
| 49 | # Return whether a path is absolute.
|
| 50 | # Trivial in Posix, harder on the Mac or MS-DOS.
|
| 51 |
|
def isabs(s):
    """Return True when the pathname starts with a slash, False otherwise."""
    starts_at_root = s.startswith('/')
    return starts_at_root
|
| 55 |
|
| 56 |
|
| 57 | # Join pathnames.
|
| 58 | # Ignore the previous parts if a part is absolute.
|
| 59 | # Insert a '/' unless the first part is empty or already ends in '/'.
|
| 60 |
|
def join(a, *p):
    """Concatenate pathname components, putting a '/' between them as needed.

    A component that starts with '/' replaces everything accumulated before
    it.  When the last component is empty the result ends with a separator.
    """
    joined = a
    for component in p:
        if component.startswith('/'):
            # Absolute component: forget whatever was collected so far.
            joined = component
        elif joined != '' and not joined.endswith('/'):
            joined += '/' + component
        else:
            # Nothing collected yet, or a slash is already in place.
            joined += component
    return joined
|
| 75 |
|
| 76 |
|
| 77 | # Split a path in head (everything up to the last '/') and tail (the
|
| 78 | # rest). If the path ends in '/', tail will be empty. If there is no
|
| 79 | # '/' in the path, head will be empty.
|
| 80 | # Trailing '/'es are stripped from head unless it is the root.
|
| 81 |
|
def split(p):
    """Break a pathname into a "(head, tail)" pair around its last slash.

    tail is whatever follows the final slash; head is what precedes it, with
    trailing slashes removed unless head consists of slashes only.  Either
    part may be empty.
    """
    cut = p.rfind('/') + 1
    head = p[:cut]
    tail = p[cut:]
    # An empty head equals '/' * 0, so this one comparison also covers the
    # "no slash at all" case.
    only_slashes = (head == '/' * len(head))
    if not only_slashes:
        head = head.rstrip('/')
    return head, tail
|
| 90 |
|
| 91 |
|
| 92 | # Split a path in root and extension.
|
| 93 | # The extension is everything starting at the last dot in the last
|
| 94 | # pathname component; the root is everything before that.
|
| 95 | # It is always true that root + ext == p.
|
| 96 |
|
def splitext(p):
    # Delegate to the shared implementation, supplying this module's
    # separator constants.
    root_and_ext = genericpath._splitext(p, sep, altsep, extsep)
    return root_and_ext
# Reuse the docstring of the shared implementation.
splitext.__doc__ = genericpath._splitext.__doc__
|
| 100 |
|
| 101 | # Split a pathname into a drive specification and the rest of the
|
| 102 | # path. Useful on DOS/Windows/NT; on Unix, the drive is always empty.
|
| 103 |
|
def splitdrive(p):
    """Return a "(drive, path)" pair; on Posix the drive part is always the
    empty string and path is the argument itself."""
    drive = ''
    return drive, p
|
| 108 |
|
| 109 |
|
| 110 | # Return the tail (basename) part of a path, same as split(path)[1].
|
| 111 |
|
def basename(p):
    """Return whatever follows the last slash of a pathname (the whole
    pathname when it contains no slash)."""
    # rpartition yields ('', '', p) when there is no slash, so the last item
    # is always the final component.
    return p.rpartition('/')[2]
|
| 116 |
|
| 117 |
|
| 118 | # Return the head (dirname) part of a path, same as split(path)[0].
|
| 119 |
|
def dirname(p):
    """Return the part of a pathname up to its last slash, with trailing
    slashes removed unless that part consists of slashes only."""
    head = p[:p.rfind('/') + 1]
    # An empty head equals '/' * 0 and is therefore left alone as well.
    if head != '/' * len(head):
        head = head.rstrip('/')
    return head
|
| 127 |
|
| 128 |
|
| 129 | # Is a path a symbolic link?
|
| 130 | # This will always return false on systems where os.lstat doesn't exist.
|
| 131 |
|
def islink(path):
    """Return True if path names a symbolic link.

    False is returned when os.lstat fails or is not available.
    """
    try:
        info = os.lstat(path)
    except (os.error, AttributeError):
        return False
    else:
        return stat.S_ISLNK(info.st_mode)
|
| 139 |
|
| 140 | # Being true for dangling symbolic links is also useful.
|
| 141 |
|
def lexists(path):
    """Return True if os.lstat succeeds on path, which includes symbolic
    links whose target is missing; return False otherwise."""
    try:
        os.lstat(path)
    except os.error:
        found = False
    else:
        found = True
    return found
|
| 149 |
|
| 150 |
|
| 151 | # Are two filenames really pointing to the same file?
|
| 152 |
|
def samefile(f1, f2):
    """Return True if both pathnames stat to the same file, as judged by
    samestat()."""
    # os.stat(f1) is evaluated before os.stat(f2), as arguments are
    # evaluated left to right.
    return samestat(os.stat(f1), os.stat(f2))
|
| 158 |
|
| 159 |
|
| 160 | # Are two open files really referencing the same file?
|
| 161 | # (Not necessarily the same file descriptor!)
|
| 162 |
|
def sameopenfile(fp1, fp2):
    """Return True if both arguments, each passed to os.fstat, describe the
    same file as judged by samestat()."""
    first_stat = os.fstat(fp1)
    second_stat = os.fstat(fp2)
    same = samestat(first_stat, second_stat)
    return same
|
| 168 |
|
| 169 |
|
| 170 | # Are two stat buffers (obtained from stat, fstat or lstat)
|
| 171 | # describing the same file?
|
| 172 |
|
def samestat(s1, s2):
    """Return True if two stat buffers carry the same i-node number and the
    same device number."""
    if s1.st_ino != s2.st_ino:
        # Different i-nodes: the device numbers need not be looked at.
        return False
    return s1.st_dev == s2.st_dev
|
| 177 |
|
| 178 |
|
| 179 | # Is a path a mount point?
|
| 180 | # (Does this work for all UNIXes? Is it even guaranteed to work by Posix?)
|
| 181 |
|
def ismount(path):
    """Return True if path is a mount point.

    A symbolic link is never reported as a mount point, and neither is a
    path that cannot be lstat'ed.  Otherwise path counts as a mount point
    when it and its parent directory are on different devices, or when
    both are the same i-node.
    """
    if islink(path):
        return False
    try:
        own = os.lstat(path)
        parent = os.lstat(realpath(join(path, '..')))
    except os.error:
        # path (or its parent) does not exist, so it cannot be a mount point
        return False
    if own.st_dev != parent.st_dev:
        # path/.. lives on a different device than path
        return True
    # path/.. being the very same i-node as path also marks a mount point
    return own.st_ino == parent.st_ino
|
| 201 |
|
| 202 |
|
| 203 | # Directory tree walk.
|
| 204 | # For each directory under top (including top itself, but excluding
|
| 205 | # '.' and '..'), func(arg, dirname, filenames) is called, where
|
| 206 | # dirname is the name of the directory and filenames is the list
|
| 207 | # of files (and subdirectories etc.) in the directory.
|
| 208 | # The func may modify the filenames list, to implement a filter,
|
| 209 | # or to impose a different order of visiting.
|
| 210 |
|
def walk(top, func, arg):
    """Walk a directory tree, invoking a callback for every directory.

    func(arg, dirname, fnames) is called for top and then for each
    directory found beneath it.  dirname is the directory's name and fnames
    is the list returned by os.listdir for it.  The callback may edit
    fnames in place (del, slice assignment); only the names still present
    afterwards are considered for recursion, which allows filtering or
    reordering the visit.  arg is passed through to func untouched and has
    no other meaning here; None is a common choice.

    Directories that cannot be listed and entries that cannot be lstat'ed
    are skipped silently.
    """
    warnings.warnpy3k("In 3.x, os.path.walk is removed in favor of os.walk.",
                      stacklevel=2)
    try:
        entries = os.listdir(top)
    except os.error:
        return
    func(arg, top, entries)
    # Iterate the same list object the callback may have modified.
    for entry in entries:
        child = join(top, entry)
        try:
            child_stat = os.lstat(child)
        except os.error:
            continue
        # lstat is used, so only entries that are directories themselves
        # are descended into.
        if stat.S_ISDIR(child_stat.st_mode):
            walk(child, func, arg)
|
| 240 |
|
| 241 |
|
| 242 | # Expand paths beginning with '~' or '~user'.
|
| 243 | # '~' means $HOME; '~user' means that user's home directory.
|
| 244 | # If the path doesn't begin with '~', or if the user or $HOME is unknown,
|
| 245 | # the path is returned unchanged (leaving error reporting to whatever
|
| 246 | # function is called with the expanded path as argument).
|
| 247 | # See also module 'glob' for expansion of *, ? and [...] in pathnames.
|
| 248 | # (A function should also be defined to do full *sh-style environment
|
| 249 | # variable expansion.)
|
| 250 |
|
def expanduser(path):
    """Expand a leading ~ or ~user to the corresponding home directory.

    A bare '~' is replaced by $HOME when that variable is set, otherwise by
    the password-database home directory of the current user id.  '~user'
    is replaced by that user's password-database home directory.

    The path is returned unchanged when it does not start with '~', when
    the named user is unknown, when the current user id has no
    password-database entry, or when the pwd module cannot be imported.
    Trailing slashes of the home directory are dropped; if the result would
    be empty, '/' is returned.
    """
    if not path.startswith('~'):
        return path
    # i marks the end of the '~' or '~user' prefix.
    i = path.find('/', 1)
    if i < 0:
        i = len(path)
    if i == 1:
        # Bare '~': prefer $HOME, fall back to the password database.
        if 'HOME' in os.environ:
            userhome = os.environ['HOME']
        else:
            try:
                import pwd
            except ImportError:
                # No password database on this platform: do nothing.
                return path
            try:
                userhome = pwd.getpwuid(os.getuid()).pw_dir
            except KeyError:
                # The current uid has no passwd entry: do nothing, as the
                # contract promises for an unknown user.
                return path
    else:
        try:
            import pwd
        except ImportError:
            return path
        try:
            pwent = pwd.getpwnam(path[1:i])
        except KeyError:
            return path
        userhome = pwent.pw_dir
    userhome = userhome.rstrip('/')
    # A home directory of '/' strips down to '', hence the fallback.
    return (userhome + path[i:]) or '/'
|
| 274 |
|
| 275 |
|
| 276 | # Expand paths containing shell variable substitutions.
|
| 277 | # This expands the forms $variable and ${variable} only.
|
| 278 | # Non-existent variables are left unchanged.
|
| 279 |
|
| 280 | _varprog = None
|
| 281 | _uvarprog = None
|
| 282 |
|
| 283 | def expandvars(path):
|
| 284 | """Expand shell variables of form $var and ${var}. Unknown variables
|
| 285 | are left unchanged."""
|
| 286 | global _varprog, _uvarprog
|
| 287 | if '$' not in path:
|
| 288 | return path
|
| 289 | if isinstance(path, _unicode):
|
| 290 | if not _uvarprog:
|
| 291 | import re
|
| 292 | _uvarprog = re.compile(ur'\$(\w+|\{[^}]*\})', re.UNICODE)
|
| 293 | varprog = _uvarprog
|
| 294 | encoding = sys.getfilesystemencoding()
|
| 295 | else:
|
| 296 | if not _varprog:
|
| 297 | import re
|
| 298 | _varprog = re.compile(r'\$(\w+|\{[^}]*\})')
|
| 299 | varprog = _varprog
|
| 300 | encoding = None
|
| 301 | i = 0
|
| 302 | while True:
|
| 303 | m = varprog.search(path, i)
|
| 304 | if not m:
|
| 305 | break
|
| 306 | i, j = m.span(0)
|
| 307 | name = m.group(1)
|
| 308 | if name.startswith('{') and name.endswith('}'):
|
| 309 | name = name[1:-1]
|
| 310 | if encoding:
|
| 311 | name = name.encode(encoding)
|
| 312 | if name in os.environ:
|
| 313 | tail = path[j:]
|
| 314 | value = os.environ[name]
|
| 315 | if encoding:
|
| 316 | value = value.decode(encoding)
|
| 317 | path = path[:i] + value
|
| 318 | i = len(path)
|
| 319 | path += tail
|
| 320 | else:
|
| 321 | i = j
|
| 322 | return path
|
| 323 |
|
| 324 |
|
| 325 | # Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B.
|
| 326 | # It should be understood that this may change the meaning of the path
|
| 327 | # if it contains symbolic links!
|
| 328 |
|
def normpath(path):
    """Collapse a pathname: drop empty and '.' components and resolve '..'
    against the preceding component where one exists.

    An empty path becomes '.'.  The result is unicode when the argument is.
    """
    # Pick separator and dot literals matching the type of the argument.
    if isinstance(path, _unicode):
        slash, dot = u'/', u'.'
    else:
        slash, dot = '/', '.'
    if path == '':
        return dot
    initial_slashes = path.startswith('/')
    # POSIX allows one or two initial slashes, but treats three or more
    # as a single slash.
    if (initial_slashes and
            path.startswith('//') and not path.startswith('///')):
        initial_slashes = 2
    kept = []
    for comp in path.split('/'):
        if comp in ('', '.'):
            continue
        if comp != '..':
            kept.append(comp)
        elif (not initial_slashes and not kept) or \
                (kept and kept[-1] == '..'):
            # A leading '..' of a relative path, or a '..' that follows
            # another '..', cannot be resolved and is kept.
            kept.append(comp)
        elif kept:
            kept.pop()
    path = slash.join(kept)
    if initial_slashes:
        path = slash * initial_slashes + path
    return path or dot
|
| 356 |
|
| 357 |
|
def abspath(path):
    """Return a normalized, absolute version of path.

    A relative path is first joined onto the current working directory
    (obtained as unicode when path is unicode).
    """
    if isabs(path):
        return normpath(path)
    cwd = os.getcwdu() if isinstance(path, _unicode) else os.getcwd()
    return normpath(join(cwd, path))
|
| 367 |
|
| 368 |
|
| 369 | # Return a canonical path (i.e. the absolute location of a file on the
|
| 370 | # filesystem).
|
| 371 |
|
def realpath(filename):
    """Return the canonical path of filename, with the symbolic links met
    along the way resolved, as an absolute path."""
    # Only the resolved path is needed; the success flag is ignored.
    resolved = _joinrealpath('', filename, {})[0]
    return abspath(resolved)
|
| 377 |
|
| 378 | # Join two paths, normalizing and eliminating any symbolic links
|
| 379 | # encountered in the second path.
|
def _joinrealpath(path, rest, seen):
    # Join rest onto path one component at a time, resolving symbolic links
    # met in rest.  seen maps each link path to its resolved value, or to
    # None while that link is still being resolved.
    # Returns (result, ok); ok is False when a symlink loop was detected.
    if isabs(rest):
        rest = rest[1:]
        path = sep

    while rest:
        component, _, rest = rest.partition(sep)
        if not component or component == curdir:
            # empty component or current dir: nothing to add
            continue
        if component == pardir:
            # parent dir
            if not path:
                path = pardir
            else:
                path, component = split(path)
                if component == pardir:
                    # path ended in '..' already, so keep it and add one more
                    path = join(path, pardir, pardir)
            continue
        candidate = join(path, component)
        if not islink(candidate):
            path = candidate
            continue
        # candidate is a symbolic link and has to be resolved
        if candidate in seen:
            cached = seen[candidate]
            if cached is None:
                # Still being resolved further up the call chain, so this is
                # a symlink loop.  Return the part resolved so far plus the
                # rest of the path unchanged.
                return join(candidate, rest), False
            # resolved earlier: reuse the cached value
            path = cached
            continue
        seen[candidate] = None  # resolution in progress
        path, resolved_ok = _joinrealpath(path, os.readlink(candidate), seen)
        if not resolved_ok:
            return join(path, rest), False
        seen[candidate] = path  # resolution finished

    return path, True
|
| 420 |
|
| 421 |
|
# True only when running on the 'darwin' platform.
supports_unicode_filenames = sys.platform == 'darwin'
|
| 423 |
|
def relpath(path, start=curdir):
    """Return path expressed relative to the directory start.

    Both arguments are made absolute first.  Raises ValueError when path is
    empty.  When path and start name the same location, curdir is returned.
    """
    if not path:
        raise ValueError("no path specified")

    start_parts = [part for part in abspath(start).split(sep) if part]
    path_parts = [part for part in abspath(path).split(sep) if part]

    # Number of leading components the two locations have in common.
    shared = len(commonprefix([start_parts, path_parts]))

    # Climb out of what is left of start, then descend into what is left
    # of path.
    climb = [pardir] * (len(start_parts) - shared)
    rel_parts = climb + path_parts[shared:]
    if rel_parts:
        return join(*rel_parts)
    return curdir
|