#! /usr/bin/env python3
"""filter_df_by_regex.py -- filter DataFrame rows by matching a regex.

Builds a small example DataFrame and prints only the rows whose "Type"
column matches a regular expression.

Based on:
https://stackoverflow.com/questions/37080612/pandas-dataframe-filter-regex/37080814#37080814
"""
import pandas as pd

# An "e" immediately preceded by a capital letter, or a "u" anywhere.
TYPE_PATTERN = r'[A-Z]e|u'


def filter_rows_by_regex(
    frame: pd.DataFrame, column: str, pattern: str
) -> pd.DataFrame:
    """Return the rows of *frame* whose *column* contains a match of *pattern*.

    Args:
        frame: DataFrame to filter; it is not modified.
        column: Name of a string column in *frame*.
        pattern: Regular expression searched for within each value.

    Returns:
        A DataFrame holding only the matching rows, with their original
        index labels.
    """
    # na=False treats missing values as "no match"; without it the mask
    # would contain missing values and could not be used for boolean indexing.
    mask = frame[column].str.contains(pattern, regex=True, na=False)
    return frame[mask]


data = {
    'Company': ['Ford', 'Ford', 'Ford', 'Ford', 'Chevy', 'Chevy'],
    'Type': ['Mercury', 'Lincoln', 'Lincoln', 'Econoline', 'Malabu', 'Pickups'],
    'Profit': [1, 100, 40, 99, 2, 3],
}
df = pd.DataFrame(data)
# print(df)
#   Company       Type  Profit
# 0    Ford    Mercury       1
# 1    Ford    Lincoln     100
# 2    Ford    Lincoln      40
# 3    Ford  Econoline      99
# 4   Chevy     Malabu       2
# 5   Chevy    Pickups       3

# Now print only rows that have an "e" in the Type
# preceded by a capital letter, or that have a "u".
#
# That's how I choose my cars :-)
print(filter_rows_by_regex(df, "Type", TYPE_PATTERN))