In [1]: #_______________________________________FDS(Assignment No-1)______________________________
# Q-1) Write a Python program to create a dataframe containing columns name,
        # age and percentage. Add 10 rows to the dataframe. View the dataframe.
              import pandas as pd
              df=pd.DataFrame(columns=['Name','Age','Percentage'])
              df.loc[0]=['Rohit',20,74]
              df.loc[1]=['Rohan',20,84]
              df.loc[2]=['Ram',21,94]
              df.loc[3]=['Rakesh',21,77]
              df.loc[4]=['om',20,66]
              df.loc[5]=['yash',20,70]
              df.loc[6]=['saurabh',21,84]
              df.loc[7]=['akash',20,94]
              df.loc[8]=['Ronney',20,84]
              df.loc[9]=['Rahul',20,94]
              df
Out[1]:                     Name     Age    Percentage
              0             Rohit     20               74
              1         Rohan         20               84
              2             Ram       21               94
              3        Rakesh         21               77
              4               om      20               66
              5             yash      20               70
              6       saurabh         21               84
              7         akash         20               94
              8        Ronney         20               84
              9             Rahul     20               94
In [4]: # Q-2) Write a Python program to print the shape, number of rows-columns,
        # data types, feature names and the description of the data
              import pandas as pd
              df=pd.DataFrame(columns=['Name','Age','Percentage'])
              df.loc[0]=['Rohit',20,74]
              df.loc[1]=['Rohan',20,84]
              df.loc[2]=['Ram',21,94]
              df.loc[3]=['Rakesh',21,77]
              df.loc[4]=['om',20,66]
              df.loc[5]=['yash',20,70]
              df.loc[6]=['saurabh',21,84]
              df.loc[7]=['akash',20,94]
              df.loc[8]=['Ronney',20,84]
              df.loc[9]=['Rahul',20,94]
              df
              s=df.shape
              print("shape: ",s)
              c=len(df.columns)
              print("No.of columns: ",c)
              r=len(df)
              print("No.of rows: ",r)
              df.dtypes
              fname=pd.get_dummies(df)
              print(fname)
              df.describe()
          shape: (10, 3)
          No.of columns: 3
          No.of rows: 10
             Age Percentage                        Name_Rahul        Name_Rakesh   Name_Ram   Name_Rohan   Name_Rohit
          0   20         74                             False              False      False        False         True   \
          1   20         84                             False              False      False         True        False
          2   21         94                             False              False       True        False        False
          3   21         77                             False               True      False        False        False
          4   20         66                             False              False      False        False        False
          5   20         70                             False              False      False        False        False
          6   21         84                             False              False      False        False        False
          7   20         94                             False              False      False        False        False
          8   20         84                             False              False      False        False        False
          9   20         94                              True              False      False        False        False
                  Name_Ronney               Name_akash           Name_om   Name_saurabh   Name_yash
          0             False                    False             False          False       False
          1             False                    False             False          False       False
          2             False                    False             False          False       False
          3             False                    False             False          False       False
          4             False                    False              True          False       False
          5             False                    False             False          False        True
          6             False                    False             False           True       False
          7             False                     True             False          False       False
          8              True                    False             False          False       False
          9             False                    False             False          False       False
Out[4]:                              Age        Percentage
              count          10.000000          10.000000
              mean           20.300000          82.100000
                      std     0.483046          10.181137
                  min        20.000000          66.000000
               25%           20.000000          74.750000
               50%           20.000000          84.000000
               75%           20.750000          91.500000
                  max        21.000000          94.000000
In [5]: # Q-3) Write a Python program to view basic statistical details of the data.
              import pandas as pd
              import numpy as np
              dict={"rno":['1','2','3','4'],"marks":['80','90','60','70']}
              df=pd.DataFrame(dict)
              df.astype(float).describe()
Out[5]:                              rno          marks
              count          4.000000           4.000000
              mean           2.500000       75.000000
                      std    1.290994       12.909944
                  min        1.000000       60.000000
               25%           1.750000       67.500000
               50%           2.500000       75.000000
               75%           3.250000       82.500000
                  max        4.000000       90.000000
In [1]: # Q-4) Write a Python program to Add 5 rows with duplicate values and
        # missing values. Add a column ‘remarks’ with empty values.
        # Display the data.
              import pandas as pd
              df=pd.DataFrame(columns=['Name','Age','Percentage'])
              df.loc[0]=['Rohit',20,74]
              df.loc[1]=['Rohan',20,84]
              df.loc[2]=['Ram',21,94]
              df.loc[3]=['Rakesh',21,77]
              df.loc[4]=['om',20,66]
              df.loc[5]=['yash',20,70]
              df.loc[6]=['saurabh',21,84]
              df.loc[7]=['akash',20,94]
              df.loc[8]=['Ronney',20,84]
              df.loc[9]=['Rahul',20,94]
              df.loc[10]=[None,None,None]
              df.loc[11]=['saurabh',21,84]
              df.loc[12]=[None,None,None]
              df.loc[13]=['Ronney',20,84]
              df.loc[14]=['Rahul',20,94]
              df.loc[15]=[None,None,None]
              df["remarks"]=None
              df
Out[1]:                     Name       Age       Percentage      remarks
                  0          Rohit         20               74     None
                  1         Rohan          20               84     None
                  2           Ram          21               94     None
                  3     Rakesh             21               77     None
                  4            om          20               66     None
                  5          yash          20               70     None
                  6     saurabh            21               84     None
                  7         akash          20               94     None
                  8     Ronney             20               84     None
                  9          Rahul         20               94     None
              10             None     None             None        None
              11        saurabh            21               84     None
              12             None     None             None        None
              13        Ronney             20               84     None
              14             Rahul         20               94     None
              15             None     None             None        None
In [2]: # Q-5) Write a Python program to get the number of observations, missing values
        # and duplicate values.
              import pandas as pd
              df=pd.DataFrame(columns=['Name','Age','Percentage'])
              df.loc[0]=['Rohit',20,74]
              df.loc[1]=['Rohan',20,84]
              df.loc[2]=['Ram',21,94]
              df.loc[3]=['Rakesh',21,77]
              df.loc[4]=['om',20,66]
              df.loc[5]=['yash',20,70]
              df.loc[6]=['saurabh',21,84]
              df.loc[7]=['akash',20,94]
              df.loc[8]=['Ronney',20,84]
              df.loc[9]=['Rahul',20,94]
              df.loc[10]=[None,None,None]
              df.loc[11]=['saurabh',21,84]
              df.loc[12]=[None,None,None]
              df.loc[13]=['Ronney',20,84]
              df.loc[14]=['Rahul',20,94]
              df.loc[15]=[None,None,None]
              obs=df.info()
              print("No.of observations: ",obs)
              duplicate=df.duplicated()
              print("Duplicated Values: ",duplicate)
              miss=df[df.isnull()]
              print("Missing values: ",miss)
          <class 'pandas.core.frame.DataFrame'>
          Index: 16 entries, 0 to 15
          Data columns (total 3 columns):
            #  Column       Non-Null Count Dtype
          --- ------        -------------- -----
            0  Name         13 non-null     object
            1  Age          13 non-null     object
            2  Percentage 13 non-null       object
          dtypes: object(3)
          memory usage: 512.0+ bytes
          No.of observations: None
          Duplicated Values: 0       False
          1     False
          2     False
          3     False
          4     False
          5     False
          6     False
          7     False
          8     False
          9     False
          10    False
          11     True
          12     True
          13     True
          14     True
          15     True
          dtype: bool
          Missing values:       Name   Age Percentage
          0    NaN    NaN        NaN
          1    NaN    NaN        NaN
          2    NaN    NaN        NaN
          3    NaN    NaN        NaN
          4    NaN    NaN        NaN
          5    NaN    NaN        NaN
          6    NaN    NaN        NaN
          7    NaN    NaN        NaN
          8    NaN    NaN        NaN
          9    NaN    NaN        NaN
          10 None None          None
          11   NaN    NaN        NaN
          12 None None          None
          13   NaN    NaN        NaN
          14   NaN    NaN        NaN
          15 None None          None
In [6]: # Q-6) Write a Python program to drop ‘remarks’ column from the dataframe.
        # Also drop all null and empty values.
        # Print the modified data.
              import pandas as pd
              df=pd.DataFrame(columns=['Name','Age','Percentage'])
              df.loc[0]=['Rohit',20,74]
              df.loc[1]=['Rohan',20,84]
              df.loc[2]=['Ram',21,94]
              df.loc[3]=['Rakesh',21,77]
              df.loc[4]=['om',20,66]
              df.loc[5]=['yash',20,70]
              df.loc[6]=['saurabh',21,84]
              df.loc[7]=['akash',20,94]
              df.loc[8]=['Ronney',20,84]
              df.loc[9]=['Rahul',20,94]
              df.loc[10]=[None,None,None]
              df.loc[11]=['saurabh',21,84]
              df.loc[12]=[None,None,None]
              df.loc[13]=['Ronney',20,84]
              df.loc[14]=['Rahul',20,94]
              df.loc[15]=[None,None,None]
              df["remarks"]=None
              df
              df.drop(columns='remarks',axis=1,inplace=True)
              df
Out[6]:                     Name       Age       Percentage
                  0          Rohit         20               74
                  1         Rohan          20               84
                  2           Ram          21               94
                  3     Rakesh             21               77
                  4            om          20               66
                  5          yash          20               70
                  6     saurabh            21               84
                  7         akash          20               94
                  8     Ronney             20               84
                  9          Rahul         20               94
              10             None     None             None
              11        saurabh            21               84
              12             None     None             None
              13        Ronney             20               84
              14             Rahul         20               94
              15             None     None             None
In [7]: # Q-7) Write a Python program to generate a line plot of name vs percentage.
              import matplotlib.pyplot as plt
              name=['Rohit','Rahul','Rohan','Rakesh','Ram']
              percentage=[80,78,89,79,88]
              plt.plot(name,percentage)
              plt.title("Line Graph")
              plt.xlabel("Name")
              plt.ylabel("Pecentage")
              plt.show()
In [8]: # Q-8) Write a Python program to generate a scatter plot of name vs percentage
              import matplotlib.pyplot as plt
              name=['Rohit','Rohan','Rahul','Ram','Rakesh']
              percentage=[78,80,90,67,88]
              plt.scatter(name,percentage)
              plt.title("Scatter Graph")
              plt.xlabel("Name")
              plt.ylabel("Percentage")
              plt.show()
In [ ]: